├── .codeclimate.yml ├── .gitignore ├── .rspec ├── .travis.yml ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── bin ├── console └── setup ├── examples ├── image_compression.rb └── neuroevolution.rb ├── lib ├── machine_learning_workbench.rb └── machine_learning_workbench │ ├── compressor.rb │ ├── compressor │ ├── copy_vq.rb │ ├── decaying_learning_rate_vq.rb │ ├── incr_dict_vq.rb │ └── vector_quantization.rb │ ├── monkey.rb │ ├── neural_network.rb │ ├── neural_network │ ├── base.rb │ ├── feed_forward.rb │ └── recurrent.rb │ ├── optimizer.rb │ ├── optimizer │ └── natural_evolution_strategies │ │ ├── base.rb │ │ ├── bdnes.rb │ │ ├── fnes.rb │ │ ├── rnes.rb │ │ ├── snes.rb │ │ └── xnes.rb │ ├── systems.rb │ ├── systems │ └── neuroevolution.rb │ ├── tools.rb │ └── tools │ ├── execution.rb │ ├── imaging.rb │ ├── logging.rb │ ├── normalization.rb │ └── verification.rb ├── machine_learning_workbench.gemspec └── spec ├── compressor └── vector_quantization_spec.rb ├── helpers └── uses_temporary_folders.rb ├── monkey └── monkey_spec.rb ├── neural_network └── neural_network_spec.rb ├── optimizer └── natural_evolution_strategies │ ├── individuals_spec.rb │ ├── magic_numbers_spec.rb │ └── nes_spec.rb ├── spec_helper.rb └── systems └── neuroevo_spec.rb /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | engines: 3 | rubocop: 4 | enabled: true 5 | duplication: 6 | enabled: true 7 | config: 8 | languages: 9 | - ruby 10 | ratings: 11 | paths: 12 | - lib/** 13 | - "**.rb" 14 | exclude_paths: 15 | - spec/** 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | Gemfile.lock 10 | /stats/ 11 | # rspec failure tracking 12 | .rspec_status 13 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: ruby 3 | rvm: 4 | - 2.4.2 5 | addons: 6 | apt: 7 | packages: 8 | - libopenblas-base 9 | - liblapacke 10 | before_install: gem install bundler -v 1.16.0 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } 4 | 5 | # Specify your gem's dependencies in machine_learning_workbench.gemspec 6 | gemspec 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Giuseppe Cuccu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons 
to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Machine Learning Workbench](https://github.com/giuse/machine_learning_workbench) 2 | 3 | [![Gem Version](https://badge.fury.io/rb/machine_learning_workbench.svg)](https://badge.fury.io/rb/machine_learning_workbench) 4 | [![Build Status](https://travis-ci.org/giuse/machine_learning_workbench.svg?branch=master)](https://travis-ci.org/giuse/machine_learning_workbench) 5 | [![Code Climate](https://codeclimate.com/github/giuse/machine_learning_workbench/badges/gpa.svg)](https://codeclimate.com/github/giuse/machine_learning_workbench) 6 | 7 | This workbench holds a collection of machine learning methods in Ruby. Rather than specializing in a single task or method, this gem aims at providing an encompassing framework for any machine learning application. 8 | 9 | ## Installation 10 | 11 | Add this line to your application's Gemfile: 12 | 13 | ```ruby 14 | gem 'machine_learning_workbench' 15 | ``` 16 | 17 | And then execute: 18 | 19 | $ bundle 20 | 21 | Or install it yourself as: 22 | 23 | $ gem install machine_learning_workbench 24 | 25 | ## Usage 26 | 27 | TLDR: Check out [the `examples` directory](examples), e.g. [this script](examples/neuroevolution.rb). 28 | 29 | This library is meant as a practical workbench: there are plenty of tools hanging, each with multiple uses and applications, and as such each is built to be as atomic and flexible as possible. Folders [in the lib structure](lib/machine_learning_workbench) categorize them. 30 | 31 | The [systems directory](lib/machine_learning_workbench/systems) holds a few examples of how to bring them together in higher abstractions, i.e. as _compound tools_. 32 | For example, a [neuroevolution setup](lib/machine_learning_workbench/systems/neuroevolution.rb) brings together evolutionary computation and neural networks. 33 | 34 | For an example of how to build one from scratch, check out this [neuroevolution script](examples/neuroevolution.rb). To run it, use `bundle exec ruby examples/neuroevolution.rb`. 35 | 36 | 37 | ## Development 38 | 39 | After cloning the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. 40 | 41 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 
42 | 43 | 44 | ## Contributing 45 | 46 | Bug reports and pull requests are welcome on GitHub at https://github.com/giuse/machine_learning_workbench. 47 | 48 | ## License 49 | 50 | The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). 51 | 52 | ## References 53 | 54 | Please feel free to contribute to this list (see `Contributing` above). 55 | 56 | - **NES** stands for Natural Evolution Strategies. Check its [Wikipedia page](https://en.wikipedia.org/wiki/Natural_evolution_strategy) for more info. 57 | - **CMA-ES** stands for Covariance Matrix Adaptation Evolution Strategy. Check its [Wikipedia page](https://en.wikipedia.org/wiki/CMA-ES) for more info. 58 | - **UL-ELR** stands for Unsupervised Learning plus Evolutionary Reinforcement Learning, from the paper _"Intrinsically Motivated Neuroevolution for Vision-Based Reinforcement Learning" (ICDL2011)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 59 | - **BD-NES** stands for Block Diagonal Natural Evolution Strategy, from the homonymous paper _"Block Diagonal Natural Evolution Strategies" (PPSN2012)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 60 | - **RNES** stands for Radial Natural Evolution Strategy, from the paper _"Novelty-Based Restarts for Evolution Strategies" (CEC2011)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 61 | - **DLR-VQ** stands for Decaying Learning Rate Vector Quantization, from the algorithm originally named _*Online VQ*_ in the paper _"Intrinsically Motivated Neuroevolution for Vision-Based Reinforcement Learning" (ICDL2011)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 62 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | require "rspec/core/rake_task" 5 | 6 | RSpec::Core::RakeTask.new(:spec) 7 | 8 | task :default => :spec 9 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'bundler/setup' 4 | require 'machine_learning_workbench' 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 
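# For example (an illustrative sketch, not part of the original file), you could
# pre-build a small network to poke at from the console:
# net = WB::NeuralNetwork::FeedForward.new [2,2,1], act_fn: :logistic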
8 | 9 | require 'pry' 10 | Pry.start 11 | 12 | # alternatively: 13 | # require "irb" 14 | # IRB.start(__FILE__) 15 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | # ubuntu: 7 | sudo apt install libopenblas-base liblapacke # for numo-linalg 8 | 9 | bundle install 10 | 11 | # Do any other automated setup that you need to do here 12 | -------------------------------------------------------------------------------- /examples/image_compression.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Run as: `bundle exec ruby examples/image_compression.rb` 4 | 5 | require 'rmagick' 6 | require 'machine_learning_workbench' 7 | VectorQuantization = MachineLearningWorkbench::Compressor::VectorQuantization 8 | Img = MachineLearningWorkbench::Tools::Imaging 9 | Norm = MachineLearningWorkbench::Tools::Normalization 10 | 11 | ncentrs = 1 12 | # image_files = Dir[ENV['HOME']+'/jaffe/KA.HA*.png'] 13 | image_files = Dir[ENV['HOME']+'/jaffe/*.png'] 14 | raise "Download the JAFFE dataset in your home dir" if image_files&.empty? 15 | # ... and convert the `.tiff` in `.png`: `mogrify -format png jaffe/*.tiff` 16 | centr_range = [-1, 1] 17 | orig_shape = [256, 256] 18 | img_range = [0, 2**16-1] 19 | 20 | puts "Loading images" 21 | images = image_files.map do |fname| 22 | ary = Img.narr_from_png fname, flat: true 23 | ret = Norm.feature_scaling ary, from: img_range, to: centr_range 24 | end 25 | 26 | puts "Initializing VQ" 27 | vq = VectorQuantization.new ncentrs: ncentrs, 28 | dims: images.first.shape, lrate: 0.3, vrange: centr_range 29 | 30 | puts "Training" 31 | vq.train images, debug: true 32 | 33 | puts "Done!" 34 | begin 35 | vq.centrs.map { |c| Img.display c, shape: orig_shape } 36 | require 'pry'; binding.pry 37 | ensure 38 | MachineLearningWorkbench::Tools::Execution.kill_forks 39 | end 40 | -------------------------------------------------------------------------------- /examples/neuroevolution.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Run as: `bundle exec ruby examples/neuroevolution.rb` 4 | 5 | # Make sure the gem is installed first with `gem install machine_learning_workbench` 6 | # Alternatively, add `gem 'machine_learning_workbench'` to your Gemfile if using Bundle, 7 | # followed by a `bundle install` 8 | require 'machine_learning_workbench' 9 | # Workbench shorthands 10 | XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES 11 | FFNN = WB::NeuralNetwork::FeedForward 12 | 13 | # Let's address the XOR problem, as it requires nonlinear fitting 14 | XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0} 15 | # A classic [2,2,1] (2 inputs, 2 hidden neurons, 1 output neurons) feed-forward 16 | # network with nonlinear activations can solve this problem. 17 | # To approximate more complex functions, keep the number of inputs and outputs 18 | # fixed (they depend on the problem) and increase the number and/or size of 19 | # hidden neurons. For example: [2, 10, 7, 4, 1]. 20 | # NOTE: If your network grows above few thousands of weights, XNES may be too slow. 21 | # Try using SNES for large shallow networks or BDNES for deep networks. 
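# For instance, an SNES setup could look like this (a sketch: it assumes SNES accepts
# the same constructor arguments as the XNES call shown further below):
# nes = WB::Optimizer::NaturalEvolutionStrategies::SNES.new NET.nweights, method(:fitness), :max, rseed: 0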
22 | NET = FFNN.new [2,2,1], act_fn: :logistic 23 | # Note: the process is exactly the same, from instantiation to training, for recurrent 24 | # networks using the class `WB::NeuralNetwork::Recurrent`. 25 | # Of course RNNs should be applied to sequential tasks, while XOR is static 26 | 27 | # We will search for the network's weights with a black-box optimization algorithm 28 | # This means we will search for arrays of numbers, which need to be scored. 29 | # The scoring process will work as follows: use the numbers as weights for the neural 30 | # network, test the network on classifying the 4 cases of XOR, use that count as the 31 | # score for the weights (original array of numbers). 32 | 33 | # Hence the fitness looks as follows: 34 | def fitness weights 35 | # Each list of weights uniquely defines a neural network 36 | NET.load_weights weights 37 | # Activate the network on each of the XOR instances 38 | # - prediction: the output of the network 39 | # - observation: correct value, our target 40 | pred_obs = XOR.map do |input, obs| 41 | # The network can have an arbitrary number of output neurons 42 | # Since here we have only one, we extract the value as the output 43 | output = NET.activate(input)[0] 44 | # Here we interpret the output as classification 45 | pred = output > 0.5 ? 1 : 0 46 | # Finally accumulate prediction-observation pairs 47 | [pred, obs] 48 | end 49 | # To build a score out of this, we count the number of correct classifications 50 | score = Float(pred_obs.count { |pr, ob| pr == ob }) 51 | # That's it, this will score the weights based on their network's performance 52 | end 53 | 54 | # Next comes initializing the black-box stochastic optimization algorithm 55 | # We are searching for the network's weights; this gives us the search space dimensionality 56 | # We'll use XNES as we are working with fewer than 100 dimensions (weights) 57 | nes = XNES.new NET.nweights, method(:fitness), :max, rseed: 0 58 | # Note: BDNES requires `NET.nweights_per_layer` rather than `NET.nweights` in initialization: 59 | # nes = WB::Optimizer::NaturalEvolutionStrategies::BDNES.new NET.nweights_per_layer, 60 | # method(:fitness), :max, rseed: 10 61 | # The random seed is fixed here to ensure reproducible behavior 62 | # In a real task, it is best to use an oversized network, run more iterations, and try several seeds 63 | 64 | # NOTE: In practical applications it is best to delegate parallelization to the fitness 65 | # function instead of computing the fitness of one individual at a time. This can be 66 | # achieved by passing an objective function defined on a _list_ of weight-lists, and 67 | # setting the `parallel_fit` switch to `true`: 68 | # nes = XNES.new NET.nweights, 69 | # -> (genotypes) { Parallel.map genotypes, &method(:fitness) }, 70 | # :max, rseed: 0, parallel_fit: true 71 | 72 | 73 | # Nothing left but to run the optimization algorithm 74 | # Depending on the random seed (read: luck), a few epochs here will suffice 75 | 50.times { nes.train } 76 | # OK! Now remember, `NET` currently holds the weights of the last evaluation 77 | # Let's fetch the best individual found so far 78 | best_fit, best_weights = nes.best 79 | # Let's run them again to check they work 80 | result = fitness best_weights 81 | # Note: if you defined a parallel fitness above, you'll need instead 82 | # result = fitness([best_weights])[0] 83 | puts "The found network achieves a score of #{result} out of #{XOR.size} in the XOR task" 84 | puts "Weights: #{best_weights.to_a}" 85 | puts "Done!" 86 | # That's it! 
18 lines and you got a working neuroevolution algorithm, congrats :) 87 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | gpu = false # prepare for switching to GPUs 4 | if gpu 5 | require 'cumo/narray' 6 | Xumo = Cumo 7 | require 'cumo/linalg' 8 | else 9 | require 'numo/narray' 10 | Xumo = Numo 11 | # gem `numo-linalg` depends on openblas and lapacke: 12 | # `sudo apt install libopenblas-base liblapacke` 13 | require 'numo/linalg' 14 | end 15 | 16 | # Shorthands 17 | NArray = Xumo::DFloat # set a single data type across the WB for now 18 | NMath = Xumo::NMath # shorthand for extended math module 19 | NLinalg = Xumo::Linalg # shorthand for linear algebra module 20 | 21 | module MachineLearningWorkbench 22 | module Compressor 23 | end 24 | module NeuralNetwork 25 | end 26 | module Optimizer 27 | end 28 | module Tools 29 | end 30 | end 31 | WB = MachineLearningWorkbench # import MachineLearningWorkbench as WB ;) 32 | 33 | require_relative 'machine_learning_workbench/monkey' 34 | require_relative 'machine_learning_workbench/tools' 35 | require_relative 'machine_learning_workbench/compressor' 36 | require_relative 'machine_learning_workbench/neural_network' 37 | require_relative 'machine_learning_workbench/optimizer' 38 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'compressor/vector_quantization' 4 | require_relative 'compressor/decaying_learning_rate_vq' 5 | require_relative 'compressor/copy_vq' 6 | require_relative 'compressor/incr_dict_vq' 7 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/copy_vq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | # Train-less VQ, copying new images into centroids 5 | # Optimized for online training. 
6 | class CopyVQ < VectorQuantization 7 | 8 | attr_reader :equal_simil, :next_train 9 | 10 | def initialize **opts 11 | puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate] 12 | puts "Ignoring similarity: `simil_type: #{opts[:simil_type]}`" if opts[:simil_type] 13 | # TODO: try different epsilons to reduce the number of states 14 | # for example, in qbert we care what is lit and what is not, not the colors 15 | @equal_simil = opts.delete(:equal_simil) || 0.0 16 | super **opts.merge({lrate: nil, simil_type: nil}) 17 | @ntrains << 0 # to count duplicates, images we skip the train on 18 | @next_train = 0 # pointer to the next centroid to train 19 | end 20 | 21 | def ntrains; @ntrains[0...-1]; end 22 | def ntrains_skip; @ntrains.last; end 23 | 24 | # Overloading lrate check from original VQ 25 | def check_lrate lrate; nil; end 26 | 27 | # Train on one vector: 28 | # - train only if the image is not already in dictionary 29 | # - find the next untrained centroid 30 | # - training is just overwriting it 31 | # @return [Integer] index of trained centroid 32 | def train_one vec, eps: equal_simil 33 | mses = centrs.map do |centr| 34 | ((centr-vec)**2).sum / centr.size 35 | end 36 | # BEWARE: I am currently not handling the case where we run out of centroids! 37 | # => Will be addressed directly by dynamic dictionary size 38 | # return -1 if mses.min < eps 39 | return -1 if mses.min < eps || next_train == ncentrs 40 | trg_idx = next_train 41 | @next_train += 1 42 | # require 'pry'; binding.pry if next_train == ncentrs 43 | puts "Overwriting centr #{next_train}" 44 | # norm_vec = vec / NLinalg.norm(vec) 45 | # centrs[trg_idx, true] = norm_vec 46 | centrs[trg_idx, true] = vec 47 | trg_idx 48 | end 49 | 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/decaying_learning_rate_vq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | # VQ with per-centroid decaying learning rates. 5 | # Optimized for online training. 6 | class DecayingLearningRateVQ < VectorQuantization 7 | 8 | attr_reader :lrate_min, :lrate_min_den, :decay_rate 9 | 10 | def initialize **opts 11 | puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate] 12 | @lrate_min = opts.delete(:lrate_min) || 0.001 13 | @lrate_min_den = opts.delete(:lrate_min_den) || 1 14 | @decay_rate = opts.delete(:decay_rate) || 1 15 | super **opts.merge({lrate: nil}) 16 | end 17 | 18 | # Overloading lrate check from original VQ 19 | def check_lrate lrate; nil; end 20 | 21 | # Decaying per-centroid learning rate. 
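# i.e. lrate(i) = max( 1.0 / (ntrains[i] * decay_rate + lrate_min_den), lrate_min )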
22 | # @param centr_idx [Integer] index of the centroid 23 | # @param lower_bound [Float] minimum learning rate 24 | # @note nicely overloads the `attr_reader` of parent class 25 | def lrate centr_idx, min_den: lrate_min_den, lower_bound: lrate_min, decay: decay_rate 26 | [1.0/(ntrains[centr_idx]*decay+min_den), lower_bound].max 27 | .tap { |l| puts "centr: #{centr_idx}, ntrains: #{ntrains[centr_idx]}, lrate: #{l}" } 28 | end 29 | 30 | # Train on one vector 31 | # @return [Integer] index of trained centroid 32 | def train_one vec, eps: nil 33 | # NOTE: ignores epsilon if passed 34 | trg_idx, _simil = most_similar_centr(vec) 35 | # norm_vec = vec / NLinalg.norm(vec) 36 | # centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate(trg_idx)) + norm_vec * lrate(trg_idx) 37 | centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate(trg_idx)) + vec * lrate(trg_idx) 38 | trg_idx 39 | end 40 | 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/incr_dict_vq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | # Incremental Dictionary Train-less VQ, creating new centroids rather than training 5 | # Optimized for online training. 6 | # TODO: as the deadline grows nigh, the hacks grow foul. Refactor all VQs together. 7 | class IncrDictVQ < VectorQuantization 8 | 9 | attr_reader :equal_simil 10 | undef :ntrains # centroids are not trained 11 | 12 | def initialize **opts 13 | puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate] 14 | puts "Ignoring similarity: `simil_type: #{opts[:simil_type]}`" unless opts[:simil_type] == :dot 15 | puts "Ignoring ncentrs: `ncentrs: #{opts[:ncentrs]}`" if opts[:ncentrs] 16 | # TODO: try different epsilons to reduce the number of states 17 | # for example, in qbert we care what is lit and what is not, not the colors 18 | @equal_simil = opts.delete(:equal_simil) || 0.0 19 | super **opts.merge({ncentrs: 1, lrate: nil, simil_type: :dot}) 20 | 21 | @ntrains = nil # will disable the counting 22 | end 23 | 24 | # Overloading lrate check from original VQ 25 | def check_lrate lrate; nil; end 26 | 27 | # Train on one vector: 28 | # - train only if the image is not already in dictionary 29 | # - create new centroid from the image 30 | # @return [Integer] index of new centroid 31 | def train_one vec, eps: equal_simil 32 | # NOTE: novelty needs to be re-computed for each image, as after each 33 | # training the novelty signal changes! 34 | 35 | # NOTE the reconstruction error here depends once more on the _color_ 36 | # this is wrong and should be taken out of the equation 37 | # NOTE: this is fixed if I use the differences sparse coding method 38 | residual_img = reconstr_error(vec) 39 | rec_err = residual_img.mean 40 | return -1 if rec_err < eps 41 | puts "Creating centr #{ncentrs} (rec_err: #{rec_err})" 42 | # norm_vec = vec / NLinalg.norm(vec) 43 | # @centrs = centrs.concatenate norm_vec 44 | # @centrs = centrs.concatenate vec 45 | @centrs = centrs.concatenate residual_img 46 | # HACK: make it more general by using `code_size` 47 | @utility = @utility.concatenate [0] * (encoding_type == :sparse_coding_v1 ? 
2 : 1) 48 | ncentrs 49 | end 50 | 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/vector_quantization.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | 5 | # Standard Vector Quantization 6 | class VectorQuantization 7 | attr_reader :centrs, :dims, :vrange, :init_centr_vrange, :lrate, 8 | :simil_type, :encoding_type, :rng, :ntrains, :utility, :ncodes 9 | attr_writer :utility, :ncodes # allows access from outside 10 | 11 | def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed 12 | 13 | @rng = Random.new rseed # TODO: RNG CURRENTLY NOT USED!! 14 | 15 | @dims = Array(dims) 16 | check_lrate lrate # hack: so that we can overload it in dlr_vq 17 | @lrate = lrate 18 | @simil_type = simil_type || raise("missing simil_type") 19 | @encoding_type = encoding_type || raise("missing encoding_type") 20 | @init_centr_vrange ||= vrange 21 | @vrange = case vrange 22 | when Array 23 | raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2 24 | vrange.map &method(:Float) 25 | when Range 26 | [vrange.first, vrange.last].map &method(:Float) 27 | else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}" 28 | end 29 | init_centrs nc: ncentrs 30 | @ntrains = [0]*ncentrs # per-centroid number of trainings 31 | @utility = NArray.zeros [code_size] # trace how 'useful' are centroids to encodings 32 | @ncodes = 0 33 | end 34 | 35 | def ncentrs 36 | @centrs.shape.first 37 | end 38 | 39 | # HACKKETY HACKKETY HACK (can't wait to refactor after the deadline) 40 | def code_size 41 | encoding_type == :sparse_coding_v1 ? 2*ncentrs : ncentrs 42 | end 43 | 44 | # Verify lrate to be present and withing unit bounds 45 | # As a separate method only so it can be overloaded in `DecayingLearningRateVQ` 46 | def check_lrate lrate 47 | raise ArgumentError, "Pass a `lrate` between 0 and 1" unless lrate&.between?(0,1) 48 | end 49 | 50 | # Initializes a list of centroids 51 | def init_centrs nc: ncentrs, base: nil, proport: nil 52 | @centrs = nc.times.map { new_centr base, proport }.to_na 53 | end 54 | 55 | # Creates a new (random) centroid 56 | # If a base is passed, this is meshed with the random centroid. 57 | # This is done to facilitate distributing the training across centroids. 58 | # TODO: USE RNG HERE!! 59 | def new_centr base=nil, proport=nil 60 | raise ArgumentError, "Either both or none" if base.nil? ^ proport.nil? 61 | # require 'pry'; binding.pry if base.nil? ^ proport.nil? 
62 | ret = NArray.new(*dims).rand(*init_centr_vrange) 63 | ret = ret * (1-proport) + base * proport if base&&proport 64 | ret 65 | end 66 | 67 | # SIMIL = { 68 | # dot: -> (centr, vec) { centr.dot(vec) }, 69 | # mse: -> (centr, vec) { -((centr-vec)**2).sum / centr.size } 70 | # } 71 | 72 | # Computes similarities between vector and all centroids 73 | def similarities vec, type: simil_type 74 | raise NotImplementedError if vec.shape.size > 1 75 | raise "need to check since centrs is a NArray now" if type == :mse 76 | # simil_fn = SIMIL[type] || raise(ArgumentError, "Unrecognized simil #{type}") 77 | # centrs.map { |centr| simil_fn.call centr, vec } 78 | centrs.dot vec 79 | end 80 | 81 | # Encode a vector 82 | # tracks utility of centroids based on how much they contribute to encoding 83 | # TODO: `encode = Encodings.const_get(type)` in initialize` 84 | # NOTE: hashes of lambdas or modules cannot access ncodes and utility 85 | # TODO: refactor anyway through `stats` object, this thing is getting out of hand 86 | def encode vec, type: encoding_type 87 | case type 88 | when :most_similar 89 | simils = similarities vec 90 | code = simils.max_index 91 | @ncodes += 1 92 | @utility[code] += 1 93 | code 94 | when :most_similar_ary 95 | simils = similarities vec 96 | code = simils.new_zeros 97 | code[simils.max_index] = 1 98 | @ncodes += 1 99 | @utility += code 100 | code 101 | when :ensemble 102 | simils = similarities vec 103 | code = simils 104 | tot = simils.sum 105 | tot = 1 if tot < 1e-5 # HACK: avoid division by zero 106 | contrib = code / tot 107 | @ncodes += 1 108 | @utility += (contrib - utility) / ncodes # cumulative moving average 109 | code 110 | when :norm_ensemble 111 | simils = similarities vec 112 | tot = simils.sum 113 | # NOTE this actually makes a big discontinuity if the total is equal to zero. 114 | # Does that even ever happen? I guess only w/ reset img (zeros) as lone centroid. 115 | # Which after first gen is really useless and should just be dropped anyway... 116 | tot = 1 if tot < 1e-5 # HACK: avoid division by zero 117 | code = simils / tot 118 | @ncodes += 1 119 | @utility += (code - utility) / ncodes # cumulative moving average 120 | code 121 | when :sparse_coding_v1 122 | raise "requires centroids normalized to unit length!" 123 | @encoder = nil if @encoder&.shape&.first != centrs.shape.first 124 | # Danafar & Cuccu: compact form linear regression encoder 125 | @encoder ||= (centrs.dot centrs.transpose).invert.dot centrs 126 | 127 | raw_code = @encoder.dot(vec) 128 | # separate positive and negative features (NOTE: all features will be positive) 129 | # i.e. split[0...n] = max {0, raw[i]}; split[n...2*n] = max {0, -raw[i]} 130 | # TODO: cite Coates & Ng 131 | # TODO: optimize and remove redundant variables 132 | split_code = raw_code.concatenate(-raw_code) 133 | split_code[split_code<0] = 0 134 | # normalize such that the code sums to 1 135 | norm_code = split_code / split_code.sum 136 | # Danafar: drop to say 80% of info (à la pca) 137 | thold = 0.2 138 | sparse_code = norm_code.dup 139 | sum = 0 140 | # NOTE: the last element in the sort below has the highest contribution and 141 | # should NEVER be put to 0, even if it could contribute alone to 100% of the 142 | # total 143 | # NOTE: upon further study I disagree this represent information content unless 144 | # the centroids are unit vectors. So I'm commenting this implementation now, 145 | # together with the following, until I implement a switch to normalize the 146 | # centroids based on configuration. 
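# The loop below drops the smallest contributions first (never the single largest
# one), stopping once roughly `thold` (20%) of the normalized mass has been zeroed
# out; the surviving entries are then re-normalized to sum to 1.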
147 | 148 | 149 | 150 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 151 | # norm_code.sort_index[0...-1].each do |idx| 152 | norm_code.size.times.sort_by { |i| norm_code[i] }[0...-1].each do |idx| 153 | 154 | 155 | 156 | sparse_code[idx] = 0 157 | sum += norm_code[idx] 158 | break if sum >= thold # we know the code's total is normalized to 1 and has no negatives 159 | end 160 | code = sparse_code / sparse_code.sum # re-normalize sum to 1 161 | 162 | @ncodes += 1 163 | @utility += (code - utility) / ncodes # cumulative moving average 164 | code 165 | when :sparse_coding_v2 166 | # Cuccu & Danafar: incremental reconstruction encoding 167 | # turns out to be closely related to (Orthogonal) Matching Pursuit 168 | raise "requires centroids normalized to unit length!" 169 | # return centrs.dot vec # speed test for the rest of the system 170 | sparse_code = NArray.zeros code_size 171 | resid = vec 172 | # cap the number of non-zero elements in the code 173 | max_nonzero = [1,ncentrs/3].max 174 | max_nonzero.times do |i| 175 | # OPT: remove msc from centrs at each loop 176 | # the algorithm should work even without this opt because 177 | # we are working on the residuals each time 178 | simils = centrs.dot resid 179 | 180 | 181 | 182 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 183 | # msc = simils.max_index 184 | simils = simils.to_a 185 | simils_abs = simils.map &:abs 186 | msc = simils_abs.index simils_abs.max # most similar centroid 187 | 188 | 189 | 190 | max_simil = simils[msc] 191 | # remember to distinguish here to use the pos/neg features trick 192 | sparse_code[msc] = max_simil 193 | reconstr = max_simil * centrs[msc, true] 194 | resid -= reconstr 195 | # puts "resid#{i} #{resid.abs.mean}" # if debug 196 | epsilon = 0.005 197 | # print resid.abs.mean, ' ' 198 | # print sparse_code.to_a, ' ' 199 | break if resid.abs.mean <= epsilon 200 | end 201 | 202 | # should normalize sum to 1? 203 | code = sparse_code #/ sparse_code.sum # normalize sum to 1 204 | 205 | @ncodes += 1 206 | @utility += (code - utility) / ncodes # cumulative moving average 207 | code 208 | when :sparse_coding 209 | # Cuccu: Direct residual encoding 210 | # return centrs.dot vec # speed test for the rest of the system 211 | sparse_code = NArray.zeros code_size 212 | resid = vec 213 | # cap the number of non-zero elements in the code 214 | max_nonzero = [1,ncentrs/3].max 215 | max_nonzero.times do |i| 216 | # OPT: remove msc from centrs at each loop 217 | # the algorithm should work even without this opt because 218 | # we are working on the residuals each time 219 | diff = (centrs - resid).abs.sum(1) 220 | 221 | 222 | 223 | # BUG IN NARRAY SORT!! 
ruby-numo/numo-narray#97 224 | # msc = diff.max_index 225 | diff = diff.to_a 226 | msc = diff.index diff.min # most similar centroid 227 | 228 | 229 | 230 | min_diff = diff[msc] 231 | # remember to distinguish here to use the pos/neg features trick 232 | sparse_code[msc] = 1 233 | reconstr = centrs[msc, true] 234 | resid -= reconstr 235 | resid[(resid<0).where] = 0 # ignore artifacts introduced by the centroids in reconstruction 236 | 237 | # puts "resid#{i} #{resid.abs.mean}" # if debug 238 | epsilon = 0.005 239 | # print resid.abs.mean, ' ' if $ngen == 2; exit if $ngen==3 240 | # print sparse_code.to_a, ' ' if $ngen == 3; exit if $ngen==4 241 | break if resid.abs.mean <= epsilon 242 | end 243 | 244 | code = sparse_code 245 | @ncodes += 1 246 | @utility += (code - utility) / ncodes # cumulative moving average 247 | code 248 | else raise ArgumentError, "Unrecognized encode #{type}" 249 | end 250 | end 251 | 252 | # Reconstruct vector from its code (encoding) 253 | def reconstruction code, type: encoding_type 254 | case type 255 | when :most_similar 256 | centrs[code, true] 257 | when :most_similar_ary 258 | centrs[code.eq(1), true] 259 | when :ensemble 260 | # tot = code.reduce :+ 261 | # centrs.zip(code).map { |centr, contr| centr*contr/tot }.reduce :+ 262 | centrs.dot(code) / code.sum 263 | when :norm_ensemble 264 | centrs.dot code 265 | # centrs.zip(code).map { |centr, contr| centr*contr }.reduce :+ 266 | when :sparse_coding_v1 267 | raise "requires normalized centroids!" 268 | reconstr_code = code[0...(code.size/2)] - code[(code.size/2)..-1] 269 | reconstr = centrs.transpose.dot reconstr_code 270 | when :sparse_coding_v2 271 | raise "requires normalized centroids!" 272 | 273 | 274 | # BUG IN NARRAY DOT!! ruby-numo/numo-narray#99 275 | # reconstr = code.dot centrs 276 | reconstr = code.expand_dims(0).dot centrs 277 | 278 | 279 | when :sparse_coding 280 | # the code is binary, so just sum over the corresponding centroids 281 | # note: sum, not mean, because of how it's used in reconstr_error 282 | reconstr = centrs[code.cast_to(Numo::Bit).where, true].sum(0) 283 | else raise ArgumentError, "unrecognized reconstruction type: #{type}" 284 | end 285 | end 286 | 287 | # Returns index and similitude of most similar centroid to vector 288 | # @return [Array] the index of the most similar centroid, 289 | # followed by the corresponding similarity 290 | def most_similar_centr vec 291 | simils = similarities vec 292 | max_idx = simils.max_index 293 | [max_idx, simils[max_idx]] 294 | end 295 | 296 | # Per-pixel errors in reconstructing vector 297 | # @return [NArray] residuals 298 | def reconstr_error vec, code: nil, type: encoding_type 299 | code ||= encode vec, type: type 300 | resid = vec - reconstruction(code, type: type) 301 | # we ignore the extra stuff coming from the centroids, 302 | # only care that everything in the obs is represented in centrs 303 | resid[resid<0] = 0 if encoding_type == :sparse_coding 304 | resid 305 | end 306 | 307 | # Train on one vector 308 | # @return [Integer] index of trained centroid 309 | def train_one vec, eps: nil 310 | # NOTE: ignores epsilon if passed 311 | trg_idx, _simil = most_similar_centr(vec) 312 | # note: uhm that actually looks like a dot product... maybe faster? 
313 | # `[c[i], vec].dot([1-lrate, lrate])` 314 | # norm_vec = vec / NLinalg.norm(vec) 315 | # centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate) + norm_vec * lrate 316 | centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate) + vec * lrate 317 | trg_idx 318 | end 319 | 320 | # Train on vector list 321 | def train vec_lst, debug: false 322 | # Two ways here: 323 | # - Batch: canonical, centrs updated with each vec 324 | # - Parallel: could be parallel either on simils or on training (?) 325 | # Unsure on the correctness of either Parallel, let's stick with Batch 326 | vec_lst.each_with_index do |vec, i| 327 | trained_idx = train_one vec 328 | print '.' if debug 329 | @ntrains[trained_idx] += 1 if @ntrains 330 | end 331 | end 332 | end 333 | end 334 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/monkey.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Monkey patches 4 | 5 | module MachineLearningWorkbench::Monkey 6 | module Dimensionable 7 | def dims ret: [] 8 | ret << size 9 | if first.kind_of? Array 10 | # hypothesize all elements having same size and save some checks 11 | first.dims ret: ret 12 | else 13 | ret 14 | end 15 | end 16 | end 17 | 18 | module Buildable 19 | def new *args 20 | super.tap do |m| 21 | if block_given? 22 | m.each_stored_with_indices do |_,*idxs| 23 | m[*idxs] = yield *idxs 24 | end 25 | end 26 | end 27 | end 28 | end 29 | 30 | # module AdvancelyOperationable # how am I supposed to name these things?? 31 | 32 | # # Outer matrix relationship generalization. 33 | # # Make a matrix the same shape as `self`; each element is a matrix, 34 | # # with the same shape as `other`, resulting from the interaction of 35 | # # the corresponding element in `self` and all the elements in `other`. 36 | # # @param other [NMatrix] other matrix 37 | # # @note This implementation works only for 2D matrices (same as most 38 | # # other methods here). It's a quick hack, a proof of concept barely 39 | # # sufficient for my urgent needs. 40 | # # @note Output size is fixed! Since NMatrix does not graciously yield to 41 | # # being composed of other NMatrices (by adapting the shape of the root 42 | # # matrix), the block cannot return matrices in there. 43 | # # @return [NMatrix] 44 | # def outer other 45 | # # NOTE: Map of map in NMatrix does not work as expected! 46 | # # self.map { |v1| other.map { |v2| yield(v1,v2) } } 47 | # # NOTE: this doesn't cut it either... can't capture the structure 48 | # # NMatrix[ *self.collect { |v1| other.collect { |v2| yield(v1,v2) } } ] 49 | # raise ArgumentError unless block_given? 50 | # NMatrix.new(self.shape+other.shape).tap do |m| 51 | # each_stored_with_indices do |v1,r1,c1| 52 | # other.each_stored_with_indices do |v2,r2,c2| 53 | # m[r1,c1,r2,c2] = yield(v1,v2) 54 | # end 55 | # end 56 | # end 57 | # end 58 | 59 | # # Flat-output generalized outer relationship. Same as `#outer`, but the 60 | # # result is a 2-dim matrix of the interactions between all the elements 61 | # # in `self` (as rows) and all the elements in `other` (as columns) 62 | # # @param other [NMatrix] other matrix 63 | # # @return [NMatrix] 64 | # def outer_flat other 65 | # raise ArgumentError unless block_given? 66 | # data = collect { |v1| other.collect { |v2| yield(v1, v2) } } 67 | # self.class[*data, dtype: dtype] 68 | # end 69 | 70 | # # Matrix exponential: `e^self` (not to be confused with `self^n`!) 
71 | # # @return [NMatrix] 72 | # def exponential 73 | # # special case: one-dimensional matrix: just exponentiate the values 74 | # if (dim == 1) || (dim == 2 && shape.include?(1)) 75 | # return NMatrix.new shape, collect(&Math.method(:exp)), dtype: dtype 76 | # end 77 | 78 | # # Eigenvalue decomposition method from scipy/linalg/matfuncs.py#expm2 79 | 80 | # # TODO: find out why can't I get away without double transpose! 81 | # e_values, e_vectors = eigen_symm 82 | 83 | # e_vals_exp_dmat = NMatrix.diagonal e_values.collect(&Math.method(:exp)) 84 | # # ASSUMING WE'RE ONLY USING THIS TO EXPONENTIATE LOG_SIGMA IN XNES 85 | # # Theoretically we need the right eigenvectors, which for a symmetric 86 | # # matrix should be just transposes of the eigenvectors. 87 | # # But we have a positive definite matrix, so the final composition 88 | # # below holds without transposing 89 | # # BUT, strangely, I can't seem to get eigen_symm to green the tests 90 | # # ...with or without transpose 91 | # # e_vectors = e_vectors.transpose 92 | # e_vectors.dot(e_vals_exp_dmat).dot(e_vectors.invert)#.transpose 93 | # end 94 | 95 | # # Calculate matrix eigenvalues and eigenvectors using LAPACK 96 | # # @param which [:both, :left, :right] which eigenvectors do you want? 97 | # # @return [Array] 98 | # # eigenvalues (as column vector), left eigenvectors, right eigenvectors. 99 | # # A value different than `:both` for param `which` reduces the return size. 100 | # # @note requires LAPACK 101 | # # @note WARNING! a param `which` different than :both alters the returns 102 | # # @note WARNING! machine-precision-error imaginary part Complex 103 | # # often returned! For symmetric matrices use #eigen_symm_right below 104 | # def eigen which=:both 105 | # raise ArgumentError unless [:both, :left, :right].include? which 106 | # NMatrix::LAPACK.geev(self, which) 107 | # end 108 | 109 | # # Eigenvalues and right eigenvectors for symmetric matrices using LAPACK 110 | # # @note code taken from gem `nmatrix-atlas` NMatrix::LAPACK#geev 111 | # # @note FOR SYMMETRIC MATRICES ONLY!! 112 | # # @note WARNING: will return real matrices, imaginary parts are discarded! 113 | # # @note WARNING: only left eigenvectors will be returned! 114 | # # @todo could it be possible to save some of the transpositions? 115 | # # @return [Array] eigenvalues and (left) eigenvectors 116 | # def eigen_symm 117 | # # TODO: check for symmetry if not too slow 118 | # raise TypeError, "Only real-valued matrices" if complex_dtype? 119 | # raise StorageTypeError, "Only dense matrices (because LAPACK)" unless dense? 120 | # raise ShapeError, "Only square matrices" unless dim == 2 && shape[0] == shape[1] 121 | 122 | # n = shape[0] 123 | 124 | # # Outputs 125 | # e_values = NMatrix.new([n, 1], dtype: dtype) 126 | # e_values_img = NMatrix.new([n, 1], dtype: dtype) # to satisfy C alloc 127 | # e_vectors = clone_structure 128 | 129 | # NMatrix::LAPACK::lapack_geev( 130 | # false, # compute left eigenvectors of A? 131 | # :t, # compute right eigenvectors of A? 
(left eigenvectors of A**T) 132 | # n, # order of the matrix 133 | # transpose, # input matrix => needs to be column-wise # self, 134 | # n, # leading dimension of matrix 135 | # e_values, # real part of computed eigenvalues 136 | # e_values_img, # imaginary part of computed eigenvalues (will be discarded) 137 | # nil, # left eigenvectors, if applicable 138 | # n, # leading dimension of left_output 139 | # e_vectors, # right eigenvectors, if applicable 140 | # n, # leading dimension of right_output 141 | # 2*n # no clue what's this 142 | # ) 143 | 144 | # raise "Uhm why complex eigenvalues?" if e_values_img.any? {|v| v>1e-10} 145 | # return [e_values, e_vectors.transpose] 146 | # end 147 | 148 | 149 | # # The NMatrix documentation refers to a function `#nrm2` (aliased to `#norm2`) 150 | # # to compute the norm of a matrix. Fun fact: that is the implementation for vectors, 151 | # # and calling it on a matrix returns NotImplementedError :) you have to toggle the 152 | # # source to understand why: 153 | # # http://sciruby.com/nmatrix/docs/NMatrix.html#method-i-norm2 . 154 | # # A search for the actual source on GitHub reveals a (I guess new?) method 155 | # # `#matrix_norm`, with a decent choice of norms to choose from. Unfortunately, as the 156 | # # name says, it is stuck to compute full-matrix norms. 157 | # # So I resigned to dance to `Array`s and back, and implemented it with `#each_rank`. 158 | # # Unexplicably, I get a list of constant values as the return value; same with 159 | # # `#each_row`. 160 | # # What can I say, we're back to referencing rows by index. I am just wasting too much 161 | # # time figuring out these details to write a generalized version with an optional 162 | # # `dimension` to go along. 163 | # # @return [NMatrix] the vector norm along the rows 164 | # def row_norms 165 | # norms = rows.times.map { |i| row(i).norm2 } 166 | # NMatrix.new [rows, 1], norms, dtype: dtype 167 | # end 168 | 169 | # # `NMatrix#to_a` has inconsistent behavior: single-row matrices are 170 | # # converted to one-dimensional Arrays rather than a 2D Array with 171 | # # only one row. Patching `#to_a` directly is not feasible as the 172 | # # constructor seems to depend on it, and I have little interest in 173 | # # investigating further. 174 | # # @return [Array] a consistent array representation, such that 175 | # # `nmat.to_consistent_a.to_nm == nmat` holds for single-row matrices 176 | # def to_consistent_a 177 | # dim == 2 && shape[0] == 1 ? [to_a] : to_a 178 | # end 179 | # alias :to_ca :to_consistent_a 180 | # end 181 | 182 | module NumericallyApproximatable 183 | # Verifies if `self` and `other` are withing `epsilon` of each other. 184 | # @param other [Numeric] 185 | # @param epsilon [Numeric] 186 | # @return [Boolean] 187 | def approximates? other, epsilon=1e-5 188 | # Used for testing and NMatrix#approximates?, should I move to spec_helper? 189 | (self - other).abs < epsilon 190 | end 191 | end 192 | 193 | # module MatrixApproximatable 194 | # # Verifies if all values at corresponding indices approximate each other. 195 | # # @param other [NMatrix] 196 | # # @param epsilon [Float] 197 | # def approximates? other, epsilon=1e-5 198 | # return false unless self.shape == other.shape 199 | # # two ways to go here: 200 | # # - epsilon is aggregated: total cumulative accepted error 201 | # # => `(self - other).reduce(:+) < epsilon` 202 | # # - epsilon is local: per element accepted error 203 | # # => `v.approximates? 
other[*idxs], epsilon` 204 | # # Given the use I make (near-equality), I choose the first interpretation 205 | # # Note the second is sensitive to opposite signs balancing up 206 | # self.each_stored_with_indices.all? do |v,*idxs| 207 | # v.approximates? other[*idxs], epsilon 208 | # end 209 | # end 210 | # end 211 | 212 | # module CPtrDumpable 213 | # def marshall_dump 214 | # [shape, dtype, data_pointer] 215 | # end 216 | 217 | # def marshall_load 218 | # raise NotImplementedError, "There's no setter for the data pointer!" 219 | # end 220 | # end 221 | 222 | module ToNArrayConvertible 223 | def to_na 224 | NArray[*self] 225 | end 226 | end 227 | 228 | module NArrayOuterFlattable 229 | # Flat-output generalized outer relationship. Same as `#outer`, but the 230 | # result is a 2-dim matrix of the interactions between all the elements 231 | # in `self` (as rows) and all the elements in `other` (as columns) 232 | # @param other [NArray] other matrix 233 | # @return [NArray] 234 | def outer_flat other 235 | # TODO: Xumo::NArray should be able to implement this with `#outer` and some other 236 | # function to flatten the right layer -- much faster 237 | raise ArgumentError, "Need to pass an operand block" unless block_given? 238 | self.class.zeros([self.size, other.size]).tap do |ret| 239 | self.size.times do |r| 240 | other.size.times do |c| 241 | ret[r,c] = yield self[r], other[c] 242 | end 243 | end 244 | end 245 | end 246 | end 247 | 248 | module NArrayApproximatable 249 | # Verifies if `self` and `other` are withing `epsilon` of each other. 250 | # @param other [NArray] 251 | # @param epsilon [NArray] 252 | # @return [Boolean] 253 | def approximates? other, epsilon=1e-5 254 | ((self - other).abs < epsilon).all? 255 | end 256 | end 257 | 258 | module Invertable 259 | # Inverses matrix 260 | # @return [NArray] 261 | def invert 262 | NLinalg.inv self 263 | end 264 | end 265 | 266 | module Exponentiable 267 | # Matrix exponential: `e**self` (not to be confused with `self**n`) 268 | # @return [NArray] 269 | def exponential 270 | raise ArgumentError if ndim > 2 271 | # special case: one-dimensional matrix: just exponentiate the values 272 | return NMath.exp(self) if (ndim == 1) || shape.include?(1) 273 | # at this point we need to validate it is a square matrix 274 | raise ArgumentError unless shape.reduce(&:==) 275 | 276 | # Eigenvalue decomposition method from `scipy/linalg/matfuncs.py#expm2` (deprecated) 277 | # https://github.com/scipy/scipy/commit/236e0740ba951cb455ba8b6a306abb32740131cf 278 | # s, vr = eig(A) 279 | # vri = inv(vr) 280 | # r = dot(dot(vr, diag(exp(s))), vri) 281 | 282 | # TODO: this is a simple but outdated method, switch to Pade approximation 283 | # https://github.com/scipy/scipy/blob/11509c4a98edded6c59423ac44ca1b7f28fba1fd/scipy/sparse/linalg/matfuncs.py#L557 284 | 285 | # e_values, l_e_vectors, r_e_vectors_t = NLinalg.svd self 286 | evals, _wi, _vl, r_evecs = NLinalg::Lapack.call(:geev, self, jobvl: false, jobvr: true) 287 | r_evecs_t = r_evecs#.transpose 288 | r_evecs_inv = r_evecs_t.invert 289 | evals_exp_dmat = NMath.exp(evals).diag 290 | 291 | # l_e_vectors.dot(e_vals_exp_dmat).dot(l_e_vectors.invert)#.transpose 292 | r_evecs_t.dot(evals_exp_dmat).dot(r_evecs_inv) 293 | end 294 | end 295 | 296 | module Mappable 297 | # Maps along a NArray dimension, and returns NArray 298 | # @return [NArray] 299 | # NOTE: this indexing is not consistent with NArray, which uses 0 to indicate 300 | # columns rather than the 0th dimension (rows) 301 | def map dim=0 302 | raise 
ArgumentError unless dim.kind_of?(Integer) && dim.between?(0,ndim) 303 | # TODO: return iterator instead of raise 304 | raise NotImplementedError unless block_given? 305 | indices = [true]*ndim 306 | ret = [] 307 | shape[dim].times.each do |i| 308 | indices[dim] = i 309 | ret << yield(self[*indices]) 310 | end 311 | self.class[*ret] 312 | end 313 | end 314 | 315 | end 316 | 317 | Array.include MachineLearningWorkbench::Monkey::Dimensionable 318 | # NMatrix.extend MachineLearningWorkbench::Monkey::Buildable 319 | # require 'nmatrix/lapack_plugin' # loads whichever is installed between atlas and lapacke 320 | # NMatrix.include MachineLearningWorkbench::Monkey::AdvancelyOperationable 321 | Numeric.include MachineLearningWorkbench::Monkey::NumericallyApproximatable 322 | # NMatrix.include MachineLearningWorkbench::Monkey::MatrixApproximatable 323 | # NMatrix.include MachineLearningWorkbench::Monkey::CPtrDumpable 324 | Array.include MachineLearningWorkbench::Monkey::ToNArrayConvertible 325 | NArray.include MachineLearningWorkbench::Monkey::NArrayApproximatable 326 | NArray.include MachineLearningWorkbench::Monkey::NArrayOuterFlattable 327 | NArray.include MachineLearningWorkbench::Monkey::Exponentiable 328 | NArray.include MachineLearningWorkbench::Monkey::Invertable 329 | NArray.prepend MachineLearningWorkbench::Monkey::Mappable 330 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'neural_network/base' 4 | require_relative 'neural_network/feed_forward' 5 | require_relative 'neural_network/recurrent' 6 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::NeuralNetwork 4 | # Neural Network base class 5 | class Base 6 | 7 | # @!attribute [r] layers 8 | # List of matrices, each being the weights 9 | # connecting a layer's inputs (rows) to a layer's neurons (columns), 10 | # hence its shape is `[ninputs, nneurs]` 11 | # @return [Array] list of weight matrices, each uniquely describing a layer 12 | # TODO: return a NArray after the usage of `#map` is figured out 13 | # @!attribute [r] state 14 | # It's a list of one-dimensional matrices, each an input to a layer, plus the output layer's output. The first element is the input to the first layer of the network, which is composed of the network's input, possibly the first layer's activation on the last input (recursion), and a bias (fixed `1`). The second to but-last entries follow the same structure, but with the previous layer's output in place of the network's input. The last entry is the activation of the output layer, without additions since it's not used as an input by anyone. 15 | # TODO: return a NArray after the usage of `#map` is figured out 16 | # @return [Array] current state of the network. 
17 | # @!attribute [r] act_fn 18 | # activation function, common to all neurons (for now) 19 | # @return [#call] activation function 20 | # @!attribute [r] struct 21 | # list of number of (inputs or) neurons in each layer 22 | # @return [Array] structure of the network 23 | attr_reader :layers, :state, :act_fn, :act_fn_name, :struct 24 | 25 | 26 | ## Initialization 27 | 28 | # @param struct [Array] list of layer sizes 29 | # @param act_fn [Symbol] choice of activation function for the neurons 30 | def initialize struct, act_fn: nil, **act_fn_args 31 | @struct = struct 32 | @act_fn_name = act_fn || :sigmoid 33 | @act_fn = send act_fn_name, **act_fn_args 34 | # @state holds both inputs, possibly recurrency, and bias 35 | # it is a complete input for the next layer, hence size from layer sizes 36 | @state = layer_row_sizes.collect do |size| 37 | NArray.zeros [1, size] 38 | end 39 | # to this, append a matrix to hold the final network output 40 | @state.push NArray.zeros [1, nneurs(-1)] 41 | reset_state 42 | end 43 | 44 | # Reset the network to the initial state 45 | def reset_state 46 | state.each do |s| 47 | s.fill 0 # reset state to zero 48 | s[-1] = 1 # add bias 49 | end 50 | state[-1][-1] = 0 # last layer has no bias 51 | end 52 | 53 | # Initialize the network with random weights 54 | def init_random 55 | # Reusing `#load_weights` instead helps catching bugs 56 | deep_reset 57 | load_weights NArray.new(nweights).rand(-1,1) 58 | end 59 | 60 | ## Weight utilities 61 | 62 | # Resets memoization: needed to play with structure modification 63 | def deep_reset 64 | # reset memoization 65 | [:@layer_row_sizes, :@layer_col_sizes, :@nlayers, :@layer_shapes, 66 | :@nweights_per_layer, :@nweights].each do |sym| 67 | instance_variable_set sym, nil 68 | end 69 | reset_state 70 | end 71 | 72 | # Total weights in the network 73 | # @return [Integer] total number of weights 74 | def nweights 75 | @nweights ||= nweights_per_layer.reduce(:+) 76 | end 77 | 78 | # List of per-layer number of weights 79 | # @return [Array] list of weights per each layer 80 | def nweights_per_layer 81 | @nweights_per_layer ||= layer_shapes.collect { |shape| shape.reduce(:*) } 82 | end 83 | 84 | # Count the layers. This is a computation helper, and for this implementation 85 | # the inputs are considered as if a layer like the others. 86 | # @return [Integer] number of layers 87 | def nlayers 88 | @nlayers ||= layer_shapes.size 89 | end 90 | 91 | # Returns the weight matrix 92 | # @return [Array] list of NArray matrices of weights (one per layer). 93 | def weights 94 | layers 95 | end 96 | 97 | # Number of neurons per layer. Although this implementation includes inputs 98 | # in the layer counts, this methods correctly ignores the input as not having 99 | # neurons. 100 | # @return [Array] list of neurons per each (proper) layer (i.e. no inputs) 101 | def layer_col_sizes 102 | @layer_col_sizes ||= struct.drop(1) 103 | end 104 | 105 | # define #layer_row_sizes in child class: number of inputs per layer 106 | 107 | # Shapes for the weight matrices, each corresponding to a layer 108 | # @return [Array] Weight matrix shapes 109 | def layer_shapes 110 | @layer_shapes ||= layer_row_sizes.zip layer_col_sizes 111 | end 112 | 113 | # Count the neurons in a particular layer or in the whole network. 114 | # @param nlay [Integer, nil] the layer of interest, 1-indexed. 115 | # `0` will return the number of inputs. 116 | # `nil` will compute the total neurons in the network. 
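# e.g. with struct [2, 2, 1]: nneurs(0) => 2 (inputs), nneurs(2) => 1, nneurs => 5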
117 | # @return [Integer] the number of neurons in a given layer, or in the whole network, or the number of inputs 118 | def nneurs nlay=nil 119 | nlay.nil? ? struct.reduce(:+) : struct[nlay] 120 | end 121 | 122 | # Loads a plain list of weights into the weight matrices (one per layer). 123 | # Preserves order. Reuses allocated memory if available. 124 | # @param weights [Array] weights to load 125 | # @return [true] always true. If something's wrong it simply fails, and if 126 | # all goes well there's nothing to return but a confirmation to the caller. 127 | def load_weights weights 128 | raise ArgumentError unless weights.size == nweights 129 | weights = weights.to_na unless weights.kind_of? NArray 130 | from = 0 131 | @layers = layer_shapes.collect do |shape| 132 | to = from + shape.reduce(:*) 133 | lay_w = weights[from...to].reshape *shape 134 | from = to 135 | lay_w 136 | end 137 | reset_state 138 | return true 139 | end 140 | 141 | 142 | ## Activation 143 | 144 | # Activate the network on a given input 145 | # @param input [Array] the given input 146 | # @return [Array] the activation of the output layer 147 | def activate input 148 | raise ArgumentError unless input.size == struct.first 149 | # load input in first state 150 | state[0][0...struct.first] = input 151 | # activate layers in sequence 152 | nlayers.times.each do |i| 153 | act = activate_layer i 154 | state[i+1][0...act.size] = act 155 | end 156 | return out 157 | end 158 | 159 | # Extract and convert the output layer's activation 160 | # @return [NArray] the activation of the output layer 161 | def out 162 | state.last.flatten 163 | end 164 | 165 | ## Activation functions 166 | 167 | # Traditional sigmoid (logistic) with variable steepness 168 | def sigmoid steepness: 1 169 | # steepness: 0 < steepness < 1 gives a flatter curve, steepness > 1 a steeper one 170 | # (steepness == 1 is the standard logistic function) 171 | -> (vec) { 1.0 / (NMath.exp(-steepness * vec) + 1.0) } 172 | end 173 | alias logistic sigmoid 174 | 175 | # LeCun hyperbolic activation 176 | # @see http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf Section 4.4 177 | def lecun_hyperbolic 178 | -> (vec) { 1.7159 * NMath.tanh(2.0*vec/3.0) + 1e-3*vec } 179 | end 180 | 181 | # Rectified Linear Unit (ReLU) 182 | def relu 183 | -> (vec) { (vec>0).all? && vec || vec.class.zeros(vec.shape) } 184 | end 185 | 186 | 187 | # @!method interface_methods 188 | # Declaring interface methods - implement in child class! 189 | [:layer_row_sizes, :activate_layer].each do |sym| 190 | define_method sym do 191 | raise NotImplementedError, "Implement ##{sym} in child class!" 192 | end 193 | end 194 | end 195 | end 196 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network/feed_forward.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::NeuralNetwork 4 | # Feed Forward Neural Network 5 | class FeedForward < Base 6 | 7 | # Calculate the size of each row in a layer's weight matrix. 8 | # Includes inputs (or previous-layer activations) and bias. 
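    # For example (an illustrative sketch): with `struct = [2, 2, 1]` this returns
    # `[3, 3]` (2 inputs + bias, then 2 activations + bias), so `layer_shapes`
    # works out to `[[3, 2], [3, 1]]`, the values exercised in the specs.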
9 | # @return [Array] per-layer row sizes 10 | def layer_row_sizes 11 | @layer_row_sizes ||= struct.each_cons(2).collect {|prev, _curr| prev+1} 12 | end 13 | 14 | # Activates a layer of the network 15 | # @param i [Integer] the layer to activate, zero-indexed 16 | def activate_layer i 17 | act_fn.call(state[i].dot layers[i]) 18 | end 19 | 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network/recurrent.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::NeuralNetwork 4 | # Recurrent Neural Network 5 | class Recurrent < Base 6 | 7 | # Calculate the size of each row in a layer's weight matrix. 8 | # Each row holds the inputs for the next level: previous level's 9 | # activations (or inputs), this level's last activations 10 | # (recursion) and bias. 11 | # @return [Array] per-layer row sizes 12 | def layer_row_sizes 13 | @layer_row_sizes ||= struct.each_cons(2).collect do |prev, rec| 14 | prev + rec + 1 15 | end 16 | end 17 | 18 | # # NOTE: current layer index corresponds to index of next state! 19 | # previous = nlay # index of previous layer (inputs) 20 | # current = nlay + 1 # index of current layer (outputs) 21 | # # Copy the level's last-time activation to the input (previous state) 22 | # # TODO: ranges in `NArray#[]` should be reliable, get rid of loop 23 | # nneurs(current).times do |i| # for each activations to copy 24 | # # Copy output from last-time activation to recurrency in previous state 25 | # @state[previous][0, nneurs(previous) + i] = state[current][0, i] 26 | # end 27 | # act_fn.call state[previous].dot layers[nlay] 28 | 29 | # Activates a layer of the network. 30 | # Bit more complex since it has to copy the layer's activation on 31 | # last input to its own inputs, for recursion. 
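    # For example (an illustrative sketch): with `struct = [2, 2, 1]` each layer's
    # input row is laid out as [previous activations | own last activations | bias],
    # giving row sizes `[2+2+1, 2+1+1] == [5, 4]` and `layer_shapes == [[5, 2], [4, 1]]`,
    # the values exercised in the specs.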
32 | # @param i [Integer] the layer to activate, zero-indexed 33 | def activate_layer nlay 34 | # Mark begin and end of recursion outputs in current state 35 | begin_recur = nneurs(nlay) 36 | end_recur = nneurs(nlay) + nneurs(nlay+1) 37 | # Copy the level's last-time activation to the current input recurrency 38 | state[nlay][begin_recur...end_recur] = state[nlay+1][0...nneurs(nlay+1)] 39 | # Activate current layer 40 | act_fn.call state[nlay].dot layers[nlay] 41 | end 42 | 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer 4 | end 5 | 6 | require_relative 'optimizer/natural_evolution_strategies/base' 7 | require_relative 'optimizer/natural_evolution_strategies/xnes' 8 | require_relative 'optimizer/natural_evolution_strategies/snes' 9 | require_relative 'optimizer/natural_evolution_strategies/rnes' 10 | # FIX SPECS FIRST 11 | # require_relative 'optimizer/natural_evolution_strategies/fnes' 12 | require_relative 'optimizer/natural_evolution_strategies/bdnes' 13 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Natural Evolution Strategies base class 5 | class Base 6 | attr_reader :ndims, :mu, :sigma, :opt_type, :obj_fn, :parallel_fit, :eye, :rng, :last_fits, :best, :rescale_popsize, :rescale_lrate 7 | 8 | # NES object initialization 9 | # @param ndims [Integer] number of parameters to optimize 10 | # @param obj_fn [#call] any object defining a #call method (Proc, lambda, custom class) 11 | # @param opt_type [:min, :max] select minimization / maximization of obj_fn 12 | # @param rseed [Integer] allow for deterministic execution on rseed provided 13 | # @param mu_init [Numeric] values to initalize the distribution's mean 14 | # @param sigma_init [Numeric] values to initialize the distribution's covariance 15 | # @param parallel_fit [boolean] whether the `obj_fn` should be passed all the 16 | # individuals together. In the canonical case the fitness function always scores a 17 | # single individual; in practical cases though it is easier to delegate the scoring 18 | # parallelization to the external fitness function. Turning this to `true` will make 19 | # the algorithm pass _an Array_ of individuals to the fitness function, rather than 20 | # a single instance. 21 | # @param rescale_popsize [Float] scaling for the default population size 22 | # @param rescale_lrate [Float] scaling for the default learning rate 23 | def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1, parallel_fit: false, rescale_popsize: 1, rescale_lrate: 1, utilities: nil, popsize: nil, lrate: nil 24 | raise ArgumentError, "opt_type: #{opt_type}" unless [:min, :max].include? opt_type 25 | raise ArgumentError, "obj_fn not callable: #{obj_fn}" unless obj_fn.respond_to? :call 26 | raise ArgumentError, "utilities only if popsize" if utilities && popsize.nil? 
27 | raise ArgumentError, "wrong sizes" if utilities && utilities.size != popsize 28 | raise ArgumentError, "minimum popsize 5 for default utilities" if popsize&.<(5) && utilities.nil? 29 | @ndims, @opt_type, @obj_fn, @parallel_fit = ndims, opt_type, obj_fn, parallel_fit 30 | @rescale_popsize, @rescale_lrate = rescale_popsize, rescale_lrate # rescale defaults 31 | @utilities, @popsize, @lrate = utilities, popsize, lrate # if not set, defaults below 32 | @eye = NArray.eye(ndims) 33 | rseed ||= Random.new_seed 34 | # puts "NES rseed: #{s}" # currently disabled 35 | @rng = Random.new rseed 36 | @best = [(opt_type==:max ? -1 : 1) * Float::INFINITY, nil] 37 | @last_fits = [] 38 | initialize_distribution mu_init: mu_init, sigma_init: sigma_init 39 | end 40 | 41 | # Box-Muller transform: generates standard (unit) normal distribution samples 42 | # @return [Float] a single sample from a standard normal distribution 43 | # @note Xumo::NArray implements this but no random seed selection yet 44 | def standard_normal_sample 45 | rho = Math.sqrt(-2.0 * Math.log(rng.rand)) 46 | theta = 2 * Math::PI * rng.rand 47 | tfn = rng.rand > 0.5 ? :cos : :sin 48 | rho * Math.send(tfn, theta) 49 | end 50 | 51 | # Memoized automatic magic numbers 52 | # Initialization options allow to rescale or entirely override these. 53 | # NOTE: Doubling popsize and halving lrate often helps 54 | def utils; @utilities ||= cmaes_utilities end 55 | # (see #utils) 56 | def popsize; @popsize ||= Integer(cmaes_popsize * rescale_popsize) end 57 | # (see #utils) 58 | def lrate; @lrate ||= cmaes_lrate * rescale_lrate end 59 | 60 | # Magic numbers from CMA-ES (see `README` for citation) 61 | # @return [NArray] scale-invariant utilities 62 | def cmaes_utilities 63 | # Algorithm equations are meant for fitness maximization 64 | # Match utilities with individuals sorted by INCREASING fitness 65 | raise ArgumentError, "Minimum `popsize` should be 5 (is #{popsize})" if popsize < 5 66 | log_range = (1..popsize).collect do |v| 67 | [0, Math.log(popsize.to_f/2 - 1) - Math.log(v)].max 68 | end 69 | total = log_range.reduce(:+) 70 | buf = 1.0/popsize 71 | vals = log_range.collect { |v| v / total - buf }.reverse 72 | NArray[vals] 73 | end 74 | 75 | # (see #cmaes_utilities) 76 | # @return [Float] learning rate lower bound 77 | def cmaes_lrate 78 | (3+Math.log(ndims)) / (5*Math.sqrt(ndims)) 79 | end 80 | 81 | # (see #cmaes_utilities) 82 | # @return [Integer] population size lower bound 83 | def cmaes_popsize 84 | [5, 4 + (3*Math.log(ndims)).floor].max 85 | end 86 | 87 | # Samples a standard normal distribution to construct a NArray of 88 | # popsize multivariate samples of length ndims 89 | # @return [NArray] standard normal samples 90 | # @note Xumo::NArray implements this but no random seed selection yet 91 | def standard_normal_samples 92 | NArray.zeros([popsize, ndims]).tap do |ret| 93 | ret.each_with_index { |_,*i| ret[*i] = standard_normal_sample } 94 | end 95 | end 96 | 97 | # Move standard normal samples to current distribution 98 | # @return [NArray] individuals 99 | def move_inds inds 100 | # TODO: can we reduce the transpositions? 101 | 102 | # multi_mu = NMatrix[*inds.rows.times.collect {mu.to_a}, dtype: dtype].transpose 103 | # (multi_mu + sigma.dot(inds.transpose)).transpose 104 | 105 | mu_tile = mu.tile(inds.shape.first, 1).transpose 106 | (mu_tile + sigma.dot(inds.transpose)).transpose 107 | end 108 | 109 | # Sorted individuals 110 | # NOTE: Algorithm equations are meant for fitness maximization. 
Utilities need to be 111 | # matched with individuals sorted by INCREASING fitness. Then reverse order for minimization. 112 | # @return standard normal samples sorted by the respective individuals' fitnesses 113 | def sorted_inds 114 | # Xumo::NArray implements the Box-Muller, but no random seed (yet) 115 | samples = standard_normal_samples 116 | # samples = NArray.new([popsize, ndims]).rand_norm(0,1) 117 | inds = move_inds(samples) 118 | fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn) 119 | # Quick cure for NaN fitnesses 120 | fits.map { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x } 121 | @last_fits = fits # allows checking for stagnation 122 | 123 | # sorted = [fits.to_a, inds, samples.to_a].transpose.sort_by(&:first) 124 | # sorted.reverse! if opt_type==:min 125 | # this_best = sorted.last.take(2) 126 | # NArray[*sorted.map(&:last)] 127 | 128 | 129 | 130 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 131 | # sort_idxs = fits.sort_index 132 | sort_idxs = fits.size.times.sort_by { |i| fits[i] }.to_na 133 | 134 | 135 | 136 | sort_idxs = sort_idxs.reverse if opt_type == :min 137 | this_best = [fits[sort_idxs[-1]], inds[sort_idxs[-1], true]] 138 | opt_cmp_fn = opt_type==:min ? :< : :> 139 | @best = this_best if this_best.first.send(opt_cmp_fn, best.first) 140 | 141 | samples[sort_idxs, true] 142 | end 143 | 144 | # @!method interface_methods 145 | # Declaring interface methods - implement these in child class! 146 | [:train, :initialize_distribution, :convergence].each do |mname| 147 | define_method mname do 148 | raise NotImplementedError, "Implement in child class!" 149 | end 150 | end 151 | end 152 | end 153 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Block-Diagonal Natural Evolution Strategies 5 | class BDNES < Base 6 | 7 | MAX_RSEED = 10**Random.new_seed.size # block random seeds to be on the same range as `Random.new_seed` 8 | 9 | attr_reader :ndims_lst, :blocks, :popsize, :parallel_update 10 | undef :ndims # only `ndims_lst` here 11 | 12 | # Initialize a list of XNES, one for each block 13 | # see class `Base` for the description of the rest of the arguments. 14 | # @param ndims_lst [Array] list of sizes for each block in the block-diagonal 15 | # matrix. Note: entire (reconstructed) individuals will be passed to the `obj_fn` 16 | # regardless of the division here described. 
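    # For example (an illustrative sketch): `BDNES.new [3, 2], obj_fn, :min` maintains
    # two XNES blocks of 3 and 2 dimensions respectively, while `obj_fn` still receives
    # complete individuals of length 5, as exercised in the specs.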
17 | # @param init_opts [Hash] the rest of the options will be passed directly to XNES 18 | # @parellel_update [bool] whether to parallelize block updates 19 | def initialize ndims_lst, obj_fn, opt_type, parallel_fit: false, rseed: nil, parallel_update: false, **init_opts 20 | # mu_init: 0, sigma_init: 1 21 | # init_opts = {rseed: rseed, mu_init: mu_init, sigma_init: sigma_init} 22 | # TODO: accept list of `mu_init`s and `sigma_init`s 23 | @ndims_lst, @obj_fn, @opt_type, @parallel_fit = ndims_lst, obj_fn, opt_type, parallel_fit 24 | block_fit = -> (*args) { raise "Should never be called" } 25 | # the BD-NES seed should ensure deterministic reproducibility 26 | # but each block should have a different seed 27 | # puts "BD-NES rseed: #{s}" # currently disabled 28 | @rng = Random.new rseed || Random.new_seed 29 | @blocks = ndims_lst.map do |ndims| 30 | b_rseed = rng.rand MAX_RSEED 31 | XNES.new ndims, block_fit, opt_type, rseed: b_rseed, **init_opts 32 | end 33 | # Need `popsize` to be the same for all blocks, to make complete individuals 34 | @popsize = blocks.map(&:popsize).max 35 | blocks.each { |xnes| xnes.instance_variable_set :@popsize, popsize } 36 | 37 | @best = [(opt_type==:max ? -1 : 1) * Float::INFINITY, nil] 38 | @last_fits = [] 39 | @parallel_update = parallel_update 40 | require 'parallel' if parallel_update 41 | end 42 | 43 | def sorted_inds_lst 44 | # Build samples and inds from the list of blocks 45 | samples_lst, inds_lst = blocks.map do |xnes| 46 | samples = xnes.standard_normal_samples 47 | inds = xnes.move_inds(samples) 48 | [samples.to_a, inds] 49 | end.transpose 50 | 51 | # Join the individuals for evaluation 52 | full_inds = inds_lst.reduce { |mem, var| mem.concatenate var, axis: 1 } 53 | # Need to fix sample dimensions for sorting 54 | # - current dims: nblocks x ninds x [block sizes] 55 | # - for sorting: ninds x nblocks x [block sizes] 56 | full_samples = samples_lst.transpose 57 | 58 | # Evaluate fitness of complete individuals 59 | fits = parallel_fit ? obj_fn.call(full_inds) : full_inds.map(&obj_fn) 60 | # Quick cure for NaN fitnesses 61 | fits.map { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x } 62 | @last_fits = fits # allows checking for stagnation 63 | 64 | # Sort inds based on fit and opt_type, save best 65 | # sorted = [fits, full_inds, full_samples].transpose.sort_by(&:first) 66 | # sorted.reverse! if opt_type==:min 67 | # this_best = sorted.last.take(2) 68 | # opt_cmp_fn = opt_type==:min ? :< : :> 69 | # @best = this_best if this_best.first.send(opt_cmp_fn, best.first) 70 | # sorted_samples = sorted.map(&:last) 71 | 72 | 73 | 74 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 75 | # sort_idxs = fits.sort_index 76 | sort_idxs = fits.size.times.sort_by { |i| fits[i] }.to_na 77 | 78 | 79 | 80 | sort_idxs = sort_idxs.reverse if opt_type == :min 81 | this_best = [fits[sort_idxs[-1]], full_inds[sort_idxs[-1], true]] 82 | opt_cmp_fn = opt_type==:min ? 
:< : :> 83 | @best = this_best if this_best.first.send(opt_cmp_fn, best.first) 84 | sorted_samples = full_samples.values_at *sort_idxs 85 | 86 | # Need to bring back sample dimensions for each block 87 | # - current dims: ninds x nblocks x [block sizes] 88 | # - target blocks list: nblocks x ninds x [block sizes] 89 | block_samples = sorted_samples.transpose 90 | 91 | # then back to NArray for usage in training 92 | block_samples.map &:to_na 93 | end 94 | 95 | # duck-type the interface: [:train, :mu, :convergence, :save, :load] 96 | 97 | # TODO: refactor DRY 98 | def train picks: sorted_inds_lst 99 | if parallel_update 100 | # Parallel.each(blocks.zip(picks)) do |xnes, s_inds| 101 | # xnes.train picks: s_inds 102 | # end 103 | # Actually it's not this simple. 104 | # Forks do not act on the parent, so I need to send back updated mu and sigma 105 | # Luckily we have `NES#save` and `NES#load` at the ready 106 | # Next: need to implement `#marshal_dump` and `#marshal_load` in `Base` 107 | # Actually using `Cumo` rather than `Parallel` may avoid marshaling altogether 108 | raise NotImplementedError, "Should dump and load each instance" 109 | else 110 | blocks.zip(picks).each do |xnes, s_inds| 111 | xnes.train picks: s_inds 112 | end 113 | end 114 | end 115 | 116 | def mu 117 | blocks.map(&:mu).reduce { |mem, var| mem.concatenate var, axis: 1 } 118 | end 119 | 120 | def sigma 121 | raise NotImplementedError, "need to write a concatenation like for mu here" 122 | end 123 | 124 | def convergence 125 | blocks.map(&:convergence).reduce(:+) 126 | end 127 | 128 | def save 129 | blocks.map &:save 130 | end 131 | 132 | def load data 133 | fit = -> (*args) { raise "Should never be called" } 134 | @blocks = data.map do |block_data| 135 | ndims = block_data.first.size 136 | XNES.new(ndims, fit, opt_type).tap do |nes| 137 | nes.load block_data 138 | end 139 | end 140 | end 141 | end 142 | end 143 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/fnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Fixed Variance Natural Evolution Strategies 5 | class FNES < RNES 6 | 7 | def train picks: sorted_inds 8 | g_mu = utils.dot(picks) 9 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 10 | end 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/rnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Radial Natural Evolution Strategies 5 | class RNES < Base 6 | attr_reader :variance 7 | 8 | def initialize_distribution mu_init: 0, sigma_init: 1 9 | @mu = case mu_init 10 | when Array 11 | raise ArgumentError unless mu_init.size == ndims 12 | NArray[mu_init] 13 | when Numeric 14 | NArray.new([1,ndims]).fill mu_init 15 | else 16 | raise ArgumentError, "Something is wrong with mu_init: #{mu_init}" 17 | end 18 | @variance = sigma_init 19 | @sigma = case sigma_init 20 | when Array 21 | raise ArgumentError "RNES uses single global variance" 22 | when Numeric 23 | NArray.new([ndims]).fill(variance).diag 24 | else 25 | raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}" 26 | 
end 27 | end 28 | 29 | def train picks: sorted_inds 30 | g_mu = utils.dot(picks) 31 | # g_sigma = utils.dot(picks.row_norms**2 - ndims).first # back to scalar 32 | row_norms = NLinalg.norm picks, 2, axis:1 33 | g_sigma = utils.dot(row_norms**2 - ndims)[0] # back to scalar 34 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 35 | @variance *= Math.exp(g_sigma * lrate / 2) 36 | @sigma = NArray.new([ndims]).fill(variance).diag 37 | end 38 | 39 | # Estimate algorithm convergence based on variance 40 | def convergence 41 | variance 42 | end 43 | 44 | def save 45 | [mu.to_a, variance] 46 | end 47 | 48 | def load data 49 | raise ArgumentError unless data.size == 2 50 | mu_ary, @variance = data 51 | @mu = mu_ary.to_na 52 | @sigma = eye * variance 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Separable Natural Evolution Strategies 5 | class SNES < Base 6 | 7 | attr_reader :variances 8 | 9 | def initialize_distribution mu_init: 0, sigma_init: 1 10 | @mu = case mu_init 11 | when Array 12 | raise ArgumentError unless mu_init.size == ndims 13 | NArray[mu_init] 14 | when Numeric 15 | NArray.new([1,ndims]).fill mu_init 16 | else 17 | raise ArgumentError, "Something is wrong with mu_init: #{mu_init}" 18 | end 19 | @variances = case sigma_init 20 | when Array 21 | raise ArgumentError unless sigma_init.size == ndims 22 | NArray[*sigma_init] 23 | when Numeric 24 | NArray.new([ndims]).fill(sigma_init) 25 | else 26 | raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}" \ 27 | "(did you remember to copy the other cases from XNES?)" 28 | end 29 | @sigma = @variances.diag 30 | end 31 | 32 | def train picks: sorted_inds 33 | g_mu = utils.dot(picks) 34 | g_sigma = utils.dot(picks**2 - 1) 35 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 36 | @variances *= (g_sigma * lrate / 2).exponential.flatten 37 | @sigma = @variances.diag 38 | end 39 | 40 | # Estimate algorithm convergence as total variance 41 | def convergence 42 | variances.sum 43 | end 44 | 45 | def save 46 | [mu.to_a, variances.to_a] 47 | end 48 | 49 | def load data 50 | raise ArgumentError unless data.size == 2 51 | @mu, @variances = data.map &:to_na 52 | @sigma = variances.diag 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Exponential Natural Evolution Strategies 5 | class XNES < Base 6 | attr_reader :log_sigma 7 | 8 | def initialize_distribution mu_init: 0, sigma_init: 1 9 | @mu = case mu_init 10 | when Range # initialize with random in range 11 | raise ArgumentError, "mu_init: `Range` start/end in `Float`s" \ 12 | unless mu_init.first.kind_of?(Float) && mu_init.last.kind_of?(Float) 13 | mu_rng = Random.new rng.rand 10**Random.new_seed.size 14 | NArray[*ndims.times.map { mu_rng.rand mu_init }] 15 | when Array 16 | raise ArgumentError unless mu_init.size == ndims 17 | NArray[mu_init] 18 | when Numeric 19 | NArray.new([1,ndims]).fill mu_init 20 | 
when NArray 21 | raise ArgumentError unless mu_init.size == ndims 22 | mu_init.ndim < 2 ? mu_init.reshape(1, ndims) : mu_init 23 | else 24 | raise ArgumentError, "Something is wrong with mu_init: #{mu_init}" 25 | end 26 | @sigma = case sigma_init 27 | when Array 28 | raise ArgumentError unless sigma_init.size == ndims 29 | NArray[*sigma_init].diag 30 | when Numeric 31 | NArray.new([ndims]).fill(sigma_init).diag 32 | when NArray 33 | raise ArgumentError unless sigma_init.size == ndims**2 34 | sigma_init.ndim < 2 ? sigma_init.reshape(ndims, ndims) : sigma_init 35 | else 36 | raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}" 37 | end 38 | # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi) 39 | @log_sigma = NMath.log(sigma.diagonal).diag 40 | end 41 | 42 | def train picks: sorted_inds 43 | g_mu = utils.dot(picks) 44 | g_log_sigma = popsize.times.inject(NArray.zeros sigma.shape) do |sum, i| 45 | u = utils[i] 46 | ind = picks[i, true] 47 | ind_sq = ind.outer_flat(ind, &:*) 48 | sum + (ind_sq - eye) * u 49 | end 50 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 51 | @log_sigma += g_log_sigma * (lrate/2) 52 | @sigma = log_sigma.exponential 53 | end 54 | 55 | # Estimate algorithm convergence as total variance 56 | def convergence 57 | sigma.trace 58 | end 59 | 60 | def save 61 | [mu.to_a, log_sigma.to_a] 62 | end 63 | 64 | def load data 65 | raise ArgumentError unless data.size == 2 66 | @mu, @log_sigma = data.map &:to_na 67 | @sigma = log_sigma.exponential 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/systems.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'systems/neuroevolution' 4 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/systems/neuroevolution.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | "Work in progress" 4 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'tools/execution' 4 | require_relative 'tools/normalization' 5 | require_relative 'tools/imaging' 6 | require_relative 'tools/verification' 7 | require_relative 'tools/logging' 8 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/execution.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Execution 5 | $fork_pids ||= [] 6 | 7 | # Executes block in a (detached) fork, saving the `pid` for later termination. 8 | # @note add `ensure MachineLearningWorkbench::Tools.kill_forks` to the block 9 | # where `in_fork` is called (see `#kill_forks`). 10 | def self.in_fork &block 11 | raise ArgumentError "Need block to be executed in fork" unless block 12 | pid = fork(&block) 13 | Process.detach pid 14 | $fork_pids << pid 15 | end 16 | 17 | # Kills processes spawned by `#in_fork`. 18 | # Call this in an `ensure` block after using `in_fork`. 
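    # A minimal usage sketch (illustrative; the `image.display` block is hypothetical):
    #   begin
    #     MachineLearningWorkbench::Tools::Execution.in_fork { image.display }
    #   ensure
    #     MachineLearningWorkbench::Tools::Execution.kill_forks
    #   end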
19 | # => `ensure MachineLearningWorkbench::Tools.kill_forks` 20 | def self.kill_forks 21 | $fork_pids&.each { |pid| Process.kill('KILL', pid) rescue Errno::ESRCH } 22 | $fork_pids = [] 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/imaging.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Imaging 5 | Forkable = MachineLearningWorkbench::Tools::Execution 6 | Norm = MachineLearningWorkbench::Tools::Normalization 7 | 8 | # Create RMagick::Image from numeric matrix data 9 | # @param narr [NArray] numeric matrix to display 10 | # @param shape [Array] optional reshaping 11 | def self.narr_to_img narr, shape: nil 12 | require 'rmagick' 13 | shape ||= narr.shape 14 | shape = [1, shape] if shape.kind_of?(Integer) || shape.size == 1 15 | # `Image::constitute` requires Float pixels to be in [0,1] 16 | pixels = Norm.feature_scaling narr.cast_to(NArray), to: [0,1] 17 | Magick::Image.constitute *shape, "I", pixels.to_a.flatten 18 | end 19 | 20 | # Create PNG file from numeric matrix data 21 | # @param narr [NArray] numeric matrix to display 22 | # @param fname [String] path to save PNG 23 | # @param shape [Array] optional reshaping before saving 24 | def self.narr_to_png narr, fname, shape: nil 25 | narr_to_img(narr, shape: shape).write fname 26 | end 27 | 28 | # Show a numeric matrix as image in a RMagick window 29 | # @param narr [NArray] numeric matrix to display 30 | # @param disp_size [Array] the size of the image to display 31 | # @param shape [Array] the true shape of the image (numeric matrix could be flattened) 32 | # @param in_fork [bool] whether to execute the display in fork (and continue running) 33 | def self.display narr, disp_size: nil, shape: nil, in_fork: true 34 | require 'rmagick' 35 | img = narr_to_img narr, shape: shape 36 | img.resize!(*disp_size, Magick::TriangleFilter,0.51) if disp_size 37 | if in_fork 38 | MachineLearningWorkbench::Tools::Execution.in_fork { img.display } 39 | else 40 | img.display 41 | end 42 | end 43 | 44 | # Create numeric matrix from png by filename. 
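    # A round-trip usage sketch (illustrative; the file name, shape and `narr`,
    # a 28x28-sized NArray, are hypothetical):
    #   Imaging = MachineLearningWorkbench::Tools::Imaging
    #   Imaging.narr_to_png narr, "digit.png", shape: [28, 28]
    #   flat_narr = Imaging.narr_from_png "digit.png", flat: true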
45 | # @param fname the file name 46 | # @param scale optional rescaling of the image 47 | # @param flat [bool] whether to return a flat array 48 | # @param dtype dtype for the numeric matrix, leave `nil` for automatic detection 49 | def self.narr_from_png fname, scale: nil, flat: false 50 | require 'rmagick' 51 | img = Magick::ImageList.new(fname).first 52 | img.scale!(scale) if scale 53 | shape = [img.columns, img.rows] 54 | pixels = img.export_pixels(0, 0, *shape, 'I') # 'I' for intensity 55 | raise "Sanity check" unless shape.reduce(:*)==pixels.size 56 | return pixels.to_na if flat 57 | pixels.to_na.to_dimensions shape 58 | end 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/logging.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Logging 5 | # Splits calls to standard streams to be both displayed on terminal and saved to file 6 | class LogSplitter < File 7 | def initialize dest 8 | fname = if File.directory?(dest) 9 | "#{dest}/#{Time.now.strftime "%y%m%d_%H%M"}.log" 10 | else dest 11 | end 12 | super fname, 'w' 13 | end 14 | 15 | def write *args 16 | STDOUT.write *args 17 | super 18 | end 19 | end 20 | 21 | def self.split_to dest, also_stderr: false 22 | $stdout = LogSplitter.new dest 23 | $stderr = $stdout if also_stderr 24 | end 25 | 26 | def self.restore_streams 27 | logger = $stdout 28 | $stdout = STDOUT 29 | $stderr = STDERR 30 | logger.close 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/normalization.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Normalization 5 | def self.feature_scaling narr, from: nil, to: [0,1] 6 | from ||= narr.minmax 7 | old_min, old_max = from 8 | new_min, new_max = to 9 | ( (narr-old_min)*(new_max-new_min)/(old_max-old_min) ) + new_min 10 | rescue ZeroDivisionError 11 | # require 'pry'; binding.pry 12 | raise ArgumentError, "If you get here, chances are there's a bug in `from` or `to`" 13 | end 14 | 15 | # @param per_column [bool] wheather to compute stats per-column or matrix-wise 16 | def self.z_score narr, per_column: true 17 | raise NotImplementedError unless per_column 18 | raise "this would be a good time to test this implementation" 19 | means = narr.mean 20 | stddevs = narr.std 21 | # address edge case of zero variance 22 | stddevs.map! { |v| v.zero? ? 1 : v } 23 | mean_mat = means.repeat narr.rows, 0 24 | stddev_mat = stddevs.repeat narr.rows, 0 25 | (narr - mean_mat) / stddev_mat 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/verification.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Verification 5 | # TODO: switch to NArray 6 | 7 | # def self.in_range! nmat, vrange 8 | # # Raise if values not in range 9 | # vmin, vmax = vrange.to_a 10 | # nmat.each_with_indices do |v, *idxs| 11 | # raise "Value not in range" unless v&.between? 
vmin, vmax 12 | # end 13 | # end 14 | 15 | # # Fix if values not in range 16 | # def self.in_range nmat, vrange 17 | # vmin, vmax = vrange.to_a 18 | # nmat.each_with_indices do |v, *idxs| 19 | # nmat[*idxs] = vmin if v < vmin 20 | # nmat[*idxs] = vmax if v > vmax 21 | # end 22 | # end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /machine_learning_workbench.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | lib = File.expand_path("../lib", __FILE__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "machine_learning_workbench" 8 | spec.version = `git describe` 9 | spec.author = "Giuseppe Cuccu" 10 | spec.email = "giuseppe.cuccu@gmail.com" 11 | 12 | spec.summary = %q[Workbench for practical machine learning in Ruby.] 13 | spec.description = %q[This workbench holds a collection of machine learning 14 | methods in Ruby. Rather than specializing on a single task or method, this 15 | gem aims at providing an encompassing framework for any machine learning 16 | application.].gsub(' ', '') 17 | spec.homepage = "https://github.com/giuse/machine_learning_workbench" 18 | spec.license = "MIT" 19 | spec.post_install_message = %Q[\ 20 | Thanks for installing the machine learning workbench! 21 | It is still a work in progress, feel free to open an issue or drop me an email 22 | and start a discussion if you are using this gem. Cheers! 23 | ].gsub(' ', '') 24 | 25 | spec.files = `git ls-files -z`.split("\x0").reject { |f| f.start_with? "spec" } 26 | 27 | # spec.bindir = "exe" 28 | # spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 29 | spec.require_paths = ["lib"] 30 | spec.required_ruby_version = '>= 2.4.0' 31 | 32 | # Install 33 | spec.add_development_dependency "bundler", "~> 1.16" 34 | spec.add_development_dependency "rake", "~> 10.0" 35 | 36 | # Test 37 | spec.add_development_dependency "rspec", "~> 3.0" 38 | spec.add_development_dependency "rmagick" # uhm would gladly drop this 39 | 40 | # Debug 41 | spec.add_development_dependency "pry", "~> 0.10" 42 | spec.add_development_dependency "pry-nav", "~> 0.2" 43 | spec.add_development_dependency "pry-rescue", "~> 1.4" 44 | spec.add_development_dependency "pry-stack_explorer", "~> 0.4" 45 | spec.add_development_dependency "pry-doc", "~> 0.12" 46 | 47 | # Run 48 | spec.requirements << "libopenblas-base" # requirement for `numo-linalg` 49 | spec.requirements << "liblapacke" # requirement for `numo-linalg` 50 | spec.add_dependency "numo-narray", "~> 0.9" 51 | spec.add_dependency "numo-linalg", "~> 0.1" 52 | spec.add_dependency "parallel", "~> 1.12" 53 | end 54 | -------------------------------------------------------------------------------- /spec/compressor/vector_quantization_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Compressor::VectorQuantization do 4 | VectorQuantization = MachineLearningWorkbench::Compressor::VectorQuantization 5 | 6 | it "works" 7 | # pick some artificial data 8 | # initialize with few centroids 9 | # train on centroids 10 | # verify the output 11 | end 12 | -------------------------------------------------------------------------------- /spec/helpers/uses_temporary_folders.rb: -------------------------------------------------------------------------------- 1 | # 
frozen_string_literal: true 2 | 3 | # evaluate in temporary (empty) folder 4 | module UsesTemporaryFolders 5 | def self.included example_group 6 | example_group.extend self 7 | end 8 | 9 | def in_temporary_folder 10 | require 'pathname' 11 | attr_reader :orig_dir, :tmp_dir 12 | # ensure working in empty temporary folder 13 | before do 14 | @orig_dir = Pathname.pwd 15 | @tmp_dir = orig_dir + "in_temporary_folder" 16 | FileUtils.rm_rf tmp_dir 17 | FileUtils.mkdir_p tmp_dir 18 | Dir.chdir tmp_dir 19 | end 20 | # clean up 21 | after do 22 | Dir.chdir orig_dir 23 | FileUtils.rm_rf tmp_dir 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/monkey/monkey_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Monkey do 4 | 5 | describe Numeric do 6 | v = 1e-3 7 | describe "#approximates?" do 8 | it do 9 | expect(v.approximates? v+1e-4, 1e-3).to be_truthy 10 | expect(v.approximates? v+1e-2, 1e-3).to be_falsey 11 | end 12 | end 13 | end 14 | 15 | # describe NMatrix do 16 | # data = [[1,2,3],[4,5,6],[7,8,9]] 17 | # nmat = NMatrix[*data, dtype: :float64] 18 | # diag = [1,5,9] 19 | 20 | # it "::new with a block" do 21 | # shape = [data.size, data.first.size] 22 | # built = NMatrix.new(shape) { |i,j| data[i][j]**2 } 23 | # expect(built).to eq(nmat**2) 24 | # end 25 | 26 | # context "when looping on the diagonal", :SKIP do 27 | # it "#each_diag" do 28 | # expect(nmat.each_diag.to_a).to_eq(diag.collect {|n| NMatrix[[n]]}) 29 | # end 30 | 31 | # it "#each_stored_diag" do 32 | # expect(nmat.each_stored_diag.to_a).to eq(diag) 33 | # end 34 | # end 35 | 36 | # context "when setting the diagonal", :SKIP do 37 | # set_diag_diag = [10,50,90] 38 | # set_diag_data = [[10,2,3],[4,50,6],[7,8,90]] 39 | # set_diag_nmat = NMatrix[*set_diag_data] 40 | 41 | # it "#set_diag" do 42 | # setted = nmat.set_diag {|i| set_diag_diag[i]} 43 | # expect(setted).to eq(set_diag_nmat) 44 | # expect(nmat).not_to eq(setted) 45 | # expect(nmat.object_id).not_to eq(setted.object_id) 46 | # end 47 | 48 | # it "#set_diag!" do 49 | # tmp_mat = nmat.clone 50 | # setted = tmp_mat.set_diag! {|i| set_diag_diag[i]} 51 | # expect(setted).to eq(set_diag_nmat) 52 | # expect(tmp_mat).to eq(setted) 53 | # end 54 | 55 | # end 56 | 57 | # describe "#outer" do 58 | # mini = NMatrix[[1,2],[3,4]] 59 | # exp = NMatrix[[[[2, 3], [4, 5]], 60 | # [[3, 4], [5, 6]]], 61 | # [[[4, 5], [6, 7]], 62 | # [[5, 6], [7, 8]]]] 63 | # it "computes the correct result" do 64 | # res = mini.outer(mini) {|a,b| a+b} 65 | # expect(res.shape).to eq(exp.shape) 66 | # expect(res).to eq(exp) 67 | # end 68 | # end 69 | 70 | # describe "#outer_flat" do 71 | # mini = NMatrix[[1,2],[3,4]] 72 | # exp_flat = NMatrix[[2, 3, 4, 5], 73 | # [3, 4, 5, 6], 74 | # [4, 5, 6, 7], 75 | # [5, 6, 7, 8]] 76 | # it "computes the correct result" do 77 | # res = mini.outer_flat(mini) {|a,b| a+b} 78 | # expect(res.shape).to eq(exp_flat.shape) 79 | # expect(res).to eq(exp_flat) 80 | # end 81 | # end 82 | 83 | # describe "#eigen" do 84 | # trg_eigenvalues = NMatrix[[16.11684, -1.11684, 0.0]].transpose 85 | # trg_eigenvectors = NMatrix[[0.283349, 0.641675, 1.0], 86 | # [-1.28335, -0.141675, 1.0], 87 | # [1.0, -2.0, 1.0]].transpose 88 | # # NMatrix (LAPACK) -- e_values, left_e_vecs, right_e_vecs 89 | # eigenvalues, _, eigenvectors = nmat.eigen 90 | 91 | # def eigencheck? 
orig, e_vals, e_vecs 92 | # # INPUT: original matrix, eigenvalues accessible by index, 93 | # # NMatrix with corresponding eigenvectors in columns 94 | # e_vecs.each_column.each_with_index.all? do |e_vec_t, i| 95 | # left = orig.dot(e_vec_t) 96 | # right = e_vec_t * e_vals[i] 97 | # left.approximates? right 98 | # end 99 | # end 100 | 101 | # it "solves the eigendecomposition" do 102 | # expect(eigencheck?(nmat, trg_eigenvalues, trg_eigenvectors)).to be_truthy 103 | # expect(eigenvalues.approximates? trg_eigenvalues).to be_truthy 104 | # expect(eigencheck?(nmat, eigenvalues, eigenvectors)).to be_truthy 105 | # end 106 | # end 107 | 108 | # describe "#exponential" do 109 | # testmat = nmat/10.0 # let's avoid 1e6 values, shall we? 110 | # exp = [[1.37316, 0.531485, 0.689809], 111 | # [1.00926, 2.24815, 1.48704], 112 | # [1.64536, 1.96481, 3.28426]] 113 | # it "computes the correct result" do 114 | # left = testmat.exponential 115 | # right = NMatrix[*exp] 116 | # expect(left.approximates? right).to be_truthy 117 | # end 118 | # end 119 | 120 | # describe "row_norms" do 121 | # trg_row_norms = [[3.7416573], [8.7749643], [13.928388]] 122 | # it "computes the correct result" do 123 | # expect(nmat.row_norms.approximates? NMatrix[*trg_row_norms]).to be_truthy 124 | # end 125 | # end 126 | 127 | # describe "#approximates?" do 128 | # it do 129 | # expect(nmat.approximates? nmat+1e-4, 1e-3).to be_truthy 130 | # expect(nmat.approximates? nmat+1e-2, 1e-3).not_to be_truthy 131 | # end 132 | # end 133 | 134 | # describe "#sort_rows_by" do 135 | # it "should be implemented! And used in NES#sorted_inds!" 136 | # end 137 | 138 | # describe "#hjoin", :SKIP do 139 | # it "should work with smaller matrices" do 140 | # a = NMatrix.new([1,3], [1,2,3]) 141 | # b = NMatrix.new([1,2], [4,5]) 142 | # expect(a.hjoin(b)).to eq(NMatrix.new([1,5], [1,2,3,4,5])) 143 | # end 144 | # it "should work with larger matrices" do 145 | # a = NMatrix.new([1,3], [1,2,3]) 146 | # b = NMatrix.new([1,4], [4,5,6,7]) 147 | # expect(a.hjoin(b)).to eq(NMatrix.new([1,7], [1,2,3,4,5,6,7])) 148 | # end 149 | # # it "should be tested also with multirow matrices" 150 | # end 151 | 152 | # describe "#vjoin", :SKIP do 153 | # it "should work with smaller matrices" do 154 | # a = NMatrix.new([3,1], [1,2,3]) 155 | # b = NMatrix.new([2,1], [4,5]) 156 | # expect(a.vjoin(b)).to eq(NMatrix.new([5,1], [1,2,3,4,5])) 157 | # end 158 | # it "should work with larger matrices" do 159 | # a = NMatrix.new([3,1], [1,2,3]) 160 | # b = NMatrix.new([4,1], [4,5,6,7]) 161 | # expect(a.vjoin(b)).to eq(NMatrix.new([7,1], [1,2,3,4,5,6,7])) 162 | # end 163 | # # it "should be tested also with multicolumn matrices!" 164 | # end 165 | 166 | # describe "#to_consistent_a" do 167 | # it "should always return an array with the same shape as the matrix" do 168 | # { [2,2] => [[1,2],[3,4]], # square 169 | # [2,3] => [[1,2,3],[4,5,6]], # rectangular (h) 170 | # [3,2] => [[1,2],[3,4],[5,6]], # rectangular (v) 171 | # [1,3] => [[1,2,3]], # single row => THIS FAILS FOR `NMatrix#to_a`! 172 | # [3,1] => [[1],[2],[3]], # single column 173 | # [3] => [1,2,3] # single-dimensional 174 | # }.each do |shape, ary| 175 | # expect(NMatrix.new(shape, ary.flatten).to_consistent_a).to eq ary 176 | # end 177 | # end 178 | # end 179 | # end 180 | # end 181 | 182 | # RSpec.describe "NMatrix inconsistencies, fixed in `Monkey`" do 183 | 184 | 185 | # # IF ANY OF THESE TESTS FAIL, DROP THE MONKEY AND USE THESE METHODS! 186 | 187 | 188 | # # method #to_a not consistent! 
=> wrote true_to_a (fixing it breaks #new) 189 | # describe "#to_a" do 190 | # it "does not always return an array with the same shape as the matrix" do 191 | # { 192 | # # [2,2] => [[1,2],[3,4]], # square 193 | # # [2,3] => [[1,2,3],[4,5,6]], # rectangular (h) 194 | # # [3,2] => [[1,2],[3,4],[5,6]], # rectangular (v) 195 | # [1,3] => [[1,2,3]], # single row => THIS FAILS FOR `NMatrix#to_a`! 196 | # # [3,1] => [[1],[2],[3]], # single column 197 | # # [3] => [1,2,3] # single-dimensional 198 | # }.each do |shape, ary| 199 | # expect(NMatrix.new(shape, ary.flatten).to_a).not_to eq ary 200 | # end 201 | # end 202 | # end 203 | 204 | end 205 | -------------------------------------------------------------------------------- /spec/neural_network/neural_network_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::NeuralNetwork do 4 | NN = MachineLearningWorkbench::NeuralNetwork 5 | netstruct = [2,2,1] 6 | 7 | describe NN::FeedForward do 8 | net = NN::FeedForward.new netstruct 9 | 10 | it "#initialize" do 11 | expect(net.struct).to eq(netstruct) 12 | end 13 | 14 | it "#reset" do 15 | initial_state = [ 16 | NArray[[0,0,1]], 17 | NArray[[0,0,1]], 18 | NArray[[0]]] 19 | altered_state = initial_state.collect { |m| m+1 } 20 | net.instance_variable_set(:@state, altered_state) 21 | expect(net.state).not_to eq(initial_state) 22 | net.reset_state 23 | expect(net.state).to eq(initial_state) 24 | end 25 | 26 | it "#deep_reset" do 27 | memoized_vars = [:@layer_row_sizes, :@layer_col_sizes, :@nlayers, 28 | :@layer_shapes, :@nweights_per_layer, :@nweights] 29 | net.nweights; net.nlayers # they end up calling all methods that use memoization 30 | memoized_vars.each do |sym| 31 | expect(net.instance_variable_get(sym)).not_to be_nil 32 | end 33 | net.deep_reset 34 | memoized_vars.each do |sym| 35 | expect(net.instance_variable_get(sym)).to be_nil 36 | end 37 | end 38 | 39 | it "#nweights" do 40 | # netstruct: [2,2,1] => layer_shapes: [[2,3],[1,3]] (remember: bias!) 41 | expect(net.nweights).to eq(2*3 + 1*3) 42 | end 43 | 44 | it "#layer_shapes" do 45 | # netstruct: [2,2,1] => layer_shapes: [[2,3],[1,3]] (remember: bias!) 
46 | expect(net.layer_row_sizes.size).to eq(net.layer_col_sizes.size) 47 | expect(net.layer_shapes).to eq([[2+1,2],[2+1,1]]) 48 | end 49 | 50 | context "with random weights" do 51 | net.init_random 52 | 53 | it "has one output" do 54 | expect(net.activate([2,2]).size).to eq(1) 55 | end 56 | 57 | it "#nweights correctly counts the weights" do 58 | expect(net.nweights).to eq(net.weights.map(&:to_a).flatten.size) 59 | end 60 | end 61 | 62 | context "with loaded weights" do 63 | weights = net.nweights.times.collect { |n| 1.0/(n+1) } # best to avoid 1.0/0 64 | 65 | it "#load_weights" do 66 | weights_are_safe = weights.dup 67 | net.load_weights weights_are_safe 68 | expect(weights_are_safe).to eq(weights) 69 | expect(net.layers.collect(&:to_a).flatten).to eq(weights) 70 | end 71 | 72 | it "solves the XOR problem" do 73 | # [0,1].repeated_permutation(2).collect{|pair| [pair, pair.reduce(:^)]} 74 | xor_table = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0} 75 | net = NN::FeedForward.new([2,2,1], act_fn: :logistic) 76 | # 2 in + b -> 3 neur, 2 in + b -> 1 neur 77 | # http://stats.stackexchange.com/questions/12197/can-a-2-2-1-feedforward-neural-network-with-sigmoid-activation-functions-represe 78 | solution_weights = [ [[1,2],[1,2],[0,0]], [[-1000],[850],[0]] ] 79 | net.load_weights solution_weights.flatten 80 | expect(net.weights).to eq(solution_weights) 81 | xor_table.each do |input, target| 82 | expect(net.activate(input)[0].approximates? target).to be_truthy 83 | end 84 | end 85 | end 86 | end 87 | 88 | describe NN::Recurrent do 89 | net = NN::Recurrent.new [2,2,1] 90 | context "with random weights" do 91 | net.init_random 92 | 93 | it "#nweights and #weights correspond" do 94 | expect(net.nweights).to eq(net.weights.map(&:to_a).flatten.size) 95 | end 96 | 97 | it "#layer_shapes" do 98 | # netstruct: [2,2,1], with recurrency and biases 99 | expect(net.layer_shapes).to eq([[2+2+1,2],[2+1+1,1]]) 100 | end 101 | 102 | it "works" do 103 | expect(net.activate([2,2]).size).to eq(1) 104 | end 105 | 106 | end 107 | end 108 | 109 | end 110 | -------------------------------------------------------------------------------- /spec/optimizer/natural_evolution_strategies/individuals_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies do 4 | NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 5 | 6 | describe :inds do 7 | 8 | context "when sorted by fitness" do 9 | fit = lambda { |ind| ind.sum } 10 | 11 | class TestNES < NES::Base 12 | def initialize_distribution mu_init: nil, sigma_init: nil 13 | @eye = NArray.eye(@ndims) 14 | @mu = NArray.zeros([1,@ndims]) 15 | @sigma = @eye.copy 16 | @popsize = 3 # must match with `inds` declared above 17 | end 18 | end 19 | 20 | context "with generated inds" do 21 | ndims = 5 22 | nes = TestNES.new(ndims, fit, :min) 23 | # fetch individuals through nes sampling 24 | 25 | it "minimization" do 26 | nes_sums = nes.sorted_inds.sum(1) 27 | expect(nes_sums).to eq(nes_sums.sort.reverse) 28 | end 29 | 30 | it "maximization" do 31 | nes.instance_eval("@opt_type = :max") 32 | nes_sums = nes.sorted_inds.sum(1) 33 | expect(nes_sums).to eq(nes_sums.sort) 34 | end 35 | end 36 | end 37 | 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /spec/optimizer/natural_evolution_strategies/magic_numbers_spec.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies do 4 | describe "magic numbers" do 5 | NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 6 | 7 | class TestNES < NES::Base 8 | def initialize_distribution **args 9 | @mu = NArray.zeros [1, @ndims] 10 | @sigma = NArray.eye @ndims 11 | end 12 | end 13 | 14 | describe :utilities do 15 | 16 | expected = { 17 | 5 => [-0.2, -0.2, -0.2, -0.2, 0.8], 18 | 10 => [-0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, 0.0215323, 0.192823, 0.485645], 19 | 20 => [-0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.0331092, -0.0139599, 0.00814626, 0.0342923, 0.0662925, 0.107548, 0.165694, 0.265096] 20 | } 21 | 22 | it "computes the correct values" do 23 | expected.each do |n, exp| 24 | nes = TestNES.new(n, Proc.new{}, :min) 25 | nes.instance_eval("@popsize = n") 26 | expect(nes.cmaes_utilities.approximates? NArray[exp]).to be_truthy 27 | end 28 | end 29 | end 30 | 31 | describe :lrate do 32 | expected = { 33 | 5 => 0.412281, 34 | 10 => 0.335365, 35 | 20 => 0.268137 36 | } 37 | 38 | it "computes the correct values" do 39 | expected.each do |n, exp| 40 | nes = TestNES.new(n, Proc.new{}, :min) 41 | expect(nes.cmaes_lrate.approximates? exp).to be_truthy 42 | end 43 | end 44 | end 45 | 46 | describe :popsize do 47 | expected = { 48 | 5 => 8, 49 | 10 => 10, 50 | 20 => 12 51 | } 52 | 53 | it "computes the correct values" do 54 | expected.each do |n, exp| 55 | nes = TestNES.new(n, Proc.new{}, :min) 56 | expect(nes.cmaes_popsize).to eq(exp) 57 | end 58 | end 59 | end 60 | 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /spec/optimizer/natural_evolution_strategies/nes_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies do 4 | NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 5 | ndims = 5 # XNES, SNES RNES 6 | ndims_lst = [3,2] # BDNES 7 | obj_fns = { 8 | # MINIMIZATION: upper parabolic with minimum in [0]*ndims 9 | min: -> (ind) { (ind**2).sum }, 10 | # MAXIMIZATION: lower parabolic with maximum in [0]*ndims 11 | max: -> (ind) { -(ind**2).sum } 12 | } 13 | opt_types=obj_fns.keys 14 | one_opt_type = opt_types.first 15 | ntrains = 200 16 | 17 | describe NES::XNES do 18 | 19 | describe "#init" do 20 | it "initializes correctly" do 21 | opt_type = opt_types.sample # try either :) 22 | nes = NES::XNES.new ndims, obj_fns[opt_type], opt_type 23 | 24 | expect(opt_types).to include nes.opt_type 25 | expect(nes.obj_fn).to eq(obj_fns[nes.opt_type]) 26 | end 27 | end 28 | 29 | describe "#train" do 30 | describe "full run" do 31 | opt_type = opt_types.sample # try either :) 32 | nes = NES::XNES.new ndims, obj_fns[opt_type], opt_type, rseed: 1 33 | context "within #{ntrains} iterations" do 34 | it "optimizes the negative squares function" do 35 | ntrains.times { nes.train } 36 | expect(nes.mu.approximates? 0).to be_truthy 37 | expect(nes.convergence.approximates? 
0).to be_truthy 38 | end 39 | end 40 | end 41 | 42 | describe "with parallel fit" do 43 | opt_type = opt_types.sample # try either :) 44 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 45 | nes = NES::XNES.new ndims, fit_par, opt_type, parallel_fit: true, rseed: 1 46 | context "within #{ntrains} iterations" do 47 | it "optimizes the negative squares function" do 48 | ntrains.times { nes.train } 49 | expect(nes.mu.approximates? 0).to be_truthy 50 | expect(nes.convergence.approximates? 0).to be_truthy 51 | end 52 | end 53 | end 54 | end 55 | 56 | describe "resuming" do 57 | it "#dump and #load" do 58 | a = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 59 | 3.times { a.train } 60 | a_dump = a.save 61 | b = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 2 62 | b.load a_dump 63 | b_dump = b.save 64 | expect(a_dump).to eq(b_dump) 65 | end 66 | 67 | it "#load allows resuming" do 68 | nes = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 69 | 4.times { nes.train } 70 | run_4_straight = nes.save 71 | 72 | nes = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 73 | 2.times { nes.train } 74 | run_2_only = nes.save 75 | 76 | # If I resume with a new nes, it works, but results differ because 77 | # it changes the number of times the rand has been sampled 78 | nes_new = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 79 | nes_new.load run_2_only 80 | 2.times { nes_new.train } 81 | run_4_resumed_new = nes_new.save 82 | expect(run_4_straight).not_to eq(run_4_resumed_new) 83 | 84 | # TODO: reactivate the test below after `Xumo::NArray#rand_norm` accepts 85 | # a random seed as input, for the moment we're giving up on this in 86 | # exchange for performance. 87 | 88 | # # If instead I use a nes with same rseed and same number of rand 89 | # # calls, even though I trash the dist info, it yields the same result 90 | # nes.load run_2_only 91 | # 2.times { nes.train } 92 | # run_4_resumed = nes.save 93 | # expect(run_4_straight).to eq(run_4_resumed) 94 | end 95 | end 96 | end 97 | 98 | describe NES::SNES do 99 | describe "full run" do 100 | opt_type = opt_types.sample # try either :) 101 | nes = NES::SNES.new ndims, obj_fns[opt_type], opt_type, rseed: 1 102 | context "within #{ntrains} iterations" do 103 | it "optimizes the negative squares function" do 104 | ntrains.times { nes.train } 105 | expect(nes.mu.approximates? 0).to be_truthy 106 | expect(nes.convergence.approximates? 0).to be_truthy 107 | end 108 | end 109 | end 110 | 111 | describe "with parallel fit" do 112 | opt_type = opt_types.sample # try either :) 113 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 114 | nes = NES::SNES.new ndims, fit_par, opt_type, parallel_fit: true, rseed: 1 115 | context "within #{ntrains} iterations" do 116 | it "optimizes the negative squares function" do 117 | ntrains.times { nes.train } 118 | expect(nes.mu.approximates? 0).to be_truthy 119 | expect(nes.convergence.approximates? 
0).to be_truthy 120 | end 121 | end 122 | end 123 | 124 | describe "resuming" do 125 | it "#dump and #load" do 126 | a = NES::SNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 127 | 3.times { a.train } 128 | a_dump = a.save 129 | b = NES::SNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 2 130 | b.load a_dump 131 | b_dump = b.save 132 | expect(a_dump).to eq(b_dump) 133 | end 134 | end 135 | end 136 | 137 | describe NES::RNES do 138 | describe "full run" do 139 | opt_type = opt_types.sample # try either :) 140 | nes = NES::RNES.new ndims, obj_fns[opt_type], opt_type, rseed: 1 141 | context "within #{ntrains} iterations" do 142 | it "optimizes the negative squares function" do 143 | ntrains.times { nes.train } 144 | expect(nes.mu.approximates? 0).to be_truthy 145 | expect(nes.convergence.approximates? 0).to be_truthy 146 | end 147 | end 148 | end 149 | 150 | describe "with parallel fit" do 151 | opt_type = opt_types.sample # try either :) 152 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 153 | nes = NES::RNES.new ndims, fit_par, opt_type, parallel_fit: true, rseed: 1 154 | context "within #{ntrains} iterations" do 155 | it "optimizes the negative squares function" do 156 | ntrains.times { nes.train } 157 | expect(nes.mu.approximates? 0).to be_truthy 158 | expect(nes.convergence.approximates? 0).to be_truthy 159 | end 160 | end 161 | end 162 | 163 | describe "resuming" do 164 | it "#dump and #load" do 165 | a = NES::RNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 166 | 3.times { a.train } 167 | a_dump = a.save 168 | b = NES::RNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 2 169 | b.load a_dump 170 | b_dump = b.save 171 | expect(a_dump).to eq(b_dump) 172 | end 173 | end 174 | end 175 | 176 | # Fix FNES first 177 | # describe NES::FNES, :SKIP do 178 | # describe "full run" do 179 | # opt_type = opt_types.sample # try either :) 180 | # nes = NES::FNES.new ndims, obj_fns[opt_type], opt_type, rseed: 5 181 | # context "within #{ntrains} iterations" do 182 | # it "optimizes the negative squares function" do 183 | # ntrains.times { nes.train } 184 | # expect(nes.mu.approximates? 0).to be_truthy 185 | # expect(nes.convergence).to eq(1) 186 | # end 187 | # end 188 | # end 189 | # end 190 | 191 | describe NES::BDNES do 192 | describe "full run" do 193 | opt_type = opt_types.sample # try either :) 194 | nes = NES::BDNES.new [3,2], obj_fns[opt_type], opt_type, rseed: 1 195 | context "within #{ntrains} iterations" do 196 | it "optimizes the negative squares function" do 197 | ntrains.times { nes.train } 198 | expect(nes.mu.approximates? 0).to be_truthy 199 | expect(nes.convergence.approximates? 0).to be_truthy 200 | end 201 | end 202 | end 203 | 204 | describe "with parallel fit" do 205 | opt_type = opt_types.sample # try either :) 206 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 207 | nes = NES::BDNES.new [3,2], fit_par, opt_type, parallel_fit: true, rseed: 1 208 | context "within #{ntrains} iterations" do 209 | it "optimizes the negative squares function" do 210 | ntrains.times { nes.train } 211 | expect(nes.mu.approximates? 0).to be_truthy 212 | expect(nes.convergence.approximates? 
0).to be_truthy 213 | end 214 | end 215 | end 216 | 217 | describe "resuming" do 218 | it "#dump and #load" do 219 | a = NES::BDNES.new ndims_lst, obj_fns[one_opt_type], one_opt_type, rseed: 1 220 | 3.times { a.train } 221 | a_dump = a.save 222 | b = NES::BDNES.new ndims_lst, obj_fns[one_opt_type], one_opt_type, rseed: 2 223 | b.load a_dump 224 | b_dump = b.save 225 | expect(a_dump).to eq(b_dump) 226 | end 227 | end 228 | end 229 | 230 | end 231 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/setup" 4 | require "machine_learning_workbench" 5 | require_relative 'helpers/uses_temporary_folders' 6 | 7 | STDOUT.sync = true 8 | 9 | RSpec.configure do |config| 10 | # Enable flags like --only-failures and --next-failure 11 | config.example_status_persistence_file_path = ".rspec_status" 12 | # Disable RSpec exposing methods globally on `Module` and `main` 13 | config.disable_monkey_patching! 14 | 15 | config.expect_with :rspec do |c| 16 | c.syntax = :expect 17 | end 18 | 19 | # These two settings work together to allow you to limit a spec run 20 | # to individual examples or groups you care about by tagging them with 21 | # `:focus` metadata. When nothing is tagged with `:focus`, all examples 22 | # get run. 23 | config.filter_run :FOCUS 24 | config.filter_run_excluding :SKIP 25 | config.run_all_when_everything_filtered = true 26 | end 27 | -------------------------------------------------------------------------------- /spec/systems/neuroevo_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | "Work in progress" 4 | --------------------------------------------------------------------------------
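The neuroevolution system above is still marked as work in progress. Purely as an illustrative sketch of how the classes shown in this listing can be combined — this is not code from the repository, and the fitness definition, seed and iteration count are assumptions — a feed-forward network could be trained on the specs' XOR table by optimizing its weights with XNES:

    require 'machine_learning_workbench'

    NN  = MachineLearningWorkbench::NeuralNetwork
    NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies

    xor = { [0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0 }
    net = NN::FeedForward.new [2, 2, 1], act_fn: :logistic

    # Fitness to maximize: negated squared error over the XOR table
    fit = -> (weights) do
      net.load_weights weights
      sq_err = xor.sum { |input, target| (net.activate(input)[0] - target)**2 }
      -sq_err
    end

    nes = NES::XNES.new net.nweights, fit, :max, rseed: 1
    200.times { nes.train }
    net.load_weights nes.mu.flatten  # adopt the distribution mean as final weights

XNES fits here because the network has few weights; for larger networks the separable SNES or the block-diagonal BDNES above typically trade full covariance modelling for speed.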