├── .codeclimate.yml ├── .gitignore ├── .rspec ├── .travis.yml ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── bin ├── console └── setup ├── examples ├── image_compression.rb └── neuroevolution.rb ├── lib ├── machine_learning_workbench.rb └── machine_learning_workbench │ ├── compressor.rb │ ├── compressor │ ├── copy_vq.rb │ ├── decaying_learning_rate_vq.rb │ ├── incr_dict_vq.rb │ └── vector_quantization.rb │ ├── monkey.rb │ ├── neural_network.rb │ ├── neural_network │ ├── base.rb │ ├── feed_forward.rb │ └── recurrent.rb │ ├── optimizer.rb │ ├── optimizer │ └── natural_evolution_strategies │ │ ├── base.rb │ │ ├── bdnes.rb │ │ ├── fnes.rb │ │ ├── rnes.rb │ │ ├── snes.rb │ │ └── xnes.rb │ ├── systems.rb │ ├── systems │ └── neuroevolution.rb │ ├── tools.rb │ └── tools │ ├── execution.rb │ ├── imaging.rb │ ├── logging.rb │ ├── normalization.rb │ └── verification.rb ├── machine_learning_workbench.gemspec └── spec ├── compressor └── vector_quantization_spec.rb ├── helpers └── uses_temporary_folders.rb ├── monkey └── monkey_spec.rb ├── neural_network └── neural_network_spec.rb ├── optimizer └── natural_evolution_strategies │ ├── individuals_spec.rb │ ├── magic_numbers_spec.rb │ └── nes_spec.rb ├── spec_helper.rb └── systems └── neuroevo_spec.rb /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | engines: 3 | rubocop: 4 | enabled: true 5 | duplication: 6 | enabled: true 7 | config: 8 | languages: 9 | - ruby 10 | ratings: 11 | paths: 12 | - lib/** 13 | - "**.rb" 14 | exclude_paths: 15 | - spec/** 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | Gemfile.lock 10 | /stats/ 11 | # rspec failure tracking 12 | .rspec_status 13 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: ruby 3 | rvm: 4 | - 2.4.2 5 | addons: 6 | apt: 7 | packages: 8 | - libopenblas-base 9 | - liblapacke 10 | before_install: gem install bundler -v 1.16.0 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } 4 | 5 | # Specify your gem's dependencies in machine_learning_workbench.gemspec 6 | gemspec 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Giuseppe Cuccu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons 
to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Machine Learning Workbench](https://github.com/giuse/machine_learning_workbench) 2 | 3 | [![Gem Version](https://badge.fury.io/rb/machine_learning_workbench.svg)](https://badge.fury.io/rb/machine_learning_workbench) 4 | [![Build Status](https://travis-ci.org/giuse/machine_learning_workbench.svg?branch=master)](https://travis-ci.org/giuse/machine_learning_workbench) 5 | [![Code Climate](https://codeclimate.com/github/giuse/machine_learning_workbench/badges/gpa.svg)](https://codeclimate.com/github/giuse/machine_learning_workbench) 6 | 7 | This workbench holds a collection of machine learning methods in Ruby. Rather than specializing in a single task or method, this gem aims at providing an encompassing framework for any machine learning application. 8 | 9 | ## Installation 10 | 11 | Add this line to your application's Gemfile: 12 | 13 | ```ruby 14 | gem 'machine_learning_workbench' 15 | ``` 16 | 17 | And then execute: 18 | 19 | $ bundle 20 | 21 | Or install it yourself as: 22 | 23 | $ gem install machine_learning_workbench 24 | 25 | ## Usage 26 | 27 | TLDR: Check out [the `examples` directory](examples), e.g. [this script](examples/neuroevolution.rb). 28 | 29 | This library is meant as a practical workbench: there are plenty of tools hanging, each with multiple uses and applications, and as such each is built to be as atomic and flexible as possible. Folders [in the lib structure](lib/machine_learning_workbench) categorize them. 30 | 31 | The [systems directory](lib/machine_learning_workbench/systems) holds a few examples of how to bring them together in higher abstractions, i.e. as _compound tools_. 32 | For example, a [neuroevolution setup](lib/machine_learning_workbench/systems/neuroevolution.rb) brings together evolutionary computation and neural networks. 33 | 34 | For an example of how to build one from scratch, check out this [neuroevolution script](examples/neuroevolution.rb). To run it, use `bundle exec ruby examples/neuroevolution.rb`. 35 | 36 | 37 | ## Development 38 | 39 | After cloning the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. 40 | 41 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 
42 | 43 | 44 | ## Contributing 45 | 46 | Bug reports and pull requests are welcome on GitHub at https://github.com/giuse/machine_learning_workbench. 47 | 48 | ## License 49 | 50 | The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). 51 | 52 | ## References 53 | 54 | Please feel free to contribute to this list (see `Contributing` above). 55 | 56 | - **NES** stands for Natural Evolution Strategies. Check its [Wikipedia page](https://en.wikipedia.org/wiki/Natural_evolution_strategy) for more info. 57 | - **CMA-ES** stands for Covariance Matrix Adaptation Evolution Strategy. Check its [Wikipedia page](https://en.wikipedia.org/wiki/CMA-ES) for more info. 58 | - **UL-ELR** stands for Unsupervised Learning plus Evolutionary Reinforcement Learning, from the paper _"Intrinsically Motivated Neuroevolution for Vision-Based Reinforcement Learning" (ICDL2011)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 59 | - **BD-NES** stands for Block Diagonal Natural Evolution Strategy, from the homonymous paper _"Block Diagonal Natural Evolution Strategies" (PPSN2012)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 60 | - **RNES** stands for Radial Natural Evolution Strategy, from the paper _"Novelty-Based Restarts for Evolution Strategies" (CEC2011)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 61 | - **DLR-VQ** stands for Decaying Learning Rate Vector Quantization, from the algorithm originally named _*Online VQ*_ in the paper _"Intrinsically Motivated Neuroevolution for Vision-Based Reinforcement Learning" (ICDL2011)_. Check [here](https://exascale.info/members/giuseppe-cuccu/) for citation reference and pdf. 62 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | require "rspec/core/rake_task" 5 | 6 | RSpec::Core::RakeTask.new(:spec) 7 | 8 | task :default => :spec 9 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'bundler/setup' 4 | require 'machine_learning_workbench' 5 | 6 | # You can add fixtures and/or initialization code here to make experimenting 7 | # with your gem easier. You can also use a different console, if you like. 
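# For example (an illustrative sketch, not part of the original file), you could
# pre-build a small network to poke at from the console:
# net = WB::NeuralNetwork::FeedForward.new [2,2,1], act_fn: :logistic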
8 | 9 | require 'pry' 10 | Pry.start 11 | 12 | # alternatively: 13 | # require "irb" 14 | # IRB.start(__FILE__) 15 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | # ubuntu: 7 | sudo apt install libopenblas-base liblapacke # for numo-linalg 8 | 9 | bundle install 10 | 11 | # Do any other automated setup that you need to do here 12 | -------------------------------------------------------------------------------- /examples/image_compression.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Run as: `bundle exec ruby examples/image_compression.rb` 4 | 5 | require 'rmagick' 6 | require 'machine_learning_workbench' 7 | VectorQuantization = MachineLearningWorkbench::Compressor::VectorQuantization 8 | Img = MachineLearningWorkbench::Tools::Imaging 9 | Norm = MachineLearningWorkbench::Tools::Normalization 10 | 11 | ncentrs = 1 12 | # image_files = Dir[ENV['HOME']+'/jaffe/KA.HA*.png'] 13 | image_files = Dir[ENV['HOME']+'/jaffe/*.png'] 14 | raise "Download the JAFFE dataset in your home dir" if image_files&.empty? 15 | # ... and convert the `.tiff` in `.png`: `mogrify -format png jaffe/*.tiff` 16 | centr_range = [-1, 1] 17 | orig_shape = [256, 256] 18 | img_range = [0, 2**16-1] 19 | 20 | puts "Loading images" 21 | images = image_files.map do |fname| 22 | ary = Img.narr_from_png fname, flat: true 23 | ret = Norm.feature_scaling ary, from: img_range, to: centr_range 24 | end 25 | 26 | puts "Initializing VQ" 27 | vq = VectorQuantization.new ncentrs: ncentrs, 28 | dims: images.first.shape, lrate: 0.3, vrange: centr_range 29 | 30 | puts "Training" 31 | vq.train images, debug: true 32 | 33 | puts "Done!" 34 | begin 35 | vq.centrs.map { |c| Img.display c, shape: orig_shape } 36 | require 'pry'; binding.pry 37 | ensure 38 | MachineLearningWorkbench::Tools::Execution.kill_forks 39 | end 40 | -------------------------------------------------------------------------------- /examples/neuroevolution.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Run as: `bundle exec ruby examples/neuroevolution.rb` 4 | 5 | # Make sure the gem is installed first with `gem install machine_learning_workbench` 6 | # Alternatively, add `gem 'machine_learning_workbench'` to your Gemfile if using Bundle, 7 | # followed by a `bundle install` 8 | require 'machine_learning_workbench' 9 | # Workbench shorthands 10 | XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES 11 | FFNN = WB::NeuralNetwork::FeedForward 12 | 13 | # Let's address the XOR problem, as it requires nonlinear fitting 14 | XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0} 15 | # A classic [2,2,1] (2 inputs, 2 hidden neurons, 1 output neurons) feed-forward 16 | # network with nonlinear activations can solve this problem. 17 | # To approximate more complex functions, keep the number of inputs and outputs 18 | # fixed (they depend on the problem) and increase the number and/or size of 19 | # hidden neurons. For example: [2, 10, 7, 4, 1]. 20 | # NOTE: If your network grows above few thousands of weights, XNES may be too slow. 21 | # Try using SNES for large shallow networks or BDNES for deep networks. 
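# For instance, an SNES setup could look like this (a sketch: it assumes SNES accepts
# the same constructor arguments as the XNES call shown further below):
# nes = WB::Optimizer::NaturalEvolutionStrategies::SNES.new NET.nweights, method(:fitness), :max, rseed: 0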
22 | NET = FFNN.new [2,2,1], act_fn: :logistic 23 | # Note: the process is exactly the same, from instantiation to training, for recurrent 24 | # networks using the class `WB::NeuralNetwork::Recurrent`. 25 | # Of course RNNs should be applied to sequential tasks, while XOR is static 26 | 27 | # We will search for the network's weights with a black-box optimization algorithm 28 | # This means we will search for arrays of numbers, which need to be scored. 29 | # The scoring process will work as follows: use the numbers as weights for the neural 30 | # network, test the network on classifying the 4 cases of XOR, use that count as the 31 | # score for the weights (original array of numbers). 32 | 33 | # Hence the fitness looks as follows: 34 | def fitness weights 35 | # Each list of weights uniquely defines a neural network 36 | NET.load_weights weights 37 | # Activate the network on each of the XOR instances 38 | # - prediction: the output of the network 39 | # - observation: correct value, our target 40 | pred_obs = XOR.map do |input, obs| 41 | # The network can have an arbitrary number of output neurons 42 | # Since here we have only one, we extract the value as the output 43 | output = NET.activate(input)[0] 44 | # Here we interpret the output as classification 45 | pred = output > 0.5 ? 1 : 0 46 | # Finally accumulate prediction-observation pairs 47 | [pred, obs] 48 | end 49 | # To build a score out of this, we count the number of correct classifications 50 | score = Float(pred_obs.count { |pr, ob| pr == ob }) 51 | # That's it, this will score the weights based on their network's performance 52 | end 53 | 54 | # Next comes initializing the black-box stochastic optimization algorithm 55 | # We are searching for the network's weights; this gives us the search space dimensionality 56 | # We'll use XNES as we are working with fewer than 100 dimensions (weights) 57 | nes = XNES.new NET.nweights, method(:fitness), :max, rseed: 0 58 | # Note: BDNES requires `NET.nweights_per_layer` rather than `NET.nweights` in initialization: 59 | # nes = WB::Optimizer::NaturalEvolutionStrategies::BDNES.new NET.nweights_per_layer, 60 | # method(:fitness), :max, rseed: 10 61 | # The random seed is fixed here to ensure reproducible behavior 62 | # In a real task, it is best to use an oversized network, run more iterations, and try several seeds 63 | 64 | # NOTE: In practical applications it is best to delegate parallelization to the fitness 65 | # function instead of computing the fitness of one individual at a time. This can be 66 | # achieved by passing an objective function defined on a _list_ of weight-lists, and 67 | # setting the `parallel_fit` switch to `true`: 68 | # nes = XNES.new NET.nweights, 69 | # -> (genotypes) { Parallel.map genotypes, &method(:fitness) }, 70 | # :max, rseed: 0, parallel_fit: true 71 | 72 | 73 | # Nothing left but to run the optimization algorithm 74 | # Depending on the random seed (read: luck), a few epochs here will suffice 75 | 50.times { nes.train } 76 | # OK! Now remember, `NET` currently holds the weights of the last evaluation 77 | # Let's fetch the best individual found so far 78 | best_fit, best_weights = nes.best 79 | # Let's run them again to check they work 80 | result = fitness best_weights 81 | # Note: if you defined a parallel fitness above, you'll need instead 82 | # result = fitness([best_weights])[0] 83 | puts "The found network achieves a score of #{result} out of #{XOR.size} in the XOR task" 84 | puts "Weights: #{best_weights.to_a}" 85 | puts "Done!" 86 | # That's it! 
18 lines and you got a working neuroevolution algorithm, congrats :) 87 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | gpu = false # prepare for switching to GPUs 4 | if gpu 5 | require 'cumo/narray' 6 | Xumo = Cumo 7 | require 'cumo/linalg' 8 | else 9 | require 'numo/narray' 10 | Xumo = Numo 11 | # gem `numo-linalg` depends on openblas and lapacke: 12 | # `sudo apt install libopenblas-base liblapacke` 13 | require 'numo/linalg' 14 | end 15 | 16 | # Shorthands 17 | NArray = Xumo::DFloat # set a single data type across the WB for now 18 | NMath = Xumo::NMath # shorthand for extended math module 19 | NLinalg = Xumo::Linalg # shorthand for linear algebra module 20 | 21 | module MachineLearningWorkbench 22 | module Compressor 23 | end 24 | module NeuralNetwork 25 | end 26 | module Optimizer 27 | end 28 | module Tools 29 | end 30 | end 31 | WB = MachineLearningWorkbench # import MachineLearningWorkbench as WB ;) 32 | 33 | require_relative 'machine_learning_workbench/monkey' 34 | require_relative 'machine_learning_workbench/tools' 35 | require_relative 'machine_learning_workbench/compressor' 36 | require_relative 'machine_learning_workbench/neural_network' 37 | require_relative 'machine_learning_workbench/optimizer' 38 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'compressor/vector_quantization' 4 | require_relative 'compressor/decaying_learning_rate_vq' 5 | require_relative 'compressor/copy_vq' 6 | require_relative 'compressor/incr_dict_vq' 7 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/copy_vq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | # Train-less VQ, copying new images into centroids 5 | # Optimized for online training. 
6 | class CopyVQ < VectorQuantization 7 | 8 | attr_reader :equal_simil, :next_train 9 | 10 | def initialize **opts 11 | puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate] 12 | puts "Ignoring similarity: `simil_type: #{opts[:simil_type]}`" if opts[:simil_type] 13 | # TODO: try different epsilons to reduce the number of states 14 | # for example, in qbert we care what is lit and what is not, not the colors 15 | @equal_simil = opts.delete(:equal_simil) || 0.0 16 | super **opts.merge({lrate: nil, simil_type: nil}) 17 | @ntrains << 0 # to count duplicates, images we skip the train on 18 | @next_train = 0 # pointer to the next centroid to train 19 | end 20 | 21 | def ntrains; @ntrains[0...-1]; end 22 | def ntrains_skip; @ntrains.last; end 23 | 24 | # Overloading lrate check from original VQ 25 | def check_lrate lrate; nil; end 26 | 27 | # Train on one vector: 28 | # - train only if the image is not already in dictionary 29 | # - find the next untrained centroid 30 | # - training is just overwriting it 31 | # @return [Integer] index of trained centroid 32 | def train_one vec, eps: equal_simil 33 | mses = centrs.map do |centr| 34 | ((centr-vec)**2).sum / centr.size 35 | end 36 | # BEWARE: I am currently not handling the case where we run out of centroids! 37 | # => Will be addressed directly by dynamic dictionary size 38 | # return -1 if mses.min < eps 39 | return -1 if mses.min < eps || next_train == ncentrs 40 | trg_idx = next_train 41 | @next_train += 1 42 | # require 'pry'; binding.pry if next_train == ncentrs 43 | puts "Overwriting centr #{next_train}" 44 | # norm_vec = vec / NLinalg.norm(vec) 45 | # centrs[trg_idx, true] = norm_vec 46 | centrs[trg_idx, true] = vec 47 | trg_idx 48 | end 49 | 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/decaying_learning_rate_vq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | # VQ with per-centroid decaying learning rates. 5 | # Optimized for online training. 6 | class DecayingLearningRateVQ < VectorQuantization 7 | 8 | attr_reader :lrate_min, :lrate_min_den, :decay_rate 9 | 10 | def initialize **opts 11 | puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate] 12 | @lrate_min = opts.delete(:lrate_min) || 0.001 13 | @lrate_min_den = opts.delete(:lrate_min_den) || 1 14 | @decay_rate = opts.delete(:decay_rate) || 1 15 | super **opts.merge({lrate: nil}) 16 | end 17 | 18 | # Overloading lrate check from original VQ 19 | def check_lrate lrate; nil; end 20 | 21 | # Decaying per-centroid learning rate. 
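# i.e. lrate(i) = max( 1.0 / (ntrains[i] * decay_rate + lrate_min_den), lrate_min )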
22 | # @param centr_idx [Integer] index of the centroid 23 | # @param lower_bound [Float] minimum learning rate 24 | # @note nicely overloads the `attr_reader` of parent class 25 | def lrate centr_idx, min_den: lrate_min_den, lower_bound: lrate_min, decay: decay_rate 26 | [1.0/(ntrains[centr_idx]*decay+min_den), lower_bound].max 27 | .tap { |l| puts "centr: #{centr_idx}, ntrains: #{ntrains[centr_idx]}, lrate: #{l}" } 28 | end 29 | 30 | # Train on one vector 31 | # @return [Integer] index of trained centroid 32 | def train_one vec, eps: nil 33 | # NOTE: ignores epsilon if passed 34 | trg_idx, _simil = most_similar_centr(vec) 35 | # norm_vec = vec / NLinalg.norm(vec) 36 | # centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate(trg_idx)) + norm_vec * lrate(trg_idx) 37 | centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate(trg_idx)) + vec * lrate(trg_idx) 38 | trg_idx 39 | end 40 | 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/incr_dict_vq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | # Incremental Dictionary Train-less VQ, creating new centroids rather than training 5 | # Optimized for online training. 6 | # TODO: as the deadline grows nigh, the hacks grow foul. Refactor all VQs together. 7 | class IncrDictVQ < VectorQuantization 8 | 9 | attr_reader :equal_simil 10 | undef :ntrains # centroids are not trained 11 | 12 | def initialize **opts 13 | puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate] 14 | puts "Ignoring similarity: `simil_type: #{opts[:simil_type]}`" unless opts[:simil_type] == :dot 15 | puts "Ignoring ncentrs: `ncentrs: #{opts[:ncentrs]}`" if opts[:ncentrs] 16 | # TODO: try different epsilons to reduce the number of states 17 | # for example, in qbert we care what is lit and what is not, not the colors 18 | @equal_simil = opts.delete(:equal_simil) || 0.0 19 | super **opts.merge({ncentrs: 1, lrate: nil, simil_type: :dot}) 20 | 21 | @ntrains = nil # will disable the counting 22 | end 23 | 24 | # Overloading lrate check from original VQ 25 | def check_lrate lrate; nil; end 26 | 27 | # Train on one vector: 28 | # - train only if the image is not already in dictionary 29 | # - create new centroid from the image 30 | # @return [Integer] index of new centroid 31 | def train_one vec, eps: equal_simil 32 | # NOTE: novelty needs to be re-computed for each image, as after each 33 | # training the novelty signal changes! 34 | 35 | # NOTE the reconstruction error here depends once more on the _color_ 36 | # this is wrong and should be taken out of the equation 37 | # NOTE: this is fixed if I use the differences sparse coding method 38 | residual_img = reconstr_error(vec) 39 | rec_err = residual_img.mean 40 | return -1 if rec_err < eps 41 | puts "Creating centr #{ncentrs} (rec_err: #{rec_err})" 42 | # norm_vec = vec / NLinalg.norm(vec) 43 | # @centrs = centrs.concatenate norm_vec 44 | # @centrs = centrs.concatenate vec 45 | @centrs = centrs.concatenate residual_img 46 | # HACK: make it more general by using `code_size` 47 | @utility = @utility.concatenate [0] * (encoding_type == :sparse_coding_v1 ? 
2 : 1) 48 | ncentrs 49 | end 50 | 51 | end 52 | end 53 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/compressor/vector_quantization.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Compressor 4 | 5 | # Standard Vector Quantization 6 | class VectorQuantization 7 | attr_reader :centrs, :dims, :vrange, :init_centr_vrange, :lrate, 8 | :simil_type, :encoding_type, :rng, :ntrains, :utility, :ncodes 9 | attr_writer :utility, :ncodes # allows access from outside 10 | 11 | def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, encoding_type: nil, init_centr_vrange: nil, rseed: Random.new_seed 12 | 13 | @rng = Random.new rseed # TODO: RNG CURRENTLY NOT USED!! 14 | 15 | @dims = Array(dims) 16 | check_lrate lrate # hack: so that we can overload it in dlr_vq 17 | @lrate = lrate 18 | @simil_type = simil_type || raise("missing simil_type") 19 | @encoding_type = encoding_type || raise("missing encoding_type") 20 | @init_centr_vrange ||= vrange 21 | @vrange = case vrange 22 | when Array 23 | raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2 24 | vrange.map &method(:Float) 25 | when Range 26 | [vrange.first, vrange.last].map &method(:Float) 27 | else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}" 28 | end 29 | init_centrs nc: ncentrs 30 | @ntrains = [0]*ncentrs # per-centroid number of trainings 31 | @utility = NArray.zeros [code_size] # trace how 'useful' are centroids to encodings 32 | @ncodes = 0 33 | end 34 | 35 | def ncentrs 36 | @centrs.shape.first 37 | end 38 | 39 | # HACKKETY HACKKETY HACK (can't wait to refactor after the deadline) 40 | def code_size 41 | encoding_type == :sparse_coding_v1 ? 2*ncentrs : ncentrs 42 | end 43 | 44 | # Verify lrate to be present and withing unit bounds 45 | # As a separate method only so it can be overloaded in `DecayingLearningRateVQ` 46 | def check_lrate lrate 47 | raise ArgumentError, "Pass a `lrate` between 0 and 1" unless lrate&.between?(0,1) 48 | end 49 | 50 | # Initializes a list of centroids 51 | def init_centrs nc: ncentrs, base: nil, proport: nil 52 | @centrs = nc.times.map { new_centr base, proport }.to_na 53 | end 54 | 55 | # Creates a new (random) centroid 56 | # If a base is passed, this is meshed with the random centroid. 57 | # This is done to facilitate distributing the training across centroids. 58 | # TODO: USE RNG HERE!! 59 | def new_centr base=nil, proport=nil 60 | raise ArgumentError, "Either both or none" if base.nil? ^ proport.nil? 61 | # require 'pry'; binding.pry if base.nil? ^ proport.nil? 
62 | ret = NArray.new(*dims).rand(*init_centr_vrange) 63 | ret = ret * (1-proport) + base * proport if base&&proport 64 | ret 65 | end 66 | 67 | # SIMIL = { 68 | # dot: -> (centr, vec) { centr.dot(vec) }, 69 | # mse: -> (centr, vec) { -((centr-vec)**2).sum / centr.size } 70 | # } 71 | 72 | # Computes similarities between vector and all centroids 73 | def similarities vec, type: simil_type 74 | raise NotImplementedError if vec.shape.size > 1 75 | raise "need to check since centrs is a NArray now" if type == :mse 76 | # simil_fn = SIMIL[type] || raise(ArgumentError, "Unrecognized simil #{type}") 77 | # centrs.map { |centr| simil_fn.call centr, vec } 78 | centrs.dot vec 79 | end 80 | 81 | # Encode a vector 82 | # tracks utility of centroids based on how much they contribute to encoding 83 | # TODO: `encode = Encodings.const_get(type)` in initialize` 84 | # NOTE: hashes of lambdas or modules cannot access ncodes and utility 85 | # TODO: refactor anyway through `stats` object, this thing is getting out of hand 86 | def encode vec, type: encoding_type 87 | case type 88 | when :most_similar 89 | simils = similarities vec 90 | code = simils.max_index 91 | @ncodes += 1 92 | @utility[code] += 1 93 | code 94 | when :most_similar_ary 95 | simils = similarities vec 96 | code = simils.new_zeros 97 | code[simils.max_index] = 1 98 | @ncodes += 1 99 | @utility += code 100 | code 101 | when :ensemble 102 | simils = similarities vec 103 | code = simils 104 | tot = simils.sum 105 | tot = 1 if tot < 1e-5 # HACK: avoid division by zero 106 | contrib = code / tot 107 | @ncodes += 1 108 | @utility += (contrib - utility) / ncodes # cumulative moving average 109 | code 110 | when :norm_ensemble 111 | simils = similarities vec 112 | tot = simils.sum 113 | # NOTE this actually makes a big discontinuity if the total is equal to zero. 114 | # Does that even ever happen? I guess only w/ reset img (zeros) as lone centroid. 115 | # Which after first gen is really useless and should just be dropped anyway... 116 | tot = 1 if tot < 1e-5 # HACK: avoid division by zero 117 | code = simils / tot 118 | @ncodes += 1 119 | @utility += (code - utility) / ncodes # cumulative moving average 120 | code 121 | when :sparse_coding_v1 122 | raise "requires centroids normalized to unit length!" 123 | @encoder = nil if @encoder&.shape&.first != centrs.shape.first 124 | # Danafar & Cuccu: compact form linear regression encoder 125 | @encoder ||= (centrs.dot centrs.transpose).invert.dot centrs 126 | 127 | raw_code = @encoder.dot(vec) 128 | # separate positive and negative features (NOTE: all features will be positive) 129 | # i.e. split[0...n] = max {0, raw[i]}; split[n...2*n] = max {0, -raw[i]} 130 | # TODO: cite Coates & Ng 131 | # TODO: optimize and remove redundant variables 132 | split_code = raw_code.concatenate(-raw_code) 133 | split_code[split_code<0] = 0 134 | # normalize such that the code sums to 1 135 | norm_code = split_code / split_code.sum 136 | # Danafar: drop to say 80% of info (à la pca) 137 | thold = 0.2 138 | sparse_code = norm_code.dup 139 | sum = 0 140 | # NOTE: the last element in the sort below has the highest contribution and 141 | # should NEVER be put to 0, even if it could contribute alone to 100% of the 142 | # total 143 | # NOTE: upon further study I disagree this represent information content unless 144 | # the centroids are unit vectors. So I'm commenting this implementation now, 145 | # together with the following, until I implement a switch to normalize the 146 | # centroids based on configuration. 
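# The loop below drops the smallest contributions first (never the single largest
# one), stopping once roughly `thold` (20%) of the normalized mass has been zeroed
# out; the surviving entries are then re-normalized to sum to 1.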
147 | 148 | 149 | 150 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 151 | # norm_code.sort_index[0...-1].each do |idx| 152 | norm_code.size.times.sort_by { |i| norm_code[i] }[0...-1].each do |idx| 153 | 154 | 155 | 156 | sparse_code[idx] = 0 157 | sum += norm_code[idx] 158 | break if sum >= thold # we know the code's total is normalized to 1 and has no negatives 159 | end 160 | code = sparse_code / sparse_code.sum # re-normalize sum to 1 161 | 162 | @ncodes += 1 163 | @utility += (code - utility) / ncodes # cumulative moving average 164 | code 165 | when :sparse_coding_v2 166 | # Cuccu & Danafar: incremental reconstruction encoding 167 | # turns out to be closely related to (Orthogonal) Matching Pursuit 168 | raise "requires centroids normalized to unit length!" 169 | # return centrs.dot vec # speed test for the rest of the system 170 | sparse_code = NArray.zeros code_size 171 | resid = vec 172 | # cap the number of non-zero elements in the code 173 | max_nonzero = [1,ncentrs/3].max 174 | max_nonzero.times do |i| 175 | # OPT: remove msc from centrs at each loop 176 | # the algorithm should work even without this opt because 177 | # we are working on the residuals each time 178 | simils = centrs.dot resid 179 | 180 | 181 | 182 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 183 | # msc = simils.max_index 184 | simils = simils.to_a 185 | simils_abs = simils.map &:abs 186 | msc = simils_abs.index simils_abs.max # most similar centroid 187 | 188 | 189 | 190 | max_simil = simils[msc] 191 | # remember to distinguish here to use the pos/neg features trick 192 | sparse_code[msc] = max_simil 193 | reconstr = max_simil * centrs[msc, true] 194 | resid -= reconstr 195 | # puts "resid#{i} #{resid.abs.mean}" # if debug 196 | epsilon = 0.005 197 | # print resid.abs.mean, ' ' 198 | # print sparse_code.to_a, ' ' 199 | break if resid.abs.mean <= epsilon 200 | end 201 | 202 | # should normalize sum to 1? 203 | code = sparse_code #/ sparse_code.sum # normalize sum to 1 204 | 205 | @ncodes += 1 206 | @utility += (code - utility) / ncodes # cumulative moving average 207 | code 208 | when :sparse_coding 209 | # Cuccu: Direct residual encoding 210 | # return centrs.dot vec # speed test for the rest of the system 211 | sparse_code = NArray.zeros code_size 212 | resid = vec 213 | # cap the number of non-zero elements in the code 214 | max_nonzero = [1,ncentrs/3].max 215 | max_nonzero.times do |i| 216 | # OPT: remove msc from centrs at each loop 217 | # the algorithm should work even without this opt because 218 | # we are working on the residuals each time 219 | diff = (centrs - resid).abs.sum(1) 220 | 221 | 222 | 223 | # BUG IN NARRAY SORT!! 
ruby-numo/numo-narray#97 224 | # msc = diff.max_index 225 | diff = diff.to_a 226 | msc = diff.index diff.min # most similar centroid 227 | 228 | 229 | 230 | min_diff = diff[msc] 231 | # remember to distinguish here to use the pos/neg features trick 232 | sparse_code[msc] = 1 233 | reconstr = centrs[msc, true] 234 | resid -= reconstr 235 | resid[(resid<0).where] = 0 # ignore artifacts introduced by the centroids in reconstruction 236 | 237 | # puts "resid#{i} #{resid.abs.mean}" # if debug 238 | epsilon = 0.005 239 | # print resid.abs.mean, ' ' if $ngen == 2; exit if $ngen==3 240 | # print sparse_code.to_a, ' ' if $ngen == 3; exit if $ngen==4 241 | break if resid.abs.mean <= epsilon 242 | end 243 | 244 | code = sparse_code 245 | @ncodes += 1 246 | @utility += (code - utility) / ncodes # cumulative moving average 247 | code 248 | else raise ArgumentError, "Unrecognized encode #{type}" 249 | end 250 | end 251 | 252 | # Reconstruct vector from its code (encoding) 253 | def reconstruction code, type: encoding_type 254 | case type 255 | when :most_similar 256 | centrs[code, true] 257 | when :most_similar_ary 258 | centrs[code.eq(1), true] 259 | when :ensemble 260 | # tot = code.reduce :+ 261 | # centrs.zip(code).map { |centr, contr| centr*contr/tot }.reduce :+ 262 | centrs.dot(code) / code.sum 263 | when :norm_ensemble 264 | centrs.dot code 265 | # centrs.zip(code).map { |centr, contr| centr*contr }.reduce :+ 266 | when :sparse_coding_v1 267 | raise "requires normalized centroids!" 268 | reconstr_code = code[0...(code.size/2)] - code[(code.size/2)..-1] 269 | reconstr = centrs.transpose.dot reconstr_code 270 | when :sparse_coding_v2 271 | raise "requires normalized centroids!" 272 | 273 | 274 | # BUG IN NARRAY DOT!! ruby-numo/numo-narray#99 275 | # reconstr = code.dot centrs 276 | reconstr = code.expand_dims(0).dot centrs 277 | 278 | 279 | when :sparse_coding 280 | # the code is binary, so just sum over the corresponding centroids 281 | # note: sum, not mean, because of how it's used in reconstr_error 282 | reconstr = centrs[code.cast_to(Numo::Bit).where, true].sum(0) 283 | else raise ArgumentError, "unrecognized reconstruction type: #{type}" 284 | end 285 | end 286 | 287 | # Returns index and similitude of most similar centroid to vector 288 | # @return [Array] the index of the most similar centroid, 289 | # followed by the corresponding similarity 290 | def most_similar_centr vec 291 | simils = similarities vec 292 | max_idx = simils.max_index 293 | [max_idx, simils[max_idx]] 294 | end 295 | 296 | # Per-pixel errors in reconstructing vector 297 | # @return [NArray] residuals 298 | def reconstr_error vec, code: nil, type: encoding_type 299 | code ||= encode vec, type: type 300 | resid = vec - reconstruction(code, type: type) 301 | # we ignore the extra stuff coming from the centroids, 302 | # only care that everything in the obs is represented in centrs 303 | resid[resid<0] = 0 if encoding_type == :sparse_coding 304 | resid 305 | end 306 | 307 | # Train on one vector 308 | # @return [Integer] index of trained centroid 309 | def train_one vec, eps: nil 310 | # NOTE: ignores epsilon if passed 311 | trg_idx, _simil = most_similar_centr(vec) 312 | # note: uhm that actually looks like a dot product... maybe faster? 
313 | # `[c[i], vec].dot([1-lrate, lrate])` 314 | # norm_vec = vec / NLinalg.norm(vec) 315 | # centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate) + norm_vec * lrate 316 | centrs[trg_idx, true] = centrs[trg_idx, true] * (1-lrate) + vec * lrate 317 | trg_idx 318 | end 319 | 320 | # Train on vector list 321 | def train vec_lst, debug: false 322 | # Two ways here: 323 | # - Batch: canonical, centrs updated with each vec 324 | # - Parallel: could be parallel either on simils or on training (?) 325 | # Unsure on the correctness of either Parallel, let's stick with Batch 326 | vec_lst.each_with_index do |vec, i| 327 | trained_idx = train_one vec 328 | print '.' if debug 329 | @ntrains[trained_idx] += 1 if @ntrains 330 | end 331 | end 332 | end 333 | end 334 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/monkey.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Monkey patches 4 | 5 | module MachineLearningWorkbench::Monkey 6 | module Dimensionable 7 | def dims ret: [] 8 | ret << size 9 | if first.kind_of? Array 10 | # hypothesize all elements having same size and save some checks 11 | first.dims ret: ret 12 | else 13 | ret 14 | end 15 | end 16 | end 17 | 18 | module Buildable 19 | def new *args 20 | super.tap do |m| 21 | if block_given? 22 | m.each_stored_with_indices do |_,*idxs| 23 | m[*idxs] = yield *idxs 24 | end 25 | end 26 | end 27 | end 28 | end 29 | 30 | # module AdvancelyOperationable # how am I supposed to name these things?? 31 | 32 | # # Outer matrix relationship generalization. 33 | # # Make a matrix the same shape as `self`; each element is a matrix, 34 | # # with the same shape as `other`, resulting from the interaction of 35 | # # the corresponding element in `self` and all the elements in `other`. 36 | # # @param other [NMatrix] other matrix 37 | # # @note This implementation works only for 2D matrices (same as most 38 | # # other methods here). It's a quick hack, a proof of concept barely 39 | # # sufficient for my urgent needs. 40 | # # @note Output size is fixed! Since NMatrix does not graciously yield to 41 | # # being composed of other NMatrices (by adapting the shape of the root 42 | # # matrix), the block cannot return matrices in there. 43 | # # @return [NMatrix] 44 | # def outer other 45 | # # NOTE: Map of map in NMatrix does not work as expected! 46 | # # self.map { |v1| other.map { |v2| yield(v1,v2) } } 47 | # # NOTE: this doesn't cut it either... can't capture the structure 48 | # # NMatrix[ *self.collect { |v1| other.collect { |v2| yield(v1,v2) } } ] 49 | # raise ArgumentError unless block_given? 50 | # NMatrix.new(self.shape+other.shape).tap do |m| 51 | # each_stored_with_indices do |v1,r1,c1| 52 | # other.each_stored_with_indices do |v2,r2,c2| 53 | # m[r1,c1,r2,c2] = yield(v1,v2) 54 | # end 55 | # end 56 | # end 57 | # end 58 | 59 | # # Flat-output generalized outer relationship. Same as `#outer`, but the 60 | # # result is a 2-dim matrix of the interactions between all the elements 61 | # # in `self` (as rows) and all the elements in `other` (as columns) 62 | # # @param other [NMatrix] other matrix 63 | # # @return [NMatrix] 64 | # def outer_flat other 65 | # raise ArgumentError unless block_given? 66 | # data = collect { |v1| other.collect { |v2| yield(v1, v2) } } 67 | # self.class[*data, dtype: dtype] 68 | # end 69 | 70 | # # Matrix exponential: `e^self` (not to be confused with `self^n`!) 
71 | # # @return [NMatrix] 72 | # def exponential 73 | # # special case: one-dimensional matrix: just exponentiate the values 74 | # if (dim == 1) || (dim == 2 && shape.include?(1)) 75 | # return NMatrix.new shape, collect(&Math.method(:exp)), dtype: dtype 76 | # end 77 | 78 | # # Eigenvalue decomposition method from scipy/linalg/matfuncs.py#expm2 79 | 80 | # # TODO: find out why can't I get away without double transpose! 81 | # e_values, e_vectors = eigen_symm 82 | 83 | # e_vals_exp_dmat = NMatrix.diagonal e_values.collect(&Math.method(:exp)) 84 | # # ASSUMING WE'RE ONLY USING THIS TO EXPONENTIATE LOG_SIGMA IN XNES 85 | # # Theoretically we need the right eigenvectors, which for a symmetric 86 | # # matrix should be just transposes of the eigenvectors. 87 | # # But we have a positive definite matrix, so the final composition 88 | # # below holds without transposing 89 | # # BUT, strangely, I can't seem to get eigen_symm to green the tests 90 | # # ...with or without transpose 91 | # # e_vectors = e_vectors.transpose 92 | # e_vectors.dot(e_vals_exp_dmat).dot(e_vectors.invert)#.transpose 93 | # end 94 | 95 | # # Calculate matrix eigenvalues and eigenvectors using LAPACK 96 | # # @param which [:both, :left, :right] which eigenvectors do you want? 97 | # # @return [Array] 98 | # # eigenvalues (as column vector), left eigenvectors, right eigenvectors. 99 | # # A value different than `:both` for param `which` reduces the return size. 100 | # # @note requires LAPACK 101 | # # @note WARNING! a param `which` different than :both alters the returns 102 | # # @note WARNING! machine-precision-error imaginary part Complex 103 | # # often returned! For symmetric matrices use #eigen_symm_right below 104 | # def eigen which=:both 105 | # raise ArgumentError unless [:both, :left, :right].include? which 106 | # NMatrix::LAPACK.geev(self, which) 107 | # end 108 | 109 | # # Eigenvalues and right eigenvectors for symmetric matrices using LAPACK 110 | # # @note code taken from gem `nmatrix-atlas` NMatrix::LAPACK#geev 111 | # # @note FOR SYMMETRIC MATRICES ONLY!! 112 | # # @note WARNING: will return real matrices, imaginary parts are discarded! 113 | # # @note WARNING: only left eigenvectors will be returned! 114 | # # @todo could it be possible to save some of the transpositions? 115 | # # @return [Array] eigenvalues and (left) eigenvectors 116 | # def eigen_symm 117 | # # TODO: check for symmetry if not too slow 118 | # raise TypeError, "Only real-valued matrices" if complex_dtype? 119 | # raise StorageTypeError, "Only dense matrices (because LAPACK)" unless dense? 120 | # raise ShapeError, "Only square matrices" unless dim == 2 && shape[0] == shape[1] 121 | 122 | # n = shape[0] 123 | 124 | # # Outputs 125 | # e_values = NMatrix.new([n, 1], dtype: dtype) 126 | # e_values_img = NMatrix.new([n, 1], dtype: dtype) # to satisfy C alloc 127 | # e_vectors = clone_structure 128 | 129 | # NMatrix::LAPACK::lapack_geev( 130 | # false, # compute left eigenvectors of A? 131 | # :t, # compute right eigenvectors of A? 
(left eigenvectors of A**T) 132 | # n, # order of the matrix 133 | # transpose, # input matrix => needs to be column-wise # self, 134 | # n, # leading dimension of matrix 135 | # e_values, # real part of computed eigenvalues 136 | # e_values_img, # imaginary part of computed eigenvalues (will be discarded) 137 | # nil, # left eigenvectors, if applicable 138 | # n, # leading dimension of left_output 139 | # e_vectors, # right eigenvectors, if applicable 140 | # n, # leading dimension of right_output 141 | # 2*n # no clue what's this 142 | # ) 143 | 144 | # raise "Uhm why complex eigenvalues?" if e_values_img.any? {|v| v>1e-10} 145 | # return [e_values, e_vectors.transpose] 146 | # end 147 | 148 | 149 | # # The NMatrix documentation refers to a function `#nrm2` (aliased to `#norm2`) 150 | # # to compute the norm of a matrix. Fun fact: that is the implementation for vectors, 151 | # # and calling it on a matrix returns NotImplementedError :) you have to toggle the 152 | # # source to understand why: 153 | # # http://sciruby.com/nmatrix/docs/NMatrix.html#method-i-norm2 . 154 | # # A search for the actual source on GitHub reveals a (I guess new?) method 155 | # # `#matrix_norm`, with a decent choice of norms to choose from. Unfortunately, as the 156 | # # name says, it is stuck to compute full-matrix norms. 157 | # # So I resigned to dance to `Array`s and back, and implemented it with `#each_rank`. 158 | # # Unexplicably, I get a list of constant values as the return value; same with 159 | # # `#each_row`. 160 | # # What can I say, we're back to referencing rows by index. I am just wasting too much 161 | # # time figuring out these details to write a generalized version with an optional 162 | # # `dimension` to go along. 163 | # # @return [NMatrix] the vector norm along the rows 164 | # def row_norms 165 | # norms = rows.times.map { |i| row(i).norm2 } 166 | # NMatrix.new [rows, 1], norms, dtype: dtype 167 | # end 168 | 169 | # # `NMatrix#to_a` has inconsistent behavior: single-row matrices are 170 | # # converted to one-dimensional Arrays rather than a 2D Array with 171 | # # only one row. Patching `#to_a` directly is not feasible as the 172 | # # constructor seems to depend on it, and I have little interest in 173 | # # investigating further. 174 | # # @return [Array] a consistent array representation, such that 175 | # # `nmat.to_consistent_a.to_nm == nmat` holds for single-row matrices 176 | # def to_consistent_a 177 | # dim == 2 && shape[0] == 1 ? [to_a] : to_a 178 | # end 179 | # alias :to_ca :to_consistent_a 180 | # end 181 | 182 | module NumericallyApproximatable 183 | # Verifies if `self` and `other` are withing `epsilon` of each other. 184 | # @param other [Numeric] 185 | # @param epsilon [Numeric] 186 | # @return [Boolean] 187 | def approximates? other, epsilon=1e-5 188 | # Used for testing and NMatrix#approximates?, should I move to spec_helper? 189 | (self - other).abs < epsilon 190 | end 191 | end 192 | 193 | # module MatrixApproximatable 194 | # # Verifies if all values at corresponding indices approximate each other. 195 | # # @param other [NMatrix] 196 | # # @param epsilon [Float] 197 | # def approximates? other, epsilon=1e-5 198 | # return false unless self.shape == other.shape 199 | # # two ways to go here: 200 | # # - epsilon is aggregated: total cumulative accepted error 201 | # # => `(self - other).reduce(:+) < epsilon` 202 | # # - epsilon is local: per element accepted error 203 | # # => `v.approximates? 
other[*idxs], epsilon` 204 | # # Given the use I make (near-equality), I choose the first interpretation 205 | # # Note the second is sensitive to opposite signs balancing up 206 | # self.each_stored_with_indices.all? do |v,*idxs| 207 | # v.approximates? other[*idxs], epsilon 208 | # end 209 | # end 210 | # end 211 | 212 | # module CPtrDumpable 213 | # def marshall_dump 214 | # [shape, dtype, data_pointer] 215 | # end 216 | 217 | # def marshall_load 218 | # raise NotImplementedError, "There's no setter for the data pointer!" 219 | # end 220 | # end 221 | 222 | module ToNArrayConvertible 223 | def to_na 224 | NArray[*self] 225 | end 226 | end 227 | 228 | module NArrayOuterFlattable 229 | # Flat-output generalized outer relationship. Same as `#outer`, but the 230 | # result is a 2-dim matrix of the interactions between all the elements 231 | # in `self` (as rows) and all the elements in `other` (as columns) 232 | # @param other [NArray] other matrix 233 | # @return [NArray] 234 | def outer_flat other 235 | # TODO: Xumo::NArray should be able to implement this with `#outer` and some other 236 | # function to flatten the right layer -- much faster 237 | raise ArgumentError, "Need to pass an operand block" unless block_given? 238 | self.class.zeros([self.size, other.size]).tap do |ret| 239 | self.size.times do |r| 240 | other.size.times do |c| 241 | ret[r,c] = yield self[r], other[c] 242 | end 243 | end 244 | end 245 | end 246 | end 247 | 248 | module NArrayApproximatable 249 | # Verifies if `self` and `other` are withing `epsilon` of each other. 250 | # @param other [NArray] 251 | # @param epsilon [NArray] 252 | # @return [Boolean] 253 | def approximates? other, epsilon=1e-5 254 | ((self - other).abs < epsilon).all? 255 | end 256 | end 257 | 258 | module Invertable 259 | # Inverses matrix 260 | # @return [NArray] 261 | def invert 262 | NLinalg.inv self 263 | end 264 | end 265 | 266 | module Exponentiable 267 | # Matrix exponential: `e**self` (not to be confused with `self**n`) 268 | # @return [NArray] 269 | def exponential 270 | raise ArgumentError if ndim > 2 271 | # special case: one-dimensional matrix: just exponentiate the values 272 | return NMath.exp(self) if (ndim == 1) || shape.include?(1) 273 | # at this point we need to validate it is a square matrix 274 | raise ArgumentError unless shape.reduce(&:==) 275 | 276 | # Eigenvalue decomposition method from `scipy/linalg/matfuncs.py#expm2` (deprecated) 277 | # https://github.com/scipy/scipy/commit/236e0740ba951cb455ba8b6a306abb32740131cf 278 | # s, vr = eig(A) 279 | # vri = inv(vr) 280 | # r = dot(dot(vr, diag(exp(s))), vri) 281 | 282 | # TODO: this is a simple but outdated method, switch to Pade approximation 283 | # https://github.com/scipy/scipy/blob/11509c4a98edded6c59423ac44ca1b7f28fba1fd/scipy/sparse/linalg/matfuncs.py#L557 284 | 285 | # e_values, l_e_vectors, r_e_vectors_t = NLinalg.svd self 286 | evals, _wi, _vl, r_evecs = NLinalg::Lapack.call(:geev, self, jobvl: false, jobvr: true) 287 | r_evecs_t = r_evecs#.transpose 288 | r_evecs_inv = r_evecs_t.invert 289 | evals_exp_dmat = NMath.exp(evals).diag 290 | 291 | # l_e_vectors.dot(e_vals_exp_dmat).dot(l_e_vectors.invert)#.transpose 292 | r_evecs_t.dot(evals_exp_dmat).dot(r_evecs_inv) 293 | end 294 | end 295 | 296 | module Mappable 297 | # Maps along a NArray dimension, and returns NArray 298 | # @return [NArray] 299 | # NOTE: this indexing is not consistent with NArray, which uses 0 to indicate 300 | # columns rather than the 0th dimension (rows) 301 | def map dim=0 302 | raise 
ArgumentError unless dim.kind_of?(Integer) && dim.between?(0,ndim) 303 | # TODO: return iterator instead of raise 304 | raise NotImplementedError unless block_given? 305 | indices = [true]*ndim 306 | ret = [] 307 | shape[dim].times.each do |i| 308 | indices[dim] = i 309 | ret << yield(self[*indices]) 310 | end 311 | self.class[*ret] 312 | end 313 | end 314 | 315 | end 316 | 317 | Array.include MachineLearningWorkbench::Monkey::Dimensionable 318 | # NMatrix.extend MachineLearningWorkbench::Monkey::Buildable 319 | # require 'nmatrix/lapack_plugin' # loads whichever is installed between atlas and lapacke 320 | # NMatrix.include MachineLearningWorkbench::Monkey::AdvancelyOperationable 321 | Numeric.include MachineLearningWorkbench::Monkey::NumericallyApproximatable 322 | # NMatrix.include MachineLearningWorkbench::Monkey::MatrixApproximatable 323 | # NMatrix.include MachineLearningWorkbench::Monkey::CPtrDumpable 324 | Array.include MachineLearningWorkbench::Monkey::ToNArrayConvertible 325 | NArray.include MachineLearningWorkbench::Monkey::NArrayApproximatable 326 | NArray.include MachineLearningWorkbench::Monkey::NArrayOuterFlattable 327 | NArray.include MachineLearningWorkbench::Monkey::Exponentiable 328 | NArray.include MachineLearningWorkbench::Monkey::Invertable 329 | NArray.prepend MachineLearningWorkbench::Monkey::Mappable 330 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'neural_network/base' 4 | require_relative 'neural_network/feed_forward' 5 | require_relative 'neural_network/recurrent' 6 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::NeuralNetwork 4 | # Neural Network base class 5 | class Base 6 | 7 | # @!attribute [r] layers 8 | # List of matrices, each being the weights 9 | # connecting a layer's inputs (rows) to a layer's neurons (columns), 10 | # hence its shape is `[ninputs, nneurs]` 11 | # @return [Array] list of weight matrices, each uniquely describing a layer 12 | # TODO: return a NArray after the usage of `#map` is figured out 13 | # @!attribute [r] state 14 | # It's a list of one-dimensional matrices, each an input to a layer, plus the output layer's output. The first element is the input to the first layer of the network, which is composed of the network's input, possibly the first layer's activation on the last input (recursion), and a bias (fixed `1`). The second to but-last entries follow the same structure, but with the previous layer's output in place of the network's input. The last entry is the activation of the output layer, without additions since it's not used as an input by anyone. 15 | # TODO: return a NArray after the usage of `#map` is figured out 16 | # @return [Array] current state of the network. 
17 | # @!attribute [r] act_fn 18 | # activation function, common to all neurons (for now) 19 | # @return [#call] activation function 20 | # @!attribute [r] struct 21 | # list of number of (inputs or) neurons in each layer 22 | # @return [Array] structure of the network 23 | attr_reader :layers, :state, :act_fn, :act_fn_name, :struct 24 | 25 | 26 | ## Initialization 27 | 28 | # @param struct [Array] list of layer sizes 29 | # @param act_fn [Symbol] choice of activation function for the neurons 30 | def initialize struct, act_fn: nil, **act_fn_args 31 | @struct = struct 32 | @act_fn_name = act_fn || :sigmoid 33 | @act_fn = send act_fn_name, **act_fn_args 34 | # @state holds both inputs, possibly recurrency, and bias 35 | # it is a complete input for the next layer, hence size from layer sizes 36 | @state = layer_row_sizes.collect do |size| 37 | NArray.zeros [1, size] 38 | end 39 | # to this, append a matrix to hold the final network output 40 | @state.push NArray.zeros [1, nneurs(-1)] 41 | reset_state 42 | end 43 | 44 | # Reset the network to the initial state 45 | def reset_state 46 | state.each do |s| 47 | s.fill 0 # reset state to zero 48 | s[-1] = 1 # add bias 49 | end 50 | state[-1][-1] = 0 # last layer has no bias 51 | end 52 | 53 | # Initialize the network with random weights 54 | def init_random 55 | # Reusing `#load_weights` instead helps catching bugs 56 | deep_reset 57 | load_weights NArray.new(nweights).rand(-1,1) 58 | end 59 | 60 | ## Weight utilities 61 | 62 | # Resets memoization: needed to play with structure modification 63 | def deep_reset 64 | # reset memoization 65 | [:@layer_row_sizes, :@layer_col_sizes, :@nlayers, :@layer_shapes, 66 | :@nweights_per_layer, :@nweights].each do |sym| 67 | instance_variable_set sym, nil 68 | end 69 | reset_state 70 | end 71 | 72 | # Total weights in the network 73 | # @return [Integer] total number of weights 74 | def nweights 75 | @nweights ||= nweights_per_layer.reduce(:+) 76 | end 77 | 78 | # List of per-layer number of weights 79 | # @return [Array] list of weights per each layer 80 | def nweights_per_layer 81 | @nweights_per_layer ||= layer_shapes.collect { |shape| shape.reduce(:*) } 82 | end 83 | 84 | # Count the layers. This is a computation helper, and for this implementation 85 | # the inputs are considered as if a layer like the others. 86 | # @return [Integer] number of layers 87 | def nlayers 88 | @nlayers ||= layer_shapes.size 89 | end 90 | 91 | # Returns the weight matrix 92 | # @return [Array] list of NArray matrices of weights (one per layer). 93 | def weights 94 | layers 95 | end 96 | 97 | # Number of neurons per layer. Although this implementation includes inputs 98 | # in the layer counts, this methods correctly ignores the input as not having 99 | # neurons. 100 | # @return [Array] list of neurons per each (proper) layer (i.e. no inputs) 101 | def layer_col_sizes 102 | @layer_col_sizes ||= struct.drop(1) 103 | end 104 | 105 | # define #layer_row_sizes in child class: number of inputs per layer 106 | 107 | # Shapes for the weight matrices, each corresponding to a layer 108 | # @return [Array] Weight matrix shapes 109 | def layer_shapes 110 | @layer_shapes ||= layer_row_sizes.zip layer_col_sizes 111 | end 112 | 113 | # Count the neurons in a particular layer or in the whole network. 114 | # @param nlay [Integer, nil] the layer of interest, 1-indexed. 115 | # `0` will return the number of inputs. 116 | # `nil` will compute the total neurons in the network. 
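# e.g. with struct [2, 2, 1]: nneurs(0) => 2 (inputs), nneurs(2) => 1, nneurs => 5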
117 | # @return [Integer] the number of neurons in a given layer, or in the whole network, or the number of inputs 118 | def nneurs nlay=nil 119 | nlay.nil? ? struct.reduce(:+) : struct[nlay] 120 | end 121 | 122 | # Loads a plain list of weights into the weight matrices (one per layer). 123 | # Preserves order. Reuses allocated memory if available. 124 | # @param weights [Array] weights to load 125 | # @return [true] always true. If something's wrong it simply fails, and if 126 | # all goes well there's nothing to return but a confirmation to the caller. 127 | def load_weights weights 128 | raise ArgumentError unless weights.size == nweights 129 | weights = weights.to_na unless weights.kind_of? NArray 130 | from = 0 131 | @layers = layer_shapes.collect do |shape| 132 | to = from + shape.reduce(:*) 133 | lay_w = weights[from...to].reshape *shape 134 | from = to 135 | lay_w 136 | end 137 | reset_state 138 | return true 139 | end 140 | 141 | 142 | ## Activation 143 | 144 | # Activate the network on a given input 145 | # @param input [Array] the given input 146 | # @return [Array] the activation of the output layer 147 | def activate input 148 | raise ArgumentError unless input.size == struct.first 149 | # load input in first state 150 | state[0][0...struct.first] = input 151 | # activate layers in sequence 152 | nlayers.times.each do |i| 153 | act = activate_layer i 154 | state[i+1][0...act.size] = act 155 | end 156 | return out 157 | end 158 | 159 | # Extract and convert the output layer's activation 160 | # @return [NArray] the activation of the output layer 161 | def out 162 | state.last.flatten 163 | end 164 | 165 | ## Activation functions 166 | 167 | # Traditional sigmoid (logistic) with variable steepness 168 | def sigmoid steepness: 1 169 | # steepness: 0 < steepness < 1 gives a flatter curve, steepness > 1 a steeper one 170 | # (steepness == 1 is the standard logistic function) 171 | -> (vec) { 1.0 / (NMath.exp(-steepness * vec) + 1.0) } 172 | end 173 | alias logistic sigmoid 174 | 175 | # LeCun hyperbolic activation 176 | # @see http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf Section 4.4 177 | def lecun_hyperbolic 178 | -> (vec) { 1.7159 * NMath.tanh(2.0*vec/3.0) + 1e-3*vec } 179 | end 180 | 181 | # Rectified Linear Unit (ReLU) 182 | def relu 183 | -> (vec) { (vec>0).all? && vec || vec.class.zeros(vec.shape) } 184 | end 185 | 186 | 187 | # @!method interface_methods 188 | # Declaring interface methods - implement in child class! 189 | [:layer_row_sizes, :activate_layer].each do |sym| 190 | define_method sym do 191 | raise NotImplementedError, "Implement ##{sym} in child class!" 192 | end 193 | end 194 | end 195 | end 196 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network/feed_forward.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::NeuralNetwork 4 | # Feed Forward Neural Network 5 | class FeedForward < Base 6 | 7 | # Calculate the size of each row in a layer's weight matrix. 8 | # Includes inputs (or previous-layer activations) and bias. 
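    # For example (an illustrative sketch): with `struct = [2, 2, 1]` this returns
    # `[3, 3]` (2 inputs + bias, then 2 activations + bias), so `layer_shapes`
    # works out to `[[3, 2], [3, 1]]`, the values exercised in the specs.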
9 | # @return [Array] per-layer row sizes 10 | def layer_row_sizes 11 | @layer_row_sizes ||= struct.each_cons(2).collect {|prev, _curr| prev+1} 12 | end 13 | 14 | # Activates a layer of the network 15 | # @param i [Integer] the layer to activate, zero-indexed 16 | def activate_layer i 17 | act_fn.call(state[i].dot layers[i]) 18 | end 19 | 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/neural_network/recurrent.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::NeuralNetwork 4 | # Recurrent Neural Network 5 | class Recurrent < Base 6 | 7 | # Calculate the size of each row in a layer's weight matrix. 8 | # Each row holds the inputs for the next level: previous level's 9 | # activations (or inputs), this level's last activations 10 | # (recursion) and bias. 11 | # @return [Array] per-layer row sizes 12 | def layer_row_sizes 13 | @layer_row_sizes ||= struct.each_cons(2).collect do |prev, rec| 14 | prev + rec + 1 15 | end 16 | end 17 | 18 | # # NOTE: current layer index corresponds to index of next state! 19 | # previous = nlay # index of previous layer (inputs) 20 | # current = nlay + 1 # index of current layer (outputs) 21 | # # Copy the level's last-time activation to the input (previous state) 22 | # # TODO: ranges in `NArray#[]` should be reliable, get rid of loop 23 | # nneurs(current).times do |i| # for each activations to copy 24 | # # Copy output from last-time activation to recurrency in previous state 25 | # @state[previous][0, nneurs(previous) + i] = state[current][0, i] 26 | # end 27 | # act_fn.call state[previous].dot layers[nlay] 28 | 29 | # Activates a layer of the network. 30 | # Bit more complex since it has to copy the layer's activation on 31 | # last input to its own inputs, for recursion. 
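    # For example (an illustrative sketch): with `struct = [2, 2, 1]` each layer's
    # input row is laid out as [previous activations | own last activations | bias],
    # giving row sizes `[2+2+1, 2+1+1] == [5, 4]` and `layer_shapes == [[5, 2], [4, 1]]`,
    # the values exercised in the specs.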
32 | # @param i [Integer] the layer to activate, zero-indexed 33 | def activate_layer nlay 34 | # Mark begin and end of recursion outputs in current state 35 | begin_recur = nneurs(nlay) 36 | end_recur = nneurs(nlay) + nneurs(nlay+1) 37 | # Copy the level's last-time activation to the current input recurrency 38 | state[nlay][begin_recur...end_recur] = state[nlay+1][0...nneurs(nlay+1)] 39 | # Activate current layer 40 | act_fn.call state[nlay].dot layers[nlay] 41 | end 42 | 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer 4 | end 5 | 6 | require_relative 'optimizer/natural_evolution_strategies/base' 7 | require_relative 'optimizer/natural_evolution_strategies/xnes' 8 | require_relative 'optimizer/natural_evolution_strategies/snes' 9 | require_relative 'optimizer/natural_evolution_strategies/rnes' 10 | # FIX SPECS FIRST 11 | # require_relative 'optimizer/natural_evolution_strategies/fnes' 12 | require_relative 'optimizer/natural_evolution_strategies/bdnes' 13 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Natural Evolution Strategies base class 5 | class Base 6 | attr_reader :ndims, :mu, :sigma, :opt_type, :obj_fn, :parallel_fit, :eye, :rng, :last_fits, :best, :rescale_popsize, :rescale_lrate 7 | 8 | # NES object initialization 9 | # @param ndims [Integer] number of parameters to optimize 10 | # @param obj_fn [#call] any object defining a #call method (Proc, lambda, custom class) 11 | # @param opt_type [:min, :max] select minimization / maximization of obj_fn 12 | # @param rseed [Integer] allow for deterministic execution on rseed provided 13 | # @param mu_init [Numeric] values to initalize the distribution's mean 14 | # @param sigma_init [Numeric] values to initialize the distribution's covariance 15 | # @param parallel_fit [boolean] whether the `obj_fn` should be passed all the 16 | # individuals together. In the canonical case the fitness function always scores a 17 | # single individual; in practical cases though it is easier to delegate the scoring 18 | # parallelization to the external fitness function. Turning this to `true` will make 19 | # the algorithm pass _an Array_ of individuals to the fitness function, rather than 20 | # a single instance. 21 | # @param rescale_popsize [Float] scaling for the default population size 22 | # @param rescale_lrate [Float] scaling for the default learning rate 23 | def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1, parallel_fit: false, rescale_popsize: 1, rescale_lrate: 1, utilities: nil, popsize: nil, lrate: nil 24 | raise ArgumentError, "opt_type: #{opt_type}" unless [:min, :max].include? opt_type 25 | raise ArgumentError, "obj_fn not callable: #{obj_fn}" unless obj_fn.respond_to? :call 26 | raise ArgumentError, "utilities only if popsize" if utilities && popsize.nil? 
27 | raise ArgumentError, "wrong sizes" if utilities && utilities.size != popsize 28 | raise ArgumentError, "minimum popsize 5 for default utilities" if popsize&.<(5) && utilities.nil? 29 | @ndims, @opt_type, @obj_fn, @parallel_fit = ndims, opt_type, obj_fn, parallel_fit 30 | @rescale_popsize, @rescale_lrate = rescale_popsize, rescale_lrate # rescale defaults 31 | @utilities, @popsize, @lrate = utilities, popsize, lrate # if not set, defaults below 32 | @eye = NArray.eye(ndims) 33 | rseed ||= Random.new_seed 34 | # puts "NES rseed: #{s}" # currently disabled 35 | @rng = Random.new rseed 36 | @best = [(opt_type==:max ? -1 : 1) * Float::INFINITY, nil] 37 | @last_fits = [] 38 | initialize_distribution mu_init: mu_init, sigma_init: sigma_init 39 | end 40 | 41 | # Box-Muller transform: generates standard (unit) normal distribution samples 42 | # @return [Float] a single sample from a standard normal distribution 43 | # @note Xumo::NArray implements this but no random seed selection yet 44 | def standard_normal_sample 45 | rho = Math.sqrt(-2.0 * Math.log(rng.rand)) 46 | theta = 2 * Math::PI * rng.rand 47 | tfn = rng.rand > 0.5 ? :cos : :sin 48 | rho * Math.send(tfn, theta) 49 | end 50 | 51 | # Memoized automatic magic numbers 52 | # Initialization options allow to rescale or entirely override these. 53 | # NOTE: Doubling popsize and halving lrate often helps 54 | def utils; @utilities ||= cmaes_utilities end 55 | # (see #utils) 56 | def popsize; @popsize ||= Integer(cmaes_popsize * rescale_popsize) end 57 | # (see #utils) 58 | def lrate; @lrate ||= cmaes_lrate * rescale_lrate end 59 | 60 | # Magic numbers from CMA-ES (see `README` for citation) 61 | # @return [NArray] scale-invariant utilities 62 | def cmaes_utilities 63 | # Algorithm equations are meant for fitness maximization 64 | # Match utilities with individuals sorted by INCREASING fitness 65 | raise ArgumentError, "Minimum `popsize` should be 5 (is #{popsize})" if popsize < 5 66 | log_range = (1..popsize).collect do |v| 67 | [0, Math.log(popsize.to_f/2 - 1) - Math.log(v)].max 68 | end 69 | total = log_range.reduce(:+) 70 | buf = 1.0/popsize 71 | vals = log_range.collect { |v| v / total - buf }.reverse 72 | NArray[vals] 73 | end 74 | 75 | # (see #cmaes_utilities) 76 | # @return [Float] learning rate lower bound 77 | def cmaes_lrate 78 | (3+Math.log(ndims)) / (5*Math.sqrt(ndims)) 79 | end 80 | 81 | # (see #cmaes_utilities) 82 | # @return [Integer] population size lower bound 83 | def cmaes_popsize 84 | [5, 4 + (3*Math.log(ndims)).floor].max 85 | end 86 | 87 | # Samples a standard normal distribution to construct a NArray of 88 | # popsize multivariate samples of length ndims 89 | # @return [NArray] standard normal samples 90 | # @note Xumo::NArray implements this but no random seed selection yet 91 | def standard_normal_samples 92 | NArray.zeros([popsize, ndims]).tap do |ret| 93 | ret.each_with_index { |_,*i| ret[*i] = standard_normal_sample } 94 | end 95 | end 96 | 97 | # Move standard normal samples to current distribution 98 | # @return [NArray] individuals 99 | def move_inds inds 100 | # TODO: can we reduce the transpositions? 101 | 102 | # multi_mu = NMatrix[*inds.rows.times.collect {mu.to_a}, dtype: dtype].transpose 103 | # (multi_mu + sigma.dot(inds.transpose)).transpose 104 | 105 | mu_tile = mu.tile(inds.shape.first, 1).transpose 106 | (mu_tile + sigma.dot(inds.transpose)).transpose 107 | end 108 | 109 | # Sorted individuals 110 | # NOTE: Algorithm equations are meant for fitness maximization. 
Utilities need to be 111 | # matched with individuals sorted by INCREASING fitness. Then reverse order for minimization. 112 | # @return standard normal samples sorted by the respective individuals' fitnesses 113 | def sorted_inds 114 | # Xumo::NArray implements the Box-Muller, but no random seed (yet) 115 | samples = standard_normal_samples 116 | # samples = NArray.new([popsize, ndims]).rand_norm(0,1) 117 | inds = move_inds(samples) 118 | fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn) 119 | # Quick cure for NaN fitnesses 120 | fits.map { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x } 121 | @last_fits = fits # allows checking for stagnation 122 | 123 | # sorted = [fits.to_a, inds, samples.to_a].transpose.sort_by(&:first) 124 | # sorted.reverse! if opt_type==:min 125 | # this_best = sorted.last.take(2) 126 | # NArray[*sorted.map(&:last)] 127 | 128 | 129 | 130 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 131 | # sort_idxs = fits.sort_index 132 | sort_idxs = fits.size.times.sort_by { |i| fits[i] }.to_na 133 | 134 | 135 | 136 | sort_idxs = sort_idxs.reverse if opt_type == :min 137 | this_best = [fits[sort_idxs[-1]], inds[sort_idxs[-1], true]] 138 | opt_cmp_fn = opt_type==:min ? :< : :> 139 | @best = this_best if this_best.first.send(opt_cmp_fn, best.first) 140 | 141 | samples[sort_idxs, true] 142 | end 143 | 144 | # @!method interface_methods 145 | # Declaring interface methods - implement these in child class! 146 | [:train, :initialize_distribution, :convergence].each do |mname| 147 | define_method mname do 148 | raise NotImplementedError, "Implement in child class!" 149 | end 150 | end 151 | end 152 | end 153 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Block-Diagonal Natural Evolution Strategies 5 | class BDNES < Base 6 | 7 | MAX_RSEED = 10**Random.new_seed.size # block random seeds to be on the same range as `Random.new_seed` 8 | 9 | attr_reader :ndims_lst, :blocks, :popsize, :parallel_update 10 | undef :ndims # only `ndims_lst` here 11 | 12 | # Initialize a list of XNES, one for each block 13 | # see class `Base` for the description of the rest of the arguments. 14 | # @param ndims_lst [Array] list of sizes for each block in the block-diagonal 15 | # matrix. Note: entire (reconstructed) individuals will be passed to the `obj_fn` 16 | # regardless of the division here described. 
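    # For example (an illustrative sketch): `BDNES.new [3, 2], obj_fn, :min` maintains
    # two XNES blocks of 3 and 2 dimensions respectively, while `obj_fn` still receives
    # complete individuals of length 5, as exercised in the specs.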
17 | # @param init_opts [Hash] the rest of the options will be passed directly to XNES 18 | # @parellel_update [bool] whether to parallelize block updates 19 | def initialize ndims_lst, obj_fn, opt_type, parallel_fit: false, rseed: nil, parallel_update: false, **init_opts 20 | # mu_init: 0, sigma_init: 1 21 | # init_opts = {rseed: rseed, mu_init: mu_init, sigma_init: sigma_init} 22 | # TODO: accept list of `mu_init`s and `sigma_init`s 23 | @ndims_lst, @obj_fn, @opt_type, @parallel_fit = ndims_lst, obj_fn, opt_type, parallel_fit 24 | block_fit = -> (*args) { raise "Should never be called" } 25 | # the BD-NES seed should ensure deterministic reproducibility 26 | # but each block should have a different seed 27 | # puts "BD-NES rseed: #{s}" # currently disabled 28 | @rng = Random.new rseed || Random.new_seed 29 | @blocks = ndims_lst.map do |ndims| 30 | b_rseed = rng.rand MAX_RSEED 31 | XNES.new ndims, block_fit, opt_type, rseed: b_rseed, **init_opts 32 | end 33 | # Need `popsize` to be the same for all blocks, to make complete individuals 34 | @popsize = blocks.map(&:popsize).max 35 | blocks.each { |xnes| xnes.instance_variable_set :@popsize, popsize } 36 | 37 | @best = [(opt_type==:max ? -1 : 1) * Float::INFINITY, nil] 38 | @last_fits = [] 39 | @parallel_update = parallel_update 40 | require 'parallel' if parallel_update 41 | end 42 | 43 | def sorted_inds_lst 44 | # Build samples and inds from the list of blocks 45 | samples_lst, inds_lst = blocks.map do |xnes| 46 | samples = xnes.standard_normal_samples 47 | inds = xnes.move_inds(samples) 48 | [samples.to_a, inds] 49 | end.transpose 50 | 51 | # Join the individuals for evaluation 52 | full_inds = inds_lst.reduce { |mem, var| mem.concatenate var, axis: 1 } 53 | # Need to fix sample dimensions for sorting 54 | # - current dims: nblocks x ninds x [block sizes] 55 | # - for sorting: ninds x nblocks x [block sizes] 56 | full_samples = samples_lst.transpose 57 | 58 | # Evaluate fitness of complete individuals 59 | fits = parallel_fit ? obj_fn.call(full_inds) : full_inds.map(&obj_fn) 60 | # Quick cure for NaN fitnesses 61 | fits.map { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x } 62 | @last_fits = fits # allows checking for stagnation 63 | 64 | # Sort inds based on fit and opt_type, save best 65 | # sorted = [fits, full_inds, full_samples].transpose.sort_by(&:first) 66 | # sorted.reverse! if opt_type==:min 67 | # this_best = sorted.last.take(2) 68 | # opt_cmp_fn = opt_type==:min ? :< : :> 69 | # @best = this_best if this_best.first.send(opt_cmp_fn, best.first) 70 | # sorted_samples = sorted.map(&:last) 71 | 72 | 73 | 74 | # BUG IN NARRAY SORT!! ruby-numo/numo-narray#97 75 | # sort_idxs = fits.sort_index 76 | sort_idxs = fits.size.times.sort_by { |i| fits[i] }.to_na 77 | 78 | 79 | 80 | sort_idxs = sort_idxs.reverse if opt_type == :min 81 | this_best = [fits[sort_idxs[-1]], full_inds[sort_idxs[-1], true]] 82 | opt_cmp_fn = opt_type==:min ? 
:< : :> 83 | @best = this_best if this_best.first.send(opt_cmp_fn, best.first) 84 | sorted_samples = full_samples.values_at *sort_idxs 85 | 86 | # Need to bring back sample dimensions for each block 87 | # - current dims: ninds x nblocks x [block sizes] 88 | # - target blocks list: nblocks x ninds x [block sizes] 89 | block_samples = sorted_samples.transpose 90 | 91 | # then back to NArray for usage in training 92 | block_samples.map &:to_na 93 | end 94 | 95 | # duck-type the interface: [:train, :mu, :convergence, :save, :load] 96 | 97 | # TODO: refactor DRY 98 | def train picks: sorted_inds_lst 99 | if parallel_update 100 | # Parallel.each(blocks.zip(picks)) do |xnes, s_inds| 101 | # xnes.train picks: s_inds 102 | # end 103 | # Actually it's not this simple. 104 | # Forks do not act on the parent, so I need to send back updated mu and sigma 105 | # Luckily we have `NES#save` and `NES#load` at the ready 106 | # Next: need to implement `#marshal_dump` and `#marshal_load` in `Base` 107 | # Actually using `Cumo` rather than `Parallel` may avoid marshaling altogether 108 | raise NotImplementedError, "Should dump and load each instance" 109 | else 110 | blocks.zip(picks).each do |xnes, s_inds| 111 | xnes.train picks: s_inds 112 | end 113 | end 114 | end 115 | 116 | def mu 117 | blocks.map(&:mu).reduce { |mem, var| mem.concatenate var, axis: 1 } 118 | end 119 | 120 | def sigma 121 | raise NotImplementedError, "need to write a concatenation like for mu here" 122 | end 123 | 124 | def convergence 125 | blocks.map(&:convergence).reduce(:+) 126 | end 127 | 128 | def save 129 | blocks.map &:save 130 | end 131 | 132 | def load data 133 | fit = -> (*args) { raise "Should never be called" } 134 | @blocks = data.map do |block_data| 135 | ndims = block_data.first.size 136 | XNES.new(ndims, fit, opt_type).tap do |nes| 137 | nes.load block_data 138 | end 139 | end 140 | end 141 | end 142 | end 143 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/fnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Fixed Variance Natural Evolution Strategies 5 | class FNES < RNES 6 | 7 | def train picks: sorted_inds 8 | g_mu = utils.dot(picks) 9 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 10 | end 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/rnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Radial Natural Evolution Strategies 5 | class RNES < Base 6 | attr_reader :variance 7 | 8 | def initialize_distribution mu_init: 0, sigma_init: 1 9 | @mu = case mu_init 10 | when Array 11 | raise ArgumentError unless mu_init.size == ndims 12 | NArray[mu_init] 13 | when Numeric 14 | NArray.new([1,ndims]).fill mu_init 15 | else 16 | raise ArgumentError, "Something is wrong with mu_init: #{mu_init}" 17 | end 18 | @variance = sigma_init 19 | @sigma = case sigma_init 20 | when Array 21 | raise ArgumentError "RNES uses single global variance" 22 | when Numeric 23 | NArray.new([ndims]).fill(variance).diag 24 | else 25 | raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}" 26 | 
end 27 | end 28 | 29 | def train picks: sorted_inds 30 | g_mu = utils.dot(picks) 31 | # g_sigma = utils.dot(picks.row_norms**2 - ndims).first # back to scalar 32 | row_norms = NLinalg.norm picks, 2, axis:1 33 | g_sigma = utils.dot(row_norms**2 - ndims)[0] # back to scalar 34 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 35 | @variance *= Math.exp(g_sigma * lrate / 2) 36 | @sigma = NArray.new([ndims]).fill(variance).diag 37 | end 38 | 39 | # Estimate algorithm convergence based on variance 40 | def convergence 41 | variance 42 | end 43 | 44 | def save 45 | [mu.to_a, variance] 46 | end 47 | 48 | def load data 49 | raise ArgumentError unless data.size == 2 50 | mu_ary, @variance = data 51 | @mu = mu_ary.to_na 52 | @sigma = eye * variance 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Separable Natural Evolution Strategies 5 | class SNES < Base 6 | 7 | attr_reader :variances 8 | 9 | def initialize_distribution mu_init: 0, sigma_init: 1 10 | @mu = case mu_init 11 | when Array 12 | raise ArgumentError unless mu_init.size == ndims 13 | NArray[mu_init] 14 | when Numeric 15 | NArray.new([1,ndims]).fill mu_init 16 | else 17 | raise ArgumentError, "Something is wrong with mu_init: #{mu_init}" 18 | end 19 | @variances = case sigma_init 20 | when Array 21 | raise ArgumentError unless sigma_init.size == ndims 22 | NArray[*sigma_init] 23 | when Numeric 24 | NArray.new([ndims]).fill(sigma_init) 25 | else 26 | raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}" \ 27 | "(did you remember to copy the other cases from XNES?)" 28 | end 29 | @sigma = @variances.diag 30 | end 31 | 32 | def train picks: sorted_inds 33 | g_mu = utils.dot(picks) 34 | g_sigma = utils.dot(picks**2 - 1) 35 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 36 | @variances *= (g_sigma * lrate / 2).exponential.flatten 37 | @sigma = @variances.diag 38 | end 39 | 40 | # Estimate algorithm convergence as total variance 41 | def convergence 42 | variances.sum 43 | end 44 | 45 | def save 46 | [mu.to_a, variances.to_a] 47 | end 48 | 49 | def load data 50 | raise ArgumentError unless data.size == 2 51 | @mu, @variances = data.map &:to_na 52 | @sigma = variances.diag 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 4 | # Exponential Natural Evolution Strategies 5 | class XNES < Base 6 | attr_reader :log_sigma 7 | 8 | def initialize_distribution mu_init: 0, sigma_init: 1 9 | @mu = case mu_init 10 | when Range # initialize with random in range 11 | raise ArgumentError, "mu_init: `Range` start/end in `Float`s" \ 12 | unless mu_init.first.kind_of?(Float) && mu_init.last.kind_of?(Float) 13 | mu_rng = Random.new rng.rand 10**Random.new_seed.size 14 | NArray[*ndims.times.map { mu_rng.rand mu_init }] 15 | when Array 16 | raise ArgumentError unless mu_init.size == ndims 17 | NArray[mu_init] 18 | when Numeric 19 | NArray.new([1,ndims]).fill mu_init 20 | 
when NArray 21 | raise ArgumentError unless mu_init.size == ndims 22 | mu_init.ndim < 2 ? mu_init.reshape(1, ndims) : mu_init 23 | else 24 | raise ArgumentError, "Something is wrong with mu_init: #{mu_init}" 25 | end 26 | @sigma = case sigma_init 27 | when Array 28 | raise ArgumentError unless sigma_init.size == ndims 29 | NArray[*sigma_init].diag 30 | when Numeric 31 | NArray.new([ndims]).fill(sigma_init).diag 32 | when NArray 33 | raise ArgumentError unless sigma_init.size == ndims**2 34 | sigma_init.ndim < 2 ? sigma_init.reshape(ndims, ndims) : sigma_init 35 | else 36 | raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}" 37 | end 38 | # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi) 39 | @log_sigma = NMath.log(sigma.diagonal).diag 40 | end 41 | 42 | def train picks: sorted_inds 43 | g_mu = utils.dot(picks) 44 | g_log_sigma = popsize.times.inject(NArray.zeros sigma.shape) do |sum, i| 45 | u = utils[i] 46 | ind = picks[i, true] 47 | ind_sq = ind.outer_flat(ind, &:*) 48 | sum + (ind_sq - eye) * u 49 | end 50 | @mu += sigma.dot(g_mu.transpose).transpose * lrate 51 | @log_sigma += g_log_sigma * (lrate/2) 52 | @sigma = log_sigma.exponential 53 | end 54 | 55 | # Estimate algorithm convergence as total variance 56 | def convergence 57 | sigma.trace 58 | end 59 | 60 | def save 61 | [mu.to_a, log_sigma.to_a] 62 | end 63 | 64 | def load data 65 | raise ArgumentError unless data.size == 2 66 | @mu, @log_sigma = data.map &:to_na 67 | @sigma = log_sigma.exponential 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/systems.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'systems/neuroevolution' 4 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/systems/neuroevolution.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | "Work in progress" 4 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'tools/execution' 4 | require_relative 'tools/normalization' 5 | require_relative 'tools/imaging' 6 | require_relative 'tools/verification' 7 | require_relative 'tools/logging' 8 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/execution.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Execution 5 | $fork_pids ||= [] 6 | 7 | # Executes block in a (detached) fork, saving the `pid` for later termination. 8 | # @note add `ensure MachineLearningWorkbench::Tools.kill_forks` to the block 9 | # where `in_fork` is called (see `#kill_forks`). 10 | def self.in_fork &block 11 | raise ArgumentError "Need block to be executed in fork" unless block 12 | pid = fork(&block) 13 | Process.detach pid 14 | $fork_pids << pid 15 | end 16 | 17 | # Kills processes spawned by `#in_fork`. 18 | # Call this in an `ensure` block after using `in_fork`. 
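    # A minimal usage sketch (illustrative; the `image.display` block is hypothetical):
    #   begin
    #     MachineLearningWorkbench::Tools::Execution.in_fork { image.display }
    #   ensure
    #     MachineLearningWorkbench::Tools::Execution.kill_forks
    #   end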
19 | # => `ensure MachineLearningWorkbench::Tools.kill_forks` 20 | def self.kill_forks 21 | $fork_pids&.each { |pid| Process.kill('KILL', pid) rescue Errno::ESRCH } 22 | $fork_pids = [] 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/imaging.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Imaging 5 | Forkable = MachineLearningWorkbench::Tools::Execution 6 | Norm = MachineLearningWorkbench::Tools::Normalization 7 | 8 | # Create RMagick::Image from numeric matrix data 9 | # @param narr [NArray] numeric matrix to display 10 | # @param shape [Array] optional reshaping 11 | def self.narr_to_img narr, shape: nil 12 | require 'rmagick' 13 | shape ||= narr.shape 14 | shape = [1, shape] if shape.kind_of?(Integer) || shape.size == 1 15 | # `Image::constitute` requires Float pixels to be in [0,1] 16 | pixels = Norm.feature_scaling narr.cast_to(NArray), to: [0,1] 17 | Magick::Image.constitute *shape, "I", pixels.to_a.flatten 18 | end 19 | 20 | # Create PNG file from numeric matrix data 21 | # @param narr [NArray] numeric matrix to display 22 | # @param fname [String] path to save PNG 23 | # @param shape [Array] optional reshaping before saving 24 | def self.narr_to_png narr, fname, shape: nil 25 | narr_to_img(narr, shape: shape).write fname 26 | end 27 | 28 | # Show a numeric matrix as image in a RMagick window 29 | # @param narr [NArray] numeric matrix to display 30 | # @param disp_size [Array] the size of the image to display 31 | # @param shape [Array] the true shape of the image (numeric matrix could be flattened) 32 | # @param in_fork [bool] whether to execute the display in fork (and continue running) 33 | def self.display narr, disp_size: nil, shape: nil, in_fork: true 34 | require 'rmagick' 35 | img = narr_to_img narr, shape: shape 36 | img.resize!(*disp_size, Magick::TriangleFilter,0.51) if disp_size 37 | if in_fork 38 | MachineLearningWorkbench::Tools::Execution.in_fork { img.display } 39 | else 40 | img.display 41 | end 42 | end 43 | 44 | # Create numeric matrix from png by filename. 
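    # A round-trip usage sketch (illustrative; the file name, shape and `narr`,
    # a 28x28-sized NArray, are hypothetical):
    #   Imaging = MachineLearningWorkbench::Tools::Imaging
    #   Imaging.narr_to_png narr, "digit.png", shape: [28, 28]
    #   flat_narr = Imaging.narr_from_png "digit.png", flat: true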
45 | # @param fname the file name 46 | # @param scale optional rescaling of the image 47 | # @param flat [bool] whether to return a flat array 48 | # @param dtype dtype for the numeric matrix, leave `nil` for automatic detection 49 | def self.narr_from_png fname, scale: nil, flat: false 50 | require 'rmagick' 51 | img = Magick::ImageList.new(fname).first 52 | img.scale!(scale) if scale 53 | shape = [img.columns, img.rows] 54 | pixels = img.export_pixels(0, 0, *shape, 'I') # 'I' for intensity 55 | raise "Sanity check" unless shape.reduce(:*)==pixels.size 56 | return pixels.to_na if flat 57 | pixels.to_na.to_dimensions shape 58 | end 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/logging.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Logging 5 | # Splits calls to standard streams to be both displayed on terminal and saved to file 6 | class LogSplitter < File 7 | def initialize dest 8 | fname = if File.directory?(dest) 9 | "#{dest}/#{Time.now.strftime "%y%m%d_%H%M"}.log" 10 | else dest 11 | end 12 | super fname, 'w' 13 | end 14 | 15 | def write *args 16 | STDOUT.write *args 17 | super 18 | end 19 | end 20 | 21 | def self.split_to dest, also_stderr: false 22 | $stdout = LogSplitter.new dest 23 | $stderr = $stdout if also_stderr 24 | end 25 | 26 | def self.restore_streams 27 | logger = $stdout 28 | $stdout = STDOUT 29 | $stderr = STDERR 30 | logger.close 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/normalization.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Normalization 5 | def self.feature_scaling narr, from: nil, to: [0,1] 6 | from ||= narr.minmax 7 | old_min, old_max = from 8 | new_min, new_max = to 9 | ( (narr-old_min)*(new_max-new_min)/(old_max-old_min) ) + new_min 10 | rescue ZeroDivisionError 11 | # require 'pry'; binding.pry 12 | raise ArgumentError, "If you get here, chances are there's a bug in `from` or `to`" 13 | end 14 | 15 | # @param per_column [bool] wheather to compute stats per-column or matrix-wise 16 | def self.z_score narr, per_column: true 17 | raise NotImplementedError unless per_column 18 | raise "this would be a good time to test this implementation" 19 | means = narr.mean 20 | stddevs = narr.std 21 | # address edge case of zero variance 22 | stddevs.map! { |v| v.zero? ? 1 : v } 23 | mean_mat = means.repeat narr.rows, 0 24 | stddev_mat = stddevs.repeat narr.rows, 0 25 | (narr - mean_mat) / stddev_mat 26 | end 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/machine_learning_workbench/tools/verification.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module MachineLearningWorkbench::Tools 4 | module Verification 5 | # TODO: switch to NArray 6 | 7 | # def self.in_range! nmat, vrange 8 | # # Raise if values not in range 9 | # vmin, vmax = vrange.to_a 10 | # nmat.each_with_indices do |v, *idxs| 11 | # raise "Value not in range" unless v&.between? 
vmin, vmax 12 | # end 13 | # end 14 | 15 | # # Fix if values not in range 16 | # def self.in_range nmat, vrange 17 | # vmin, vmax = vrange.to_a 18 | # nmat.each_with_indices do |v, *idxs| 19 | # nmat[*idxs] = vmin if v < vmin 20 | # nmat[*idxs] = vmax if v > vmax 21 | # end 22 | # end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /machine_learning_workbench.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | lib = File.expand_path("../lib", __FILE__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "machine_learning_workbench" 8 | spec.version = `git describe` 9 | spec.author = "Giuseppe Cuccu" 10 | spec.email = "giuseppe.cuccu@gmail.com" 11 | 12 | spec.summary = %q[Workbench for practical machine learning in Ruby.] 13 | spec.description = %q[This workbench holds a collection of machine learning 14 | methods in Ruby. Rather than specializing on a single task or method, this 15 | gem aims at providing an encompassing framework for any machine learning 16 | application.].gsub(' ', '') 17 | spec.homepage = "https://github.com/giuse/machine_learning_workbench" 18 | spec.license = "MIT" 19 | spec.post_install_message = %Q[\ 20 | Thanks for installing the machine learning workbench! 21 | It is still a work in progress, feel free to open an issue or drop me an email 22 | and start a discussion if you are using this gem. Cheers! 23 | ].gsub(' ', '') 24 | 25 | spec.files = `git ls-files -z`.split("\x0").reject { |f| f.start_with? "spec" } 26 | 27 | # spec.bindir = "exe" 28 | # spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 29 | spec.require_paths = ["lib"] 30 | spec.required_ruby_version = '>= 2.4.0' 31 | 32 | # Install 33 | spec.add_development_dependency "bundler", "~> 1.16" 34 | spec.add_development_dependency "rake", "~> 10.0" 35 | 36 | # Test 37 | spec.add_development_dependency "rspec", "~> 3.0" 38 | spec.add_development_dependency "rmagick" # uhm would gladly drop this 39 | 40 | # Debug 41 | spec.add_development_dependency "pry", "~> 0.10" 42 | spec.add_development_dependency "pry-nav", "~> 0.2" 43 | spec.add_development_dependency "pry-rescue", "~> 1.4" 44 | spec.add_development_dependency "pry-stack_explorer", "~> 0.4" 45 | spec.add_development_dependency "pry-doc", "~> 0.12" 46 | 47 | # Run 48 | spec.requirements << "libopenblas-base" # requirement for `numo-linalg` 49 | spec.requirements << "liblapacke" # requirement for `numo-linalg` 50 | spec.add_dependency "numo-narray", "~> 0.9" 51 | spec.add_dependency "numo-linalg", "~> 0.1" 52 | spec.add_dependency "parallel", "~> 1.12" 53 | end 54 | -------------------------------------------------------------------------------- /spec/compressor/vector_quantization_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Compressor::VectorQuantization do 4 | VectorQuantization = MachineLearningWorkbench::Compressor::VectorQuantization 5 | 6 | it "works" 7 | # pick some artificial data 8 | # initialize with few centroids 9 | # train on centroids 10 | # verify the output 11 | end 12 | -------------------------------------------------------------------------------- /spec/helpers/uses_temporary_folders.rb: -------------------------------------------------------------------------------- 1 | # 
frozen_string_literal: true 2 | 3 | # evaluate in temporary (empty) folder 4 | module UsesTemporaryFolders 5 | def self.included example_group 6 | example_group.extend self 7 | end 8 | 9 | def in_temporary_folder 10 | require 'pathname' 11 | attr_reader :orig_dir, :tmp_dir 12 | # ensure working in empty temporary folder 13 | before do 14 | @orig_dir = Pathname.pwd 15 | @tmp_dir = orig_dir + "in_temporary_folder" 16 | FileUtils.rm_rf tmp_dir 17 | FileUtils.mkdir_p tmp_dir 18 | Dir.chdir tmp_dir 19 | end 20 | # clean up 21 | after do 22 | Dir.chdir orig_dir 23 | FileUtils.rm_rf tmp_dir 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/monkey/monkey_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Monkey do 4 | 5 | describe Numeric do 6 | v = 1e-3 7 | describe "#approximates?" do 8 | it do 9 | expect(v.approximates? v+1e-4, 1e-3).to be_truthy 10 | expect(v.approximates? v+1e-2, 1e-3).to be_falsey 11 | end 12 | end 13 | end 14 | 15 | # describe NMatrix do 16 | # data = [[1,2,3],[4,5,6],[7,8,9]] 17 | # nmat = NMatrix[*data, dtype: :float64] 18 | # diag = [1,5,9] 19 | 20 | # it "::new with a block" do 21 | # shape = [data.size, data.first.size] 22 | # built = NMatrix.new(shape) { |i,j| data[i][j]**2 } 23 | # expect(built).to eq(nmat**2) 24 | # end 25 | 26 | # context "when looping on the diagonal", :SKIP do 27 | # it "#each_diag" do 28 | # expect(nmat.each_diag.to_a).to_eq(diag.collect {|n| NMatrix[[n]]}) 29 | # end 30 | 31 | # it "#each_stored_diag" do 32 | # expect(nmat.each_stored_diag.to_a).to eq(diag) 33 | # end 34 | # end 35 | 36 | # context "when setting the diagonal", :SKIP do 37 | # set_diag_diag = [10,50,90] 38 | # set_diag_data = [[10,2,3],[4,50,6],[7,8,90]] 39 | # set_diag_nmat = NMatrix[*set_diag_data] 40 | 41 | # it "#set_diag" do 42 | # setted = nmat.set_diag {|i| set_diag_diag[i]} 43 | # expect(setted).to eq(set_diag_nmat) 44 | # expect(nmat).not_to eq(setted) 45 | # expect(nmat.object_id).not_to eq(setted.object_id) 46 | # end 47 | 48 | # it "#set_diag!" do 49 | # tmp_mat = nmat.clone 50 | # setted = tmp_mat.set_diag! {|i| set_diag_diag[i]} 51 | # expect(setted).to eq(set_diag_nmat) 52 | # expect(tmp_mat).to eq(setted) 53 | # end 54 | 55 | # end 56 | 57 | # describe "#outer" do 58 | # mini = NMatrix[[1,2],[3,4]] 59 | # exp = NMatrix[[[[2, 3], [4, 5]], 60 | # [[3, 4], [5, 6]]], 61 | # [[[4, 5], [6, 7]], 62 | # [[5, 6], [7, 8]]]] 63 | # it "computes the correct result" do 64 | # res = mini.outer(mini) {|a,b| a+b} 65 | # expect(res.shape).to eq(exp.shape) 66 | # expect(res).to eq(exp) 67 | # end 68 | # end 69 | 70 | # describe "#outer_flat" do 71 | # mini = NMatrix[[1,2],[3,4]] 72 | # exp_flat = NMatrix[[2, 3, 4, 5], 73 | # [3, 4, 5, 6], 74 | # [4, 5, 6, 7], 75 | # [5, 6, 7, 8]] 76 | # it "computes the correct result" do 77 | # res = mini.outer_flat(mini) {|a,b| a+b} 78 | # expect(res.shape).to eq(exp_flat.shape) 79 | # expect(res).to eq(exp_flat) 80 | # end 81 | # end 82 | 83 | # describe "#eigen" do 84 | # trg_eigenvalues = NMatrix[[16.11684, -1.11684, 0.0]].transpose 85 | # trg_eigenvectors = NMatrix[[0.283349, 0.641675, 1.0], 86 | # [-1.28335, -0.141675, 1.0], 87 | # [1.0, -2.0, 1.0]].transpose 88 | # # NMatrix (LAPACK) -- e_values, left_e_vecs, right_e_vecs 89 | # eigenvalues, _, eigenvectors = nmat.eigen 90 | 91 | # def eigencheck? 
orig, e_vals, e_vecs 92 | # # INPUT: original matrix, eigenvalues accessible by index, 93 | # # NMatrix with corresponding eigenvectors in columns 94 | # e_vecs.each_column.each_with_index.all? do |e_vec_t, i| 95 | # left = orig.dot(e_vec_t) 96 | # right = e_vec_t * e_vals[i] 97 | # left.approximates? right 98 | # end 99 | # end 100 | 101 | # it "solves the eigendecomposition" do 102 | # expect(eigencheck?(nmat, trg_eigenvalues, trg_eigenvectors)).to be_truthy 103 | # expect(eigenvalues.approximates? trg_eigenvalues).to be_truthy 104 | # expect(eigencheck?(nmat, eigenvalues, eigenvectors)).to be_truthy 105 | # end 106 | # end 107 | 108 | # describe "#exponential" do 109 | # testmat = nmat/10.0 # let's avoid 1e6 values, shall we? 110 | # exp = [[1.37316, 0.531485, 0.689809], 111 | # [1.00926, 2.24815, 1.48704], 112 | # [1.64536, 1.96481, 3.28426]] 113 | # it "computes the correct result" do 114 | # left = testmat.exponential 115 | # right = NMatrix[*exp] 116 | # expect(left.approximates? right).to be_truthy 117 | # end 118 | # end 119 | 120 | # describe "row_norms" do 121 | # trg_row_norms = [[3.7416573], [8.7749643], [13.928388]] 122 | # it "computes the correct result" do 123 | # expect(nmat.row_norms.approximates? NMatrix[*trg_row_norms]).to be_truthy 124 | # end 125 | # end 126 | 127 | # describe "#approximates?" do 128 | # it do 129 | # expect(nmat.approximates? nmat+1e-4, 1e-3).to be_truthy 130 | # expect(nmat.approximates? nmat+1e-2, 1e-3).not_to be_truthy 131 | # end 132 | # end 133 | 134 | # describe "#sort_rows_by" do 135 | # it "should be implemented! And used in NES#sorted_inds!" 136 | # end 137 | 138 | # describe "#hjoin", :SKIP do 139 | # it "should work with smaller matrices" do 140 | # a = NMatrix.new([1,3], [1,2,3]) 141 | # b = NMatrix.new([1,2], [4,5]) 142 | # expect(a.hjoin(b)).to eq(NMatrix.new([1,5], [1,2,3,4,5])) 143 | # end 144 | # it "should work with larger matrices" do 145 | # a = NMatrix.new([1,3], [1,2,3]) 146 | # b = NMatrix.new([1,4], [4,5,6,7]) 147 | # expect(a.hjoin(b)).to eq(NMatrix.new([1,7], [1,2,3,4,5,6,7])) 148 | # end 149 | # # it "should be tested also with multirow matrices" 150 | # end 151 | 152 | # describe "#vjoin", :SKIP do 153 | # it "should work with smaller matrices" do 154 | # a = NMatrix.new([3,1], [1,2,3]) 155 | # b = NMatrix.new([2,1], [4,5]) 156 | # expect(a.vjoin(b)).to eq(NMatrix.new([5,1], [1,2,3,4,5])) 157 | # end 158 | # it "should work with larger matrices" do 159 | # a = NMatrix.new([3,1], [1,2,3]) 160 | # b = NMatrix.new([4,1], [4,5,6,7]) 161 | # expect(a.vjoin(b)).to eq(NMatrix.new([7,1], [1,2,3,4,5,6,7])) 162 | # end 163 | # # it "should be tested also with multicolumn matrices!" 164 | # end 165 | 166 | # describe "#to_consistent_a" do 167 | # it "should always return an array with the same shape as the matrix" do 168 | # { [2,2] => [[1,2],[3,4]], # square 169 | # [2,3] => [[1,2,3],[4,5,6]], # rectangular (h) 170 | # [3,2] => [[1,2],[3,4],[5,6]], # rectangular (v) 171 | # [1,3] => [[1,2,3]], # single row => THIS FAILS FOR `NMatrix#to_a`! 172 | # [3,1] => [[1],[2],[3]], # single column 173 | # [3] => [1,2,3] # single-dimensional 174 | # }.each do |shape, ary| 175 | # expect(NMatrix.new(shape, ary.flatten).to_consistent_a).to eq ary 176 | # end 177 | # end 178 | # end 179 | # end 180 | # end 181 | 182 | # RSpec.describe "NMatrix inconsistencies, fixed in `Monkey`" do 183 | 184 | 185 | # # IF ANY OF THESE TESTS FAIL, DROP THE MONKEY AND USE THESE METHODS! 186 | 187 | 188 | # # method #to_a not consistent! 
=> wrote true_to_a (fixing it breaks #new) 189 | # describe "#to_a" do 190 | # it "does not always return an array with the same shape as the matrix" do 191 | # { 192 | # # [2,2] => [[1,2],[3,4]], # square 193 | # # [2,3] => [[1,2,3],[4,5,6]], # rectangular (h) 194 | # # [3,2] => [[1,2],[3,4],[5,6]], # rectangular (v) 195 | # [1,3] => [[1,2,3]], # single row => THIS FAILS FOR `NMatrix#to_a`! 196 | # # [3,1] => [[1],[2],[3]], # single column 197 | # # [3] => [1,2,3] # single-dimensional 198 | # }.each do |shape, ary| 199 | # expect(NMatrix.new(shape, ary.flatten).to_a).not_to eq ary 200 | # end 201 | # end 202 | # end 203 | 204 | end 205 | -------------------------------------------------------------------------------- /spec/neural_network/neural_network_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::NeuralNetwork do 4 | NN = MachineLearningWorkbench::NeuralNetwork 5 | netstruct = [2,2,1] 6 | 7 | describe NN::FeedForward do 8 | net = NN::FeedForward.new netstruct 9 | 10 | it "#initialize" do 11 | expect(net.struct).to eq(netstruct) 12 | end 13 | 14 | it "#reset" do 15 | initial_state = [ 16 | NArray[[0,0,1]], 17 | NArray[[0,0,1]], 18 | NArray[[0]]] 19 | altered_state = initial_state.collect { |m| m+1 } 20 | net.instance_variable_set(:@state, altered_state) 21 | expect(net.state).not_to eq(initial_state) 22 | net.reset_state 23 | expect(net.state).to eq(initial_state) 24 | end 25 | 26 | it "#deep_reset" do 27 | memoized_vars = [:@layer_row_sizes, :@layer_col_sizes, :@nlayers, 28 | :@layer_shapes, :@nweights_per_layer, :@nweights] 29 | net.nweights; net.nlayers # they end up calling all methods that use memoization 30 | memoized_vars.each do |sym| 31 | expect(net.instance_variable_get(sym)).not_to be_nil 32 | end 33 | net.deep_reset 34 | memoized_vars.each do |sym| 35 | expect(net.instance_variable_get(sym)).to be_nil 36 | end 37 | end 38 | 39 | it "#nweights" do 40 | # netstruct: [2,2,1] => layer_shapes: [[2,3],[1,3]] (remember: bias!) 41 | expect(net.nweights).to eq(2*3 + 1*3) 42 | end 43 | 44 | it "#layer_shapes" do 45 | # netstruct: [2,2,1] => layer_shapes: [[2,3],[1,3]] (remember: bias!) 
46 | expect(net.layer_row_sizes.size).to eq(net.layer_col_sizes.size) 47 | expect(net.layer_shapes).to eq([[2+1,2],[2+1,1]]) 48 | end 49 | 50 | context "with random weights" do 51 | net.init_random 52 | 53 | it "has one output" do 54 | expect(net.activate([2,2]).size).to eq(1) 55 | end 56 | 57 | it "#nweights correctly counts the weights" do 58 | expect(net.nweights).to eq(net.weights.map(&:to_a).flatten.size) 59 | end 60 | end 61 | 62 | context "with loaded weights" do 63 | weights = net.nweights.times.collect { |n| 1.0/(n+1) } # best to avoid 1.0/0 64 | 65 | it "#load_weights" do 66 | weights_are_safe = weights.dup 67 | net.load_weights weights_are_safe 68 | expect(weights_are_safe).to eq(weights) 69 | expect(net.layers.collect(&:to_a).flatten).to eq(weights) 70 | end 71 | 72 | it "solves the XOR problem" do 73 | # [0,1].repeated_permutation(2).collect{|pair| [pair, pair.reduce(:^)]} 74 | xor_table = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0} 75 | net = NN::FeedForward.new([2,2,1], act_fn: :logistic) 76 | # 2 in + b -> 3 neur, 2 in + b -> 1 neur 77 | # http://stats.stackexchange.com/questions/12197/can-a-2-2-1-feedforward-neural-network-with-sigmoid-activation-functions-represe 78 | solution_weights = [ [[1,2],[1,2],[0,0]], [[-1000],[850],[0]] ] 79 | net.load_weights solution_weights.flatten 80 | expect(net.weights).to eq(solution_weights) 81 | xor_table.each do |input, target| 82 | expect(net.activate(input)[0].approximates? target).to be_truthy 83 | end 84 | end 85 | end 86 | end 87 | 88 | describe NN::Recurrent do 89 | net = NN::Recurrent.new [2,2,1] 90 | context "with random weights" do 91 | net.init_random 92 | 93 | it "#nweights and #weights correspond" do 94 | expect(net.nweights).to eq(net.weights.map(&:to_a).flatten.size) 95 | end 96 | 97 | it "#layer_shapes" do 98 | # netstruct: [2,2,1], with recurrency and biases 99 | expect(net.layer_shapes).to eq([[2+2+1,2],[2+1+1,1]]) 100 | end 101 | 102 | it "works" do 103 | expect(net.activate([2,2]).size).to eq(1) 104 | end 105 | 106 | end 107 | end 108 | 109 | end 110 | -------------------------------------------------------------------------------- /spec/optimizer/natural_evolution_strategies/individuals_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies do 4 | NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 5 | 6 | describe :inds do 7 | 8 | context "when sorted by fitness" do 9 | fit = lambda { |ind| ind.sum } 10 | 11 | class TestNES < NES::Base 12 | def initialize_distribution mu_init: nil, sigma_init: nil 13 | @eye = NArray.eye(@ndims) 14 | @mu = NArray.zeros([1,@ndims]) 15 | @sigma = @eye.copy 16 | @popsize = 3 # must match with `inds` declared above 17 | end 18 | end 19 | 20 | context "with generated inds" do 21 | ndims = 5 22 | nes = TestNES.new(ndims, fit, :min) 23 | # fetch individuals through nes sampling 24 | 25 | it "minimization" do 26 | nes_sums = nes.sorted_inds.sum(1) 27 | expect(nes_sums).to eq(nes_sums.sort.reverse) 28 | end 29 | 30 | it "maximization" do 31 | nes.instance_eval("@opt_type = :max") 32 | nes_sums = nes.sorted_inds.sum(1) 33 | expect(nes_sums).to eq(nes_sums.sort) 34 | end 35 | end 36 | end 37 | 38 | end 39 | end 40 | -------------------------------------------------------------------------------- /spec/optimizer/natural_evolution_strategies/magic_numbers_spec.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies do 4 | describe "magic numbers" do 5 | NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 6 | 7 | class TestNES < NES::Base 8 | def initialize_distribution **args 9 | @mu = NArray.zeros [1, @ndims] 10 | @sigma = NArray.eye @ndims 11 | end 12 | end 13 | 14 | describe :utilities do 15 | 16 | expected = { 17 | 5 => [-0.2, -0.2, -0.2, -0.2, 0.8], 18 | 10 => [-0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, 0.0215323, 0.192823, 0.485645], 19 | 20 => [-0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.05, -0.0331092, -0.0139599, 0.00814626, 0.0342923, 0.0662925, 0.107548, 0.165694, 0.265096] 20 | } 21 | 22 | it "computes the correct values" do 23 | expected.each do |n, exp| 24 | nes = TestNES.new(n, Proc.new{}, :min) 25 | nes.instance_eval("@popsize = n") 26 | expect(nes.cmaes_utilities.approximates? NArray[exp]).to be_truthy 27 | end 28 | end 29 | end 30 | 31 | describe :lrate do 32 | expected = { 33 | 5 => 0.412281, 34 | 10 => 0.335365, 35 | 20 => 0.268137 36 | } 37 | 38 | it "computes the correct values" do 39 | expected.each do |n, exp| 40 | nes = TestNES.new(n, Proc.new{}, :min) 41 | expect(nes.cmaes_lrate.approximates? exp).to be_truthy 42 | end 43 | end 44 | end 45 | 46 | describe :popsize do 47 | expected = { 48 | 5 => 8, 49 | 10 => 10, 50 | 20 => 12 51 | } 52 | 53 | it "computes the correct values" do 54 | expected.each do |n, exp| 55 | nes = TestNES.new(n, Proc.new{}, :min) 56 | expect(nes.cmaes_popsize).to eq(exp) 57 | end 58 | end 59 | end 60 | 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /spec/optimizer/natural_evolution_strategies/nes_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies do 4 | NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies 5 | ndims = 5 # XNES, SNES RNES 6 | ndims_lst = [3,2] # BDNES 7 | obj_fns = { 8 | # MINIMIZATION: upper parabolic with minimum in [0]*ndims 9 | min: -> (ind) { (ind**2).sum }, 10 | # MAXIMIZATION: lower parabolic with maximum in [0]*ndims 11 | max: -> (ind) { -(ind**2).sum } 12 | } 13 | opt_types=obj_fns.keys 14 | one_opt_type = opt_types.first 15 | ntrains = 200 16 | 17 | describe NES::XNES do 18 | 19 | describe "#init" do 20 | it "initializes correctly" do 21 | opt_type = opt_types.sample # try either :) 22 | nes = NES::XNES.new ndims, obj_fns[opt_type], opt_type 23 | 24 | expect(opt_types).to include nes.opt_type 25 | expect(nes.obj_fn).to eq(obj_fns[nes.opt_type]) 26 | end 27 | end 28 | 29 | describe "#train" do 30 | describe "full run" do 31 | opt_type = opt_types.sample # try either :) 32 | nes = NES::XNES.new ndims, obj_fns[opt_type], opt_type, rseed: 1 33 | context "within #{ntrains} iterations" do 34 | it "optimizes the negative squares function" do 35 | ntrains.times { nes.train } 36 | expect(nes.mu.approximates? 0).to be_truthy 37 | expect(nes.convergence.approximates? 
0).to be_truthy 38 | end 39 | end 40 | end 41 | 42 | describe "with parallel fit" do 43 | opt_type = opt_types.sample # try either :) 44 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 45 | nes = NES::XNES.new ndims, fit_par, opt_type, parallel_fit: true, rseed: 1 46 | context "within #{ntrains} iterations" do 47 | it "optimizes the negative squares function" do 48 | ntrains.times { nes.train } 49 | expect(nes.mu.approximates? 0).to be_truthy 50 | expect(nes.convergence.approximates? 0).to be_truthy 51 | end 52 | end 53 | end 54 | end 55 | 56 | describe "resuming" do 57 | it "#dump and #load" do 58 | a = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 59 | 3.times { a.train } 60 | a_dump = a.save 61 | b = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 2 62 | b.load a_dump 63 | b_dump = b.save 64 | expect(a_dump).to eq(b_dump) 65 | end 66 | 67 | it "#load allows resuming" do 68 | nes = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 69 | 4.times { nes.train } 70 | run_4_straight = nes.save 71 | 72 | nes = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 73 | 2.times { nes.train } 74 | run_2_only = nes.save 75 | 76 | # If I resume with a new nes, it works, but results differ because 77 | # it changes the number of times the rand has been sampled 78 | nes_new = NES::XNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 79 | nes_new.load run_2_only 80 | 2.times { nes_new.train } 81 | run_4_resumed_new = nes_new.save 82 | expect(run_4_straight).not_to eq(run_4_resumed_new) 83 | 84 | # TODO: reactivate the test below after `Xumo::NArray#rand_norm` accepts 85 | # a random seed as input, for the moment we're giving up on this in 86 | # exchange for performance. 87 | 88 | # # If instead I use a nes with same rseed and same number of rand 89 | # # calls, even though I trash the dist info, it yields the same result 90 | # nes.load run_2_only 91 | # 2.times { nes.train } 92 | # run_4_resumed = nes.save 93 | # expect(run_4_straight).to eq(run_4_resumed) 94 | end 95 | end 96 | end 97 | 98 | describe NES::SNES do 99 | describe "full run" do 100 | opt_type = opt_types.sample # try either :) 101 | nes = NES::SNES.new ndims, obj_fns[opt_type], opt_type, rseed: 1 102 | context "within #{ntrains} iterations" do 103 | it "optimizes the negative squares function" do 104 | ntrains.times { nes.train } 105 | expect(nes.mu.approximates? 0).to be_truthy 106 | expect(nes.convergence.approximates? 0).to be_truthy 107 | end 108 | end 109 | end 110 | 111 | describe "with parallel fit" do 112 | opt_type = opt_types.sample # try either :) 113 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 114 | nes = NES::SNES.new ndims, fit_par, opt_type, parallel_fit: true, rseed: 1 115 | context "within #{ntrains} iterations" do 116 | it "optimizes the negative squares function" do 117 | ntrains.times { nes.train } 118 | expect(nes.mu.approximates? 0).to be_truthy 119 | expect(nes.convergence.approximates? 
0).to be_truthy 120 | end 121 | end 122 | end 123 | 124 | describe "resuming" do 125 | it "#dump and #load" do 126 | a = NES::SNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 127 | 3.times { a.train } 128 | a_dump = a.save 129 | b = NES::SNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 2 130 | b.load a_dump 131 | b_dump = b.save 132 | expect(a_dump).to eq(b_dump) 133 | end 134 | end 135 | end 136 | 137 | describe NES::RNES do 138 | describe "full run" do 139 | opt_type = opt_types.sample # try either :) 140 | nes = NES::RNES.new ndims, obj_fns[opt_type], opt_type, rseed: 1 141 | context "within #{ntrains} iterations" do 142 | it "optimizes the negative squares function" do 143 | ntrains.times { nes.train } 144 | expect(nes.mu.approximates? 0).to be_truthy 145 | expect(nes.convergence.approximates? 0).to be_truthy 146 | end 147 | end 148 | end 149 | 150 | describe "with parallel fit" do 151 | opt_type = opt_types.sample # try either :) 152 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 153 | nes = NES::RNES.new ndims, fit_par, opt_type, parallel_fit: true, rseed: 1 154 | context "within #{ntrains} iterations" do 155 | it "optimizes the negative squares function" do 156 | ntrains.times { nes.train } 157 | expect(nes.mu.approximates? 0).to be_truthy 158 | expect(nes.convergence.approximates? 0).to be_truthy 159 | end 160 | end 161 | end 162 | 163 | describe "resuming" do 164 | it "#dump and #load" do 165 | a = NES::RNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 1 166 | 3.times { a.train } 167 | a_dump = a.save 168 | b = NES::RNES.new ndims, obj_fns[one_opt_type], one_opt_type, rseed: 2 169 | b.load a_dump 170 | b_dump = b.save 171 | expect(a_dump).to eq(b_dump) 172 | end 173 | end 174 | end 175 | 176 | # Fix FNES first 177 | # describe NES::FNES, :SKIP do 178 | # describe "full run" do 179 | # opt_type = opt_types.sample # try either :) 180 | # nes = NES::FNES.new ndims, obj_fns[opt_type], opt_type, rseed: 5 181 | # context "within #{ntrains} iterations" do 182 | # it "optimizes the negative squares function" do 183 | # ntrains.times { nes.train } 184 | # expect(nes.mu.approximates? 0).to be_truthy 185 | # expect(nes.convergence).to eq(1) 186 | # end 187 | # end 188 | # end 189 | # end 190 | 191 | describe NES::BDNES do 192 | describe "full run" do 193 | opt_type = opt_types.sample # try either :) 194 | nes = NES::BDNES.new [3,2], obj_fns[opt_type], opt_type, rseed: 1 195 | context "within #{ntrains} iterations" do 196 | it "optimizes the negative squares function" do 197 | ntrains.times { nes.train } 198 | expect(nes.mu.approximates? 0).to be_truthy 199 | expect(nes.convergence.approximates? 0).to be_truthy 200 | end 201 | end 202 | end 203 | 204 | describe "with parallel fit" do 205 | opt_type = opt_types.sample # try either :) 206 | fit_par = -> (inds) { inds.map &obj_fns[opt_type] } 207 | nes = NES::BDNES.new [3,2], fit_par, opt_type, parallel_fit: true, rseed: 1 208 | context "within #{ntrains} iterations" do 209 | it "optimizes the negative squares function" do 210 | ntrains.times { nes.train } 211 | expect(nes.mu.approximates? 0).to be_truthy 212 | expect(nes.convergence.approximates? 
0).to be_truthy 213 | end 214 | end 215 | end 216 | 217 | describe "resuming" do 218 | it "#dump and #load" do 219 | a = NES::BDNES.new ndims_lst, obj_fns[one_opt_type], one_opt_type, rseed: 1 220 | 3.times { a.train } 221 | a_dump = a.save 222 | b = NES::BDNES.new ndims_lst, obj_fns[one_opt_type], one_opt_type, rseed: 2 223 | b.load a_dump 224 | b_dump = b.save 225 | expect(a_dump).to eq(b_dump) 226 | end 227 | end 228 | end 229 | 230 | end 231 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/setup" 4 | require "machine_learning_workbench" 5 | require_relative 'helpers/uses_temporary_folders' 6 | 7 | STDOUT.sync = true 8 | 9 | RSpec.configure do |config| 10 | # Enable flags like --only-failures and --next-failure 11 | config.example_status_persistence_file_path = ".rspec_status" 12 | # Disable RSpec exposing methods globally on `Module` and `main` 13 | config.disable_monkey_patching! 14 | 15 | config.expect_with :rspec do |c| 16 | c.syntax = :expect 17 | end 18 | 19 | # These two settings work together to allow you to limit a spec run 20 | # to individual examples or groups you care about by tagging them with 21 | # `:focus` metadata. When nothing is tagged with `:focus`, all examples 22 | # get run. 23 | config.filter_run :FOCUS 24 | config.filter_run_excluding :SKIP 25 | config.run_all_when_everything_filtered = true 26 | end 27 | -------------------------------------------------------------------------------- /spec/systems/neuroevo_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | "Work in progress" 4 | --------------------------------------------------------------------------------
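The neuroevolution system above is still marked as work in progress. Purely as an illustrative sketch of how the classes shown in this listing can be combined — this is not code from the repository, and the fitness definition, seed and iteration count are assumptions — a feed-forward network could be trained on the specs' XOR table by optimizing its weights with XNES:

    require 'machine_learning_workbench'

    NN  = MachineLearningWorkbench::NeuralNetwork
    NES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies

    xor = { [0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0 }
    net = NN::FeedForward.new [2, 2, 1], act_fn: :logistic

    # Fitness to maximize: negated squared error over the XOR table
    fit = -> (weights) do
      net.load_weights weights
      sq_err = xor.sum { |input, target| (net.activate(input)[0] - target)**2 }
      -sq_err
    end

    nes = NES::XNES.new net.nweights, fit, :max, rseed: 1
    200.times { nes.train }
    net.load_weights nes.mu.flatten  # adopt the distribution mean as final weights

XNES fits here because the network has few weights; for larger networks the separable SNES or the block-diagonal BDNES above typically trade full covariance modelling for speed.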