├── .github
    └── workflows
    │   └── build.yml
├── .gitignore
├── CHANGELOG.md
├── Gemfile
├── LICENSE.txt
├── README.md
├── Rakefile
├── lib
    ├── lightgbm.rb
    └── lightgbm
    │   ├── booster.rb
    │   ├── classifier.rb
    │   ├── dataset.rb
    │   ├── ffi.rb
    │   ├── inner_predictor.rb
    │   ├── model.rb
    │   ├── ranker.rb
    │   ├── regressor.rb
    │   ├── utils.rb
    │   └── version.rb
├── lightgbm.gemspec
├── test
    ├── booster_test.rb
    ├── classifier_test.rb
    ├── cv_test.rb
    ├── dataset_test.rb
    ├── ranker_test.rb
    ├── regressor_test.rb
    ├── support
    │   ├── booster.py
    │   ├── categorical.py
    │   ├── categorical.txt
    │   ├── classifier.py
    │   ├── cv.py
    │   ├── data.csv
    │   ├── model.txt
    │   ├── ranker.py
    │   ├── regressor.py
    │   └── train.py
    ├── test_helper.rb
    └── train_test.rb
└── vendor
    ├── LICENSE
    └── LICENSE-THIRD-PARTY


/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | on: [push, pull_request]
 3 | jobs:
 4 |   build:
 5 |     strategy:
 6 |       fail-fast: false  # let the other OS jobs finish when one of them fails
 7 |       matrix:
 8 |         os: [ubuntu-latest, macos-14, macos-13]
 9 |     runs-on: ${{ matrix.os }}
10 |     steps:
11 |       - if: ${{ startsWith(matrix.os, 'windows') }}  # no-op until a Windows runner is added to the matrix
12 |         run: git config --global core.autocrlf false
13 |       - uses: actions/checkout@v4
14 |       - uses: ruby/setup-ruby@v1
15 |         with:
16 |           ruby-version: "3.4"  # quoted so the version stays a string (3.10 would otherwise parse as 3.1)
17 |           bundler-cache: true
18 |       - if: ${{ startsWith(matrix.os, 'macos') }}  # OpenMP is a separate install on macOS
19 |         run: brew install libomp
20 |       - run: bundle exec rake vendor:platform  # download the shared library for the runner's platform
21 |       - run: bundle exec rake test
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /_yardoc/
 4 | /coverage/
 5 | /doc/
 6 | /pkg/
 7 | /spec/reports/
 8 | /tmp/
 9 | *.lock
10 | /vendor/lib_lightgbm.*
11 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | ## 0.4.2 (unreleased)
  2 | 
  3 | - Improved performance of `predict` method for `pandas_categorical`
  4 | 
  5 | ## 0.4.1 (2025-02-17)
  6 | 
  7 | - Updated LightGBM to 4.6.0
  8 | 
  9 | ## 0.4.0 (2025-01-05)
 10 | 
 11 | - Added support for different prediction types
 12 | - Added support for `pandas_categorical` to `predict` method
 13 | - Added support for hashes and Rover data frames to `predict` method
 14 | - Added support for hashes to `Dataset`
 15 | - Added `importance_type` option to `dump_model`, `model_to_string`, and `save_model` methods
 16 | - Changed `Dataset` to use column names for feature names with Rover and Daru
 17 | - Changed `predict` method to match feature names with Daru
 18 | - Dropped support for Ruby < 3.1
 19 | 
 20 | ## 0.3.4 (2024-07-28)
 21 | 
 22 | - Updated LightGBM to 4.5.0
 23 | 
 24 | ## 0.3.3 (2024-06-15)
 25 | 
 26 | - Updated LightGBM to 4.4.0
 27 | 
 28 | ## 0.3.2 (2024-01-25)
 29 | 
 30 | - Updated LightGBM to 4.3.0
 31 | 
 32 | ## 0.3.1 (2023-09-13)
 33 | 
 34 | - Updated LightGBM to 4.1.0
 35 | 
 36 | ## 0.3.0 (2023-07-22)
 37 | 
 38 | - Updated LightGBM to 4.0.0
 39 | - Fixed error with `dup` and `clone`
 40 | - Dropped support for Ruby < 3
 41 | 
 42 | ## 0.2.7 (2023-02-01)
 43 | 
 44 | - Updated LightGBM to 3.3.5
 45 | - Improved ARM detection
 46 | 
 47 | ## 0.2.6 (2021-10-24)
 48 | 
 49 | - Updated LightGBM to 3.3.0
 50 | 
 51 | ## 0.2.5 (2021-07-07)
 52 | 
 53 | - Added `feature_name` method to boosters
 54 | 
 55 | ## 0.2.4 (2021-03-26)
 56 | 
 57 | - Updated LightGBM to 3.2.0
 58 | 
 59 | ## 0.2.3 (2021-03-09)
 60 | 
 61 | - Added ARM shared library for Mac
 62 | 
 63 | ## 0.2.2 (2020-12-07)
 64 | 
 65 | - Updated LightGBM to 3.1.1
 66 | 
 67 | ## 0.2.1 (2020-11-15)
 68 | 
 69 | - Updated LightGBM to 3.1.0
 70 | 
 71 | ## 0.2.0 (2020-08-31)
 72 | 
 73 | - Updated LightGBM to 3.0.0
 74 | - Made `best_iteration` and `eval_hist` consistent with Python
 75 | 
 76 | ## 0.1.9 (2020-06-10)
 77 | 
 78 | - Added support for Rover
 79 | - Improved performance of Numo datasets
 80 | 
 81 | ## 0.1.8 (2020-05-09)
 82 | 
 83 | - Improved error message when OpenMP not found on Mac
 84 | - Fixed `Cannot add validation data` error
 85 | 
 86 | ## 0.1.7 (2019-12-05)
 87 | 
 88 | - Updated LightGBM to 2.3.1
 89 | - Switched to doubles for datasets and predictions
 90 | 
 91 | ## 0.1.6 (2019-09-29)
 92 | 
 93 | - Updated LightGBM to 2.3.0
 94 | - Fixed error with JRuby
 95 | 
 96 | ## 0.1.5 (2019-09-03)
 97 | 
 98 | - Packaged LightGBM with gem
 99 | - Added support for missing values
100 | - Added `feature_names` to datasets
101 | - Fixed Daru training and prediction
102 | 
103 | ## 0.1.4 (2019-08-19)
104 | 
105 | - Friendlier message when LightGBM not found
106 | - Added `Ranker`
107 | - Added early stopping to Scikit-Learn API
108 | - Free memory when objects are destroyed
109 | - Removed unreleased `dump_text` method
110 | 
111 | ## 0.1.3 (2019-08-16)
112 | 
113 | - Added Scikit-Learn API
114 | - Added support for Daru and Numo::NArray
115 | 
116 | ## 0.1.2 (2019-08-15)
117 | 
118 | - Added `cv` method
119 | - Added early stopping
120 | - Fixed multiclass classification
121 | 
122 | ## 0.1.1 (2019-08-14)
123 | 
124 | - Added training API
125 | - Added many methods
126 | 
127 | ## 0.1.0 (2019-08-13)
128 | 
129 | - First release
130 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | source "https://rubygems.org"
 2 | 
 3 | gemspec
 4 | 
 5 | gem "rake"
 6 | gem "minitest", ">= 5"
 7 | gem "daru"
 8 | gem "matrix"
 9 | gem "numo-narray", platform: [:mri, :x64_mingw]
10 | gem "rover-df", platform: [:mri, :x64_mingw]
11 | gem "csv"
12 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) Microsoft Corporation
 4 | Copyright (c) 2019-2025 Andrew Kane
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # LightGBM Ruby
  2 | 
  3 | [LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
  4 | 
  5 | [![Build Status](https://github.com/ankane/lightgbm-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/lightgbm-ruby/actions)
  6 | 
  7 | ## Installation
  8 | 
  9 | Add this line to your application’s Gemfile:
 10 | 
 11 | ```ruby
 12 | gem "lightgbm"
 13 | ```
 14 | 
 15 | On Mac, also install OpenMP:
 16 | 
 17 | ```sh
 18 | brew install libomp
 19 | ```
 20 | 
 21 | ## Training API
 22 | 
 23 | Prep your data
 24 | 
 25 | ```ruby
 26 | x = [[1, 2], [3, 4], [5, 6], [7, 8]]
 27 | y = [1, 2, 3, 4]
 28 | ```
 29 | 
 30 | Train a model
 31 | 
 32 | ```ruby
 33 | params = {objective: "regression"}
 34 | train_set = LightGBM::Dataset.new(x, label: y)
 35 | booster = LightGBM.train(params, train_set)
 36 | ```
 37 | 
 38 | Predict
 39 | 
 40 | ```ruby
 41 | booster.predict(x)
 42 | ```
 43 | 
 44 | Save the model to a file
 45 | 
 46 | ```ruby
 47 | booster.save_model("model.txt")
 48 | ```
 49 | 
 50 | Load the model from a file
 51 | 
 52 | ```ruby
 53 | booster = LightGBM::Booster.new(model_file: "model.txt")
 54 | ```
 55 | 
 56 | Get the importance of features
 57 | 
 58 | ```ruby
 59 | booster.feature_importance
 60 | ```
 61 | 
 62 | Early stopping
 63 | 
 64 | ```ruby
 65 | LightGBM.train(params, train_set, valid_sets: [train_set, test_set], early_stopping_rounds: 5)
 66 | ```
 67 | 
 68 | Cross-validation
 69 | 
 70 | ```ruby
 71 | LightGBM.cv(params, train_set, nfold: 5, verbose_eval: true)
 72 | ```
 73 | 
 74 | ## Scikit-Learn API
 75 | 
 76 | Prep your data
 77 | 
 78 | ```ruby
 79 | x = [[1, 2], [3, 4], [5, 6], [7, 8]]
 80 | y = [1, 2, 3, 4]
 81 | ```
 82 | 
 83 | Train a model
 84 | 
 85 | ```ruby
 86 | model = LightGBM::Regressor.new
 87 | model.fit(x, y)
 88 | ```
 89 | 
 90 | > For classification, use `LightGBM::Classifier`
 91 | 
 92 | Predict
 93 | 
 94 | ```ruby
 95 | model.predict(x)
 96 | ```
 97 | 
 98 | > For classification, use `predict_proba` for probabilities
 99 | 
100 | Save the model to a file
101 | 
102 | ```ruby
103 | model.save_model("model.txt")
104 | ```
105 | 
106 | Load the model from a file
107 | 
108 | ```ruby
109 | model.load_model("model.txt")
110 | ```
111 | 
112 | Get the importance of features
113 | 
114 | ```ruby
115 | model.feature_importances
116 | ```
117 | 
118 | Early stopping
119 | 
120 | ```ruby
121 | model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
122 | ```
123 | 
124 | ## Data
125 | 
126 | Data can be an array of arrays
127 | 
128 | ```ruby
129 | [[1, 2, 3], [4, 5, 6]]
130 | ```
131 | 
132 | Or a Numo array
133 | 
134 | ```ruby
135 | Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
136 | ```
137 | 
138 | Or a Rover data frame
139 | 
140 | ```ruby
141 | Rover.read_csv("houses.csv")
142 | ```
143 | 
144 | Or a Daru data frame
145 | 
146 | ```ruby
147 | Daru::DataFrame.from_csv("houses.csv")
148 | ```
149 | 
150 | ## Helpful Resources
151 | 
152 | - [Parameters](https://lightgbm.readthedocs.io/en/latest/Parameters.html)
153 | - [Parameter Tuning](https://lightgbm.readthedocs.io/en/latest/Parameters-Tuning.html)
154 | 
155 | ## Related Projects
156 | 
157 | - [XGBoost](https://github.com/ankane/xgboost-ruby) - XGBoost for Ruby
158 | - [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
159 | 
160 | ## Credits
161 | 
162 | This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
163 | 
164 | - The `get_` and `set_` prefixes are removed from methods
165 | - The default verbosity is `-1`
166 | - With the `cv` method, `stratified` is set to `false`
167 | 
168 | Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
169 | 
170 | ## History
171 | 
172 | View the [changelog](https://github.com/ankane/lightgbm-ruby/blob/master/CHANGELOG.md)
173 | 
174 | ## Contributing
175 | 
176 | Everyone is encouraged to help improve this project. Here are a few ways you can help:
177 | 
178 | - [Report bugs](https://github.com/ankane/lightgbm-ruby/issues)
179 | - Fix bugs and [submit pull requests](https://github.com/ankane/lightgbm-ruby/pulls)
180 | - Write, clarify, or fix documentation
181 | - Suggest or add new features
182 | 
183 | To get started with development:
184 | 
185 | ```sh
186 | git clone https://github.com/ankane/lightgbm-ruby.git
187 | cd lightgbm-ruby
188 | bundle install
189 | bundle exec rake vendor:all
190 | bundle exec rake test
191 | ```
192 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | require "bundler/gem_tasks"
 2 | require "rake/testtask"
 3 | 
 4 | task default: :test
 5 | Rake::TestTask.new do |t|
 6 |   t.libs << "test"
 7 |   t.pattern = "test/**/*_test.rb"
 8 |   t.warning = false # for daru
 9 | end
10 | 
11 | shared_libraries = %w(lib_lightgbm.dll lib_lightgbm.dylib lib_lightgbm.arm64.dylib lib_lightgbm.so)
12 | 
13 | # ensure vendor files exist
14 | task :ensure_vendor do
15 |   shared_libraries.each do |file|
16 |     raise "Missing file: #{file}" unless File.exist?("vendor/#{file}")
17 |   end
18 | end
19 | 
20 | Rake::Task["build"].enhance [:ensure_vendor]
21 | 
22 | def download_file(file, sha256)
23 |   require "open-uri"
24 | 
25 |   # also update licenses in vendor/
26 |   version = "4.6.0"
27 | 
28 |   url =
29 |     if file == "lib_lightgbm.arm64.dylib"
30 |       "https://github.com/ankane/ml-builds/releases/download/lightgbm-#{version}/#{file}"
31 |     else
32 |       "https://github.com/microsoft/LightGBM/releases/download/v#{version}/#{file}"
33 |     end
34 |   puts "Downloading #{file}..."
35 |   contents = URI.parse(url).read
36 | 
37 |   computed_sha256 = Digest::SHA256.hexdigest(contents)
38 |   raise "Bad hash: #{computed_sha256}" if computed_sha256 != sha256
39 | 
40 |   dest = "vendor/#{file}"
41 |   File.binwrite(dest, contents)
42 |   puts "Saved #{dest}"
43 | end
44 | 
45 | # https://github.com/microsoft/LightGBM/releases
46 | namespace :vendor do
47 |   task :linux do
48 |     download_file("lib_lightgbm.so", "237f15e1362a5abab4be0fae14aebba7bb278763f3412a82c50ab9d1fc0dc8bd")
49 |   end
50 | 
51 |   task :mac do
52 |     download_file("lib_lightgbm.dylib", "15c6678c60f1acf4a34f0784f799ee3ec7a48e25efa9be90e7415d54f9bed858")
53 |     download_file("lib_lightgbm.arm64.dylib", "df56dce6597389a749de75e46b5383f83c751f57da643232ef766f15aca10a0d")
54 |   end
55 | 
56 |   task :windows do
57 |     download_file("lib_lightgbm.dll", "a5032c5278f3350ea9f7925b7b4d270b23af9a8e9639971cb025d615b45c39e7")
58 |   end
59 | 
60 |   task all: [:linux, :mac, :windows]
61 | 
62 |   task :platform do
63 |     if Gem.win_platform?
64 |       Rake::Task["vendor:windows"].invoke
65 |     elsif RbConfig::CONFIG["host_os"].match?(/darwin/i)
66 |       Rake::Task["vendor:mac"].invoke
67 |     else
68 |       Rake::Task["vendor:linux"].invoke
69 |     end
70 |   end
71 | end
72 | 


--------------------------------------------------------------------------------
/lib/lightgbm.rb:
--------------------------------------------------------------------------------
  1 | # dependencies
  2 | require "ffi"
  3 | 
  4 | # stdlib
  5 | require "json"
  6 | 
  7 | # modules
  8 | require_relative "lightgbm/utils"
  9 | require_relative "lightgbm/booster"
 10 | require_relative "lightgbm/dataset"
 11 | require_relative "lightgbm/inner_predictor"
 12 | require_relative "lightgbm/version"
 13 | 
 14 | # scikit-learn API
 15 | require_relative "lightgbm/model"
 16 | require_relative "lightgbm/classifier"
 17 | require_relative "lightgbm/ranker"
 18 | require_relative "lightgbm/regressor"
 19 | 
 20 | module LightGBM
 21 |   class Error < StandardError; end
 22 | 
 23 |   class << self
 24 |     attr_accessor :ffi_lib
 25 |   end
 26 |   # default to the shared library bundled in vendor/: ARM Macs get the arm64
 27 |   # build, everything else the plain build with the platform's library suffix
 28 |   arm_mac = /darwin/i.match?(RbConfig::CONFIG["host_os"]) && /arm|aarch64/i.match?(RbConfig::CONFIG["host_cpu"])
 29 |   arch_infix = arm_mac ? "arm64." : ""
 30 |   lib_name = "lib_lightgbm.#{arch_infix}#{::FFI::Platform::LIBSUFFIX}"
 31 |   self.ffi_lib = [File.expand_path("../vendor/#{lib_name}", __dir__)]
 34 | 
 35 |   # friendlier error message
 36 |   autoload :FFI, "lightgbm/ffi"
 37 | 
 38 |   class << self
 39 |     def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
 40 |       booster = Booster.new(params: params, train_set: train_set)
 41 | 
 42 |       valid_contain_train = false
 43 |       valid_sets.zip(valid_names).each_with_index do |(data, name), i|
 44 |         if data == train_set
 45 |           booster.train_data_name = name || "training"
 46 |           valid_contain_train = true
 47 |         else
 48 |           # ensure the validation set references the training set
 49 |           data.reference = train_set
 50 |           booster.add_valid(data, name || "valid_#{i}")
 51 |         end
 52 |       end
 53 | 
 54 |       raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set }
 55 | 
 56 |       booster.best_iteration = 0
 57 | 
 58 |       if early_stopping_rounds
 59 |         best_score = []
 60 |         best_iter = []
 61 |         best_message = []
 62 | 
 63 |         puts "Training until validation scores don't improve for #{early_stopping_rounds.to_i} rounds." if verbose_eval
 64 |       end
 65 | 
 66 |       num_boost_round.times do |iteration|
 67 |         booster.update
 68 | 
 69 |         if valid_sets.any?
 70 |           # print results
 71 |           messages = []
 72 | 
 73 |           eval_valid = booster.eval_valid
 74 |           if valid_contain_train
 75 |             eval_valid = eval_valid + booster.eval_train
 76 |           end
 77 |           # not sure why reversed in output
 78 |           eval_valid.reverse!
 79 | 
 80 |           eval_valid.each do |res|
 81 |             messages << "%s's %s: %g" % [res[0], res[1], res[2]]
 82 |           end
 83 | 
 84 |           message = "[#{iteration + 1}]\t#{messages.join("\t")}"
 85 | 
 86 |           puts message if verbose_eval
 87 | 
 88 |           if early_stopping_rounds
 89 |             stop_early = false
 90 |             eval_valid.each_with_index do |(_, _, score, higher_better), i|
 91 |               op = higher_better ? :> : :<
 92 |               if best_score[i].nil? || score.send(op, best_score[i])
 93 |                 best_score[i] = score
 94 |                 best_iter[i] = iteration
 95 |                 best_message[i] = message
 96 |               elsif iteration - best_iter[i] >= early_stopping_rounds
 97 |                 booster.best_iteration = best_iter[i] + 1
 98 |                 puts "Early stopping, best iteration is:\n#{best_message[i]}" if verbose_eval
 99 |                 stop_early = true
100 |                 break
101 |               end
102 |             end
103 | 
104 |             break if stop_early
105 | 
106 |             if iteration == num_boost_round - 1
107 |               booster.best_iteration = best_iter[0] + 1
108 |               puts "Did not meet early stopping. Best iteration is: #{best_message[0]}" if verbose_eval
109 |             end
110 |           end
111 |         end
112 |       end
113 | 
114 |       booster
115 |     end
116 | 
117 |     def cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true, early_stopping_rounds: nil, verbose_eval: nil, show_stdv: true)
118 |       rand_idx = (0...train_set.num_data).to_a
119 |       rand_idx.shuffle!(random: Random.new(seed)) if shuffle
120 | 
121 |       kstep = rand_idx.size / nfold
122 |       test_id = rand_idx.each_slice(kstep).to_a[0...nfold]
123 |       train_id = []
124 |       nfold.times do |i|
125 |         idx = test_id.dup
126 |         idx.delete_at(i)
127 |         train_id << idx.flatten
128 |       end
129 | 
130 |       boosters = []
131 |       folds = train_id.zip(test_id)
132 |       folds.each do |(train_idx, test_idx)|
133 |         fold_train_set = train_set.subset(train_idx)
134 |         fold_valid_set = train_set.subset(test_idx)
135 |         booster = Booster.new(params: params, train_set: fold_train_set)
136 |         booster.add_valid(fold_valid_set, "valid")
137 |         boosters << booster
138 |       end
139 | 
140 |       eval_hist = {}
141 | 
142 |       if early_stopping_rounds
143 |         best_score = {}
144 |         best_iter = {}
145 |         best_iteration = nil
146 |       end
147 | 
148 |       num_boost_round.times do |iteration|
149 |         boosters.each(&:update)
150 | 
151 |         scores = {}
152 |         boosters.map(&:eval_valid).flat_map(&:reverse).each do |r|
153 |           (scores[r[1]] ||= []) << r[2]
154 |         end
155 | 
156 |         message_parts = ["[#{iteration + 1}]"]
157 | 
158 |         means = {}
159 |         scores.each do |eval_name, vals|
160 |           mean = mean(vals)
161 |           stdev = stdev(vals)
162 | 
163 |           (eval_hist["#{eval_name}-mean"] ||= []) << mean
164 |           (eval_hist["#{eval_name}-stdv"] ||= []) << stdev
165 | 
166 |           means[eval_name] = mean
167 | 
168 |           if show_stdv
169 |             message_parts << "cv_agg's %s: %g + %g" % [eval_name, mean, stdev]
170 |           else
171 |             message_parts << "cv_agg's %s: %g" % [eval_name, mean]
172 |           end
173 |         end
174 | 
175 |         puts message_parts.join("\t") if verbose_eval
176 | 
177 |         if early_stopping_rounds
178 |           stop_early = false
179 |           means.each do |k, score|
180 |             # TODO fix higher better
181 |             if best_score[k].nil? || score < best_score[k]
182 |               best_score[k] = score
183 |               best_iter[k] = iteration
184 |             elsif iteration - best_iter[k] >= early_stopping_rounds
185 |               best_iteration = best_iter[k]
186 |               stop_early = true
187 |               break
188 |             end
189 |           end
190 |           break if stop_early
191 |         end
192 |       end
193 | 
194 |       if early_stopping_rounds
195 |         # use best iteration from first metric if not stopped early
196 |         best_iteration ||= best_iter[best_iter.keys.first]
197 |         eval_hist.each_key do |k|
198 |           eval_hist[k] = eval_hist[k].first(best_iteration + 1)
199 |         end
200 |       end
201 | 
202 |       eval_hist
203 |     end
204 | 
205 |     private
206 | 
207 |     def mean(arr)
208 |       arr.sum / arr.size.to_f
209 |     end
210 | 
211 |     # don't subtract one from arr.size
212 |     def stdev(arr)
213 |       m = mean(arr)
214 |       sum = 0
215 |       arr.each do |v|
216 |         sum += (v - m) ** 2
217 |       end
218 |       Math.sqrt(sum / arr.size)
219 |     end
220 |   end
221 | end
222 | 


--------------------------------------------------------------------------------
/lib/lightgbm/booster.rb:
--------------------------------------------------------------------------------
  1 | module LightGBM
  2 |   class Booster
  3 |     include Utils
  4 | 
  5 |     attr_accessor :best_iteration, :train_data_name, :params
  6 | 
  7 |     def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
  8 |       if model_str
  9 |         model_from_string(model_str)
 10 |       elsif model_file
 11 |         out_num_iterations = ::FFI::MemoryPointer.new(:int)
 12 |         create_handle do |handle|
 13 |           safe_call FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, handle)
 14 |         end
 15 |         @pandas_categorical = load_pandas_categorical(file_name: model_file)
 16 |         if params
 17 |           warn "[lightgbm] Ignoring params argument, using parameters from model file."
 18 |         end
 19 |         @params = loaded_param
 20 |       else
 21 |         params ||= {}
 22 |         set_verbosity(params)
 23 |         create_handle do |handle|
 24 |           safe_call FFI.LGBM_BoosterCreate(train_set.handle, params_str(params), handle)
 25 |         end
 26 |       end
 27 | 
 28 |       self.best_iteration = -1
 29 | 
 30 |       # TODO get names when loaded from file
 31 |       @name_valid_sets = []
 32 |     end
 33 | 
 34 |     def add_valid(data, name)
 35 |       safe_call FFI.LGBM_BoosterAddValidData(@handle, data.handle)
 36 |       @name_valid_sets << name
 37 |       self # consistent with Python API
 38 |     end
 39 | 
 40 |     def current_iteration
 41 |       out = ::FFI::MemoryPointer.new(:int)
 42 |       safe_call FFI.LGBM_BoosterGetCurrentIteration(@handle, out)
 43 |       out.read_int
 44 |     end
 45 | 
 46 |     def dump_model(num_iteration: nil, start_iteration: 0, importance_type: "split")
 47 |       num_iteration ||= best_iteration
 48 |       importance_type_int = feature_importance_type_mapper(importance_type)
 49 |       buffer_len = 1 << 20
 50 |       out_len = ::FFI::MemoryPointer.new(:int64)
 51 |       out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
 52 |       safe_call FFI.LGBM_BoosterDumpModel(@handle, start_iteration, num_iteration, importance_type_int, buffer_len, out_len, out_str)
 53 |       actual_len = out_len.read_int64
 54 |       if actual_len > buffer_len
 55 |         out_str = ::FFI::MemoryPointer.new(:char, actual_len)
 56 |         safe_call FFI.LGBM_BoosterDumpModel(@handle, start_iteration, num_iteration, importance_type_int, actual_len, out_len, out_str)
 57 |       end
 58 |       out_str.read_string
 59 |     end
 60 |     alias_method :to_json, :dump_model
 61 | 
 62 |     def eval_valid
 63 |       @name_valid_sets.each_with_index.flat_map { |n, i| inner_eval(n, i + 1) }
 64 |     end
 65 | 
 66 |     def eval_train
 67 |       inner_eval(train_data_name, 0)
 68 |     end
 69 | 
 70 |     def feature_importance(iteration: nil, importance_type: "split")
 71 |       iteration ||= best_iteration
 72 |       importance_type_int = feature_importance_type_mapper(importance_type)
 73 |       num_feature = self.num_feature
 74 |       out_result = ::FFI::MemoryPointer.new(:double, num_feature)
 75 |       safe_call FFI.LGBM_BoosterFeatureImportance(@handle, iteration, importance_type_int, out_result)
 76 |       out_result.read_array_of_double(num_feature).map(&:to_i)
 77 |     end
 78 | 
 79 |     def feature_name
 80 |       len = self.num_feature
 81 |       out_len = ::FFI::MemoryPointer.new(:size_t)
 82 |       buffer_len = 255
 83 |       out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
 84 |       out_strs = ::FFI::MemoryPointer.new(:pointer, num_feature)
 85 |       str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
 86 |       out_strs.write_array_of_pointer(str_ptrs)
 87 |       safe_call FFI.LGBM_BoosterGetFeatureNames(@handle, len, out_len, buffer_len, out_buffer_len, out_strs)
 88 | 
 89 |       actual_len = out_buffer_len.read(:size_t)
 90 |       if actual_len > buffer_len
 91 |         str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
 92 |         out_strs.write_array_of_pointer(str_ptrs)
 93 |         safe_call FFI.LGBM_BoosterGetFeatureNames(@handle, len, out_len, actual_len, out_buffer_len, out_strs)
 94 |       end
 95 | 
 96 |       str_ptrs[0, out_len.read(:size_t)].map(&:read_string)
 97 |     end
 98 | 
 99 |     def model_from_string(model_str)
100 |       out_num_iterations = ::FFI::MemoryPointer.new(:int)
101 |       create_handle do |handle|
102 |         safe_call FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, handle)
103 |       end
104 |       @pandas_categorical = load_pandas_categorical(model_str: model_str)
105 |       @params = loaded_param
106 |       @cached_feature_name = nil
107 |       self
108 |     end
109 | 
110 |     def model_to_string(num_iteration: nil, start_iteration: 0, importance_type: "split")
111 |       num_iteration ||= best_iteration
112 |       importance_type_int = feature_importance_type_mapper(importance_type)
113 |       buffer_len = 1 << 20
114 |       out_len = ::FFI::MemoryPointer.new(:int64)
115 |       out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
116 |       safe_call FFI.LGBM_BoosterSaveModelToString(@handle, start_iteration, num_iteration, importance_type_int, buffer_len, out_len, out_str)
117 |       actual_len = out_len.read_int64
118 |       if actual_len > buffer_len
119 |         out_str = ::FFI::MemoryPointer.new(:char, actual_len)
120 |         safe_call FFI.LGBM_BoosterSaveModelToString(@handle, start_iteration, num_iteration, importance_type_int, actual_len, out_len, out_str)
121 |       end
122 |       out_str.read_string
123 |     end
124 | 
125 |     def num_feature
126 |       out = ::FFI::MemoryPointer.new(:int)
127 |       safe_call FFI.LGBM_BoosterGetNumFeature(@handle, out)
128 |       out.read_int
129 |     end
130 |     alias_method :num_features, :num_feature # legacy typo
131 | 
132 |     def num_model_per_iteration
133 |       out = ::FFI::MemoryPointer.new(:int)
134 |       safe_call FFI.LGBM_BoosterNumModelPerIteration(@handle, out)
135 |       out.read_int
136 |     end
137 | 
138 |     def num_trees
139 |       out = ::FFI::MemoryPointer.new(:int)
140 |       safe_call FFI.LGBM_BoosterNumberOfTotalModel(@handle, out)
141 |       out.read_int
142 |     end
143 | 
144 |     def predict(data, start_iteration: 0, num_iteration: nil, raw_score: false, pred_leaf: false, pred_contrib: false, **kwargs)
145 |       predictor = InnerPredictor.from_booster(self, kwargs.transform_values(&:dup))
146 |       if num_iteration.nil?
147 |         if start_iteration <= 0
148 |           num_iteration = best_iteration
149 |         else
150 |           num_iteration = -1
151 |         end
152 |       end
153 |       predictor.predict(
154 |         data,
155 |         start_iteration: start_iteration,
156 |         num_iteration: num_iteration,
157 |         raw_score: raw_score,
158 |         pred_leaf: pred_leaf,
159 |         pred_contrib: pred_contrib
160 |       )
161 |     end
162 | 
163 |     def save_model(filename, num_iteration: nil, start_iteration: 0, importance_type: "split")
164 |       num_iteration ||= best_iteration
165 |       importance_type_int = feature_importance_type_mapper(importance_type)
166 |       safe_call FFI.LGBM_BoosterSaveModel(@handle, start_iteration, num_iteration, importance_type_int, filename)
167 |       self # consistent with Python API
168 |     end
169 | 
170 |     def update
171 |       finished = ::FFI::MemoryPointer.new(:int)
172 |       safe_call FFI.LGBM_BoosterUpdateOneIter(@handle, finished)
173 |       finished.read_int == 1
174 |     end
175 | 
176 |     private
177 | 
178 |     def create_handle
179 |       ::FFI::MemoryPointer.new(:pointer) do |handle|
180 |         yield handle
181 |         @handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_BoosterFree))
182 |       end
183 |     end
184 | 
185 |     def eval_counts
186 |       out = ::FFI::MemoryPointer.new(:int)
187 |       safe_call FFI.LGBM_BoosterGetEvalCounts(@handle, out)
188 |       out.read_int
189 |     end
190 | 
191 |     def eval_names
192 |       eval_counts = self.eval_counts
193 |       out_len = ::FFI::MemoryPointer.new(:int)
194 |       out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
195 |       out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts)
196 |       buffer_len = 255
197 |       str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
198 |       out_strs.write_array_of_pointer(str_ptrs)
199 |       safe_call FFI.LGBM_BoosterGetEvalNames(@handle, eval_counts, out_len, buffer_len, out_buffer_len, out_strs)
200 | 
201 |       actual_len = out_buffer_len.read(:size_t)
202 |       if actual_len > buffer_len
203 |         str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
204 |         out_strs.write_array_of_pointer(str_ptrs)
205 |         safe_call FFI.LGBM_BoosterGetEvalNames(@handle, eval_counts, out_len, actual_len, out_buffer_len, out_strs)
206 |       end
207 | 
208 |       str_ptrs.map(&:read_string)
209 |     end
210 | 
211 |     def inner_eval(name, i)
212 |       eval_names = self.eval_names
213 | 
214 |       out_len = ::FFI::MemoryPointer.new(:int)
215 |       out_results = ::FFI::MemoryPointer.new(:double, eval_names.count)
216 |       safe_call FFI.LGBM_BoosterGetEval(@handle, i, out_len, out_results)
217 |       vals = out_results.read_array_of_double(out_len.read_int)
218 | 
219 |       eval_names.zip(vals).map do |eval_name, val|
220 |         higher_better = ["auc", "ndcg@", "map@"].any? { |v| eval_name.start_with?(v) }
221 |         [name, eval_name, val, higher_better]
222 |       end
223 |     end
224 | 
225 |     def num_class
226 |       out = ::FFI::MemoryPointer.new(:int)
227 |       safe_call FFI.LGBM_BoosterGetNumClasses(@handle, out)
228 |       out.read_int
229 |     end
230 | 
231 |     def cached_feature_name
232 |       @cached_feature_name ||= feature_name
233 |     end
234 | 
235 |     def feature_importance_type_mapper(importance_type)
236 |       case importance_type
237 |       when "split"
238 |         FFI::C_API_FEATURE_IMPORTANCE_SPLIT
239 |       when "gain"
240 |         FFI::C_API_FEATURE_IMPORTANCE_GAIN
241 |       else
242 |         -1
243 |       end
244 |     end
245 | 
246 |     def load_pandas_categorical(file_name: nil, model_str: nil)
247 |       pandas_key = "pandas_categorical:"
248 |       offset = -pandas_key.length
249 |       if !file_name.nil?
250 |         max_offset = -File.size(file_name)
251 |         lines = []
252 |         File.open(file_name, "rb") do |f|
253 |           loop do
254 |             offset = [offset, max_offset].max
255 |             f.seek(offset, IO::SEEK_END)
256 |             lines = f.readlines
257 |             if lines.length >= 2 || offset == max_offset
258 |               break
259 |             end
260 |             offset *= 2
261 |           end
262 |         end
263 |         last_line = lines[-1].strip
264 |         if !last_line.start_with?(pandas_key)
265 |           last_line = lines[-2].strip
266 |         end
267 |       elsif !model_str.nil?
268 |         idx = model_str[..offset].rindex("\n")
269 |         last_line = model_str[idx..].strip
270 |       end
271 |       if last_line.start_with?(pandas_key)
272 |         pandas_categorical = JSON.parse(last_line[pandas_key.length..])
273 |         pandas_categorical.map { |cats| cats.each_with_index.to_h }
274 |       end
275 |     end
276 | 
277 |     def loaded_param
278 |       buffer_len = 1 << 20
279 |       out_len = ::FFI::MemoryPointer.new(:int64)
280 |       out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
281 |       safe_call FFI.LGBM_BoosterGetLoadedParam(@handle, buffer_len, out_len, out_str)
282 |       actual_len = out_len.read_int64
283 |       if actual_len > buffer_len
284 |         out_str = ::FFI::MemoryPointer.new(:char, actual_len)
285 |         safe_call FFI.LGBM_BoosterGetLoadedParam(@handle, actual_len, out_len, out_str)
286 |       end
287 |       JSON.parse(out_str.read_string)
288 |     end
289 |   end
290 | end
291 | 


--------------------------------------------------------------------------------
/lib/lightgbm/classifier.rb:
--------------------------------------------------------------------------------
 1 | module LightGBM
 2 |   class Classifier < Model
 3 |     def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
 4 |       super
 5 |     end
 6 | 
 7 |     def fit(x, y, eval_set: nil, eval_names: [], categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
 8 |       n_classes = y.uniq.size
 9 | 
10 |       params = @params.dup
11 |       if n_classes > 2
12 |         params[:objective] ||= "multiclass"
13 |         params[:num_class] = n_classes
14 |       else
15 |         params[:objective] ||= "binary"
16 |       end
17 | 
18 |       train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params)
19 |       valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) }
20 | 
21 |       @booster = LightGBM.train(params, train_set,
22 |         num_boost_round: @n_estimators,
23 |         early_stopping_rounds: early_stopping_rounds,
24 |         verbose_eval: verbose,
25 |         valid_sets: valid_sets,
26 |         valid_names: eval_names
27 |       )
28 |       nil
29 |     end
30 | 
31 |     def predict(data, num_iteration: nil)
32 |       y_pred = @booster.predict(data, num_iteration: num_iteration)
33 | 
34 |       if y_pred.first.is_a?(Array)
35 |         # multiple classes
36 |         y_pred.map do |v|
37 |           v.map.with_index.max_by { |v2, _| v2 }.last
38 |         end
39 |       else
40 |         y_pred.map { |v| v > 0.5 ? 1 : 0 }
41 |       end
42 |     end
43 | 
44 |     def predict_proba(data, num_iteration: nil)
45 |       y_pred = @booster.predict(data, num_iteration: num_iteration)
46 | 
47 |       if y_pred.first.is_a?(Array)
48 |         # multiple classes
49 |         y_pred
50 |       else
51 |         y_pred.map { |v| [1 - v, v] }
52 |       end
53 |     end
54 |   end
55 | end
56 | 


--------------------------------------------------------------------------------
/lib/lightgbm/dataset.rb:
--------------------------------------------------------------------------------
  1 | module LightGBM
  2 |   class Dataset
  3 |     include Utils
  4 | 
  5 |     attr_reader :data, :params
  6 | 
  7 |     def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil)
  8 |       @data = data
  9 |       @label = label
 10 |       @weight = weight
 11 |       @group = group
 12 |       @params = params
 13 |       @reference = reference
 14 |       @used_indices = used_indices
 15 |       @categorical_feature = categorical_feature
 16 |       @feature_name = feature_name || feature_names || "auto"
 17 | 
 18 |       construct
 19 |     end
 20 | 
 21 |     def label
 22 |       field("label")
 23 |     end
 24 | 
 25 |     def weight
 26 |       field("weight")
 27 |     end
 28 | 
 29 |     def feature_name
 30 |       # must preallocate space
 31 |       num_feature_names = ::FFI::MemoryPointer.new(:int)
 32 |       out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
 33 |       len = 1000
 34 |       out_strs = ::FFI::MemoryPointer.new(:pointer, len)
 35 |       buffer_len = 255
 36 |       str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
 37 |       out_strs.write_array_of_pointer(str_ptrs)
 38 |       safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, len, num_feature_names, buffer_len, out_buffer_len, out_strs)
 39 | 
 40 |       num_features = num_feature_names.read_int
 41 |       actual_len = out_buffer_len.read(:size_t)
 42 |       if num_features > len || actual_len > buffer_len
 43 |         out_strs = ::FFI::MemoryPointer.new(:pointer, num_features) if num_features > len
 44 |         str_ptrs = num_features.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
 45 |         out_strs.write_array_of_pointer(str_ptrs)
 46 |         safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, num_features, num_feature_names, actual_len, out_buffer_len, out_strs)
 47 |       end
 48 | 
 49 |       # should be the same, but get number of features
 50 |       # from most recent call (instead of num_features)
 51 |       str_ptrs[0, num_feature_names.read_int].map(&:read_string)
 52 |     end
 53 |     alias_method :feature_names, :feature_name
 54 | 
 55 |     def label=(label)
 56 |       @label = label
 57 |       set_field("label", label)
 58 |     end
 59 | 
 60 |     def weight=(weight)
 61 |       @weight = weight
 62 |       set_field("weight", weight)
 63 |     end
 64 | 
 65 |     def group=(group)
 66 |       @group = group
 67 |       set_field("group", group, type: :int32)
 68 |     end
 69 | 
 70 |     def feature_name=(feature_names)
 71 |       feature_names = feature_names.map(&:to_s)
 72 |       @feature_names = feature_names
 73 |       c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
 74 |       # keep reference to string pointers
 75 |       str_ptrs = feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) }
 76 |       c_feature_names.write_array_of_pointer(str_ptrs)
 77 |       safe_call FFI.LGBM_DatasetSetFeatureNames(@handle, c_feature_names, feature_names.size)
 78 |     end
 79 |     alias_method :feature_names=, :feature_name=
 80 | 
 81 |     # TODO only update reference if not in chain
 82 |     def reference=(reference)
 83 |       if reference != @reference
 84 |         @reference = reference
 85 |         construct
 86 |       end
 87 |     end
 88 | 
 89 |     def num_data
 90 |       out = ::FFI::MemoryPointer.new(:int)
 91 |       safe_call FFI.LGBM_DatasetGetNumData(@handle, out)
 92 |       out.read_int
 93 |     end
 94 | 
 95 |     def num_feature
 96 |       out = ::FFI::MemoryPointer.new(:int)
 97 |       safe_call FFI.LGBM_DatasetGetNumFeature(@handle, out)
 98 |       out.read_int
 99 |     end
100 | 
101 |     def save_binary(filename)
102 |       safe_call FFI.LGBM_DatasetSaveBinary(@handle, filename)
103 |     end
104 | 
105 |     def subset(used_indices, params: nil)
106 |       # categorical_feature passed via params
107 |       params ||= self.params
108 |       Dataset.new(nil,
109 |         params: params,
110 |         reference: self,
111 |         used_indices: used_indices
112 |       )
113 |     end
114 | 
115 |     def handle
116 |       @handle
117 |     end
118 | 
119 |     private
120 | 
121 |     def construct
122 |       data = @data
123 |       used_indices = @used_indices
124 | 
125 |       # TODO stringify params
126 |       params = @params || {}
127 |       if @categorical_feature != "auto" && @categorical_feature.any?
128 |         params["categorical_feature"] ||= @categorical_feature.join(",")
129 |       end
130 |       set_verbosity(params)
131 | 
132 |       handle = ::FFI::MemoryPointer.new(:pointer)
133 |       parameters = params_str(params)
134 |       reference = @reference.handle if @reference
135 |       if used_indices
136 |         used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
137 |         used_row_indices.write_array_of_int32(used_indices)
138 |         safe_call FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, handle)
139 |       elsif data.is_a?(String)
140 |         safe_call FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, handle)
141 |       else
142 |         if matrix?(data)
143 |           nrow = data.row_count
144 |           ncol = data.column_count
145 |           flat_data = data.to_a.flatten
146 |         elsif daru?(data)
147 |           if @feature_name == "auto"
148 |             @feature_name = data.vectors.to_a
149 |           end
150 |           nrow, ncol = data.shape
151 |           flat_data = data.map_rows(&:to_a).flatten
152 |         elsif numo?(data)
153 |           nrow, ncol = data.shape
154 |         elsif rover?(data)
155 |           if @feature_name == "auto"
156 |             @feature_name = data.keys
157 |           end
158 |           data = data.to_numo
159 |           nrow, ncol = data.shape
160 |         elsif data.is_a?(Array) && data.first.is_a?(Hash)
161 |           keys = data.first.keys
162 |           if @feature_name == "auto"
163 |             @feature_name = keys
164 |           end
165 |           nrow = data.count
166 |           ncol = data.first.count
167 |           flat_data = data.flat_map { |v| v.fetch_values(*keys) }
168 |         else
169 |           data = data.to_a
170 |           check_2d_array(data)
171 |           nrow = data.count
172 |           ncol = data.first.count
173 |           flat_data = data.flatten
174 |         end
175 | 
176 |         c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
177 |         if numo?(data)
178 |           c_data.write_bytes(data.cast_to(Numo::DFloat).to_string)
179 |         else
180 |           handle_missing(flat_data)
181 |           c_data.write_array_of_double(flat_data)
182 |         end
183 | 
184 |         safe_call FFI.LGBM_DatasetCreateFromMat(c_data, FFI::C_API_DTYPE_FLOAT64, nrow, ncol, 1, parameters, reference, handle)
185 |       end
186 |       if used_indices
187 |         @handle = handle.read_pointer
188 |       else
189 |         @handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_DatasetFree))
190 |       end
191 | 
192 |       self.label = @label if @label
193 |       self.weight = @weight if @weight
194 |       self.group = @group if @group
195 |       self.feature_name = @feature_name if @feature_name && @feature_name != "auto"
196 |     end
197 | 
198 |     def dump_text(filename)
199 |       safe_call FFI.LGBM_DatasetDumpText(@handle, filename)
200 |     end
201 | 
202 |     def field(field_name)
203 |       num_data = self.num_data
204 |       out_len = ::FFI::MemoryPointer.new(:int)
205 |       out_ptr = ::FFI::MemoryPointer.new(:float, num_data)
206 |       out_type = ::FFI::MemoryPointer.new(:int)
207 |       safe_call FFI.LGBM_DatasetGetField(@handle, field_name, out_len, out_ptr, out_type)
208 |       out_ptr.read_pointer.read_array_of_float(num_data)
209 |     end
210 | 
211 |     def set_field(field_name, data, type: :float)
212 |       data = data.to_a unless data.is_a?(Array)
213 |       if type == :int32
214 |         c_data = ::FFI::MemoryPointer.new(:int32, data.count)
215 |         c_data.write_array_of_int32(data)
216 |         safe_call FFI.LGBM_DatasetSetField(@handle, field_name, c_data, data.count, 2)
217 |       else
218 |         c_data = ::FFI::MemoryPointer.new(:float, data.count)
219 |         c_data.write_array_of_float(data)
220 |         safe_call FFI.LGBM_DatasetSetField(@handle, field_name, c_data, data.count, 0)
221 |       end
222 |     end
223 |   end
224 | end
225 | 


--------------------------------------------------------------------------------
/lib/lightgbm/ffi.rb:
--------------------------------------------------------------------------------
 1 | module LightGBM
 2 |   module FFI
 3 |     extend ::FFI::Library
 4 | 
 5 |     begin
 6 |       ffi_lib LightGBM.ffi_lib
 7 |     rescue LoadError => e
 8 |       if ["/usr/local", "/opt/homebrew"].any? { |v| e.message.include?("Library not loaded: #{v}/opt/libomp/lib/libomp.dylib") } && e.message.include?("Reason: image not found")
 9 |         raise LoadError, "OpenMP not found. Run `brew install libomp`"
10 |       else
11 |         raise e
12 |       end
13 |     end
14 | 
15 |     # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
16 |     # keep same order
17 | 
18 |     C_API_DTYPE_FLOAT32 = 0
19 |     C_API_DTYPE_FLOAT64 = 1
20 |     C_API_DTYPE_INT32 = 2
21 |     C_API_DTYPE_INT64 = 3
22 | 
23 |     C_API_PREDICT_NORMAL = 0
24 |     C_API_PREDICT_RAW_SCORE = 1
25 |     C_API_PREDICT_LEAF_INDEX = 2
26 |     C_API_PREDICT_CONTRIB = 3
27 | 
28 |     C_API_FEATURE_IMPORTANCE_SPLIT = 0
29 |     C_API_FEATURE_IMPORTANCE_GAIN = 1
30 | 
31 |     # error
32 |     attach_function :LGBM_GetLastError, %i[], :string
33 | 
34 |     # dataset
35 |     attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
36 |     attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
37 |     attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
38 |     attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
39 |     attach_function :LGBM_DatasetGetFeatureNames, %i[pointer int pointer size_t pointer pointer], :int
40 |     attach_function :LGBM_DatasetFree, %i[pointer], :int
41 |     attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
42 |     attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
43 |     attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
44 |     attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
45 |     attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
46 |     attach_function :LGBM_DatasetGetNumFeature, %i[pointer pointer], :int
47 | 
48 |     # booster
49 |     attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int
50 |     attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int
51 |     attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int
52 |     attach_function :LGBM_BoosterGetLoadedParam, %i[pointer int64 pointer pointer], :int
53 |     attach_function :LGBM_BoosterFree, %i[pointer], :int
54 |     attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int
55 |     attach_function :LGBM_BoosterGetNumClasses, %i[pointer pointer], :int
56 |     attach_function :LGBM_BoosterUpdateOneIter, %i[pointer pointer], :int
57 |     attach_function :LGBM_BoosterGetCurrentIteration, %i[pointer pointer], :int
58 |     attach_function :LGBM_BoosterNumModelPerIteration, %i[pointer pointer], :int
59 |     attach_function :LGBM_BoosterNumberOfTotalModel, %i[pointer pointer], :int
60 |     attach_function :LGBM_BoosterGetEvalCounts, %i[pointer pointer], :int
61 |     attach_function :LGBM_BoosterGetEvalNames, %i[pointer int pointer size_t pointer pointer], :int
62 |     attach_function :LGBM_BoosterGetFeatureNames, %i[pointer int pointer size_t pointer pointer], :int
63 |     attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
64 |     attach_function :LGBM_BoosterGetEval, %i[pointer int pointer pointer], :int
65 |     attach_function :LGBM_BoosterCalcNumPredict, %i[pointer int int int int pointer], :int
66 |     attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int int string pointer pointer], :int
67 |     attach_function :LGBM_BoosterSaveModel, %i[pointer int int int string], :int
68 |     attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int int64 pointer pointer], :int
69 |     attach_function :LGBM_BoosterDumpModel, %i[pointer int int int int64 pointer pointer], :int
70 |     attach_function :LGBM_BoosterFeatureImportance, %i[pointer int int pointer], :int
71 |   end
72 | end
73 | 


--------------------------------------------------------------------------------
/lib/lightgbm/inner_predictor.rb:
--------------------------------------------------------------------------------
  1 | module LightGBM
  2 |   class InnerPredictor
  3 |     include Utils
  4 | 
  5 |     MAX_INT32 = (1 << 31) - 1
  6 | 
  7 |     def initialize(booster, pred_parameter)
  8 |       @handle = booster.instance_variable_get(:@handle)
  9 |       @pandas_categorical = booster.instance_variable_get(:@pandas_categorical)
 10 |       @pred_parameter = params_str(pred_parameter)
 11 | 
 12 |       # keep booster for cached_feature_name
 13 |       @booster = booster
 14 |     end
 15 | 
 16 |     def self.from_booster(booster, pred_parameter)
 17 |       new(booster, pred_parameter)
 18 |     end
 19 | 
 20 |     def predict(data, start_iteration: 0, num_iteration: -1, raw_score: false, pred_leaf: false, pred_contrib: false)
 21 |       if data.is_a?(Dataset)
 22 |         raise TypeError, "Cannot use Dataset instance for prediction, please use raw data instead"
 23 |       end
 24 | 
 25 |       predict_type = FFI::C_API_PREDICT_NORMAL
 26 |       if raw_score
 27 |         predict_type = FFI::C_API_PREDICT_RAW_SCORE
 28 |       end
 29 |       if pred_leaf
 30 |         predict_type = FFI::C_API_PREDICT_LEAF_INDEX
 31 |       end
 32 |       if pred_contrib
 33 |         predict_type = FFI::C_API_PREDICT_CONTRIB
 34 |       end
 35 | 
 36 |       if daru?(data)
 37 |         data = data[*cached_feature_name].map_rows(&:to_a)
 38 |         singular = false
 39 |       elsif data.is_a?(Hash) # sort feature.values to match the order of model.feature_name
 40 |         data = [sorted_feature_values(data)]
 41 |         singular = true
 42 |       elsif data.is_a?(Array) && data.first.is_a?(Hash) # on multiple elems, if 1st is hash, assume they all are
 43 |         data = data.map(&method(:sorted_feature_values))
 44 |         singular = false
 45 |       elsif rover?(data)
 46 |         # TODO improve performance
 47 |         data = data[cached_feature_name].to_numo.to_a
 48 |         singular = false
 49 |       else
 50 |         data = data.to_a
 51 |         singular = !data.first.is_a?(Array)
 52 |         data = [data] if singular
 53 |         check_2d_array(data)
 54 |         data = data.map(&:dup) if @pandas_categorical&.any?
 55 |       end
 56 | 
 57 |       if @pandas_categorical&.any?
 58 |         apply_pandas_categorical(
 59 |           data,
 60 |           @booster.params["categorical_feature"],
 61 |           @pandas_categorical
 62 |         )
 63 |       end
 64 | 
 65 |       preds, nrow =
 66 |         pred_for_array(
 67 |           data,
 68 |           start_iteration,
 69 |           num_iteration,
 70 |           predict_type
 71 |         )
 72 | 
 73 |       if pred_leaf
 74 |         preds = preds.map(&:to_i)
 75 |       end
 76 | 
 77 |       if preds.size != nrow
 78 |         if preds.size % nrow == 0
 79 |           preds = preds.each_slice(preds.size / nrow).to_a
 80 |         else
 81 |           raise Error, "Length of predict result (#{preds.size}) cannot be divide nrow (#{nrow})"
 82 |         end
 83 |       end
 84 | 
 85 |       singular ? preds.first : preds
 86 |     end
 87 | 
 88 |     private
 89 | 
 90 |     def pred_for_array(input, start_iteration, num_iteration, predict_type)
 91 |       nrow = input.count
 92 |       if nrow > MAX_INT32
 93 |         raise Error, "Not supported"
 94 |       end
 95 |       inner_predict_array(
 96 |         input,
 97 |         start_iteration,
 98 |         num_iteration,
 99 |         predict_type
100 |       )
101 |     end
102 | 
103 |     def inner_predict_array(input, start_iteration, num_iteration, predict_type)
104 |       n_preds =
105 |         num_preds(
106 |           start_iteration,
107 |           num_iteration,
108 |           input.count,
109 |           predict_type
110 |         )
111 | 
112 |       flat_input = input.flatten
113 |       handle_missing(flat_input)
114 |       data = ::FFI::MemoryPointer.new(:double, input.count * input.first.count)
115 |       data.write_array_of_double(flat_input)
116 | 
117 |       out_num_preds = ::FFI::MemoryPointer.new(:int64)
118 |       out_result = ::FFI::MemoryPointer.new(:double, n_preds)
119 |       safe_call FFI.LGBM_BoosterPredictForMat(@handle, data, FFI::C_API_DTYPE_FLOAT64, input.count, input.first.count, 1, predict_type, start_iteration, num_iteration, @pred_parameter, out_num_preds, out_result)
120 |       if n_preds != out_num_preds.read_int64
121 |         raise Error, "Wrong length for predict results"
122 |       end
123 |       preds = out_result.read_array_of_double(out_num_preds.read_int64)
124 |       [preds, input.count]
125 |     end
126 | 
127 |     def num_preds(start_iteration, num_iteration, nrow, predict_type)
128 |       out = ::FFI::MemoryPointer.new(:int64)
129 |       safe_call FFI.LGBM_BoosterCalcNumPredict(@handle, nrow, predict_type, start_iteration, num_iteration, out)
130 |       out.read_int64
131 |     end
132 | 
133 |     def sorted_feature_values(input_hash)
134 |       input_hash.transform_keys(&:to_s).fetch_values(*cached_feature_name)
135 |     end
136 | 
137 |     def cached_feature_name
138 |       @booster.send(:cached_feature_name)
139 |     end
140 | 
141 |     def apply_pandas_categorical(data, categorical_feature, pandas_categorical)
142 |       (categorical_feature || []).each_with_index do |cf, i|
143 |         cat_codes = pandas_categorical[i]
144 |         data.each do |r|
145 |           cat = r[cf]
146 |           unless cat.nil?
147 |             r[cf] =
148 |               cat_codes.fetch(cat) do
149 |                 unless cat.is_a?(String)
150 |                   raise ArgumentError, "expected categorical value"
151 |                 end
152 |                 nil
153 |               end
154 |           end
155 |         end
156 |       end
157 |     end
158 |   end
159 | end
160 | 


--------------------------------------------------------------------------------
/lib/lightgbm/model.rb:
--------------------------------------------------------------------------------
 1 | module LightGBM
 2 |   class Model
 3 |     attr_reader :booster
 4 | 
 5 |     def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
 6 |       @params = {
 7 |         num_leaves: num_leaves,
 8 |         learning_rate: learning_rate
 9 |       }.merge(options)
10 |       @params[:objective] = objective if objective
11 |       @n_estimators = n_estimators
12 |     end
13 | 
14 |     def save_model(fname)
15 |       @booster.save_model(fname)
16 |     end
17 | 
18 |     def load_model(fname)
19 |       @booster = Booster.new(model_file: fname)
20 |     end
21 | 
22 |     def best_iteration
23 |       @booster.best_iteration
24 |     end
25 | 
26 |     def feature_importances
27 |       @booster.feature_importance
28 |     end
29 |   end
30 | end
31 | 


--------------------------------------------------------------------------------
/lib/lightgbm/ranker.rb:
--------------------------------------------------------------------------------
 1 | module LightGBM
 2 |   class Ranker < Model
 3 |     def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "lambdarank", **options)
 4 |       super
 5 |     end
 6 | 
 7 |     def fit(x, y, group:, categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
 8 |       train_set = Dataset.new(x, label: y, group: group, categorical_feature: categorical_feature)
 9 |       @booster = LightGBM.train(@params, train_set,
10 |         num_boost_round: @n_estimators,
11 |         early_stopping_rounds: early_stopping_rounds,
12 |         verbose_eval: verbose
13 |       )
14 |       nil
15 |     end
16 | 
17 |     def predict(data, num_iteration: nil)
18 |       @booster.predict(data, num_iteration: num_iteration)
19 |     end
20 |   end
21 | end
22 | 


--------------------------------------------------------------------------------
/lib/lightgbm/regressor.rb:
--------------------------------------------------------------------------------
 1 | module LightGBM
 2 |   class Regressor < Model
 3 |     def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "regression", **options)
 4 |       super
 5 |     end
 6 | 
 7 |     def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
 8 |       train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
 9 |       valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }
10 | 
11 |       @booster = LightGBM.train(@params, train_set,
12 |         num_boost_round: @n_estimators,
13 |         early_stopping_rounds: early_stopping_rounds,
14 |         verbose_eval: verbose,
15 |         valid_sets: valid_sets,
16 |         valid_names: eval_names
17 |       )
18 |       nil
19 |     end
20 | 
21 |     def predict(data, num_iteration: nil)
22 |       @booster.predict(data, num_iteration: num_iteration)
23 |     end
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/lib/lightgbm/utils.rb:
--------------------------------------------------------------------------------
 1 | module LightGBM
 2 |   module Utils
 3 |     private
 4 | 
 5 |     def safe_call(err)
 6 |       raise Error, FFI.LGBM_GetLastError if err != 0
 7 |     end
 8 | 
 9 |     # remove spaces in keys and values to prevent injection
10 |     def params_str(params)
11 |       params.map { |k, v| [check_param(k.to_s), check_param(Array(v).join(",").to_s)].join("=") }.join(" ")
12 |     end
13 | 
14 |     def check_param(v)
15 |       raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
16 |       v
17 |     end
18 | 
19 |     # change default verbosity
20 |     def set_verbosity(params)
21 |       params_keys = params.keys.map(&:to_s)
22 |       unless params_keys.include?("verbosity")
23 |         params["verbosity"] = -1
24 |       end
25 |     end
26 | 
27 |     def check_2d_array(data)
28 |       ncol = data.first&.size || 0
29 |       if !data.all? { |r| r.size == ncol }
30 |         raise ArgumentError, "Rows have different sizes"
31 |       end
32 |     end
33 | 
34 |     # for categorical, NaN and negative value are the same
35 |     def handle_missing(data)
36 |       data.map! { |v| v.nil? ? Float::NAN : v }
37 |     end
38 | 
39 |     def matrix?(data)
40 |       defined?(Matrix) && data.is_a?(Matrix)
41 |     end
42 | 
43 |     def daru?(data)
44 |       defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
45 |     end
46 | 
47 |     def numo?(data)
48 |       defined?(Numo::NArray) && data.is_a?(Numo::NArray)
49 |     end
50 | 
51 |     def rover?(data)
52 |       defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
53 |     end
54 |   end
55 | end
56 | 


--------------------------------------------------------------------------------
/lib/lightgbm/version.rb:
--------------------------------------------------------------------------------
module LightGBM
  # Gem version; the gemspec reads it as the specification version.
  VERSION = "0.4.1"
end
4 | 


--------------------------------------------------------------------------------
/lightgbm.gemspec:
--------------------------------------------------------------------------------
 1 | require_relative "lib/lightgbm/version"
 2 | 
 3 | Gem::Specification.new do |spec|
 4 |   spec.name          = "lightgbm"
 5 |   spec.version       = LightGBM::VERSION
 6 |   spec.summary       = "High performance gradient boosting for Ruby"
 7 |   spec.homepage      = "https://github.com/ankane/lightgbm-ruby"
 8 |   spec.license       = "MIT"
 9 | 
10 |   spec.author        = "Andrew Kane"
11 |   spec.email         = "andrew@ankane.org"
12 | 
13 |   spec.files         = Dir["*.{md,txt}", "{lib,vendor}/**/*"]
14 |   spec.require_path  = "lib"
15 | 
16 |   spec.required_ruby_version = ">= 3.1"
17 | 
18 |   spec.add_dependency "ffi"
19 | end
20 | 


--------------------------------------------------------------------------------
/test/booster_test.rb:
--------------------------------------------------------------------------------
  1 | require_relative "test_helper"
  2 | 
  3 | class BoosterTest < Minitest::Test
  # A booster loaded from a model file reproduces the reference predictions.
  def test_model_file
    rows = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
    expected = [0.9823112229173586, 0.9583143724610858]
    loaded = LightGBM::Booster.new(model_file: "test/support/model.txt")
    assert_elements_in_delta expected, loaded.predict(rows).first(2)
  end
 10 | 
 11 |   def test_model_str
 12 |     x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
 13 |     booster = LightGBM::Booster.new(model_str: File.read("test/support/model.txt"))
 14 |     y_pred = booster.predict(x_test)
 15 |     assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], y_pred.first(2)
 16 |   end
 17 | 
 18 |   def test_model_from_string
 19 |     x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
 20 |     booster = LightGBM.train(binary_params, binary_train)
 21 |     booster.model_from_string(File.read("test/support/model.txt"))
 22 |     y_pred = booster.predict(x_test)
 23 |     assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], y_pred.first(2)
 24 |   end
 25 | 
 26 |   def test_feature_importance
 27 |     assert_equal [280, 285, 335, 148], booster.feature_importance
 28 |   end
 29 | 
 30 |   def test_feature_name
 31 |     assert_equal ["x0", "x1", "x2", "x3"], booster.feature_name
 32 |   end
 33 | 
 34 |   def test_feature_importance_bad_importance_type
 35 |     error = assert_raises(LightGBM::Error) do
 36 |       booster.feature_importance(importance_type: "bad")
 37 |     end
 38 |     assert_includes error.message, "Unknown importance type"
 39 |   end
 40 | 
 41 |   def test_predict_hash
 42 |     pred = booster.predict({x0: 3.7, x1: 1.2, x2: 7.2, x3: 9.0})
 43 |     assert_in_delta 0.9823112229173586, pred
 44 | 
 45 |     pred = booster.predict({"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7})
 46 |     assert_in_delta 0.9823112229173586, pred
 47 | 
 48 |     pred =
 49 |       booster.predict([
 50 |         {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
 51 |         {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
 52 |       ])
 53 |     assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2)
 54 | 
 55 |     assert_raises(KeyError) do
 56 |       booster.predict({"x0" => 3.7})
 57 |     end
 58 |   end
 59 | 
 60 |   def test_predict_daru
 61 |     x_test =
 62 |       Daru::DataFrame.new([
 63 |         {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
 64 |         {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
 65 |       ])
 66 |     pred = booster.predict(x_test)
 67 |     assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2)
 68 | 
 69 |     assert_raises(IndexError) do
 70 |       booster.predict(Daru::DataFrame.new([{"x0" => 3.7}]))
 71 |     end
 72 |   end
 73 | 
 74 |   def test_predict_rover
 75 |     skip unless numo?
 76 | 
 77 |     require "rover"
 78 |     x_test =
 79 |       Rover::DataFrame.new([
 80 |         {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7},
 81 |         {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5}
 82 |       ])
 83 |     pred = booster.predict(x_test)
 84 |     assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2)
 85 | 
 86 |     assert_raises(KeyError) do
 87 |       booster.predict(Rover::DataFrame.new([{"x0" => 3.7}]))
 88 |     end
 89 |   end
 90 | 
 91 |   def test_predict_array_different_sizes
 92 |     x_test = [[1, 2], [3, 4, 5]]
 93 |     error = assert_raises(ArgumentError) do
 94 |       booster.predict(x_test)
 95 |     end
 96 |     assert_equal "Rows have different sizes", error.message
 97 |   end
 98 | 
 99 |   def test_predict_raw_score
100 |     x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
101 |     expected = [0.9823112229173586, 0.9583143724610858]
102 | 
103 |     y_pred = booster.predict(x_test, raw_score: true)
104 |     assert_elements_in_delta expected, y_pred.first(2)
105 | 
106 |     y_pred = booster.predict(x_test[0], raw_score: true)
107 |     assert_in_delta expected[0], y_pred
108 |   end
109 | 
110 |   def test_predict_pred_leaf
111 |     x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
112 |     expected = [[9, 8, 8, 11, 8, 6, 10, 12, 1, 10, 9, 10, 12, 5, 11, 9, 6, 4, 5, 12, 9, 11, 9, 11, 2, 10, 2, 10, 3, 5, 10, 6, 1, 5, 10, 10, 9, 4, 5, 4, 6, 5, 6, 6, 4, 6, 4, 10, 10, 3, 4, 4, 6, 3, 9, 11, 5, 4, 3, 6, 7, 3, 6, 7, 5, 10, 10, 6, 4, 5, 5, 9, 6, 6, 2, 2, 4, 9, 4, 3, 9, 4, 6, 11, 5, 5, 0, 9, 12, 10, 12, 4, 0, 8, 4, 8, 11, 0, 3, 10], [6, 1, 9, 7, 9, 8, 1, 7, 5, 1, 1, 1, 9, 10, 1, 1, 10, 9, 1, 11, 8, 2, 10, 3, 5, 10, 6, 0, 2, 5, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 10, 0, 0, 2, 0, 0, 9, 2, 9, 3, 1, 2, 2, 7, 9, 10, 1, 4, 4, 9, 10, 0, 1, 3, 11, 2, 5, 1, 1, 7, 8, 5, 1, 10, 10, 5, 4, 1, 10, 2, 1, 4, 2, 2, 2, 2, 10, 2, 9, 2, 11, 2, 5, 1, 11, 2, 9, 7, 7]]
113 | 
114 |     y_pred = booster.predict(x_test, pred_leaf: true)
115 |     assert_equal expected, y_pred.first(2)
116 | 
117 |     y_pred = booster.predict(x_test[0], pred_leaf: true)
118 |     assert_equal expected[0], y_pred
119 |   end
120 | 
121 |   def test_predict_pred_contrib
122 |     x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]]
123 |     expected = [[-0.0733949225678886, -0.24289592050101766, 0.24183795683166504, 0.063430775771174, 0.9933333333834246], [0.1094902954684793, -0.2810485083947154, 0.26691627597706397, -0.13037702397316747, 0.9933333333834246]]
124 | 
125 |     y_pred = booster.predict(x_test, pred_contrib: true)
126 |     assert_elements_in_delta expected[0], y_pred[0]
127 |     assert_elements_in_delta expected[1], y_pred[1]
128 | 
129 |     y_pred = booster.predict(x_test[0], pred_contrib: true)
130 |     assert_elements_in_delta expected[0], y_pred
131 |   end
132 | 
133 |   def test_predict_pandas_categorical_model_file
134 |     x_test = [[3.7, 1.2, 7.2, "cat9"], [7.5, 0.5, 7.9, "cat0"]]
135 |     booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt")
136 |     y_pred = booster.predict(x_test)
137 |     assert_elements_in_delta [0.996415541144579, 1.0809369939979934], y_pred.first(2)
138 |   end
139 | 
140 |   def test_predict_pandas_categorical_model_str
141 |     x_test = [[3.7, 1.2, 7.2, "cat9"], [7.5, 0.5, 7.9, "cat0"]]
142 |     booster = LightGBM::Booster.new(model_str: File.read("test/support/categorical.txt"))
143 |     y_pred = booster.predict(x_test)
144 |     assert_elements_in_delta [0.996415541144579, 1.0809369939979934], y_pred.first(2)
145 |   end
146 | 
147 |   def test_predict_pandas_categorical_missing_category
148 |     booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt")
149 |     assert_in_delta 0.996415541144579, booster.predict([3.7, 1.2, 7.2, nil])
150 |   end
151 | 
152 |   def test_predict_pandas_categorical_new_category
153 |     booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt")
154 |     assert_in_delta 0.996415541144579, booster.predict([3.7, 1.2, 7.2, "cat10"])
155 |   end
156 | 
157 |   def test_predict_pandas_categorical_invalid_category
158 |     booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt")
159 |     error = assert_raises(ArgumentError) do
160 |       booster.predict([7.5, 0.5, 7.9, true])
161 |     end
162 |     assert_equal "expected categorical value", error.message
163 |   end
164 | 
165 |   def test_model_to_string
166 |     assert booster.model_to_string
167 |   end
168 | 
169 |   def test_to_json
170 |     assert JSON.parse(booster.to_json)
171 |   end
172 | 
173 |   def test_dump_model
174 |     assert JSON.parse(booster.dump_model)
175 |   end
176 | 
177 |   def test_current_iteration
178 |     assert_equal 100, booster.current_iteration
179 |   end
180 | 
181 |   def test_num_model_per_iteration
182 |     assert_equal 1, booster.num_model_per_iteration
183 |   end
184 | 
185 |   def test_num_trees
186 |     assert_equal 100, booster.num_trees
187 |   end
188 | 
189 |   def test_copy
190 |     booster.dup
191 |     booster.clone
192 |   end
193 | 
194 |   private
195 | 
196 |   def booster
197 |     @booster ||= LightGBM::Booster.new(model_file: "test/support/model.txt")
198 |   end
199 | end
200 | 


--------------------------------------------------------------------------------
/test/classifier_test.rb:
--------------------------------------------------------------------------------
  1 | require_relative "test_helper"
  2 | 
  3 | class ClassifierTest < Minitest::Test
  4 |   def test_binary
  5 |     x_train, y_train, x_test, _ = binary_data
  6 | 
  7 |     model = LightGBM::Classifier.new
  8 |     model.fit(x_train, y_train)
  9 |     y_pred = model.predict(x_test)
 10 |     expected = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1]
 11 |     assert_equal expected, y_pred.first(100)
 12 | 
 13 |     y_pred_proba = model.predict_proba(x_test)
 14 |     expected = [9.243317488749625e-06, 0.9999907566825113]
 15 |     assert_elements_in_delta expected, y_pred_proba.first
 16 | 
 17 |     expected = [399, 367, 419, 140]
 18 |     assert_equal expected, model.feature_importances
 19 | 
 20 |     model.save_model(tempfile)
 21 | 
 22 |     model = LightGBM::Classifier.new
 23 |     model.load_model(tempfile)
 24 |     assert_equal y_pred, model.predict(x_test)
 25 |   end
 26 | 
 27 |   def test_multiclass
 28 |     x_train, y_train, x_test, _ = multiclass_data
 29 | 
 30 |     model = LightGBM::Classifier.new
 31 |     model.fit(x_train, y_train)
 32 |     y_pred = model.predict(x_test)
 33 |     expected = [1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1]
 34 |     assert_equal expected, y_pred.first(100)
 35 | 
 36 |     y_pred_proba = model.predict_proba(x_test)
 37 |     expected = [0.00036627031584163575, 0.9456350323547973, 0.053998697329361176]
 38 |     assert_elements_in_delta expected, y_pred_proba.first
 39 | 
 40 |     expected = [1118, 1060, 1272, 441]
 41 |     assert_equal expected, model.feature_importances
 42 | 
 43 |     model.save_model(tempfile)
 44 | 
 45 |     model = LightGBM::Classifier.new
 46 |     model.load_model(tempfile)
 47 |     assert_equal y_pred, model.predict(x_test)
 48 |   end
 49 | 
 50 |   def test_early_stopping
 51 |     x_train, y_train, x_test, y_test = multiclass_data
 52 | 
 53 |     model = LightGBM::Classifier.new
 54 |     model.fit(x_train, y_train, early_stopping_rounds: 5, eval_set: [[x_test, y_test]], verbose: false)
 55 |     assert_equal 54, model.best_iteration
 56 |   end
 57 | 
 58 |   def test_missing_numeric
 59 |     x_train, y_train, x_test, _ = multiclass_data
 60 | 
 61 |     x_train = x_train.map(&:dup)
 62 |     x_test = x_test.map(&:dup)
 63 |     [x_train, x_test].each do |xt|
 64 |       xt.each do |x|
 65 |         x.size.times do |i|
 66 |           x[i] = nil if x[i] == 3.7
 67 |         end
 68 |       end
 69 |     end
 70 | 
 71 |     model = LightGBM::Classifier.new
 72 |     model.fit(x_train, y_train)
 73 | 
 74 |     y_pred = model.predict(x_test)
 75 |     expected = [1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1]
 76 |     assert_equal expected, y_pred.first(100)
 77 | 
 78 |     expected = [1140, 1046, 1309, 427]
 79 |     assert_equal expected, model.feature_importances
 80 |   end
 81 | 
 82 |   def test_missing_categorical
 83 |     x_train, y_train, x_test, _ = multiclass_data
 84 | 
 85 |     x_train = x_train.map(&:dup)
 86 |     x_test = x_test.map(&:dup)
 87 |     [x_train, x_test].each do |xt|
 88 |       xt.each do |x|
 89 |         x[3] = nil if x[3] > 7
 90 |       end
 91 |     end
 92 | 
 93 |     model = LightGBM::Classifier.new
 94 |     model.fit(x_train, y_train, categorical_feature: [3])
 95 | 
 96 |     y_pred = model.predict(x_test)
 97 |     expected = [1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1]
 98 |     assert_equal expected, y_pred.first(100)
 99 | 
100 |     expected = [1228, 1265, 1446, 30]
101 |     assert_equal expected, model.feature_importances
102 |   end
103 | end
104 | 


--------------------------------------------------------------------------------
/test/cv_test.rb:
--------------------------------------------------------------------------------
 1 | require_relative "test_helper"
 2 | 
 3 | class CvTest < Minitest::Test
 4 |   def test_regression
 5 |     # need to set stratified=False in Python
 6 |     eval_hist = LightGBM.cv(regression_params, regression_train, shuffle: false)
 7 |     assert_in_delta 0.2597565400783163, eval_hist["l2-mean"].first
 8 |     assert_in_delta 0.10267769399880997, eval_hist["l2-mean"].last
 9 |     assert_in_delta 0.07283200245299197, eval_hist["l2-stdv"].first
10 |     assert_in_delta 0.019704697369123978, eval_hist["l2-stdv"].last
11 |   end
12 | 
13 |   def test_binary
14 |     # need to set stratified=False in Python
15 |     eval_hist = LightGBM.cv(binary_params, binary_train, shuffle: false)
16 |     assert_in_delta 0.38594176939006153, eval_hist["binary_logloss-mean"].first
17 |     assert_in_delta 0.13445744661816397, eval_hist["binary_logloss-mean"].last
18 |     assert_in_delta 0.09986377563273867, eval_hist["binary_logloss-stdv"].first
19 |     assert_in_delta 0.0463093558415842, eval_hist["binary_logloss-stdv"].last
20 |   end
21 | 
22 |   def test_multiclass
23 |     # need to set stratified=False in Python
24 |     eval_hist = LightGBM.cv(multiclass_params, multiclass_train, shuffle: false)
25 |     assert_in_delta 0.7352745822291095, eval_hist["multi_logloss-mean"].first
26 |     assert_in_delta 0.40375560053885506, eval_hist["multi_logloss-mean"].last
27 |     assert_in_delta 0.11256739058587856, eval_hist["multi_logloss-stdv"].first
28 |     assert_in_delta 0.1779828373201067, eval_hist["multi_logloss-stdv"].last
29 |   end
30 | 
31 |   def test_early_stopping_early
32 |     eval_hist = nil
33 |     stdout, _ = capture_io do
34 |       eval_hist = LightGBM.cv(regression_params, regression_train, shuffle: false, verbose_eval: true, early_stopping_rounds: 5)
35 |     end
36 |     assert_equal 36, eval_hist["l2-mean"].size
37 |     assert_includes stdout, "[41]\tcv_agg's l2: 0.0988604 + 0.0243197"
38 |     refute_includes stdout, "[42]"
39 |   end
40 | 
41 |   def test_early_stopping_not_early
42 |     eval_hist = nil
43 |     stdout, _ = capture_io do
44 |       eval_hist = LightGBM.cv(regression_params, regression_train, shuffle: false, verbose_eval: true, early_stopping_rounds: 500)
45 |     end
46 |     assert_equal 36, eval_hist["l2-mean"].size
47 |     assert_includes stdout, "[100]\tcv_agg's l2: 0.102678 + 0.0197047"
48 |   end
49 | 
50 |   def test_multiple_metrics
51 |     params = regression_params.merge(metric: ["l1", "l2", "rmse"])
52 |     eval_hist = LightGBM.cv(params, regression_train, shuffle: false, early_stopping_rounds: 5)
53 |     assert_equal ["l1-mean", "l1-stdv", "l2-mean", "l2-stdv", "rmse-mean", "rmse-stdv"].sort, eval_hist.keys.sort
54 |   end
55 | end
56 | 


--------------------------------------------------------------------------------
/test/dataset_test.rb:
--------------------------------------------------------------------------------
  1 | require_relative "test_helper"
  2 | 
  3 | class DatasetTest < Minitest::Test
  4 |   def test_data_string
  5 |     dataset = LightGBM::Dataset.new(data_path, params: {header: true, label_column: "name:y"})
  6 |     assert_equal 500, dataset.num_data
  7 |     assert_equal 4, dataset.num_feature
  8 |     assert_equal 500, dataset.label.size
  9 |     assert_equal ["x0", "x1", "x2", "x3"], dataset.feature_name
 10 |   end
 11 | 
 12 |   def test_label
 13 |     data = [[1, 2], [3, 4]]
 14 |     label = [1, 2]
 15 |     dataset = LightGBM::Dataset.new(data, label: label)
 16 |     assert label, dataset.label
 17 |   end
 18 | 
 19 |   def test_weight
 20 |     data = [[1, 2], [3, 4]]
 21 |     weight = [1, 2]
 22 |     dataset = LightGBM::Dataset.new(data, weight: weight)
 23 |     assert weight, dataset.weight
 24 |   end
 25 | 
 26 |   def test_feature_name
 27 |     data = [[1, 2], [3, 4]]
 28 |     dataset = LightGBM::Dataset.new(data, feature_name: ["a", "b"])
 29 |     assert_equal ["a", "b"], dataset.feature_name
 30 |   end
 31 | 
 32 |   def test_num_data
 33 |     assert_equal 300, regression_train.num_data
 34 |   end
 35 | 
 36 |   def test_num_feature
 37 |     assert_equal 4, regression_train.num_feature
 38 |   end
 39 | 
 40 |   def test_save_binary
 41 |     regression_train.save_binary(tempfile)
 42 |     assert File.exist?(tempfile)
 43 |   end
 44 | 
 45 |   def test_dump_text
 46 |     # method is private in Python library
 47 |     # https://github.com/microsoft/LightGBM/pull/2434
 48 |     assert !regression_train.respond_to?(:dump_text)
 49 |     regression_train.send(:dump_text, tempfile)
 50 |     assert File.exist?(tempfile)
 51 |   end
 52 | 
 53 |   def test_hashes_string_keys
 54 |     data = [{"x0" => 1, "x1" => 2}, {"x0" => 3, "x1" => 4}, {"x0" => 5, "x1" => 6}]
 55 |     dataset = LightGBM::Dataset.new(data)
 56 |     assert_equal 3, dataset.num_data
 57 |     assert_equal 2, dataset.num_feature
 58 |     assert_equal ["x0", "x1"], dataset.feature_name
 59 |   end
 60 | 
 61 |   def test_hashes_symbol_keys
 62 |     data = [{x0: 1, x1: 2}, {x0: 3, x1: 4}, {x0: 5, x1: 6}]
 63 |     dataset = LightGBM::Dataset.new(data)
 64 |     assert_equal 3, dataset.num_data
 65 |     assert_equal 2, dataset.num_feature
 66 |     assert_equal ["x0", "x1"], dataset.feature_name
 67 |   end
 68 | 
 69 |   def test_matrix
 70 |     data = Matrix.build(3, 3) { |row, col| row + col }
 71 |     label = Vector.elements([4, 5, 6])
 72 |     dataset = LightGBM::Dataset.new(data, label: label)
 73 |     assert_equal 3, dataset.num_data
 74 |     assert_equal 3, dataset.num_feature
 75 |     assert_equal 3, dataset.label.size
 76 |     assert_equal ["Column_0", "Column_1", "Column_2"], dataset.feature_name
 77 |   end
 78 | 
 79 |   def test_daru
 80 |     data = Daru::DataFrame.from_csv(data_path)
 81 |     label = data["y"]
 82 |     data = data.delete_vector("y")
 83 |     dataset = LightGBM::Dataset.new(data, label: label)
 84 |     assert_equal 500, dataset.num_data
 85 |     assert_equal 4, dataset.num_feature
 86 |     assert_equal 500, dataset.label.size
 87 |     assert_equal ["x0", "x1", "x2", "x3"], dataset.feature_name
 88 |   end
 89 | 
 90 |   def test_numo
 91 |     skip unless numo?
 92 | 
 93 |     data = Numo::DFloat.new(3, 5).seq
 94 |     label = Numo::DFloat.new(3).seq
 95 |     dataset = LightGBM::Dataset.new(data, label: label)
 96 |     assert_equal 3, dataset.num_data
 97 |     assert_equal 5, dataset.num_feature
 98 |     assert_equal 3, dataset.label.size
 99 |     assert_equal ["Column_0", "Column_1", "Column_2", "Column_3", "Column_4"], dataset.feature_name
100 |   end
101 | 
102 |   def test_rover
103 |     skip unless numo?
104 | 
105 |     data = Rover.read_csv(data_path)
106 |     label = data.delete("y")
107 |     dataset = LightGBM::Dataset.new(data, label: label)
108 |     assert_equal 500, dataset.num_data
109 |     assert_equal 4, dataset.num_feature
110 |     assert_equal 500, dataset.label.size
111 |     assert_equal ["x0", "x1", "x2", "x3"], dataset.feature_name
112 |   end
113 | 
114 |   def test_array_different_sizes
115 |     data = [[1, 2], [3, 4, 5]]
116 |     error = assert_raises(ArgumentError) do
117 |       LightGBM::Dataset.new(data)
118 |     end
119 |     assert_equal "Rows have different sizes", error.message
120 |   end
121 | 
122 |   def test_copy
123 |     regression_train.dup
124 |     regression_train.clone
125 |   end
126 | end
127 | 


--------------------------------------------------------------------------------
/test/ranker_test.rb:
--------------------------------------------------------------------------------
 1 | require_relative "test_helper"
 2 | 
 3 | class RankerTest < Minitest::Test
 4 |   def test_works
 5 |     x_train, y_train, x_test, _ = ranker_data
 6 |     group = [100, 200]
 7 | 
 8 |     model = LightGBM::Ranker.new
 9 |     model.fit(x_train, y_train, group: group)
10 |     y_pred = model.predict(x_test)
11 |     expected = [4.32677558843951, 1.5663855381974388, 3.8499830924310703, -2.1940085102547804, 3.3916802314416667, 3.488857015835257]
12 |     assert_elements_in_delta expected, y_pred.first(6)
13 | 
14 |     expected = [72, 114, 141, 17]
15 |     assert_equal expected, model.feature_importances
16 | 
17 |     model.save_model(tempfile)
18 | 
19 |     model = LightGBM::Ranker.new
20 |     model.load_model(tempfile)
21 |     assert_equal y_pred, model.predict(x_test)
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/test/regressor_test.rb:
--------------------------------------------------------------------------------
 1 | require_relative "test_helper"
 2 | 
 3 | class RegressorTest < Minitest::Test
 4 |   def test_works
 5 |     x_train, y_train, x_test, _ = regression_data
 6 | 
 7 |     model = LightGBM::Regressor.new
 8 |     model.fit(x_train, y_train)
 9 |     y_pred = model.predict(x_test)
10 |     expected = [1.3687029666659025, 1.7352643821271516, 1.4988839660914637, 0.8784593080455959, 1.209552643550604, 1.4602293932569006]
11 | 
12 |     assert_elements_in_delta expected, y_pred.first(6)
13 | 
14 |     expected = [280, 285, 335, 148]
15 |     assert_equal expected, model.feature_importances
16 | 
17 |     model.save_model(tempfile)
18 | 
19 |     model = LightGBM::Regressor.new
20 |     model.load_model(tempfile)
21 |     assert_equal y_pred, model.predict(x_test)
22 |   end
23 | 
24 |   def test_early_stopping
25 |     x_train, y_train, x_test, y_test = regression_data
26 | 
27 |     model = LightGBM::Regressor.new
28 |     model.fit(x_train, y_train, early_stopping_rounds: 5, eval_set: [[x_test, y_test]], verbose: false)
29 |     assert_equal 69, model.best_iteration
30 |   end
31 | 
32 |   def test_daru
33 |     data = Daru::DataFrame.from_csv(data_path)
34 |     y = data["y"]
35 |     x = data.delete_vector("y")
36 | 
37 |     # daru has bug with 0...300
38 |     x_train = x.row[0..299]
39 |     y_train = y[0..299]
40 |     x_test = x.row[300..-1]
41 | 
42 |     model = LightGBM::Regressor.new
43 |     model.fit(x_train, y_train)
44 |     y_pred = model.predict(x_test)
45 |     expected = [1.3687029666659025, 1.7352643821271516, 1.4988839660914637, 0.8784593080455959, 1.209552643550604, 1.4602293932569006]
46 |     assert_elements_in_delta expected, y_pred.first(6)
47 |   end
48 | 
49 |   def test_trivial
50 |     x = [[1], [2], [3], [4]]
51 |     y = [0.1, 0.2, 0.3, 0.4]
52 |     model = LightGBM::Regressor.new(min_data_in_bin: 1, min_data_in_leaf: 1)
53 |     model.fit(x, y)
54 |     assert_elements_in_delta y, model.predict(x)
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/test/support/booster.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv('test/support/data.csv')
 5 | 
 6 | X = df.drop(columns=['y'])
 7 | y = df['y']
 8 | 
 9 | X_train = X[:300]
10 | y_train = y[:300]
11 | X_test = X[300:]
12 | y_test = y[300:]
13 | 
14 | train_data = lgb.Dataset(X_train, label=y_train)
15 | bst = lgb.train({}, train_data)
16 | bst.save_model('test/support/model.txt')
17 | 
18 | bst = lgb.Booster(model_file='test/support/model.txt')
19 | print('x', X_train[:2].to_numpy().tolist())
20 | print('predict', bst.predict(X_train)[:2].tolist())
21 | print('raw_score', bst.predict(X_train, raw_score=True)[:2].tolist())
22 | print('pred_leaf', bst.predict(X_train, pred_leaf=True)[:2].tolist())
23 | print('pred_contrib', bst.predict(X_train, pred_contrib=True)[:2].tolist())
24 | print('feature_importance', bst.feature_importance().tolist())
25 | print('feature_name', bst.feature_name())
26 | 


--------------------------------------------------------------------------------
/test/support/categorical.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv('test/support/data.csv')
 5 | df['x3'] = ('cat' + df['x3'].astype(str)).astype('category')
 6 | 
 7 | X = df.drop(columns=['y'])
 8 | y = df['y']
 9 | 
10 | X_train = X[:300]
11 | y_train = y[:300]
12 | X_test = X[300:]
13 | y_test = y[300:]
14 | 
15 | train_data = lgb.Dataset(X_train, label=y_train)
16 | bst = lgb.train({}, train_data, num_boost_round=5)
17 | bst.save_model('test/support/categorical.txt')
18 | 
19 | bst = lgb.Booster(model_file='test/support/categorical.txt')
20 | print('x', X_train[:2].to_numpy().tolist())
21 | print('predict', bst.predict(X_train)[:2].tolist())
22 | 


--------------------------------------------------------------------------------
/test/support/categorical.txt:
--------------------------------------------------------------------------------
  1 | tree
  2 | version=v4
  3 | num_class=1
  4 | num_tree_per_iteration=1
  5 | label_index=0
  6 | max_feature_idx=3
  7 | objective=regression
  8 | feature_names=x0 x1 x2 x3
  9 | feature_infos=[0:9.9000000000000004] [0:9.8000000000000007] [0:9.9000000000000004] -1:9:0:2:4:5:6:7:3:8:1
 10 | tree_sizes=931 1058 976 1135 1140
 11 | 
 12 | Tree=0
 13 | num_leaves=10
 14 | num_cat=0
 15 | split_feature=2 1 1 0 0 2 0 1 0
 16 | split_gain=25.0622 13.4213 7.81363 4.17122 1.4867 1.85204 0.787037 0.163668 0.0359155
 17 | threshold=4.0500000000000007 7.7500000000000009 4.5500000000000007 5.8000000000000016 6.7500000000000009 6.3500000000000005 3.4500000000000006 5.8500000000000005 1.8500000000000003
 18 | decision_type=2 2 2 2 2 2 2 2 2
 19 | left_child=2 4 3 -1 7 -6 -4 8 -2
 20 | right_child=1 -3 6 -5 5 -7 -8 -9 -10
 21 | leaf_value=0.91066666613953806 0.9889999999835466 1.0683589738903518 0.96899999985316143 0.96323076904631955 0.99400000001614286 1.0332857140457434 0.99400000001614286 1.0019999999639888 0.99400000001614286
 22 | leaf_weight=36 20 39 20 26 21 28 34 25 51
 23 | leaf_count=36 20 39 20 26 21 28 34 25 51
 24 | internal_value=0.993333 1.01628 0.956931 0.93271 1.00228 1.01645 0.984741 0.995042 0.992592
 25 | internal_weight=0 184 116 62 145 49 54 96 71
 26 | internal_count=300 184 116 62 145 49 54 96 71
 27 | is_linear=0
 28 | shrinkage=1
 29 | 
 30 | 
 31 | Tree=1
 32 | num_leaves=11
 33 | num_cat=0
 34 | split_feature=2 1 1 0 1 2 2 2 2 0
 35 | split_gain=20.3004 10.8713 6.36761 3.77682 1.34014 1.44878 0.441848 0.19812 0.026579 0.00350037
 36 | threshold=4.0500000000000007 7.7500000000000009 5.0500000000000007 3.8500000000000001 5.7500000000000009 6.4500000000000011 2.2500000000000004 7.8500000000000005 5.1500000000000012 5.6500000000000012
 37 | decision_type=2 2 2 2 2 2 2 2 2 2
 38 | left_child=2 4 3 -1 7 -6 -4 8 -2 -10
 39 | right_child=1 -3 6 -5 5 -7 -8 -9 9 -11
 40 | leaf_value=-0.076532351072220242 -0.0043000001087784767 0.067523078200144651 -0.017899999655783178 -0.028270968151909694 0.004530612144264437 0.041676189969959004 0.0011645161216297458 0.0080224487770880967 0.00076071428733744801 -0.00097142858430743217
 41 | leaf_weight=34 20 39 20 31 21 21 31 35 28 20
 42 | leaf_count=34 20 39 20 31 21 21 31 35 28 20
 43 | internal_value=0 0.0206543 -0.0327621 -0.0535154 0.00804828 0.0231034 -0.00631176 0.00190929 -0.00123718 3.89881e-05
 44 | internal_weight=0 184 116 65 145 42 51 103 68 48
 45 | internal_count=300 184 116 65 145 42 51 103 68 48
 46 | is_linear=0
 47 | shrinkage=0.1
 48 | 
 49 | 
 50 | Tree=2
 51 | num_leaves=10
 52 | num_cat=0
 53 | split_feature=2 1 1 0 0 2 0 2 0
 54 | split_gain=16.4433 8.80574 5.30718 2.95305 1.25426 0.898936 0.167336 0.129581 0.0195663
 55 | threshold=4.0500000000000007 7.7500000000000009 4.2500000000000009 5.8000000000000016 7.5500000000000007 2.2500000000000004 3.6500000000000008 6.4500000000000011 1.6500000000000001
 56 | decision_type=2 2 2 2 2 2 2 2 2
 57 | left_child=2 4 3 -1 6 -4 8 -8 -2
 58 | right_child=1 -3 5 -5 -6 -7 7 -9 -10
 59 | leaf_value=-0.071365883899852642 -0.0042178705557370964 0.060770767163007694 -0.022856777518987657 -0.025495980858802798 0.024048169260598065 0.0021225635071887689 8.1711309515715891e-05 0.0096219042442472931 -0.00031222260219912078
 60 | leaf_weight=32 23 39 25 25 34 34 24 35 29
 61 | leaf_count=32 23 39 25 25 34 34 24 35 29
 62 | internal_value=0 0.0185889 -0.0294859 -0.0512475 0.00724345 -0.0084619 0.00209606 0.00574115 -0.00203972
 63 | internal_weight=0 184 116 57 145 59 111 59 52
 64 | internal_count=300 184 116 57 145 59 111 59 52
 65 | is_linear=0
 66 | shrinkage=0.1
 67 | 
 68 | 
 69 | Tree=3
 70 | num_leaves=12
 71 | num_cat=0
 72 | split_feature=2 1 1 2 0 0 2 0 0 1 2
 73 | split_gain=13.7383 7.13163 5.44524 3.64733 1.38156 1.17214 1.44484 0.919678 0.290498 0.163577 0.0302091
 74 | threshold=3.2500000000000004 7.7500000000000009 4.1500000000000012 8.1500000000000004 4.7500000000000009 6.7500000000000009 6.3500000000000005 3.4500000000000006 1.3500000000000003 2.2500000000000004 6.6500000000000012
 75 | decision_type=2 2 2 2 2 2 2 2 2 2 2
 76 | left_child=2 5 4 -3 -1 8 -7 -4 -2 -10 -11
 77 | right_child=1 3 7 -5 -6 6 -8 -9 9 10 -12
 78 | leaf_value=-0.076276844739913946 -0.012102717987727375 0.021397085603149166 -0.025375127499657021 0.080334717035293587 -0.039962041974067691 0.00080731424848260044 0.032654349133372305 0.0025276197327507872 -0.0053196552006907214 0.0058777159222194722 0.0014981740362210467
 79 | leaf_weight=22 20 21 21 21 20 29 28 27 27 36 28
 80 | leaf_count=22 20 21 21 21 20 29 28 27 27 36 28
 81 | internal_value=0 0.0140093 -0.0326885 0.0508659 -0.0589841 0.00479521 0.0164515 -0.00967983 -0.00119044 0.00120787 0.00396167
 82 | internal_weight=0 210 90 42 42 168 57 48 111 91 64
 83 | internal_count=300 210 90 42 42 168 57 48 111 91 64
 84 | is_linear=0
 85 | shrinkage=0.1
 86 | 
 87 | 
 88 | Tree=4
 89 | num_leaves=12
 90 | num_cat=0
 91 | split_feature=2 1 2 1 0 2 2 1 2 1 1
 92 | split_gain=11.333 5.29767 6.36153 3.12567 1.22002 1.22358 0.627947 0.0676673 0.0633798 0.0257134 0.00312228
 93 | threshold=2.5500000000000003 7.3500000000000005 6.6500000000000012 4.1500000000000012 3.6500000000000008 5.0500000000000007 8.1500000000000004 6.0500000000000007 4.3500000000000005 2.6500000000000008 2.8500000000000001
 94 | decision_type=2 2 2 2 2 2 2 2 2 2 2
 95 | left_child=3 4 -3 -1 5 -2 7 8 -6 -10 -7
 96 | right_child=1 2 -4 -5 6 10 -8 -9 9 -11 -12
 97 | leaf_value=-0.060740401331455474 -0.027608891367795881 0.003729247557847495 0.064928260411728514 -0.015461565321020316 0.0063351626133745804 0.0012375214530038648 0.02313491709297523 0.0084959104764857334 -0.0026474222024578768 0.0022552707933937203 -0.0004888500216607513
 98 | leaf_weight=31 25 35 33 30 21 20 20 20 23 20 22
 99 | leaf_count=31 25 35 33 30 21 20 20 20 23 20 22
100 | internal_value=0 0.00981924 0.0334288 -0.0384721 0.000430661 -0.0100929 0.00721029 0.00341871 0.00183208 -0.0003671 0.000333232
101 | internal_weight=0 239 68 61 171 67 104 84 64 43 42
102 | internal_count=300 239 68 61 171 67 104 84 64 43 42
103 | is_linear=0
104 | shrinkage=0.1
105 | 
106 | 
107 | end of trees
108 | 
109 | feature_importances:
110 | x2=19
111 | x1=16
112 | x0=15
113 | 
114 | parameters:
115 | [boosting: gbdt]
116 | [objective: regression]
117 | [metric: l2]
118 | [tree_learner: serial]
119 | [device_type: cpu]
120 | [data_sample_strategy: bagging]
121 | [data: ]
122 | [valid: ]
123 | [num_iterations: 5]
124 | [learning_rate: 0.1]
125 | [num_leaves: 31]
126 | [num_threads: 0]
127 | [seed: 0]
128 | [deterministic: 0]
129 | [force_col_wise: 0]
130 | [force_row_wise: 0]
131 | [histogram_pool_size: -1]
132 | [max_depth: -1]
133 | [min_data_in_leaf: 20]
134 | [min_sum_hessian_in_leaf: 0.001]
135 | [bagging_fraction: 1]
136 | [pos_bagging_fraction: 1]
137 | [neg_bagging_fraction: 1]
138 | [bagging_freq: 0]
139 | [bagging_seed: 3]
140 | [feature_fraction: 1]
141 | [feature_fraction_bynode: 1]
142 | [feature_fraction_seed: 2]
143 | [extra_trees: 0]
144 | [extra_seed: 6]
145 | [early_stopping_round: 0]
146 | [early_stopping_min_delta: 0]
147 | [first_metric_only: 0]
148 | [max_delta_step: 0]
149 | [lambda_l1: 0]
150 | [lambda_l2: 0]
151 | [linear_lambda: 0]
152 | [min_gain_to_split: 0]
153 | [drop_rate: 0.1]
154 | [max_drop: 50]
155 | [skip_drop: 0.5]
156 | [xgboost_dart_mode: 0]
157 | [uniform_drop: 0]
158 | [drop_seed: 4]
159 | [top_rate: 0.2]
160 | [other_rate: 0.1]
161 | [min_data_per_group: 100]
162 | [max_cat_threshold: 32]
163 | [cat_l2: 10]
164 | [cat_smooth: 10]
165 | [max_cat_to_onehot: 4]
166 | [top_k: 20]
167 | [monotone_constraints: ]
168 | [monotone_constraints_method: basic]
169 | [monotone_penalty: 0]
170 | [feature_contri: ]
171 | [forcedsplits_filename: ]
172 | [refit_decay_rate: 0.9]
173 | [cegb_tradeoff: 1]
174 | [cegb_penalty_split: 0]
175 | [cegb_penalty_feature_lazy: ]
176 | [cegb_penalty_feature_coupled: ]
177 | [path_smooth: 0]
178 | [interaction_constraints: ]
179 | [verbosity: 1]
180 | [saved_feature_importance_type: 0]
181 | [use_quantized_grad: 0]
182 | [num_grad_quant_bins: 4]
183 | [quant_train_renew_leaf: 0]
184 | [stochastic_rounding: 1]
185 | [linear_tree: 0]
186 | [max_bin: 255]
187 | [max_bin_by_feature: ]
188 | [min_data_in_bin: 3]
189 | [bin_construct_sample_cnt: 200000]
190 | [data_random_seed: 1]
191 | [is_enable_sparse: 1]
192 | [enable_bundle: 1]
193 | [use_missing: 1]
194 | [zero_as_missing: 0]
195 | [feature_pre_filter: 1]
196 | [pre_partition: 0]
197 | [two_round: 0]
198 | [header: 0]
199 | [label_column: ]
200 | [weight_column: ]
201 | [group_column: ]
202 | [ignore_column: ]
203 | [categorical_feature: 3]
204 | [forcedbins_filename: ]
205 | [precise_float_parser: 0]
206 | [parser_config_file: ]
207 | [objective_seed: 5]
208 | [num_class: 1]
209 | [is_unbalance: 0]
210 | [scale_pos_weight: 1]
211 | [sigmoid: 1]
212 | [boost_from_average: 1]
213 | [reg_sqrt: 0]
214 | [alpha: 0.9]
215 | [fair_c: 1]
216 | [poisson_max_delta_step: 0.7]
217 | [tweedie_variance_power: 1.5]
218 | [lambdarank_truncation_level: 30]
219 | [lambdarank_norm: 1]
220 | [label_gain: ]
221 | [lambdarank_position_bias_regularization: 0]
222 | [eval_at: ]
223 | [multi_error_top_k: 1]
224 | [auc_mu_weights: ]
225 | [num_machines: 1]
226 | [local_listen_port: 12400]
227 | [time_out: 120]
228 | [machine_list_filename: ]
229 | [machines: ]
230 | [gpu_platform_id: -1]
231 | [gpu_device_id: -1]
232 | [gpu_use_dp: 0]
233 | [num_gpu: 1]
234 | 
235 | end of parameters
236 | 
237 | pandas_categorical:[["cat0", "cat1", "cat2", "cat3", "cat4", "cat5", "cat6", "cat7", "cat8", "cat9"]]
238 | 


--------------------------------------------------------------------------------
/test/support/classifier.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv('test/support/data.csv')
 5 | 
 6 | X = df.drop(columns=['y'])
 7 | yb = df['y'].replace(2, 1)
 8 | ym = df['y']
 9 | 
10 | X_train = X[:300]
11 | yb_train = yb[:300]
12 | ym_train = ym[:300]
13 | X_test = X[300:]
14 | yb_test = yb[300:]
15 | ym_test = ym[300:]
16 | 
17 | print('test_binary')
18 | 
19 | model = lgb.LGBMClassifier()
20 | model.fit(X_train, yb_train)
21 | print(model.predict(X_test)[0:100].tolist())
22 | print(model.predict_proba(X_test)[0].tolist())
23 | print(model.feature_importances_.tolist())
24 | 
25 | print()
26 | print('test_multiclass')
27 | 
28 | model = lgb.LGBMClassifier()
29 | model.fit(X_train, ym_train)
30 | print(model.predict(X_test)[0:100].tolist())
31 | print(model.predict_proba(X_test)[0].tolist())
32 | print(model.feature_importances_.tolist())
33 | 
34 | print()
35 | print('test_early_stopping')
36 | model.fit(X_train, ym_train, eval_set=[(X_test, ym_test)], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()])
37 | 
38 | print()
39 | print('test_missing_numeric')
40 | 
41 | X_train_miss = X_train.copy()
42 | X_test_miss = X_test.copy()
43 | X_train_miss[X_train_miss == 3.7] = None
44 | X_test_miss[X_test_miss == 3.7] = None
45 | model = lgb.LGBMClassifier()
46 | model.fit(X_train_miss, ym_train)
47 | print(model.predict(X_test_miss)[0:100].tolist())
48 | print(model.feature_importances_.tolist())
49 | 
50 | print()
51 | print('test_missing_categorical')
52 | 
53 | X_train_miss2 = X_train.copy()
54 | X_test_miss2 = X_test.copy()
55 | X_train_miss2["x3"][X_train_miss2["x3"] > 7] = None
56 | X_test_miss2["x3"][X_test_miss2["x3"] > 7] = None
57 | model = lgb.LGBMClassifier()
58 | model.fit(X_train_miss2, ym_train, categorical_feature=[3])
59 | print(model.predict(X_test_miss2)[0:100].tolist())
60 | print(model.feature_importances_.tolist())
61 | 


--------------------------------------------------------------------------------
/test/support/cv.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv('test/support/data.csv')
 5 | 
 6 | X = df.drop(columns=['y'])
 7 | y = df['y']
 8 | # First 300 rows feed the cross-validation runs; the remaining rows are split off but not used below.
 9 | X_train = X[:300]
10 | y_train = y[:300]
11 | X_test = X[300:]
12 | y_test = y[300:]
13 | 
14 | print('test_regression')
15 | # Unshuffled, unstratified CV; prints the first and last round of the l2 mean and stdv.
16 | regression_params = {'objective': 'regression', 'verbosity': -1}
17 | regression_train = lgb.Dataset(X_train, label=y_train)
18 | eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False)
19 | print(eval_hist['valid l2-mean'][0])
20 | print(eval_hist['valid l2-mean'][-1])
21 | print(eval_hist['valid l2-stdv'][0])
22 | print(eval_hist['valid l2-stdv'][-1])
23 | 
24 | print()
25 | print('test_binary')
26 | # Binary objective: label 2 is replaced by 1 before the Dataset is built.
27 | binary_params = {'objective': 'binary', 'verbosity': -1}
28 | binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1))
29 | eval_hist = lgb.cv(binary_params, binary_train, shuffle=False, stratified=False)
30 | print(eval_hist['valid binary_logloss-mean'][0])
31 | print(eval_hist['valid binary_logloss-mean'][-1])
32 | print(eval_hist['valid binary_logloss-stdv'][0])
33 | print(eval_hist['valid binary_logloss-stdv'][-1])
34 | 
35 | print()
36 | print('test_multiclass')
37 | # Multiclass objective with num_class=3 on the unmodified labels.
38 | multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1}
39 | multiclass_train = lgb.Dataset(X_train, label=y_train)
40 | eval_hist = lgb.cv(multiclass_params, multiclass_train, shuffle=False, stratified=False)
41 | print(eval_hist['valid multi_logloss-mean'][0])
42 | print(eval_hist['valid multi_logloss-mean'][-1])
43 | print(eval_hist['valid multi_logloss-stdv'][0])
44 | print(eval_hist['valid multi_logloss-stdv'][-1])
45 | 
46 | print('')
47 | print('test_early_stopping_early')
48 | # Regression CV again with early_stopping(stopping_rounds=5); prints how many rounds were recorded.
49 | eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, callbacks=[lgb.log_evaluation(), lgb.early_stopping(stopping_rounds=5)])
50 | print(len(eval_hist['valid l2-mean']))
51 | 
52 | print('')
53 | print('test_early_stopping_not_early')
54 | # Same call with stopping_rounds=500; the section name indicates no early stop is expected.
55 | eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, callbacks=[lgb.log_evaluation(), lgb.early_stopping(stopping_rounds=500)])
56 | print(len(eval_hist['valid l2-mean']))
57 | 
58 | 


--------------------------------------------------------------------------------
/test/support/data.csv:
--------------------------------------------------------------------------------
  1 | x0,x1,x2,x3,y
  2 | 3.7,1.2,7.2,9,1
  3 | 7.5,0.5,7.9,0,1
  4 | 1.6,0.1,7.6,7,1
  5 | 0.6,2.5,5.0,4,0
  6 | 1.8,8.4,1.1,2,1
  7 | 6.8,8.7,8.7,9,2
  8 | 9.4,9.6,8.6,9,2
  9 | 0.9,0.7,6.3,6,1
 10 | 5.7,0.1,0.0,1,0
 11 | 0.8,8.8,1.3,3,1
 12 | 4.7,7.2,3.0,7,1
 13 | 0.3,7.0,2.1,1,1
 14 | 5.7,0.3,6.8,8,1
 15 | 4.3,7.6,2.6,4,1
 16 | 8.0,4.1,8.2,0,1
 17 | 6.8,2.5,9.8,7,2
 18 | 0.7,2.6,2.5,8,0
 19 | 2.2,0.9,6.7,7,1
 20 | 2.3,2.7,3.7,9,1
 21 | 8.3,3.8,0.8,0,0
 22 | 3.4,1.0,2.3,7,0
 23 | 2.5,7.1,9.2,0,1
 24 | 8.8,2.3,5.5,1,1
 25 | 7.7,0.3,0.0,8,0
 26 | 7.7,0.6,5.2,5,1
 27 | 7.0,0.2,7.6,5,1
 28 | 7.5,0.7,7.7,8,1
 29 | 7.5,7.6,4.3,4,1
 30 | 3.0,3.6,0.7,4,0
 31 | 5.7,8.2,9.6,7,2
 32 | 8.1,0.7,2.4,4,0
 33 | 3.2,1.2,6.5,9,1
 34 | 2.4,8.2,9.7,2,2
 35 | 9.2,9.8,1.0,6,1
 36 | 9.6,8.2,8.6,6,2
 37 | 6.6,7.1,4.8,6,1
 38 | 1.5,0.5,1.7,4,0
 39 | 4.8,2.2,1.3,1,0
 40 | 9.7,5.3,8.4,0,1
 41 | 5.5,6.1,5.6,9,1
 42 | 2.1,9.6,8.3,9,2
 43 | 1.4,1.3,8.4,4,1
 44 | 4.3,0.6,7.7,8,1
 45 | 5.9,1.5,2.4,9,1
 46 | 6.6,7.1,5.3,5,1
 47 | 3.6,2.1,4.0,1,0
 48 | 4.7,7.7,4.0,7,1
 49 | 1.6,2.8,4.5,3,1
 50 | 6.6,7.8,4.6,0,1
 51 | 2.9,6.3,7.5,3,1
 52 | 5.3,9.3,3.3,2,1
 53 | 8.4,8.3,4.8,6,2
 54 | 3.2,2.8,5.5,2,1
 55 | 3.1,2.8,9.4,8,1
 56 | 9.9,3.2,0.8,4,0
 57 | 7.7,5.0,7.9,9,2
 58 | 6.4,8.3,2.4,1,1
 59 | 2.0,4.4,1.5,3,0
 60 | 1.9,2.6,8.6,7,1
 61 | 9.9,5.3,4.7,3,1
 62 | 3.4,3.2,1.9,3,0
 63 | 2.4,8.3,9.4,6,2
 64 | 4.7,0.5,7.9,1,1
 65 | 8.7,3.2,4.2,2,1
 66 | 8.8,9.8,3.0,1,1
 67 | 6.8,6.4,6.0,4,1
 68 | 7.8,1.7,3.9,3,1
 69 | 8.1,2.8,2.2,6,1
 70 | 4.1,7.4,7.7,6,1
 71 | 2.5,4.8,5.0,9,1
 72 | 5.4,0.0,8.6,7,1
 73 | 1.6,1.9,0.9,0,0
 74 | 9.2,5.1,1.0,4,1
 75 | 2.3,1.4,6.3,5,1
 76 | 4.6,0.3,5.6,8,1
 77 | 8.0,4.6,5.4,7,1
 78 | 1.4,7.7,1.5,9,1
 79 | 5.3,8.4,5.8,5,1
 80 | 2.9,4.4,3.7,6,1
 81 | 9.1,5.4,7.6,8,1
 82 | 5.9,2.6,7.6,7,1
 83 | 8.7,3.9,4.3,2,1
 84 | 3.8,9.1,6.9,2,2
 85 | 3.3,4.3,2.6,8,1
 86 | 6.9,7.3,5.2,9,1
 87 | 2.7,4.3,0.1,2,0
 88 | 9.4,9.6,2.6,4,1
 89 | 3.0,6.4,2.2,2,0
 90 | 5.2,0.3,7.0,7,1
 91 | 8.3,4.8,6.1,6,1
 92 | 1.3,3.6,2.3,6,0
 93 | 6.6,9.2,9.5,5,2
 94 | 0.9,4.1,5.7,8,1
 95 | 9.3,7.6,8.2,9,2
 96 | 3.6,1.6,3.0,3,0
 97 | 8.4,5.7,3.5,9,1
 98 | 8.1,0.2,8.5,4,1
 99 | 4.6,8.0,2.7,8,1
100 | 2.8,0.2,9.9,9,1
101 | 3.6,4.0,1.8,1,0
102 | 3.8,6.3,4.6,8,1
103 | 8.9,0.2,9.8,8,1
104 | 2.1,5.8,1.6,5,0
105 | 1.1,2.2,0.6,0,0
106 | 1.4,8.7,7.5,3,1
107 | 6.3,3.2,9.1,1,1
108 | 0.0,2.2,9.7,6,1
109 | 3.6,1.8,2.7,5,0
110 | 5.2,9.4,5.0,7,1
111 | 8.5,1.6,2.9,8,1
112 | 3.0,0.8,4.0,0,0
113 | 3.0,2.3,6.6,0,1
114 | 1.2,1.7,3.3,9,0
115 | 7.5,0.5,8.1,5,1
116 | 4.2,8.6,5.2,9,1
117 | 5.6,7.8,8.7,5,2
118 | 8.1,1.0,7.2,0,1
119 | 1.9,1.2,2.5,0,0
120 | 0.4,8.8,2.7,2,1
121 | 6.8,0.3,5.8,2,1
122 | 7.6,9.6,9.6,4,2
123 | 7.4,1.2,1.8,5,0
124 | 1.6,9.5,9.6,3,2
125 | 4.4,2.4,6.2,5,1
126 | 1.9,7.7,3.3,4,1
127 | 3.9,6.7,3.4,2,1
128 | 4.2,7.8,8.2,6,2
129 | 3.8,4.8,8.1,5,1
130 | 9.1,0.8,4.0,5,1
131 | 0.5,0.7,7.8,5,1
132 | 7.8,3.3,2.7,3,1
133 | 7.7,3.0,4.3,9,1
134 | 6.7,0.3,6.7,6,1
135 | 9.7,6.3,9.9,0,2
136 | 2.8,3.7,6.4,5,1
137 | 9.3,1.8,7.8,7,1
138 | 7.0,9.2,5.2,0,1
139 | 4.6,6.3,9.3,2,1
140 | 3.6,2.9,3.9,6,1
141 | 2.9,5.5,9.0,1,1
142 | 7.1,9.1,0.7,3,1
143 | 5.6,5.1,3.2,6,1
144 | 5.1,9.6,3.0,6,1
145 | 0.5,9.1,9.5,9,2
146 | 2.2,7.6,2.2,6,1
147 | 1.8,9.3,2.9,2,1
148 | 5.2,3.1,4.9,2,1
149 | 8.3,9.3,4.5,3,1
150 | 8.6,1.5,7.1,0,1
151 | 2.8,8.3,2.7,3,1
152 | 7.9,7.0,8.8,6,2
153 | 1.1,6.5,9.3,3,1
154 | 9.8,8.6,4.5,3,2
155 | 6.3,4.2,2.2,7,1
156 | 8.2,4.0,4.4,0,1
157 | 1.7,8.8,0.0,0,0
158 | 9.7,8.2,7.1,4,2
159 | 1.4,3.6,2.6,0,0
160 | 0.1,4.8,8.8,5,1
161 | 2.2,3.4,9.4,5,1
162 | 6.8,6.7,4.8,6,1
163 | 1.8,9.7,1.1,9,1
164 | 5.2,7.6,6.8,0,1
165 | 8.9,4.0,0.7,7,1
166 | 3.3,5.8,0.7,2,0
167 | 0.4,0.5,6.1,6,1
168 | 5.0,7.6,4.5,0,1
169 | 2.1,9.2,7.1,9,2
170 | 9.7,0.1,1.3,4,0
171 | 3.8,8.5,2.7,6,1
172 | 8.4,3.3,0.1,5,0
173 | 3.8,2.8,7.5,2,1
174 | 9.9,3.1,5.1,2,1
175 | 6.7,3.2,7.4,0,1
176 | 9.7,0.5,0.0,5,0
177 | 7.2,4.3,2.4,0,1
178 | 9.6,9.6,4.1,8,2
179 | 2.1,2.7,4.6,3,1
180 | 6.2,6.8,6.2,0,1
181 | 4.5,8.8,4.2,9,1
182 | 2.5,3.2,3.3,6,1
183 | 0.6,6.5,5.1,7,1
184 | 1.3,0.3,2.8,2,0
185 | 6.1,8.3,9.6,2,2
186 | 3.4,3.0,5.7,0,1
187 | 4.9,4.6,8.3,9,1
188 | 2.0,4.0,5.0,8,1
189 | 0.4,0.9,0.0,5,0
190 | 7.4,2.1,3.3,7,1
191 | 3.8,3.7,4.3,7,1
192 | 3.3,9.5,4.1,7,1
193 | 6.9,7.0,2.3,6,1
194 | 3.9,3.1,3.7,6,1
195 | 5.6,8.7,2.8,4,1
196 | 6.6,1.0,8.4,0,1
197 | 1.6,3.0,0.3,5,0
198 | 6.9,4.9,7.4,1,1
199 | 7.2,6.8,3.9,0,1
200 | 9.7,0.7,3.3,5,1
201 | 3.6,1.2,5.3,6,1
202 | 2.6,8.2,9.9,9,2
203 | 9.9,2.5,0.1,9,1
204 | 5.5,5.5,6.4,8,1
205 | 6.0,7.0,4.9,2,1
206 | 3.2,9.0,2.0,4,1
207 | 3.8,6.5,8.0,9,1
208 | 5.2,8.9,1.9,0,1
209 | 4.2,4.7,2.1,1,1
210 | 0.1,8.9,3.8,0,1
211 | 1.8,7.7,7.7,8,1
212 | 9.0,8.3,3.9,2,1
213 | 1.5,3.7,9.6,4,1
214 | 9.8,0.1,3.2,5,1
215 | 0.7,5.0,5.0,0,1
216 | 4.3,4.1,6.9,1,1
217 | 9.7,5.1,3.0,2,1
218 | 4.9,8.1,8.2,7,2
219 | 2.1,1.8,4.6,9,1
220 | 0.4,7.7,6.3,7,1
221 | 9.5,5.9,6.4,2,1
222 | 3.1,0.1,8.4,9,1
223 | 6.8,5.4,2.5,3,1
224 | 2.7,5.6,9.2,8,1
225 | 5.5,7.6,4.6,1,1
226 | 1.1,0.8,6.1,7,1
227 | 8.8,8.9,8.2,2,2
228 | 0.5,4.2,4.6,5,1
229 | 5.1,7.3,4.2,2,1
230 | 2.3,4.5,9.5,4,1
231 | 3.3,5.4,1.4,9,1
232 | 6.1,4.0,7.2,1,1
233 | 2.2,0.2,8.6,1,1
234 | 7.2,5.1,7.1,0,1
235 | 8.7,7.7,6.2,0,1
236 | 0.0,4.2,3.9,3,0
237 | 5.7,4.3,5.3,2,1
238 | 9.7,7.5,5.9,2,1
239 | 7.4,9.1,0.5,3,1
240 | 3.1,1.1,0.3,6,0
241 | 0.1,1.3,5.9,8,1
242 | 8.1,0.8,3.8,4,1
243 | 7.0,7.6,7.3,5,2
244 | 7.9,8.4,5.9,7,1
245 | 6.6,3.2,4.6,2,1
246 | 4.4,6.4,3.7,9,1
247 | 9.0,8.4,1.5,1,1
248 | 5.2,6.9,6.6,9,2
249 | 2.8,1.9,1.7,5,0
250 | 7.9,1.0,2.9,1,1
251 | 8.1,4.6,0.7,1,1
252 | 8.6,6.1,6.4,7,2
253 | 2.9,5.5,2.8,4,1
254 | 9.9,4.3,9.8,7,2
255 | 0.5,5.0,9.2,4,1
256 | 3.4,9.8,8.3,5,2
257 | 8.9,5.4,0.4,9,1
258 | 3.2,5.0,5.1,7,1
259 | 8.9,4.3,8.8,0,1
260 | 8.6,5.8,8.8,3,2
261 | 2.2,1.0,3.1,4,0
262 | 0.9,0.6,3.0,3,0
263 | 1.8,1.2,7.3,1,1
264 | 8.5,5.3,4.2,6,1
265 | 0.9,3.6,8.7,8,1
266 | 5.3,8.7,1.8,5,1
267 | 6.7,1.4,2.0,5,1
268 | 0.9,4.2,6.6,4,1
269 | 1.6,5.8,7.7,3,1
270 | 7.5,2.8,8.5,2,1
271 | 4.4,9.2,8.2,8,2
272 | 3.0,6.4,7.7,2,1
273 | 3.8,1.1,5.6,4,1
274 | 8.3,7.6,6.8,9,2
275 | 9.9,6.9,3.1,2,1
276 | 3.2,7.1,6.0,8,1
277 | 2.7,9.7,9.3,4,2
278 | 3.8,6.0,9.7,6,2
279 | 0.9,6.7,7.8,4,1
280 | 3.5,5.5,2.8,8,1
281 | 5.1,0.4,7.9,4,1
282 | 7.6,5.2,3.8,3,1
283 | 8.0,7.4,7.0,4,2
284 | 5.7,3.0,9.9,0,1
285 | 3.8,1.8,9.5,9,1
286 | 9.5,5.7,4.8,1,1
287 | 5.2,2.2,6.4,6,1
288 | 6.1,6.0,9.6,0,1
289 | 2.6,5.0,2.0,1,0
290 | 7.7,1.8,6.0,2,1
291 | 5.9,6.2,3.8,7,1
292 | 6.8,6.9,6.1,7,1
293 | 6.6,1.9,3.6,1,1
294 | 2.1,7.7,2.6,1,1
295 | 3.2,0.8,0.8,4,0
296 | 6.8,8.2,5.4,8,2
297 | 8.5,9.3,7.4,9,2
298 | 2.3,9.4,9.0,8,2
299 | 2.4,4.7,6.8,9,1
300 | 1.3,0.6,0.2,3,0
301 | 9.3,0.4,2.8,3,1
302 | 8.2,7.7,4.6,9,1
303 | 0.6,9.1,8.0,4,1
304 | 2.3,6.3,9.9,7,2
305 | 0.6,2.1,6.4,2,1
306 | 9.0,0.9,9.2,3,1
307 | 8.5,3.3,7.1,9,1
308 | 0.8,5.1,3.3,7,1
309 | 9.9,6.1,0.7,9,1
310 | 2.7,5.1,4.0,7,1
311 | 4.5,9.4,0.9,8,1
312 | 4.6,5.8,2.1,6,1
313 | 0.0,1.9,1.9,5,0
314 | 2.7,9.7,2.1,2,1
315 | 6.7,3.5,9.5,7,2
316 | 2.2,5.6,1.0,8,0
317 | 3.1,1.2,8.2,7,1
318 | 8.7,0.8,6.1,7,1
319 | 3.4,9.4,0.3,7,1
320 | 6.8,1.5,2.4,9,1
321 | 2.6,9.8,4.7,7,1
322 | 3.5,1.6,8.7,7,1
323 | 5.7,2.4,5.5,1,1
324 | 5.2,2.4,1.5,3,0
325 | 7.3,4.6,2.0,2,1
326 | 9.4,1.2,2.5,0,0
327 | 9.7,9.8,7.7,8,2
328 | 5.4,4.7,7.7,7,1
329 | 4.0,5.9,9.5,6,1
330 | 7.9,8.9,3.1,8,1
331 | 3.3,3.8,9.2,4,1
332 | 1.4,0.9,6.7,1,1
333 | 7.9,0.3,4.7,6,1
334 | 4.2,0.5,2.0,9,0
335 | 5.9,5.4,7.7,8,1
336 | 5.8,7.9,3.6,0,1
337 | 5.8,5.1,6.3,9,1
338 | 8.3,8.8,8.2,8,2
339 | 5.5,7.8,7.0,4,2
340 | 2.7,2.3,5.3,5,1
341 | 6.8,2.3,4.6,7,1
342 | 6.6,7.7,2.6,8,1
343 | 1.7,8.7,0.1,5,1
344 | 1.7,5.6,3.3,2,1
345 | 0.2,9.6,9.0,3,1
346 | 2.6,2.6,5.2,7,1
347 | 0.7,6.2,9.9,7,1
348 | 7.0,6.9,5.5,2,1
349 | 8.7,8.5,7.4,5,2
350 | 1.4,5.6,8.7,1,1
351 | 8.4,7.5,7.6,2,1
352 | 6.2,4.5,6.5,4,1
353 | 2.8,8.7,3.8,4,1
354 | 9.7,5.1,5.9,0,1
355 | 1.4,9.7,5.2,0,1
356 | 6.4,3.0,8.2,4,1
357 | 1.0,4.5,8.8,7,1
358 | 9.9,5.9,9.7,0,2
359 | 0.5,9.0,7.6,7,1
360 | 4.2,2.0,8.8,4,1
361 | 1.8,9.2,4.9,3,1
362 | 1.5,1.7,6.1,7,1
363 | 6.4,5.5,0.2,3,1
364 | 0.4,1.6,7.7,4,1
365 | 3.0,5.2,8.4,4,1
366 | 9.8,5.9,3.1,3,1
367 | 3.2,5.0,5.1,5,1
368 | 3.3,0.0,0.9,3,0
369 | 8.8,6.0,3.8,7,1
370 | 6.2,5.6,9.6,4,2
371 | 2.4,4.3,5.1,1,1
372 | 1.8,1.0,6.1,7,1
373 | 0.6,0.7,7.9,3,1
374 | 0.0,0.3,5.1,3,0
375 | 8.1,5.9,6.6,0,1
376 | 1.9,4.4,8.7,5,1
377 | 0.0,6.3,9.4,4,1
378 | 0.1,8.4,6.0,6,1
379 | 1.4,2.4,4.6,8,1
380 | 3.2,8.0,7.0,6,1
381 | 2.4,7.7,8.9,3,1
382 | 5.7,3.1,3.9,1,1
383 | 9.3,4.8,1.3,3,1
384 | 1.5,9.7,4.1,3,1
385 | 2.5,5.1,3.4,6,1
386 | 3.3,9.9,6.1,7,1
387 | 3.2,0.3,6.7,8,1
388 | 5.7,9.6,4.6,9,2
389 | 2.1,9.2,9.9,4,2
390 | 8.4,5.8,6.4,8,2
391 | 7.8,7.8,6.2,7,1
392 | 4.2,1.4,9.3,3,1
393 | 1.8,7.8,1.0,3,1
394 | 4.9,9.3,7.9,9,2
395 | 8.9,0.4,0.5,9,0
396 | 8.6,2.9,5.3,0,1
397 | 7.2,2.9,3.8,6,1
398 | 7.8,5.4,4.2,4,1
399 | 6.4,6.8,4.4,5,1
400 | 8.4,7.3,5.0,0,1
401 | 5.0,3.5,7.2,7,1
402 | 0.2,3.5,9.7,2,1
403 | 3.4,8.3,9.3,2,2
404 | 7.4,1.8,2.5,1,1
405 | 4.9,3.7,2.0,8,1
406 | 5.2,2.3,1.8,3,0
407 | 3.2,3.7,3.0,8,1
408 | 5.0,7.3,6.8,0,1
409 | 1.5,6.3,6.4,3,1
410 | 9.1,2.0,5.4,7,1
411 | 0.6,2.4,0.0,8,0
412 | 4.8,8.0,2.8,8,1
413 | 0.1,7.3,2.1,0,0
414 | 7.6,3.2,4.0,1,1
415 | 8.3,3.0,8.7,5,1
416 | 3.3,8.2,2.2,3,1
417 | 3.4,4.9,8.8,6,1
418 | 3.7,3.3,1.2,5,0
419 | 7.9,6.5,0.6,3,1
420 | 3.2,8.5,4.4,7,1
421 | 6.7,6.6,2.8,9,1
422 | 8.4,4.3,9.4,4,2
423 | 5.2,3.1,2.5,9,1
424 | 6.3,5.2,5.9,8,1
425 | 1.7,2.2,5.4,7,1
426 | 5.0,2.1,7.1,3,1
427 | 2.3,1.2,7.7,6,1
428 | 0.7,5.2,3.0,8,1
429 | 9.6,8.4,8.1,9,2
430 | 3.9,5.6,6.0,6,1
431 | 2.5,6.5,9.5,8,1
432 | 9.9,2.7,1.8,6,1
433 | 0.1,3.4,0.2,1,0
434 | 8.5,1.1,9.3,5,1
435 | 5.9,8.2,1.1,3,1
436 | 0.3,8.3,0.0,5,0
437 | 5.1,9.1,3.2,6,1
438 | 8.4,9.9,8.1,5,2
439 | 1.1,6.9,0.9,0,0
440 | 3.8,4.2,6.3,7,1
441 | 4.0,6.3,6.3,2,1
442 | 4.0,0.5,9.2,8,1
443 | 8.2,7.4,4.0,9,1
444 | 6.8,4.9,8.2,1,1
445 | 8.6,4.6,1.3,5,1
446 | 8.0,8.1,9.4,0,2
447 | 4.9,5.1,1.4,5,1
448 | 0.4,0.7,5.4,4,0
449 | 9.8,4.1,3.5,2,1
450 | 5.6,1.3,1.1,4,0
451 | 6.6,2.1,8.1,2,1
452 | 6.1,9.1,8.5,3,2
453 | 6.0,9.9,9.2,2,2
454 | 9.1,7.4,0.8,1,1
455 | 2.7,2.5,8.6,4,1
456 | 1.4,0.3,9.2,4,1
457 | 7.2,4.3,9.9,8,2
458 | 5.5,1.7,6.0,4,1
459 | 3.2,9.8,8.3,4,2
460 | 5.0,8.0,9.1,5,2
461 | 2.4,5.6,4.0,4,1
462 | 3.8,1.9,4.4,4,1
463 | 6.3,2.1,6.5,4,1
464 | 9.2,1.8,6.8,0,1
465 | 7.8,5.9,5.6,3,1
466 | 3.9,6.1,3.2,3,1
467 | 5.3,6.2,7.5,5,1
468 | 0.8,3.4,3.3,9,0
469 | 6.7,0.1,9.8,8,1
470 | 8.9,2.7,2.9,9,1
471 | 7.7,1.0,6.1,0,1
472 | 7.9,7.4,7.6,1,2
473 | 0.0,3.0,0.3,1,0
474 | 6.4,3.8,6.7,5,1
475 | 2.6,9.5,1.0,5,1
476 | 3.3,3.6,0.9,4,0
477 | 0.0,5.5,6.9,4,1
478 | 0.3,3.9,7.7,5,1
479 | 1.6,2.5,4.6,7,1
480 | 4.3,0.0,5.6,7,1
481 | 5.5,5.2,7.8,1,1
482 | 5.3,6.0,0.0,2,0
483 | 9.9,9.5,3.9,7,2
484 | 0.7,1.6,6.6,5,1
485 | 5.0,5.8,7.9,8,1
486 | 7.7,1.2,1.1,1,0
487 | 3.4,0.6,6.9,5,1
488 | 4.4,7.0,7.2,2,1
489 | 0.7,2.9,6.0,2,1
490 | 7.3,4.6,8.6,4,1
491 | 9.7,4.7,8.8,9,2
492 | 7.6,8.7,4.5,4,1
493 | 8.7,3.9,3.7,6,1
494 | 4.3,5.8,0.1,3,0
495 | 2.9,4.8,5.5,8,1
496 | 3.4,2.9,0.0,3,0
497 | 0.4,7.1,0.1,8,0
498 | 2.6,4.0,8.9,0,1
499 | 7.0,9.6,0.7,4,1
500 | 3.8,8.1,2.9,2,1
501 | 0.3,7.4,5.0,0,1
502 | 


--------------------------------------------------------------------------------
/test/support/ranker.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv('test/support/data.csv')
 5 | # Relevance labels are binary here: label 2 is replaced by 1.
 6 | X = df.drop(columns=['y'])
 7 | y = df['y'].replace(2, 1)
 8 | 
 9 | X_train = X[:300]
10 | y_train = y[:300]
11 | X_test = X[300:]
12 | y_test = y[300:]
13 | # Group sizes passed to fit(); they sum to the 300 training rows.
14 | group = [100, 200]
15 | # Fit with default parameters, then print six held-out scores and the feature importances.
16 | model = lgb.LGBMRanker()
17 | model.fit(X_train, y_train, group=group)
18 | print(model.predict(X_test)[0:6].tolist())
19 | print(model.feature_importances_.tolist())
20 | 


--------------------------------------------------------------------------------
/test/support/regressor.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | 
 4 | df = pd.read_csv('test/support/data.csv')
 5 | 
 6 | X = df.drop(columns=['y'])
 7 | y = df['y']
 8 | # First 300 rows are the training split; the rest are held out.
 9 | X_train = X[:300]
10 | y_train = y[:300]
11 | X_test = X[300:]
12 | y_test = y[300:]
13 | # Default regressor fitted on the training split.
14 | model = lgb.LGBMRegressor()
15 | model.fit(X_train, y_train)
16 | # Print the first six held-out predictions, then the feature importances.
17 | print('predict', model.predict(X_test)[0:6].tolist())
18 | 
19 | print('feature_importances', model.feature_importances_.tolist())
20 | # Refit with the held-out split as eval_set so the early-stopping log lines are emitted.
21 | print('early_stopping')
22 | model.fit(X_train, y_train, eval_set=[(X_test, y_test)], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()])
23 | 


--------------------------------------------------------------------------------
/test/support/train.py:
--------------------------------------------------------------------------------
 1 | import lightgbm as lgb
 2 | import pandas as pd
 3 | import numpy as np
 4 | 
 5 | df = pd.read_csv('test/support/data.csv')
 6 | 
 7 | X = df.drop(columns=['y'])
 8 | y = df['y']
 9 | # First 300 rows are the training split; the rest are held out.
10 | X_train = X[:300]
11 | y_train = y[:300]
12 | X_test = X[300:]
13 | y_test = y[300:]
14 | 
15 | print('test_regression')
16 | # Train with both splits as valid_sets and print the RMSE on the held-out rows.
17 | regression_params = {'objective': 'regression', 'verbosity': -1}
18 | regression_train = lgb.Dataset(X_train, label=y_train)
19 | regression_test = lgb.Dataset(X_test, label=y_test)
20 | bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test])
21 | y_pred = bst.predict(X_test)
22 | print(np.sqrt(np.mean((y_pred - y_test)**2)))
23 | 
24 | print('')
25 | print('test_binary')
26 | # Binary objective with label 2 replaced by 1; prints the first held-out prediction.
27 | binary_params = {'objective': 'binary', 'verbosity': -1}
28 | binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1))
29 | binary_test = lgb.Dataset(X_test, label=y_test.replace(2, 1))
30 | bst = lgb.train(binary_params, binary_train, valid_sets=[binary_train, binary_test])
31 | y_pred = bst.predict(X_test)
32 | print(y_pred[0])
33 | 
34 | print('')
35 | print('test_multiclass')
36 | # Multiclass objective with num_class=3; prints the first held-out prediction row.
37 | multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1}
38 | multiclass_train = lgb.Dataset(X_train, label=y_train)
39 | multiclass_test = lgb.Dataset(X_test, label=y_test)
40 | bst = lgb.train(multiclass_params, multiclass_train, valid_sets=[multiclass_train, multiclass_test])
41 | y_pred = bst.predict(X_test)
42 | print(y_pred[0].tolist())
43 | 
44 | print('')
45 | print('test_early_stopping_early')
46 | # Regression again with early_stopping(stopping_rounds=5); prints the chosen best iteration.
47 | bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()])
48 | print(bst.best_iteration)
49 | 
50 | print('')
51 | print('test_early_stopping_not_early')
52 | # Same call with stopping_rounds=500.
53 | bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], callbacks=[lgb.early_stopping(stopping_rounds=500), lgb.log_evaluation()])
54 | # appears to be using training set for best iteration instead of validation set
55 | print(bst.best_iteration)
56 | 
57 | print('')
58 | print('test_early_stopping_early_higher_better')
59 | # AUC metric on the binary datasets, with early stopping after 5 rounds.
60 | params = {'objective': 'binary', 'metric': 'auc', 'verbosity': -1}
61 | bst = lgb.train(params, binary_train, valid_sets=[binary_train, binary_test], callbacks=[lgb.early_stopping(stopping_rounds=5)])
62 | print(bst.best_iteration)
63 | 
64 | print('')
65 | print('test_categorical_feature')
66 | # Column index 3 is declared categorical; prints the first held-out prediction.
67 | train_set = lgb.Dataset(X_train, label=y_train, categorical_feature=[3])
68 | bst = lgb.train(regression_params, train_set)
69 | print(bst.predict(X_test)[0])
70 | 


--------------------------------------------------------------------------------
/test/test_helper.rb:
--------------------------------------------------------------------------------
  1 | require "bundler/setup"
  2 | Bundler.require(:default)
  3 | require "minitest/autorun"
  4 | require "minitest/pride"
  5 | require "csv"
  6 | require "json"
  7 | require "matrix"
  8 | require "daru"
  9 | 
 10 | class Minitest::Test
 11 |   def setup
 12 |     if stress?
 13 |       # autoload before GC.stress
 14 |       LightGBM::FFI.ffi_libraries
 15 |       load_data
 16 |       GC.stress = true
 17 |     end
 18 |   end
 19 | 
 20 |   def teardown
 21 |     GC.stress = false if stress?
 22 |   end
 23 | 
 24 |   def stress?
 25 |     ENV["STRESS"]
 26 |   end
 27 | 
 28 |   def assert_elements_in_delta(expected, actual)
 29 |     assert_equal expected.size, actual.size
 30 |     expected.zip(actual) do |exp, act|
 31 |       assert_in_delta exp, act
 32 |     end
 33 |   end
 34 | 
 35 |   def regression_data
 36 |     @regression_data ||= split_data(*load_data)
 37 |   end
 38 | 
 39 |   def regression_train
 40 |     @regression_train ||= split_train(regression_data)
 41 |   end
 42 | 
 43 |   def regression_test
 44 |     @regression_test ||= split_test(regression_data)
 45 |   end
 46 | 
 47 |   def binary_data
 48 |     x, y = load_data
 49 |     y = y.map { |v| v > 1 ? 1 : v }
 50 |     split_data(x, y)
 51 |   end
 52 | 
 53 |   def binary_train
 54 |     @binary_train ||= split_train(binary_data)
 55 |   end
 56 | 
 57 |   def binary_test
 58 |     @binary_test ||= split_test(binary_data)
 59 |   end
 60 | 
 61 |   def multiclass_data
 62 |     @multiclass_data ||= split_data(*load_data)
 63 |   end
 64 | 
 65 |   def multiclass_train
 66 |     @multiclass_train ||= split_train(multiclass_data)
 67 |   end
 68 | 
 69 |   def multiclass_test
 70 |     @multiclass_test ||= split_test(multiclass_data)
 71 |   end
 72 | 
 73 |   def ranker_data
 74 |     @ranker_data ||= binary_data
 75 |   end
 76 | 
 77 |   def data_path
 78 |     "test/support/data.csv"
 79 |   end
 80 | 
 81 |   def load_data
 82 |     @@load_data ||= begin
 83 |       x = []
 84 |       y = []
 85 |       CSV.foreach(data_path, headers: true, converters: :numeric) do |row|
 86 |         x << row.values_at("x0", "x1", "x2", "x3").freeze
 87 |         y << row["y"]
 88 |       end
 89 |       [x.freeze, y.freeze]
 90 |     end
 91 |   end
 92 | 
 93 |   def split_data(x, y)
 94 |     [x[0...300], y[0...300], x[300..-1], y[300..-1]]
 95 |   end
 96 | 
 97 |   def split_train(data)
 98 |     x_train, y_train, _, _ = data
 99 |     LightGBM::Dataset.new(x_train, label: y_train)
100 |   end
101 | 
102 |   def split_test(data)
103 |     _, _, x_test, y_test = data
104 |     LightGBM::Dataset.new(x_test, label: y_test)
105 |   end
106 | 
107 |   def regression_params
108 |     {objective: "regression"}
109 |   end
110 | 
111 |   def binary_params
112 |     {objective: "binary"}
113 |   end
114 | 
115 |   def multiclass_params
116 |     {objective: "multiclass", num_class: 3}
117 |   end
118 | 
119 |   def teardown
120 |     @tempfile = nil
121 |   end
122 | 
123 |   def tempfile
124 |     @tempfile ||= "#{Dir.mktmpdir}/#{Time.now.to_f}"
125 |   end
126 | 
127 |   def jruby?
128 |     RUBY_ENGINE == "jruby"
129 |   end
130 | 
131 |   def numo?
132 |     !jruby? && RUBY_ENGINE != "truffleruby"
133 |   end
134 | end
135 | 


--------------------------------------------------------------------------------
/test/train_test.rb:
--------------------------------------------------------------------------------
  1 | require_relative "test_helper"
  2 | 
  3 | class TrainTest < Minitest::Test
  4 |   def test_regression
  5 |     model = LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], verbose_eval: false)
  6 |     y_pred = model.predict(regression_test.data)
  7 |     assert_in_delta 0.2900400590132747, rsme(regression_test.label, y_pred)
  8 | 
  9 |     model.save_model(tempfile)
 10 |     model = LightGBM::Booster.new(model_file: tempfile)
 11 |     y_pred = model.predict(regression_test.data)
 12 |     assert_in_delta 0.2900400590132747, rsme(regression_test.label, y_pred)
 13 |   end
 14 | 
 15 |   def test_binary
 16 |     model = LightGBM.train(binary_params, binary_train, valid_sets: [binary_train, binary_test], verbose_eval: false)
 17 |     y_pred = model.predict(binary_test.data)
 18 |     assert_in_delta 0.9999907566825113, y_pred.first
 19 | 
 20 |     model.save_model(tempfile)
 21 |     model = LightGBM::Booster.new(model_file: tempfile)
 22 |     y_pred2 = model.predict(binary_test.data)
 23 |     assert_equal y_pred, y_pred2
 24 |   end
 25 | 
 26 |   def test_multiclass
 27 |     model = LightGBM.train(multiclass_params, multiclass_train, valid_sets: [multiclass_train, multiclass_test], verbose_eval: false)
 28 | 
 29 |     y_pred = model.predict(multiclass_test.data)
 30 |     expected = [0.00036627031584163575, 0.9456350323547973, 0.053998697329361176]
 31 |     assert_elements_in_delta expected, y_pred.first
 32 |     # ensure reshaped
 33 |     assert_equal 200, y_pred.size
 34 |     assert_equal 3, y_pred.first.size
 35 | 
 36 |     model.save_model(tempfile)
 37 |     model = LightGBM::Booster.new(model_file: tempfile)
 38 |     y_pred2 = model.predict(multiclass_test.data)
 39 |     assert_equal y_pred, y_pred2
 40 |   end
 41 | 
 42 |   def test_early_stopping_early
 43 |     model = nil
 44 |     stdout, _ = capture_io do
 45 |       model = LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], early_stopping_rounds: 5)
 46 |     end
 47 |     assert_equal 69, model.best_iteration
 48 |     assert_includes stdout, "Early stopping, best iteration is:\n[69]\ttraining's l2: 0.0312266\tvalid_1's l2: 0.0843578"
 49 |   end
 50 | 
 51 |   def test_early_stopping_not_early
 52 |     model = nil
 53 |     stdout, _ = capture_io do
 54 |       model = LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], early_stopping_rounds: 500)
 55 |     end
 56 |     assert_equal 100, model.best_iteration
 57 |     if jruby?
 58 |       assert_includes stdout, "Best iteration is: [100]\ttraining's l2: 0.0245240\tvalid_1's l2: 0.0841232"
 59 |     else
 60 |       assert_includes stdout, "Best iteration is: [100]\ttraining's l2: 0.024524\tvalid_1's l2: 0.0841232"
 61 |     end
 62 |   end
 63 | 
 64 |   def test_early_stopping_early_higher_better
 65 |     model = LightGBM.train(binary_params.merge(metric: "auc"), binary_train, valid_sets: [binary_train, binary_test], early_stopping_rounds: 5, verbose_eval: false)
 66 |     assert_equal 8, model.best_iteration
 67 |   end
 68 | 
 69 |   def test_verbose_eval_false
 70 |     stdout, _ = capture_io do
 71 |       LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], early_stopping_rounds: 5, verbose_eval: false)
 72 |     end
 73 |     assert_empty stdout
 74 |   end
 75 | 
 76 |   def test_bad_params
 77 |     params = {objective: "regression verbosity=1"}
 78 |     assert_raises ArgumentError do
 79 |       LightGBM.train(params, regression_train)
 80 |     end
 81 |   end
 82 | 
 83 |   def test_early_stopping_no_valid_set
 84 |     error = assert_raises ArgumentError do
 85 |       LightGBM.train(regression_params, regression_train, valid_sets: [], early_stopping_rounds: 5)
 86 |     end
 87 |     assert_includes error.message, "at least one validation set is required"
 88 |   end
 89 | 
 90 |   def test_early_stopping_valid_set_training
 91 |     error = assert_raises ArgumentError do
 92 |       LightGBM.train(regression_params, regression_train, valid_sets: [regression_train], early_stopping_rounds: 5)
 93 |     end
 94 |     assert_includes error.message, "at least one validation set is required"
 95 |   end
 96 | 
 97 |   def test_categorical_feature
 98 |     train_set = LightGBM::Dataset.new(regression_train.data, label: regression_train.label, categorical_feature: [3])
 99 |     model = LightGBM.train(regression_params, train_set)
100 |     assert_in_delta 1.2914367038779377, model.predict(regression_test.data).first
101 |   end
102 | 
103 |   def test_multiple_metrics
104 |     params = regression_params.merge(metric: ["l1", "l2", "rmse"])
105 |     LightGBM.train(params, regression_train, valid_sets: [regression_train, regression_test], verbose_eval: false, early_stopping_rounds: 5)
106 |   end
107 | 
108 |   private
109 | 
110 |   def rsme(y_true, y_pred)
111 |     Math.sqrt(y_true.zip(y_pred).map { |a, b| (a - b)**2 }.sum / y_true.size.to_f)
112 |   end
113 | end
114 | 


--------------------------------------------------------------------------------
/vendor/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) Microsoft Corporation
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/vendor/LICENSE-THIRD-PARTY:
--------------------------------------------------------------------------------
  1 | ================================================================================
  2 | Boost.Compute 1.78.0
  3 | ================================================================================
  4 | 
  5 | Boost Software License - Version 1.0 - August 17th, 2003
  6 | 
  7 | Permission is hereby granted, free of charge, to any person or organization
  8 | obtaining a copy of the software and accompanying documentation covered by
  9 | this license (the "Software") to use, reproduce, display, distribute,
 10 | execute, and transmit the Software, and to prepare derivative works of the
 11 | Software, and to permit third-parties to whom the Software is furnished to
 12 | do so, all subject to the following:
 13 | 
 14 | The copyright notices in the Software and this entire statement, including
 15 | the above license grant, this restriction and the following disclaimer,
 16 | must be included in all copies of the Software, in whole or in part, and
 17 | all derivative works of the Software, unless such copies or derivative
 18 | works are solely in the form of machine-executable object code generated by
 19 | a source language processor.
 20 | 
 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 24 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 25 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 26 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 27 | DEALINGS IN THE SOFTWARE.
 28 | 
 29 | ================================================================================
 30 | Eigen 3.4.0
 31 | ================================================================================
 32 | 
 33 | Source code available at https://gitlab.com/libeigen/eigen
 34 | 
 35 | Mozilla Public License Version 2.0
 36 | ==================================
 37 | 
 38 | 1. Definitions
 39 | --------------
 40 | 
 41 | 1.1. "Contributor"
 42 |     means each individual or legal entity that creates, contributes to
 43 |     the creation of, or owns Covered Software.
 44 | 
 45 | 1.2. "Contributor Version"
 46 |     means the combination of the Contributions of others (if any) used
 47 |     by a Contributor and that particular Contributor's Contribution.
 48 | 
 49 | 1.3. "Contribution"
 50 |     means Covered Software of a particular Contributor.
 51 | 
 52 | 1.4. "Covered Software"
 53 |     means Source Code Form to which the initial Contributor has attached
 54 |     the notice in Exhibit A, the Executable Form of such Source Code
 55 |     Form, and Modifications of such Source Code Form, in each case
 56 |     including portions thereof.
 57 | 
 58 | 1.5. "Incompatible With Secondary Licenses"
 59 |     means
 60 | 
 61 |     (a) that the initial Contributor has attached the notice described
 62 |         in Exhibit B to the Covered Software; or
 63 | 
 64 |     (b) that the Covered Software was made available under the terms of
 65 |         version 1.1 or earlier of the License, but not also under the
 66 |         terms of a Secondary License.
 67 | 
 68 | 1.6. "Executable Form"
 69 |     means any form of the work other than Source Code Form.
 70 | 
 71 | 1.7. "Larger Work"
 72 |     means a work that combines Covered Software with other material, in
 73 |     a separate file or files, that is not Covered Software.
 74 | 
 75 | 1.8. "License"
 76 |     means this document.
 77 | 
 78 | 1.9. "Licensable"
 79 |     means having the right to grant, to the maximum extent possible,
 80 |     whether at the time of the initial grant or subsequently, any and
 81 |     all of the rights conveyed by this License.
 82 | 
 83 | 1.10. "Modifications"
 84 |     means any of the following:
 85 | 
 86 |     (a) any file in Source Code Form that results from an addition to,
 87 |         deletion from, or modification of the contents of Covered
 88 |         Software; or
 89 | 
 90 |     (b) any new file in Source Code Form that contains any Covered
 91 |         Software.
 92 | 
 93 | 1.11. "Patent Claims" of a Contributor
 94 |     means any patent claim(s), including without limitation, method,
 95 |     process, and apparatus claims, in any patent Licensable by such
 96 |     Contributor that would be infringed, but for the grant of the
 97 |     License, by the making, using, selling, offering for sale, having
 98 |     made, import, or transfer of either its Contributions or its
 99 |     Contributor Version.
100 | 
101 | 1.12. "Secondary License"
102 |     means either the GNU General Public License, Version 2.0, the GNU
103 |     Lesser General Public License, Version 2.1, the GNU Affero General
104 |     Public License, Version 3.0, or any later versions of those
105 |     licenses.
106 | 
107 | 1.13. "Source Code Form"
108 |     means the form of the work preferred for making modifications.
109 | 
110 | 1.14. "You" (or "Your")
111 |     means an individual or a legal entity exercising rights under this
112 |     License. For legal entities, "You" includes any entity that
113 |     controls, is controlled by, or is under common control with You. For
114 |     purposes of this definition, "control" means (a) the power, direct
115 |     or indirect, to cause the direction or management of such entity,
116 |     whether by contract or otherwise, or (b) ownership of more than
117 |     fifty percent (50%) of the outstanding shares or beneficial
118 |     ownership of such entity.
119 | 
120 | 2. License Grants and Conditions
121 | --------------------------------
122 | 
123 | 2.1. Grants
124 | 
125 | Each Contributor hereby grants You a world-wide, royalty-free,
126 | non-exclusive license:
127 | 
128 | (a) under intellectual property rights (other than patent or trademark)
129 |     Licensable by such Contributor to use, reproduce, make available,
130 |     modify, display, perform, distribute, and otherwise exploit its
131 |     Contributions, either on an unmodified basis, with Modifications, or
132 |     as part of a Larger Work; and
133 | 
134 | (b) under Patent Claims of such Contributor to make, use, sell, offer
135 |     for sale, have made, import, and otherwise transfer either its
136 |     Contributions or its Contributor Version.
137 | 
138 | 2.2. Effective Date
139 | 
140 | The licenses granted in Section 2.1 with respect to any Contribution
141 | become effective for each Contribution on the date the Contributor first
142 | distributes such Contribution.
143 | 
144 | 2.3. Limitations on Grant Scope
145 | 
146 | The licenses granted in this Section 2 are the only rights granted under
147 | this License. No additional rights or licenses will be implied from the
148 | distribution or licensing of Covered Software under this License.
149 | Notwithstanding Section 2.1(b) above, no patent license is granted by a
150 | Contributor:
151 | 
152 | (a) for any code that a Contributor has removed from Covered Software;
153 |     or
154 | 
155 | (b) for infringements caused by: (i) Your and any other third party's
156 |     modifications of Covered Software, or (ii) the combination of its
157 |     Contributions with other software (except as part of its Contributor
158 |     Version); or
159 | 
160 | (c) under Patent Claims infringed by Covered Software in the absence of
161 |     its Contributions.
162 | 
163 | This License does not grant any rights in the trademarks, service marks,
164 | or logos of any Contributor (except as may be necessary to comply with
165 | the notice requirements in Section 3.4).
166 | 
167 | 2.4. Subsequent Licenses
168 | 
169 | No Contributor makes additional grants as a result of Your choice to
170 | distribute the Covered Software under a subsequent version of this
171 | License (see Section 10.2) or under the terms of a Secondary License (if
172 | permitted under the terms of Section 3.3).
173 | 
174 | 2.5. Representation
175 | 
176 | Each Contributor represents that the Contributor believes its
177 | Contributions are its original creation(s) or it has sufficient rights
178 | to grant the rights to its Contributions conveyed by this License.
179 | 
180 | 2.6. Fair Use
181 | 
182 | This License is not intended to limit any rights You have under
183 | applicable copyright doctrines of fair use, fair dealing, or other
184 | equivalents.
185 | 
186 | 2.7. Conditions
187 | 
188 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
189 | in Section 2.1.
190 | 
191 | 3. Responsibilities
192 | -------------------
193 | 
194 | 3.1. Distribution of Source Form
195 | 
196 | All distribution of Covered Software in Source Code Form, including any
197 | Modifications that You create or to which You contribute, must be under
198 | the terms of this License. You must inform recipients that the Source
199 | Code Form of the Covered Software is governed by the terms of this
200 | License, and how they can obtain a copy of this License. You may not
201 | attempt to alter or restrict the recipients' rights in the Source Code
202 | Form.
203 | 
204 | 3.2. Distribution of Executable Form
205 | 
206 | If You distribute Covered Software in Executable Form then:
207 | 
208 | (a) such Covered Software must also be made available in Source Code
209 |     Form, as described in Section 3.1, and You must inform recipients of
210 |     the Executable Form how they can obtain a copy of such Source Code
211 |     Form by reasonable means in a timely manner, at a charge no more
212 |     than the cost of distribution to the recipient; and
213 | 
214 | (b) You may distribute such Executable Form under the terms of this
215 |     License, or sublicense it under different terms, provided that the
216 |     license for the Executable Form does not attempt to limit or alter
217 |     the recipients' rights in the Source Code Form under this License.
218 | 
219 | 3.3. Distribution of a Larger Work
220 | 
221 | You may create and distribute a Larger Work under terms of Your choice,
222 | provided that You also comply with the requirements of this License for
223 | the Covered Software. If the Larger Work is a combination of Covered
224 | Software with a work governed by one or more Secondary Licenses, and the
225 | Covered Software is not Incompatible With Secondary Licenses, this
226 | License permits You to additionally distribute such Covered Software
227 | under the terms of such Secondary License(s), so that the recipient of
228 | the Larger Work may, at their option, further distribute the Covered
229 | Software under the terms of either this License or such Secondary
230 | License(s).
231 | 
232 | 3.4. Notices
233 | 
234 | You may not remove or alter the substance of any license notices
235 | (including copyright notices, patent notices, disclaimers of warranty,
236 | or limitations of liability) contained within the Source Code Form of
237 | the Covered Software, except that You may alter any license notices to
238 | the extent required to remedy known factual inaccuracies.
239 | 
240 | 3.5. Application of Additional Terms
241 | 
242 | You may choose to offer, and to charge a fee for, warranty, support,
243 | indemnity or liability obligations to one or more recipients of Covered
244 | Software. However, You may do so only on Your own behalf, and not on
245 | behalf of any Contributor. You must make it absolutely clear that any
246 | such warranty, support, indemnity, or liability obligation is offered by
247 | You alone, and You hereby agree to indemnify every Contributor for any
248 | liability incurred by such Contributor as a result of warranty, support,
249 | indemnity or liability terms You offer. You may include additional
250 | disclaimers of warranty and limitations of liability specific to any
251 | jurisdiction.
252 | 
253 | 4. Inability to Comply Due to Statute or Regulation
254 | ---------------------------------------------------
255 | 
256 | If it is impossible for You to comply with any of the terms of this
257 | License with respect to some or all of the Covered Software due to
258 | statute, judicial order, or regulation then You must: (a) comply with
259 | the terms of this License to the maximum extent possible; and (b)
260 | describe the limitations and the code they affect. Such description must
261 | be placed in a text file included with all distributions of the Covered
262 | Software under this License. Except to the extent prohibited by statute
263 | or regulation, such description must be sufficiently detailed for a
264 | recipient of ordinary skill to be able to understand it.
265 | 
266 | 5. Termination
267 | --------------
268 | 
269 | 5.1. The rights granted under this License will terminate automatically
270 | if You fail to comply with any of its terms. However, if You become
271 | compliant, then the rights granted under this License from a particular
272 | Contributor are reinstated (a) provisionally, unless and until such
273 | Contributor explicitly and finally terminates Your grants, and (b) on an
274 | ongoing basis, if such Contributor fails to notify You of the
275 | non-compliance by some reasonable means prior to 60 days after You have
276 | come back into compliance. Moreover, Your grants from a particular
277 | Contributor are reinstated on an ongoing basis if such Contributor
278 | notifies You of the non-compliance by some reasonable means, this is the
279 | first time You have received notice of non-compliance with this License
280 | from such Contributor, and You become compliant prior to 30 days after
281 | Your receipt of the notice.
282 | 
283 | 5.2. If You initiate litigation against any entity by asserting a patent
284 | infringement claim (excluding declaratory judgment actions,
285 | counter-claims, and cross-claims) alleging that a Contributor Version
286 | directly or indirectly infringes any patent, then the rights granted to
287 | You by any and all Contributors for the Covered Software under Section
288 | 2.1 of this License shall terminate.
289 | 
290 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
291 | end user license agreements (excluding distributors and resellers) which
292 | have been validly granted by You or Your distributors under this License
293 | prior to termination shall survive termination.
294 | 
295 | ************************************************************************
296 | *                                                                      *
297 | *  6. Disclaimer of Warranty                                           *
298 | *  -------------------------                                           *
299 | *                                                                      *
300 | *  Covered Software is provided under this License on an "as is"       *
301 | *  basis, without warranty of any kind, either expressed, implied, or  *
302 | *  statutory, including, without limitation, warranties that the       *
303 | *  Covered Software is free of defects, merchantable, fit for a        *
304 | *  particular purpose or non-infringing. The entire risk as to the     *
305 | *  quality and performance of the Covered Software is with You.        *
306 | *  Should any Covered Software prove defective in any respect, You     *
307 | *  (not any Contributor) assume the cost of any necessary servicing,   *
308 | *  repair, or correction. This disclaimer of warranty constitutes an   *
309 | *  essential part of this License. No use of any Covered Software is   *
310 | *  authorized under this License except under this disclaimer.         *
311 | *                                                                      *
312 | ************************************************************************
313 | 
314 | ************************************************************************
315 | *                                                                      *
316 | *  7. Limitation of Liability                                          *
317 | *  --------------------------                                          *
318 | *                                                                      *
319 | *  Under no circumstances and under no legal theory, whether tort      *
320 | *  (including negligence), contract, or otherwise, shall any           *
321 | *  Contributor, or anyone who distributes Covered Software as          *
322 | *  permitted above, be liable to You for any direct, indirect,         *
323 | *  special, incidental, or consequential damages of any character      *
324 | *  including, without limitation, damages for lost profits, loss of    *
325 | *  goodwill, work stoppage, computer failure or malfunction, or any    *
326 | *  and all other commercial damages or losses, even if such party      *
327 | *  shall have been informed of the possibility of such damages. This   *
328 | *  limitation of liability shall not apply to liability for death or   *
329 | *  personal injury resulting from such party's negligence to the       *
330 | *  extent applicable law prohibits such limitation. Some               *
331 | *  jurisdictions do not allow the exclusion or limitation of           *
332 | *  incidental or consequential damages, so this exclusion and          *
333 | *  limitation may not apply to You.                                    *
334 | *                                                                      *
335 | ************************************************************************
336 | 
337 | 8. Litigation
338 | -------------
339 | 
340 | Any litigation relating to this License may be brought only in the
341 | courts of a jurisdiction where the defendant maintains its principal
342 | place of business and such litigation shall be governed by laws of that
343 | jurisdiction, without reference to its conflict-of-law provisions.
344 | Nothing in this Section shall prevent a party's ability to bring
345 | cross-claims or counter-claims.
346 | 
347 | 9. Miscellaneous
348 | ----------------
349 | 
350 | This License represents the complete agreement concerning the subject
351 | matter hereof. If any provision of this License is held to be
352 | unenforceable, such provision shall be reformed only to the extent
353 | necessary to make it enforceable. Any law or regulation which provides
354 | that the language of a contract shall be construed against the drafter
355 | shall not be used to construe this License against a Contributor.
356 | 
357 | 10. Versions of the License
358 | ---------------------------
359 | 
360 | 10.1. New Versions
361 | 
362 | Mozilla Foundation is the license steward. Except as provided in Section
363 | 10.3, no one other than the license steward has the right to modify or
364 | publish new versions of this License. Each version will be given a
365 | distinguishing version number.
366 | 
367 | 10.2. Effect of New Versions
368 | 
369 | You may distribute the Covered Software under the terms of the version
370 | of the License under which You originally received the Covered Software,
371 | or under the terms of any subsequent version published by the license
372 | steward.
373 | 
374 | 10.3. Modified Versions
375 | 
376 | If you create software not governed by this License, and you want to
377 | create a new license for such software, you may create and use a
378 | modified version of this License if you rename the license and remove
379 | any references to the name of the license steward (except to note that
380 | such modified license differs from this License).
381 | 
382 | 10.4. Distributing Source Code Form that is Incompatible With Secondary
383 | Licenses
384 | 
385 | If You choose to distribute Source Code Form that is Incompatible With
386 | Secondary Licenses under the terms of this version of the License, the
387 | notice described in Exhibit B of this License must be attached.
388 | 
389 | Exhibit A - Source Code Form License Notice
390 | -------------------------------------------
391 | 
392 |   This Source Code Form is subject to the terms of the Mozilla Public
393 |   License, v. 2.0. If a copy of the MPL was not distributed with this
394 |   file, You can obtain one at http://mozilla.org/MPL/2.0/.
395 | 
396 | If it is not possible or desirable to put the notice in a particular
397 | file, then You may include the notice in a location (such as a LICENSE
398 | file in a relevant directory) where a recipient would be likely to look
399 | for such a notice.
400 | 
401 | You may add additional accurate notices of copyright ownership.
402 | 
403 | Exhibit B - "Incompatible With Secondary Licenses" Notice
404 | ---------------------------------------------------------
405 | 
406 |   This Source Code Form is "Incompatible With Secondary Licenses", as
407 |   defined by the Mozilla Public License, v. 2.0.
408 | 
409 | and
410 | 
411 | /*
412 |  Copyright (c) 2011, Intel Corporation. All rights reserved.
413 | 
414 |  Redistribution and use in source and binary forms, with or without modification,
415 |  are permitted provided that the following conditions are met:
416 | 
417 |  * Redistributions of source code must retain the above copyright notice, this
418 |    list of conditions and the following disclaimer.
419 |  * Redistributions in binary form must reproduce the above copyright notice,
420 |    this list of conditions and the following disclaimer in the documentation
421 |    and/or other materials provided with the distribution.
422 |  * Neither the name of Intel Corporation nor the names of its contributors may
423 |    be used to endorse or promote products derived from this software without
424 |    specific prior written permission.
425 | 
426 |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
427 |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
428 |  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
429 |  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
430 |  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
431 |  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
432 |  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
433 |  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
434 |  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
435 |  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
436 | */
437 | 
438 | ================================================================================
439 | fast_double_parser 0.8.0
440 | ================================================================================
441 | 
442 | Copyright (c) Daniel Lemire
443 | 
444 | Boost Software License - Version 1.0 - August 17th, 2003
445 | 
446 | Permission is hereby granted, free of charge, to any person or organization
447 | obtaining a copy of the software and accompanying documentation covered by
448 | this license (the "Software") to use, reproduce, display, distribute,
449 | execute, and transmit the Software, and to prepare derivative works of the
450 | Software, and to permit third-parties to whom the Software is furnished to
451 | do so, all subject to the following:
452 | 
453 | The copyright notices in the Software and this entire statement, including
454 | the above license grant, this restriction and the following disclaimer,
455 | must be included in all copies of the Software, in whole or in part, and
456 | all derivative works of the Software, unless such copies or derivative
457 | works are solely in the form of machine-executable object code generated by
458 | a source language processor.
459 | 
460 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
461 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
462 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
463 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
464 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
465 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
466 | DEALINGS IN THE SOFTWARE.
467 | 
468 | ================================================================================
469 | fmt 11.1.2
470 | ================================================================================
471 | 
472 | Copyright (c) 2012 - present, Victor Zverovich and {fmt} contributors
473 | 
474 | Permission is hereby granted, free of charge, to any person obtaining
475 | a copy of this software and associated documentation files (the
476 | "Software"), to deal in the Software without restriction, including
477 | without limitation the rights to use, copy, modify, merge, publish,
478 | distribute, sublicense, and/or sell copies of the Software, and to
479 | permit persons to whom the Software is furnished to do so, subject to
480 | the following conditions:
481 | 
482 | The above copyright notice and this permission notice shall be
483 | included in all copies or substantial portions of the Software.
484 | 
485 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
486 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
487 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
488 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
489 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
490 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
491 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
492 | 
493 | --- Optional exception to the license ---
494 | 
495 | As an exception, if, as a result of your compiling your source code, portions
496 | of this Software are embedded into a machine-executable object form of such
497 | source code, you may redistribute such embedded portions in such object form
498 | without including the above copyright and permission notices.
499 | 
500 | ================================================================================
501 | json11
502 | ================================================================================
503 | 
504 | Copyright (c) 2013 Dropbox, Inc.
505 | 
506 | Permission is hereby granted, free of charge, to any person obtaining a copy
507 | of this software and associated documentation files (the "Software"), to deal
508 | in the Software without restriction, including without limitation the rights
509 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
510 | copies of the Software, and to permit persons to whom the Software is
511 | furnished to do so, subject to the following conditions:
512 | 
513 | The above copyright notice and this permission notice shall be included in
514 | all copies or substantial portions of the Software.
515 | 
516 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
517 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
518 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
519 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
520 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
521 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
522 | THE SOFTWARE.
523 | 
524 | ================================================================================
525 | yamc
526 | ================================================================================
527 | 
528 | MIT License
529 | 
530 | Copyright (c) 2017 yohhoy
531 | 
532 | Permission is hereby granted, free of charge, to any person obtaining a copy
533 | of this software and associated documentation files (the "Software"), to deal
534 | in the Software without restriction, including without limitation the rights
535 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
536 | copies of the Software, and to permit persons to whom the Software is
537 | furnished to do so, subject to the following conditions:
538 | 
539 | The above copyright notice and this permission notice shall be included in all
540 | copies or substantial portions of the Software.
541 | 
542 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
543 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
544 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
545 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
546 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
547 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
548 | SOFTWARE.
549 | 


--------------------------------------------------------------------------------