├── .github └── workflows │ └── build.yml ├── .gitignore ├── CHANGELOG.md ├── Gemfile ├── LICENSE.txt ├── README.md ├── Rakefile ├── lib ├── lightgbm.rb └── lightgbm │ ├── booster.rb │ ├── classifier.rb │ ├── dataset.rb │ ├── ffi.rb │ ├── inner_predictor.rb │ ├── model.rb │ ├── ranker.rb │ ├── regressor.rb │ ├── utils.rb │ └── version.rb ├── lightgbm.gemspec ├── test ├── booster_test.rb ├── classifier_test.rb ├── cv_test.rb ├── dataset_test.rb ├── ranker_test.rb ├── regressor_test.rb ├── support │ ├── booster.py │ ├── categorical.py │ ├── categorical.txt │ ├── classifier.py │ ├── cv.py │ ├── data.csv │ ├── model.txt │ ├── ranker.py │ ├── regressor.py │ └── train.py ├── test_helper.rb └── train_test.rb └── vendor ├── LICENSE └── LICENSE-THIRD-PARTY /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | strategy: 6 | fail-fast: false 7 | matrix: 8 | os: [ubuntu-latest, macos-14, macos-13] 9 | runs-on: ${{ matrix.os }} 10 | steps: 11 | - if: ${{ startsWith(matrix.os, 'windows') }} 12 | run: git config --global core.autocrlf false 13 | - uses: actions/checkout@v4 14 | - uses: ruby/setup-ruby@v1 15 | with: 16 | ruby-version: 3.4 17 | bundler-cache: true 18 | - if: ${{ startsWith(matrix.os, 'macos') }} 19 | run: brew install libomp 20 | - run: bundle exec rake vendor:platform 21 | - run: bundle exec rake test 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /_yardoc/ 4 | /coverage/ 5 | /doc/ 6 | /pkg/ 7 | /spec/reports/ 8 | /tmp/ 9 | *.lock 10 | /vendor/lib_lightgbm.* 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.4.2 (unreleased) 2 | 3 | - 
Improved performance of `predict` method for `pandas_categorical` 4 | 5 | ## 0.4.1 (2025-02-17) 6 | 7 | - Updated LightGBM to 4.6.0 8 | 9 | ## 0.4.0 (2025-01-05) 10 | 11 | - Added support for different prediction types 12 | - Added support for `pandas_categorical` to `predict` method 13 | - Added support for hashes and Rover data frames to `predict` method 14 | - Added support for hashes to `Dataset` 15 | - Added `importance_type` option to `dump_model`, `model_to_string`, and `save_model` methods 16 | - Changed `Dataset` to use column names for feature names with Rover and Daru 17 | - Changed `predict` method to match feature names with Daru 18 | - Dropped support for Ruby < 3.1 19 | 20 | ## 0.3.4 (2024-07-28) 21 | 22 | - Updated LightGBM to 4.5.0 23 | 24 | ## 0.3.3 (2024-06-15) 25 | 26 | - Updated LightGBM to 4.4.0 27 | 28 | ## 0.3.2 (2024-01-25) 29 | 30 | - Updated LightGBM to 4.3.0 31 | 32 | ## 0.3.1 (2023-09-13) 33 | 34 | - Updated LightGBM to 4.1.0 35 | 36 | ## 0.3.0 (2023-07-22) 37 | 38 | - Updated LightGBM to 4.0.0 39 | - Fixed error with `dup` and `clone` 40 | - Dropped support for Ruby < 3 41 | 42 | ## 0.2.7 (2023-02-01) 43 | 44 | - Updated LightGBM to 3.3.5 45 | - Improved ARM detection 46 | 47 | ## 0.2.6 (2021-10-24) 48 | 49 | - Updated LightGBM to 3.3.0 50 | 51 | ## 0.2.5 (2021-07-07) 52 | 53 | - Added `feature_name` method to boosters 54 | 55 | ## 0.2.4 (2021-03-26) 56 | 57 | - Updated LightGBM to 3.2.0 58 | 59 | ## 0.2.3 (2021-03-09) 60 | 61 | - Added ARM shared library for Mac 62 | 63 | ## 0.2.2 (2020-12-07) 64 | 65 | - Updated LightGBM to 3.1.1 66 | 67 | ## 0.2.1 (2020-11-15) 68 | 69 | - Updated LightGBM to 3.1.0 70 | 71 | ## 0.2.0 (2020-08-31) 72 | 73 | - Updated LightGBM to 3.0.0 74 | - Made `best_iteration` and `eval_hist` consistent with Python 75 | 76 | ## 0.1.9 (2020-06-10) 77 | 78 | - Added support for Rover 79 | - Improved performance of Numo datasets 80 | 81 | ## 0.1.8 (2020-05-09) 82 | 83 | - Improved error message when OpenMP not found 
on Mac 84 | - Fixed `Cannot add validation data` error 85 | 86 | ## 0.1.7 (2019-12-05) 87 | 88 | - Updated LightGBM to 2.3.1 89 | - Switched to doubles for datasets and predictions 90 | 91 | ## 0.1.6 (2019-09-29) 92 | 93 | - Updated LightGBM to 2.3.0 94 | - Fixed error with JRuby 95 | 96 | ## 0.1.5 (2019-09-03) 97 | 98 | - Packaged LightGBM with gem 99 | - Added support for missing values 100 | - Added `feature_names` to datasets 101 | - Fixed Daru training and prediction 102 | 103 | ## 0.1.4 (2019-08-19) 104 | 105 | - Friendlier message when LightGBM not found 106 | - Added `Ranker` 107 | - Added early stopping to Scikit-Learn API 108 | - Free memory when objects are destroyed 109 | - Removed unreleased `dump_text` method 110 | 111 | ## 0.1.3 (2019-08-16) 112 | 113 | - Added Scikit-Learn API 114 | - Added support for Daru and Numo::NArray 115 | 116 | ## 0.1.2 (2019-08-15) 117 | 118 | - Added `cv` method 119 | - Added early stopping 120 | - Fixed multiclass classification 121 | 122 | ## 0.1.1 (2019-08-14) 123 | 124 | - Added training API 125 | - Added many methods 126 | 127 | ## 0.1.0 (2019-08-13) 128 | 129 | - First release 130 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "rake" 6 | gem "minitest", ">= 5" 7 | gem "daru" 8 | gem "matrix" 9 | gem "numo-narray", platform: [:mri, :x64_mingw] 10 | gem "rover-df", platform: [:mri, :x64_mingw] 11 | gem "csv" 12 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | Copyright (c) 2019-2025 Andrew Kane 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files 
(the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LightGBM Ruby 2 | 3 | [LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby 4 | 5 | [![Build Status](https://github.com/ankane/lightgbm-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/lightgbm-ruby/actions) 6 | 7 | ## Installation 8 | 9 | Add this line to your application’s Gemfile: 10 | 11 | ```ruby 12 | gem "lightgbm" 13 | ``` 14 | 15 | On Mac, also install OpenMP: 16 | 17 | ```sh 18 | brew install libomp 19 | ``` 20 | 21 | ## Training API 22 | 23 | Prep your data 24 | 25 | ```ruby 26 | x = [[1, 2], [3, 4], [5, 6], [7, 8]] 27 | y = [1, 2, 3, 4] 28 | ``` 29 | 30 | Train a model 31 | 32 | ```ruby 33 | params = {objective: "regression"} 34 | train_set = LightGBM::Dataset.new(x, label: y) 35 | booster = LightGBM.train(params, train_set) 36 | ``` 37 | 38 | Predict 39 | 40 | 
```ruby 41 | booster.predict(x) 42 | ``` 43 | 44 | Save the model to a file 45 | 46 | ```ruby 47 | booster.save_model("model.txt") 48 | ``` 49 | 50 | Load the model from a file 51 | 52 | ```ruby 53 | booster = LightGBM::Booster.new(model_file: "model.txt") 54 | ``` 55 | 56 | Get the importance of features 57 | 58 | ```ruby 59 | booster.feature_importance 60 | ``` 61 | 62 | Early stopping 63 | 64 | ```ruby 65 | LightGBM.train(params, train_set, valid_sets: [train_set, test_set], early_stopping_rounds: 5) 66 | ``` 67 | 68 | CV 69 | 70 | ```ruby 71 | LightGBM.cv(params, train_set, nfold: 5, verbose_eval: true) 72 | ``` 73 | 74 | ## Scikit-Learn API 75 | 76 | Prep your data 77 | 78 | ```ruby 79 | x = [[1, 2], [3, 4], [5, 6], [7, 8]] 80 | y = [1, 2, 3, 4] 81 | ``` 82 | 83 | Train a model 84 | 85 | ```ruby 86 | model = LightGBM::Regressor.new 87 | model.fit(x, y) 88 | ``` 89 | 90 | > For classification, use `LightGBM::Classifier` 91 | 92 | Predict 93 | 94 | ```ruby 95 | model.predict(x) 96 | ``` 97 | 98 | > For classification, use `predict_proba` for probabilities 99 | 100 | Save the model to a file 101 | 102 | ```ruby 103 | model.save_model("model.txt") 104 | ``` 105 | 106 | Load the model from a file 107 | 108 | ```ruby 109 | model.load_model("model.txt") 110 | ``` 111 | 112 | Get the importance of features 113 | 114 | ```ruby 115 | model.feature_importances 116 | ``` 117 | 118 | Early stopping 119 | 120 | ```ruby 121 | model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5) 122 | ``` 123 | 124 | ## Data 125 | 126 | Data can be an array of arrays 127 | 128 | ```ruby 129 | [[1, 2, 3], [4, 5, 6]] 130 | ``` 131 | 132 | Or a Numo array 133 | 134 | ```ruby 135 | Numo::NArray.cast([[1, 2, 3], [4, 5, 6]]) 136 | ``` 137 | 138 | Or a Rover data frame 139 | 140 | ```ruby 141 | Rover.read_csv("houses.csv") 142 | ``` 143 | 144 | Or a Daru data frame 145 | 146 | ```ruby 147 | Daru::DataFrame.from_csv("houses.csv") 148 | ``` 149 | 150 | ## Helpful Resources 151 | 152 
| - [Parameters](https://lightgbm.readthedocs.io/en/latest/Parameters.html) 153 | - [Parameter Tuning](https://lightgbm.readthedocs.io/en/latest/Parameters-Tuning.html) 154 | 155 | ## Related Projects 156 | 157 | - [XGBoost](https://github.com/ankane/xgboost-ruby) - XGBoost for Ruby 158 | - [Eps](https://github.com/ankane/eps) - Machine learning for Ruby 159 | 160 | ## Credits 161 | 162 | This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are: 163 | 164 | - The `get_` and `set_` prefixes are removed from methods 165 | - The default verbosity is `-1` 166 | - With the `cv` method, `stratified` is set to `false` 167 | 168 | Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI. 169 | 170 | ## History 171 | 172 | View the [changelog](https://github.com/ankane/lightgbm-ruby/blob/master/CHANGELOG.md) 173 | 174 | ## Contributing 175 | 176 | Everyone is encouraged to help improve this project. 
# Downloads one prebuilt LightGBM shared library into vendor/ and verifies it.
#
# file   - name of the release asset (also used as the file name in vendor/)
# sha256 - expected lowercase hex SHA-256 digest of the downloaded bytes
#
# Raises RuntimeError ("Bad hash: ...") when the digest does not match; in
# that case nothing is written to vendor/.
def download_file(file, sha256)
  require "open-uri"
  # Digest is used below; require it explicitly instead of relying on
  # another library having loaded it already
  require "digest"

  # also update licenses in vendor/
  version = "4.6.0"

  # the arm64 Mac build is fetched from a separate release location
  url =
    if file == "lib_lightgbm.arm64.dylib"
      "https://github.com/ankane/ml-builds/releases/download/lightgbm-#{version}/#{file}"
    else
      "https://github.com/microsoft/LightGBM/releases/download/v#{version}/#{file}"
    end
  puts "Downloading #{file}..."
  contents = URI.parse(url).read

  # verify before writing so a corrupted download never lands in vendor/
  computed_sha256 = Digest::SHA256.hexdigest(contents)
  raise "Bad hash: #{computed_sha256}" if computed_sha256 != sha256

  dest = "vendor/#{file}"
  File.binwrite(dest, contents)
  puts "Saved #{dest}"
end
25 | end 26 | lib_name = 27 | if RbConfig::CONFIG["host_os"] =~ /darwin/i && RbConfig::CONFIG["host_cpu"] =~ /arm|aarch64/i 28 | "lib_lightgbm.arm64.#{::FFI::Platform::LIBSUFFIX}" 29 | else 30 | "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}" 31 | end 32 | vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__) 33 | self.ffi_lib = [vendor_lib] 34 | 35 | # friendlier error message 36 | autoload :FFI, "lightgbm/ffi" 37 | 38 | class << self 39 | def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true) 40 | booster = Booster.new(params: params, train_set: train_set) 41 | 42 | valid_contain_train = false 43 | valid_sets.zip(valid_names).each_with_index do |(data, name), i| 44 | if data == train_set 45 | booster.train_data_name = name || "training" 46 | valid_contain_train = true 47 | else 48 | # ensure the validation set references the training set 49 | data.reference = train_set 50 | booster.add_valid(data, name || "valid_#{i}") 51 | end 52 | end 53 | 54 | raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set } 55 | 56 | booster.best_iteration = 0 57 | 58 | if early_stopping_rounds 59 | best_score = [] 60 | best_iter = [] 61 | best_message = [] 62 | 63 | puts "Training until validation scores don't improve for #{early_stopping_rounds.to_i} rounds." if verbose_eval 64 | end 65 | 66 | num_boost_round.times do |iteration| 67 | booster.update 68 | 69 | if valid_sets.any? 70 | # print results 71 | messages = [] 72 | 73 | eval_valid = booster.eval_valid 74 | if valid_contain_train 75 | eval_valid = eval_valid + booster.eval_train 76 | end 77 | # not sure why reversed in output 78 | eval_valid.reverse! 
79 | 80 | eval_valid.each do |res| 81 | messages << "%s's %s: %g" % [res[0], res[1], res[2]] 82 | end 83 | 84 | message = "[#{iteration + 1}]\t#{messages.join("\t")}" 85 | 86 | puts message if verbose_eval 87 | 88 | if early_stopping_rounds 89 | stop_early = false 90 | eval_valid.each_with_index do |(_, _, score, higher_better), i| 91 | op = higher_better ? :> : :< 92 | if best_score[i].nil? || score.send(op, best_score[i]) 93 | best_score[i] = score 94 | best_iter[i] = iteration 95 | best_message[i] = message 96 | elsif iteration - best_iter[i] >= early_stopping_rounds 97 | booster.best_iteration = best_iter[i] + 1 98 | puts "Early stopping, best iteration is:\n#{best_message[i]}" if verbose_eval 99 | stop_early = true 100 | break 101 | end 102 | end 103 | 104 | break if stop_early 105 | 106 | if iteration == num_boost_round - 1 107 | booster.best_iteration = best_iter[0] + 1 108 | puts "Did not meet early stopping. Best iteration is: #{best_message[0]}" if verbose_eval 109 | end 110 | end 111 | end 112 | end 113 | 114 | booster 115 | end 116 | 117 | def cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true, early_stopping_rounds: nil, verbose_eval: nil, show_stdv: true) 118 | rand_idx = (0...train_set.num_data).to_a 119 | rand_idx.shuffle!(random: Random.new(seed)) if shuffle 120 | 121 | kstep = rand_idx.size / nfold 122 | test_id = rand_idx.each_slice(kstep).to_a[0...nfold] 123 | train_id = [] 124 | nfold.times do |i| 125 | idx = test_id.dup 126 | idx.delete_at(i) 127 | train_id << idx.flatten 128 | end 129 | 130 | boosters = [] 131 | folds = train_id.zip(test_id) 132 | folds.each do |(train_idx, test_idx)| 133 | fold_train_set = train_set.subset(train_idx) 134 | fold_valid_set = train_set.subset(test_idx) 135 | booster = Booster.new(params: params, train_set: fold_train_set) 136 | booster.add_valid(fold_valid_set, "valid") 137 | boosters << booster 138 | end 139 | 140 | eval_hist = {} 141 | 142 | if early_stopping_rounds 143 | best_score 
= {} 144 | best_iter = {} 145 | best_iteration = nil 146 | end 147 | 148 | num_boost_round.times do |iteration| 149 | boosters.each(&:update) 150 | 151 | scores = {} 152 | boosters.map(&:eval_valid).flat_map(&:reverse).each do |r| 153 | (scores[r[1]] ||= []) << r[2] 154 | end 155 | 156 | message_parts = ["[#{iteration + 1}]"] 157 | 158 | means = {} 159 | scores.each do |eval_name, vals| 160 | mean = mean(vals) 161 | stdev = stdev(vals) 162 | 163 | (eval_hist["#{eval_name}-mean"] ||= []) << mean 164 | (eval_hist["#{eval_name}-stdv"] ||= []) << stdev 165 | 166 | means[eval_name] = mean 167 | 168 | if show_stdv 169 | message_parts << "cv_agg's %s: %g + %g" % [eval_name, mean, stdev] 170 | else 171 | message_parts << "cv_agg's %s: %g" % [eval_name, mean] 172 | end 173 | end 174 | 175 | puts message_parts.join("\t") if verbose_eval 176 | 177 | if early_stopping_rounds 178 | stop_early = false 179 | means.each do |k, score| 180 | # TODO fix higher better 181 | if best_score[k].nil? || score < best_score[k] 182 | best_score[k] = score 183 | best_iter[k] = iteration 184 | elsif iteration - best_iter[k] >= early_stopping_rounds 185 | best_iteration = best_iter[k] 186 | stop_early = true 187 | break 188 | end 189 | end 190 | break if stop_early 191 | end 192 | end 193 | 194 | if early_stopping_rounds 195 | # use best iteration from first metric if not stopped early 196 | best_iteration ||= best_iter[best_iter.keys.first] 197 | eval_hist.each_key do |k| 198 | eval_hist[k] = eval_hist[k].first(best_iteration + 1) 199 | end 200 | end 201 | 202 | eval_hist 203 | end 204 | 205 | private 206 | 207 | def mean(arr) 208 | arr.sum / arr.size.to_f 209 | end 210 | 211 | # don't subtract one from arr.size 212 | def stdev(arr) 213 | m = mean(arr) 214 | sum = 0 215 | arr.each do |v| 216 | sum += (v - m) ** 2 217 | end 218 | Math.sqrt(sum / arr.size) 219 | end 220 | end 221 | end 222 | -------------------------------------------------------------------------------- /lib/lightgbm/booster.rb: 
-------------------------------------------------------------------------------- 1 | module LightGBM 2 | class Booster 3 | include Utils 4 | 5 | attr_accessor :best_iteration, :train_data_name, :params 6 | 7 | def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil) 8 | if model_str 9 | model_from_string(model_str) 10 | elsif model_file 11 | out_num_iterations = ::FFI::MemoryPointer.new(:int) 12 | create_handle do |handle| 13 | safe_call FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, handle) 14 | end 15 | @pandas_categorical = load_pandas_categorical(file_name: model_file) 16 | if params 17 | warn "[lightgbm] Ignoring params argument, using parameters from model file." 18 | end 19 | @params = loaded_param 20 | else 21 | params ||= {} 22 | set_verbosity(params) 23 | create_handle do |handle| 24 | safe_call FFI.LGBM_BoosterCreate(train_set.handle, params_str(params), handle) 25 | end 26 | end 27 | 28 | self.best_iteration = -1 29 | 30 | # TODO get names when loaded from file 31 | @name_valid_sets = [] 32 | end 33 | 34 | def add_valid(data, name) 35 | safe_call FFI.LGBM_BoosterAddValidData(@handle, data.handle) 36 | @name_valid_sets << name 37 | self # consistent with Python API 38 | end 39 | 40 | def current_iteration 41 | out = ::FFI::MemoryPointer.new(:int) 42 | safe_call FFI.LGBM_BoosterGetCurrentIteration(@handle, out) 43 | out.read_int 44 | end 45 | 46 | def dump_model(num_iteration: nil, start_iteration: 0, importance_type: "split") 47 | num_iteration ||= best_iteration 48 | importance_type_int = feature_importance_type_mapper(importance_type) 49 | buffer_len = 1 << 20 50 | out_len = ::FFI::MemoryPointer.new(:int64) 51 | out_str = ::FFI::MemoryPointer.new(:char, buffer_len) 52 | safe_call FFI.LGBM_BoosterDumpModel(@handle, start_iteration, num_iteration, importance_type_int, buffer_len, out_len, out_str) 53 | actual_len = out_len.read_int64 54 | if actual_len > buffer_len 55 | out_str = ::FFI::MemoryPointer.new(:char, 
actual_len) 56 | safe_call FFI.LGBM_BoosterDumpModel(@handle, start_iteration, num_iteration, importance_type_int, actual_len, out_len, out_str) 57 | end 58 | out_str.read_string 59 | end 60 | alias_method :to_json, :dump_model 61 | 62 | def eval_valid 63 | @name_valid_sets.each_with_index.flat_map { |n, i| inner_eval(n, i + 1) } 64 | end 65 | 66 | def eval_train 67 | inner_eval(train_data_name, 0) 68 | end 69 | 70 | def feature_importance(iteration: nil, importance_type: "split") 71 | iteration ||= best_iteration 72 | importance_type_int = feature_importance_type_mapper(importance_type) 73 | num_feature = self.num_feature 74 | out_result = ::FFI::MemoryPointer.new(:double, num_feature) 75 | safe_call FFI.LGBM_BoosterFeatureImportance(@handle, iteration, importance_type_int, out_result) 76 | out_result.read_array_of_double(num_feature).map(&:to_i) 77 | end 78 | 79 | def feature_name 80 | len = self.num_feature 81 | out_len = ::FFI::MemoryPointer.new(:size_t) 82 | buffer_len = 255 83 | out_buffer_len = ::FFI::MemoryPointer.new(:size_t) 84 | out_strs = ::FFI::MemoryPointer.new(:pointer, num_feature) 85 | str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) } 86 | out_strs.write_array_of_pointer(str_ptrs) 87 | safe_call FFI.LGBM_BoosterGetFeatureNames(@handle, len, out_len, buffer_len, out_buffer_len, out_strs) 88 | 89 | actual_len = out_buffer_len.read(:size_t) 90 | if actual_len > buffer_len 91 | str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, actual_len) } 92 | out_strs.write_array_of_pointer(str_ptrs) 93 | safe_call FFI.LGBM_BoosterGetFeatureNames(@handle, len, out_len, actual_len, out_buffer_len, out_strs) 94 | end 95 | 96 | str_ptrs[0, out_len.read(:size_t)].map(&:read_string) 97 | end 98 | 99 | def model_from_string(model_str) 100 | out_num_iterations = ::FFI::MemoryPointer.new(:int) 101 | create_handle do |handle| 102 | safe_call FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, handle) 103 | end 104 | 
@pandas_categorical = load_pandas_categorical(model_str: model_str) 105 | @params = loaded_param 106 | @cached_feature_name = nil 107 | self 108 | end 109 | 110 | def model_to_string(num_iteration: nil, start_iteration: 0, importance_type: "split") 111 | num_iteration ||= best_iteration 112 | importance_type_int = feature_importance_type_mapper(importance_type) 113 | buffer_len = 1 << 20 114 | out_len = ::FFI::MemoryPointer.new(:int64) 115 | out_str = ::FFI::MemoryPointer.new(:char, buffer_len) 116 | safe_call FFI.LGBM_BoosterSaveModelToString(@handle, start_iteration, num_iteration, importance_type_int, buffer_len, out_len, out_str) 117 | actual_len = out_len.read_int64 118 | if actual_len > buffer_len 119 | out_str = ::FFI::MemoryPointer.new(:char, actual_len) 120 | safe_call FFI.LGBM_BoosterSaveModelToString(@handle, start_iteration, num_iteration, importance_type_int, actual_len, out_len, out_str) 121 | end 122 | out_str.read_string 123 | end 124 | 125 | def num_feature 126 | out = ::FFI::MemoryPointer.new(:int) 127 | safe_call FFI.LGBM_BoosterGetNumFeature(@handle, out) 128 | out.read_int 129 | end 130 | alias_method :num_features, :num_feature # legacy typo 131 | 132 | def num_model_per_iteration 133 | out = ::FFI::MemoryPointer.new(:int) 134 | safe_call FFI.LGBM_BoosterNumModelPerIteration(@handle, out) 135 | out.read_int 136 | end 137 | 138 | def num_trees 139 | out = ::FFI::MemoryPointer.new(:int) 140 | safe_call FFI.LGBM_BoosterNumberOfTotalModel(@handle, out) 141 | out.read_int 142 | end 143 | 144 | def predict(data, start_iteration: 0, num_iteration: nil, raw_score: false, pred_leaf: false, pred_contrib: false, **kwargs) 145 | predictor = InnerPredictor.from_booster(self, kwargs.transform_values(&:dup)) 146 | if num_iteration.nil? 
147 | if start_iteration <= 0 148 | num_iteration = best_iteration 149 | else 150 | num_iteration = -1 151 | end 152 | end 153 | predictor.predict( 154 | data, 155 | start_iteration: start_iteration, 156 | num_iteration: num_iteration, 157 | raw_score: raw_score, 158 | pred_leaf: pred_leaf, 159 | pred_contrib: pred_contrib 160 | ) 161 | end 162 | 163 | def save_model(filename, num_iteration: nil, start_iteration: 0, importance_type: "split") 164 | num_iteration ||= best_iteration 165 | importance_type_int = feature_importance_type_mapper(importance_type) 166 | safe_call FFI.LGBM_BoosterSaveModel(@handle, start_iteration, num_iteration, importance_type_int, filename) 167 | self # consistent with Python API 168 | end 169 | 170 | def update 171 | finished = ::FFI::MemoryPointer.new(:int) 172 | safe_call FFI.LGBM_BoosterUpdateOneIter(@handle, finished) 173 | finished.read_int == 1 174 | end 175 | 176 | private 177 | 178 | def create_handle 179 | ::FFI::MemoryPointer.new(:pointer) do |handle| 180 | yield handle 181 | @handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_BoosterFree)) 182 | end 183 | end 184 | 185 | def eval_counts 186 | out = ::FFI::MemoryPointer.new(:int) 187 | safe_call FFI.LGBM_BoosterGetEvalCounts(@handle, out) 188 | out.read_int 189 | end 190 | 191 | def eval_names 192 | eval_counts = self.eval_counts 193 | out_len = ::FFI::MemoryPointer.new(:int) 194 | out_buffer_len = ::FFI::MemoryPointer.new(:size_t) 195 | out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts) 196 | buffer_len = 255 197 | str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) } 198 | out_strs.write_array_of_pointer(str_ptrs) 199 | safe_call FFI.LGBM_BoosterGetEvalNames(@handle, eval_counts, out_len, buffer_len, out_buffer_len, out_strs) 200 | 201 | actual_len = out_buffer_len.read(:size_t) 202 | if actual_len > buffer_len 203 | str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, actual_len) } 204 | 
out_strs.write_array_of_pointer(str_ptrs) 205 | safe_call FFI.LGBM_BoosterGetEvalNames(@handle, eval_counts, out_len, actual_len, out_buffer_len, out_strs) 206 | end 207 | 208 | str_ptrs.map(&:read_string) 209 | end 210 | 211 | def inner_eval(name, i) 212 | eval_names = self.eval_names 213 | 214 | out_len = ::FFI::MemoryPointer.new(:int) 215 | out_results = ::FFI::MemoryPointer.new(:double, eval_names.count) 216 | safe_call FFI.LGBM_BoosterGetEval(@handle, i, out_len, out_results) 217 | vals = out_results.read_array_of_double(out_len.read_int) 218 | 219 | eval_names.zip(vals).map do |eval_name, val| 220 | higher_better = ["auc", "ndcg@", "map@"].any? { |v| eval_name.start_with?(v) } 221 | [name, eval_name, val, higher_better] 222 | end 223 | end 224 | 225 | def num_class 226 | out = ::FFI::MemoryPointer.new(:int) 227 | safe_call FFI.LGBM_BoosterGetNumClasses(@handle, out) 228 | out.read_int 229 | end 230 | 231 | def cached_feature_name 232 | @cached_feature_name ||= feature_name 233 | end 234 | 235 | def feature_importance_type_mapper(importance_type) 236 | case importance_type 237 | when "split" 238 | FFI::C_API_FEATURE_IMPORTANCE_SPLIT 239 | when "gain" 240 | FFI::C_API_FEATURE_IMPORTANCE_GAIN 241 | else 242 | -1 243 | end 244 | end 245 | 246 | def load_pandas_categorical(file_name: nil, model_str: nil) 247 | pandas_key = "pandas_categorical:" 248 | offset = -pandas_key.length 249 | if !file_name.nil? 250 | max_offset = -File.size(file_name) 251 | lines = [] 252 | File.open(file_name, "rb") do |f| 253 | loop do 254 | offset = [offset, max_offset].max 255 | f.seek(offset, IO::SEEK_END) 256 | lines = f.readlines 257 | if lines.length >= 2 || offset == max_offset 258 | break 259 | end 260 | offset *= 2 261 | end 262 | end 263 | last_line = lines[-1].strip 264 | if !last_line.start_with?(pandas_key) 265 | last_line = lines[-2].strip 266 | end 267 | elsif !model_str.nil? 
268 | idx = model_str[..offset].rindex("\n") 269 | last_line = model_str[idx..].strip 270 | end 271 | if last_line.start_with?(pandas_key) 272 | pandas_categorical = JSON.parse(last_line[pandas_key.length..]) 273 | pandas_categorical.map { |cats| cats.each_with_index.to_h } 274 | end 275 | end 276 | 277 | def loaded_param 278 | buffer_len = 1 << 20 279 | out_len = ::FFI::MemoryPointer.new(:int64) 280 | out_str = ::FFI::MemoryPointer.new(:char, buffer_len) 281 | safe_call FFI.LGBM_BoosterGetLoadedParam(@handle, buffer_len, out_len, out_str) 282 | actual_len = out_len.read_int64 283 | if actual_len > buffer_len 284 | out_str = ::FFI::MemoryPointer.new(:char, actual_len) 285 | safe_call FFI.LGBM_BoosterGetLoadedParam(@handle, actual_len, out_len, out_str) 286 | end 287 | JSON.parse(out_str.read_string) 288 | end 289 | end 290 | end 291 | -------------------------------------------------------------------------------- /lib/lightgbm/classifier.rb: -------------------------------------------------------------------------------- 1 | module LightGBM 2 | class Classifier < Model 3 | def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options) 4 | super 5 | end 6 | 7 | def fit(x, y, eval_set: nil, eval_names: [], categorical_feature: "auto", early_stopping_rounds: nil, verbose: true) 8 | n_classes = y.uniq.size 9 | 10 | params = @params.dup 11 | if n_classes > 2 12 | params[:objective] ||= "multiclass" 13 | params[:num_class] = n_classes 14 | else 15 | params[:objective] ||= "binary" 16 | end 17 | 18 | train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params) 19 | valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) } 20 | 21 | @booster = LightGBM.train(params, train_set, 22 | num_boost_round: @n_estimators, 23 | early_stopping_rounds: early_stopping_rounds, 24 | verbose_eval: verbose, 25 | valid_sets: valid_sets, 26 | valid_names: eval_names 27 | ) 
module LightGBM
  # Ruby wrapper around a native LightGBM dataset handle.
  #
  # A dataset is built from a file path (String), a Matrix, a Daru or Rover
  # data frame, a Numo array, an array of hashes, or any object whose +to_a+
  # yields rows of equal length. When +used_indices+ is given, a row subset
  # of +reference+ is created instead and +data+ is ignored.
  class Dataset
    include Utils

    attr_reader :data, :params

    # data                - input data (see class comment); may be nil for subsets
    # label/weight/group  - optional fields applied right after construction
    # params              - Hash of dataset parameters
    # reference           - another Dataset whose native handle is passed as reference
    # used_indices        - row indices of +reference+ to build a subset from
    # categorical_feature - "auto" or a list joined into the
    #                       "categorical_feature" parameter
    # feature_name(s)     - explicit feature names; "auto" derives them from
    #                       data frame columns / hash keys when available
    def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_name: nil, feature_names: nil)
      @data = data
      @label = label
      @weight = weight
      @group = group
      @params = params
      @reference = reference
      @used_indices = used_indices
      @categorical_feature = categorical_feature
      @feature_name = feature_name || feature_names || "auto"

      construct
    end

    # Returns the "label" field read back from the native dataset
    # (nil when the library reports no data for it).
    def label
      field("label")
    end

    # Returns the "weight" field read back from the native dataset
    # (nil when the library reports no data for it).
    def weight
      field("weight")
    end

    # Returns the feature names stored in the native dataset as an Array of
    # Strings.
    def feature_name
      # must preallocate space
      num_feature_names = ::FFI::MemoryPointer.new(:int)
      out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
      len = 1000
      out_strs = ::FFI::MemoryPointer.new(:pointer, len)
      buffer_len = 255
      str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
      out_strs.write_array_of_pointer(str_ptrs)
      safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, len, num_feature_names, buffer_len, out_buffer_len, out_strs)

      # retry with larger buffers when the first guess was too small in either
      # the number of names or the length of the longest name
      num_features = num_feature_names.read_int
      actual_len = out_buffer_len.read(:size_t)
      if num_features > len || actual_len > buffer_len
        out_strs = ::FFI::MemoryPointer.new(:pointer, num_features) if num_features > len
        str_ptrs = num_features.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
        out_strs.write_array_of_pointer(str_ptrs)
        safe_call FFI.LGBM_DatasetGetFeatureNames(@handle, num_features, num_feature_names, actual_len, out_buffer_len, out_strs)
      end

      # should be the same, but get number of features
      # from most recent call (instead of num_features)
      str_ptrs[0, num_feature_names.read_int].map(&:read_string)
    end
    alias_method :feature_names, :feature_name

    # Stores +label+ and writes it to the native dataset as float32 values.
    def label=(label)
      @label = label
      set_field("label", label)
    end

    # Stores +weight+ and writes it to the native dataset as float32 values.
    def weight=(weight)
      @weight = weight
      set_field("weight", weight)
    end

    # Stores +group+ and writes it to the native dataset as int32 values.
    def group=(group)
      @group = group
      set_field("group", group, type: :int32)
    end

    # Sets the feature names on the native dataset. Names are converted with
    # +to_s+ first.
    def feature_name=(feature_names)
      feature_names = feature_names.map(&:to_s)
      # construct re-applies @feature_name, so it must be updated here;
      # otherwise names set after construction are lost when the dataset is
      # rebuilt (e.g. through reference=)
      @feature_name = feature_names
      # kept for backward compatibility with the previous variable name
      @feature_names = feature_names
      c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
      # keep reference to string pointers
      str_ptrs = feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) }
      c_feature_names.write_array_of_pointer(str_ptrs)
      safe_call FFI.LGBM_DatasetSetFeatureNames(@handle, c_feature_names, feature_names.size)
    end
    alias_method :feature_names=, :feature_name=

    # Changes the reference dataset and rebuilds the native dataset when the
    # reference actually differs.
    # TODO only update reference if not in chain
    def reference=(reference)
      if reference != @reference
        @reference = reference
        construct
      end
    end

    # Number of rows reported by the native dataset.
    def num_data
      out = ::FFI::MemoryPointer.new(:int)
      safe_call FFI.LGBM_DatasetGetNumData(@handle, out)
      out.read_int
    end

    # Number of features reported by the native dataset.
    def num_feature
      out = ::FFI::MemoryPointer.new(:int)
      safe_call FFI.LGBM_DatasetGetNumFeature(@handle, out)
      out.read_int
    end

    # Saves the dataset to +filename+ through LGBM_DatasetSaveBinary.
    def save_binary(filename)
      safe_call FFI.LGBM_DatasetSaveBinary(@handle, filename)
    end

    # Returns a new Dataset made of the rows at +used_indices+, using this
    # dataset as reference. +params+ defaults to this dataset's params.
    def subset(used_indices, params: nil)
      # categorical_feature passed via params
      params ||= self.params
      Dataset.new(nil,
        params: params,
        reference: self,
        used_indices: used_indices
      )
    end

    # Native dataset handle (passed to FFI calls by other classes).
    def handle
      @handle
    end

    private

    # Builds the native dataset from the stored inputs, then applies label,
    # weight, group and feature names when present.
    def construct
      data = @data
      used_indices = @used_indices

      # TODO stringify params
      # NOTE: when @params was supplied, this is the same Hash object, so the
      # categorical_feature / verbosity entries below are written into it
      params = @params || {}
      if @categorical_feature != "auto" && @categorical_feature.any?
        params["categorical_feature"] ||= @categorical_feature.join(",")
      end
      set_verbosity(params)

      handle = ::FFI::MemoryPointer.new(:pointer)
      parameters = params_str(params)
      reference = @reference.handle if @reference
      if used_indices
        used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
        used_row_indices.write_array_of_int32(used_indices)
        safe_call FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, handle)
      elsif data.is_a?(String)
        safe_call FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, handle)
      else
        if matrix?(data)
          nrow = data.row_count
          ncol = data.column_count
          flat_data = data.to_a.flatten
        elsif daru?(data)
          if @feature_name == "auto"
            @feature_name = data.vectors.to_a
          end
          nrow, ncol = data.shape
          flat_data = data.map_rows(&:to_a).flatten
        elsif numo?(data)
          # numo data is copied as raw bytes below, so no flat_data is built
          nrow, ncol = data.shape
        elsif rover?(data)
          if @feature_name == "auto"
            @feature_name = data.keys
          end
          data = data.to_numo
          nrow, ncol = data.shape
        elsif data.is_a?(Array) && data.first.is_a?(Hash)
          # keys of the first row define the column order for every row
          keys = data.first.keys
          if @feature_name == "auto"
            @feature_name = keys
          end
          nrow = data.count
          ncol = data.first.count
          flat_data = data.flat_map { |v| v.fetch_values(*keys) }
        else
          data = data.to_a
          check_2d_array(data)
          nrow = data.count
          ncol = data.first.count
          flat_data = data.flatten
        end

        c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
        if numo?(data)
          c_data.write_bytes(data.cast_to(Numo::DFloat).to_string)
        else
          handle_missing(flat_data)
          c_data.write_array_of_double(flat_data)
        end

        # the literal 1 is presumably the C API's is_row_major flag — confirm against c_api.h
        safe_call FFI.LGBM_DatasetCreateFromMat(c_data, FFI::C_API_DTYPE_FLOAT64, nrow, ncol, 1, parameters, reference, handle)
      end
      if used_indices
        # NOTE(review): subset handles are stored as plain pointers and are not
        # registered with LGBM_DatasetFree here — confirm this is intentional
        @handle = handle.read_pointer
      else
        @handle = ::FFI::AutoPointer.new(handle.read_pointer, FFI.method(:LGBM_DatasetFree))
      end

      self.label = @label if @label
      self.weight = @weight if @weight
      self.group = @group if @group
      self.feature_name = @feature_name if @feature_name && @feature_name != "auto"
    end

    # Writes the dataset as text to +filename+ through LGBM_DatasetDumpText.
    def dump_text(filename)
      safe_call FFI.LGBM_DatasetDumpText(@handle, filename)
    end

    # Reads +field_name+ from the native dataset.
    #
    # Returns an Array decoded according to the element type reported by the
    # library, or nil when the library returns a NULL data pointer.
    # Raises Error when the reported element type is not recognized.
    def field(field_name)
      out_len = ::FFI::MemoryPointer.new(:int)
      # the library stores a pointer to its own buffer here, so this must be
      # pointer-sized regardless of how many rows the dataset has
      out_ptr = ::FFI::MemoryPointer.new(:pointer)
      out_type = ::FFI::MemoryPointer.new(:int)
      safe_call FFI.LGBM_DatasetGetField(@handle, field_name, out_len, out_ptr, out_type)

      ptr = out_ptr.read_pointer
      return nil if ptr.null?

      len = out_len.read_int
      case out_type.read_int
      when FFI::C_API_DTYPE_FLOAT32
        ptr.read_array_of_float(len)
      when FFI::C_API_DTYPE_FLOAT64
        ptr.read_array_of_double(len)
      when FFI::C_API_DTYPE_INT32
        ptr.read_array_of_int32(len)
      when FFI::C_API_DTYPE_INT64
        ptr.read_array_of_int64(len)
      else
        raise Error, "Unknown field type"
      end
    end

    # Writes +data+ to +field_name+ on the native dataset, as int32 values when
    # +type+ is :int32 and as float32 values otherwise.
    def set_field(field_name, data, type: :float)
      data = data.to_a unless data.is_a?(Array)
      if type == :int32
        c_data = ::FFI::MemoryPointer.new(:int32, data.count)
        c_data.write_array_of_int32(data)
        safe_call FFI.LGBM_DatasetSetField(@handle, field_name, c_data, data.count, FFI::C_API_DTYPE_INT32)
      else
        c_data = ::FFI::MemoryPointer.new(:float, data.count)
        c_data.write_array_of_float(data)
        safe_call FFI.LGBM_DatasetSetField(@handle, field_name, c_data, data.count, FFI::C_API_DTYPE_FLOAT32)
      end
    end
  end
end
pointer int int], :int 44 | attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int 45 | attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int 46 | attach_function :LGBM_DatasetGetNumFeature, %i[pointer pointer], :int 47 | 48 | # booster 49 | attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int 50 | attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int 51 | attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int 52 | attach_function :LGBM_BoosterGetLoadedParam, %i[pointer int64 pointer pointer], :int 53 | attach_function :LGBM_BoosterFree, %i[pointer], :int 54 | attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int 55 | attach_function :LGBM_BoosterGetNumClasses, %i[pointer pointer], :int 56 | attach_function :LGBM_BoosterUpdateOneIter, %i[pointer pointer], :int 57 | attach_function :LGBM_BoosterGetCurrentIteration, %i[pointer pointer], :int 58 | attach_function :LGBM_BoosterNumModelPerIteration, %i[pointer pointer], :int 59 | attach_function :LGBM_BoosterNumberOfTotalModel, %i[pointer pointer], :int 60 | attach_function :LGBM_BoosterGetEvalCounts, %i[pointer pointer], :int 61 | attach_function :LGBM_BoosterGetEvalNames, %i[pointer int pointer size_t pointer pointer], :int 62 | attach_function :LGBM_BoosterGetFeatureNames, %i[pointer int pointer size_t pointer pointer], :int 63 | attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int 64 | attach_function :LGBM_BoosterGetEval, %i[pointer int pointer pointer], :int 65 | attach_function :LGBM_BoosterCalcNumPredict, %i[pointer int int int int pointer], :int 66 | attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int int string pointer pointer], :int 67 | attach_function :LGBM_BoosterSaveModel, %i[pointer int int int string], :int 68 | attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int int64 pointer pointer], :int 69 | 
module LightGBM
  # Runs predictions through a booster's native handle.
  class InnerPredictor
    include Utils

    MAX_INT32 = (1 << 31) - 1

    # Captures the booster's native handle and pandas categorical mapping and
    # serializes +pred_parameter+ into a parameter string.
    def initialize(booster, pred_parameter)
      # the booster is retained so cached_feature_name can be delegated to it
      @booster = booster
      @handle = booster.instance_variable_get(:@handle)
      @pandas_categorical = booster.instance_variable_get(:@pandas_categorical)
      @pred_parameter = params_str(pred_parameter)
    end

    # Alternate constructor; equivalent to +new+.
    def self.from_booster(booster, pred_parameter)
      new(booster, pred_parameter)
    end

    # Predicts for a single row (flat Array or Hash) or for many rows
    # (Array of Arrays/Hashes, Daru or Rover data frame, or anything
    # responding to +to_a+).
    #
    # Returns one prediction for a single row, otherwise one entry per row.
    # Entries are nested arrays when the library returns several values per
    # row. Raises TypeError for a Dataset argument.
    def predict(data, start_iteration: 0, num_iteration: -1, raw_score: false, pred_leaf: false, pred_contrib: false)
      raise TypeError, "Cannot use Dataset instance for prediction, please use raw data instead" if data.is_a?(Dataset)

      # when several flags are set, contrib wins over leaf, which wins over raw score
      predict_type =
        if pred_contrib
          FFI::C_API_PREDICT_CONTRIB
        elsif pred_leaf
          FFI::C_API_PREDICT_LEAF_INDEX
        elsif raw_score
          FFI::C_API_PREDICT_RAW_SCORE
        else
          FFI::C_API_PREDICT_NORMAL
        end

      rows, singular = normalize_input(data)

      if @pandas_categorical&.any?
        apply_pandas_categorical(rows, @booster.params["categorical_feature"], @pandas_categorical)
      end

      preds, nrow = pred_for_array(rows, start_iteration, num_iteration, predict_type)
      preds = preds.map(&:to_i) if pred_leaf

      # more than one value per row: regroup the flat result row by row
      unless preds.size == nrow
        unless (preds.size % nrow).zero?
          raise Error, "Length of predict result (#{preds.size}) cannot be divide nrow (#{nrow})"
        end
        preds = preds.each_slice(preds.size / nrow).to_a
      end

      singular ? preds.first : preds
    end

    private

    # Converts the supported input types into an Array of row Arrays.
    # Returns [rows, singular] where +singular+ tells whether the caller
    # passed a single row.
    def normalize_input(data)
      if daru?(data)
        [data[*cached_feature_name].map_rows(&:to_a), false]
      elsif data.is_a?(Hash)
        # order the values to match the model's feature names
        [[sorted_feature_values(data)], true]
      elsif data.is_a?(Array) && data.first.is_a?(Hash)
        # if the first element is a hash, all elements are assumed to be
        [data.map { |row| sorted_feature_values(row) }, false]
      elsif rover?(data)
        # TODO improve performance
        [data[cached_feature_name].to_numo.to_a, false]
      else
        rows = data.to_a
        singular = !rows.first.is_a?(Array)
        rows = [rows] if singular
        check_2d_array(rows)
        # rows are rewritten in place by apply_pandas_categorical, so copy them
        rows = rows.map(&:dup) if @pandas_categorical&.any?
        [rows, singular]
      end
    end

    # Rejects inputs with more rows than fit in a 32-bit integer, then predicts.
    def pred_for_array(input, start_iteration, num_iteration, predict_type)
      raise Error, "Not supported" if input.count > MAX_INT32

      inner_predict_array(input, start_iteration, num_iteration, predict_type)
    end

    # Copies +input+ into a double buffer and calls LGBM_BoosterPredictForMat.
    # Returns [flat predictions, number of rows].
    def inner_predict_array(input, start_iteration, num_iteration, predict_type)
      nrow = input.count
      expected = num_preds(start_iteration, num_iteration, nrow, predict_type)

      values = input.flatten
      handle_missing(values)
      ncol = input.first.count
      matrix = ::FFI::MemoryPointer.new(:double, nrow * ncol)
      matrix.write_array_of_double(values)

      written_ptr = ::FFI::MemoryPointer.new(:int64)
      result = ::FFI::MemoryPointer.new(:double, expected)
      safe_call FFI.LGBM_BoosterPredictForMat(@handle, matrix, FFI::C_API_DTYPE_FLOAT64, nrow, ncol, 1, predict_type, start_iteration, num_iteration, @pred_parameter, written_ptr, result)

      written = written_ptr.read_int64
      raise Error, "Wrong length for predict results" if expected != written

      [result.read_array_of_double(written), nrow]
    end

    # Asks the library how many values a prediction over +nrow+ rows yields.
    def num_preds(start_iteration, num_iteration, nrow, predict_type)
      count = ::FFI::MemoryPointer.new(:int64)
      safe_call FFI.LGBM_BoosterCalcNumPredict(@handle, nrow, predict_type, start_iteration, num_iteration, count)
      count.read_int64
    end

    # Returns the hash's values in the order of the model's feature names.
    # Keys are compared as strings; a missing feature raises KeyError.
    def sorted_feature_values(input_hash)
      by_name = input_hash.transform_keys(&:to_s)
      by_name.fetch_values(*cached_feature_name)
    end

    def cached_feature_name
      @booster.send(:cached_feature_name)
    end

    # Replaces categorical values in each row with their code from
    # +pandas_categorical+. nil stays nil, an unknown String becomes nil,
    # and any other unknown value raises ArgumentError.
    def apply_pandas_categorical(data, categorical_feature, pandas_categorical)
      (categorical_feature || []).each_with_index do |column, position|
        codes = pandas_categorical[position]
        data.each do |row|
          value = row[column]
          next if value.nil?

          row[column] =
            codes.fetch(value) do
              raise ArgumentError, "expected categorical value" unless value.is_a?(String)
              nil
            end
        end
      end
    end
  end
end
module LightGBM
  # Learning-to-rank estimator; the objective defaults to "lambdarank".
  class Ranker < Model
    def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "lambdarank", **options)
      super
    end

    # Trains the booster.
    #
    # x, y        - training features and labels
    # group       - query group sizes for the training data
    # eval_set    - optional list of [x, y] validation pairs
    # eval_names  - optional names for the validation sets
    # eval_group  - query group sizes, one entry per eval_set pair
    #
    # Raises ArgumentError when eval_set and eval_group differ in length.
    # Returns nil.
    def fit(x, y, group:, eval_set: nil, eval_names: [], eval_group: nil, categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
      eval_sets = Array(eval_set)
      eval_groups = Array(eval_group)
      if eval_sets.size != eval_groups.size
        raise ArgumentError, "eval_group must contain one group array per eval_set entry"
      end

      # pass the model params to the datasets, as Regressor and Classifier do
      train_set = Dataset.new(x, label: y, group: group, categorical_feature: categorical_feature, params: @params)
      valid_sets =
        eval_sets.zip(eval_groups).map do |(valid_x, valid_y), valid_group|
          Dataset.new(valid_x, label: valid_y, group: valid_group, reference: train_set, params: @params)
        end

      @booster = LightGBM.train(@params, train_set,
        num_boost_round: @n_estimators,
        early_stopping_rounds: early_stopping_rounds,
        verbose_eval: verbose,
        valid_sets: valid_sets,
        valid_names: eval_names
      )
      nil
    end

    # Returns the booster's predictions for +data+.
    def predict(data, num_iteration: nil)
      @booster.predict(data, num_iteration: num_iteration)
    end
  end
end

module LightGBM
  # Regression estimator; the objective defaults to "regression".
  class Regressor < Model
    def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "regression", **options)
      super
    end

    # Trains the booster on x/y, optionally evaluating on +eval_set+
    # (a list of [x, y] pairs). Returns nil.
    def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
      train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
      valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }

      @booster = LightGBM.train(@params, train_set,
        num_boost_round: @n_estimators,
        early_stopping_rounds: early_stopping_rounds,
        verbose_eval: verbose,
        valid_sets: valid_sets,
        valid_names: eval_names
      )
      nil
    end

    # Returns the booster's predictions for +data+.
    def predict(data, num_iteration: nil)
      @booster.predict(data, num_iteration: num_iteration)
    end
  end
end
module LightGBM
  # Private helpers shared by the classes that talk to the native library.
  module Utils
    private

    # Raises Error with the library's last error message when +err+ is non-zero.
    def safe_call(err)
      raise Error, FFI.LGBM_GetLastError if err != 0
    end

    # Serializes a params Hash into "key=value key=value" form. Array values
    # are joined with commas. Whitespace in keys or values is rejected
    # (ArgumentError) to prevent parameter injection.
    def params_str(params)
      params.map { |k, v| [check_param(k.to_s), check_param(Array(v).join(",").to_s)].join("=") }.join(" ")
    end

    # Returns +v+ unchanged, or raises ArgumentError when it contains whitespace.
    def check_param(v)
      raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
      v
    end

    # Changes the default verbosity to -1 by writing "verbosity" into +params+
    # (the Hash is modified in place).
    #
    # Nothing is written when the caller already supplied "verbosity" or its
    # alias "verbose": the canonical key takes priority inside LightGBM, so
    # adding it would silently override the caller's "verbose" setting.
    def set_verbosity(params)
      params_keys = params.keys.map(&:to_s)
      unless params_keys.include?("verbosity") || params_keys.include?("verbose")
        params["verbosity"] = -1
      end
    end

    # Raises ArgumentError unless every row has the same size as the first one.
    def check_2d_array(data)
      ncol = data.first&.size || 0
      if !data.all? { |r| r.size == ncol }
        raise ArgumentError, "Rows have different sizes"
      end
    end

    # Replaces nil entries with NaN in place.
    # for categorical, NaN and negative value are the same
    def handle_missing(data)
      data.map! { |v| v.nil? ? Float::NAN : v }
    end

    # The predicates below check that the optional library is loaded before
    # testing the type, so they are safe when the library is absent.

    def matrix?(data)
      defined?(Matrix) && data.is_a?(Matrix)
    end

    def daru?(data)
      defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
    end

    def numo?(data)
      defined?(Numo::NArray) && data.is_a?(Numo::NArray)
    end

    def rover?(data)
      defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
    end
  end
end
= [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]] 13 | booster = LightGBM::Booster.new(model_str: File.read("test/support/model.txt")) 14 | y_pred = booster.predict(x_test) 15 | assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], y_pred.first(2) 16 | end 17 | 18 | def test_model_from_string 19 | x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]] 20 | booster = LightGBM.train(binary_params, binary_train) 21 | booster.model_from_string(File.read("test/support/model.txt")) 22 | y_pred = booster.predict(x_test) 23 | assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], y_pred.first(2) 24 | end 25 | 26 | def test_feature_importance 27 | assert_equal [280, 285, 335, 148], booster.feature_importance 28 | end 29 | 30 | def test_feature_name 31 | assert_equal ["x0", "x1", "x2", "x3"], booster.feature_name 32 | end 33 | 34 | def test_feature_importance_bad_importance_type 35 | error = assert_raises(LightGBM::Error) do 36 | booster.feature_importance(importance_type: "bad") 37 | end 38 | assert_includes error.message, "Unknown importance type" 39 | end 40 | 41 | def test_predict_hash 42 | pred = booster.predict({x0: 3.7, x1: 1.2, x2: 7.2, x3: 9.0}) 43 | assert_in_delta 0.9823112229173586, pred 44 | 45 | pred = booster.predict({"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7}) 46 | assert_in_delta 0.9823112229173586, pred 47 | 48 | pred = 49 | booster.predict([ 50 | {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7}, 51 | {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5} 52 | ]) 53 | assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2) 54 | 55 | assert_raises(KeyError) do 56 | booster.predict({"x0" => 3.7}) 57 | end 58 | end 59 | 60 | def test_predict_daru 61 | x_test = 62 | Daru::DataFrame.new([ 63 | {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7}, 64 | {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5} 65 | ]) 66 | pred = booster.predict(x_test) 67 | assert_elements_in_delta [0.9823112229173586, 
0.9583143724610858], pred.first(2) 68 | 69 | assert_raises(IndexError) do 70 | booster.predict(Daru::DataFrame.new([{"x0" => 3.7}])) 71 | end 72 | end 73 | 74 | def test_predict_rover 75 | skip unless numo? 76 | 77 | require "rover" 78 | x_test = 79 | Rover::DataFrame.new([ 80 | {"x3" => 9.0, "x2" => 7.2, "x1" => 1.2, "x0" => 3.7}, 81 | {"x3" => 0.0, "x2" => 7.9, "x1" => 0.5, "x0" => 7.5} 82 | ]) 83 | pred = booster.predict(x_test) 84 | assert_elements_in_delta [0.9823112229173586, 0.9583143724610858], pred.first(2) 85 | 86 | assert_raises(KeyError) do 87 | booster.predict(Rover::DataFrame.new([{"x0" => 3.7}])) 88 | end 89 | end 90 | 91 | def test_predict_array_different_sizes 92 | x_test = [[1, 2], [3, 4, 5]] 93 | error = assert_raises(ArgumentError) do 94 | booster.predict(x_test) 95 | end 96 | assert_equal "Rows have different sizes", error.message 97 | end 98 | 99 | def test_predict_raw_score 100 | x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]] 101 | expected = [0.9823112229173586, 0.9583143724610858] 102 | 103 | y_pred = booster.predict(x_test, raw_score: true) 104 | assert_elements_in_delta expected, y_pred.first(2) 105 | 106 | y_pred = booster.predict(x_test[0], raw_score: true) 107 | assert_in_delta expected[0], y_pred 108 | end 109 | 110 | def test_predict_pred_leaf 111 | x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]] 112 | expected = [[9, 8, 8, 11, 8, 6, 10, 12, 1, 10, 9, 10, 12, 5, 11, 9, 6, 4, 5, 12, 9, 11, 9, 11, 2, 10, 2, 10, 3, 5, 10, 6, 1, 5, 10, 10, 9, 4, 5, 4, 6, 5, 6, 6, 4, 6, 4, 10, 10, 3, 4, 4, 6, 3, 9, 11, 5, 4, 3, 6, 7, 3, 6, 7, 5, 10, 10, 6, 4, 5, 5, 9, 6, 6, 2, 2, 4, 9, 4, 3, 9, 4, 6, 11, 5, 5, 0, 9, 12, 10, 12, 4, 0, 8, 4, 8, 11, 0, 3, 10], [6, 1, 9, 7, 9, 8, 1, 7, 5, 1, 1, 1, 9, 10, 1, 1, 10, 9, 1, 11, 8, 2, 10, 3, 5, 10, 6, 0, 2, 5, 0, 0, 0, 0, 0, 0, 8, 1, 0, 0, 0, 10, 0, 0, 2, 0, 0, 9, 2, 9, 3, 1, 2, 2, 7, 9, 10, 1, 4, 4, 9, 10, 0, 1, 3, 11, 2, 5, 1, 1, 7, 8, 5, 1, 10, 10, 5, 4, 1, 10, 2, 1, 4, 2, 2, 2, 2, 10, 2, 9, 2, 11, 
2, 5, 1, 11, 2, 9, 7, 7]] 113 | 114 | y_pred = booster.predict(x_test, pred_leaf: true) 115 | assert_equal expected, y_pred.first(2) 116 | 117 | y_pred = booster.predict(x_test[0], pred_leaf: true) 118 | assert_equal expected[0], y_pred 119 | end 120 | 121 | def test_predict_pred_contrib 122 | x_test = [[3.7, 1.2, 7.2, 9.0], [7.5, 0.5, 7.9, 0.0]] 123 | expected = [[-0.0733949225678886, -0.24289592050101766, 0.24183795683166504, 0.063430775771174, 0.9933333333834246], [0.1094902954684793, -0.2810485083947154, 0.26691627597706397, -0.13037702397316747, 0.9933333333834246]] 124 | 125 | y_pred = booster.predict(x_test, pred_contrib: true) 126 | assert_elements_in_delta expected[0], y_pred[0] 127 | assert_elements_in_delta expected[1], y_pred[1] 128 | 129 | y_pred = booster.predict(x_test[0], pred_contrib: true) 130 | assert_elements_in_delta expected[0], y_pred 131 | end 132 | 133 | def test_predict_pandas_categorical_model_file 134 | x_test = [[3.7, 1.2, 7.2, "cat9"], [7.5, 0.5, 7.9, "cat0"]] 135 | booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt") 136 | y_pred = booster.predict(x_test) 137 | assert_elements_in_delta [0.996415541144579, 1.0809369939979934], y_pred.first(2) 138 | end 139 | 140 | def test_predict_pandas_categorical_model_str 141 | x_test = [[3.7, 1.2, 7.2, "cat9"], [7.5, 0.5, 7.9, "cat0"]] 142 | booster = LightGBM::Booster.new(model_str: File.read("test/support/categorical.txt")) 143 | y_pred = booster.predict(x_test) 144 | assert_elements_in_delta [0.996415541144579, 1.0809369939979934], y_pred.first(2) 145 | end 146 | 147 | def test_predict_pandas_categorical_missing_category 148 | booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt") 149 | assert_in_delta 0.996415541144579, booster.predict([3.7, 1.2, 7.2, nil]) 150 | end 151 | 152 | def test_predict_pandas_categorical_new_category 153 | booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt") 154 | assert_in_delta 0.996415541144579, 
booster.predict([3.7, 1.2, 7.2, "cat10"]) 155 | end 156 | 157 | def test_predict_pandas_categorical_invalid_category 158 | booster = LightGBM::Booster.new(model_file: "test/support/categorical.txt") 159 | error = assert_raises(ArgumentError) do 160 | booster.predict([7.5, 0.5, 7.9, true]) 161 | end 162 | assert_equal "expected categorical value", error.message 163 | end 164 | 165 | def test_model_to_string 166 | assert booster.model_to_string 167 | end 168 | 169 | def test_to_json 170 | assert JSON.parse(booster.to_json) 171 | end 172 | 173 | def test_dump_model 174 | assert JSON.parse(booster.dump_model) 175 | end 176 | 177 | def test_current_iteration 178 | assert_equal 100, booster.current_iteration 179 | end 180 | 181 | def test_num_model_per_iteration 182 | assert_equal 1, booster.num_model_per_iteration 183 | end 184 | 185 | def test_num_trees 186 | assert_equal 100, booster.num_trees 187 | end 188 | 189 | def test_copy 190 | booster.dup 191 | booster.clone 192 | end 193 | 194 | private 195 | 196 | def booster 197 | @booster ||= LightGBM::Booster.new(model_file: "test/support/model.txt") 198 | end 199 | end 200 | -------------------------------------------------------------------------------- /test/classifier_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class ClassifierTest < Minitest::Test 4 | def test_binary 5 | x_train, y_train, x_test, _ = binary_data 6 | 7 | model = LightGBM::Classifier.new 8 | model.fit(x_train, y_train) 9 | y_pred = model.predict(x_test) 10 | expected = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1] 11 | assert_equal expected, y_pred.first(100) 12 | 13 | y_pred_proba = model.predict_proba(x_test) 14 
| expected = [9.243317488749625e-06, 0.9999907566825113] 15 | assert_elements_in_delta expected, y_pred_proba.first 16 | 17 | expected = [399, 367, 419, 140] 18 | assert_equal expected, model.feature_importances 19 | 20 | model.save_model(tempfile) 21 | 22 | model = LightGBM::Classifier.new 23 | model.load_model(tempfile) 24 | assert_equal y_pred, model.predict(x_test) 25 | end 26 | 27 | def test_multiclass 28 | x_train, y_train, x_test, _ = multiclass_data 29 | 30 | model = LightGBM::Classifier.new 31 | model.fit(x_train, y_train) 32 | y_pred = model.predict(x_test) 33 | expected = [1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] 34 | assert_equal expected, y_pred.first(100) 35 | 36 | y_pred_proba = model.predict_proba(x_test) 37 | expected = [0.00036627031584163575, 0.9456350323547973, 0.053998697329361176] 38 | assert_elements_in_delta expected, y_pred_proba.first 39 | 40 | expected = [1118, 1060, 1272, 441] 41 | assert_equal expected, model.feature_importances 42 | 43 | model.save_model(tempfile) 44 | 45 | model = LightGBM::Classifier.new 46 | model.load_model(tempfile) 47 | assert_equal y_pred, model.predict(x_test) 48 | end 49 | 50 | def test_early_stopping 51 | x_train, y_train, x_test, y_test = multiclass_data 52 | 53 | model = LightGBM::Classifier.new 54 | model.fit(x_train, y_train, early_stopping_rounds: 5, eval_set: [[x_test, y_test]], verbose: false) 55 | assert_equal 54, model.best_iteration 56 | end 57 | 58 | def test_missing_numeric 59 | x_train, y_train, x_test, _ = multiclass_data 60 | 61 | x_train = x_train.map(&:dup) 62 | x_test = x_test.map(&:dup) 63 | [x_train, x_test].each do |xt| 64 | xt.each do |x| 65 | x.size.times do |i| 66 | x[i] = nil if x[i] == 3.7 67 | end 68 | end 69 | end 70 | 71 | 
model = LightGBM::Classifier.new 72 | model.fit(x_train, y_train) 73 | 74 | y_pred = model.predict(x_test) 75 | expected = [1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 2, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] 76 | assert_equal expected, y_pred.first(100) 77 | 78 | expected = [1140, 1046, 1309, 427] 79 | assert_equal expected, model.feature_importances 80 | end 81 | 82 | def test_missing_categorical 83 | x_train, y_train, x_test, _ = multiclass_data 84 | 85 | x_train = x_train.map(&:dup) 86 | x_test = x_test.map(&:dup) 87 | [x_train, x_test].each do |xt| 88 | xt.each do |x| 89 | x[3] = nil if x[3] > 7 90 | end 91 | end 92 | 93 | model = LightGBM::Classifier.new 94 | model.fit(x_train, y_train, categorical_feature: [3]) 95 | 96 | y_pred = model.predict(x_test) 97 | expected = [1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] 98 | assert_equal expected, y_pred.first(100) 99 | 100 | expected = [1228, 1265, 1446, 30] 101 | assert_equal expected, model.feature_importances 102 | end 103 | end 104 | -------------------------------------------------------------------------------- /test/cv_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class CvTest < Minitest::Test 4 | def test_regression 5 | # need to set stratified=False in Python 6 | eval_hist = LightGBM.cv(regression_params, regression_train, shuffle: false) 7 | assert_in_delta 0.2597565400783163, eval_hist["l2-mean"].first 8 | assert_in_delta 0.10267769399880997, 
eval_hist["l2-mean"].last 9 | assert_in_delta 0.07283200245299197, eval_hist["l2-stdv"].first 10 | assert_in_delta 0.019704697369123978, eval_hist["l2-stdv"].last 11 | end 12 | 13 | def test_binary 14 | # need to set stratified=False in Python 15 | eval_hist = LightGBM.cv(binary_params, binary_train, shuffle: false) 16 | assert_in_delta 0.38594176939006153, eval_hist["binary_logloss-mean"].first 17 | assert_in_delta 0.13445744661816397, eval_hist["binary_logloss-mean"].last 18 | assert_in_delta 0.09986377563273867, eval_hist["binary_logloss-stdv"].first 19 | assert_in_delta 0.0463093558415842, eval_hist["binary_logloss-stdv"].last 20 | end 21 | 22 | def test_multiclass 23 | # need to set stratified=False in Python 24 | eval_hist = LightGBM.cv(multiclass_params, multiclass_train, shuffle: false) 25 | assert_in_delta 0.7352745822291095, eval_hist["multi_logloss-mean"].first 26 | assert_in_delta 0.40375560053885506, eval_hist["multi_logloss-mean"].last 27 | assert_in_delta 0.11256739058587856, eval_hist["multi_logloss-stdv"].first 28 | assert_in_delta 0.1779828373201067, eval_hist["multi_logloss-stdv"].last 29 | end 30 | 31 | def test_early_stopping_early 32 | eval_hist = nil 33 | stdout, _ = capture_io do 34 | eval_hist = LightGBM.cv(regression_params, regression_train, shuffle: false, verbose_eval: true, early_stopping_rounds: 5) 35 | end 36 | assert_equal 36, eval_hist["l2-mean"].size 37 | assert_includes stdout, "[41]\tcv_agg's l2: 0.0988604 + 0.0243197" 38 | refute_includes stdout, "[42]" 39 | end 40 | 41 | def test_early_stopping_not_early 42 | eval_hist = nil 43 | stdout, _ = capture_io do 44 | eval_hist = LightGBM.cv(regression_params, regression_train, shuffle: false, verbose_eval: true, early_stopping_rounds: 500) 45 | end 46 | assert_equal 36, eval_hist["l2-mean"].size 47 | assert_includes stdout, "[100]\tcv_agg's l2: 0.102678 + 0.0197047" 48 | end 49 | 50 | def test_multiple_metrics 51 | params = regression_params.merge(metric: ["l1", "l2", "rmse"]) 52 | 
eval_hist = LightGBM.cv(params, regression_train, shuffle: false, early_stopping_rounds: 5) 53 | assert_equal ["l1-mean", "l1-stdv", "l2-mean", "l2-stdv", "rmse-mean", "rmse-stdv"].sort, eval_hist.keys.sort 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /test/dataset_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class DatasetTest < Minitest::Test 4 | def test_data_string 5 | dataset = LightGBM::Dataset.new(data_path, params: {header: true, label_column: "name:y"}) 6 | assert_equal 500, dataset.num_data 7 | assert_equal 4, dataset.num_feature 8 | assert_equal 500, dataset.label.size 9 | assert_equal ["x0", "x1", "x2", "x3"], dataset.feature_name 10 | end 11 | 12 | def test_label 13 | data = [[1, 2], [3, 4]] 14 | label = [1, 2] 15 | dataset = LightGBM::Dataset.new(data, label: label) 16 | assert label, dataset.label 17 | end 18 | 19 | def test_weight 20 | data = [[1, 2], [3, 4]] 21 | weight = [1, 2] 22 | dataset = LightGBM::Dataset.new(data, weight: weight) 23 | assert weight, dataset.weight 24 | end 25 | 26 | def test_feature_name 27 | data = [[1, 2], [3, 4]] 28 | dataset = LightGBM::Dataset.new(data, feature_name: ["a", "b"]) 29 | assert_equal ["a", "b"], dataset.feature_name 30 | end 31 | 32 | def test_num_data 33 | assert_equal 300, regression_train.num_data 34 | end 35 | 36 | def test_num_feature 37 | assert_equal 4, regression_train.num_feature 38 | end 39 | 40 | def test_save_binary 41 | regression_train.save_binary(tempfile) 42 | assert File.exist?(tempfile) 43 | end 44 | 45 | def test_dump_text 46 | # method is private in Python library 47 | # https://github.com/microsoft/LightGBM/pull/2434 48 | assert !regression_train.respond_to?(:dump_text) 49 | regression_train.send(:dump_text, tempfile) 50 | assert File.exist?(tempfile) 51 | end 52 | 53 | def test_hashes_string_keys 54 | data = [{"x0" => 1, "x1" => 2}, {"x0" => 
3, "x1" => 4}, {"x0" => 5, "x1" => 6}] 55 | dataset = LightGBM::Dataset.new(data) 56 | assert_equal 3, dataset.num_data 57 | assert_equal 2, dataset.num_feature 58 | assert_equal ["x0", "x1"], dataset.feature_name 59 | end 60 | 61 | def test_hashes_symbol_keys 62 | data = [{x0: 1, x1: 2}, {x0: 3, x1: 4}, {x0: 5, x1: 6}] 63 | dataset = LightGBM::Dataset.new(data) 64 | assert_equal 3, dataset.num_data 65 | assert_equal 2, dataset.num_feature 66 | assert_equal ["x0", "x1"], dataset.feature_name 67 | end 68 | 69 | def test_matrix 70 | data = Matrix.build(3, 3) { |row, col| row + col } 71 | label = Vector.elements([4, 5, 6]) 72 | dataset = LightGBM::Dataset.new(data, label: label) 73 | assert_equal 3, dataset.num_data 74 | assert_equal 3, dataset.num_feature 75 | assert_equal 3, dataset.label.size 76 | assert_equal ["Column_0", "Column_1", "Column_2"], dataset.feature_name 77 | end 78 | 79 | def test_daru 80 | data = Daru::DataFrame.from_csv(data_path) 81 | label = data["y"] 82 | data = data.delete_vector("y") 83 | dataset = LightGBM::Dataset.new(data, label: label) 84 | assert_equal 500, dataset.num_data 85 | assert_equal 4, dataset.num_feature 86 | assert_equal 500, dataset.label.size 87 | assert_equal ["x0", "x1", "x2", "x3"], dataset.feature_name 88 | end 89 | 90 | def test_numo 91 | skip unless numo? 92 | 93 | data = Numo::DFloat.new(3, 5).seq 94 | label = Numo::DFloat.new(3).seq 95 | dataset = LightGBM::Dataset.new(data, label: label) 96 | assert_equal 3, dataset.num_data 97 | assert_equal 5, dataset.num_feature 98 | assert_equal 3, dataset.label.size 99 | assert_equal ["Column_0", "Column_1", "Column_2", "Column_3", "Column_4"], dataset.feature_name 100 | end 101 | 102 | def test_rover 103 | skip unless numo? 
104 | 105 | data = Rover.read_csv(data_path) 106 | label = data.delete("y") 107 | dataset = LightGBM::Dataset.new(data, label: label) 108 | assert_equal 500, dataset.num_data 109 | assert_equal 4, dataset.num_feature 110 | assert_equal 500, dataset.label.size 111 | assert_equal ["x0", "x1", "x2", "x3"], dataset.feature_name 112 | end 113 | 114 | def test_array_different_sizes 115 | data = [[1, 2], [3, 4, 5]] 116 | error = assert_raises(ArgumentError) do 117 | LightGBM::Dataset.new(data) 118 | end 119 | assert_equal "Rows have different sizes", error.message 120 | end 121 | 122 | def test_copy 123 | regression_train.dup 124 | regression_train.clone 125 | end 126 | end 127 | -------------------------------------------------------------------------------- /test/ranker_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class RankerTest < Minitest::Test 4 | def test_works 5 | x_train, y_train, x_test, _ = ranker_data 6 | group = [100, 200] 7 | 8 | model = LightGBM::Ranker.new 9 | model.fit(x_train, y_train, group: group) 10 | y_pred = model.predict(x_test) 11 | expected = [4.32677558843951, 1.5663855381974388, 3.8499830924310703, -2.1940085102547804, 3.3916802314416667, 3.488857015835257] 12 | assert_elements_in_delta expected, y_pred.first(6) 13 | 14 | expected = [72, 114, 141, 17] 15 | assert_equal expected, model.feature_importances 16 | 17 | model.save_model(tempfile) 18 | 19 | model = LightGBM::Ranker.new 20 | model.load_model(tempfile) 21 | assert_equal y_pred, model.predict(x_test) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /test/regressor_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class RegressorTest < Minitest::Test 4 | def test_works 5 | x_train, y_train, x_test, _ = regression_data 6 | 7 | model = LightGBM::Regressor.new 8 | 
model.fit(x_train, y_train) 9 | y_pred = model.predict(x_test) 10 | expected = [1.3687029666659025, 1.7352643821271516, 1.4988839660914637, 0.8784593080455959, 1.209552643550604, 1.4602293932569006] 11 | 12 | assert_elements_in_delta expected, y_pred.first(6) 13 | 14 | expected = [280, 285, 335, 148] 15 | assert_equal expected, model.feature_importances 16 | 17 | model.save_model(tempfile) 18 | 19 | model = LightGBM::Regressor.new 20 | model.load_model(tempfile) 21 | assert_equal y_pred, model.predict(x_test) 22 | end 23 | 24 | def test_early_stopping 25 | x_train, y_train, x_test, y_test = regression_data 26 | 27 | model = LightGBM::Regressor.new 28 | model.fit(x_train, y_train, early_stopping_rounds: 5, eval_set: [[x_test, y_test]], verbose: false) 29 | assert_equal 69, model.best_iteration 30 | end 31 | 32 | def test_daru 33 | data = Daru::DataFrame.from_csv(data_path) 34 | y = data["y"] 35 | x = data.delete_vector("y") 36 | 37 | # daru has bug with 0...300 38 | x_train = x.row[0..299] 39 | y_train = y[0..299] 40 | x_test = x.row[300..-1] 41 | 42 | model = LightGBM::Regressor.new 43 | model.fit(x_train, y_train) 44 | y_pred = model.predict(x_test) 45 | expected = [1.3687029666659025, 1.7352643821271516, 1.4988839660914637, 0.8784593080455959, 1.209552643550604, 1.4602293932569006] 46 | assert_elements_in_delta expected, y_pred.first(6) 47 | end 48 | 49 | def test_trivial 50 | x = [[1], [2], [3], [4]] 51 | y = [0.1, 0.2, 0.3, 0.4] 52 | model = LightGBM::Regressor.new(min_data_in_bin: 1, min_data_in_leaf: 1) 53 | model.fit(x, y) 54 | assert_elements_in_delta y, model.predict(x) 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /test/support/booster.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | 4 | df = pd.read_csv('test/support/data.csv') 5 | 6 | X = df.drop(columns=['y']) 7 | y = df['y'] 8 | 9 | X_train = X[:300] 10 | 
y_train = y[:300] 11 | X_test = X[300:] 12 | y_test = y[300:] 13 | 14 | train_data = lgb.Dataset(X_train, label=y_train) 15 | bst = lgb.train({}, train_data) 16 | bst.save_model('test/support/model.txt') 17 | 18 | bst = lgb.Booster(model_file='test/support/model.txt') 19 | print('x', X_train[:2].to_numpy().tolist()) 20 | print('predict', bst.predict(X_train)[:2].tolist()) 21 | print('raw_score', bst.predict(X_train, raw_score=True)[:2].tolist()) 22 | print('pred_leaf', bst.predict(X_train, pred_leaf=True)[:2].tolist()) 23 | print('pred_contrib', bst.predict(X_train, pred_contrib=True)[:2].tolist()) 24 | print('feature_importance', bst.feature_importance().tolist()) 25 | print('feature_name', bst.feature_name()) 26 | -------------------------------------------------------------------------------- /test/support/categorical.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | 4 | df = pd.read_csv('test/support/data.csv') 5 | df['x3'] = ('cat' + df['x3'].astype(str)).astype('category') 6 | 7 | X = df.drop(columns=['y']) 8 | y = df['y'] 9 | 10 | X_train = X[:300] 11 | y_train = y[:300] 12 | X_test = X[300:] 13 | y_test = y[300:] 14 | 15 | train_data = lgb.Dataset(X_train, label=y_train) 16 | bst = lgb.train({}, train_data, num_boost_round=5) 17 | bst.save_model('test/support/categorical.txt') 18 | 19 | bst = lgb.Booster(model_file='test/support/categorical.txt') 20 | print('x', X_train[:2].to_numpy().tolist()) 21 | print('predict', bst.predict(X_train)[:2].tolist()) 22 | -------------------------------------------------------------------------------- /test/support/categorical.txt: -------------------------------------------------------------------------------- 1 | tree 2 | version=v4 3 | num_class=1 4 | num_tree_per_iteration=1 5 | label_index=0 6 | max_feature_idx=3 7 | objective=regression 8 | feature_names=x0 x1 x2 x3 9 | feature_infos=[0:9.9000000000000004] [0:9.8000000000000007] 
[0:9.9000000000000004] -1:9:0:2:4:5:6:7:3:8:1 10 | tree_sizes=931 1058 976 1135 1140 11 | 12 | Tree=0 13 | num_leaves=10 14 | num_cat=0 15 | split_feature=2 1 1 0 0 2 0 1 0 16 | split_gain=25.0622 13.4213 7.81363 4.17122 1.4867 1.85204 0.787037 0.163668 0.0359155 17 | threshold=4.0500000000000007 7.7500000000000009 4.5500000000000007 5.8000000000000016 6.7500000000000009 6.3500000000000005 3.4500000000000006 5.8500000000000005 1.8500000000000003 18 | decision_type=2 2 2 2 2 2 2 2 2 19 | left_child=2 4 3 -1 7 -6 -4 8 -2 20 | right_child=1 -3 6 -5 5 -7 -8 -9 -10 21 | leaf_value=0.91066666613953806 0.9889999999835466 1.0683589738903518 0.96899999985316143 0.96323076904631955 0.99400000001614286 1.0332857140457434 0.99400000001614286 1.0019999999639888 0.99400000001614286 22 | leaf_weight=36 20 39 20 26 21 28 34 25 51 23 | leaf_count=36 20 39 20 26 21 28 34 25 51 24 | internal_value=0.993333 1.01628 0.956931 0.93271 1.00228 1.01645 0.984741 0.995042 0.992592 25 | internal_weight=0 184 116 62 145 49 54 96 71 26 | internal_count=300 184 116 62 145 49 54 96 71 27 | is_linear=0 28 | shrinkage=1 29 | 30 | 31 | Tree=1 32 | num_leaves=11 33 | num_cat=0 34 | split_feature=2 1 1 0 1 2 2 2 2 0 35 | split_gain=20.3004 10.8713 6.36761 3.77682 1.34014 1.44878 0.441848 0.19812 0.026579 0.00350037 36 | threshold=4.0500000000000007 7.7500000000000009 5.0500000000000007 3.8500000000000001 5.7500000000000009 6.4500000000000011 2.2500000000000004 7.8500000000000005 5.1500000000000012 5.6500000000000012 37 | decision_type=2 2 2 2 2 2 2 2 2 2 38 | left_child=2 4 3 -1 7 -6 -4 8 -2 -10 39 | right_child=1 -3 6 -5 5 -7 -8 -9 9 -11 40 | leaf_value=-0.076532351072220242 -0.0043000001087784767 0.067523078200144651 -0.017899999655783178 -0.028270968151909694 0.004530612144264437 0.041676189969959004 0.0011645161216297458 0.0080224487770880967 0.00076071428733744801 -0.00097142858430743217 41 | leaf_weight=34 20 39 20 31 21 21 31 35 28 20 42 | leaf_count=34 20 39 20 31 21 21 31 35 28 20 43 | 
internal_value=0 0.0206543 -0.0327621 -0.0535154 0.00804828 0.0231034 -0.00631176 0.00190929 -0.00123718 3.89881e-05 44 | internal_weight=0 184 116 65 145 42 51 103 68 48 45 | internal_count=300 184 116 65 145 42 51 103 68 48 46 | is_linear=0 47 | shrinkage=0.1 48 | 49 | 50 | Tree=2 51 | num_leaves=10 52 | num_cat=0 53 | split_feature=2 1 1 0 0 2 0 2 0 54 | split_gain=16.4433 8.80574 5.30718 2.95305 1.25426 0.898936 0.167336 0.129581 0.0195663 55 | threshold=4.0500000000000007 7.7500000000000009 4.2500000000000009 5.8000000000000016 7.5500000000000007 2.2500000000000004 3.6500000000000008 6.4500000000000011 1.6500000000000001 56 | decision_type=2 2 2 2 2 2 2 2 2 57 | left_child=2 4 3 -1 6 -4 8 -8 -2 58 | right_child=1 -3 5 -5 -6 -7 7 -9 -10 59 | leaf_value=-0.071365883899852642 -0.0042178705557370964 0.060770767163007694 -0.022856777518987657 -0.025495980858802798 0.024048169260598065 0.0021225635071887689 8.1711309515715891e-05 0.0096219042442472931 -0.00031222260219912078 60 | leaf_weight=32 23 39 25 25 34 34 24 35 29 61 | leaf_count=32 23 39 25 25 34 34 24 35 29 62 | internal_value=0 0.0185889 -0.0294859 -0.0512475 0.00724345 -0.0084619 0.00209606 0.00574115 -0.00203972 63 | internal_weight=0 184 116 57 145 59 111 59 52 64 | internal_count=300 184 116 57 145 59 111 59 52 65 | is_linear=0 66 | shrinkage=0.1 67 | 68 | 69 | Tree=3 70 | num_leaves=12 71 | num_cat=0 72 | split_feature=2 1 1 2 0 0 2 0 0 1 2 73 | split_gain=13.7383 7.13163 5.44524 3.64733 1.38156 1.17214 1.44484 0.919678 0.290498 0.163577 0.0302091 74 | threshold=3.2500000000000004 7.7500000000000009 4.1500000000000012 8.1500000000000004 4.7500000000000009 6.7500000000000009 6.3500000000000005 3.4500000000000006 1.3500000000000003 2.2500000000000004 6.6500000000000012 75 | decision_type=2 2 2 2 2 2 2 2 2 2 2 76 | left_child=2 5 4 -3 -1 8 -7 -4 -2 -10 -11 77 | right_child=1 3 7 -5 -6 6 -8 -9 9 10 -12 78 | leaf_value=-0.076276844739913946 -0.012102717987727375 0.021397085603149166 -0.025375127499657021 
0.080334717035293587 -0.039962041974067691 0.00080731424848260044 0.032654349133372305 0.0025276197327507872 -0.0053196552006907214 0.0058777159222194722 0.0014981740362210467 79 | leaf_weight=22 20 21 21 21 20 29 28 27 27 36 28 80 | leaf_count=22 20 21 21 21 20 29 28 27 27 36 28 81 | internal_value=0 0.0140093 -0.0326885 0.0508659 -0.0589841 0.00479521 0.0164515 -0.00967983 -0.00119044 0.00120787 0.00396167 82 | internal_weight=0 210 90 42 42 168 57 48 111 91 64 83 | internal_count=300 210 90 42 42 168 57 48 111 91 64 84 | is_linear=0 85 | shrinkage=0.1 86 | 87 | 88 | Tree=4 89 | num_leaves=12 90 | num_cat=0 91 | split_feature=2 1 2 1 0 2 2 1 2 1 1 92 | split_gain=11.333 5.29767 6.36153 3.12567 1.22002 1.22358 0.627947 0.0676673 0.0633798 0.0257134 0.00312228 93 | threshold=2.5500000000000003 7.3500000000000005 6.6500000000000012 4.1500000000000012 3.6500000000000008 5.0500000000000007 8.1500000000000004 6.0500000000000007 4.3500000000000005 2.6500000000000008 2.8500000000000001 94 | decision_type=2 2 2 2 2 2 2 2 2 2 2 95 | left_child=3 4 -3 -1 5 -2 7 8 -6 -10 -7 96 | right_child=1 2 -4 -5 6 10 -8 -9 9 -11 -12 97 | leaf_value=-0.060740401331455474 -0.027608891367795881 0.003729247557847495 0.064928260411728514 -0.015461565321020316 0.0063351626133745804 0.0012375214530038648 0.02313491709297523 0.0084959104764857334 -0.0026474222024578768 0.0022552707933937203 -0.0004888500216607513 98 | leaf_weight=31 25 35 33 30 21 20 20 20 23 20 22 99 | leaf_count=31 25 35 33 30 21 20 20 20 23 20 22 100 | internal_value=0 0.00981924 0.0334288 -0.0384721 0.000430661 -0.0100929 0.00721029 0.00341871 0.00183208 -0.0003671 0.000333232 101 | internal_weight=0 239 68 61 171 67 104 84 64 43 42 102 | internal_count=300 239 68 61 171 67 104 84 64 43 42 103 | is_linear=0 104 | shrinkage=0.1 105 | 106 | 107 | end of trees 108 | 109 | feature_importances: 110 | x2=19 111 | x1=16 112 | x0=15 113 | 114 | parameters: 115 | [boosting: gbdt] 116 | [objective: regression] 117 | [metric: l2] 118 
| [tree_learner: serial] 119 | [device_type: cpu] 120 | [data_sample_strategy: bagging] 121 | [data: ] 122 | [valid: ] 123 | [num_iterations: 5] 124 | [learning_rate: 0.1] 125 | [num_leaves: 31] 126 | [num_threads: 0] 127 | [seed: 0] 128 | [deterministic: 0] 129 | [force_col_wise: 0] 130 | [force_row_wise: 0] 131 | [histogram_pool_size: -1] 132 | [max_depth: -1] 133 | [min_data_in_leaf: 20] 134 | [min_sum_hessian_in_leaf: 0.001] 135 | [bagging_fraction: 1] 136 | [pos_bagging_fraction: 1] 137 | [neg_bagging_fraction: 1] 138 | [bagging_freq: 0] 139 | [bagging_seed: 3] 140 | [feature_fraction: 1] 141 | [feature_fraction_bynode: 1] 142 | [feature_fraction_seed: 2] 143 | [extra_trees: 0] 144 | [extra_seed: 6] 145 | [early_stopping_round: 0] 146 | [early_stopping_min_delta: 0] 147 | [first_metric_only: 0] 148 | [max_delta_step: 0] 149 | [lambda_l1: 0] 150 | [lambda_l2: 0] 151 | [linear_lambda: 0] 152 | [min_gain_to_split: 0] 153 | [drop_rate: 0.1] 154 | [max_drop: 50] 155 | [skip_drop: 0.5] 156 | [xgboost_dart_mode: 0] 157 | [uniform_drop: 0] 158 | [drop_seed: 4] 159 | [top_rate: 0.2] 160 | [other_rate: 0.1] 161 | [min_data_per_group: 100] 162 | [max_cat_threshold: 32] 163 | [cat_l2: 10] 164 | [cat_smooth: 10] 165 | [max_cat_to_onehot: 4] 166 | [top_k: 20] 167 | [monotone_constraints: ] 168 | [monotone_constraints_method: basic] 169 | [monotone_penalty: 0] 170 | [feature_contri: ] 171 | [forcedsplits_filename: ] 172 | [refit_decay_rate: 0.9] 173 | [cegb_tradeoff: 1] 174 | [cegb_penalty_split: 0] 175 | [cegb_penalty_feature_lazy: ] 176 | [cegb_penalty_feature_coupled: ] 177 | [path_smooth: 0] 178 | [interaction_constraints: ] 179 | [verbosity: 1] 180 | [saved_feature_importance_type: 0] 181 | [use_quantized_grad: 0] 182 | [num_grad_quant_bins: 4] 183 | [quant_train_renew_leaf: 0] 184 | [stochastic_rounding: 1] 185 | [linear_tree: 0] 186 | [max_bin: 255] 187 | [max_bin_by_feature: ] 188 | [min_data_in_bin: 3] 189 | [bin_construct_sample_cnt: 200000] 190 | 
[data_random_seed: 1] 191 | [is_enable_sparse: 1] 192 | [enable_bundle: 1] 193 | [use_missing: 1] 194 | [zero_as_missing: 0] 195 | [feature_pre_filter: 1] 196 | [pre_partition: 0] 197 | [two_round: 0] 198 | [header: 0] 199 | [label_column: ] 200 | [weight_column: ] 201 | [group_column: ] 202 | [ignore_column: ] 203 | [categorical_feature: 3] 204 | [forcedbins_filename: ] 205 | [precise_float_parser: 0] 206 | [parser_config_file: ] 207 | [objective_seed: 5] 208 | [num_class: 1] 209 | [is_unbalance: 0] 210 | [scale_pos_weight: 1] 211 | [sigmoid: 1] 212 | [boost_from_average: 1] 213 | [reg_sqrt: 0] 214 | [alpha: 0.9] 215 | [fair_c: 1] 216 | [poisson_max_delta_step: 0.7] 217 | [tweedie_variance_power: 1.5] 218 | [lambdarank_truncation_level: 30] 219 | [lambdarank_norm: 1] 220 | [label_gain: ] 221 | [lambdarank_position_bias_regularization: 0] 222 | [eval_at: ] 223 | [multi_error_top_k: 1] 224 | [auc_mu_weights: ] 225 | [num_machines: 1] 226 | [local_listen_port: 12400] 227 | [time_out: 120] 228 | [machine_list_filename: ] 229 | [machines: ] 230 | [gpu_platform_id: -1] 231 | [gpu_device_id: -1] 232 | [gpu_use_dp: 0] 233 | [num_gpu: 1] 234 | 235 | end of parameters 236 | 237 | pandas_categorical:[["cat0", "cat1", "cat2", "cat3", "cat4", "cat5", "cat6", "cat7", "cat8", "cat9"]] 238 | -------------------------------------------------------------------------------- /test/support/classifier.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | 4 | df = pd.read_csv('test/support/data.csv') 5 | 6 | X = df.drop(columns=['y']) 7 | yb = df['y'].replace(2, 1) 8 | ym = df['y'] 9 | 10 | X_train = X[:300] 11 | yb_train = yb[:300] 12 | ym_train = ym[:300] 13 | X_test = X[300:] 14 | yb_test = yb[300:] 15 | ym_test = ym[300:] 16 | 17 | print('test_binary') 18 | 19 | model = lgb.LGBMClassifier() 20 | model.fit(X_train, yb_train) 21 | print(model.predict(X_test)[0:100].tolist()) 22 | 
print(model.predict_proba(X_test)[0].tolist()) 23 | print(model.feature_importances_.tolist()) 24 | 25 | print() 26 | print('test_multiclass') 27 | 28 | model = lgb.LGBMClassifier() 29 | model.fit(X_train, ym_train) 30 | print(model.predict(X_test)[0:100].tolist()) 31 | print(model.predict_proba(X_test)[0].tolist()) 32 | print(model.feature_importances_.tolist()) 33 | 34 | print() 35 | print('test_early_stopping') 36 | model.fit(X_train, ym_train, eval_set=[(X_test, ym_test)], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()]) 37 | 38 | print() 39 | print('test_missing_numeric') 40 | 41 | X_train_miss = X_train.copy() 42 | X_test_miss = X_test.copy() 43 | X_train_miss[X_train_miss == 3.7] = None 44 | X_test_miss[X_test_miss == 3.7] = None 45 | model = lgb.LGBMClassifier() 46 | model.fit(X_train_miss, ym_train) 47 | print(model.predict(X_test_miss)[0:100].tolist()) 48 | print(model.feature_importances_.tolist()) 49 | 50 | print() 51 | print('test_missing_categorical') 52 | 53 | X_train_miss2 = X_train.copy() 54 | X_test_miss2 = X_test.copy() 55 | X_train_miss2["x3"][X_train_miss2["x3"] > 7] = None 56 | X_test_miss2["x3"][X_test_miss2["x3"] > 7] = None 57 | model = lgb.LGBMClassifier() 58 | model.fit(X_train_miss2, ym_train, categorical_feature=[3]) 59 | print(model.predict(X_test_miss2)[0:100].tolist()) 60 | print(model.feature_importances_.tolist()) 61 | -------------------------------------------------------------------------------- /test/support/cv.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | 4 | df = pd.read_csv('test/support/data.csv') 5 | 6 | X = df.drop(columns=['y']) 7 | y = df['y'] 8 | 9 | X_train = X[:300] 10 | y_train = y[:300] 11 | X_test = X[300:] 12 | y_test = y[300:] 13 | 14 | print('test_regression') 15 | 16 | regression_params = {'objective': 'regression', 'verbosity': -1} 17 | regression_train = lgb.Dataset(X_train, label=y_train) 18 
| eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False) 19 | print(eval_hist['valid l2-mean'][0]) 20 | print(eval_hist['valid l2-mean'][-1]) 21 | print(eval_hist['valid l2-stdv'][0]) 22 | print(eval_hist['valid l2-stdv'][-1]) 23 | 24 | print() 25 | print('test_binary') 26 | 27 | binary_params = {'objective': 'binary', 'verbosity': -1} 28 | binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1)) 29 | eval_hist = lgb.cv(binary_params, binary_train, shuffle=False, stratified=False) 30 | print(eval_hist['valid binary_logloss-mean'][0]) 31 | print(eval_hist['valid binary_logloss-mean'][-1]) 32 | print(eval_hist['valid binary_logloss-stdv'][0]) 33 | print(eval_hist['valid binary_logloss-stdv'][-1]) 34 | 35 | print() 36 | print('test_multiclass') 37 | 38 | multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1} 39 | multiclass_train = lgb.Dataset(X_train, label=y_train) 40 | eval_hist = lgb.cv(multiclass_params, multiclass_train, shuffle=False, stratified=False) 41 | print(eval_hist['valid multi_logloss-mean'][0]) 42 | print(eval_hist['valid multi_logloss-mean'][-1]) 43 | print(eval_hist['valid multi_logloss-stdv'][0]) 44 | print(eval_hist['valid multi_logloss-stdv'][-1]) 45 | 46 | print('') 47 | print('test_early_stopping_early') 48 | 49 | eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, callbacks=[lgb.log_evaluation(), lgb.early_stopping(stopping_rounds=5)]) 50 | print(len(eval_hist['valid l2-mean'])) 51 | 52 | print('') 53 | print('test_early_stopping_not_early') 54 | 55 | eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, callbacks=[lgb.log_evaluation(), lgb.early_stopping(stopping_rounds=500)]) 56 | print(len(eval_hist['valid l2-mean'])) 57 | 58 | -------------------------------------------------------------------------------- /test/support/data.csv: 
-------------------------------------------------------------------------------- 1 | x0,x1,x2,x3,y 2 | 3.7,1.2,7.2,9,1 3 | 7.5,0.5,7.9,0,1 4 | 1.6,0.1,7.6,7,1 5 | 0.6,2.5,5.0,4,0 6 | 1.8,8.4,1.1,2,1 7 | 6.8,8.7,8.7,9,2 8 | 9.4,9.6,8.6,9,2 9 | 0.9,0.7,6.3,6,1 10 | 5.7,0.1,0.0,1,0 11 | 0.8,8.8,1.3,3,1 12 | 4.7,7.2,3.0,7,1 13 | 0.3,7.0,2.1,1,1 14 | 5.7,0.3,6.8,8,1 15 | 4.3,7.6,2.6,4,1 16 | 8.0,4.1,8.2,0,1 17 | 6.8,2.5,9.8,7,2 18 | 0.7,2.6,2.5,8,0 19 | 2.2,0.9,6.7,7,1 20 | 2.3,2.7,3.7,9,1 21 | 8.3,3.8,0.8,0,0 22 | 3.4,1.0,2.3,7,0 23 | 2.5,7.1,9.2,0,1 24 | 8.8,2.3,5.5,1,1 25 | 7.7,0.3,0.0,8,0 26 | 7.7,0.6,5.2,5,1 27 | 7.0,0.2,7.6,5,1 28 | 7.5,0.7,7.7,8,1 29 | 7.5,7.6,4.3,4,1 30 | 3.0,3.6,0.7,4,0 31 | 5.7,8.2,9.6,7,2 32 | 8.1,0.7,2.4,4,0 33 | 3.2,1.2,6.5,9,1 34 | 2.4,8.2,9.7,2,2 35 | 9.2,9.8,1.0,6,1 36 | 9.6,8.2,8.6,6,2 37 | 6.6,7.1,4.8,6,1 38 | 1.5,0.5,1.7,4,0 39 | 4.8,2.2,1.3,1,0 40 | 9.7,5.3,8.4,0,1 41 | 5.5,6.1,5.6,9,1 42 | 2.1,9.6,8.3,9,2 43 | 1.4,1.3,8.4,4,1 44 | 4.3,0.6,7.7,8,1 45 | 5.9,1.5,2.4,9,1 46 | 6.6,7.1,5.3,5,1 47 | 3.6,2.1,4.0,1,0 48 | 4.7,7.7,4.0,7,1 49 | 1.6,2.8,4.5,3,1 50 | 6.6,7.8,4.6,0,1 51 | 2.9,6.3,7.5,3,1 52 | 5.3,9.3,3.3,2,1 53 | 8.4,8.3,4.8,6,2 54 | 3.2,2.8,5.5,2,1 55 | 3.1,2.8,9.4,8,1 56 | 9.9,3.2,0.8,4,0 57 | 7.7,5.0,7.9,9,2 58 | 6.4,8.3,2.4,1,1 59 | 2.0,4.4,1.5,3,0 60 | 1.9,2.6,8.6,7,1 61 | 9.9,5.3,4.7,3,1 62 | 3.4,3.2,1.9,3,0 63 | 2.4,8.3,9.4,6,2 64 | 4.7,0.5,7.9,1,1 65 | 8.7,3.2,4.2,2,1 66 | 8.8,9.8,3.0,1,1 67 | 6.8,6.4,6.0,4,1 68 | 7.8,1.7,3.9,3,1 69 | 8.1,2.8,2.2,6,1 70 | 4.1,7.4,7.7,6,1 71 | 2.5,4.8,5.0,9,1 72 | 5.4,0.0,8.6,7,1 73 | 1.6,1.9,0.9,0,0 74 | 9.2,5.1,1.0,4,1 75 | 2.3,1.4,6.3,5,1 76 | 4.6,0.3,5.6,8,1 77 | 8.0,4.6,5.4,7,1 78 | 1.4,7.7,1.5,9,1 79 | 5.3,8.4,5.8,5,1 80 | 2.9,4.4,3.7,6,1 81 | 9.1,5.4,7.6,8,1 82 | 5.9,2.6,7.6,7,1 83 | 8.7,3.9,4.3,2,1 84 | 3.8,9.1,6.9,2,2 85 | 3.3,4.3,2.6,8,1 86 | 6.9,7.3,5.2,9,1 87 | 2.7,4.3,0.1,2,0 88 | 9.4,9.6,2.6,4,1 89 | 3.0,6.4,2.2,2,0 90 | 5.2,0.3,7.0,7,1 91 | 8.3,4.8,6.1,6,1 92 | 
1.3,3.6,2.3,6,0 93 | 6.6,9.2,9.5,5,2 94 | 0.9,4.1,5.7,8,1 95 | 9.3,7.6,8.2,9,2 96 | 3.6,1.6,3.0,3,0 97 | 8.4,5.7,3.5,9,1 98 | 8.1,0.2,8.5,4,1 99 | 4.6,8.0,2.7,8,1 100 | 2.8,0.2,9.9,9,1 101 | 3.6,4.0,1.8,1,0 102 | 3.8,6.3,4.6,8,1 103 | 8.9,0.2,9.8,8,1 104 | 2.1,5.8,1.6,5,0 105 | 1.1,2.2,0.6,0,0 106 | 1.4,8.7,7.5,3,1 107 | 6.3,3.2,9.1,1,1 108 | 0.0,2.2,9.7,6,1 109 | 3.6,1.8,2.7,5,0 110 | 5.2,9.4,5.0,7,1 111 | 8.5,1.6,2.9,8,1 112 | 3.0,0.8,4.0,0,0 113 | 3.0,2.3,6.6,0,1 114 | 1.2,1.7,3.3,9,0 115 | 7.5,0.5,8.1,5,1 116 | 4.2,8.6,5.2,9,1 117 | 5.6,7.8,8.7,5,2 118 | 8.1,1.0,7.2,0,1 119 | 1.9,1.2,2.5,0,0 120 | 0.4,8.8,2.7,2,1 121 | 6.8,0.3,5.8,2,1 122 | 7.6,9.6,9.6,4,2 123 | 7.4,1.2,1.8,5,0 124 | 1.6,9.5,9.6,3,2 125 | 4.4,2.4,6.2,5,1 126 | 1.9,7.7,3.3,4,1 127 | 3.9,6.7,3.4,2,1 128 | 4.2,7.8,8.2,6,2 129 | 3.8,4.8,8.1,5,1 130 | 9.1,0.8,4.0,5,1 131 | 0.5,0.7,7.8,5,1 132 | 7.8,3.3,2.7,3,1 133 | 7.7,3.0,4.3,9,1 134 | 6.7,0.3,6.7,6,1 135 | 9.7,6.3,9.9,0,2 136 | 2.8,3.7,6.4,5,1 137 | 9.3,1.8,7.8,7,1 138 | 7.0,9.2,5.2,0,1 139 | 4.6,6.3,9.3,2,1 140 | 3.6,2.9,3.9,6,1 141 | 2.9,5.5,9.0,1,1 142 | 7.1,9.1,0.7,3,1 143 | 5.6,5.1,3.2,6,1 144 | 5.1,9.6,3.0,6,1 145 | 0.5,9.1,9.5,9,2 146 | 2.2,7.6,2.2,6,1 147 | 1.8,9.3,2.9,2,1 148 | 5.2,3.1,4.9,2,1 149 | 8.3,9.3,4.5,3,1 150 | 8.6,1.5,7.1,0,1 151 | 2.8,8.3,2.7,3,1 152 | 7.9,7.0,8.8,6,2 153 | 1.1,6.5,9.3,3,1 154 | 9.8,8.6,4.5,3,2 155 | 6.3,4.2,2.2,7,1 156 | 8.2,4.0,4.4,0,1 157 | 1.7,8.8,0.0,0,0 158 | 9.7,8.2,7.1,4,2 159 | 1.4,3.6,2.6,0,0 160 | 0.1,4.8,8.8,5,1 161 | 2.2,3.4,9.4,5,1 162 | 6.8,6.7,4.8,6,1 163 | 1.8,9.7,1.1,9,1 164 | 5.2,7.6,6.8,0,1 165 | 8.9,4.0,0.7,7,1 166 | 3.3,5.8,0.7,2,0 167 | 0.4,0.5,6.1,6,1 168 | 5.0,7.6,4.5,0,1 169 | 2.1,9.2,7.1,9,2 170 | 9.7,0.1,1.3,4,0 171 | 3.8,8.5,2.7,6,1 172 | 8.4,3.3,0.1,5,0 173 | 3.8,2.8,7.5,2,1 174 | 9.9,3.1,5.1,2,1 175 | 6.7,3.2,7.4,0,1 176 | 9.7,0.5,0.0,5,0 177 | 7.2,4.3,2.4,0,1 178 | 9.6,9.6,4.1,8,2 179 | 2.1,2.7,4.6,3,1 180 | 6.2,6.8,6.2,0,1 181 | 4.5,8.8,4.2,9,1 182 | 2.5,3.2,3.3,6,1 183 | 
0.6,6.5,5.1,7,1 184 | 1.3,0.3,2.8,2,0 185 | 6.1,8.3,9.6,2,2 186 | 3.4,3.0,5.7,0,1 187 | 4.9,4.6,8.3,9,1 188 | 2.0,4.0,5.0,8,1 189 | 0.4,0.9,0.0,5,0 190 | 7.4,2.1,3.3,7,1 191 | 3.8,3.7,4.3,7,1 192 | 3.3,9.5,4.1,7,1 193 | 6.9,7.0,2.3,6,1 194 | 3.9,3.1,3.7,6,1 195 | 5.6,8.7,2.8,4,1 196 | 6.6,1.0,8.4,0,1 197 | 1.6,3.0,0.3,5,0 198 | 6.9,4.9,7.4,1,1 199 | 7.2,6.8,3.9,0,1 200 | 9.7,0.7,3.3,5,1 201 | 3.6,1.2,5.3,6,1 202 | 2.6,8.2,9.9,9,2 203 | 9.9,2.5,0.1,9,1 204 | 5.5,5.5,6.4,8,1 205 | 6.0,7.0,4.9,2,1 206 | 3.2,9.0,2.0,4,1 207 | 3.8,6.5,8.0,9,1 208 | 5.2,8.9,1.9,0,1 209 | 4.2,4.7,2.1,1,1 210 | 0.1,8.9,3.8,0,1 211 | 1.8,7.7,7.7,8,1 212 | 9.0,8.3,3.9,2,1 213 | 1.5,3.7,9.6,4,1 214 | 9.8,0.1,3.2,5,1 215 | 0.7,5.0,5.0,0,1 216 | 4.3,4.1,6.9,1,1 217 | 9.7,5.1,3.0,2,1 218 | 4.9,8.1,8.2,7,2 219 | 2.1,1.8,4.6,9,1 220 | 0.4,7.7,6.3,7,1 221 | 9.5,5.9,6.4,2,1 222 | 3.1,0.1,8.4,9,1 223 | 6.8,5.4,2.5,3,1 224 | 2.7,5.6,9.2,8,1 225 | 5.5,7.6,4.6,1,1 226 | 1.1,0.8,6.1,7,1 227 | 8.8,8.9,8.2,2,2 228 | 0.5,4.2,4.6,5,1 229 | 5.1,7.3,4.2,2,1 230 | 2.3,4.5,9.5,4,1 231 | 3.3,5.4,1.4,9,1 232 | 6.1,4.0,7.2,1,1 233 | 2.2,0.2,8.6,1,1 234 | 7.2,5.1,7.1,0,1 235 | 8.7,7.7,6.2,0,1 236 | 0.0,4.2,3.9,3,0 237 | 5.7,4.3,5.3,2,1 238 | 9.7,7.5,5.9,2,1 239 | 7.4,9.1,0.5,3,1 240 | 3.1,1.1,0.3,6,0 241 | 0.1,1.3,5.9,8,1 242 | 8.1,0.8,3.8,4,1 243 | 7.0,7.6,7.3,5,2 244 | 7.9,8.4,5.9,7,1 245 | 6.6,3.2,4.6,2,1 246 | 4.4,6.4,3.7,9,1 247 | 9.0,8.4,1.5,1,1 248 | 5.2,6.9,6.6,9,2 249 | 2.8,1.9,1.7,5,0 250 | 7.9,1.0,2.9,1,1 251 | 8.1,4.6,0.7,1,1 252 | 8.6,6.1,6.4,7,2 253 | 2.9,5.5,2.8,4,1 254 | 9.9,4.3,9.8,7,2 255 | 0.5,5.0,9.2,4,1 256 | 3.4,9.8,8.3,5,2 257 | 8.9,5.4,0.4,9,1 258 | 3.2,5.0,5.1,7,1 259 | 8.9,4.3,8.8,0,1 260 | 8.6,5.8,8.8,3,2 261 | 2.2,1.0,3.1,4,0 262 | 0.9,0.6,3.0,3,0 263 | 1.8,1.2,7.3,1,1 264 | 8.5,5.3,4.2,6,1 265 | 0.9,3.6,8.7,8,1 266 | 5.3,8.7,1.8,5,1 267 | 6.7,1.4,2.0,5,1 268 | 0.9,4.2,6.6,4,1 269 | 1.6,5.8,7.7,3,1 270 | 7.5,2.8,8.5,2,1 271 | 4.4,9.2,8.2,8,2 272 | 3.0,6.4,7.7,2,1 273 | 3.8,1.1,5.6,4,1 274 
| 8.3,7.6,6.8,9,2 275 | 9.9,6.9,3.1,2,1 276 | 3.2,7.1,6.0,8,1 277 | 2.7,9.7,9.3,4,2 278 | 3.8,6.0,9.7,6,2 279 | 0.9,6.7,7.8,4,1 280 | 3.5,5.5,2.8,8,1 281 | 5.1,0.4,7.9,4,1 282 | 7.6,5.2,3.8,3,1 283 | 8.0,7.4,7.0,4,2 284 | 5.7,3.0,9.9,0,1 285 | 3.8,1.8,9.5,9,1 286 | 9.5,5.7,4.8,1,1 287 | 5.2,2.2,6.4,6,1 288 | 6.1,6.0,9.6,0,1 289 | 2.6,5.0,2.0,1,0 290 | 7.7,1.8,6.0,2,1 291 | 5.9,6.2,3.8,7,1 292 | 6.8,6.9,6.1,7,1 293 | 6.6,1.9,3.6,1,1 294 | 2.1,7.7,2.6,1,1 295 | 3.2,0.8,0.8,4,0 296 | 6.8,8.2,5.4,8,2 297 | 8.5,9.3,7.4,9,2 298 | 2.3,9.4,9.0,8,2 299 | 2.4,4.7,6.8,9,1 300 | 1.3,0.6,0.2,3,0 301 | 9.3,0.4,2.8,3,1 302 | 8.2,7.7,4.6,9,1 303 | 0.6,9.1,8.0,4,1 304 | 2.3,6.3,9.9,7,2 305 | 0.6,2.1,6.4,2,1 306 | 9.0,0.9,9.2,3,1 307 | 8.5,3.3,7.1,9,1 308 | 0.8,5.1,3.3,7,1 309 | 9.9,6.1,0.7,9,1 310 | 2.7,5.1,4.0,7,1 311 | 4.5,9.4,0.9,8,1 312 | 4.6,5.8,2.1,6,1 313 | 0.0,1.9,1.9,5,0 314 | 2.7,9.7,2.1,2,1 315 | 6.7,3.5,9.5,7,2 316 | 2.2,5.6,1.0,8,0 317 | 3.1,1.2,8.2,7,1 318 | 8.7,0.8,6.1,7,1 319 | 3.4,9.4,0.3,7,1 320 | 6.8,1.5,2.4,9,1 321 | 2.6,9.8,4.7,7,1 322 | 3.5,1.6,8.7,7,1 323 | 5.7,2.4,5.5,1,1 324 | 5.2,2.4,1.5,3,0 325 | 7.3,4.6,2.0,2,1 326 | 9.4,1.2,2.5,0,0 327 | 9.7,9.8,7.7,8,2 328 | 5.4,4.7,7.7,7,1 329 | 4.0,5.9,9.5,6,1 330 | 7.9,8.9,3.1,8,1 331 | 3.3,3.8,9.2,4,1 332 | 1.4,0.9,6.7,1,1 333 | 7.9,0.3,4.7,6,1 334 | 4.2,0.5,2.0,9,0 335 | 5.9,5.4,7.7,8,1 336 | 5.8,7.9,3.6,0,1 337 | 5.8,5.1,6.3,9,1 338 | 8.3,8.8,8.2,8,2 339 | 5.5,7.8,7.0,4,2 340 | 2.7,2.3,5.3,5,1 341 | 6.8,2.3,4.6,7,1 342 | 6.6,7.7,2.6,8,1 343 | 1.7,8.7,0.1,5,1 344 | 1.7,5.6,3.3,2,1 345 | 0.2,9.6,9.0,3,1 346 | 2.6,2.6,5.2,7,1 347 | 0.7,6.2,9.9,7,1 348 | 7.0,6.9,5.5,2,1 349 | 8.7,8.5,7.4,5,2 350 | 1.4,5.6,8.7,1,1 351 | 8.4,7.5,7.6,2,1 352 | 6.2,4.5,6.5,4,1 353 | 2.8,8.7,3.8,4,1 354 | 9.7,5.1,5.9,0,1 355 | 1.4,9.7,5.2,0,1 356 | 6.4,3.0,8.2,4,1 357 | 1.0,4.5,8.8,7,1 358 | 9.9,5.9,9.7,0,2 359 | 0.5,9.0,7.6,7,1 360 | 4.2,2.0,8.8,4,1 361 | 1.8,9.2,4.9,3,1 362 | 1.5,1.7,6.1,7,1 363 | 6.4,5.5,0.2,3,1 364 | 0.4,1.6,7.7,4,1 
365 | 3.0,5.2,8.4,4,1 366 | 9.8,5.9,3.1,3,1 367 | 3.2,5.0,5.1,5,1 368 | 3.3,0.0,0.9,3,0 369 | 8.8,6.0,3.8,7,1 370 | 6.2,5.6,9.6,4,2 371 | 2.4,4.3,5.1,1,1 372 | 1.8,1.0,6.1,7,1 373 | 0.6,0.7,7.9,3,1 374 | 0.0,0.3,5.1,3,0 375 | 8.1,5.9,6.6,0,1 376 | 1.9,4.4,8.7,5,1 377 | 0.0,6.3,9.4,4,1 378 | 0.1,8.4,6.0,6,1 379 | 1.4,2.4,4.6,8,1 380 | 3.2,8.0,7.0,6,1 381 | 2.4,7.7,8.9,3,1 382 | 5.7,3.1,3.9,1,1 383 | 9.3,4.8,1.3,3,1 384 | 1.5,9.7,4.1,3,1 385 | 2.5,5.1,3.4,6,1 386 | 3.3,9.9,6.1,7,1 387 | 3.2,0.3,6.7,8,1 388 | 5.7,9.6,4.6,9,2 389 | 2.1,9.2,9.9,4,2 390 | 8.4,5.8,6.4,8,2 391 | 7.8,7.8,6.2,7,1 392 | 4.2,1.4,9.3,3,1 393 | 1.8,7.8,1.0,3,1 394 | 4.9,9.3,7.9,9,2 395 | 8.9,0.4,0.5,9,0 396 | 8.6,2.9,5.3,0,1 397 | 7.2,2.9,3.8,6,1 398 | 7.8,5.4,4.2,4,1 399 | 6.4,6.8,4.4,5,1 400 | 8.4,7.3,5.0,0,1 401 | 5.0,3.5,7.2,7,1 402 | 0.2,3.5,9.7,2,1 403 | 3.4,8.3,9.3,2,2 404 | 7.4,1.8,2.5,1,1 405 | 4.9,3.7,2.0,8,1 406 | 5.2,2.3,1.8,3,0 407 | 3.2,3.7,3.0,8,1 408 | 5.0,7.3,6.8,0,1 409 | 1.5,6.3,6.4,3,1 410 | 9.1,2.0,5.4,7,1 411 | 0.6,2.4,0.0,8,0 412 | 4.8,8.0,2.8,8,1 413 | 0.1,7.3,2.1,0,0 414 | 7.6,3.2,4.0,1,1 415 | 8.3,3.0,8.7,5,1 416 | 3.3,8.2,2.2,3,1 417 | 3.4,4.9,8.8,6,1 418 | 3.7,3.3,1.2,5,0 419 | 7.9,6.5,0.6,3,1 420 | 3.2,8.5,4.4,7,1 421 | 6.7,6.6,2.8,9,1 422 | 8.4,4.3,9.4,4,2 423 | 5.2,3.1,2.5,9,1 424 | 6.3,5.2,5.9,8,1 425 | 1.7,2.2,5.4,7,1 426 | 5.0,2.1,7.1,3,1 427 | 2.3,1.2,7.7,6,1 428 | 0.7,5.2,3.0,8,1 429 | 9.6,8.4,8.1,9,2 430 | 3.9,5.6,6.0,6,1 431 | 2.5,6.5,9.5,8,1 432 | 9.9,2.7,1.8,6,1 433 | 0.1,3.4,0.2,1,0 434 | 8.5,1.1,9.3,5,1 435 | 5.9,8.2,1.1,3,1 436 | 0.3,8.3,0.0,5,0 437 | 5.1,9.1,3.2,6,1 438 | 8.4,9.9,8.1,5,2 439 | 1.1,6.9,0.9,0,0 440 | 3.8,4.2,6.3,7,1 441 | 4.0,6.3,6.3,2,1 442 | 4.0,0.5,9.2,8,1 443 | 8.2,7.4,4.0,9,1 444 | 6.8,4.9,8.2,1,1 445 | 8.6,4.6,1.3,5,1 446 | 8.0,8.1,9.4,0,2 447 | 4.9,5.1,1.4,5,1 448 | 0.4,0.7,5.4,4,0 449 | 9.8,4.1,3.5,2,1 450 | 5.6,1.3,1.1,4,0 451 | 6.6,2.1,8.1,2,1 452 | 6.1,9.1,8.5,3,2 453 | 6.0,9.9,9.2,2,2 454 | 9.1,7.4,0.8,1,1 455 | 
2.7,2.5,8.6,4,1 456 | 1.4,0.3,9.2,4,1 457 | 7.2,4.3,9.9,8,2 458 | 5.5,1.7,6.0,4,1 459 | 3.2,9.8,8.3,4,2 460 | 5.0,8.0,9.1,5,2 461 | 2.4,5.6,4.0,4,1 462 | 3.8,1.9,4.4,4,1 463 | 6.3,2.1,6.5,4,1 464 | 9.2,1.8,6.8,0,1 465 | 7.8,5.9,5.6,3,1 466 | 3.9,6.1,3.2,3,1 467 | 5.3,6.2,7.5,5,1 468 | 0.8,3.4,3.3,9,0 469 | 6.7,0.1,9.8,8,1 470 | 8.9,2.7,2.9,9,1 471 | 7.7,1.0,6.1,0,1 472 | 7.9,7.4,7.6,1,2 473 | 0.0,3.0,0.3,1,0 474 | 6.4,3.8,6.7,5,1 475 | 2.6,9.5,1.0,5,1 476 | 3.3,3.6,0.9,4,0 477 | 0.0,5.5,6.9,4,1 478 | 0.3,3.9,7.7,5,1 479 | 1.6,2.5,4.6,7,1 480 | 4.3,0.0,5.6,7,1 481 | 5.5,5.2,7.8,1,1 482 | 5.3,6.0,0.0,2,0 483 | 9.9,9.5,3.9,7,2 484 | 0.7,1.6,6.6,5,1 485 | 5.0,5.8,7.9,8,1 486 | 7.7,1.2,1.1,1,0 487 | 3.4,0.6,6.9,5,1 488 | 4.4,7.0,7.2,2,1 489 | 0.7,2.9,6.0,2,1 490 | 7.3,4.6,8.6,4,1 491 | 9.7,4.7,8.8,9,2 492 | 7.6,8.7,4.5,4,1 493 | 8.7,3.9,3.7,6,1 494 | 4.3,5.8,0.1,3,0 495 | 2.9,4.8,5.5,8,1 496 | 3.4,2.9,0.0,3,0 497 | 0.4,7.1,0.1,8,0 498 | 2.6,4.0,8.9,0,1 499 | 7.0,9.6,0.7,4,1 500 | 3.8,8.1,2.9,2,1 501 | 0.3,7.4,5.0,0,1 502 | -------------------------------------------------------------------------------- /test/support/ranker.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | 4 | df = pd.read_csv('test/support/data.csv') 5 | 6 | X = df.drop(columns=['y']) 7 | y = df['y'].replace(2, 1) 8 | 9 | X_train = X[:300] 10 | y_train = y[:300] 11 | X_test = X[300:] 12 | y_test = y[300:] 13 | 14 | group = [100, 200] 15 | 16 | model = lgb.LGBMRanker() 17 | model.fit(X_train, y_train, group=group) 18 | print(model.predict(X_test)[0:6].tolist()) 19 | print(model.feature_importances_.tolist()) 20 | -------------------------------------------------------------------------------- /test/support/regressor.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | 4 | df = pd.read_csv('test/support/data.csv') 5 | 6 | X = 
df.drop(columns=['y']) 7 | y = df['y'] 8 | 9 | X_train = X[:300] 10 | y_train = y[:300] 11 | X_test = X[300:] 12 | y_test = y[300:] 13 | 14 | model = lgb.LGBMRegressor() 15 | model.fit(X_train, y_train) 16 | 17 | print('predict', model.predict(X_test)[0:6].tolist()) 18 | 19 | print('feature_importances', model.feature_importances_.tolist()) 20 | 21 | print('early_stopping') 22 | model.fit(X_train, y_train, eval_set=[(X_test, y_test)], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()]) 23 | -------------------------------------------------------------------------------- /test/support/train.py: -------------------------------------------------------------------------------- 1 | import lightgbm as lgb 2 | import pandas as pd 3 | import numpy as np 4 | 5 | df = pd.read_csv('test/support/data.csv') 6 | 7 | X = df.drop(columns=['y']) 8 | y = df['y'] 9 | 10 | X_train = X[:300] 11 | y_train = y[:300] 12 | X_test = X[300:] 13 | y_test = y[300:] 14 | 15 | print('test_regression') 16 | 17 | regression_params = {'objective': 'regression', 'verbosity': -1} 18 | regression_train = lgb.Dataset(X_train, label=y_train) 19 | regression_test = lgb.Dataset(X_test, label=y_test) 20 | bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test]) 21 | y_pred = bst.predict(X_test) 22 | print(np.sqrt(np.mean((y_pred - y_test)**2))) 23 | 24 | print('') 25 | print('test_binary') 26 | 27 | binary_params = {'objective': 'binary', 'verbosity': -1} 28 | binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1)) 29 | binary_test = lgb.Dataset(X_test, label=y_test.replace(2, 1)) 30 | bst = lgb.train(binary_params, binary_train, valid_sets=[binary_train, binary_test]) 31 | y_pred = bst.predict(X_test) 32 | print(y_pred[0]) 33 | 34 | print('') 35 | print('test_multiclass') 36 | 37 | multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1} 38 | multiclass_train = lgb.Dataset(X_train, label=y_train) 39 | 
multiclass_test = lgb.Dataset(X_test, label=y_test) 40 | bst = lgb.train(multiclass_params, multiclass_train, valid_sets=[multiclass_train, multiclass_test]) 41 | y_pred = bst.predict(X_test) 42 | print(y_pred[0].tolist()) 43 | 44 | print('') 45 | print('test_early_stopping_early') 46 | 47 | bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()]) 48 | print(bst.best_iteration) 49 | 50 | print('') 51 | print('test_early_stopping_not_early') 52 | 53 | bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], callbacks=[lgb.early_stopping(stopping_rounds=500), lgb.log_evaluation()]) 54 | # appears to be using training set for best iteration instead of validation set 55 | print(bst.best_iteration) 56 | 57 | print('') 58 | print('test_early_stopping_early_higher_better') 59 | 60 | params = {'objective': 'binary', 'metric': 'auc', 'verbosity': -1} 61 | bst = lgb.train(params, binary_train, valid_sets=[binary_train, binary_test], callbacks=[lgb.early_stopping(stopping_rounds=5)]) 62 | print(bst.best_iteration) 63 | 64 | print('') 65 | print('test_categorical_feature') 66 | 67 | train_set = lgb.Dataset(X_train, label=y_train, categorical_feature=[3]) 68 | bst = lgb.train(regression_params, train_set) 69 | print(bst.predict(X_test)[0]) 70 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "bundler/setup" 2 | Bundler.require(:default) 3 | require "minitest/autorun" 4 | require "minitest/pride" 5 | require "csv" 6 | require "json" 7 | require "matrix" 8 | require "daru" 9 | 10 | class Minitest::Test 11 | def setup 12 | if stress? 
13 | # autoload before GC.stress 14 | LightGBM::FFI.ffi_libraries 15 | load_data 16 | GC.stress = true 17 | end 18 | end 19 | 20 | def teardown 21 | GC.stress = false if stress? 22 | end 23 | 24 | def stress? 25 | ENV["STRESS"] 26 | end 27 | 28 | def assert_elements_in_delta(expected, actual) 29 | assert_equal expected.size, actual.size 30 | expected.zip(actual) do |exp, act| 31 | assert_in_delta exp, act 32 | end 33 | end 34 | 35 | def regression_data 36 | @regression_data ||= split_data(*load_data) 37 | end 38 | 39 | def regression_train 40 | @regression_train ||= split_train(regression_data) 41 | end 42 | 43 | def regression_test 44 | @regression_test ||= split_test(regression_data) 45 | end 46 | 47 | def binary_data 48 | x, y = load_data 49 | y = y.map { |v| v > 1 ? 1 : v } 50 | split_data(x, y) 51 | end 52 | 53 | def binary_train 54 | @binary_train ||= split_train(binary_data) 55 | end 56 | 57 | def binary_test 58 | @binary_test ||= split_test(binary_data) 59 | end 60 | 61 | def multiclass_data 62 | @multiclass_data ||= split_data(*load_data) 63 | end 64 | 65 | def multiclass_train 66 | @multiclass_train ||= split_train(multiclass_data) 67 | end 68 | 69 | def multiclass_test 70 | @multiclass_test ||= split_test(multiclass_data) 71 | end 72 | 73 | def ranker_data 74 | @ranker_data ||= binary_data 75 | end 76 | 77 | def data_path 78 | "test/support/data.csv" 79 | end 80 | 81 | def load_data 82 | @@load_data ||= begin 83 | x = [] 84 | y = [] 85 | CSV.foreach(data_path, headers: true, converters: :numeric) do |row| 86 | x << row.values_at("x0", "x1", "x2", "x3").freeze 87 | y << row["y"] 88 | end 89 | [x.freeze, y.freeze] 90 | end 91 | end 92 | 93 | def split_data(x, y) 94 | [x[0...300], y[0...300], x[300..-1], y[300..-1]] 95 | end 96 | 97 | def split_train(data) 98 | x_train, y_train, _, _ = data 99 | LightGBM::Dataset.new(x_train, label: y_train) 100 | end 101 | 102 | def split_test(data) 103 | _, _, x_test, y_test = data 104 | LightGBM::Dataset.new(x_test, 
label: y_test) 105 | end 106 | 107 | def regression_params 108 | {objective: "regression"} 109 | end 110 | 111 | def binary_params 112 | {objective: "binary"} 113 | end 114 | 115 | def multiclass_params 116 | {objective: "multiclass", num_class: 3} 117 | end 118 | 119 | def teardown 120 | @tempfile = nil 121 | end 122 | 123 | def tempfile 124 | @tempfile ||= "#{Dir.mktmpdir}/#{Time.now.to_f}" 125 | end 126 | 127 | def jruby? 128 | RUBY_ENGINE == "jruby" 129 | end 130 | 131 | def numo? 132 | !jruby? && RUBY_ENGINE != "truffleruby" 133 | end 134 | end 135 | -------------------------------------------------------------------------------- /test/train_test.rb: -------------------------------------------------------------------------------- 1 | require_relative "test_helper" 2 | 3 | class TrainTest < Minitest::Test 4 | def test_regression 5 | model = LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], verbose_eval: false) 6 | y_pred = model.predict(regression_test.data) 7 | assert_in_delta 0.2900400590132747, rsme(regression_test.label, y_pred) 8 | 9 | model.save_model(tempfile) 10 | model = LightGBM::Booster.new(model_file: tempfile) 11 | y_pred = model.predict(regression_test.data) 12 | assert_in_delta 0.2900400590132747, rsme(regression_test.label, y_pred) 13 | end 14 | 15 | def test_binary 16 | model = LightGBM.train(binary_params, binary_train, valid_sets: [binary_train, binary_test], verbose_eval: false) 17 | y_pred = model.predict(binary_test.data) 18 | assert_in_delta 0.9999907566825113, y_pred.first 19 | 20 | model.save_model(tempfile) 21 | model = LightGBM::Booster.new(model_file: tempfile) 22 | y_pred2 = model.predict(binary_test.data) 23 | assert_equal y_pred, y_pred2 24 | end 25 | 26 | def test_multiclass 27 | model = LightGBM.train(multiclass_params, multiclass_train, valid_sets: [multiclass_train, multiclass_test], verbose_eval: false) 28 | 29 | y_pred = model.predict(multiclass_test.data) 30 | expected = 
[0.00036627031584163575, 0.9456350323547973, 0.053998697329361176] 31 | assert_elements_in_delta expected, y_pred.first 32 | # ensure reshaped 33 | assert_equal 200, y_pred.size 34 | assert_equal 3, y_pred.first.size 35 | 36 | model.save_model(tempfile) 37 | model = LightGBM::Booster.new(model_file: tempfile) 38 | y_pred2 = model.predict(multiclass_test.data) 39 | assert_equal y_pred, y_pred2 40 | end 41 | 42 | def test_early_stopping_early 43 | model = nil 44 | stdout, _ = capture_io do 45 | model = LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], early_stopping_rounds: 5) 46 | end 47 | assert_equal 69, model.best_iteration 48 | assert_includes stdout, "Early stopping, best iteration is:\n[69]\ttraining's l2: 0.0312266\tvalid_1's l2: 0.0843578" 49 | end 50 | 51 | def test_early_stopping_not_early 52 | model = nil 53 | stdout, _ = capture_io do 54 | model = LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], early_stopping_rounds: 500) 55 | end 56 | assert_equal 100, model.best_iteration 57 | if jruby? 
58 | assert_includes stdout, "Best iteration is: [100]\ttraining's l2: 0.0245240\tvalid_1's l2: 0.0841232" 59 | else 60 | assert_includes stdout, "Best iteration is: [100]\ttraining's l2: 0.024524\tvalid_1's l2: 0.0841232" 61 | end 62 | end 63 | 64 | def test_early_stopping_early_higher_better 65 | model = LightGBM.train(binary_params.merge(metric: "auc"), binary_train, valid_sets: [binary_train, binary_test], early_stopping_rounds: 5, verbose_eval: false) 66 | assert_equal 8, model.best_iteration 67 | end 68 | 69 | def test_verbose_eval_false 70 | stdout, _ = capture_io do 71 | LightGBM.train(regression_params, regression_train, valid_sets: [regression_train, regression_test], early_stopping_rounds: 5, verbose_eval: false) 72 | end 73 | assert_empty stdout 74 | end 75 | 76 | def test_bad_params 77 | params = {objective: "regression verbosity=1"} 78 | assert_raises ArgumentError do 79 | LightGBM.train(params, regression_train) 80 | end 81 | end 82 | 83 | def test_early_stopping_no_valid_set 84 | error = assert_raises ArgumentError do 85 | LightGBM.train(regression_params, regression_train, valid_sets: [], early_stopping_rounds: 5) 86 | end 87 | assert_includes error.message, "at least one validation set is required" 88 | end 89 | 90 | def test_early_stopping_valid_set_training 91 | error = assert_raises ArgumentError do 92 | LightGBM.train(regression_params, regression_train, valid_sets: [regression_train], early_stopping_rounds: 5) 93 | end 94 | assert_includes error.message, "at least one validation set is required" 95 | end 96 | 97 | def test_categorical_feature 98 | train_set = LightGBM::Dataset.new(regression_train.data, label: regression_train.label, categorical_feature: [3]) 99 | model = LightGBM.train(regression_params, train_set) 100 | assert_in_delta 1.2914367038779377, model.predict(regression_test.data).first 101 | end 102 | 103 | def test_multiple_metrics 104 | params = regression_params.merge(metric: ["l1", "l2", "rmse"]) 105 | LightGBM.train(params, 
regression_train, valid_sets: [regression_train, regression_test], verbose_eval: false, early_stopping_rounds: 5) 106 | end 107 | 108 | private 109 | 110 | def rsme(y_true, y_pred) 111 | Math.sqrt(y_true.zip(y_pred).map { |a, b| (a - b)**2 }.sum / y_true.size.to_f) 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /vendor/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /vendor/LICENSE-THIRD-PARTY: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | Boost.Compute 1.78.0 3 | ================================================================================ 4 | 5 | Boost Software License - Version 1.0 - August 17th, 2003 6 | 7 | Permission is hereby granted, free of charge, to any person or organization 8 | obtaining a copy of the software and accompanying documentation covered by 9 | this license (the "Software") to use, reproduce, display, distribute, 10 | execute, and transmit the Software, and to prepare derivative works of the 11 | Software, and to permit third-parties to whom the Software is furnished to 12 | do so, all subject to the following: 13 | 14 | The copyright notices in the Software and this entire statement, including 15 | the above license grant, this restriction and the following disclaimer, 16 | must be included in all copies of the Software, in whole or in part, and 17 | all derivative works of the Software, unless such copies or derivative 18 | works are solely in the form of machine-executable object code generated by 19 | a source language processor. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 24 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 25 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 26 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 27 | DEALINGS IN THE SOFTWARE. 
28 | 29 | ================================================================================ 30 | Eigen 3.4.0 31 | ================================================================================ 32 | 33 | Source code available at https://gitlab.com/libeigen/eigen 34 | 35 | Mozilla Public License Version 2.0 36 | ================================== 37 | 38 | 1. Definitions 39 | -------------- 40 | 41 | 1.1. "Contributor" 42 | means each individual or legal entity that creates, contributes to 43 | the creation of, or owns Covered Software. 44 | 45 | 1.2. "Contributor Version" 46 | means the combination of the Contributions of others (if any) used 47 | by a Contributor and that particular Contributor's Contribution. 48 | 49 | 1.3. "Contribution" 50 | means Covered Software of a particular Contributor. 51 | 52 | 1.4. "Covered Software" 53 | means Source Code Form to which the initial Contributor has attached 54 | the notice in Exhibit A, the Executable Form of such Source Code 55 | Form, and Modifications of such Source Code Form, in each case 56 | including portions thereof. 57 | 58 | 1.5. "Incompatible With Secondary Licenses" 59 | means 60 | 61 | (a) that the initial Contributor has attached the notice described 62 | in Exhibit B to the Covered Software; or 63 | 64 | (b) that the Covered Software was made available under the terms of 65 | version 1.1 or earlier of the License, but not also under the 66 | terms of a Secondary License. 67 | 68 | 1.6. "Executable Form" 69 | means any form of the work other than Source Code Form. 70 | 71 | 1.7. "Larger Work" 72 | means a work that combines Covered Software with other material, in 73 | a separate file or files, that is not Covered Software. 74 | 75 | 1.8. "License" 76 | means this document. 77 | 78 | 1.9. "Licensable" 79 | means having the right to grant, to the maximum extent possible, 80 | whether at the time of the initial grant or subsequently, any and 81 | all of the rights conveyed by this License. 82 | 83 | 1.10. 
"Modifications" 84 | means any of the following: 85 | 86 | (a) any file in Source Code Form that results from an addition to, 87 | deletion from, or modification of the contents of Covered 88 | Software; or 89 | 90 | (b) any new file in Source Code Form that contains any Covered 91 | Software. 92 | 93 | 1.11. "Patent Claims" of a Contributor 94 | means any patent claim(s), including without limitation, method, 95 | process, and apparatus claims, in any patent Licensable by such 96 | Contributor that would be infringed, but for the grant of the 97 | License, by the making, using, selling, offering for sale, having 98 | made, import, or transfer of either its Contributions or its 99 | Contributor Version. 100 | 101 | 1.12. "Secondary License" 102 | means either the GNU General Public License, Version 2.0, the GNU 103 | Lesser General Public License, Version 2.1, the GNU Affero General 104 | Public License, Version 3.0, or any later versions of those 105 | licenses. 106 | 107 | 1.13. "Source Code Form" 108 | means the form of the work preferred for making modifications. 109 | 110 | 1.14. "You" (or "Your") 111 | means an individual or a legal entity exercising rights under this 112 | License. For legal entities, "You" includes any entity that 113 | controls, is controlled by, or is under common control with You. For 114 | purposes of this definition, "control" means (a) the power, direct 115 | or indirect, to cause the direction or management of such entity, 116 | whether by contract or otherwise, or (b) ownership of more than 117 | fifty percent (50%) of the outstanding shares or beneficial 118 | ownership of such entity. 119 | 120 | 2. License Grants and Conditions 121 | -------------------------------- 122 | 123 | 2.1. 
Grants 124 | 125 | Each Contributor hereby grants You a world-wide, royalty-free, 126 | non-exclusive license: 127 | 128 | (a) under intellectual property rights (other than patent or trademark) 129 | Licensable by such Contributor to use, reproduce, make available, 130 | modify, display, perform, distribute, and otherwise exploit its 131 | Contributions, either on an unmodified basis, with Modifications, or 132 | as part of a Larger Work; and 133 | 134 | (b) under Patent Claims of such Contributor to make, use, sell, offer 135 | for sale, have made, import, and otherwise transfer either its 136 | Contributions or its Contributor Version. 137 | 138 | 2.2. Effective Date 139 | 140 | The licenses granted in Section 2.1 with respect to any Contribution 141 | become effective for each Contribution on the date the Contributor first 142 | distributes such Contribution. 143 | 144 | 2.3. Limitations on Grant Scope 145 | 146 | The licenses granted in this Section 2 are the only rights granted under 147 | this License. No additional rights or licenses will be implied from the 148 | distribution or licensing of Covered Software under this License. 149 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 150 | Contributor: 151 | 152 | (a) for any code that a Contributor has removed from Covered Software; 153 | or 154 | 155 | (b) for infringements caused by: (i) Your and any other third party's 156 | modifications of Covered Software, or (ii) the combination of its 157 | Contributions with other software (except as part of its Contributor 158 | Version); or 159 | 160 | (c) under Patent Claims infringed by Covered Software in the absence of 161 | its Contributions. 162 | 163 | This License does not grant any rights in the trademarks, service marks, 164 | or logos of any Contributor (except as may be necessary to comply with 165 | the notice requirements in Section 3.4). 166 | 167 | 2.4. 
Subsequent Licenses 168 | 169 | No Contributor makes additional grants as a result of Your choice to 170 | distribute the Covered Software under a subsequent version of this 171 | License (see Section 10.2) or under the terms of a Secondary License (if 172 | permitted under the terms of Section 3.3). 173 | 174 | 2.5. Representation 175 | 176 | Each Contributor represents that the Contributor believes its 177 | Contributions are its original creation(s) or it has sufficient rights 178 | to grant the rights to its Contributions conveyed by this License. 179 | 180 | 2.6. Fair Use 181 | 182 | This License is not intended to limit any rights You have under 183 | applicable copyright doctrines of fair use, fair dealing, or other 184 | equivalents. 185 | 186 | 2.7. Conditions 187 | 188 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 189 | in Section 2.1. 190 | 191 | 3. Responsibilities 192 | ------------------- 193 | 194 | 3.1. Distribution of Source Form 195 | 196 | All distribution of Covered Software in Source Code Form, including any 197 | Modifications that You create or to which You contribute, must be under 198 | the terms of this License. You must inform recipients that the Source 199 | Code Form of the Covered Software is governed by the terms of this 200 | License, and how they can obtain a copy of this License. You may not 201 | attempt to alter or restrict the recipients' rights in the Source Code 202 | Form. 203 | 204 | 3.2. 
Distribution of Executable Form 205 | 206 | If You distribute Covered Software in Executable Form then: 207 | 208 | (a) such Covered Software must also be made available in Source Code 209 | Form, as described in Section 3.1, and You must inform recipients of 210 | the Executable Form how they can obtain a copy of such Source Code 211 | Form by reasonable means in a timely manner, at a charge no more 212 | than the cost of distribution to the recipient; and 213 | 214 | (b) You may distribute such Executable Form under the terms of this 215 | License, or sublicense it under different terms, provided that the 216 | license for the Executable Form does not attempt to limit or alter 217 | the recipients' rights in the Source Code Form under this License. 218 | 219 | 3.3. Distribution of a Larger Work 220 | 221 | You may create and distribute a Larger Work under terms of Your choice, 222 | provided that You also comply with the requirements of this License for 223 | the Covered Software. If the Larger Work is a combination of Covered 224 | Software with a work governed by one or more Secondary Licenses, and the 225 | Covered Software is not Incompatible With Secondary Licenses, this 226 | License permits You to additionally distribute such Covered Software 227 | under the terms of such Secondary License(s), so that the recipient of 228 | the Larger Work may, at their option, further distribute the Covered 229 | Software under the terms of either this License or such Secondary 230 | License(s). 231 | 232 | 3.4. Notices 233 | 234 | You may not remove or alter the substance of any license notices 235 | (including copyright notices, patent notices, disclaimers of warranty, 236 | or limitations of liability) contained within the Source Code Form of 237 | the Covered Software, except that You may alter any license notices to 238 | the extent required to remedy known factual inaccuracies. 239 | 240 | 3.5. 
Application of Additional Terms 241 | 242 | You may choose to offer, and to charge a fee for, warranty, support, 243 | indemnity or liability obligations to one or more recipients of Covered 244 | Software. However, You may do so only on Your own behalf, and not on 245 | behalf of any Contributor. You must make it absolutely clear that any 246 | such warranty, support, indemnity, or liability obligation is offered by 247 | You alone, and You hereby agree to indemnify every Contributor for any 248 | liability incurred by such Contributor as a result of warranty, support, 249 | indemnity or liability terms You offer. You may include additional 250 | disclaimers of warranty and limitations of liability specific to any 251 | jurisdiction. 252 | 253 | 4. Inability to Comply Due to Statute or Regulation 254 | --------------------------------------------------- 255 | 256 | If it is impossible for You to comply with any of the terms of this 257 | License with respect to some or all of the Covered Software due to 258 | statute, judicial order, or regulation then You must: (a) comply with 259 | the terms of this License to the maximum extent possible; and (b) 260 | describe the limitations and the code they affect. Such description must 261 | be placed in a text file included with all distributions of the Covered 262 | Software under this License. Except to the extent prohibited by statute 263 | or regulation, such description must be sufficiently detailed for a 264 | recipient of ordinary skill to be able to understand it. 265 | 266 | 5. Termination 267 | -------------- 268 | 269 | 5.1. The rights granted under this License will terminate automatically 270 | if You fail to comply with any of its terms. 
However, if You become 271 | compliant, then the rights granted under this License from a particular 272 | Contributor are reinstated (a) provisionally, unless and until such 273 | Contributor explicitly and finally terminates Your grants, and (b) on an 274 | ongoing basis, if such Contributor fails to notify You of the 275 | non-compliance by some reasonable means prior to 60 days after You have 276 | come back into compliance. Moreover, Your grants from a particular 277 | Contributor are reinstated on an ongoing basis if such Contributor 278 | notifies You of the non-compliance by some reasonable means, this is the 279 | first time You have received notice of non-compliance with this License 280 | from such Contributor, and You become compliant prior to 30 days after 281 | Your receipt of the notice. 282 | 283 | 5.2. If You initiate litigation against any entity by asserting a patent 284 | infringement claim (excluding declaratory judgment actions, 285 | counter-claims, and cross-claims) alleging that a Contributor Version 286 | directly or indirectly infringes any patent, then the rights granted to 287 | You by any and all Contributors for the Covered Software under Section 288 | 2.1 of this License shall terminate. 289 | 290 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 291 | end user license agreements (excluding distributors and resellers) which 292 | have been validly granted by You or Your distributors under this License 293 | prior to termination shall survive termination. 294 | 295 | ************************************************************************ 296 | * * 297 | * 6. 
Disclaimer of Warranty * 298 | * ------------------------- * 299 | * * 300 | * Covered Software is provided under this License on an "as is" * 301 | * basis, without warranty of any kind, either expressed, implied, or * 302 | * statutory, including, without limitation, warranties that the * 303 | * Covered Software is free of defects, merchantable, fit for a * 304 | * particular purpose or non-infringing. The entire risk as to the * 305 | * quality and performance of the Covered Software is with You. * 306 | * Should any Covered Software prove defective in any respect, You * 307 | * (not any Contributor) assume the cost of any necessary servicing, * 308 | * repair, or correction. This disclaimer of warranty constitutes an * 309 | * essential part of this License. No use of any Covered Software is * 310 | * authorized under this License except under this disclaimer. * 311 | * * 312 | ************************************************************************ 313 | 314 | ************************************************************************ 315 | * * 316 | * 7. Limitation of Liability * 317 | * -------------------------- * 318 | * * 319 | * Under no circumstances and under no legal theory, whether tort * 320 | * (including negligence), contract, or otherwise, shall any * 321 | * Contributor, or anyone who distributes Covered Software as * 322 | * permitted above, be liable to You for any direct, indirect, * 323 | * special, incidental, or consequential damages of any character * 324 | * including, without limitation, damages for lost profits, loss of * 325 | * goodwill, work stoppage, computer failure or malfunction, or any * 326 | * and all other commercial damages or losses, even if such party * 327 | * shall have been informed of the possibility of such damages. 
This * 328 | * limitation of liability shall not apply to liability for death or * 329 | * personal injury resulting from such party's negligence to the * 330 | * extent applicable law prohibits such limitation. Some * 331 | * jurisdictions do not allow the exclusion or limitation of * 332 | * incidental or consequential damages, so this exclusion and * 333 | * limitation may not apply to You. * 334 | * * 335 | ************************************************************************ 336 | 337 | 8. Litigation 338 | ------------- 339 | 340 | Any litigation relating to this License may be brought only in the 341 | courts of a jurisdiction where the defendant maintains its principal 342 | place of business and such litigation shall be governed by laws of that 343 | jurisdiction, without reference to its conflict-of-law provisions. 344 | Nothing in this Section shall prevent a party's ability to bring 345 | cross-claims or counter-claims. 346 | 347 | 9. Miscellaneous 348 | ---------------- 349 | 350 | This License represents the complete agreement concerning the subject 351 | matter hereof. If any provision of this License is held to be 352 | unenforceable, such provision shall be reformed only to the extent 353 | necessary to make it enforceable. Any law or regulation which provides 354 | that the language of a contract shall be construed against the drafter 355 | shall not be used to construe this License against a Contributor. 356 | 357 | 10. Versions of the License 358 | --------------------------- 359 | 360 | 10.1. New Versions 361 | 362 | Mozilla Foundation is the license steward. Except as provided in Section 363 | 10.3, no one other than the license steward has the right to modify or 364 | publish new versions of this License. Each version will be given a 365 | distinguishing version number. 366 | 367 | 10.2. 
Effect of New Versions 368 | 369 | You may distribute the Covered Software under the terms of the version 370 | of the License under which You originally received the Covered Software, 371 | or under the terms of any subsequent version published by the license 372 | steward. 373 | 374 | 10.3. Modified Versions 375 | 376 | If you create software not governed by this License, and you want to 377 | create a new license for such software, you may create and use a 378 | modified version of this License if you rename the license and remove 379 | any references to the name of the license steward (except to note that 380 | such modified license differs from this License). 381 | 382 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 383 | Licenses 384 | 385 | If You choose to distribute Source Code Form that is Incompatible With 386 | Secondary Licenses under the terms of this version of the License, the 387 | notice described in Exhibit B of this License must be attached. 388 | 389 | Exhibit A - Source Code Form License Notice 390 | ------------------------------------------- 391 | 392 | This Source Code Form is subject to the terms of the Mozilla Public 393 | License, v. 2.0. If a copy of the MPL was not distributed with this 394 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 395 | 396 | If it is not possible or desirable to put the notice in a particular 397 | file, then You may include the notice in a location (such as a LICENSE 398 | file in a relevant directory) where a recipient would be likely to look 399 | for such a notice. 400 | 401 | You may add additional accurate notices of copyright ownership. 402 | 403 | Exhibit B - "Incompatible With Secondary Licenses" Notice 404 | --------------------------------------------------------- 405 | 406 | This Source Code Form is "Incompatible With Secondary Licenses", as 407 | defined by the Mozilla Public License, v. 2.0. 
408 | 409 | and 410 | 411 | /* 412 | Copyright (c) 2011, Intel Corporation. All rights reserved. 413 | 414 | Redistribution and use in source and binary forms, with or without modification, 415 | are permitted provided that the following conditions are met: 416 | 417 | * Redistributions of source code must retain the above copyright notice, this 418 | list of conditions and the following disclaimer. 419 | * Redistributions in binary form must reproduce the above copyright notice, 420 | this list of conditions and the following disclaimer in the documentation 421 | and/or other materials provided with the distribution. 422 | * Neither the name of Intel Corporation nor the names of its contributors may 423 | be used to endorse or promote products derived from this software without 424 | specific prior written permission. 425 | 426 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 427 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 428 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 429 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 430 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 431 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 432 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 433 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 434 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 435 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
436 | */ 437 | 438 | ================================================================================ 439 | fast_double_parser 0.8.0 440 | ================================================================================ 441 | 442 | Copyright (c) Daniel Lemire 443 | 444 | Boost Software License - Version 1.0 - August 17th, 2003 445 | 446 | Permission is hereby granted, free of charge, to any person or organization 447 | obtaining a copy of the software and accompanying documentation covered by 448 | this license (the "Software") to use, reproduce, display, distribute, 449 | execute, and transmit the Software, and to prepare derivative works of the 450 | Software, and to permit third-parties to whom the Software is furnished to 451 | do so, all subject to the following: 452 | 453 | The copyright notices in the Software and this entire statement, including 454 | the above license grant, this restriction and the following disclaimer, 455 | must be included in all copies of the Software, in whole or in part, and 456 | all derivative works of the Software, unless such copies or derivative 457 | works are solely in the form of machine-executable object code generated by 458 | a source language processor. 459 | 460 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 461 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 462 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 463 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 464 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 465 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 466 | DEALINGS IN THE SOFTWARE. 
467 | 468 | ================================================================================ 469 | fmt 11.1.2 470 | ================================================================================ 471 | 472 | Copyright (c) 2012 - present, Victor Zverovich and {fmt} contributors 473 | 474 | Permission is hereby granted, free of charge, to any person obtaining 475 | a copy of this software and associated documentation files (the 476 | "Software"), to deal in the Software without restriction, including 477 | without limitation the rights to use, copy, modify, merge, publish, 478 | distribute, sublicense, and/or sell copies of the Software, and to 479 | permit persons to whom the Software is furnished to do so, subject to 480 | the following conditions: 481 | 482 | The above copyright notice and this permission notice shall be 483 | included in all copies or substantial portions of the Software. 484 | 485 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 486 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 487 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 488 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 489 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 490 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 491 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 492 | 493 | --- Optional exception to the license --- 494 | 495 | As an exception, if, as a result of your compiling your source code, portions 496 | of this Software are embedded into a machine-executable object form of such 497 | source code, you may redistribute such embedded portions in such object form 498 | without including the above copyright and permission notices. 
499 | 500 | ================================================================================ 501 | json11 502 | ================================================================================ 503 | 504 | Copyright (c) 2013 Dropbox, Inc. 505 | 506 | Permission is hereby granted, free of charge, to any person obtaining a copy 507 | of this software and associated documentation files (the "Software"), to deal 508 | in the Software without restriction, including without limitation the rights 509 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 510 | copies of the Software, and to permit persons to whom the Software is 511 | furnished to do so, subject to the following conditions: 512 | 513 | The above copyright notice and this permission notice shall be included in 514 | all copies or substantial portions of the Software. 515 | 516 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 517 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 518 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 519 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 520 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 521 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 522 | THE SOFTWARE. 
523 | 524 | ================================================================================ 525 | yamc 526 | ================================================================================ 527 | 528 | MIT License 529 | 530 | Copyright (c) 2017 yohhoy 531 | 532 | Permission is hereby granted, free of charge, to any person obtaining a copy 533 | of this software and associated documentation files (the "Software"), to deal 534 | in the Software without restriction, including without limitation the rights 535 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 536 | copies of the Software, and to permit persons to whom the Software is 537 | furnished to do so, subject to the following conditions: 538 | 539 | The above copyright notice and this permission notice shall be included in all 540 | copies or substantial portions of the Software. 541 | 542 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 543 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 544 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 545 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 546 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 547 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 548 | SOFTWARE. 549 | --------------------------------------------------------------------------------