├── .appveyor.yml ├── .ci ├── README.md ├── append-comment.sh ├── check-dynamic-dependencies.py ├── check-python-dists.sh ├── conda-envs │ ├── README.md │ ├── ci-core-py37.txt │ ├── ci-core-py38.txt │ └── ci-core.txt ├── create-nuget.py ├── get-workflow-status.py ├── install-opencl.ps1 ├── install-r-deps.R ├── lint-cpp.sh ├── lint-js.sh ├── lint-powershell.ps1 ├── lint-python-bash.sh ├── lint-r-code.R ├── parameter-generator.py ├── rerun-workflow.sh ├── run-r-cmd-check.sh ├── set-commit-status.sh ├── setup.sh ├── test-python-latest.sh ├── test-python-oldest.sh ├── test-r-package-valgrind.sh ├── test-r-package-windows.ps1 ├── test-r-package.sh ├── test-windows.ps1 ├── test.sh └── trigger-dispatch-run.sh ├── .editorconfig ├── .git-blame-ignore-revs ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── BUG_REPORT.md │ └── FEATURE_REQUEST.md ├── dependabot.yml ├── release-drafter.yml └── workflows │ ├── cuda.yml │ ├── linkchecker.yml │ ├── lock.yml │ ├── no_response.yml │ ├── optional_checks.yml │ ├── python_package.yml │ ├── r_configure.yml │ ├── r_package.yml │ ├── r_valgrind.yml │ ├── release_drafter.yml │ ├── static_analysis.yml │ └── triggering_comments.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .typos.toml ├── .vsts-ci.yml ├── .yamllint.yml ├── CMakeLists.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── R-package ├── .Rbuildignore ├── AUTOCONF_UBUNTU_VERSION ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R │ ├── aliases.R │ ├── callback.R │ ├── lgb.Booster.R │ ├── lgb.DataProcessor.R │ ├── lgb.Dataset.R │ ├── lgb.Predictor.R │ ├── lgb.convert_with_rules.R │ ├── lgb.cv.R │ ├── lgb.drop_serialized.R │ ├── lgb.importance.R │ ├── lgb.interprete.R │ ├── lgb.make_serializable.R │ ├── lgb.model.dt.tree.R │ ├── lgb.plot.importance.R │ ├── lgb.plot.interpretation.R │ ├── lgb.restore_handle.R │ ├── lgb.train.R │ ├── lightgbm.R │ ├── metrics.R │ ├── multithreading.R │ └── utils.R ├── README.md ├── cleanup ├── configure ├── configure.ac ├── configure.win ├── cran-comments.md ├── data │ ├── agaricus.test.rda │ ├── agaricus.train.rda │ └── bank.rda ├── demo │ ├── 00Index │ ├── basic_walkthrough.R │ ├── boost_from_prediction.R │ ├── categorical_features_rules.R │ ├── cross_validation.R │ ├── early_stopping.R │ ├── efficient_many_training.R │ ├── leaf_stability.R │ ├── multiclass.R │ ├── multiclass_custom_objective.R │ └── weight_param.R ├── inst │ ├── Makevars │ ├── Makevars.win │ ├── bin │ │ └── .gitkeep │ └── make-r-def.R ├── man │ ├── agaricus.test.Rd │ ├── agaricus.train.Rd │ ├── bank.Rd │ ├── dim.Rd │ ├── dimnames.lgb.Dataset.Rd │ ├── figures │ │ └── logo.svg │ ├── getLGBMThreads.Rd │ ├── get_field.Rd │ ├── lgb.Dataset.Rd │ ├── lgb.Dataset.construct.Rd │ ├── lgb.Dataset.create.valid.Rd │ ├── lgb.Dataset.save.Rd │ ├── lgb.Dataset.set.categorical.Rd │ ├── lgb.Dataset.set.reference.Rd │ ├── lgb.configure_fast_predict.Rd │ ├── lgb.convert_with_rules.Rd │ ├── lgb.cv.Rd │ ├── lgb.drop_serialized.Rd │ ├── lgb.dump.Rd │ ├── lgb.get.eval.result.Rd │ ├── lgb.importance.Rd │ ├── lgb.interprete.Rd │ ├── lgb.load.Rd │ ├── lgb.make_serializable.Rd │ ├── lgb.model.dt.tree.Rd │ ├── lgb.plot.importance.Rd │ ├── lgb.plot.interpretation.Rd │ ├── lgb.restore_handle.Rd │ ├── lgb.save.Rd │ ├── lgb.slice.Dataset.Rd │ ├── lgb.train.Rd │ ├── lgb_shared_dataset_params.Rd │ ├── lgb_shared_params.Rd │ ├── lightgbm.Rd │ ├── predict.lgb.Booster.Rd │ ├── print.lgb.Booster.Rd │ ├── setLGBMThreads.Rd │ ├── set_field.Rd │ └── summary.lgb.Booster.Rd ├── pkgdown │ ├── 
_pkgdown.yml │ └── favicon │ │ ├── apple-touch-icon-120x120.png │ │ ├── apple-touch-icon-152x152.png │ │ ├── apple-touch-icon-180x180.png │ │ ├── apple-touch-icon-60x60.png │ │ ├── apple-touch-icon-76x76.png │ │ ├── apple-touch-icon.png │ │ ├── favicon-16x16.png │ │ ├── favicon-32x32.png │ │ └── favicon.ico ├── recreate-configure.sh ├── src │ ├── Makevars.in │ ├── Makevars.win.in │ ├── install.libs.R │ ├── lightgbm-win.def │ ├── lightgbm_R.cpp │ └── lightgbm_R.h ├── tests │ ├── testthat.R │ └── testthat │ │ ├── helper.R │ │ ├── test_Predictor.R │ │ ├── test_basic.R │ │ ├── test_custom_objective.R │ │ ├── test_dataset.R │ │ ├── test_learning_to_rank.R │ │ ├── test_lgb.Booster.R │ │ ├── test_lgb.convert_with_rules.R │ │ ├── test_lgb.importance.R │ │ ├── test_lgb.interprete.R │ │ ├── test_lgb.model.dt.tree.R │ │ ├── test_lgb.plot.importance.R │ │ ├── test_lgb.plot.interpretation.R │ │ ├── test_metrics.R │ │ ├── test_multithreading.R │ │ ├── test_parameters.R │ │ ├── test_utils.R │ │ └── test_weighted_loss.R └── vignettes │ └── basic_walkthrough.Rmd ├── README.md ├── SECURITY.md ├── VERSION.txt ├── biome.json ├── build-cran-package.sh ├── build-python.sh ├── build_r.R ├── cmake ├── IntegratedOpenCL.cmake ├── Sanitizer.cmake └── modules │ └── FindLibR.cmake ├── docker ├── README.md ├── dockerfile-cli ├── dockerfile-python ├── dockerfile-r └── gpu │ ├── README.md │ ├── dockerfile-cli-only-distroless.gpu │ ├── dockerfile-cli-only.gpu │ └── dockerfile.gpu ├── docs ├── .linkcheckerrc ├── Advanced-Topics.rst ├── C-API.rst ├── Development-Guide.rst ├── Experiments.rst ├── FAQ.rst ├── Features.rst ├── GPU-Performance.rst ├── GPU-Targets.rst ├── GPU-Tutorial.rst ├── GPU-Windows.rst ├── Installation-Guide.rst ├── Key-Events.md ├── Makefile ├── Parallel-Learning-Guide.rst ├── Parameters-Tuning.rst ├── Parameters.rst ├── Python-API.rst ├── Python-Intro.rst ├── Quick-Start.rst ├── README.rst ├── _static │ ├── images │ │ ├── artifacts-download.svg │ │ ├── artifacts-fetching.svg │ │ ├── artifacts-not-available.svg │ │ ├── dask-concat.svg │ │ ├── dask-initial-setup.svg │ │ ├── favicon.ico │ │ ├── gpu-performance-comparison.png │ │ ├── leaf-wise.png │ │ └── level-wise.png │ └── js │ │ └── script.js ├── build-docs.sh ├── conf.py ├── env.yml ├── gcc-Tips.rst ├── index.rst ├── logo │ ├── LightGBM-logo-hex.cdr │ ├── LightGBM-logo-hex.svg │ ├── LightGBM_logo-hex.png │ ├── LightGBM_logo.cdr │ ├── LightGBM_logo_black_text.svg │ ├── LightGBM_logo_black_text_huge.png │ ├── LightGBM_logo_black_text_large.png │ ├── LightGBM_logo_black_text_medium.png │ ├── LightGBM_logo_black_text_small.png │ ├── LightGBM_logo_black_text_tiny.png │ ├── LightGBM_logo_grey_text.svg │ ├── LightGBM_logo_grey_text_huge.png │ ├── LightGBM_logo_grey_text_large.png │ ├── LightGBM_logo_grey_text_medium.png │ ├── LightGBM_logo_grey_text_small.png │ ├── LightGBM_logo_grey_text_tiny.png │ ├── LightGBM_logo_no_text.svg │ ├── LightGBM_logo_no_text_huge.png │ ├── LightGBM_logo_no_text_large.png │ ├── LightGBM_logo_no_text_medium.png │ ├── LightGBM_logo_no_text_small.png │ └── LightGBM_logo_no_text_tiny.png └── make.bat ├── examples ├── README.md ├── binary_classification │ ├── README.md │ ├── binary.test │ ├── binary.test.weight │ ├── binary.train │ ├── binary.train.weight │ ├── forced_splits.json │ ├── predict.conf │ ├── train.conf │ └── train_linear.conf ├── lambdarank │ ├── README.md │ ├── predict.conf │ ├── rank.test │ ├── rank.test.query │ ├── rank.train │ ├── rank.train.query │ └── train.conf ├── multiclass_classification │ ├── README.md │ ├── 
multiclass.test │ ├── multiclass.train │ ├── predict.conf │ └── train.conf ├── parallel_learning │ ├── README.md │ ├── binary.test │ ├── binary.train │ ├── mlist.txt │ ├── predict.conf │ └── train.conf ├── python-guide │ ├── README.md │ ├── advanced_example.py │ ├── dask │ │ ├── README.md │ │ ├── binary-classification.py │ │ ├── multiclass-classification.py │ │ ├── prediction.py │ │ ├── ranking.py │ │ └── regression.py │ ├── dataset_from_multi_hdf5.py │ ├── logistic_regression.py │ ├── notebooks │ │ └── interactive_plot_example.ipynb │ ├── plot_example.py │ ├── simple_example.py │ └── sklearn_example.py ├── regression │ ├── README.md │ ├── forced_bins.json │ ├── forced_bins2.json │ ├── predict.conf │ ├── regression.test │ ├── regression.test.init │ ├── regression.train │ ├── regression.train.init │ └── train.conf └── xendcg │ ├── README.md │ ├── predict.conf │ ├── rank.test │ ├── rank.test.query │ ├── rank.train │ ├── rank.train.query │ └── train.conf ├── include └── LightGBM │ ├── application.h │ ├── arrow.h │ ├── arrow.tpp │ ├── bin.h │ ├── boosting.h │ ├── c_api.h │ ├── config.h │ ├── cuda │ ├── cuda_algorithms.hpp │ ├── cuda_column_data.hpp │ ├── cuda_metadata.hpp │ ├── cuda_metric.hpp │ ├── cuda_objective_function.hpp │ ├── cuda_random.hpp │ ├── cuda_row_data.hpp │ ├── cuda_split_info.hpp │ ├── cuda_tree.hpp │ ├── cuda_utils.hu │ └── vector_cudahost.h │ ├── dataset.h │ ├── dataset_loader.h │ ├── export.h │ ├── feature_group.h │ ├── meta.h │ ├── metric.h │ ├── network.h │ ├── objective_function.h │ ├── prediction_early_stop.h │ ├── sample_strategy.h │ ├── train_share_states.h │ ├── tree.h │ ├── tree_learner.h │ └── utils │ ├── array_args.h │ ├── binary_writer.h │ ├── byte_buffer.h │ ├── chunked_array.hpp │ ├── common.h │ ├── file_io.h │ ├── json11.h │ ├── log.h │ ├── openmp_wrapper.h │ ├── pipeline_reader.h │ ├── random.h │ ├── text_reader.h │ ├── threading.h │ └── yamc │ ├── alternate_shared_mutex.hpp │ ├── yamc_rwlock_sched.hpp │ └── yamc_shared_lock.hpp ├── python-package ├── README.rst ├── lightgbm │ ├── __init__.py │ ├── basic.py │ ├── callback.py │ ├── compat.py │ ├── dask.py │ ├── engine.py │ ├── libpath.py │ ├── plotting.py │ ├── py.typed │ └── sklearn.py └── pyproject.toml ├── src ├── application │ ├── application.cpp │ └── predictor.hpp ├── boosting │ ├── bagging.hpp │ ├── boosting.cpp │ ├── cuda │ │ ├── cuda_score_updater.cpp │ │ ├── cuda_score_updater.cu │ │ └── cuda_score_updater.hpp │ ├── dart.hpp │ ├── gbdt.cpp │ ├── gbdt.h │ ├── gbdt_model_text.cpp │ ├── gbdt_prediction.cpp │ ├── goss.hpp │ ├── prediction_early_stop.cpp │ ├── rf.hpp │ ├── sample_strategy.cpp │ └── score_updater.hpp ├── c_api.cpp ├── cuda │ ├── cuda_algorithms.cu │ └── cuda_utils.cpp ├── io │ ├── bin.cpp │ ├── config.cpp │ ├── config_auto.cpp │ ├── cuda │ │ ├── cuda_column_data.cpp │ │ ├── cuda_column_data.cu │ │ ├── cuda_metadata.cpp │ │ ├── cuda_row_data.cpp │ │ ├── cuda_tree.cpp │ │ └── cuda_tree.cu │ ├── dataset.cpp │ ├── dataset_loader.cpp │ ├── dense_bin.hpp │ ├── file_io.cpp │ ├── json11.cpp │ ├── metadata.cpp │ ├── multi_val_dense_bin.hpp │ ├── multi_val_sparse_bin.hpp │ ├── parser.cpp │ ├── parser.hpp │ ├── sparse_bin.hpp │ ├── train_share_states.cpp │ └── tree.cpp ├── main.cpp ├── metric │ ├── binary_metric.hpp │ ├── cuda │ │ ├── cuda_binary_metric.cpp │ │ ├── cuda_binary_metric.hpp │ │ ├── cuda_pointwise_metric.cpp │ │ ├── cuda_pointwise_metric.cu │ │ ├── cuda_pointwise_metric.hpp │ │ ├── cuda_regression_metric.cpp │ │ └── cuda_regression_metric.hpp │ ├── dcg_calculator.cpp │ ├── 
map_metric.hpp │ ├── metric.cpp │ ├── multiclass_metric.hpp │ ├── rank_metric.hpp │ ├── regression_metric.hpp │ └── xentropy_metric.hpp ├── network │ ├── linker_topo.cpp │ ├── linkers.h │ ├── linkers_mpi.cpp │ ├── linkers_socket.cpp │ ├── network.cpp │ └── socket_wrapper.hpp ├── objective │ ├── binary_objective.hpp │ ├── cuda │ │ ├── cuda_binary_objective.cpp │ │ ├── cuda_binary_objective.cu │ │ ├── cuda_binary_objective.hpp │ │ ├── cuda_multiclass_objective.cpp │ │ ├── cuda_multiclass_objective.cu │ │ ├── cuda_multiclass_objective.hpp │ │ ├── cuda_rank_objective.cpp │ │ ├── cuda_rank_objective.cu │ │ ├── cuda_rank_objective.hpp │ │ ├── cuda_regression_objective.cpp │ │ ├── cuda_regression_objective.cu │ │ └── cuda_regression_objective.hpp │ ├── multiclass_objective.hpp │ ├── objective_function.cpp │ ├── rank_objective.hpp │ ├── regression_objective.hpp │ └── xentropy_objective.hpp ├── treelearner │ ├── col_sampler.hpp │ ├── cost_effective_gradient_boosting.hpp │ ├── cuda │ │ ├── cuda_best_split_finder.cpp │ │ ├── cuda_best_split_finder.cu │ │ ├── cuda_best_split_finder.hpp │ │ ├── cuda_data_partition.cpp │ │ ├── cuda_data_partition.cu │ │ ├── cuda_data_partition.hpp │ │ ├── cuda_gradient_discretizer.cu │ │ ├── cuda_gradient_discretizer.hpp │ │ ├── cuda_histogram_constructor.cpp │ │ ├── cuda_histogram_constructor.cu │ │ ├── cuda_histogram_constructor.hpp │ │ ├── cuda_leaf_splits.cpp │ │ ├── cuda_leaf_splits.cu │ │ ├── cuda_leaf_splits.hpp │ │ ├── cuda_single_gpu_tree_learner.cpp │ │ ├── cuda_single_gpu_tree_learner.cu │ │ └── cuda_single_gpu_tree_learner.hpp │ ├── data_parallel_tree_learner.cpp │ ├── data_partition.hpp │ ├── feature_histogram.cpp │ ├── feature_histogram.hpp │ ├── feature_parallel_tree_learner.cpp │ ├── gpu_tree_learner.cpp │ ├── gpu_tree_learner.h │ ├── gradient_discretizer.cpp │ ├── gradient_discretizer.hpp │ ├── leaf_splits.hpp │ ├── linear_tree_learner.cpp │ ├── linear_tree_learner.h │ ├── monotone_constraints.hpp │ ├── ocl │ │ ├── histogram16.cl │ │ ├── histogram256.cl │ │ └── histogram64.cl │ ├── parallel_tree_learner.h │ ├── serial_tree_learner.cpp │ ├── serial_tree_learner.h │ ├── split_info.hpp │ ├── tree_learner.cpp │ └── voting_parallel_tree_learner.cpp └── utils │ └── openmp_wrapper.cpp ├── swig ├── ChunkedArray_API_extensions.i ├── StringArray.hpp ├── StringArray.i ├── StringArray_API_extensions.i ├── lightgbmlib.i └── pointer_manipulation.i ├── tests ├── c_api_test │ └── test_.py ├── cpp_tests │ ├── predict.conf │ ├── test.py │ ├── test_array_args.cpp │ ├── test_arrow.cpp │ ├── test_byte_buffer.cpp │ ├── test_chunked_array.cpp │ ├── test_common.cpp │ ├── test_main.cpp │ ├── test_serialize.cpp │ ├── test_single_row.cpp │ ├── test_stream.cpp │ ├── testutils.cpp │ ├── testutils.h │ └── train.conf ├── data │ └── categorical.data ├── distributed │ ├── _test_distributed.py │ └── conftest.py └── python_package_test │ ├── __init__.py │ ├── conftest.py │ ├── test_arrow.py │ ├── test_basic.py │ ├── test_callback.py │ ├── test_consistency.py │ ├── test_dask.py │ ├── test_dual.py │ ├── test_engine.py │ ├── test_plotting.py │ ├── test_sklearn.py │ ├── test_utilities.py │ └── utils.py └── windows ├── LightGBM.sln ├── LightGBM.vcxproj └── LightGBM.vcxproj.filters /.appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 4.6.0.99.{build} 2 | 3 | image: Visual Studio 2015 4 | platform: x64 5 | configuration: 6 | - '3.8' 7 | 8 | # only build on 'master' and pull requests targeting it 9 | branches: 10 | only: 11 | - master 12 | 
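# NOTE: the '3.8' under 'configuration' above is not a Visual Studio build configuration;
# AppVeyor exposes it as %CONFIGURATION%, and the install steps below reuse it as the
# Python version via 'set PYTHON_VERSION=%CONFIGURATION%'.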
13 | environment: 14 | matrix: 15 | - COMPILER: MSVC 16 | TASK: python 17 | - COMPILER: MINGW 18 | TASK: python 19 | 20 | clone_depth: 5 21 | 22 | install: 23 | - git submodule update --init --recursive # get `external_libs` folder 24 | - set PATH=C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64\bin;%PATH% 25 | - set PYTHON_VERSION=%CONFIGURATION% 26 | - ps: | 27 | $env:ALLOW_SKIP_ARROW_TESTS = "1" 28 | $env:APPVEYOR = "true" 29 | $env:CMAKE_BUILD_PARALLEL_LEVEL = 4 30 | $env:MINICONDA = "C:\Miniconda3-x64" 31 | $env:PATH = "$env:MINICONDA;$env:MINICONDA\Scripts;$env:PATH" 32 | $env:BUILD_SOURCESDIRECTORY = "$env:APPVEYOR_BUILD_FOLDER" 33 | 34 | build: false 35 | 36 | test_script: 37 | - conda config --remove channels defaults 38 | - conda config --add channels nodefaults 39 | - conda config --add channels conda-forge 40 | - conda config --set channel_priority strict 41 | - conda init powershell 42 | - powershell.exe -ExecutionPolicy Bypass -File %APPVEYOR_BUILD_FOLDER%\.ci\test-windows.ps1 43 | -------------------------------------------------------------------------------- /.ci/README.md: -------------------------------------------------------------------------------- 1 | Helper Scripts for CI 2 | ===================== 3 | 4 | This folder contains scripts which are run on CI services. 5 | 6 | The Dockerfile used on CI services is maintained in a separate [GitHub repository](https://github.com/guolinke/lightgbm-ci-docker) and can be pulled from [Docker Hub](https://hub.docker.com/r/lightgbm/vsts-agent). 7 | -------------------------------------------------------------------------------- /.ci/append-comment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # [description] 4 | # Update a comment, appending a given body to the specified original comment. 5 | # 6 | # [usage] 7 | # append-comment.sh <comment_id> <body> 8 | # 9 | # COMMENT_ID: ID of the comment that should be modified. 10 | # 11 | # BODY: Text that will be appended to the original comment body. 12 | 13 | set -e -E -u -o pipefail 14 | 15 | if [ -z "$GITHUB_ACTIONS" ]; then 16 | echo "Must be run inside GitHub Actions CI" 17 | exit 1 18 | fi 19 | 20 | if [ $# -ne 2 ]; then 21 | echo "Usage: $0 <comment_id> <body>" 22 | exit 1 23 | fi 24 | 25 | comment_id=$1 26 | body=$2 27 | 28 | old_comment_body=$( 29 | curl -sL \ 30 | -H "Accept: application/vnd.github.v3+json" \ 31 | -H "Authorization: token $SECRETS_WORKFLOW" \ 32 | "${GITHUB_API_URL}/repos/microsoft/LightGBM/issues/comments/$comment_id" | \ 33 | jq '.body' 34 | ) 35 | body=${body/failure/failure ❌} 36 | body=${body/error/failure ❌} 37 | body=${body/cancelled/failure ❌} 38 | body=${body/timed_out/failure ❌} 39 | body=${body/success/success ✔️} 40 | data=$( 41 | jq -n \ 42 | --argjson body "${old_comment_body%?}\r\n\r\n$body\"" \ 43 | '{"body":$body}' 44 | ) 45 | curl -sL \ 46 | -X PATCH \ 47 | -H "Accept: application/vnd.github.v3+json" \ 48 | -H "Authorization: token $SECRETS_WORKFLOW" \ 49 | -d "$data" \ 50 | "${GITHUB_API_URL}/repos/microsoft/LightGBM/issues/comments/$comment_id" 51 | -------------------------------------------------------------------------------- /.ci/check-dynamic-dependencies.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Helper script for checking versions in the dynamic symbol table. 3 | 4 | This script checks that the LightGBM library is linked to the appropriate symbol versions.
5 | 6 | Linking to newer symbol versions at compile time is problematic because it could result 7 | in built artifacts being unusable on older platforms. 8 | 9 | Version history for these symbols can be found at the following: 10 | 11 | * GLIBC: https://sourceware.org/glibc/wiki/Glibc%20Timeline 12 | * GLIBCXX: https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html 13 | * OMP/GOMP: https://github.com/gcc-mirror/gcc/blob/master/libgomp/libgomp.map 14 | """ 15 | 16 | import re 17 | import sys 18 | from pathlib import Path 19 | 20 | 21 | def check_dependencies(objdump_string: str) -> None: 22 | """Check the dynamic symbol versions. 23 | 24 | Parameters 25 | ---------- 26 | objdump_string : str 27 | The dynamic symbol table entries of the file (result of `objdump -T` command). 28 | """ 29 | GLIBC_version = re.compile(r"0{16}[ \(\t]+GLIBC_(\d{1,2})[.](\d{1,3})[.]?\d{,3}[ \)\t]+") 30 | versions = GLIBC_version.findall(objdump_string) 31 | assert len(versions) > 1 32 | for major, minor in versions: 33 | error_msg = f"found unexpected GLIBC version: '{major}.{minor}'" 34 | assert int(major) <= 2, error_msg 35 | assert int(minor) <= 28, error_msg 36 | 37 | GLIBCXX_version = re.compile(r"0{16}[ \(\t]+GLIBCXX_(\d{1,2})[.](\d{1,2})[.]?(\d{,3})[ \)\t]+") 38 | versions = GLIBCXX_version.findall(objdump_string) 39 | assert len(versions) > 1 40 | for major, minor, patch in versions: 41 | error_msg = f"found unexpected GLIBCXX version: '{major}.{minor}.{patch}'" 42 | assert int(major) == 3, error_msg 43 | assert int(minor) == 4, error_msg 44 | assert patch == "" or int(patch) <= 22, error_msg 45 | 46 | GOMP_version = re.compile(r"0{16}[ \(\t]+G?OMP_(\d{1,2})[.](\d{1,2})[.]?\d{,3}[ \)\t]+") 47 | versions = GOMP_version.findall(objdump_string) 48 | assert len(versions) > 1 49 | for major, minor in versions: 50 | error_msg = f"found unexpected OMP/GOMP version: '{major}.{minor}'" 51 | assert int(major) <= 4, error_msg 52 | assert int(minor) <= 5, error_msg 53 | 54 | 55 | if __name__ == "__main__": 56 | check_dependencies(Path(sys.argv[1]).read_text(encoding="utf-8")) 57 | -------------------------------------------------------------------------------- /.ci/check-python-dists.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e -u 4 | 5 | DIST_DIR=${1} 6 | 7 | # defaults 8 | METHOD=${METHOD:-""} 9 | TASK=${TASK:-""} 10 | 11 | echo "checking Python-package distributions in '${DIST_DIR}'" 12 | 13 | pip install \ 14 | -qq \ 15 | check-wheel-contents \ 16 | twine || exit 1 17 | 18 | echo "twine check..." 19 | twine check --strict "$(echo "${DIST_DIR}"/*)" || exit 1 20 | 21 | if { test "${TASK}" = "bdist" || test "${METHOD}" = "wheel"; }; then 22 | echo "check-wheel-contents..." 23 | check-wheel-contents "$(echo "${DIST_DIR}"/*.whl)" || exit 1 24 | fi 25 | 26 | PY_MINOR_VER=$(python -c "import sys; print(sys.version_info.minor)") 27 | if [ "$PY_MINOR_VER" -gt 7 ]; then 28 | echo "pydistcheck..." 
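# pydistcheck inspects built sdists/wheels for packaging problems (too many files,
# excessive compressed/uncompressed size, compiled objects with debug symbols).
# The branches below allow much larger artifacts for CUDA builds and skip the
# debug-symbols check for aarch64 builds.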
29 | pip install 'pydistcheck>=0.9.1' 30 | if { test "${TASK}" = "cuda" || test "${METHOD}" = "wheel"; }; then 31 | pydistcheck \ 32 | --inspect \ 33 | --ignore 'compiled-objects-have-debug-symbols'\ 34 | --ignore 'distro-too-large-compressed' \ 35 | --max-allowed-size-uncompressed '120M' \ 36 | --max-allowed-files 800 \ 37 | "$(echo "${DIST_DIR}"/*)" || exit 1 38 | elif { test "$(uname -m)" = "aarch64"; }; then 39 | pydistcheck \ 40 | --inspect \ 41 | --ignore 'compiled-objects-have-debug-symbols' \ 42 | --max-allowed-size-compressed '5M' \ 43 | --max-allowed-size-uncompressed '15M' \ 44 | --max-allowed-files 800 \ 45 | "$(echo "${DIST_DIR}"/*)" || exit 1 46 | else 47 | pydistcheck \ 48 | --inspect \ 49 | --max-allowed-size-compressed '5M' \ 50 | --max-allowed-size-uncompressed '15M' \ 51 | --max-allowed-files 800 \ 52 | "$(echo "${DIST_DIR}"/*)" || exit 1 53 | fi 54 | else 55 | echo "skipping pydistcheck (does not support Python 3.${PY_MINOR_VER})" 56 | fi 57 | 58 | echo "done checking Python-package distributions" 59 | -------------------------------------------------------------------------------- /.ci/conda-envs/README.md: -------------------------------------------------------------------------------- 1 | # conda-envs 2 | 3 | This directory contains files used to create `conda` environments for development 4 | and testing of LightGBM. 5 | 6 | The `.txt` files here are intended to be used with `conda create --file`. 7 | 8 | For details on that, see the `conda` docs: 9 | 10 | * `conda create` docs ([link](https://conda.io/projects/conda/en/latest/commands/create.html)) 11 | * "Managing Environments" ([link](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html)) 12 | -------------------------------------------------------------------------------- /.ci/conda-envs/ci-core-py37.txt: -------------------------------------------------------------------------------- 1 | # [description] 2 | # 3 | # Similar to ci-core.txt, but specific to Python 3.7. 4 | # 5 | # Unlike ci-core.txt, this includes a Python version and uses 6 | # `=` and `<=` pins to make solves faster and prevent against 7 | # issues like https://github.com/microsoft/LightGBM/pull/6370. 
8 | # 9 | # [usage] 10 | # 11 | # conda create \ 12 | # --name test-env \ 13 | # --file ./.ci/conda-envs/ci-core-py37.txt 14 | # 15 | 16 | # python 17 | python=3.7.* 18 | 19 | # direct imports 20 | cffi=1.15.* 21 | # older versions of Dask are incompatible with pandas>=2.0, but not all conda packages' metadata accurately reflects that 22 | # 23 | # ref: https://github.com/microsoft/LightGBM/issues/6030 24 | dask=2022.2.* 25 | distributed=2022.2.* 26 | joblib=1.3.* 27 | matplotlib-base=3.5.* 28 | numpy=1.21.* 29 | pandas=1.3.* 30 | pyarrow=9.0.* 31 | # python-graphviz 0.20.2 is not compatible with Python 3.7 32 | # ref: https://github.com/microsoft/LightGBM/pull/6370 33 | python-graphviz=0.20.1 34 | scikit-learn=1.0.* 35 | scipy=1.7.* 36 | 37 | # testing-only dependencies 38 | cloudpickle=2.2.* 39 | pluggy=1.0.* 40 | psutil=5.9.3 41 | pytest=7.4.* 42 | 43 | # other recursive dependencies, just 44 | # pinned here to help speed up solves 45 | bokeh=2.4.* 46 | fsspec=2023.1.* 47 | msgpack-python=1.0.* 48 | pluggy=1.0.* 49 | pytz=2024.1 50 | setuptools=59.8.* 51 | snappy=1.1.* 52 | tomli=2.0.* 53 | tornado=6.1.* 54 | wheel=0.42.* 55 | zict=2.2.* 56 | zipp=3.15.* 57 | -------------------------------------------------------------------------------- /.ci/conda-envs/ci-core-py38.txt: -------------------------------------------------------------------------------- 1 | # [description] 2 | # 3 | # Similar to ci-core.txt, but specific to Python 3.8. 4 | # 5 | # Unlike ci-core.txt, this includes a Python version and uses 6 | # `=` and `<=` pins to make solves faster and prevent against 7 | # issues like https://github.com/microsoft/LightGBM/pull/6370. 8 | # 9 | # [usage] 10 | # 11 | # conda create \ 12 | # --name test-env \ 13 | # --file ./.ci/conda-envs/ci-core-py38.txt 14 | # 15 | 16 | # python 17 | python=3.8.* 18 | 19 | # direct imports 20 | cffi=1.15.* 21 | dask=2023.5.* 22 | distributed=2023.5.* 23 | joblib=1.4.* 24 | matplotlib-base=3.7.* 25 | numpy=1.24.* 26 | pandas=1.5.* 27 | pyarrow-core=16.1.* 28 | python-graphviz=0.20.* 29 | scikit-learn=1.3.* 30 | scipy=1.10.* 31 | 32 | # testing-only dependencies 33 | cloudpickle=3.0.* 34 | pluggy=1.5.* 35 | psutil=5.9.8 36 | pytest=8.2.* 37 | 38 | # other recursive dependencies, just 39 | # pinned here to help speed up solves 40 | bokeh=3.1.* 41 | fsspec=2024.5.* 42 | # pinning 'libabseil' and 'libre2' to specific build numbers for pyarrow compatibility: 43 | # ref: https://github.com/microsoft/LightGBM/issues/6772 44 | libabseil=20240722.0=*_1 45 | libre2-11=2024.07.02=*_1 46 | msgpack-python=1.0.* 47 | pluggy=1.5.* 48 | pyparsing=3.1.4 49 | pytz=2024.1 50 | setuptools=69.5.* 51 | snappy=1.2.* 52 | tomli=2.0.* 53 | tornado=6.4.* 54 | wheel=0.43.* 55 | zict=3.0.* 56 | zipp=3.17.* 57 | -------------------------------------------------------------------------------- /.ci/conda-envs/ci-core.txt: -------------------------------------------------------------------------------- 1 | # [description] 2 | # 3 | # Core dependencies used across most LightGBM continuous integration (CI) jobs. 4 | # 5 | # 'python' constraint is intentionally omitted, so this file can be reused across 6 | # Python versions. 7 | # 8 | # These floors are not the oldest versions LightGBM supports... they're here just to make conda 9 | # solves faster, and should generally be the latest versions that work for all CI jobs using this. 
10 | # 11 | # [usage] 12 | # 13 | # conda create \ 14 | # --name test-env \ 15 | # --file ./.ci/conda-envs/ci-core.txt \ 16 | # python=3.10 17 | # 18 | 19 | # direct imports 20 | cffi>=1.16 21 | dask>=2023.5.0,<2024.12 22 | joblib>=1.3.2 23 | matplotlib-base>=3.7.3 24 | numpy>=1.24.4 25 | pandas>2.0 26 | pyarrow-core>=6.0 27 | python-graphviz>=0.20.3 28 | scikit-learn>=1.3.2 29 | scipy>=1.1 30 | 31 | # testing-only dependencies 32 | cloudpickle>=3.0.0 33 | psutil>=5.9.8 34 | pytest>=8.1.1 35 | 36 | # other recursive dependencies, just 37 | # pinned here to help speed up solves 38 | pluggy>=1.4.0 39 | setuptools>=69.2 40 | wheel>=0.43 41 | -------------------------------------------------------------------------------- /.ci/install-opencl.ps1: -------------------------------------------------------------------------------- 1 | Write-Output "Installing OpenCL CPU platform" 2 | 3 | $installer = "AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe" 4 | 5 | Write-Output "Downloading OpenCL platform installer" 6 | $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed 7 | $params = @{ 8 | OutFile = "$installer" 9 | Uri = "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" 10 | } 11 | Invoke-WebRequest @params 12 | 13 | if (Test-Path "$installer") { 14 | Write-Output "Successfully downloaded OpenCL platform installer" 15 | } else { 16 | Write-Output "Unable to download OpenCL platform installer" 17 | Write-Output "Setting EXIT" 18 | $host.SetShouldExit(-1) 19 | exit 1 20 | } 21 | 22 | # Install OpenCL platform from installer executable 23 | Write-Output "Running OpenCL installer" 24 | Invoke-Command -ScriptBlock { 25 | Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait 26 | } 27 | 28 | $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors 29 | if ($null -eq $property) { 30 | Write-Output "Unable to install OpenCL CPU platform" 31 | Write-Output "OpenCL installation log:" 32 | Get-Content "opencl.log" 33 | Write-Output "Setting EXIT" 34 | $host.SetShouldExit(-1) 35 | exit 1 36 | } else { 37 | Write-Output "Successfully installed OpenCL CPU platform" 38 | Write-Output "Current OpenCL drivers:" 39 | Write-Output $property 40 | } 41 | -------------------------------------------------------------------------------- /.ci/lint-cpp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail 4 | 5 | echo "running cpplint" 6 | cpplint \ 7 | --filter=-build/c++11,-build/include_subdir,-build/header_guard,-whitespace/line_length \ 8 | --recursive ./src ./include ./R-package ./swig ./tests \ 9 | || exit 1 10 | echo "done running cpplint" 11 | 12 | echo "running cmakelint" 13 | find \ 14 | . 
\ 15 | -type f \ 16 | \( -name CMakeLists.txt -o -path "./cmake/*.cmake" \) \ 17 | -not -path './external_libs/*' \ 18 | -exec cmakelint \ 19 | --linelength=120 \ 20 | --filter=-convention/filename,-package/stdargs,-readability/wonkycase \ 21 | {} \+ \ 22 | || exit 1 23 | echo "done running cmakelint" 24 | 25 | echo "checking that all OpenMP pragmas specify num_threads()" 26 | get_omp_pragmas_without_num_threads() { 27 | grep \ 28 | -n \ 29 | -R \ 30 | --include='*.c' \ 31 | --include='*.cc' \ 32 | --include='*.cpp' \ 33 | --include='*.h' \ 34 | --include='*.hpp' \ 35 | 'pragma omp parallel' \ 36 | | grep -v ' num_threads' 37 | } 38 | 39 | # 'grep' returns a non-0 exit code if 0 lines were found. 40 | # Turning off '-e -o pipefail' options here so that bash doesn't 41 | # consider this a failure and stop execution of the script. 42 | # 43 | # ref: https://www.gnu.org/software/grep/manual/html_node/Exit-Status.html 44 | set +e +o pipefail 45 | PROBLEMATIC_LINES=$( 46 | get_omp_pragmas_without_num_threads 47 | ) 48 | set -e -o pipefail 49 | if test "${PROBLEMATIC_LINES}" != ""; then 50 | get_omp_pragmas_without_num_threads 51 | echo "Found '#pragma omp parallel' not using explicit num_threads() configuration. Fix those." 52 | echo "For details, see https://www.openmp.org/spec-html/5.0/openmpse14.html#x54-800002.6" 53 | exit 1 54 | fi 55 | echo "done checking OpenMP pragmas" 56 | -------------------------------------------------------------------------------- /.ci/lint-js.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail 4 | 5 | biome ci --config-path=./biome.json --diagnostic-level=info --error-on-warnings ./ 6 | -------------------------------------------------------------------------------- /.ci/lint-powershell.ps1: -------------------------------------------------------------------------------- 1 | $settings = @{ 2 | Severity = @( 3 | 'Information', 4 | 'Warning', 5 | 'Error' 6 | ) 7 | IncludeDefaultRules = $true 8 | # Additional rules that are disabled by default 9 | Rules = @{ 10 | PSAvoidExclaimOperator = @{ 11 | Enable = $true 12 | } 13 | PSAvoidLongLines = @{ 14 | Enable = $true 15 | MaximumLineLength = 120 16 | } 17 | PSAvoidSemicolonsAsLineTerminators = @{ 18 | Enable = $true 19 | } 20 | PSPlaceCloseBrace = @{ 21 | Enable = $true 22 | NoEmptyLineBefore = $true 23 | IgnoreOneLineBlock = $true 24 | NewLineAfter = $false 25 | } 26 | PSPlaceOpenBrace = @{ 27 | Enable = $true 28 | OnSameLine = $true 29 | NewLineAfter = $true 30 | IgnoreOneLineBlock = $true 31 | } 32 | PSUseConsistentIndentation = @{ 33 | Enable = $true 34 | IndentationSize = 4 35 | PipelineIndentation = 'IncreaseIndentationAfterEveryPipeline' 36 | Kind = 'space' 37 | } 38 | PSUseConsistentWhitespace = @{ 39 | Enable = $true 40 | CheckInnerBrace = $true 41 | CheckOpenBrace = $true 42 | CheckOpenParen = $true 43 | CheckOperator = $true 44 | CheckSeparator = $true 45 | CheckPipe = $true 46 | CheckPipeForRedundantWhitespace = $true 47 | CheckParameter = $true 48 | IgnoreAssignmentOperatorInsideHashTable = $false 49 | } 50 | PSUseCorrectCasing = @{ 51 | Enable = $true 52 | } 53 | } 54 | } 55 | 56 | Invoke-ScriptAnalyzer -Path ./ -Recurse -EnableExit -Settings $settings 57 | -------------------------------------------------------------------------------- /.ci/lint-python-bash.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail 4 | 5 | echo "running pre-commit 
checks" 6 | pre-commit run --all-files || exit 1 7 | echo "done running pre-commit checks" 8 | 9 | echo "running mypy" 10 | mypy \ 11 | --config-file=./python-package/pyproject.toml \ 12 | ./python-package \ 13 | || true 14 | echo "done running mypy" 15 | -------------------------------------------------------------------------------- /.ci/rerun-workflow.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # [description] 4 | # Rerun the specified workflow for a given pull request. 5 | # 6 | # [usage] 7 | # rerun-workflow.sh <workflow_id> <pr_number> <pr_branch> 8 | # 9 | # WORKFLOW_ID: Identifier (config name or ID) of a workflow to be rerun. 10 | # 11 | # PR_NUMBER: Number of the pull request for which the workflow should be rerun. 12 | # 13 | # PR_BRANCH: Name of the pull request's branch. 14 | 15 | set -e -E -u -o pipefail 16 | 17 | if [ -z "$GITHUB_ACTIONS" ]; then 18 | echo "Must be run inside GitHub Actions CI" 19 | exit 1 20 | fi 21 | 22 | if [ $# -ne 3 ]; then 23 | echo "Usage: $0 <workflow_id> <pr_number> <pr_branch>" 24 | exit 1 25 | fi 26 | 27 | workflow_id=$1 28 | pr_number=$2 29 | pr_branch=$3 30 | 31 | runs=$( 32 | curl -sL \ 33 | -H "Accept: application/vnd.github.v3+json" \ 34 | -H "Authorization: token $SECRETS_WORKFLOW" \ 35 | "${GITHUB_API_URL}/repos/microsoft/LightGBM/actions/workflows/${workflow_id}/runs?event=pull_request&branch=${pr_branch}" | \ 36 | jq '.workflow_runs' 37 | ) 38 | runs=$(echo "${runs}" | jq --arg pr_number "${pr_number}" --arg pr_branch "${pr_branch}" 'map(select(.event == "pull_request" and ((.pull_requests | length) != 0 and (.pull_requests[0].number | tostring) == $pr_number or .head_branch == $pr_branch)))') 39 | runs=$(echo "${runs}" | jq 'sort_by(.run_number) | reverse') 40 | 41 | if [[ $(echo "${runs}" | jq 'length') -gt 0 ]]; then 42 | curl -sL \ 43 | -X POST \ 44 | -H "Accept: application/vnd.github.v3+json" \ 45 | -H "Authorization: token $SECRETS_WORKFLOW" \ 46 | "${GITHUB_API_URL}/repos/microsoft/LightGBM/actions/runs/$(echo "${runs}" | jq '.[0].id')/rerun" 47 | fi 48 | -------------------------------------------------------------------------------- /.ci/run-r-cmd-check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -u -o pipefail 4 | 5 | PKG_TARBALL="${1}" 6 | declare -i ALLOWED_CHECK_NOTES=${2} 7 | 8 | # 'R CMD check' redirects installation logs to a file, and returns 9 | # a non-0 exit code if ERRORs are raised. 10 | # 11 | # The '||' here gives us an opportunity to echo out the installation 12 | # logs prior to exiting the script. 13 | check_succeeded="yes" 14 | R CMD check "${PKG_TARBALL}" \ 15 | --as-cran \ 16 | --run-donttest \ 17 | || check_succeeded="no" 18 | 19 | CHECK_LOG_FILE=lightgbm.Rcheck/00check.log 20 | BUILD_LOG_FILE=lightgbm.Rcheck/00install.out 21 | 22 | echo "R CMD check build logs:" 23 | cat "${BUILD_LOG_FILE}" 24 | 25 | if [[ $check_succeeded == "no" ]]; then 26 | echo "R CMD check failed" 27 | exit 1 28 | fi 29 | 30 | # WARNINGs or ERRORs should be treated as a failure 31 | if grep -q -E "WARNING|ERROR" "${CHECK_LOG_FILE}"; then 32 | echo "WARNINGs or ERRORs have been found by R CMD check" 33 | exit 1 34 | fi 35 | 36 | # Allow a configurable number of NOTEs. 37 | # Sometimes NOTEs are raised in CI that wouldn't show up on an actual CRAN submission.
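# 'set +e' is needed below because grep exits with a non-zero code when it finds
# no matches, which would otherwise stop the script when there are zero NOTEs.
# As an illustration, a check log whose summary line is 'Status: 2 NOTEs' makes
# the grep/sed pipeline below produce NUM_CHECK_NOTES=2.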
38 | set +e 39 | NUM_CHECK_NOTES=$( 40 | grep -o -E '[0-9]+ NOTE' "${CHECK_LOG_FILE}" \ 41 | | sed 's/[^0-9]*//g' 42 | ) 43 | if [[ ${NUM_CHECK_NOTES} -gt ${ALLOWED_CHECK_NOTES} ]]; then 44 | echo "Found ${NUM_CHECK_NOTES} NOTEs from R CMD check. Only ${ALLOWED_CHECK_NOTES} are allowed" 45 | exit 1 46 | fi 47 | -------------------------------------------------------------------------------- /.ci/set-commit-status.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # [description] 4 | # Set a status with a given name to the specified commit. 5 | # 6 | # [usage] 7 | # set-commit-status.sh 8 | # 9 | # NAME: Name of status. 10 | # Status with existing name overwrites a previous one. 11 | # 12 | # STATUS: Status to be set. 13 | # Can be "error", "failure", "pending" or "success". 14 | # 15 | # SHA: SHA of a commit to set a status on. 16 | 17 | set -e -E -u -o pipefail 18 | 19 | if [ -z "$GITHUB_ACTIONS" ]; then 20 | echo "Must be run inside GitHub Actions CI" 21 | exit 1 22 | fi 23 | 24 | if [ $# -ne 3 ]; then 25 | echo "Usage: $0 " 26 | exit 1 27 | fi 28 | 29 | name=$1 30 | 31 | status=$2 32 | status=${status/error/failure} 33 | status=${status/cancelled/failure} 34 | status=${status/timed_out/failure} 35 | status=${status/in_progress/pending} 36 | status=${status/queued/pending} 37 | 38 | sha=$3 39 | 40 | data=$( 41 | jq -n \ 42 | --arg state "${status}" \ 43 | --arg url "${GITHUB_SERVER_URL}/microsoft/LightGBM/actions/runs/${GITHUB_RUN_ID}" \ 44 | --arg name "${name}" \ 45 | '{"state":$state,"target_url":$url,"context":$name}' 46 | ) 47 | 48 | curl -sL \ 49 | -X POST \ 50 | -H "Accept: application/vnd.github.v3+json" \ 51 | -H "Authorization: token $SECRETS_WORKFLOW" \ 52 | -d "$data" \ 53 | "${GITHUB_API_URL}/repos/microsoft/LightGBM/statuses/$sha" 54 | -------------------------------------------------------------------------------- /.ci/test-python-latest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail 4 | 5 | # latest versions of lightgbm's dependencies, 6 | # including pre-releases and nightlies 7 | # 8 | # ref: https://github.com/pydata/xarray/blob/31111b3afe44fd6f7dac363264e94186cc5168d2/.github/workflows/upstream-dev-ci.yaml 9 | echo "installing testing dependencies" 10 | python -m pip install \ 11 | cloudpickle \ 12 | psutil \ 13 | pytest 14 | echo "done installing testing dependencies" 15 | 16 | echo "installing lightgbm's dependencies" 17 | python -m pip install \ 18 | --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ 19 | --prefer-binary \ 20 | --pre \ 21 | --upgrade \ 22 | 'numpy>=2.0.0.dev0' \ 23 | 'matplotlib>=3.10.0.dev0' \ 24 | 'pandas>=3.0.0.dev0' \ 25 | 'scikit-learn>=1.6.dev0' \ 26 | 'scipy>=1.15.0.dev0' 27 | 28 | python -m pip install \ 29 | --extra-index-url https://pypi.fury.io/arrow-nightlies/ \ 30 | --prefer-binary \ 31 | --pre \ 32 | --upgrade \ 33 | 'pyarrow>=17.0.0.dev0' 34 | 35 | python -m pip install \ 36 | 'cffi>=1.15.1' 37 | 38 | echo "done installing lightgbm's dependencies" 39 | 40 | echo "installing lightgbm" 41 | pip install --no-deps dist/*.whl 42 | echo "done installing lightgbm" 43 | 44 | echo "installed package versions:" 45 | pip freeze 46 | 47 | echo "" 48 | echo "running tests" 49 | pytest tests/c_api_test/ 50 | pytest tests/python_package_test/ 51 | -------------------------------------------------------------------------------- /.ci/test-python-oldest.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail 4 | 5 | # oldest versions of dependencies published after 6 | # minimum supported Python version's first release, 7 | # for which there are wheels compatible with the 8 | # python:{version} image 9 | # 10 | # see https://devguide.python.org/versions/ 11 | # 12 | echo "installing lightgbm's dependencies" 13 | pip install \ 14 | 'cffi==1.15.1' \ 15 | 'numpy==1.19.0' \ 16 | 'pandas==1.1.3' \ 17 | 'pyarrow==6.0.1' \ 18 | 'scikit-learn==0.24.2' \ 19 | 'scipy==1.6.0' \ 20 | || exit 1 21 | echo "done installing lightgbm's dependencies" 22 | 23 | echo "installing lightgbm" 24 | pip install --no-deps dist/*.whl || exit 1 25 | echo "done installing lightgbm" 26 | 27 | echo "installed package versions:" 28 | pip freeze 29 | 30 | echo "" 31 | echo "checking that examples run without error" 32 | 33 | # run a few examples to test that Python-package minimally works 34 | echo "" 35 | echo "--- advanced_example.py ---" 36 | echo "" 37 | python ./examples/python-guide/advanced_example.py || exit 1 38 | 39 | echo "" 40 | echo "--- logistic_regression.py ---" 41 | echo "" 42 | python ./examples/python-guide/logistic_regression.py || exit 1 43 | 44 | echo "" 45 | echo "--- simple_example.py ---" 46 | echo "" 47 | python ./examples/python-guide/simple_example.py || exit 1 48 | 49 | echo "" 50 | echo "--- sklearn_example.py ---" 51 | echo "" 52 | python ./examples/python-guide/sklearn_example.py || exit 1 53 | 54 | echo "" 55 | echo "done testing on oldest supported Python version" 56 | -------------------------------------------------------------------------------- /.ci/trigger-dispatch-run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # [description] 4 | # Trigger manual workflow run by a dispatch event. 5 | # 6 | # [usage] 7 | # trigger-dispatch-run.sh 8 | # 9 | # PR_URL: URL of pull request from which dispatch is triggering. 10 | # 11 | # COMMENT_ID: ID of comment that is triggering a dispatch. 12 | # 13 | # DISPATCH_NAME: Name of a dispatch to be triggered. 
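#
# [example]
# A hypothetical invocation, with made-up values; the real caller,
# .github/workflows/triggering_comments.yml, passes these arguments from the
# GitHub event payload:
#
# trigger-dispatch-run.sh \
#     "https://api.github.com/repos/microsoft/LightGBM/pulls/1234" \
#     "123456789" \
#     "gha_run_r_valgrind"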
14 | 15 | set -e -E -u -o pipefail 16 | 17 | if [ -z "$GITHUB_ACTIONS" ]; then 18 | echo "Must be run inside GitHub Actions CI" 19 | exit 1 20 | fi 21 | 22 | if [ $# -ne 3 ]; then 23 | echo "Usage: $0 " 24 | exit 1 25 | fi 26 | 27 | pr_url=$1 28 | comment_id=$2 29 | dispatch_name=$3 30 | 31 | pr=$( 32 | curl -sL \ 33 | -H "Accept: application/vnd.github.v3+json" \ 34 | -H "Authorization: token $SECRETS_WORKFLOW" \ 35 | "$pr_url" 36 | ) 37 | data=$( 38 | jq -n \ 39 | --arg event_type "$dispatch_name" \ 40 | --arg pr_number "$(echo "$pr" | jq '.number')" \ 41 | --arg pr_sha "$(echo "$pr" | jq '.head.sha')" \ 42 | --arg pr_branch "$(echo "$pr" | jq '.head.ref')" \ 43 | --arg comment_number "$comment_id" \ 44 | '{"event_type":$event_type,"client_payload":{"pr_number":$pr_number,"pr_sha":$pr_sha,"pr_branch":$pr_branch,"comment_number":$comment_number}}' 45 | ) 46 | curl -sL \ 47 | -X POST \ 48 | -H "Accept: application/vnd.github.v3+json" \ 49 | -H "Authorization: token $SECRETS_WORKFLOW" \ 50 | -d "$data" \ 51 | "${GITHUB_API_URL}/repos/microsoft/LightGBM/dispatches" 52 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | trim_trailing_whitespace = true 6 | insert_final_newline = true 7 | end_of_line = lf 8 | indent_style = space 9 | indent_size = 2 10 | 11 | [*.{py,sh,ps1,js,json}] 12 | indent_size = 4 13 | max_line_length = 120 14 | skip = external_libs 15 | known_first_party = lightgbm 16 | 17 | # Tabs matter for Makefile and .gitmodules 18 | [{makefile*,Makefile*,*.mk,*.mak,*.makefile,*.Makefile,GNUmakefile,BSDmakefile,make.bat,Makevars*,*.gitmodules}] 19 | indent_style = tab 20 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # introduce ruff-format (#6308) 2 | 6330d6269c81dfd4c96e664b99239b8ff39ccf91 3 | # enable ruff format on tests and examples (#6317) 4 | 1b792e716682254c33ddb5eb845357e84018636d 5 | # enable ruff-format on main library Python code (#6336) 6 | dd31208ab7a7aea86762830697b00666f843ded9 7 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # This file controls default reviewers for LightGBM code. 2 | # See https://help.github.com/en/articles/about-code-owners 3 | # for details 4 | # 5 | # Maintainers are encouraged to use their best discretion in 6 | # setting reviewers on PRs manually, but this file should 7 | # offer a reasonable automatic best-guess 8 | 9 | # catch-all rule (this only gets matched if no rules below match) 10 | * @guolinke @jameslamb @shiyu1994 @jmoralez @borchero @StrikerRUS 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/BUG_REPORT.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 🐞 3 | about: Something isn't working as expected? Here is the right place to report. 
4 | --- 5 | 6 | ## Description 7 | 8 | 9 | ## Reproducible example 10 | 11 | 12 | ## Environment info 13 | 14 | LightGBM version or commit hash: 15 | 16 | Command(s) you used to install LightGBM 17 | 18 | ```shell 19 | 20 | ``` 21 | 22 | 23 | 24 | 25 | ## Additional Comments 26 | 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/FEATURE_REQUEST.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 💡 3 | about: Suggest a new idea for the project. 4 | labels: enhancement, feature-request 5 | --- 6 | 7 | 10 | 11 | ## Summary 12 | 13 | 14 | 15 | ## Motivation 16 | 17 | 18 | 19 | ## Description 20 | 21 | 22 | 23 | ## References 24 | 25 | 26 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: monthly 7 | groups: 8 | ci-dependencies: 9 | patterns: 10 | - "*" 11 | commit-message: 12 | prefix: "[ci]" 13 | labels: 14 | - maintenance 15 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$NEXT_PATCH_VERSION' 2 | tag-template: 'v$NEXT_PATCH_VERSION' 3 | categories: 4 | - title: '💡 New Features' 5 | label: 'feature' 6 | - title: '🔨 Breaking' 7 | label: 'breaking' 8 | - title: '🚀 Efficiency Improvement' 9 | label: 'efficiency' 10 | - title: '🐛 Bug Fixes' 11 | label: 'fix' 12 | - title: '📖 Documentation' 13 | label: 'doc' 14 | - title: '🧰 Maintenance' 15 | label: 'maintenance' 16 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 17 | template: | 18 | ## Changes 19 | 20 | $CHANGES 21 | -------------------------------------------------------------------------------- /.github/workflows/linkchecker.yml: -------------------------------------------------------------------------------- 1 | name: Link checks 2 | 3 | on: 4 | # Run manually by clicking a button in the UI 5 | workflow_dispatch: 6 | # Run once a day at 8:00am UTC 7 | schedule: 8 | - cron: '0 8 * * *' 9 | 10 | env: 11 | COMPILER: gcc 12 | OS_NAME: 'linux' 13 | PYTHON_VERSION: '3.13' 14 | TASK: 'check-links' 15 | 16 | jobs: 17 | check-links: 18 | timeout-minutes: 60 19 | runs-on: ubuntu-latest 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v4 23 | with: 24 | fetch-depth: 5 25 | submodules: false 26 | - name: Setup and run tests 27 | run: | 28 | export BUILD_DIRECTORY="$GITHUB_WORKSPACE" 29 | export CONDA=${HOME}/miniforge 30 | export PATH=${CONDA}/bin:${HOME}/.local/bin:${PATH} 31 | $GITHUB_WORKSPACE/.ci/setup.sh || exit 1 32 | $GITHUB_WORKSPACE/.ci/test.sh || exit 1 33 | -------------------------------------------------------------------------------- /.github/workflows/lock.yml: -------------------------------------------------------------------------------- 1 | name: 'Lock Inactive Threads' 2 | 3 | on: 4 | schedule: 5 | # midnight UTC, every Wednesday, for Issues 6 | - cron: '0 0 * * 3' 7 | # midnight UTC, every Thursday, for PRs 8 | - cron: '0 0 * * 4' 9 | # allow manual triggering from GitHub UI 10 | workflow_dispatch: 11 | 12 | permissions: 13 | issues: write 14 | pull-requests: write 15 | 16 | concurrency: 17 | group: lock 18 | 19 | jobs: 20 | action: 21 | runs-on: ubuntu-latest 22 | steps: 
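# The two cron schedules above are told apart in 'process-only' below:
# the Wednesday run ('0 0 * * 3') locks inactive issues, and the Thursday
# run locks inactive pull requests.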
23 | - uses: dessant/lock-threads@v5 24 | with: 25 | github-token: ${{ github.token }} 26 | # after how many days of inactivity should a closed issue/PR be locked? 27 | issue-inactive-days: '365' 28 | pr-inactive-days: '365' 29 | # do not close feature request issues... 30 | # we close those but track them in https://github.com/microsoft/LightGBM/issues/2302 31 | exclude-any-issue-labels: 'feature request' 32 | # what labels should be removed prior to locking? 33 | remove-issue-labels: 'awaiting response,awaiting review,blocking,in progress' 34 | remove-pr-labels: 'awaiting response,awaiting review,blocking,in progress' 35 | # what message should be posted prior to locking? 36 | issue-comment: > 37 | This issue has been automatically locked 38 | because there has not been any recent activity since it was closed. 39 | 40 | To start a new related discussion, 41 | open a new issue at https://github.com/microsoft/LightGBM/issues 42 | including a reference to this. 43 | pr-comment: > 44 | This pull request has been automatically locked 45 | because there has not been any recent activity since it was closed. 46 | 47 | To start a new related discussion, 48 | open a new issue at https://github.com/microsoft/LightGBM/issues 49 | including a reference to this. 50 | # what should the locking status be? 51 | issue-lock-reason: 'resolved' 52 | pr-lock-reason: 'resolved' 53 | process-only: ${{ github.event.schedule == '0 0 * * 3' && 'issues' || 'prs' }} 54 | -------------------------------------------------------------------------------- /.github/workflows/no_response.yml: -------------------------------------------------------------------------------- 1 | name: No Response Bot 2 | 3 | permissions: 4 | issues: write 5 | pull-requests: write 6 | 7 | on: 8 | issue_comment: 9 | types: [created] 10 | schedule: 11 | # "every day at 04:00 UTC" 12 | - cron: '0 4 * * *' 13 | 14 | jobs: 15 | noResponse: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: lee-dohm/no-response@v0.5.0 19 | with: 20 | closeComment: > 21 | This issue has been automatically closed 22 | because it has been awaiting a response for too long. 23 | 24 | When you have time to work with the maintainers to resolve this issue, 25 | please post a new comment and it will be re-opened. 26 | If the issue has been locked for editing by the time you return to it, 27 | please open a new issue and reference this one. 28 | 29 | Thank you for taking the time to improve LightGBM! 30 | daysUntilClose: 30 31 | responseRequiredLabel: awaiting response 32 | token: ${{ github.token }} 33 | -------------------------------------------------------------------------------- /.github/workflows/optional_checks.yml: -------------------------------------------------------------------------------- 1 | name: Optional checks 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | all-optional-checks-successful: 10 | timeout-minutes: 120 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 5 17 | submodules: false 18 | - name: Check that all tests succeeded 19 | shell: bash 20 | run: | 21 | workflows=( 22 | "R valgrind tests;r-valgrind" 23 | ) 24 | for i in "${workflows[@]}"; do 25 | workflow_name=${i%;*} 26 | comment="The last reported status from workflow \"$workflow_name\" is failure." 27 | comment="${comment} Commit fixes and rerun the workflow."
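# each entry in 'workflows' has the form 'workflow name;trigger phrase':
# '${i%;*}' above keeps the part before the ';', and '${i#*;}' below keeps
# the part after it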
28 | trigger_phrase=${i#*;} 29 | python \ 30 | "$GITHUB_WORKSPACE/.ci/get-workflow-status.py" \ 31 | "$trigger_phrase" \ 32 | || { echo "${comment}"; exit 1; } 33 | done 34 | -------------------------------------------------------------------------------- /.github/workflows/r_configure.yml: -------------------------------------------------------------------------------- 1 | name: R generate configure 2 | 3 | on: 4 | repository_dispatch: 5 | types: [gha_run_r_configure] 6 | 7 | jobs: 8 | r-configure: 9 | name: r-configure 10 | timeout-minutes: 60 11 | runs-on: ubuntu-latest 12 | container: "ubuntu:22.04" 13 | steps: 14 | - name: Install essential software before checkout 15 | run: | 16 | apt-get update 17 | apt-get install --no-install-recommends -y \ 18 | ca-certificates \ 19 | git 20 | - name: Trust git cloning LightGBM 21 | run: | 22 | git config --global --add safe.directory "${GITHUB_WORKSPACE}" 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | with: 26 | fetch-depth: 5 27 | submodules: true 28 | repository: microsoft/LightGBM 29 | ref: "refs/heads/${{ fromJSON(github.event.client_payload.pr_branch) }}" 30 | token: ${{ secrets.WORKFLOW }} 31 | persist-credentials: true 32 | - name: Update configure 33 | shell: bash 34 | run: ./R-package/recreate-configure.sh || exit 1 35 | - name: Push changes 36 | run: | 37 | git config --global user.name "GitHub Actions Bot" 38 | git config --global user.email "githubactionsbot@users.noreply.github.com" 39 | git add "./R-package/configure" 40 | git commit --allow-empty -m "Auto-update configure" 41 | git push 42 | -------------------------------------------------------------------------------- /.github/workflows/release_drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release Drafter 2 | 3 | permissions: 4 | contents: read 5 | 6 | on: 7 | push: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | updateReleaseDraft: 13 | permissions: 14 | contents: write 15 | pull-requests: read 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: release-drafter/release-drafter@v6.1.0 19 | with: 20 | config-name: release-drafter.yml 21 | disable-autolabeler: true 22 | env: 23 | GITHUB_TOKEN: ${{ github.token }} 24 | -------------------------------------------------------------------------------- /.github/workflows/triggering_comments.yml: -------------------------------------------------------------------------------- 1 | name: Triggering comments 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | jobs: 8 | triggering-tests: 9 | if: | 10 | github.event.issue.pull_request && 11 | contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) && 12 | startsWith(github.event.comment.body, '/gha run') 13 | runs-on: ubuntu-latest 14 | env: 15 | SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }} 16 | steps: 17 | - name: Checkout repository 18 | uses: actions/checkout@v4 19 | with: 20 | fetch-depth: 5 21 | submodules: false 22 | 23 | - name: Trigger R valgrind tests 24 | if: github.event.comment.body == '/gha run r-valgrind' 25 | run: | 26 | $GITHUB_WORKSPACE/.ci/trigger-dispatch-run.sh \ 27 | "${{ github.event.issue.pull_request.url }}" \ 28 | "${{ github.event.comment.id }}" \ 29 | "gha_run_r_valgrind" 30 | 31 | - name: Trigger update R configure 32 | if: github.event.comment.body == '/gha run r-configure' 33 | run: | 34 | $GITHUB_WORKSPACE/.ci/trigger-dispatch-run.sh \ 35 | "${{ github.event.issue.pull_request.url }}" \ 36 | "${{ github.event.comment.id }}" \ 37 | "gha_run_r_configure" 
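# How these pieces fit together: a maintainer comments '/gha run r-valgrind' or
# '/gha run r-configure' on a pull request, the matching step above fires a
# repository_dispatch event through .ci/trigger-dispatch-run.sh, and a workflow
# subscribed to that event type (for example, r_configure.yml above listens for
# 'gha_run_r_configure') picks up the run.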
38 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "include/boost/compute"] 2 | path = external_libs/compute 3 | url = https://github.com/boostorg/compute 4 | [submodule "eigen"] 5 | path = external_libs/eigen 6 | url = https://gitlab.com/libeigen/eigen.git 7 | [submodule "external_libs/fmt"] 8 | path = external_libs/fmt 9 | url = https://github.com/fmtlib/fmt.git 10 | [submodule "external_libs/fast_double_parser"] 11 | path = external_libs/fast_double_parser 12 | url = https://github.com/lemire/fast_double_parser.git 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # exclude files which are auto-generated by build tools 2 | exclude: | 3 | (?x)^( 4 | build| 5 | external_libs| 6 | lightgbm-python| 7 | lightgbm_r| 8 | )$ 9 | |R-package/configure$ 10 | |R-package/inst/Makevars$ 11 | |R-package/inst/Makevars.win$ 12 | |R-package/man/.*Rd$ 13 | 14 | repos: 15 | - repo: https://github.com/pre-commit/pre-commit-hooks 16 | rev: v5.0.0 17 | hooks: 18 | - id: end-of-file-fixer 19 | - id: trailing-whitespace 20 | - repo: https://github.com/adrienverge/yamllint 21 | rev: v1.35.1 22 | hooks: 23 | - id: yamllint 24 | args: ["--strict"] 25 | - repo: https://github.com/astral-sh/ruff-pre-commit 26 | # Ruff version. 27 | rev: v0.9.10 28 | hooks: 29 | # Run the linter. 30 | - id: ruff 31 | args: ["--config", "python-package/pyproject.toml"] 32 | types_or: [python, jupyter] 33 | # Run the formatter. 34 | - id: ruff-format 35 | args: ["--config", "python-package/pyproject.toml"] 36 | types_or: [python, jupyter] 37 | - repo: https://github.com/shellcheck-py/shellcheck-py 38 | rev: v0.10.0.1 39 | hooks: 40 | - id: shellcheck 41 | - repo: https://github.com/crate-ci/typos 42 | rev: v1.30.2 43 | hooks: 44 | - id: typos 45 | args: ["--force-exclude"] 46 | exclude: (\.gitignore$)|(^\.editorconfig$) 47 | - repo: https://github.com/henryiii/validate-pyproject-schema-store 48 | rev: 2025.03.10 49 | hooks: 50 | - id: validate-pyproject 51 | files: python-package/pyproject.toml$ 52 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: "ubuntu-24.04" 4 | tools: 5 | python: "mambaforge-23.11" 6 | conda: 7 | environment: docs/env.yml 8 | formats: 9 | - pdf 10 | sphinx: 11 | builder: html 12 | configuration: docs/conf.py 13 | fail_on_warning: true 14 | submodules: 15 | include: all 16 | recursive: true 17 | -------------------------------------------------------------------------------- /.typos.toml: -------------------------------------------------------------------------------- 1 | default.extend-ignore-re = [ 2 | "/Ot", 3 | "mis-alignment", 4 | "mis-spelled", 5 | "posix-seh-rt", 6 | ] 7 | 8 | [default.extend-words] 9 | MAPE = "MAPE" 10 | datas = "datas" 11 | interprete = "interprete" 12 | mape = "mape" 13 | splitted = "splitted" 14 | 15 | [default.extend-identifiers] 16 | ERRORs = "ERRORs" 17 | GAM = "GAM" 18 | ND24s = "ND24s" 19 | WARNINGs = "WARNINGs" 20 | fullset = "fullset" 21 | thess = "thess" 22 | -------------------------------------------------------------------------------- /.yamllint.yml: 
-------------------------------------------------------------------------------- 1 | # default config: https://yamllint.readthedocs.io/en/stable/configuration.html#default-configuration 2 | extends: default 3 | 4 | rules: 5 | document-start: disable 6 | line-length: 7 | max: 120 8 | truthy: 9 | # prevent treating GitHub Workflow "on" key as boolean value 10 | check-keys: false 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | LightGBM has been developed and used by many active community members. 4 | 5 | Your help is very valuable in making it better for everyone. 6 | 7 | ## How to Contribute 8 | 9 | - Check the [Feature Requests Hub](https://github.com/microsoft/LightGBM/issues/2302), and submit pull requests to address a chosen issue. If you need development guidelines, you can check the [Development Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Development-Guide.rst) or ask us directly in Issues/Pull Requests. 10 | - Contribute to the [tests](https://github.com/microsoft/LightGBM/tree/master/tests) to make LightGBM more reliable. 11 | - Contribute to the [documentation](https://github.com/microsoft/LightGBM/tree/master/docs) to make it clearer for everyone. 12 | - Contribute to the [examples](https://github.com/microsoft/LightGBM/tree/master/examples) to share your experience with other users. 13 | - Add your stories and experience to [Awesome LightGBM](https://github.com/microsoft/LightGBM/blob/master/examples/README.md). If LightGBM helped you in a machine learning competition or some research application, we want to hear about it! 14 | - [Open an issue](https://github.com/microsoft/LightGBM/issues) to report problems or recommend new features. 15 | 16 | ## Development Guide 17 | 18 | ### Linting 19 | 20 | Every commit in the repository is tested with multiple static analyzers. 21 | 22 | When developing locally, run some of them using `pre-commit` ([pre-commit docs](https://pre-commit.com/)). 23 | 24 | ```shell 25 | pre-commit run --all-files 26 | ``` 27 | 28 | That command checks for common issues and automatically reformats the code. 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) Microsoft Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R-package/.Rbuildignore: -------------------------------------------------------------------------------- 1 | \.appveyor\.yml 2 | AUTOCONF_UBUNTU_VERSION 3 | ^autom4te.cache/.*$ 4 | ^.*\.bin 5 | ^build_r.R$ 6 | \.clang-format 7 | ^.*\.clusterfuzzlite$ 8 | ^cran-comments\.md$ 9 | ^docs$ 10 | ^.*\.dll 11 | \.drone\.yml 12 | ^.*\.dylib 13 | \.git 14 | \.gitkeep$ 15 | ^.*\.history 16 | ^Makefile$ 17 | ^.*\.o 18 | ^.*\.out 19 | ^pkgdown$ 20 | ^recreate-configure\.sh$ 21 | ^.*\.so 22 | ^src/build/.*$ 23 | ^src/CMakeLists.txt$ 24 | ^src/external_libs/compute/.appveyor.yml$ 25 | ^src/external_libs/compute/.coveralls.yml$ 26 | ^src/external_libs/compute/.travis.yml$ 27 | ^src/external_libs/compute/test/.*$ 28 | ^src/external_libs/compute/index.html$ 29 | ^src/external_libs/compute/.git$ 30 | ^src/external_libs/compute/.gitignore$ 31 | ^src/external_libs/compute/CONTRIBUTING.md$ 32 | ^src/external_libs/compute/README.md$ 33 | src/external_libs/fast_double_parser/benchmarks 34 | src/external_libs/fast_double_parser/Makefile 35 | src/external_libs/fast_double_parser/.*\.md 36 | src/external_libs/fast_double_parser/tests 37 | src/external_libs/fast_double_parser/.*\.yaml 38 | src/external_libs/fast_double_parser/.*\.yml 39 | src/external_libs/fmt/.*\.md 40 | src/external_libs/fmt/.travis.yml 41 | src/external_libs/fmt/doc 42 | src/external_libs/fmt/support/Android\.mk 43 | src/external_libs/fmt/support/bazel/.bazel.* 44 | src/external_libs/fmt/support/.*\.gradle 45 | src/external_libs/fmt/support/.*\.pro 46 | src/external_libs/fmt/support/.*\.py 47 | src/external_libs/fmt/support/rtd 48 | src/external_libs/fmt/support/.*sublime-syntax 49 | src/external_libs/fmt/support/Vagrantfile 50 | src/external_libs/fmt/support/.*\.xml 51 | src/external_libs/fmt/support/.*\.yml 52 | src/external_libs/fmt/test 53 | -------------------------------------------------------------------------------- /R-package/AUTOCONF_UBUNTU_VERSION: -------------------------------------------------------------------------------- 1 | 2.71-2 2 | -------------------------------------------------------------------------------- /R-package/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Microsoft Corporation 3 | -------------------------------------------------------------------------------- /R-package/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("dimnames<-",lgb.Dataset) 4 | S3method(dim,lgb.Dataset) 5 | S3method(dimnames,lgb.Dataset) 6 | S3method(get_field,lgb.Dataset) 7 | S3method(predict,lgb.Booster) 8 | S3method(print,lgb.Booster) 9 | S3method(set_field,lgb.Dataset) 10 | S3method(summary,lgb.Booster) 11 | export(getLGBMthreads) 12 | export(get_field) 13 | export(lgb.Dataset) 14 | export(lgb.Dataset.construct) 15 | export(lgb.Dataset.create.valid) 16 | export(lgb.Dataset.save) 17 | export(lgb.Dataset.set.categorical) 18 | export(lgb.Dataset.set.reference) 19 | export(lgb.configure_fast_predict) 20 | export(lgb.convert_with_rules) 21 | export(lgb.cv) 22 | export(lgb.drop_serialized) 23 | 
export(lgb.dump) 24 | export(lgb.get.eval.result) 25 | export(lgb.importance) 26 | export(lgb.interprete) 27 | export(lgb.load) 28 | export(lgb.make_serializable) 29 | export(lgb.model.dt.tree) 30 | export(lgb.plot.importance) 31 | export(lgb.plot.interpretation) 32 | export(lgb.restore_handle) 33 | export(lgb.save) 34 | export(lgb.slice.Dataset) 35 | export(lgb.train) 36 | export(lightgbm) 37 | export(setLGBMthreads) 38 | export(set_field) 39 | import(methods) 40 | importClassesFrom(Matrix,CsparseMatrix) 41 | importClassesFrom(Matrix,RsparseMatrix) 42 | importClassesFrom(Matrix,dgCMatrix) 43 | importClassesFrom(Matrix,dgRMatrix) 44 | importClassesFrom(Matrix,dsparseMatrix) 45 | importClassesFrom(Matrix,dsparseVector) 46 | importFrom(Matrix,Matrix) 47 | importFrom(R6,R6Class) 48 | importFrom(data.table,":=") 49 | importFrom(data.table,as.data.table) 50 | importFrom(data.table,data.table) 51 | importFrom(data.table,rbindlist) 52 | importFrom(data.table,set) 53 | importFrom(data.table,setnames) 54 | importFrom(data.table,setorder) 55 | importFrom(data.table,setorderv) 56 | importFrom(graphics,barplot) 57 | importFrom(graphics,par) 58 | importFrom(jsonlite,fromJSON) 59 | importFrom(methods,is) 60 | importFrom(methods,new) 61 | importFrom(parallel,detectCores) 62 | importFrom(stats,quantile) 63 | importFrom(utils,modifyList) 64 | importFrom(utils,read.delim) 65 | useDynLib(lightgbm , .registration = TRUE) 66 | -------------------------------------------------------------------------------- /R-package/R/lgb.drop_serialized.R: -------------------------------------------------------------------------------- 1 | #' @name lgb.drop_serialized 2 | #' @title Drop serialized raw bytes in a LightGBM model object 3 | #' @description If a LightGBM model object was produced with argument `serializable=TRUE`, the R object will keep 4 | #' a copy of the underlying C++ object as raw bytes, which can be used to reconstruct such an object after getting 5 | #' serialized and de-serialized, but at the cost of extra memory usage. If these raw bytes are not needed anymore, 6 | #' they can be dropped through this function in order to save memory. Note that the object will be modified in-place. 7 | #' 8 | #' \emph{New in version 4.0.0} 9 | #' 10 | #' @param model \code{lgb.Booster} object which was produced with `serializable=TRUE`. 11 | #' 12 | #' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). 13 | #' @seealso \link{lgb.restore_handle}, \link{lgb.make_serializable}. 14 | #' @export 15 | lgb.drop_serialized <- function(model) { 16 | if (!.is_Booster(x = model)) { 17 | stop("lgb.drop_serialized: model should be an ", sQuote("lgb.Booster")) 18 | } 19 | model$drop_raw() 20 | return(invisible(model)) 21 | } 22 | -------------------------------------------------------------------------------- /R-package/R/lgb.make_serializable.R: -------------------------------------------------------------------------------- 1 | #' @name lgb.make_serializable 2 | #' @title Make a LightGBM object serializable by keeping raw bytes 3 | #' @description If a LightGBM model object was produced with argument `serializable=FALSE`, the R object will not 4 | #' be serializable (e.g. cannot save and load with \code{saveRDS} and \code{readRDS}) as it will lack the raw bytes 5 | #' needed to reconstruct its underlying C++ object. This function can be used to forcibly produce those serialized 6 | #' raw bytes and make the object serializable. Note that the object will be modified in-place.
7 | #' 8 | #' \emph{New in version 4.0.0} 9 | #' 10 | #' @param model \code{lgb.Booster} object which was produced with `serializable=FALSE`. 11 | #' 12 | #' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). 13 | #' @seealso \link{lgb.restore_handle}, \link{lgb.drop_serialized}. 14 | #' @export 15 | lgb.make_serializable <- function(model) { 16 | if (!.is_Booster(x = model)) { 17 | stop("lgb.make_serializable: model should be an ", sQuote("lgb.Booster")) 18 | } 19 | model$save_raw() 20 | return(invisible(model)) 21 | } 22 | -------------------------------------------------------------------------------- /R-package/R/lgb.restore_handle.R: -------------------------------------------------------------------------------- 1 | #' @name lgb.restore_handle 2 | #' @title Restore the C++ component of a de-serialized LightGBM model 3 | #' @description After a LightGBM model object is de-serialized through functions such as \code{save} or 4 | #' \code{saveRDS}, its underlying C++ object will be blank and needs to be restored to be able to use it. Such 5 | #' an object is restored automatically when calling functions such as \code{predict}, but this function can be 6 | #' used to forcibly restore it beforehand. Note that the object will be modified in-place. 7 | #' 8 | #' \emph{New in version 4.0.0} 9 | #' 10 | #' @details Be aware that fast single-row prediction configurations are not restored through this 11 | #' function. If you wish to make fast single-row predictions using a \code{lgb.Booster} loaded this way, 12 | #' call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object. 13 | #' @param model \code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle 14 | #' need to be restored. 15 | #' 16 | #' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). 17 | #' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}. 18 | #' @examples 19 | #' \donttest{ 20 | #' library(lightgbm) 21 | #' \dontshow{setLGBMthreads(2L)} 22 | #' \dontshow{data.table::setDTthreads(1L)} 23 | #' data("agaricus.train") 24 | #' model <- lightgbm( 25 | #'     agaricus.train$data 26 | #'     , agaricus.train$label 27 | #'     , params = list(objective = "binary") 28 | #'     , nrounds = 5L 29 | #'     , verbose = 0 30 | #'     , num_threads = 2L 31 | #' ) 32 | #' fname <- tempfile(fileext = ".rds") 33 | #' saveRDS(model, fname) 34 | #' 35 | #' model_new <- readRDS(fname) 36 | #' model_new$check_null_handle() 37 | #' lgb.restore_handle(model_new) 38 | #' model_new$check_null_handle() 39 | #' } 40 | #' @export 41 | lgb.restore_handle <- function(model) { 42 | if (!.is_Booster(x = model)) { 43 | stop("lgb.restore_handle: model should be an ", sQuote("lgb.Booster")) 44 | } 45 | model$restore_handle() 46 | return(invisible(model)) 47 | } 48 | -------------------------------------------------------------------------------- /R-package/R/metrics.R: -------------------------------------------------------------------------------- 1 | # [description] List of metrics known to LightGBM. The most up-to-date list can be found 2 | # at https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters 3 | # 4 | # [return] A named logical vector, where each key is a metric name and each value is a boolean. 5 | # TRUE if higher values of the metric are desirable, FALSE if lower values are desirable.
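# For example, a minimal illustrative lookup using the function defined below
# (this example is not part of the original file):
#   .METRICS_HIGHER_BETTER()[["auc"]]            # TRUE: higher AUC is better
#   .METRICS_HIGHER_BETTER()[["binary_logloss"]] # FALSE: lower log-loss is better
# Code such as early-stopping logic can use this direction to decide whether a
# new evaluation result counts as an improvement.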
6 | # Note that only the 'main' metrics are stored here, not aliases, since only the 'main' metrics 7 | # are returned from the C++ side. For example, if you use `metric = "mse"` in your code, 8 | # the metric name `"l2"` will be returned. 9 | .METRICS_HIGHER_BETTER <- function() { 10 | return( 11 | c( 12 | "l1" = FALSE 13 | , "l2" = FALSE 14 | , "mape" = FALSE 15 | , "rmse" = FALSE 16 | , "quantile" = FALSE 17 | , "huber" = FALSE 18 | , "fair" = FALSE 19 | , "poisson" = FALSE 20 | , "gamma" = FALSE 21 | , "gamma_deviance" = FALSE 22 | , "tweedie" = FALSE 23 | , "ndcg" = TRUE 24 | , "map" = TRUE 25 | , "auc" = TRUE 26 | , "average_precision" = TRUE 27 | , "binary_logloss" = FALSE 28 | , "binary_error" = FALSE 29 | , "auc_mu" = TRUE 30 | , "multi_logloss" = FALSE 31 | , "multi_error" = FALSE 32 | , "cross_entropy" = FALSE 33 | , "cross_entropy_lambda" = FALSE 34 | , "kullback_leibler" = FALSE 35 | ) 36 | ) 37 | } 38 | -------------------------------------------------------------------------------- /R-package/cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm -f src/Makevars 3 | -------------------------------------------------------------------------------- /R-package/data/agaricus.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/data/agaricus.test.rda -------------------------------------------------------------------------------- /R-package/data/agaricus.train.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/data/agaricus.train.rda -------------------------------------------------------------------------------- /R-package/data/bank.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/data/bank.rda -------------------------------------------------------------------------------- /R-package/demo/00Index: -------------------------------------------------------------------------------- 1 | basic_walkthrough Basic feature walkthrough 2 | boost_from_prediction Boosting from existing prediction 3 | categorical_features_rules Categorical Feature Preparation with Rules 4 | cross_validation Cross Validation 5 | early_stopping Early Stop in training 6 | efficient_many_training Efficiency for Many Model Trainings 7 | multiclass Multiclass training/prediction 8 | multiclass_custom_objective Multiclass with Custom Objective Function 9 | leaf_stability Leaf (in)Stability example 10 | weight_param Weight-Parameter adjustment relationship 11 | -------------------------------------------------------------------------------- /R-package/demo/boost_from_prediction.R: -------------------------------------------------------------------------------- 1 | library(lightgbm) 2 | 3 | # Load in the agaricus dataset 4 | data(agaricus.train, package = "lightgbm") 5 | data(agaricus.test, package = "lightgbm") 6 | dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label) 7 | dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = agaricus.test$label) 8 | 9 | valids <- list(eval = dtest, train = dtrain) 10 | #--------------------Advanced features --------------------------- 11 | # advanced: start from an initial 
base prediction 12 | print("Start running example to start from an initial prediction") 13 | 14 | # Train lightgbm for 1 round 15 | param <- list( 16 | num_leaves = 4L 17 | , learning_rate = 1.0 18 | , nthread = 2L 19 | , objective = "binary" 20 | ) 21 | bst <- lgb.train(param, dtrain, 1L, valids = valids) 22 | 23 | # Note: we need the margin value instead of the transformed prediction in set_init_score 24 | ptrain <- predict(bst, agaricus.train$data, type = "raw") 25 | ptest <- predict(bst, agaricus.test$data, type = "raw") 26 | 27 | # set the init_score property of dtrain and dtest 28 | # base margin is the base prediction we will boost from 29 | set_field(dtrain, "init_score", ptrain) 30 | set_field(dtest, "init_score", ptest) 31 | 32 | print("This is the result of boosting from an initial prediction") 33 | bst <- lgb.train( 34 | params = param 35 | , data = dtrain 36 | , nrounds = 5L 37 | , valids = valids 38 | ) 39 | -------------------------------------------------------------------------------- /R-package/demo/early_stopping.R: -------------------------------------------------------------------------------- 1 | library(lightgbm) 2 | 3 | # Load in the agaricus dataset 4 | data(agaricus.train, package = "lightgbm") 5 | data(agaricus.test, package = "lightgbm") 6 | 7 | dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label) 8 | dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = agaricus.test$label) 9 | 10 | # Note: for a customized objective function, we leave objective as default 11 | # Note: what we get in prediction is the margin value 12 | # You must know what you are doing 13 | param <- list( 14 | num_leaves = 4L 15 | , learning_rate = 1.0 16 | ) 17 | valids <- list(eval = dtest) 18 | num_round <- 20L 19 | 20 | # User-defined objective function: given predictions, return the gradient and second-order gradient 21 | # This is log-likelihood loss 22 | logregobj <- function(preds, dtrain) { 23 | labels <- get_field(dtrain, "label") 24 | preds <- 1.0 / (1.0 + exp(-preds)) 25 | grad <- preds - labels 26 | hess <- preds * (1.0 - preds) 27 | return(list(grad = grad, hess = hess)) 28 | } 29 | 30 | # A user-defined evaluation function returns a triple (metric_name, result, higher_better) 31 | # NOTE: when you use a customized loss function, the default prediction value is the margin 32 | # This may make built-in evaluation metrics calculate wrong results 33 | # For example, with logistic loss, the prediction is the score before the logistic transformation 34 | # The built-in evaluation error assumes the input is after the logistic transformation 35 | # Keep this in mind when you use the customization; you may need to write a customized evaluation function 36 | evalerror <- function(preds, dtrain) { 37 | labels <- get_field(dtrain, "label") 38 | err <- as.numeric(sum(labels != (preds > 0.5))) / length(labels) 39 | return(list(name = "error", value = err, higher_better = FALSE)) 40 | } 41 | print("Start training with early stopping") 42 | 43 | bst <- lgb.train( 44 | param 45 | , dtrain 46 | , num_round 47 | , valids 48 | , obj = logregobj 49 | , eval = evalerror 50 | , early_stopping_round = 3L 51 | ) 52 | -------------------------------------------------------------------------------- /R-package/demo/efficient_many_training.R: -------------------------------------------------------------------------------- 1 | # Efficient training means training without giving up too much RAM 2 | # In the case of many trainings (like 100+ models), RAM is consumed very quickly 3 | # Therefore,
it is essential to know a strategy to deal with such an issue 4 | 5 | # More results can be found here: https://github.com/microsoft/LightGBM/issues/879#issuecomment-326656580 6 | # Quote: "@Laurae2 Thanks for nice easily reproducible example (unlike mine). 7 | # With reset=FALSE you get after 500 iterations (not 1000): OS reports 27GB usage, while R gc() reports 1.5GB. 8 | # Just doing reset=TRUE will already improve things: OS reports 4.6GB. 9 | # Doing reset=TRUE and calling gc() in the loop will have OS 1.3GB. Thanks for the latest tip." 10 | 11 | # Load library 12 | library(lightgbm) 13 | 14 | # Generate synthetic data of size 1M x 100 15 | set.seed(11111L) 16 | x_data <- matrix(rnorm(n = 100000000L, mean = 0.0, sd = 100.0), nrow = 1000000L, ncol = 100L) 17 | y_data <- rnorm(n = 1000000L, mean = 0.0, sd = 5.0) 18 | 19 | # Create lgb.Dataset for training 20 | data <- lgb.Dataset(x_data, label = y_data) 21 | data$construct() 22 | 23 | # Loop through training 1000 models; please watch your RAM in your task manager 24 | # It MUST remain roughly constant (increasing at most very slightly) 25 | gbm <- list() 26 | 27 | for (i in 1L:1000L) { 28 | print(i) 29 | gbm[[i]] <- lgb.train( 30 | params = list(objective = "regression") 31 | , data = data 32 | , 1L 33 | , reset_data = TRUE 34 | ) 35 | gc(verbose = FALSE) 36 | } 37 | -------------------------------------------------------------------------------- /R-package/demo/multiclass.R: -------------------------------------------------------------------------------- 1 | library(lightgbm) 2 | 3 | # We load the default iris dataset shipped with R 4 | data(iris) 5 | 6 | # We must convert factors to numeric 7 | # They must start from 0 to use multiclass 8 | # For instance: 0, 1, 2, 3, 4, 5... 9 | iris$Species <- as.numeric(as.factor(iris$Species)) - 1L 10 | 11 | # We cut the data set into 80% train and 20% validation 12 | # The last 10 samples of each class are for validation 13 | 14 | train <- as.matrix(iris[c(1L:40L, 51L:90L, 101L:140L), ]) 15 | test <- as.matrix(iris[c(41L:50L, 91L:100L, 141L:150L), ]) 16 | dtrain <- lgb.Dataset(data = train[, 1L:4L], label = train[, 5L]) 17 | dtest <- lgb.Dataset.create.valid(dtrain, data = test[, 1L:4L], label = test[, 5L]) 18 | valids <- list(test = dtest) 19 | 20 | # Method 1 of training 21 | params <- list( 22 | objective = "multiclass" 23 | , metric = "multi_error" 24 | , num_class = 3L 25 | , min_data = 1L 26 | , learning_rate = 1.0 27 | ) 28 | model <- lgb.train( 29 | params 30 | , dtrain 31 | , 100L 32 | , valids 33 | , early_stopping_rounds = 10L 34 | ) 35 | 36 | # We can predict on test data; this outputs a (30 x 3) matrix 37 | # with one row per observation and one column per class 38 | my_preds <- predict(model, test[, 1L:4L]) 39 | 40 | # Method 2 of training, identical 41 | params <- list( 42 | min_data = 1L 43 | , learning_rate = 1.0 44 | , objective = "multiclass" 45 | , metric = "multi_error" 46 | , num_class = 3L 47 | ) 48 | model <- lgb.train( 49 | params 50 | , dtrain 51 | , 100L 52 | , valids 53 | , early_stopping_rounds = 10L 54 | ) 55 | 56 | # We can predict on test data, identical 57 | my_preds <- predict(model, test[, 1L:4L]) 58 | 59 | # A (30x3) matrix with the predictions 60 | # class1 class2 class3 61 | # obs1 obs1 obs1 62 | # obs2 obs2 obs2 63 | # .... .... ....
64 | my_preds <- predict(model, test[, 1L:4L]) 65 | 66 | # We can also get the predicted scores before the Sigmoid/Softmax application 67 | my_preds <- predict(model, test[, 1L:4L], type = "raw") 68 | 69 | # We can also get the leaf index 70 | my_preds <- predict(model, test[, 1L:4L], type = "leaf") 71 | -------------------------------------------------------------------------------- /R-package/inst/Makevars: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /R-package/inst/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /R-package/inst/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/inst/bin/.gitkeep -------------------------------------------------------------------------------- /R-package/man/agaricus.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lightgbm.R 3 | \docType{data} 4 | \name{agaricus.test} 5 | \alias{agaricus.test} 6 | \title{Test part from Mushroom Data Set} 7 | \format{ 8 | A list containing a label vector, and a dgCMatrix object with 1611 9 | rows and 126 variables 10 | } 11 | \usage{ 12 | data(agaricus.test) 13 | } 14 | \description{ 15 | This data set is originally from the Mushroom data set, 16 | UCI Machine Learning Repository. 17 | This data set includes the following fields: 18 | 19 | \itemize{ 20 | \item{\code{label}: the label for each record} 21 | \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.} 22 | } 23 | } 24 | \references{ 25 | https://archive.ics.uci.edu/ml/datasets/Mushroom 26 | 27 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository 28 | [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, 29 | School of Information and Computer Science. 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /R-package/man/agaricus.train.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lightgbm.R 3 | \docType{data} 4 | \name{agaricus.train} 5 | \alias{agaricus.train} 6 | \title{Training part from Mushroom Data Set} 7 | \format{ 8 | A list containing a label vector, and a dgCMatrix object with 6513 9 | rows and 127 variables 10 | } 11 | \usage{ 12 | data(agaricus.train) 13 | } 14 | \description{ 15 | This data set is originally from the Mushroom data set, 16 | UCI Machine Learning Repository. 17 | This data set includes the following fields: 18 | 19 | \itemize{ 20 | \item{\code{label}: the label for each record} 21 | \item{\code{data}: a sparse Matrix of \code{dgCMatrix} class, with 126 columns.} 22 | } 23 | } 24 | \references{ 25 | https://archive.ics.uci.edu/ml/datasets/Mushroom 26 | 27 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository 28 | [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, 29 | School of Information and Computer Science. 
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /R-package/man/bank.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lightgbm.R 3 | \docType{data} 4 | \name{bank} 5 | \alias{bank} 6 | \title{Bank Marketing Data Set} 7 | \format{ 8 | A data.table with 4521 rows and 17 variables 9 | } 10 | \usage{ 11 | data(bank) 12 | } 13 | \description{ 14 | This data set is originally from the Bank Marketing data set, 15 | UCI Machine Learning Repository. 16 | 17 | It contains only the following: bank.csv with 10% of the examples and 17 inputs, 18 | randomly selected from 3 (older version of this dataset with less inputs). 19 | } 20 | \references{ 21 | http://archive.ics.uci.edu/ml/datasets/Bank+Marketing 22 | 23 | S. Moro, P. Cortez and P. Rita. (2014) 24 | A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /R-package/man/dim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{dim.lgb.Dataset} 4 | \alias{dim.lgb.Dataset} 5 | \title{Dimensions of an \code{lgb.Dataset}} 6 | \usage{ 7 | \method{dim}{lgb.Dataset}(x) 8 | } 9 | \arguments{ 10 | \item{x}{Object of class \code{lgb.Dataset}} 11 | } 12 | \value{ 13 | a vector of numbers of rows and of columns 14 | } 15 | \description{ 16 | Returns a vector of numbers of rows and of columns in an \code{lgb.Dataset}. 17 | } 18 | \details{ 19 | Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also 20 | be directly used with an \code{lgb.Dataset} object. 21 | } 22 | \examples{ 23 | \donttest{ 24 | \dontshow{setLGBMthreads(2L)} 25 | \dontshow{data.table::setDTthreads(1L)} 26 | data(agaricus.train, package = "lightgbm") 27 | train <- agaricus.train 28 | dtrain <- lgb.Dataset(train$data, label = train$label) 29 | 30 | stopifnot(nrow(dtrain) == nrow(train$data)) 31 | stopifnot(ncol(dtrain) == ncol(train$data)) 32 | stopifnot(all(dim(dtrain) == dim(train$data))) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /R-package/man/dimnames.lgb.Dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{dimnames.lgb.Dataset} 4 | \alias{dimnames.lgb.Dataset} 5 | \alias{dimnames<-.lgb.Dataset} 6 | \title{Handling of column names of \code{lgb.Dataset}} 7 | \usage{ 8 | \method{dimnames}{lgb.Dataset}(x) 9 | 10 | \method{dimnames}{lgb.Dataset}(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{object of class \code{lgb.Dataset}} 14 | 15 | \item{value}{a list of two elements: the first one is ignored 16 | and the second one is column names} 17 | } 18 | \value{ 19 | A list with the dimension names of the dataset 20 | } 21 | \description{ 22 | Only column names are supported for \code{lgb.Dataset}, thus setting of 23 | row names would have no effect and returned row names would be NULL. 24 | } 25 | \details{ 26 | Generic \code{dimnames} methods are used by \code{colnames}. 27 | Since row names are irrelevant, it is recommended to use \code{colnames} directly. 
28 | } 29 | \examples{ 30 | \donttest{ 31 | \dontshow{setLGBMthreads(2L)} 32 | \dontshow{data.table::setDTthreads(1L)} 33 | data(agaricus.train, package = "lightgbm") 34 | train <- agaricus.train 35 | dtrain <- lgb.Dataset(train$data, label = train$label) 36 | lgb.Dataset.construct(dtrain) 37 | dimnames(dtrain) 38 | colnames(dtrain) 39 | colnames(dtrain) <- make.names(seq_len(ncol(train$data))) 40 | print(dtrain, verbose = TRUE) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /R-package/man/getLGBMThreads.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/multithreading.R 3 | \name{getLGBMThreads} 4 | \alias{getLGBMThreads} 5 | \alias{getLGBMthreads} 6 | \title{Get default number of threads used by LightGBM} 7 | \usage{ 8 | getLGBMthreads() 9 | } 10 | \value{ 11 | number of threads as an integer. \code{-1} means that in situations where parameter \code{num_threads} is 12 | not explicitly supplied, LightGBM will choose a number of threads to use automatically. 13 | } 14 | \description{ 15 | LightGBM attempts to speed up many operations by using multi-threading. 16 | The number of threads used in those operations can be controlled via the 17 | \code{num_threads} parameter passed through \code{params} to functions like 18 | \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing 19 | a model from a text file) are done via code paths that don't explicitly accept thread-control 20 | configuration. 21 | 22 | Use this function to see the default number of threads LightGBM will use for such operations. 23 | } 24 | \seealso{ 25 | \link{setLGBMthreads} 26 | } 27 | -------------------------------------------------------------------------------- /R-package/man/get_field.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{get_field} 4 | \alias{get_field} 5 | \alias{get_field.lgb.Dataset} 6 | \title{Get one attribute of a \code{lgb.Dataset}} 7 | \usage{ 8 | get_field(dataset, field_name) 9 | 10 | \method{get_field}{lgb.Dataset}(dataset, field_name) 11 | } 12 | \arguments{ 13 | \item{dataset}{Object of class \code{lgb.Dataset}} 14 | 15 | \item{field_name}{String with the name of the attribute to get. One of the following. 16 | \itemize{ 17 | \item \code{label}: label lightgbm learns from; 18 | \item \code{weight}: to do a weight rescale; 19 | \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to 20 | group rows together as ordered results from the same set of candidate results to be ranked. 21 | For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)}, 22 | that means that you have 6 groups, where the first 10 records are in the first group, 23 | records 11-30 are in the second group, etc.} 24 | \item \code{init_score}: initial score is the base prediction lightgbm will boost from.
25 | }} 26 | } 27 | \value{ 28 | requested attribute 29 | } 30 | \description{ 31 | Get one attribute of a \code{lgb.Dataset} 32 | } 33 | \examples{ 34 | \donttest{ 35 | \dontshow{setLGBMthreads(2L)} 36 | \dontshow{data.table::setDTthreads(1L)} 37 | data(agaricus.train, package = "lightgbm") 38 | train <- agaricus.train 39 | dtrain <- lgb.Dataset(train$data, label = train$label) 40 | lgb.Dataset.construct(dtrain) 41 | 42 | labels <- lightgbm::get_field(dtrain, "label") 43 | lightgbm::set_field(dtrain, "label", 1 - labels) 44 | 45 | labels2 <- lightgbm::get_field(dtrain, "label") 46 | stopifnot(all(labels2 == 1 - labels)) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /R-package/man/lgb.Dataset.construct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{lgb.Dataset.construct} 4 | \alias{lgb.Dataset.construct} 5 | \title{Construct Dataset explicitly} 6 | \usage{ 7 | lgb.Dataset.construct(dataset) 8 | } 9 | \arguments{ 10 | \item{dataset}{Object of class \code{lgb.Dataset}} 11 | } 12 | \value{ 13 | constructed dataset 14 | } 15 | \description{ 16 | Construct Dataset explicitly 17 | } 18 | \examples{ 19 | \donttest{ 20 | \dontshow{setLGBMthreads(2L)} 21 | \dontshow{data.table::setDTthreads(1L)} 22 | data(agaricus.train, package = "lightgbm") 23 | train <- agaricus.train 24 | dtrain <- lgb.Dataset(train$data, label = train$label) 25 | lgb.Dataset.construct(dtrain) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /R-package/man/lgb.Dataset.save.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{lgb.Dataset.save} 4 | \alias{lgb.Dataset.save} 5 | \title{Save \code{lgb.Dataset} to a binary file} 6 | \usage{ 7 | lgb.Dataset.save(dataset, fname) 8 | } 9 | \arguments{ 10 | \item{dataset}{object of class \code{lgb.Dataset}} 11 | 12 | \item{fname}{filename of the output file} 13 | } 14 | \value{ 15 | the dataset you passed in 16 | } 17 | \description{ 18 | Please note that \code{init_score} is not saved in the binary file. 19 | If you need it, please set it again after loading the Dataset. 20 | } 21 | \examples{ 22 | \donttest{ 23 | \dontshow{setLGBMthreads(2L)} 24 | \dontshow{data.table::setDTthreads(1L)} 25 | data(agaricus.train, package = "lightgbm") 26 | train <- agaricus.train 27 | dtrain <- lgb.Dataset(train$data, label = train$label) 28 | lgb.Dataset.save(dtrain, tempfile(fileext = ".bin")) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /R-package/man/lgb.Dataset.set.categorical.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{lgb.Dataset.set.categorical} 4 | \alias{lgb.Dataset.set.categorical} 5 | \title{Set categorical feature of \code{lgb.Dataset}} 6 | \usage{ 7 | lgb.Dataset.set.categorical(dataset, categorical_feature) 8 | } 9 | \arguments{ 10 | \item{dataset}{object of class \code{lgb.Dataset}} 11 | 12 | \item{categorical_feature}{categorical features. This can either be a character vector of feature 13 | names or an integer vector with the indices of the features (e.g.
14 | \code{c(1L, 10L)} to say "the first and tenth columns").} 15 | } 16 | \value{ 17 | the dataset you passed in 18 | } 19 | \description{ 20 | Set the categorical features of an \code{lgb.Dataset} object. Use this function 21 | to tell LightGBM which features should be treated as categorical. 22 | } 23 | \examples{ 24 | \donttest{ 25 | \dontshow{setLGBMthreads(2L)} 26 | \dontshow{data.table::setDTthreads(1L)} 27 | data(agaricus.train, package = "lightgbm") 28 | train <- agaricus.train 29 | dtrain <- lgb.Dataset(train$data, label = train$label) 30 | data_file <- tempfile(fileext = ".data") 31 | lgb.Dataset.save(dtrain, data_file) 32 | dtrain <- lgb.Dataset(data_file) 33 | lgb.Dataset.set.categorical(dtrain, 1L:2L) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R-package/man/lgb.Dataset.set.reference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{lgb.Dataset.set.reference} 4 | \alias{lgb.Dataset.set.reference} 5 | \title{Set reference of \code{lgb.Dataset}} 6 | \usage{ 7 | lgb.Dataset.set.reference(dataset, reference) 8 | } 9 | \arguments{ 10 | \item{dataset}{object of class \code{lgb.Dataset}} 11 | 12 | \item{reference}{object of class \code{lgb.Dataset}} 13 | } 14 | \value{ 15 | the dataset you passed in 16 | } 17 | \description{ 18 | If you want to use validation data, you should set its reference to the training data 19 | } 20 | \examples{ 21 | \donttest{ 22 | \dontshow{setLGBMthreads(2L)} 23 | \dontshow{data.table::setDTthreads(1L)} 24 | # create training Dataset 25 | data(agaricus.train, package = "lightgbm") 26 | train <- agaricus.train 27 | dtrain <- lgb.Dataset(train$data, label = train$label) 28 | 29 | # create a validation Dataset, using dtrain as a reference 30 | data(agaricus.test, package = "lightgbm") 31 | test <- agaricus.test 32 | dtest <- lgb.Dataset(test$data, label = test$label) 33 | lgb.Dataset.set.reference(dtest, dtrain) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R-package/man/lgb.drop_serialized.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.drop_serialized.R 3 | \name{lgb.drop_serialized} 4 | \alias{lgb.drop_serialized} 5 | \title{Drop serialized raw bytes in a LightGBM model object} 6 | \usage{ 7 | lgb.drop_serialized(model) 8 | } 9 | \arguments{ 10 | \item{model}{\code{lgb.Booster} object which was produced with `serializable=TRUE`.} 11 | } 12 | \value{ 13 | \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). 14 | } 15 | \description{ 16 | If a LightGBM model object was produced with argument `serializable=TRUE`, the R object will keep 17 | a copy of the underlying C++ object as raw bytes, which can be used to reconstruct such an object after getting 18 | serialized and de-serialized, but at the cost of extra memory usage. If these raw bytes are not needed anymore, 19 | they can be dropped through this function in order to save memory. Note that the object will be modified in-place. 20 | 21 | \emph{New in version 4.0.0} 22 | } 23 | \seealso{ 24 | \link{lgb.restore_handle}, \link{lgb.make_serializable}.
25 | } 26 | -------------------------------------------------------------------------------- /R-package/man/lgb.dump.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Booster.R 3 | \name{lgb.dump} 4 | \alias{lgb.dump} 5 | \title{Dump LightGBM model to JSON} 6 | \usage{ 7 | lgb.dump(booster, num_iteration = NULL, start_iteration = 1L) 8 | } 9 | \arguments{ 10 | \item{booster}{Object of class \code{lgb.Booster}} 11 | 12 | \item{num_iteration}{Number of iterations to be dumped. NULL or <= 0 means use the best iteration} 13 | 14 | \item{start_iteration}{Index (1-based) of the first boosting round to dump. 15 | For example, passing \code{start_iteration=5, num_iteration=3} for a regression model 16 | means "dump the fifth, sixth, and seventh tree" 17 | 18 | \emph{New in version 4.4.0}} 19 | } 20 | \value{ 21 | JSON format of the model 22 | } 23 | \description{ 24 | Dump LightGBM model to JSON 25 | } 26 | \examples{ 27 | \donttest{ 28 | library(lightgbm) 29 | \dontshow{setLGBMthreads(2L)} 30 | \dontshow{data.table::setDTthreads(1L)} 31 | data(agaricus.train, package = "lightgbm") 32 | train <- agaricus.train 33 | dtrain <- lgb.Dataset(train$data, label = train$label) 34 | data(agaricus.test, package = "lightgbm") 35 | test <- agaricus.test 36 | dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) 37 | params <- list( 38 | objective = "regression" 39 | , metric = "l2" 40 | , min_data = 1L 41 | , learning_rate = 1.0 42 | , num_threads = 2L 43 | ) 44 | valids <- list(test = dtest) 45 | model <- lgb.train( 46 | params = params 47 | , data = dtrain 48 | , nrounds = 10L 49 | , valids = valids 50 | , early_stopping_rounds = 5L 51 | ) 52 | json_model <- lgb.dump(model) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /R-package/man/lgb.get.eval.result.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Booster.R 3 | \name{lgb.get.eval.result} 4 | \alias{lgb.get.eval.result} 5 | \title{Get record evaluation result from booster} 6 | \usage{ 7 | lgb.get.eval.result( 8 | booster, 9 | data_name, 10 | eval_name, 11 | iters = NULL, 12 | is_err = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{booster}{Object of class \code{lgb.Booster}} 17 | 18 | \item{data_name}{Name of the dataset to return evaluation results for.} 19 | 20 | \item{eval_name}{Name of the evaluation metric to return results for.} 21 | 22 | \item{iters}{An integer vector of iterations you want to get evaluation results for. If NULL 23 | (the default), evaluation results for all iterations will be returned.} 24 | 25 | \item{is_err}{if TRUE, the evaluation error will be returned instead} 26 | } 27 | \value{ 28 | numeric vector of evaluation results 29 | } 30 | \description{ 31 | Given a \code{lgb.Booster}, return evaluation results for a 32 | particular metric on a particular dataset.
33 | } 34 | \examples{ 35 | \donttest{ 36 | \dontshow{setLGBMthreads(2L)} 37 | \dontshow{data.table::setDTthreads(1L)} 38 | # train a regression model 39 | data(agaricus.train, package = "lightgbm") 40 | train <- agaricus.train 41 | dtrain <- lgb.Dataset(train$data, label = train$label) 42 | data(agaricus.test, package = "lightgbm") 43 | test <- agaricus.test 44 | dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) 45 | params <- list( 46 | objective = "regression" 47 | , metric = "l2" 48 | , min_data = 1L 49 | , learning_rate = 1.0 50 | , num_threads = 2L 51 | ) 52 | valids <- list(test = dtest) 53 | model <- lgb.train( 54 | params = params 55 | , data = dtrain 56 | , nrounds = 5L 57 | , valids = valids 58 | ) 59 | 60 | # Examine valid data_name values 61 | print(setdiff(names(model$record_evals), "start_iter")) 62 | 63 | # Examine valid eval_name values for dataset "test" 64 | print(names(model$record_evals[["test"]])) 65 | 66 | # Get L2 values for "test" dataset 67 | lgb.get.eval.result(model, "test", "l2") 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /R-package/man/lgb.importance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.importance.R 3 | \name{lgb.importance} 4 | \alias{lgb.importance} 5 | \title{Compute feature importance in a model} 6 | \usage{ 7 | lgb.importance(model, percentage = TRUE) 8 | } 9 | \arguments{ 10 | \item{model}{object of class \code{lgb.Booster}.} 11 | 12 | \item{percentage}{whether to show importance in relative percentage.} 13 | } 14 | \value{ 15 | For a tree model, a \code{data.table} with the following columns: 16 | \itemize{ 17 | \item{\code{Feature}: Feature names in the model.} 18 | \item{\code{Gain}: The total gain of this feature's splits.} 19 | \item{\code{Cover}: The number of observations related to this feature.} 20 | \item{\code{Frequency}: The number of times a feature is used in splits.} 21 | } 22 | } 23 | \description{ 24 | Creates a \code{data.table} of feature importances in a model.
25 | } 26 | \examples{ 27 | \donttest{ 28 | \dontshow{setLGBMthreads(2L)} 29 | \dontshow{data.table::setDTthreads(1L)} 30 | data(agaricus.train, package = "lightgbm") 31 | train <- agaricus.train 32 | dtrain <- lgb.Dataset(train$data, label = train$label) 33 | 34 | params <- list( 35 | objective = "binary" 36 | , learning_rate = 0.1 37 | , max_depth = -1L 38 | , min_data_in_leaf = 1L 39 | , min_sum_hessian_in_leaf = 1.0 40 | , num_threads = 2L 41 | ) 42 | model <- lgb.train( 43 | params = params 44 | , data = dtrain 45 | , nrounds = 5L 46 | ) 47 | 48 | tree_imp1 <- lgb.importance(model, percentage = TRUE) 49 | tree_imp2 <- lgb.importance(model, percentage = FALSE) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /R-package/man/lgb.interprete.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.interprete.R 3 | \name{lgb.interprete} 4 | \alias{lgb.interprete} 5 | \title{Compute feature contribution of prediction} 6 | \usage{ 7 | lgb.interprete(model, data, idxset, num_iteration = NULL) 8 | } 9 | \arguments{ 10 | \item{model}{object of class \code{lgb.Booster}.} 11 | 12 | \item{data}{a matrix object or a dgCMatrix object.} 13 | 14 | \item{idxset}{an integer vector of indices of the rows needed.} 15 | 16 | \item{num_iteration}{number of iterations to predict with; NULL or <= 0 means use the best iteration.} 17 | } 18 | \value{ 19 | For regression, binary classification and lambdarank models, a \code{list} of \code{data.table} 20 | with the following columns: 21 | \itemize{ 22 | \item{\code{Feature}: Feature names in the model.} 23 | \item{\code{Contribution}: The total contribution of this feature's splits.} 24 | } 25 | For multiclass classification, a \code{list} of \code{data.table} with the Feature column and 26 | Contribution columns for each class. 27 | } 28 | \description{ 29 | Computes feature contribution components of raw-score prediction.
30 | } 31 | \examples{ 32 | \donttest{ 33 | \dontshow{setLGBMthreads(2L)} 34 | \dontshow{data.table::setDTthreads(1L)} 35 | Logit <- function(x) log(x / (1.0 - x)) 36 | data(agaricus.train, package = "lightgbm") 37 | train <- agaricus.train 38 | dtrain <- lgb.Dataset(train$data, label = train$label) 39 | set_field( 40 | dataset = dtrain 41 | , field_name = "init_score" 42 | , data = rep(Logit(mean(train$label)), length(train$label)) 43 | ) 44 | data(agaricus.test, package = "lightgbm") 45 | test <- agaricus.test 46 | 47 | params <- list( 48 | objective = "binary" 49 | , learning_rate = 0.1 50 | , max_depth = -1L 51 | , min_data_in_leaf = 1L 52 | , min_sum_hessian_in_leaf = 1.0 53 | , num_threads = 2L 54 | ) 55 | model <- lgb.train( 56 | params = params 57 | , data = dtrain 58 | , nrounds = 3L 59 | ) 60 | 61 | tree_interpretation <- lgb.interprete(model, test$data, 1L:5L) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /R-package/man/lgb.load.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Booster.R 3 | \name{lgb.load} 4 | \alias{lgb.load} 5 | \title{Load LightGBM model} 6 | \usage{ 7 | lgb.load(filename = NULL, model_str = NULL) 8 | } 9 | \arguments{ 10 | \item{filename}{path of model file} 11 | 12 | \item{model_str}{a string containing the model (as a \code{character} or \code{raw} vector)} 13 | } 14 | \value{ 15 | lgb.Booster 16 | } 17 | \description{ 18 | \code{lgb.load} takes in either a file path or a model string. 19 | If both are provided, it will default to loading from the file. 20 | } 21 | \examples{ 22 | \donttest{ 23 | \dontshow{setLGBMthreads(2L)} 24 | \dontshow{data.table::setDTthreads(1L)} 25 | data(agaricus.train, package = "lightgbm") 26 | train <- agaricus.train 27 | dtrain <- lgb.Dataset(train$data, label = train$label) 28 | data(agaricus.test, package = "lightgbm") 29 | test <- agaricus.test 30 | dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) 31 | params <- list( 32 | objective = "regression" 33 | , metric = "l2" 34 | , min_data = 1L 35 | , learning_rate = 1.0 36 | , num_threads = 2L 37 | ) 38 | valids <- list(test = dtest) 39 | model <- lgb.train( 40 | params = params 41 | , data = dtrain 42 | , nrounds = 5L 43 | , valids = valids 44 | , early_stopping_rounds = 3L 45 | ) 46 | model_file <- tempfile(fileext = ".txt") 47 | lgb.save(model, model_file) 48 | load_booster <- lgb.load(filename = model_file) 49 | model_string <- model$save_model_to_string(NULL) # saves best iteration 50 | load_booster_from_str <- lgb.load(model_str = model_string) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /R-package/man/lgb.make_serializable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.make_serializable.R 3 | \name{lgb.make_serializable} 4 | \alias{lgb.make_serializable} 5 | \title{Make a LightGBM object serializable by keeping raw bytes} 6 | \usage{ 7 | lgb.make_serializable(model) 8 | } 9 | \arguments{ 10 | \item{model}{\code{lgb.Booster} object which was produced with `serializable=FALSE`.} 11 | } 12 | \value{ 13 | \code{lgb.Booster} (the same `model` object that was passed as input, invisibly).
14 | } 15 | \description{ 16 | If a LightGBM model object was produced with argument `serializable=FALSE`, the R object will not 17 | be serializable (e.g. cannot save and load with \code{saveRDS} and \code{readRDS}) as it will lack the raw bytes 18 | needed to reconstruct its underlying C++ object. This function can be used to forcibly produce those serialized 19 | raw bytes and make the object serializable. Note that the object will be modified in-place. 20 | 21 | \emph{New in version 4.0.0} 22 | } 23 | \seealso{ 24 | \link{lgb.restore_handle}, \link{lgb.drop_serialized}. 25 | } 26 | -------------------------------------------------------------------------------- /R-package/man/lgb.plot.importance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.plot.importance.R 3 | \name{lgb.plot.importance} 4 | \alias{lgb.plot.importance} 5 | \title{Plot feature importance as a bar graph} 6 | \usage{ 7 | lgb.plot.importance( 8 | tree_imp, 9 | top_n = 10L, 10 | measure = "Gain", 11 | left_margin = 10L, 12 | cex = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{tree_imp}{a \code{data.table} returned by \code{\link{lgb.importance}}.} 17 | 18 | \item{top_n}{maximal number of top features to include in the plot.} 19 | 20 | \item{measure}{the name of the importance measure to plot, can be "Gain", "Cover" or "Frequency".} 21 | 22 | \item{left_margin}{(base R barplot) allows adjusting the left margin size to fit feature names.} 23 | 24 | \item{cex}{(base R barplot) passed as \code{cex.names} parameter to \code{\link[graphics]{barplot}}. 25 | Set a number smaller than 1.0 to make the bar labels smaller than R's default and values 26 | greater than 1.0 to make them larger.} 27 | } 28 | \value{ 29 | The \code{lgb.plot.importance} function creates a \code{barplot} 30 | and silently returns a processed data.table with \code{top_n} features sorted by defined importance. 31 | } 32 | \description{ 33 | Plot previously calculated feature importance: Gain, Cover and Frequency, as a bar graph. 34 | } 35 | \details{ 36 | The graph represents each feature as a horizontal bar of length proportional to the defined importance of a feature. 37 | Features are shown ranked in a decreasing importance order.
38 | } 39 | \examples{ 40 | \donttest{ 41 | \dontshow{setLGBMthreads(2L)} 42 | \dontshow{data.table::setDTthreads(1L)} 43 | data(agaricus.train, package = "lightgbm") 44 | train <- agaricus.train 45 | dtrain <- lgb.Dataset(train$data, label = train$label) 46 | 47 | params <- list( 48 | objective = "binary" 49 | , learning_rate = 0.1 50 | , min_data_in_leaf = 1L 51 | , min_sum_hessian_in_leaf = 1.0 52 | , num_threads = 2L 53 | ) 54 | 55 | model <- lgb.train( 56 | params = params 57 | , data = dtrain 58 | , nrounds = 5L 59 | ) 60 | 61 | tree_imp <- lgb.importance(model, percentage = TRUE) 62 | lgb.plot.importance(tree_imp, top_n = 5L, measure = "Gain") 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /R-package/man/lgb.restore_handle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.restore_handle.R 3 | \name{lgb.restore_handle} 4 | \alias{lgb.restore_handle} 5 | \title{Restore the C++ component of a de-serialized LightGBM model} 6 | \usage{ 7 | lgb.restore_handle(model) 8 | } 9 | \arguments{ 10 | \item{model}{\code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle 11 | need to be restored.} 12 | } 13 | \value{ 14 | \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). 15 | } 16 | \description{ 17 | After a LightGBM model object is de-serialized through functions such as \code{save} or 18 | \code{saveRDS}, its underlying C++ object will be blank and needs to be restored to be able to use it. Such 19 | an object is restored automatically when calling functions such as \code{predict}, but this function can be 20 | used to forcibly restore it beforehand. Note that the object will be modified in-place. 21 | 22 | \emph{New in version 4.0.0} 23 | } 24 | \details{ 25 | Be aware that fast single-row prediction configurations are not restored through this 26 | function. If you wish to make fast single-row predictions using a \code{lgb.Booster} loaded this way, 27 | call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object. 28 | } 29 | \examples{ 30 | \donttest{ 31 | library(lightgbm) 32 | \dontshow{setLGBMthreads(2L)} 33 | \dontshow{data.table::setDTthreads(1L)} 34 | data("agaricus.train") 35 | model <- lightgbm( 36 | agaricus.train$data 37 | , agaricus.train$label 38 | , params = list(objective = "binary") 39 | , nrounds = 5L 40 | , verbose = 0 41 | , num_threads = 2L 42 | ) 43 | fname <- tempfile(fileext = ".rds") 44 | saveRDS(model, fname) 45 | 46 | model_new <- readRDS(fname) 47 | model_new$check_null_handle() 48 | lgb.restore_handle(model_new) 49 | model_new$check_null_handle() 50 | } 51 | } 52 | \seealso{ 53 | \link{lgb.make_serializable}, \link{lgb.drop_serialized}.
54 | } 55 | -------------------------------------------------------------------------------- /R-package/man/lgb.save.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Booster.R 3 | \name{lgb.save} 4 | \alias{lgb.save} 5 | \title{Save LightGBM model} 6 | \usage{ 7 | lgb.save(booster, filename, num_iteration = NULL, start_iteration = 1L) 8 | } 9 | \arguments{ 10 | \item{booster}{Object of class \code{lgb.Booster}} 11 | 12 | \item{filename}{Name of the file to save the model to} 13 | 14 | \item{num_iteration}{Number of iterations to save; NULL or <= 0 means use the best iteration} 15 | 16 | \item{start_iteration}{Index (1-based) of the first boosting round to save. 17 | For example, passing \code{start_iteration=5, num_iteration=3} for a regression model 18 | means "save the fifth, sixth, and seventh tree" 19 | 20 | \emph{New in version 4.4.0}} 21 | } 22 | \value{ 23 | lgb.Booster 24 | } 25 | \description{ 26 | Save LightGBM model 27 | } 28 | \examples{ 29 | \donttest{ 30 | \dontshow{setLGBMthreads(2L)} 31 | \dontshow{data.table::setDTthreads(1L)} 32 | library(lightgbm) 33 | data(agaricus.train, package = "lightgbm") 34 | train <- agaricus.train 35 | dtrain <- lgb.Dataset(train$data, label = train$label) 36 | data(agaricus.test, package = "lightgbm") 37 | test <- agaricus.test 38 | dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) 39 | params <- list( 40 | objective = "regression" 41 | , metric = "l2" 42 | , min_data = 1L 43 | , learning_rate = 1.0 44 | , num_threads = 2L 45 | ) 46 | valids <- list(test = dtest) 47 | model <- lgb.train( 48 | params = params 49 | , data = dtrain 50 | , nrounds = 10L 51 | , valids = valids 52 | , early_stopping_rounds = 5L 53 | ) 54 | lgb.save(model, tempfile(fileext = ".txt")) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /R-package/man/lgb.slice.Dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{lgb.slice.Dataset} 4 | \alias{lgb.slice.Dataset} 5 | \title{Slice a dataset} 6 | \usage{ 7 | lgb.slice.Dataset(dataset, idxset) 8 | } 9 | \arguments{ 10 | \item{dataset}{Object of class \code{lgb.Dataset}} 11 | 12 | \item{idxset}{an integer vector of indices of the rows needed} 13 | } 14 | \value{ 15 | the constructed sub-dataset 16 | } 17 | \description{ 18 | Get a new \code{lgb.Dataset} containing the specified rows of the 19 | original \code{lgb.Dataset} object 20 | 21 | \emph{Renamed from} \code{slice()} \emph{in 4.4.0} 22 | } 23 | \examples{ 24 | \donttest{ 25 | \dontshow{setLGBMthreads(2L)} 26 | \dontshow{data.table::setDTthreads(1L)} 27 | data(agaricus.train, package = "lightgbm") 28 | train <- agaricus.train 29 | dtrain <- lgb.Dataset(train$data, label = train$label) 30 | 31 | dsub <- lgb.slice.Dataset(dtrain, seq_len(42L)) 32 | lgb.Dataset.construct(dsub) 33 | labels <- lightgbm::get_field(dsub, "label") 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R-package/man/lgb_shared_dataset_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{lgb_shared_dataset_params} 4 | \alias{lgb_shared_dataset_params} 5 | \title{Shared Dataset parameter docs} 6 | 
\arguments{ 7 | \item{label}{vector of labels to use as the target variable} 8 | 9 | \item{weight}{numeric vector of sample weights} 10 | 11 | \item{init_score}{initial score is the base prediction lightgbm will boost from} 12 | 13 | \item{group}{used for learning-to-rank tasks. An integer vector describing how to 14 | group rows together as ordered results from the same set of candidate results 15 | to be ranked. For example, if you have a 100-document dataset with 16 | \code{group = c(10, 20, 40, 10, 10, 10)}, that means that you have 6 groups, 17 | where the first 10 records are in the first group, records 11-30 are in the 18 | second group, etc.} 19 | } 20 | \description{ 21 | Parameter docs for fields used in \code{lgb.Dataset} construction 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /R-package/man/print.lgb.Booster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Booster.R 3 | \name{print.lgb.Booster} 4 | \alias{print.lgb.Booster} 5 | \title{Print method for LightGBM model} 6 | \usage{ 7 | \method{print}{lgb.Booster}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{Object of class \code{lgb.Booster}} 11 | 12 | \item{...}{Not used} 13 | } 14 | \value{ 15 | The same input \code{x}, returned as invisible. 16 | } 17 | \description{ 18 | Show summary information about a LightGBM model object (same as \code{summary}). 19 | 20 | \emph{New in version 4.0.0} 21 | } 22 | -------------------------------------------------------------------------------- /R-package/man/setLGBMThreads.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/multithreading.R 3 | \name{setLGBMThreads} 4 | \alias{setLGBMThreads} 5 | \alias{setLGBMthreads} 6 | \title{Set maximum number of threads used by LightGBM} 7 | \usage{ 8 | setLGBMthreads(num_threads) 9 | } 10 | \arguments{ 11 | \item{num_threads}{maximum number of threads to be used by LightGBM in multi-threaded operations} 12 | } 13 | \description{ 14 | LightGBM attempts to speed up many operations by using multi-threading. 15 | The number of threads used in those operations can be controlled via the 16 | \code{num_threads} parameter passed through \code{params} to functions like 17 | \link{lgb.train} and \link{lgb.Dataset}. However, some operations (like materializing 18 | a model from a text file) are done via code paths that don't explicitly accept thread-control 19 | configuration. 20 | 21 | Use this function to set the maximum number of threads LightGBM will use for such operations. 22 | 23 | This function affects all LightGBM operations in the same process. 24 | 25 | So, for example, if you call \code{setLGBMthreads(4)}, no other multi-threaded LightGBM 26 | operation in the same process will use more than 4 threads. 27 | 28 | Call \code{setLGBMthreads(-1)} to remove this limitation. 
29 | } 30 | \seealso{ 31 | \link{getLGBMthreads} 32 | } 33 | -------------------------------------------------------------------------------- /R-package/man/set_field.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Dataset.R 3 | \name{set_field} 4 | \alias{set_field} 5 | \alias{set_field.lgb.Dataset} 6 | \title{Set one attribute of a \code{lgb.Dataset} object} 7 | \usage{ 8 | set_field(dataset, field_name, data) 9 | 10 | \method{set_field}{lgb.Dataset}(dataset, field_name, data) 11 | } 12 | \arguments{ 13 | \item{dataset}{Object of class \code{lgb.Dataset}} 14 | 15 | \item{field_name}{String with the name of the attribute to set. One of the following. 16 | \itemize{ 17 | \item \code{label}: the label lightgbm learns from; 18 | \item \code{weight}: sample weights used to rescale each row's contribution to the loss; 19 | \item{\code{group}: used for learning-to-rank tasks. An integer vector describing how to 20 | group rows together as ordered results from the same set of candidate results to be ranked. 21 | For example, if you have a 100-document dataset with \code{group = c(10, 20, 40, 10, 10, 10)}, 22 | that means that you have 6 groups, where the first 10 records are in the first group, 23 | records 11-30 are in the second group, etc.} 24 | \item \code{init_score}: initial score is the base prediction lightgbm will boost from. 25 | }} 26 | 27 | \item{data}{The data for the field. See examples.} 28 | } 29 | \value{ 30 | The \code{lgb.Dataset} you passed in. 31 | } 32 | \description{ 33 | Set one attribute of a \code{lgb.Dataset} 34 | } 35 | \examples{ 36 | \donttest{ 37 | \dontshow{setLGBMthreads(2L)} 38 | \dontshow{data.table::setDTthreads(1L)} 39 | data(agaricus.train, package = "lightgbm") 40 | train <- agaricus.train 41 | dtrain <- lgb.Dataset(train$data, label = train$label) 42 | lgb.Dataset.construct(dtrain) 43 | 44 | labels <- lightgbm::get_field(dtrain, "label") 45 | lightgbm::set_field(dtrain, "label", 1 - labels) 46 | 47 | labels2 <- lightgbm::get_field(dtrain, "label") 48 | stopifnot(all.equal(labels2, 1 - labels)) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /R-package/man/summary.lgb.Booster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lgb.Booster.R 3 | \name{summary.lgb.Booster} 4 | \alias{summary.lgb.Booster} 5 | \title{Summary method for LightGBM model} 6 | \usage{ 7 | \method{summary}{lgb.Booster}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Object of class \code{lgb.Booster}} 11 | 12 | \item{...}{Not used} 13 | } 14 | \value{ 15 | The same input \code{object}, returned as invisible. 16 | } 17 | \description{ 18 | Show summary information about a LightGBM model object (same as \code{print}). 
19 | 20 | \emph{New in version 4.0.0} 21 | } 22 | -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /R-package/pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/R-package/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /R-package/recreate-configure.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e -E -u -o pipefail 4 | 5 | # recreates 'configure' from 
'configure.ac' 6 | # this script should run on Ubuntu 22.04 7 | AUTOCONF_VERSION=$(cat R-package/AUTOCONF_UBUNTU_VERSION) 8 | 9 | # R packages cannot have versions like 3.0.0rc1, but 10 | # 3.0.0-1 is acceptable 11 | LGB_VERSION=$(sed "s/rc/-/g" < VERSION.txt) 12 | 13 | # this script changes configure.ac. Copying to a temporary file 14 | # so changes to configure.ac don't get committed in git 15 | TMP_CONFIGURE_AC=".configure.ac" 16 | 17 | echo "Creating 'configure' script with Autoconf ${AUTOCONF_VERSION}" 18 | 19 | apt update 20 | apt-get install \ 21 | --no-install-recommends \ 22 | -y \ 23 | autoconf="${AUTOCONF_VERSION}" 24 | 25 | cd R-package 26 | 27 | cp configure.ac ${TMP_CONFIGURE_AC} 28 | sed -i.bak -e "s/~~VERSION~~/${LGB_VERSION}/" ${TMP_CONFIGURE_AC} 29 | 30 | autoconf \ 31 | --output configure \ 32 | ${TMP_CONFIGURE_AC} \ 33 | || exit 1 34 | 35 | rm ${TMP_CONFIGURE_AC} 36 | 37 | rm -r autom4te.cache || echo "no autoconf cache found" 38 | 39 | echo "done creating 'configure' script" 40 | -------------------------------------------------------------------------------- /R-package/src/Makevars.in: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX17 2 | 3 | PKGROOT=. 4 | 5 | LGB_CPPFLAGS = \ 6 | @LGB_CPPFLAGS@ \ 7 | -DUSE_SOCKET \ 8 | -DLGB_R_BUILD 9 | 10 | PKG_CPPFLAGS = \ 11 | -I$(PKGROOT)/include \ 12 | $(LGB_CPPFLAGS) 13 | 14 | PKG_CXXFLAGS = \ 15 | @OPENMP_CXXFLAGS@ \ 16 | -pthread 17 | 18 | PKG_LIBS = \ 19 | @OPENMP_CXXFLAGS@ \ 20 | @OPENMP_LIB@ \ 21 | -pthread 22 | 23 | OBJECTS = \ 24 | boosting/boosting.o \ 25 | boosting/gbdt.o \ 26 | boosting/gbdt_model_text.o \ 27 | boosting/gbdt_prediction.o \ 28 | boosting/prediction_early_stop.o \ 29 | boosting/sample_strategy.o \ 30 | io/bin.o \ 31 | io/config.o \ 32 | io/config_auto.o \ 33 | io/dataset.o \ 34 | io/dataset_loader.o \ 35 | io/file_io.o \ 36 | io/json11.o \ 37 | io/metadata.o \ 38 | io/parser.o \ 39 | io/train_share_states.o \ 40 | io/tree.o \ 41 | metric/dcg_calculator.o \ 42 | metric/metric.o \ 43 | objective/objective_function.o \ 44 | network/linker_topo.o \ 45 | network/linkers_mpi.o \ 46 | network/linkers_socket.o \ 47 | network/network.o \ 48 | treelearner/data_parallel_tree_learner.o \ 49 | treelearner/feature_histogram.o \ 50 | treelearner/feature_parallel_tree_learner.o \ 51 | treelearner/gpu_tree_learner.o \ 52 | treelearner/gradient_discretizer.o \ 53 | treelearner/linear_tree_learner.o \ 54 | treelearner/serial_tree_learner.o \ 55 | treelearner/tree_learner.o \ 56 | treelearner/voting_parallel_tree_learner.o \ 57 | utils/openmp_wrapper.o \ 58 | c_api.o \ 59 | lightgbm_R.o 60 | -------------------------------------------------------------------------------- /R-package/src/Makevars.win.in: -------------------------------------------------------------------------------- 1 | CXX_STD = @CXX_STD@ 2 | 3 | PKGROOT=. 
4 | 5 | LGB_CPPFLAGS = \ 6 | @LGB_CPPFLAGS@ \ 7 | -DUSE_SOCKET \ 8 | -DLGB_R_BUILD 9 | 10 | PKG_CPPFLAGS = \ 11 | -I$(PKGROOT)/include \ 12 | $(LGB_CPPFLAGS) 13 | 14 | PKG_CXXFLAGS = \ 15 | ${SHLIB_OPENMP_CXXFLAGS} \ 16 | ${SHLIB_PTHREAD_FLAGS} 17 | 18 | PKG_LIBS = \ 19 | ${SHLIB_OPENMP_CXXFLAGS} \ 20 | ${SHLIB_PTHREAD_FLAGS} \ 21 | -lws2_32 \ 22 | -liphlpapi 23 | 24 | OBJECTS = \ 25 | boosting/boosting.o \ 26 | boosting/gbdt.o \ 27 | boosting/gbdt_model_text.o \ 28 | boosting/gbdt_prediction.o \ 29 | boosting/prediction_early_stop.o \ 30 | boosting/sample_strategy.o \ 31 | io/bin.o \ 32 | io/config.o \ 33 | io/config_auto.o \ 34 | io/dataset.o \ 35 | io/dataset_loader.o \ 36 | io/file_io.o \ 37 | io/json11.o \ 38 | io/metadata.o \ 39 | io/parser.o \ 40 | io/train_share_states.o \ 41 | io/tree.o \ 42 | metric/dcg_calculator.o \ 43 | metric/metric.o \ 44 | objective/objective_function.o \ 45 | network/linker_topo.o \ 46 | network/linkers_mpi.o \ 47 | network/linkers_socket.o \ 48 | network/network.o \ 49 | treelearner/data_parallel_tree_learner.o \ 50 | treelearner/feature_histogram.o \ 51 | treelearner/feature_parallel_tree_learner.o \ 52 | treelearner/gpu_tree_learner.o \ 53 | treelearner/gradient_discretizer.o \ 54 | treelearner/linear_tree_learner.o \ 55 | treelearner/serial_tree_learner.o \ 56 | treelearner/tree_learner.o \ 57 | treelearner/voting_parallel_tree_learner.o \ 58 | utils/openmp_wrapper.o \ 59 | c_api.o \ 60 | lightgbm_R.o 61 | -------------------------------------------------------------------------------- /R-package/src/lightgbm-win.def: -------------------------------------------------------------------------------- 1 | LIBRARY lightgbm.dll 2 | EXPORTS 3 | R_init_lightgbm 4 | -------------------------------------------------------------------------------- /R-package/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(lightgbm) # nolint: [unused_import] 3 | 4 | test_check( 5 | package = "lightgbm" 6 | , stop_on_failure = TRUE 7 | , stop_on_warning = FALSE 8 | , reporter = testthat::SummaryReporter$new() 9 | ) 10 | -------------------------------------------------------------------------------- /R-package/tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | # ref for this file: 2 | # 3 | # * https://r-pkgs.org/testing-design.html#testthat-helper-files 4 | # * https://r-pkgs.org/testing-design.html#testthat-setup-files 5 | 6 | # LightGBM-internal fix to comply with CRAN policy of only using up to 2 threads in tests and examples. 7 | # 8 | # per https://cran.r-project.org/web/packages/policies.html 9 | # 10 | # > If running a package uses multiple threads/cores it must never use more than two simultaneously: 11 | # the check farm is a shared resource and will typically be running many checks simultaneously. 12 | # 13 | .LGB_MAX_THREADS <- 2L 14 | setLGBMthreads(.LGB_MAX_THREADS) 15 | 16 | # control data.table parallelism 17 | # ref: https://github.com/Rdatatable/data.table/issues/5658 18 | data.table::setDTthreads(1L) 19 | 20 | # by default, how much should results in tests be allowed to differ from hard-coded expected numbers? 21 | .LGB_NUMERIC_TOLERANCE <- 1e-6 22 | 23 | # are the tests running on Windows? 24 | .LGB_ON_WINDOWS <- .Platform$OS.type == "windows" 25 | .LGB_ON_32_BIT_WINDOWS <- .LGB_ON_WINDOWS && .Machine$sizeof.pointer != 8L 26 | 27 | # are the tests running in a UTF-8 locale? 
28 | .LGB_UTF8_LOCALE <- all(endsWith( 29 | Sys.getlocale(category = "LC_CTYPE") 30 | , "UTF-8" 31 | )) 32 | 33 | # control how loud LightGBM's logger is in tests 34 | .LGB_VERBOSITY <- as.integer( 35 | Sys.getenv("LIGHTGBM_TEST_VERBOSITY", "-1") 36 | ) 37 | 38 | # [description] 39 | # test that every element of 'x' is in 'y' 40 | # 41 | # testthat::expect_in() is not available in versions of {testthat} 42 | # built for R 3.6; this is here to support a similar interface on R 3.6 43 | .expect_in <- function(x, y) { 44 | if (exists("expect_in")) { 45 | expect_in(x, y) 46 | } else { 47 | missing_items <- x[!(x %in% y)] 48 | if (length(missing_items) != 0L) { 49 | error_msg <- paste0("Some expected items not found: ", toString(missing_items)) 50 | stop(error_msg) 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_lgb.importance.R: -------------------------------------------------------------------------------- 1 | test_that("lgb.importance() should reject bad inputs", { 2 | bad_inputs <- list( 3 | .Machine$integer.max 4 | , Inf 5 | , -Inf 6 | , NA 7 | , NA_real_ 8 | , -10L:10L 9 | , list(c("a", "b", "c")) 10 | , data.frame( 11 | x = rnorm(20L) 12 | , y = sample( 13 | x = c(1L, 2L) 14 | , size = 20L 15 | , replace = TRUE 16 | ) 17 | ) 18 | , data.table::data.table( 19 | x = rnorm(20L) 20 | , y = sample( 21 | x = c(1L, 2L) 22 | , size = 20L 23 | , replace = TRUE 24 | ) 25 | ) 26 | , lgb.Dataset( 27 | data = matrix(rnorm(100L), ncol = 2L) 28 | , label = matrix(sample(c(0L, 1L), 50L, replace = TRUE)) 29 | ) 30 | , "lightgbm.model" 31 | ) 32 | for (input in bad_inputs) { 33 | expect_error({ 34 | lgb.importance(input) 35 | }, regexp = "'model' has to be an object of class lgb\\.Booster") 36 | } 37 | }) 38 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_lgb.plot.importance.R: -------------------------------------------------------------------------------- 1 | test_that("lgb.plot.importance() should run without error for well-formed inputs", { 2 | data(agaricus.train, package = "lightgbm") 3 | train <- agaricus.train 4 | dtrain <- lgb.Dataset(train$data, label = train$label) 5 | params <- list( 6 | objective = "binary" 7 | , learning_rate = 0.01 8 | , num_leaves = 63L 9 | , max_depth = -1L 10 | , min_data_in_leaf = 1L 11 | , min_sum_hessian_in_leaf = 1.0 12 | , verbosity = .LGB_VERBOSITY 13 | , num_threads = .LGB_MAX_THREADS 14 | ) 15 | model <- lgb.train(params, dtrain, 3L) 16 | tree_imp <- lgb.importance(model, percentage = TRUE) 17 | 18 | # Check that there are no plots present before plotting 19 | expect_null(dev.list()) 20 | 21 | args_no_cex <- list( 22 | "tree_imp" = tree_imp 23 | , top_n = 10L 24 | , measure = "Gain" 25 | ) 26 | args_cex <- args_no_cex 27 | args_cex[["cex"]] <- 0.75 28 | 29 | for (arg_list in list(args_no_cex, args_cex)) { 30 | 31 | resDT <- do.call( 32 | what = lgb.plot.importance 33 | , args = arg_list 34 | ) 35 | 36 | # Check that lgb.plot.importance() returns the data.table of the plotted data 37 | expect_true(data.table::is.data.table(resDT)) 38 | expect_named(resDT, c("Feature", "Gain", "Cover", "Frequency")) 39 | 40 | # Check that a plot was produced 41 | expect_false(is.null(dev.list())) 42 | 43 | # remove all plots 44 | dev.off() 45 | expect_null(dev.list()) 46 | } 47 | }) 48 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_metrics.R: 
-------------------------------------------------------------------------------- 1 | test_that(".METRICS_HIGHER_BETTER() should be well formed", { 2 | metrics <- .METRICS_HIGHER_BETTER() 3 | metric_names <- names(.METRICS_HIGHER_BETTER()) 4 | # should be a logical vector 5 | expect_true(is.logical(metrics)) 6 | # no metrics should be repeated 7 | expect_true(length(unique(metric_names)) == length(metrics)) 8 | # should not be any NAs 9 | expect_false(anyNA(metrics)) 10 | }) 11 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_multithreading.R: -------------------------------------------------------------------------------- 1 | test_that("getLGBMthreads() and setLGBMthreads() work as expected", { 2 | # works with integer input 3 | ret <- setLGBMthreads(2L) 4 | expect_null(ret) 5 | expect_equal(getLGBMthreads(), 2L) 6 | 7 | # works with float input 8 | ret <- setLGBMthreads(1.0) 9 | expect_null(ret) 10 | expect_equal(getLGBMthreads(), 1L) 11 | 12 | # setting to any negative number sets max threads to -1 13 | ret <- setLGBMthreads(-312L) 14 | expect_null(ret) 15 | expect_equal(getLGBMthreads(), -1L) 16 | }) 17 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_weighted_loss.R: -------------------------------------------------------------------------------- 1 | test_that("Gamma regression reacts on 'weight'", { 2 | n <- 100L 3 | set.seed(87L) 4 | X <- matrix(runif(2L * n), ncol = 2L) 5 | y <- X[, 1L] + X[, 2L] + runif(n) 6 | X_pred <- X[1L:5L, ] 7 | 8 | params <- list(objective = "gamma", num_threads = .LGB_MAX_THREADS) 9 | 10 | # Unweighted 11 | dtrain <- lgb.Dataset(X, label = y) 12 | bst <- lgb.train( 13 | params = params 14 | , data = dtrain 15 | , nrounds = 4L 16 | , verbose = .LGB_VERBOSITY 17 | ) 18 | pred_unweighted <- predict(bst, X_pred) 19 | 20 | # Constant weight 1 21 | dtrain <- lgb.Dataset( 22 | X 23 | , label = y 24 | , weight = rep(1.0, n) 25 | ) 26 | bst <- lgb.train( 27 | params = params 28 | , data = dtrain 29 | , nrounds = 4L 30 | , verbose = .LGB_VERBOSITY 31 | ) 32 | pred_weighted_1 <- predict(bst, X_pred) 33 | 34 | # Constant weight 2 35 | dtrain <- lgb.Dataset( 36 | X 37 | , label = y 38 | , weight = rep(2.0, n) 39 | ) 40 | bst <- lgb.train( 41 | params = params 42 | , data = dtrain 43 | , nrounds = 4L 44 | , verbose = .LGB_VERBOSITY 45 | ) 46 | pred_weighted_2 <- predict(bst, X_pred) 47 | 48 | # Non-constant weights 49 | dtrain <- lgb.Dataset( 50 | X 51 | , label = y 52 | , weight = seq(0.0, 1.0, length.out = n) 53 | ) 54 | bst <- lgb.train( 55 | params = params 56 | , data = dtrain 57 | , nrounds = 4L 58 | , verbose = .LGB_VERBOSITY 59 | ) 60 | pred_weighted <- predict(bst, X_pred) 61 | 62 | expect_equal(pred_unweighted, pred_weighted_1) 63 | expect_equal(pred_weighted_1, pred_weighted_2) 64 | expect_false(all(pred_unweighted == pred_weighted)) 65 | }) 66 | -------------------------------------------------------------------------------- /VERSION.txt: -------------------------------------------------------------------------------- 1 | 4.6.0.99 2 | -------------------------------------------------------------------------------- /biome.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": { 3 | "ignore": [".mypy_cache/"] 4 | }, 5 | "formatter": { 6 | "enabled": true, 7 | "useEditorconfig": true 8 | }, 9 | "organizeImports": { 10 | "enabled": true 11 | }, 12 | "linter": { 13 | "enabled": true, 14 
| "rules": { 15 | "all": true 16 | } 17 | }, 18 | "javascript": { 19 | "globals": ["$"] 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /cmake/Sanitizer.cmake: -------------------------------------------------------------------------------- 1 | # Set appropriate compiler and linker flags for sanitizers. 2 | # 3 | # Usage of this module: 4 | # enable_sanitizers("address;leak") 5 | 6 | # Add flags 7 | macro(enable_sanitizer sanitizer) 8 | if(${sanitizer} MATCHES "address") 9 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=address") 10 | 11 | elseif(${sanitizer} MATCHES "thread") 12 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=thread") 13 | 14 | elseif(${sanitizer} MATCHES "leak") 15 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=leak") 16 | 17 | elseif(${sanitizer} MATCHES "undefined") 18 | set(SAN_COMPILE_FLAGS "${SAN_COMPILE_FLAGS} -fsanitize=undefined -fno-sanitize-recover=undefined") 19 | 20 | else() 21 | message(FATAL_ERROR "Sanitizer ${sanitizer} not supported.") 22 | endif() 23 | endmacro() 24 | 25 | macro(enable_sanitizers SANITIZERS) 26 | # Check sanitizers compatibility. 27 | foreach(_san ${SANITIZERS}) 28 | string(TOLOWER ${_san} _san) 29 | if(_san MATCHES "thread") 30 | if(${_use_other_sanitizers}) 31 | message(FATAL_ERROR "thread sanitizer is not compatible with ${_san} sanitizer.") 32 | endif() 33 | set(_use_thread_sanitizer 1) 34 | else() 35 | if(${_use_thread_sanitizer}) 36 | message(FATAL_ERROR "${_san} sanitizer is not compatible with thread sanitizer.") 37 | endif() 38 | set(_use_other_sanitizers 1) 39 | endif() 40 | endforeach() 41 | 42 | message(STATUS "Sanitizers: ${SANITIZERS}") 43 | 44 | foreach(_san ${SANITIZERS}) 45 | string(TOLOWER ${_san} _san) 46 | enable_sanitizer(${_san}) 47 | endforeach() 48 | message(STATUS "Sanitizers compile flags: ${SAN_COMPILE_FLAGS}") 49 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_COMPILE_FLAGS}") 50 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_COMPILE_FLAGS}") 51 | endmacro() 52 | -------------------------------------------------------------------------------- /docker/dockerfile-cli: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV \ 4 | DEBIAN_FRONTEND=noninteractive \ 5 | LANG=C.UTF-8 \ 6 | LC_ALL=C.UTF-8 7 | 8 | RUN apt-get update -y && \ 9 | apt-get install -y --no-install-recommends \ 10 | ca-certificates \ 11 | curl \ 12 | build-essential \ 13 | gcc \ 14 | g++ \ 15 | git \ 16 | libomp-dev && \ 17 | rm -rf /var/lib/apt/lists/* 18 | 19 | RUN curl -L -o cmake.sh https://github.com/Kitware/CMake/releases/download/v3.29.2/cmake-3.29.2-linux-x86_64.sh && \ 20 | chmod +x cmake.sh && \ 21 | sh ./cmake.sh --prefix=/usr/local --skip-license && \ 22 | rm cmake.sh 23 | 24 | RUN git clone \ 25 | --recursive \ 26 | --branch stable \ 27 | --depth 1 \ 28 | https://github.com/Microsoft/LightGBM && \ 29 | cd ./LightGBM && \ 30 | cmake -B build -S . 
&& \ 31 | cmake --build build -j4 && \ 32 | cmake --install build && \ 33 | cd "${HOME}" && \ 34 | rm -rf LightGBM 35 | 36 | ENTRYPOINT ["lightgbm"] 37 | -------------------------------------------------------------------------------- /docker/dockerfile-python: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ARG CONDA_DIR=/opt/miniforge 4 | 5 | ENV \ 6 | DEBIAN_FRONTEND=noninteractive \ 7 | LANG=C.UTF-8 \ 8 | LC_ALL=C.UTF-8 \ 9 | PATH=$CONDA_DIR/bin:$PATH 10 | 11 | RUN apt-get update && \ 12 | apt-get install -y --no-install-recommends \ 13 | ca-certificates \ 14 | cmake \ 15 | build-essential \ 16 | gcc \ 17 | g++ \ 18 | curl \ 19 | git \ 20 | libomp-dev && \ 21 | # python environment 22 | curl -sL https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh -o miniforge.sh && \ 23 | /bin/bash miniforge.sh -f -b -p $CONDA_DIR && \ 24 | export PATH="$CONDA_DIR/bin:$PATH" && \ 25 | conda config --set always_yes yes --set changeps1 no && \ 26 | # lightgbm 27 | conda install -q -y numpy scipy scikit-learn pandas && \ 28 | git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \ 29 | cd ./LightGBM && \ 30 | sh ./build-python.sh install && \ 31 | # clean 32 | apt-get autoremove -y && apt-get clean && \ 33 | conda clean -a -y && \ 34 | rm -rf /usr/local/src/* 35 | -------------------------------------------------------------------------------- /docker/dockerfile-r: -------------------------------------------------------------------------------- 1 | ARG R_VERSION=latest 2 | FROM rocker/verse:${R_VERSION} 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | libomp-dev && \ 8 | git clone \ 9 | --recursive \ 10 | --branch stable \ 11 | --depth 1 https://github.com/Microsoft/LightGBM && \ 12 | cd ./LightGBM && \ 13 | sh build-cran-package.sh --no-build-vignettes && \ 14 | R CMD INSTALL ./lightgbm_*.tar.gz && \ 15 | cd .. && \ 16 | rm -rf ./LightGBM 17 | -------------------------------------------------------------------------------- /docker/gpu/README.md: -------------------------------------------------------------------------------- 1 | # Tiny Distroless Dockerfile for LightGBM GPU CLI-only Version 2 | 3 | `dockerfile-cli-only-distroless.gpu` - A multi-stage build based on the `nvidia/opencl:devel-ubuntu18.04` (build) and `distroless/cc-debian10` (production) images. LightGBM (CLI-only) can be utilized in GPU and CPU modes. The resulting image size is around 15 MB. 4 | 5 | --- 6 | 7 | # Small Dockerfile for LightGBM GPU CLI-only Version 8 | 9 | `dockerfile-cli-only.gpu` - A multi-stage build based on the `nvidia/opencl:devel` (build) and `nvidia/opencl:runtime` (production) images. LightGBM (CLI-only) can be utilized in GPU and CPU modes. The resulting image size is around 100 MB. 10 | 11 | --- 12 | 13 | # Dockerfile for LightGBM GPU Version with Python 14 | 15 | `dockerfile.gpu` - A docker file with LightGBM utilizing nvidia-docker. The file is based on the `nvidia/cuda:8.0-cudnn5-devel` image. 16 | LightGBM can be utilized in GPU and CPU modes and via Python. 17 | 18 | ## Contents 19 | 20 | - LightGBM (cpu + gpu) 21 | - Python (conda) + scikit-learn, notebooks, pandas, matplotlib 22 | 23 | Running the container starts a Jupyter Notebook at `localhost:8888`. 24 | 25 | Jupyter password: `keras`. 
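Once inside the container, you can verify that the GPU build works by training a tiny model with `device=gpu`. A minimal Python sketch (the dataset below is synthetic and purely illustrative):

```python
import numpy as np

import lightgbm as lgb

# tiny synthetic binary-classification dataset, purely illustrative
rng = np.random.default_rng(0)
X = rng.random((500, 10))
y = rng.integers(0, 2, size=500)

# "device": "gpu" asks LightGBM to use its OpenCL-based GPU code path;
# if the GPU build is working, the log shows the platform/device selected
params = {"objective": "binary", "device": "gpu"}
bst = lgb.train(params, lgb.Dataset(X, label=y), num_boost_round=5)
print(f"trained {bst.num_trees()} trees")
```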
26 | 27 | ## Requirements 28 | 29 | Requires docker and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) on the host machine. 30 | 31 | ## Quickstart 32 | 33 | ### Build Docker Image 34 | 35 | ```sh 36 | mkdir lightgbm-docker 37 | cd lightgbm-docker 38 | wget https://raw.githubusercontent.com/Microsoft/LightGBM/master/docker/gpu/dockerfile.gpu 39 | docker build -f dockerfile.gpu -t lightgbm-gpu . 40 | ``` 41 | 42 | ### Run Image 43 | 44 | ```sh 45 | nvidia-docker run --rm -d --name lightgbm-gpu -p 8888:8888 -v /home:/home lightgbm-gpu 46 | ``` 47 | 48 | ### Attach with Command Line Access (if required) 49 | 50 | ```sh 51 | docker exec -it lightgbm-gpu bash 52 | ``` 53 | 54 | ### Jupyter Notebook 55 | 56 | ```sh 57 | localhost:8888 58 | ``` 59 | -------------------------------------------------------------------------------- /docs/.linkcheckerrc: -------------------------------------------------------------------------------- 1 | [checking] 2 | maxrequestspersecond=0.1 3 | recursionlevel=1 4 | anchors=1 5 | sslverify=0 6 | threads=4 7 | 8 | [filtering] 9 | ignore= 10 | pythonapi/lightgbm\..*\.html.* 11 | http.*amd.com/.* 12 | https.*dl.acm.org/doi/.* 13 | https.*tandfonline.com/.* 14 | ignorewarnings=http-redirected,http-robots-denied,https-certificate-error 15 | checkextern=1 16 | 17 | [output] 18 | # Set to 1 if you want to see the full output, not only warnings and errors 19 | verbose=0 20 | 21 | [AnchorCheck] 22 | -------------------------------------------------------------------------------- /docs/C-API.rst: -------------------------------------------------------------------------------- 1 | C API 2 | ===== 3 | 4 | .. doxygenfile:: c_api.h 5 | -------------------------------------------------------------------------------- /docs/GPU-Windows.rst: -------------------------------------------------------------------------------- 1 | The content of this document was very outdated and has been removed to avoid misleading readers. 2 | 3 | Starting from version ``3.2.0``, LightGBM Python packages have had built-in support for training on GPU devices. 4 | -------------------------------------------------------------------------------- /docs/Key-Events.md: -------------------------------------------------------------------------------- 1 | The content of this document was very outdated and has been removed to avoid misleading readers. 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = -W 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = LightGBM 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/Python-API.rst: -------------------------------------------------------------------------------- 1 | Python API 2 | ========== 3 | 4 | .. 
currentmodule:: lightgbm 5 | 6 | Data Structure API 7 | ------------------ 8 | 9 | .. autosummary:: 10 | :toctree: pythonapi/ 11 | 12 | Dataset 13 | Booster 14 | CVBooster 15 | Sequence 16 | 17 | Training API 18 | ------------ 19 | 20 | .. autosummary:: 21 | :toctree: pythonapi/ 22 | 23 | train 24 | cv 25 | 26 | Scikit-learn API 27 | ---------------- 28 | 29 | .. autosummary:: 30 | :toctree: pythonapi/ 31 | 32 | LGBMModel 33 | LGBMClassifier 34 | LGBMRegressor 35 | LGBMRanker 36 | 37 | Dask API 38 | -------- 39 | 40 | .. versionadded:: 3.2.0 41 | 42 | .. autosummary:: 43 | :toctree: pythonapi/ 44 | 45 | DaskLGBMClassifier 46 | DaskLGBMRegressor 47 | DaskLGBMRanker 48 | 49 | Callbacks 50 | --------- 51 | 52 | .. autosummary:: 53 | :toctree: pythonapi/ 54 | 55 | early_stopping 56 | log_evaluation 57 | record_evaluation 58 | reset_parameter 59 | 60 | Plotting 61 | -------- 62 | 63 | .. autosummary:: 64 | :toctree: pythonapi/ 65 | 66 | plot_importance 67 | plot_split_value_histogram 68 | plot_metric 69 | plot_tree 70 | create_tree_digraph 71 | 72 | Utilities 73 | --------- 74 | 75 | .. autosummary:: 76 | :toctree: pythonapi/ 77 | 78 | register_logger 79 | -------------------------------------------------------------------------------- /docs/_static/images/artifacts-download.svg: -------------------------------------------------------------------------------- 1 | artifactsartifactsdownloaddownload 2 | -------------------------------------------------------------------------------- /docs/_static/images/artifacts-fetching.svg: -------------------------------------------------------------------------------- 1 | artifactsartifactsfetching...fetching... 2 | -------------------------------------------------------------------------------- /docs/_static/images/artifacts-not-available.svg: -------------------------------------------------------------------------------- 1 | artifactsartifactslink is available only on Read the Docs sitelink is available only on Read the Docs site 2 | -------------------------------------------------------------------------------- /docs/_static/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/_static/images/favicon.ico -------------------------------------------------------------------------------- /docs/_static/images/gpu-performance-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/_static/images/gpu-performance-comparison.png -------------------------------------------------------------------------------- /docs/_static/images/leaf-wise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/_static/images/leaf-wise.png -------------------------------------------------------------------------------- /docs/_static/images/level-wise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/_static/images/level-wise.png -------------------------------------------------------------------------------- /docs/build-docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 
-E -u -o pipefail 4 | 5 | rm -f ./_FIRST_RUN.flag 6 | 7 | export PATH="${CONDA}/bin:${PATH}" 8 | 9 | curl \ 10 | -sL \ 11 | -o "${HOME}/miniforge.sh" \ 12 | https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh 13 | 14 | /bin/bash "${HOME}/miniforge.sh" -b -p "${CONDA}" 15 | 16 | conda config --set always_yes yes --set changeps1 no 17 | conda update -q -y conda 18 | 19 | conda env create \ 20 | --name docs-env \ 21 | --file env.yml || exit 1 22 | 23 | # shellcheck disable=SC1091 24 | source activate docs-env 25 | make clean html || exit 1 26 | 27 | echo "Done building docs. Open docs/_build/html/index.html in a web browser to view them." 28 | -------------------------------------------------------------------------------- /docs/env.yml: -------------------------------------------------------------------------------- 1 | name: docs-env 2 | channels: 3 | - nodefaults 4 | - conda-forge 5 | dependencies: 6 | - breathe>=4.35 7 | - python=3.12 8 | - r-base>=4.3.3 9 | - r-data.table=1.16.4 10 | - r-jsonlite=1.8.9 11 | - r-knitr=1.49 12 | - r-markdown=1.13 13 | - r-matrix=1.6_5 14 | - r-pkgdown=2.1.1 15 | - r-roxygen2=7.3.2 16 | - scikit-learn>=1.6.1 17 | - sphinx>=8.1.3 18 | - sphinx_rtd_theme>=3.0.1 19 | -------------------------------------------------------------------------------- /docs/gcc-Tips.rst: -------------------------------------------------------------------------------- 1 | The content of this document was very outdated and has been removed to avoid misleading readers. 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. LightGBM documentation master file, created by 2 | sphinx-quickstart on Thu May 4 14:30:58 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: ./logo/LightGBM_logo_black_text.svg 7 | :align: center 8 | :width: 600 9 | :alt: Light Gradient Boosting Machine logo. 10 | 11 | | 12 | 13 | Welcome to LightGBM's documentation! 14 | ==================================== 15 | 16 | **LightGBM** is a gradient boosting framework that uses tree-based learning algorithms. It is designed to be distributed and efficient with the following advantages: 17 | 18 | - Faster training speed and higher efficiency. 19 | - Lower memory usage. 20 | - Better accuracy. 21 | - Support of parallel, distributed, and GPU learning. 22 | - Capable of handling large-scale data. 23 | 24 | For more details, please refer to `Features <./Features.rst>`__. 25 | 26 | .. toctree:: 27 | :maxdepth: 1 28 | :caption: Contents: 29 | 30 | Installation Guide 31 | Quick Start 32 | Python Quick Start 33 | Features 34 | Experiments 35 | Parameters 36 | Parameters Tuning 37 | C API 38 | Python API 39 | R API 40 | Distributed Learning Guide 41 | GPU Tutorial 42 | Advanced Topics 43 | FAQ 44 | Development Guide 45 | 46 | .. 
toctree:: 47 | :hidden: 48 | 49 | GPU-Performance 50 | GPU-Targets 51 | GPU-Windows 52 | gcc-Tips 53 | README 54 | 55 | Indices and Tables 56 | ================== 57 | 58 | * :ref:`genindex` 59 | -------------------------------------------------------------------------------- /docs/logo/LightGBM-logo-hex.cdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM-logo-hex.cdr -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo-hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo-hex.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo.cdr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo.cdr -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_black_text_huge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_black_text_huge.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_black_text_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_black_text_large.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_black_text_medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_black_text_medium.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_black_text_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_black_text_small.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_black_text_tiny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_black_text_tiny.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_grey_text_huge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_grey_text_huge.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_grey_text_large.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_grey_text_large.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_grey_text_medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_grey_text_medium.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_grey_text_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_grey_text_small.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_grey_text_tiny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_grey_text_tiny.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_no_text.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 8 | 9 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_no_text_huge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_no_text_huge.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_no_text_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_no_text_large.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_no_text_medium.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_no_text_medium.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_no_text_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_no_text_small.png -------------------------------------------------------------------------------- /docs/logo/LightGBM_logo_no_text_tiny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/docs/logo/LightGBM_logo_no_text_tiny.png -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | set SPHINXPROJ=LightGBM 13 | set SPHINXOPTS=-W 14 | 15 | if "%1" == "" goto help 16 | 17 | %SPHINXBUILD% >NUL 2>NUL 18 | if errorlevel 9009 ( 19 | echo. 20 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 21 | echo.installed, then set the SPHINXBUILD environment variable to point 22 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 23 | echo.may add the Sphinx directory to PATH. 24 | echo. 25 | echo.If you don't have Sphinx installed, grab it from 26 | echo.https://www.sphinx-doc.org/ 27 | exit /b 1 28 | ) 29 | 30 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 31 | goto end 32 | 33 | :help 34 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 35 | 36 | :end 37 | popd 38 | -------------------------------------------------------------------------------- /examples/binary_classification/README.md: -------------------------------------------------------------------------------- 1 | Binary Classification Example 2 | ============================= 3 | 4 | Here is an example for LightGBM to run a binary classification task. 5 | 6 | ***You must follow the [installation instructions](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html) 7 | for the following commands to work. The `lightgbm` binary must be built and available at the root of this project.*** 8 | 9 | Training 10 | -------- 11 | 12 | Run the following command in this folder: 13 | 14 | ```bash 15 | "../../lightgbm" config=train.conf 16 | ``` 17 | 18 | Prediction 19 | ---------- 20 | 21 | You should finish training first. 22 | 23 | Run the following command in this folder: 24 | 25 | ```bash 26 | "../../lightgbm" config=predict.conf 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/binary_classification/forced_splits.json: -------------------------------------------------------------------------------- 1 | { 2 | "feature": 25, 3 | "threshold": 1.3, 4 | "left": { 5 | "feature": 26, 6 | "threshold": 0.85 7 | }, 8 | "right": { 9 | "feature": 26, 10 | "threshold": 0.85 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /examples/binary_classification/predict.conf: -------------------------------------------------------------------------------- 1 | task = predict 2 | 3 | data = binary.test 4 | 5 | input_model= LightGBM_model.txt 6 | -------------------------------------------------------------------------------- /examples/lambdarank/README.md: -------------------------------------------------------------------------------- 1 | LambdaRank Example 2 | ================== 3 | 4 | Here is an example for LightGBM to run a LambdaRank task. 5 | 6 | ***You must follow the [installation instructions](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html) 7 | for the following commands to work. The `lightgbm` binary must be built and available at the root of this project.*** 8 | 9 | Training 10 | -------- 11 | 12 | Run the following command in this folder: 13 | 14 | ```bash 15 | "../../lightgbm" config=train.conf 16 | ``` 17 | 18 | Prediction 19 | ---------- 20 | 21 | You should finish training first. 22 | 23 | Run the following command in this folder: 24 | 25 | ```bash 26 | "../../lightgbm" config=predict.conf 27 | ``` 28 | 29 | Data Format 30 | ----------- 31 | 32 | To learn more about the query format used in this example, check out the 33 | [query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data). 
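As a quick sanity check of that format: each line of a `.query` file is the size of one consecutive group of rows in the matching data file, and the sizes must sum to the total number of rows. A minimal Python sketch (run from this folder) illustrating the relationship:

```python
# each line in rank.train.query is the number of consecutive rows in
# rank.train that belong to the same query (group)
with open("rank.train.query") as f:
    group_sizes = [int(line) for line in f]

with open("rank.train") as f:
    n_rows = sum(1 for _ in f)

# the group sizes must account for every training row exactly once
assert sum(group_sizes) == n_rows
print(f"{len(group_sizes)} queries covering {n_rows} documents")
```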
34 | -------------------------------------------------------------------------------- /examples/lambdarank/predict.conf: -------------------------------------------------------------------------------- 1 | task = predict 2 | 3 | data = rank.test 4 | 5 | input_model= LightGBM_model.txt 6 | -------------------------------------------------------------------------------- /examples/lambdarank/rank.test.query: -------------------------------------------------------------------------------- 1 | 12 2 | 19 3 | 18 4 | 10 5 | 15 6 | 15 7 | 22 8 | 23 9 | 18 10 | 16 11 | 16 12 | 11 13 | 6 14 | 13 15 | 17 16 | 21 17 | 20 18 | 16 19 | 13 20 | 16 21 | 21 22 | 15 23 | 10 24 | 19 25 | 10 26 | 13 27 | 18 28 | 17 29 | 23 30 | 24 31 | 16 32 | 13 33 | 17 34 | 24 35 | 17 36 | 10 37 | 17 38 | 15 39 | 18 40 | 16 41 | 9 42 | 9 43 | 21 44 | 14 45 | 13 46 | 13 47 | 13 48 | 10 49 | 10 50 | 6 51 | -------------------------------------------------------------------------------- /examples/lambdarank/rank.train.query: -------------------------------------------------------------------------------- 1 | 1 2 | 13 3 | 5 4 | 8 5 | 19 6 | 12 7 | 18 8 | 5 9 | 14 10 | 13 11 | 8 12 | 9 13 | 16 14 | 11 15 | 21 16 | 14 17 | 21 18 | 9 19 | 14 20 | 11 21 | 20 22 | 18 23 | 13 24 | 20 25 | 22 26 | 22 27 | 13 28 | 17 29 | 10 30 | 13 31 | 12 32 | 13 33 | 13 34 | 23 35 | 18 36 | 13 37 | 20 38 | 12 39 | 22 40 | 14 41 | 13 42 | 23 43 | 13 44 | 14 45 | 14 46 | 5 47 | 13 48 | 15 49 | 14 50 | 14 51 | 16 52 | 16 53 | 15 54 | 21 55 | 22 56 | 10 57 | 22 58 | 18 59 | 25 60 | 16 61 | 12 62 | 12 63 | 15 64 | 15 65 | 25 66 | 13 67 | 9 68 | 12 69 | 8 70 | 16 71 | 25 72 | 19 73 | 24 74 | 12 75 | 16 76 | 10 77 | 16 78 | 9 79 | 17 80 | 15 81 | 7 82 | 9 83 | 15 84 | 14 85 | 16 86 | 17 87 | 8 88 | 17 89 | 12 90 | 18 91 | 23 92 | 10 93 | 12 94 | 12 95 | 4 96 | 14 97 | 12 98 | 15 99 | 27 100 | 16 101 | 20 102 | 13 103 | 19 104 | 13 105 | 17 106 | 17 107 | 16 108 | 12 109 | 15 110 | 14 111 | 14 112 | 19 113 | 12 114 | 23 115 | 18 116 | 16 117 | 9 118 | 23 119 | 11 120 | 15 121 | 8 122 | 10 123 | 10 124 | 16 125 | 11 126 | 15 127 | 22 128 | 16 129 | 17 130 | 23 131 | 16 132 | 22 133 | 17 134 | 14 135 | 12 136 | 14 137 | 20 138 | 15 139 | 17 140 | 15 141 | 15 142 | 22 143 | 9 144 | 21 145 | 9 146 | 17 147 | 16 148 | 15 149 | 13 150 | 13 151 | 15 152 | 14 153 | 18 154 | 21 155 | 14 156 | 17 157 | 15 158 | 14 159 | 16 160 | 12 161 | 17 162 | 19 163 | 16 164 | 11 165 | 18 166 | 11 167 | 13 168 | 14 169 | 9 170 | 16 171 | 15 172 | 16 173 | 25 174 | 9 175 | 13 176 | 22 177 | 16 178 | 18 179 | 20 180 | 14 181 | 11 182 | 9 183 | 16 184 | 19 185 | 19 186 | 11 187 | 11 188 | 13 189 | 14 190 | 14 191 | 13 192 | 16 193 | 6 194 | 21 195 | 16 196 | 12 197 | 16 198 | 11 199 | 24 200 | 12 201 | 10 202 | -------------------------------------------------------------------------------- /examples/multiclass_classification/README.md: -------------------------------------------------------------------------------- 1 | Multiclass Classification Example 2 | ================================= 3 | 4 | Here is an example for LightGBM to run multiclass classification task. 5 | 6 | ***You must follow the [installation instructions](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html) 7 | for the following commands to work. 
The `lightgbm` binary must be built and available at the root of this project.*** 8 | 9 | Training 10 | -------- 11 | 12 | Run the following command in this folder: 13 | 14 | ```bash 15 | "../../lightgbm" config=train.conf 16 | ``` 17 | 18 | Prediction 19 | ---------- 20 | 21 | You should finish training first. 22 | 23 | Run the following command in this folder: 24 | 25 | ```bash 26 | "../../lightgbm" config=predict.conf 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/multiclass_classification/predict.conf: -------------------------------------------------------------------------------- 1 | task = predict 2 | 3 | data = multiclass.test 4 | 5 | input_model= LightGBM_model.txt 6 | -------------------------------------------------------------------------------- /examples/multiclass_classification/train.conf: -------------------------------------------------------------------------------- 1 | # task type, supports train and predict 2 | task = train 3 | 4 | # boosting type, supports gbdt for now, alias: boosting, boost 5 | boosting_type = gbdt 6 | 7 | # application type, supports the following applications 8 | # regression , regression task 9 | # binary , binary classification task 10 | # lambdarank , LambdaRank task 11 | # multiclass 12 | # alias: application, app 13 | objective = multiclass 14 | 15 | # eval metrics; multiple metrics are supported, delimited by ',' , supports the following metrics 16 | # l1 17 | # l2 , default metric for regression 18 | # ndcg , default metric for lambdarank 19 | # auc 20 | # binary_logloss , default metric for binary 21 | # binary_error 22 | # multi_logloss 23 | # multi_error 24 | # auc_mu 25 | metric = multi_logloss,auc_mu 26 | 27 | # AUC-mu weights; the matrix of loss weights below is passed in parameter auc_mu_weights as a list 28 | # 0 1 2 3 4 29 | # 5 0 6 7 8 30 | # 9 10 0 11 12 31 | # 13 14 15 0 16 32 | # 17 18 19 20 0 33 | auc_mu_weights = 0,1,2,3,4,5,0,6,7,8,9,10,0,11,12,13,14,15,0,16,17,18,19,20,0 34 | 35 | # number of classes, for multiclass classification 36 | num_class = 5 37 | 38 | # frequency for metric output 39 | metric_freq = 1 40 | 41 | # true if you need metric output for training data, alias: training_metric, train_metric 42 | is_training_metric = true 43 | 44 | # column in data to use as label 45 | label_column = 0 46 | 47 | # number of bins for feature buckets; 255 is a recommended setting, as it can save memory and also gives good accuracy. 48 | max_bin = 255 49 | 50 | # training data 51 | # if there is a weight file, it should be named "multiclass.train.weight" 52 | # alias: train_data, train 53 | data = multiclass.train 54 | 55 | # valid data 56 | valid_data = multiclass.test 57 | 58 | # rounds for early stopping 59 | early_stopping = 10 60 | 61 | # number of trees (iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds 62 | num_trees = 100 63 | 64 | # shrinkage rate, alias: shrinkage_rate 65 | learning_rate = 0.05 66 | 67 | # number of leaves for one tree, alias: num_leaf 68 | num_leaves = 31 69 | -------------------------------------------------------------------------------- /examples/parallel_learning/README.md: -------------------------------------------------------------------------------- 1 | Distributed Learning Example 2 | ============================ 3 | 4 | 5 | Here is an example for LightGBM to perform distributed learning across 2 machines. 6 | 7 | 1. Edit [mlist.txt](./mlist.txt): write the IPs of the 2 machines that you want to run the application on. 
8 | 9 | ``` 10 | machine1_ip 12400 11 | machine2_ip 12400 12 | ``` 13 | 14 | 2. Copy this folder and the `lightgbm` executable to the 2 machines that you want to run the application on. 15 | 16 | 3. Run the following command in this folder on both machines: 17 | 18 | ```"./lightgbm" config=train.conf``` 19 | 20 | This distributed learning example is based on sockets. LightGBM also supports distributed learning based on MPI. 21 | 22 | For more details about the usage of distributed learning, please refer to the [Distributed Learning Guide](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst). 23 | -------------------------------------------------------------------------------- /examples/parallel_learning/mlist.txt: -------------------------------------------------------------------------------- 1 | 192.168.1.101 12400 2 | 192.168.1.102 12400 3 | -------------------------------------------------------------------------------- /examples/parallel_learning/predict.conf: -------------------------------------------------------------------------------- 1 | task = predict 2 | 3 | data = binary.test 4 | 5 | input_model = LightGBM_model.txt 6 | -------------------------------------------------------------------------------- /examples/python-guide/dask/README.md: -------------------------------------------------------------------------------- 1 | Dask Examples 2 | ============= 3 | 4 | This directory contains examples of machine learning workflows with LightGBM and [Dask](https://dask.org/). 5 | 6 | Before running this code, see [the installation instructions for the Dask-package](https://github.com/microsoft/LightGBM/tree/master/python-package#install-dask-package). 7 | 8 | After installing the package and its dependencies, any of the examples here can be run with a command like this: 9 | 10 | ```shell 11 | python binary-classification.py 12 | ``` 13 | 14 | The examples listed below contain minimal code showing how to train LightGBM models using Dask.
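Each script creates its own `LocalCluster`. If you already have a Dask cluster running, a minimal sketch like the one below connects to it instead; the scheduler address is a placeholder you would replace with your own.

```python
from distributed import Client

import lightgbm as lgb

# connect to an already-running Dask scheduler (address is a placeholder)
client = Client("tcp://127.0.0.1:8786")

# Dask estimators use the current default client unless one is passed explicitly
dask_model = lgb.DaskLGBMClassifier(n_estimators=10, client=client)
```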
15 | 16 | **Training** 17 | 18 | * [binary-classification.py](./binary-classification.py) 19 | * [multiclass-classification.py](./multiclass-classification.py) 20 | * [ranking.py](./ranking.py) 21 | * [regression.py](./regression.py) 22 | 23 | **Prediction** 24 | 25 | * [prediction.py](./prediction.py) 26 | -------------------------------------------------------------------------------- /examples/python-guide/dask/binary-classification.py: -------------------------------------------------------------------------------- 1 | import dask.array as da 2 | from distributed import Client, LocalCluster 3 | from sklearn.datasets import make_blobs 4 | 5 | import lightgbm as lgb 6 | 7 | if __name__ == "__main__": 8 | print("loading data") 9 | 10 | X, y = make_blobs(n_samples=1000, n_features=50, centers=2) 11 | 12 | print("initializing a Dask cluster") 13 | 14 | cluster = LocalCluster() 15 | client = Client(cluster) 16 | 17 | print("created a Dask LocalCluster") 18 | 19 | print("distributing training data on the Dask cluster") 20 | 21 | dX = da.from_array(X, chunks=(100, 50)) 22 | dy = da.from_array(y, chunks=(100,)) 23 | 24 | print("beginning training") 25 | 26 | dask_model = lgb.DaskLGBMClassifier(n_estimators=10) 27 | dask_model.fit(dX, dy) 28 | assert dask_model.fitted_ 29 | 30 | print("done training") 31 | -------------------------------------------------------------------------------- /examples/python-guide/dask/multiclass-classification.py: -------------------------------------------------------------------------------- 1 | import dask.array as da 2 | from distributed import Client, LocalCluster 3 | from sklearn.datasets import make_blobs 4 | 5 | import lightgbm as lgb 6 | 7 | if __name__ == "__main__": 8 | print("loading data") 9 | 10 | X, y = make_blobs(n_samples=1000, n_features=50, centers=3) 11 | 12 | print("initializing a Dask cluster") 13 | 14 | cluster = LocalCluster(n_workers=2) 15 | client = Client(cluster) 16 | 17 | print("created a Dask LocalCluster") 18 | 19 | print("distributing training data on the Dask cluster") 20 | 21 | dX = da.from_array(X, chunks=(100, 50)) 22 | dy = da.from_array(y, chunks=(100,)) 23 | 24 | print("beginning training") 25 | 26 | dask_model = lgb.DaskLGBMClassifier(n_estimators=10) 27 | dask_model.fit(dX, dy) 28 | assert dask_model.fitted_ 29 | 30 | print("done training") 31 | -------------------------------------------------------------------------------- /examples/python-guide/dask/prediction.py: -------------------------------------------------------------------------------- 1 | import dask.array as da 2 | from distributed import Client, LocalCluster 3 | from sklearn.datasets import make_regression 4 | from sklearn.metrics import mean_squared_error 5 | 6 | import lightgbm as lgb 7 | 8 | if __name__ == "__main__": 9 | print("loading data") 10 | 11 | X, y = make_regression(n_samples=1000, n_features=50) 12 | 13 | print("initializing a Dask cluster") 14 | 15 | cluster = LocalCluster(n_workers=2) 16 | client = Client(cluster) 17 | 18 | print("created a Dask LocalCluster") 19 | 20 | print("distributing training data on the Dask cluster") 21 | 22 | dX = da.from_array(X, chunks=(100, 50)) 23 | dy = da.from_array(y, chunks=(100,)) 24 | 25 | print("beginning training") 26 | 27 | dask_model = lgb.DaskLGBMRegressor(n_estimators=10) 28 | dask_model.fit(dX, dy) 29 | assert dask_model.fitted_ 30 | 31 | print("done training") 32 | 33 | print("predicting on the training data") 34 | 35 | preds = dask_model.predict(dX) 36 | 37 | # the code below uses sklearn.metrics, but 
this requires pulling all of the 38 | # predictions and target values back from workers to the client 39 | # 40 | # for larger datasets, consider the metrics from dask-ml instead 41 | # https://ml.dask.org/modules/api.html#dask-ml-metrics-metrics 42 | print("computing MSE") 43 | 44 | preds_local = preds.compute() 45 | actuals_local = dy.compute() 46 | mse = mean_squared_error(actuals_local, preds_local) 47 | 48 | print(f"MSE: {mse}") 49 | -------------------------------------------------------------------------------- /examples/python-guide/dask/ranking.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import dask.array as da 4 | import numpy as np 5 | from distributed import Client, LocalCluster 6 | from sklearn.datasets import load_svmlight_file 7 | 8 | import lightgbm as lgb 9 | 10 | if __name__ == "__main__": 11 | print("loading data") 12 | 13 | rank_example_dir = Path(__file__).absolute().parents[2] / "lambdarank" 14 | X, y = load_svmlight_file(str(rank_example_dir / "rank.train")) 15 | group = np.loadtxt(str(rank_example_dir / "rank.train.query")) 16 | 17 | print("initializing a Dask cluster") 18 | 19 | cluster = LocalCluster(n_workers=2) 20 | client = Client(cluster) 21 | 22 | print("created a Dask LocalCluster") 23 | 24 | print("distributing training data on the Dask cluster") 25 | 26 | # split training data into two partitions 27 | rows_in_part1 = int(np.sum(group[:100])) 28 | rows_in_part2 = X.shape[0] - rows_in_part1 29 | num_features = X.shape[1] 30 | 31 | # make this array dense because we're splitting across 32 | # a sparse boundary to partition the data 33 | X = X.toarray() 34 | 35 | dX = da.from_array(x=X, chunks=[(rows_in_part1, rows_in_part2), (num_features,)]) 36 | dy = da.from_array( 37 | x=y, 38 | chunks=[ 39 | (rows_in_part1, rows_in_part2), 40 | ], 41 | ) 42 | dg = da.from_array(x=group, chunks=[(100, group.size - 100)]) 43 | 44 | print("beginning training") 45 | 46 | dask_model = lgb.DaskLGBMRanker(n_estimators=10) 47 | dask_model.fit(dX, dy, group=dg) 48 | assert dask_model.fitted_ 49 | 50 | print("done training") 51 | -------------------------------------------------------------------------------- /examples/python-guide/dask/regression.py: -------------------------------------------------------------------------------- 1 | import dask.array as da 2 | from distributed import Client, LocalCluster 3 | from sklearn.datasets import make_regression 4 | 5 | import lightgbm as lgb 6 | 7 | if __name__ == "__main__": 8 | print("loading data") 9 | 10 | X, y = make_regression(n_samples=1000, n_features=50) 11 | 12 | print("initializing a Dask cluster") 13 | 14 | cluster = LocalCluster(n_workers=2) 15 | client = Client(cluster) 16 | 17 | print("created a Dask LocalCluster") 18 | 19 | print("distributing training data on the Dask cluster") 20 | 21 | dX = da.from_array(X, chunks=(100, 50)) 22 | dy = da.from_array(y, chunks=(100,)) 23 | 24 | print("beginning training") 25 | 26 | dask_model = lgb.DaskLGBMRegressor(n_estimators=10) 27 | dask_model.fit(dX, dy) 28 | assert dask_model.fitted_ 29 | 30 | print("done training") 31 | -------------------------------------------------------------------------------- /examples/python-guide/plot_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | 6 | import lightgbm as lgb 7 | 8 | if lgb.compat.MATPLOTLIB_INSTALLED: 9 | import matplotlib.pyplot as plt 
10 | else: 11 | raise ImportError("You need to install matplotlib and restart your session for plot_example.py.") 12 | 13 | print("Loading data...") 14 | # load or create your dataset 15 | regression_example_dir = Path(__file__).absolute().parents[1] / "regression" 16 | df_train = pd.read_csv(str(regression_example_dir / "regression.train"), header=None, sep="\t") 17 | df_test = pd.read_csv(str(regression_example_dir / "regression.test"), header=None, sep="\t") 18 | 19 | y_train = df_train[0] 20 | y_test = df_test[0] 21 | X_train = df_train.drop(0, axis=1) 22 | X_test = df_test.drop(0, axis=1) 23 | 24 | # create dataset for lightgbm 25 | lgb_train = lgb.Dataset( 26 | X_train, 27 | y_train, 28 | feature_name=[f"f{i + 1}" for i in range(X_train.shape[-1])], 29 | categorical_feature=[21], 30 | ) 31 | lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train) 32 | 33 | # specify your configurations as a dict 34 | params = {"num_leaves": 5, "metric": ("l1", "l2"), "verbose": 0} 35 | 36 | evals_result = {}  # to record eval results for plotting 37 | 38 | print("Starting training...") 39 | # train 40 | gbm = lgb.train( 41 | params, 42 | lgb_train, 43 | num_boost_round=100, 44 | valid_sets=[lgb_train, lgb_test], 45 | callbacks=[lgb.log_evaluation(10), lgb.record_evaluation(evals_result)], 46 | ) 47 | 48 | print("Plotting metrics recorded during training...") 49 | ax = lgb.plot_metric(evals_result, metric="l1") 50 | plt.show() 51 | 52 | print("Plotting feature importances...") 53 | ax = lgb.plot_importance(gbm, max_num_features=10) 54 | plt.show() 55 | 56 | print("Plotting split value histogram...") 57 | ax = lgb.plot_split_value_histogram(gbm, feature="f26", bins="auto") 58 | plt.show() 59 | 60 | print("Plotting 54th tree...")  # this tree uses a categorical feature to split 61 | ax = lgb.plot_tree(gbm, tree_index=53, figsize=(15, 15), show_info=["split_gain"]) 62 | plt.show() 63 | 64 | print("Plotting 54th tree with graphviz...") 65 | graph = lgb.create_tree_digraph(gbm, tree_index=53, name="Tree54") 66 | graph.render(view=True) 67 | -------------------------------------------------------------------------------- /examples/python-guide/simple_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from pathlib import Path 3 | 4 | import pandas as pd 5 | from sklearn.metrics import mean_squared_error 6 | 7 | import lightgbm as lgb 8 | 9 | print("Loading data...") 10 | # load or create your dataset 11 | regression_example_dir = Path(__file__).absolute().parents[1] / "regression" 12 | df_train = pd.read_csv(str(regression_example_dir / "regression.train"), header=None, sep="\t") 13 | df_test = pd.read_csv(str(regression_example_dir / "regression.test"), header=None, sep="\t") 14 | 15 | y_train = df_train[0] 16 | y_test = df_test[0] 17 | X_train = df_train.drop(0, axis=1) 18 | X_test = df_test.drop(0, axis=1) 19 | 20 | # create dataset for lightgbm 21 | lgb_train = lgb.Dataset(X_train, y_train) 22 | lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) 23 | 24 | # specify your configurations as a dict 25 | params = { 26 | "boosting_type": "gbdt", 27 | "objective": "regression", 28 | "metric": {"l2", "l1"}, 29 | "num_leaves": 31, 30 | "learning_rate": 0.05, 31 | "feature_fraction": 0.9, 32 | "bagging_fraction": 0.8, 33 | "bagging_freq": 5, 34 | "verbose": 0, 35 | } 36 | 37 | print("Starting training...") 38 | # train 39 | gbm = lgb.train( 40 | params, lgb_train, num_boost_round=20, valid_sets=lgb_eval, callbacks=[lgb.early_stopping(stopping_rounds=5)] 41 | )
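# note (added comment): the early_stopping callback above halts training once
# l1/l2 on the validation set stops improving for 5 consecutive rounds;
# gbm.best_iteration then records the best round, and the predict call below
# passes it explicitly via num_iteration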
42 | 43 | print("Saving model...") 44 | # save model to file 45 | gbm.save_model("model.txt") 46 | 47 | print("Starting predicting...") 48 | # predict 49 | y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration) 50 | # eval 51 | rmse_test = mean_squared_error(y_test, y_pred) ** 0.5 52 | print(f"The RMSE of prediction is: {rmse_test}") 53 | -------------------------------------------------------------------------------- /examples/regression/README.md: -------------------------------------------------------------------------------- 1 | Regression Example 2 | ================== 3 | 4 | Here is an example of using LightGBM to run a regression task. 5 | 6 | ***You must follow the [installation instructions](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html) 7 | for the following commands to work. The `lightgbm` binary must be built and available at the root of this project.*** 8 | 9 | Training 10 | -------- 11 | 12 | Run the following command in this folder: 13 | 14 | ```bash 15 | "../../lightgbm" config=train.conf 16 | ``` 17 | 18 | Prediction 19 | ---------- 20 | 21 | You should finish training first. 22 | 23 | Run the following command in this folder: 24 | 25 | ```bash 26 | "../../lightgbm" config=predict.conf 27 | ``` 28 | -------------------------------------------------------------------------------- /examples/regression/forced_bins.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "feature": 0, 4 | "bin_upper_bound": [0.3, 0.35, 0.4] 5 | }, 6 | { 7 | "feature": 1, 8 | "bin_upper_bound": [-0.1, -0.15, -0.2] 9 | } 10 | ] 11 | -------------------------------------------------------------------------------- /examples/regression/forced_bins2.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "feature": 0, 4 | "bin_upper_bound": [0.19, 0.39, 0.59, 0.79] 5 | } 6 | ] 7 | -------------------------------------------------------------------------------- /examples/regression/predict.conf: -------------------------------------------------------------------------------- 1 | task = predict 2 | 3 | data = regression.test 4 | 5 | input_model = LightGBM_model.txt 6 | -------------------------------------------------------------------------------- /examples/xendcg/README.md: -------------------------------------------------------------------------------- 1 | XE_NDCG Ranking Example 2 | ======================= 3 | 4 | Here is an example of using LightGBM to train a ranking model with the [XE_NDCG loss](https://arxiv.org/abs/1911.09798). 5 | 6 | ***You must follow the [installation instructions](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html) 7 | for the following commands to work. The `lightgbm` binary must be built and available at the root of this project.*** 8 | 9 | Training 10 | -------- 11 | 12 | Run the following command in this folder: 13 | 14 | ```bash 15 | "../../lightgbm" config=train.conf 16 | ``` 17 | 18 | Prediction 19 | ---------- 20 | 21 | You should finish training first. 22 | 23 | Run the following command in this folder: 24 | 25 | ```bash 26 | "../../lightgbm" config=predict.conf 27 | ``` 28 | 29 | Data Format 30 | ----------- 31 | 32 | To learn more about the query format used in this example, check out the 33 | [query data format](https://lightgbm.readthedocs.io/en/latest/Parameters.html#query-data).
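In short, each line of a query file gives the number of consecutive rows in the matching data file that belong to one query. As a quick illustration (values made up for this note, not taken from the files here), a query file containing

```text
2
3
```

says that the first 2 rows of the data file form query 1 and the next 3 rows form query 2, so the data file must contain exactly 5 rows.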
34 | -------------------------------------------------------------------------------- /examples/xendcg/predict.conf: -------------------------------------------------------------------------------- 1 | task = predict 2 | 3 | data = rank.test 4 | 5 | input_model= LightGBM_model.txt 6 | -------------------------------------------------------------------------------- /examples/xendcg/rank.test.query: -------------------------------------------------------------------------------- 1 | 12 2 | 19 3 | 18 4 | 10 5 | 15 6 | 15 7 | 22 8 | 23 9 | 18 10 | 16 11 | 16 12 | 11 13 | 6 14 | 13 15 | 17 16 | 21 17 | 20 18 | 16 19 | 13 20 | 16 21 | 21 22 | 15 23 | 10 24 | 19 25 | 10 26 | 13 27 | 18 28 | 17 29 | 23 30 | 24 31 | 16 32 | 13 33 | 17 34 | 24 35 | 17 36 | 10 37 | 17 38 | 15 39 | 18 40 | 16 41 | 9 42 | 9 43 | 21 44 | 14 45 | 13 46 | 13 47 | 13 48 | 10 49 | 10 50 | 6 51 | -------------------------------------------------------------------------------- /examples/xendcg/rank.train.query: -------------------------------------------------------------------------------- 1 | 1 2 | 13 3 | 5 4 | 8 5 | 19 6 | 12 7 | 18 8 | 5 9 | 14 10 | 13 11 | 8 12 | 9 13 | 16 14 | 11 15 | 21 16 | 14 17 | 21 18 | 9 19 | 14 20 | 11 21 | 20 22 | 18 23 | 13 24 | 20 25 | 22 26 | 22 27 | 13 28 | 17 29 | 10 30 | 13 31 | 12 32 | 13 33 | 13 34 | 23 35 | 18 36 | 13 37 | 20 38 | 12 39 | 22 40 | 14 41 | 13 42 | 23 43 | 13 44 | 14 45 | 14 46 | 5 47 | 13 48 | 15 49 | 14 50 | 14 51 | 16 52 | 16 53 | 15 54 | 21 55 | 22 56 | 10 57 | 22 58 | 18 59 | 25 60 | 16 61 | 12 62 | 12 63 | 15 64 | 15 65 | 25 66 | 13 67 | 9 68 | 12 69 | 8 70 | 16 71 | 25 72 | 19 73 | 24 74 | 12 75 | 16 76 | 10 77 | 16 78 | 9 79 | 17 80 | 15 81 | 7 82 | 9 83 | 15 84 | 14 85 | 16 86 | 17 87 | 8 88 | 17 89 | 12 90 | 18 91 | 23 92 | 10 93 | 12 94 | 12 95 | 4 96 | 14 97 | 12 98 | 15 99 | 27 100 | 16 101 | 20 102 | 13 103 | 19 104 | 13 105 | 17 106 | 17 107 | 16 108 | 12 109 | 15 110 | 14 111 | 14 112 | 19 113 | 12 114 | 23 115 | 18 116 | 16 117 | 9 118 | 23 119 | 11 120 | 15 121 | 8 122 | 10 123 | 10 124 | 16 125 | 11 126 | 15 127 | 22 128 | 16 129 | 17 130 | 23 131 | 16 132 | 22 133 | 17 134 | 14 135 | 12 136 | 14 137 | 20 138 | 15 139 | 17 140 | 15 141 | 15 142 | 22 143 | 9 144 | 21 145 | 9 146 | 17 147 | 16 148 | 15 149 | 13 150 | 13 151 | 15 152 | 14 153 | 18 154 | 21 155 | 14 156 | 17 157 | 15 158 | 14 159 | 16 160 | 12 161 | 17 162 | 19 163 | 16 164 | 11 165 | 18 166 | 11 167 | 13 168 | 14 169 | 9 170 | 16 171 | 15 172 | 16 173 | 25 174 | 9 175 | 13 176 | 22 177 | 16 178 | 18 179 | 20 180 | 14 181 | 11 182 | 9 183 | 16 184 | 19 185 | 19 186 | 11 187 | 11 188 | 13 189 | 14 190 | 14 191 | 13 192 | 16 193 | 6 194 | 21 195 | 16 196 | 12 197 | 16 198 | 11 199 | 24 200 | 12 201 | 10 202 | -------------------------------------------------------------------------------- /include/LightGBM/cuda/cuda_metadata.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | 6 | #ifdef USE_CUDA 7 | 8 | #ifndef LIGHTGBM_CUDA_CUDA_METADATA_HPP_ 9 | #define LIGHTGBM_CUDA_CUDA_METADATA_HPP_ 10 | 11 | #include <LightGBM/meta.h> 12 | #include 13 | 14 | #include <vector> 15 | 16 | namespace LightGBM { 17 | 18 | class CUDAMetadata { 19 | public: 20 | explicit CUDAMetadata(const int gpu_device_id); 21 | 22 | ~CUDAMetadata(); 23 | 24 | void Init(const std::vector<label_t>& label, 25 | const std::vector<label_t>& weight, 26 | const std::vector<data_size_t>& query_boundaries, 27 | const std::vector<label_t>& query_weights, 28 | const std::vector<double>& init_score); 29 | 30 | void SetLabel(const label_t* label, data_size_t len); 31 | 32 | void SetWeights(const label_t* weights, data_size_t len); 33 | 34 | void SetQuery(const data_size_t* query, const label_t* query_weights, data_size_t num_queries); 35 | 36 | void SetInitScore(const double* init_score, data_size_t len); 37 | 38 | const label_t* cuda_label() const { return cuda_label_; } 39 | 40 | const label_t* cuda_weights() const { return cuda_weights_; } 41 | 42 | const data_size_t* cuda_query_boundaries() const { return cuda_query_boundaries_; } 43 | 44 | const label_t* cuda_query_weights() const { return cuda_query_weights_; } 45 | 46 | private: 47 | label_t* cuda_label_; 48 | label_t* cuda_weights_; 49 | data_size_t* cuda_query_boundaries_; 50 | label_t* cuda_query_weights_; 51 | double* cuda_init_score_; 52 | }; 53 | 54 | } // namespace LightGBM 55 | 56 | #endif // LIGHTGBM_CUDA_CUDA_METADATA_HPP_ 57 | 58 | #endif // USE_CUDA 59 | -------------------------------------------------------------------------------- /include/LightGBM/cuda/cuda_metric.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for 4 | * license information. 5 | */ 6 | 7 | #ifndef LIGHTGBM_CUDA_CUDA_METRIC_HPP_ 8 | #define LIGHTGBM_CUDA_CUDA_METRIC_HPP_ 9 | 10 | #ifdef USE_CUDA 11 | 12 | #include 13 | #include 14 | 15 | namespace LightGBM { 16 | 17 | template <typename HOST_METRIC> 18 | class CUDAMetricInterface: public HOST_METRIC { 19 | public: 20 | explicit CUDAMetricInterface(const Config& config): HOST_METRIC(config) { 21 | cuda_labels_ = nullptr; 22 | cuda_weights_ = nullptr; 23 | const int gpu_device_id = config.gpu_device_id >= 0 ? config.gpu_device_id : 0; 24 | SetCUDADevice(gpu_device_id, __FILE__, __LINE__); 25 | } 26 | 27 | void Init(const Metadata& metadata, data_size_t num_data) override { 28 | HOST_METRIC::Init(metadata, num_data); 29 | cuda_labels_ = metadata.cuda_metadata()->cuda_label(); 30 | cuda_weights_ = metadata.cuda_metadata()->cuda_weights(); 31 | } 32 | 33 | bool IsCUDAMetric() const { return true; } 34 | 35 | protected: 36 | const label_t* cuda_labels_; 37 | const label_t* cuda_weights_; 38 | }; 39 | 40 | } // namespace LightGBM 41 | 42 | #endif // USE_CUDA 43 | 44 | #endif // LIGHTGBM_CUDA_CUDA_METRIC_HPP_ 45 | -------------------------------------------------------------------------------- /include/LightGBM/cuda/cuda_random.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef LIGHTGBM_CUDA_CUDA_RANDOM_HPP_ 6 | #define LIGHTGBM_CUDA_CUDA_RANDOM_HPP_ 7 | 8 | #ifdef USE_CUDA 9 | 10 | #include 11 | #include 12 | 13 | namespace LightGBM { 14 | 15 | /*!
16 | * \brief A wrapper for random generator 17 | */ 18 | class CUDARandom { 19 | public: 20 | /*! 21 | * \brief Set specific seed 22 | */ 23 | __device__ void SetSeed(int seed) { 24 | x = seed; 25 | } 26 | /*! 27 | * \brief Generate random integer, int16 range: [0, 32768) 28 | * \param lower_bound lower bound 29 | * \param upper_bound upper bound 30 | * \return The random integer between [lower_bound, upper_bound) 31 | */ 32 | __device__ inline int NextShort(int lower_bound, int upper_bound) { 33 | return (RandInt16()) % (upper_bound - lower_bound) + lower_bound; 34 | } 35 | 36 | /*! 37 | * \brief Generate random integer, int32 range 38 | * \param lower_bound lower bound 39 | * \param upper_bound upper bound 40 | * \return The random integer between [lower_bound, upper_bound) 41 | */ 42 | __device__ inline int NextInt(int lower_bound, int upper_bound) { 43 | return (RandInt32()) % (upper_bound - lower_bound) + lower_bound; 44 | } 45 | 46 | /*! 47 | * \brief Generate random float data 48 | * \return The random float between [0.0, 1.0) 49 | */ 50 | __device__ inline float NextFloat() { 51 | // get random float in [0,1) 52 | return static_cast<float>(RandInt16()) / (32768.0f); 53 | } 54 | 55 | private: 56 | __device__ inline int RandInt16() { 57 | x = (214013 * x + 2531011); 58 | return static_cast<int>((x >> 16) & 0x7FFF); 59 | } 60 | 61 | __device__ inline int RandInt32() { 62 | x = (214013 * x + 2531011); 63 | return static_cast<int>(x & 0x7FFFFFFF); 64 | } 65 | 66 | unsigned int x = 123456789; 67 | }; 68 | 69 | 70 | } // namespace LightGBM 71 | 72 | #endif // USE_CUDA 73 | 74 | #endif // LIGHTGBM_CUDA_CUDA_RANDOM_HPP_ 75 | -------------------------------------------------------------------------------- /include/LightGBM/export.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2017 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef LIGHTGBM_EXPORT_H_ 6 | #define LIGHTGBM_EXPORT_H_ 7 | 8 | /** Macros for exporting symbols in MSVC/GCC/CLANG **/ 9 | 10 | #ifdef __cplusplus 11 | #define LIGHTGBM_EXTERN_C extern "C" 12 | #else 13 | #define LIGHTGBM_EXTERN_C 14 | #endif 15 | 16 | 17 | #ifdef _MSC_VER 18 | #define LIGHTGBM_EXPORT __declspec(dllexport) 19 | #define LIGHTGBM_C_EXPORT LIGHTGBM_EXTERN_C __declspec(dllexport) 20 | #else 21 | #define LIGHTGBM_EXPORT 22 | #define LIGHTGBM_C_EXPORT LIGHTGBM_EXTERN_C 23 | #endif 24 | 25 | #endif /** LIGHTGBM_EXPORT_H_ **/ 26 | -------------------------------------------------------------------------------- /include/LightGBM/prediction_early_stop.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2017 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef LIGHTGBM_PREDICTION_EARLY_STOP_H_ 6 | #define LIGHTGBM_PREDICTION_EARLY_STOP_H_ 7 | 8 | #include <LightGBM/export.h> 9 | 10 | #include <functional> 11 | #include <string> 12 | 13 | namespace LightGBM { 14 | 15 | struct PredictionEarlyStopInstance { 16 | /// Callback function type for early stopping.
17 | /// Takes current prediction and number of elements in prediction 18 | /// @returns true if prediction should stop according to criterion 19 | using FunctionType = std::function<bool(const double*, int)>; 20 | 21 | FunctionType callback_function; // callback function itself 22 | int round_period; // call callback_function every `round_period` iterations 23 | }; 24 | 25 | struct PredictionEarlyStopConfig { 26 | int round_period; 27 | double margin_threshold; 28 | }; 29 | 30 | /// Create an early stopping algorithm of type `type`, with given round_period and margin threshold 31 | LIGHTGBM_EXPORT PredictionEarlyStopInstance CreatePredictionEarlyStopInstance(const std::string& type, 32 | const PredictionEarlyStopConfig& config); 33 | 34 | } // namespace LightGBM 35 | 36 | #endif // LIGHTGBM_PREDICTION_EARLY_STOP_H_ 37 | -------------------------------------------------------------------------------- /include/LightGBM/utils/binary_writer.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef LIGHTGBM_UTILS_BINARY_WRITER_H_ 6 | #define LIGHTGBM_UTILS_BINARY_WRITER_H_ 7 | 8 | #include <cstdlib> 9 | #include <vector> 10 | 11 | namespace LightGBM { 12 | 13 | /*! 14 | * \brief An interface for serializing binary data to a buffer 15 | */ 16 | struct BinaryWriter { 17 | /*! 18 | * \brief Append data to this binary target 19 | * \param data Buffer to write from 20 | * \param bytes Number of bytes to write from buffer 21 | * \return Number of bytes written 22 | */ 23 | virtual size_t Write(const void* data, size_t bytes) = 0; 24 | 25 | /*! 26 | * \brief Append data to this binary target aligned on a given byte size boundary 27 | * \param data Buffer to write from 28 | * \param bytes Number of bytes to write from buffer 29 | * \param alignment The size of bytes to align to in whole increments 30 | * \return Number of bytes written 31 | */ 32 | size_t AlignedWrite(const void* data, size_t bytes, size_t alignment = 8) { 33 | auto ret = Write(data, bytes); 34 | if (bytes % alignment != 0) { 35 | size_t padding = AlignedSize(bytes, alignment) - bytes; 36 | std::vector<char> tmp(padding, 0); 37 | ret += Write(tmp.data(), padding); 38 | } 39 | return ret; 40 | } 41 | 42 | /*! 43 | * \brief The aligned size of a buffer length. 44 | * \param bytes The number of bytes in a buffer 45 | * \param alignment The size of bytes to align to in whole increments 46 | * \return Number of aligned bytes 47 | */ 48 | static size_t AlignedSize(size_t bytes, size_t alignment = 8) { 49 | if (bytes % alignment == 0) { 50 | return bytes; 51 | } else { 52 | return bytes / alignment * alignment + alignment; 53 | } 54 | } 55 | }; 56 | } // namespace LightGBM 57 | 58 | #endif // LIGHTGBM_UTILS_BINARY_WRITER_H_ 59 | -------------------------------------------------------------------------------- /include/LightGBM/utils/byte_buffer.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef LIGHTGBM_UTILS_BYTE_BUFFER_H_ 6 | #define LIGHTGBM_UTILS_BYTE_BUFFER_H_ 7 | 8 | #include <LightGBM/export.h> 9 | #include <LightGBM/utils/binary_writer.h> 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | namespace LightGBM { 20 |
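// Usage sketch (comment added for illustration; not part of the original header):
// ByteBuffer below implements BinaryWriter::Write(), so the inherited
// AlignedWrite() pads each write out to the next alignment boundary, e.g.
//   ByteBuffer buf;
//   size_t n = buf.AlignedWrite("abcde", 5);  // n == 8: 5 data bytes + 3 zero padding bytes
//   // buf.GetSize() == 8
21 | /*!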
22 | * \brief An implementation for serializing binary data to an auto-expanding memory buffer 23 | */ 24 | struct ByteBuffer final : public BinaryWriter { 25 | ByteBuffer() {} 26 | 27 | explicit ByteBuffer(size_t initial_size) { 28 | buffer_.reserve(initial_size); 29 | } 30 | 31 | size_t Write(const void* data, size_t bytes) { 32 | const char* mem_ptr = static_cast<const char*>(data); 33 | for (size_t i = 0; i < bytes; ++i) { 34 | buffer_.push_back(mem_ptr[i]); 35 | } 36 | 37 | return bytes; 38 | } 39 | 40 | LIGHTGBM_EXPORT void Reserve(size_t capacity) { 41 | buffer_.reserve(capacity); 42 | } 43 | 44 | LIGHTGBM_EXPORT size_t GetSize() { 45 | return buffer_.size(); 46 | } 47 | 48 | LIGHTGBM_EXPORT char GetAt(size_t index) { 49 | return buffer_.at(index); 50 | } 51 | 52 | LIGHTGBM_EXPORT char* Data() { 53 | return buffer_.data(); 54 | } 55 | 56 | private: 57 | std::vector<char> buffer_; 58 | }; 59 | 60 | } // namespace LightGBM 61 | 62 | #endif // LIGHTGBM_UTILS_BYTE_BUFFER_H_ 63 | -------------------------------------------------------------------------------- /include/LightGBM/utils/file_io.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2018 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #ifndef LIGHTGBM_UTILS_FILE_IO_H_ 6 | #define LIGHTGBM_UTILS_FILE_IO_H_ 7 | 8 | #include <LightGBM/utils/binary_writer.h> 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | namespace LightGBM { 19 | 20 | /*! 21 | * \brief An interface for writing files from buffers 22 | */ 23 | struct VirtualFileWriter : BinaryWriter { 24 | virtual ~VirtualFileWriter() {} 25 | 26 | /*! 27 | * \brief Initialize the writer 28 | * \return True when the file is available for writes 29 | */ 30 | virtual bool Init() = 0; 31 | 32 | /*! 33 | * \brief Create appropriate writer for filename 34 | * \param filename Filename of the data 35 | * \return File writer instance 36 | */ 37 | static std::unique_ptr<VirtualFileWriter> Make(const std::string& filename); 38 | 39 | /*! 40 | * \brief Check filename existence 41 | * \param filename Filename of the data 42 | * \return True when the file exists 43 | */ 44 | static bool Exists(const std::string& filename); 45 | }; 46 | 47 | /** 48 | * \brief An interface for reading files into buffers 49 | */ 50 | struct VirtualFileReader { 51 | /*! 52 | * \brief Destructor 53 | */ 55 | virtual ~VirtualFileReader() {} 56 | /*! 57 | * \brief Initialize the reader 58 | * \return True when the file is available for read 59 | */ 60 | virtual bool Init() = 0; 61 | /*! 62 | * \brief Read data into buffer 63 | * \param buffer Buffer to read data into 64 | * \param bytes Number of bytes to read 65 | * \return Number of bytes read 66 | */ 67 | virtual size_t Read(void* buffer, size_t bytes) const = 0; 68 | /*! 69 | * \brief Create appropriate reader for filename 70 | * \param filename Filename of the data 71 | * \return File reader instance 72 | */ 73 | static std::unique_ptr<VirtualFileReader> Make(const std::string& filename); 74 | }; 75 | 76 | } // namespace LightGBM 77 | 78 | #endif // LIGHTGBM_UTILS_FILE_IO_H_ 79 | -------------------------------------------------------------------------------- /python-package/lightgbm/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """LightGBM, Light Gradient Boosting Machine.
3 | 4 | Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. 5 | """ 6 | 7 | from pathlib import Path 8 | 9 | # .basic is intentionally loaded as early as possible, to dlopen() lib_lightgbm.{dll,dylib,so} 10 | # and its dependencies as early as possible 11 | from .basic import Booster, Dataset, Sequence, register_logger 12 | from .callback import EarlyStopException, early_stopping, log_evaluation, record_evaluation, reset_parameter 13 | from .engine import CVBooster, cv, train 14 | 15 | try: 16 | from .sklearn import LGBMClassifier, LGBMModel, LGBMRanker, LGBMRegressor 17 | except ImportError: 18 | pass 19 | try: 20 | from .plotting import create_tree_digraph, plot_importance, plot_metric, plot_split_value_histogram, plot_tree 21 | except ImportError: 22 | pass 23 | try: 24 | from .dask import DaskLGBMClassifier, DaskLGBMRanker, DaskLGBMRegressor 25 | except ImportError: 26 | pass 27 | 28 | 29 | _version_path = Path(__file__).absolute().parent / "VERSION.txt" 30 | if _version_path.is_file(): 31 | __version__ = _version_path.read_text(encoding="utf-8").strip() 32 | 33 | __all__ = [ 34 | "Dataset", 35 | "Booster", 36 | "CVBooster", 37 | "Sequence", 38 | "register_logger", 39 | "train", 40 | "cv", 41 | "LGBMModel", 42 | "LGBMRegressor", 43 | "LGBMClassifier", 44 | "LGBMRanker", 45 | "DaskLGBMRegressor", 46 | "DaskLGBMClassifier", 47 | "DaskLGBMRanker", 48 | "log_evaluation", 49 | "record_evaluation", 50 | "reset_parameter", 51 | "early_stopping", 52 | "EarlyStopException", 53 | "plot_importance", 54 | "plot_split_value_histogram", 55 | "plot_metric", 56 | "plot_tree", 57 | "create_tree_digraph", 58 | ] 59 | -------------------------------------------------------------------------------- /python-package/lightgbm/libpath.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """Find the path to LightGBM dynamic library files.""" 3 | 4 | import ctypes 5 | from os import environ 6 | from pathlib import Path 7 | from platform import system 8 | from typing import List 9 | 10 | __all__: List[str] = [] 11 | 12 | 13 | def _find_lib_path() -> List[str]: 14 | """Find the path to LightGBM library files. 15 | 16 | Returns 17 | ------- 18 | lib_path: list of str 19 | List of all found library paths to LightGBM. 
20 | """ 21 | curr_path = Path(__file__).absolute() 22 | dll_path = [ 23 | curr_path.parents[1], 24 | curr_path.parents[0] / "bin", 25 | curr_path.parents[0] / "lib", 26 | ] 27 | if system() in ("Windows", "Microsoft"): 28 | dll_path.append(curr_path.parents[1] / "Release") 29 | dll_path.append(curr_path.parents[1] / "windows" / "x64" / "DLL") 30 | dll_path = [p / "lib_lightgbm.dll" for p in dll_path] 31 | elif system() == "Darwin": 32 | dll_path = [p / "lib_lightgbm.dylib" for p in dll_path] 33 | else: 34 | dll_path = [p / "lib_lightgbm.so" for p in dll_path] 35 | lib_path = [str(p) for p in dll_path if p.is_file()] 36 | if not lib_path: 37 | dll_path_joined = "\n".join(map(str, dll_path)) 38 | raise Exception(f"Cannot find lightgbm library file in following paths:\n{dll_path_joined}") 39 | return lib_path 40 | 41 | 42 | # we don't need lib_lightgbm while building docs 43 | _LIB: ctypes.CDLL 44 | if environ.get("LIGHTGBM_BUILD_DOC", False): 45 | from unittest.mock import Mock # isort: skip 46 | 47 | _LIB = Mock(ctypes.CDLL) # type: ignore 48 | else: 49 | _LIB = ctypes.cdll.LoadLibrary(_find_lib_path()[0]) 50 | -------------------------------------------------------------------------------- /python-package/lightgbm/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/python-package/lightgbm/py.typed -------------------------------------------------------------------------------- /src/boosting/cuda/cuda_score_updater.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | 6 | #include "cuda_score_updater.hpp" 7 | 8 | #ifdef USE_CUDA 9 | 10 | namespace LightGBM { 11 | 12 | __global__ void AddScoreConstantKernel( 13 | const double val, 14 | const data_size_t num_data, 15 | double* score) { 16 | const data_size_t data_index = static_cast(threadIdx.x + blockIdx.x * blockDim.x); 17 | if (data_index < num_data) { 18 | score[data_index] += val; 19 | } 20 | } 21 | 22 | void CUDAScoreUpdater::LaunchAddScoreConstantKernel(const double val, const size_t offset) { 23 | const int num_blocks = (num_data_ + num_threads_per_block_) / num_threads_per_block_; 24 | Log::Debug("Adding init score = %lf", val); 25 | AddScoreConstantKernel<<>>(val, num_data_, cuda_score_ + offset); 26 | } 27 | 28 | __global__ void MultiplyScoreConstantKernel( 29 | const double val, 30 | const data_size_t num_data, 31 | double* score) { 32 | const data_size_t data_index = static_cast(threadIdx.x + blockIdx.x * blockDim.x); 33 | if (data_index < num_data) { 34 | score[data_index] *= val; 35 | } 36 | } 37 | 38 | void CUDAScoreUpdater::LaunchMultiplyScoreConstantKernel(const double val, const size_t offset) { 39 | const int num_blocks = (num_data_ + num_threads_per_block_) / num_threads_per_block_; 40 | MultiplyScoreConstantKernel<<>>(val, num_data_, cuda_score_ + offset); 41 | } 42 | 43 | } // namespace LightGBM 44 | 45 | #endif // USE_CUDA 46 | -------------------------------------------------------------------------------- /src/boosting/cuda/cuda_score_updater.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. 
See LICENSE file in the project root for license information. 4 | */ 5 | 6 | #ifndef LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ 7 | #define LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ 8 | 9 | #ifdef USE_CUDA 10 | 11 | #include 12 | 13 | #include "../score_updater.hpp" 14 | 15 | namespace LightGBM { 16 | 17 | class CUDAScoreUpdater: public ScoreUpdater { 18 | public: 19 | CUDAScoreUpdater(const Dataset* data, int num_tree_per_iteration, const bool boosting_on_cuda); 20 | 21 | ~CUDAScoreUpdater(); 22 | 23 | inline void AddScore(double val, int cur_tree_id) override; 24 | 25 | inline void AddScore(const Tree* tree, int cur_tree_id) override; 26 | 27 | inline void AddScore(const TreeLearner* tree_learner, const Tree* tree, int cur_tree_id) override; 28 | 29 | inline void AddScore(const Tree* tree, const data_size_t* data_indices, 30 | data_size_t data_cnt, int cur_tree_id) override; 31 | 32 | inline void MultiplyScore(double val, int cur_tree_id) override; 33 | 34 | inline const double* score() const override { 35 | if (boosting_on_cuda_) { 36 | return cuda_score_; 37 | } else { 38 | return score_.data(); 39 | } 40 | } 41 | 42 | /*! \brief Disable copy */ 43 | CUDAScoreUpdater& operator=(const CUDAScoreUpdater&) = delete; 44 | 45 | CUDAScoreUpdater(const CUDAScoreUpdater&) = delete; 46 | 47 | private: 48 | void InitCUDA(const size_t total_size); 49 | 50 | void LaunchAddScoreConstantKernel(const double val, const size_t offset); 51 | 52 | void LaunchMultiplyScoreConstantKernel(const double val, const size_t offset); 53 | 54 | double* cuda_score_; 55 | 56 | const int num_threads_per_block_; 57 | 58 | const bool boosting_on_cuda_; 59 | }; 60 | 61 | } // namespace LightGBM 62 | 63 | #endif // USE_CUDA 64 | 65 | #endif // LIGHTGBM_BOOSTING_CUDA_CUDA_SCORE_UPDATER_HPP_ 66 | -------------------------------------------------------------------------------- /src/boosting/sample_strategy.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | 6 | #include 7 | #include "goss.hpp" 8 | #include "bagging.hpp" 9 | 10 | namespace LightGBM { 11 | 12 | SampleStrategy* SampleStrategy::CreateSampleStrategy( 13 | const Config* config, 14 | const Dataset* train_data, 15 | const ObjectiveFunction* objective_function, 16 | int num_tree_per_iteration) { 17 | if (config->data_sample_strategy == std::string("goss")) { 18 | return new GOSSStrategy(config, train_data, num_tree_per_iteration); 19 | } else { 20 | return new BaggingSampleStrategy(config, train_data, objective_function, num_tree_per_iteration); 21 | } 22 | } 23 | 24 | } // namespace LightGBM 25 | -------------------------------------------------------------------------------- /src/cuda/cuda_utils.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | 6 | #ifdef USE_CUDA 7 | 8 | #include 9 | 10 | namespace LightGBM { 11 | 12 | void SynchronizeCUDADevice(const char* file, const int line) { 13 | gpuAssert(cudaDeviceSynchronize(), file, line); 14 | } 15 | 16 | void PrintLastCUDAError() { 17 | const char* error_name = cudaGetErrorName(cudaGetLastError()); 18 | Log::Fatal(error_name); 19 | } 20 | 21 | void SetCUDADevice(int gpu_device_id, const char* file, int line) { 22 | int cur_gpu_device_id = 0; 23 | CUDASUCCESS_OR_FATAL_OUTER(cudaGetDevice(&cur_gpu_device_id)); 24 | if (cur_gpu_device_id != gpu_device_id) { 25 | CUDASUCCESS_OR_FATAL_OUTER(cudaSetDevice(gpu_device_id)); 26 | } 27 | } 28 | 29 | int GetCUDADevice(const char* file, int line) { 30 | int cur_gpu_device_id = 0; 31 | CUDASUCCESS_OR_FATAL_OUTER(cudaGetDevice(&cur_gpu_device_id)); 32 | return cur_gpu_device_id; 33 | } 34 | 35 | } // namespace LightGBM 36 | 37 | #endif // USE_CUDA 38 | -------------------------------------------------------------------------------- /src/io/file_io.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2018 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for 4 | * license information. 5 | */ 6 | #include 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | namespace LightGBM { 15 | 16 | struct LocalFile : VirtualFileReader, VirtualFileWriter { 17 | LocalFile(const std::string& filename, const std::string& mode) 18 | : filename_(filename), mode_(mode) {} 19 | virtual ~LocalFile() { 20 | if (file_ != NULL) { 21 | fclose(file_); 22 | } 23 | } 24 | 25 | bool Init() { 26 | if (file_ == NULL) { 27 | #if _MSC_VER 28 | fopen_s(&file_, filename_.c_str(), mode_.c_str()); 29 | #else 30 | file_ = fopen(filename_.c_str(), mode_.c_str()); 31 | #endif 32 | } 33 | return file_ != NULL; 34 | } 35 | 36 | bool Exists() const { 37 | LocalFile file(filename_, "rb"); 38 | return file.Init(); 39 | } 40 | 41 | size_t Read(void* buffer, size_t bytes) const { 42 | return fread(buffer, 1, bytes, file_); 43 | } 44 | 45 | size_t Write(const void* buffer, size_t bytes) { 46 | return fwrite(buffer, bytes, 1, file_) == 1 ? bytes : 0; 47 | } 48 | 49 | private: 50 | FILE* file_ = NULL; 51 | const std::string filename_; 52 | const std::string mode_; 53 | }; 54 | 55 | std::unique_ptr VirtualFileReader::Make( 56 | const std::string& filename) { 57 | return std::unique_ptr(new LocalFile(filename, "rb")); 58 | } 59 | 60 | std::unique_ptr VirtualFileWriter::Make( 61 | const std::string& filename) { 62 | return std::unique_ptr(new LocalFile(filename, "wb")); 63 | } 64 | 65 | bool VirtualFileWriter::Exists(const std::string& filename) { 66 | LocalFile file(filename, "rb"); 67 | return file.Exists(); 68 | } 69 | 70 | } // namespace LightGBM 71 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2016 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | #include 6 | 7 | #include 8 | 9 | #ifdef USE_MPI 10 | #include "network/linkers.h" 11 | #endif 12 | 13 | int main(int argc, char** argv) { 14 | bool success = false; 15 | try { 16 | LightGBM::Application app(argc, argv); 17 | app.Run(); 18 | 19 | #ifdef USE_MPI 20 | LightGBM::Linkers::MpiFinalizeIfIsParallel(); 21 | #endif 22 | 23 | success = true; 24 | } 25 | catch (const std::exception& ex) { 26 | std::cerr << "Met Exceptions:" << std::endl; 27 | std::cerr << ex.what() << std::endl; 28 | } 29 | catch (const std::string& ex) { 30 | std::cerr << "Met Exceptions:" << std::endl; 31 | std::cerr << ex << std::endl; 32 | } 33 | catch (...) { 34 | std::cerr << "Unknown Exceptions" << std::endl; 35 | } 36 | 37 | if (!success) { 38 | #ifdef USE_MPI 39 | LightGBM::Linkers::MpiAbortIfIsParallel(); 40 | #endif 41 | 42 | exit(-1); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/metric/cuda/cuda_binary_metric.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for 4 | * license information. 5 | */ 6 | 7 | #ifdef USE_CUDA 8 | 9 | #include "cuda_binary_metric.hpp" 10 | 11 | namespace LightGBM { 12 | 13 | CUDABinaryLoglossMetric::CUDABinaryLoglossMetric(const Config& config): 14 | CUDABinaryMetricInterface(config) {} 15 | 16 | template 17 | std::vector CUDABinaryMetricInterface::Eval(const double* score, const ObjectiveFunction* objective) const { 18 | const double* score_convert = score; 19 | if (objective != nullptr && objective->NeedConvertOutputCUDA()) { 20 | this->score_convert_buffer_.Resize(static_cast(this->num_data_) * static_cast(this->num_class_)); 21 | score_convert = objective->ConvertOutputCUDA(this->num_data_, score, this->score_convert_buffer_.RawData()); 22 | } 23 | double sum_loss = 0.0, sum_weight = 0.0; 24 | this->LaunchEvalKernel(score_convert, &sum_loss, &sum_weight); 25 | const double eval_score = sum_loss / sum_weight; 26 | return std::vector{eval_score}; 27 | } 28 | 29 | } // namespace LightGBM 30 | 31 | #endif // USE_CUDA 32 | -------------------------------------------------------------------------------- /src/metric/cuda/cuda_binary_metric.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for 4 | * license information. 
5 | */ 6 | 7 | #ifndef LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_ 8 | #define LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_ 9 | 10 | #ifdef USE_CUDA 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #include "cuda_regression_metric.hpp" 18 | #include "../binary_metric.hpp" 19 | 20 | namespace LightGBM { 21 | 22 | template 23 | class CUDABinaryMetricInterface: public CUDAPointwiseMetricInterface { 24 | public: 25 | explicit CUDABinaryMetricInterface(const Config& config): CUDAPointwiseMetricInterface(config) {} 26 | 27 | virtual ~CUDABinaryMetricInterface() {} 28 | 29 | std::vector Eval(const double* score, const ObjectiveFunction* objective) const override; 30 | }; 31 | 32 | class CUDABinaryLoglossMetric: public CUDABinaryMetricInterface { 33 | public: 34 | explicit CUDABinaryLoglossMetric(const Config& config); 35 | 36 | virtual ~CUDABinaryLoglossMetric() {} 37 | 38 | __device__ static double MetricOnPointCUDA(label_t label, double score, const double /*param*/) { 39 | // score should have been converted to probability 40 | if (label <= 0) { 41 | if (1.0f - score > kEpsilon) { 42 | return -log(1.0f - score); 43 | } 44 | } else { 45 | if (score > kEpsilon) { 46 | return -log(score); 47 | } 48 | } 49 | return -log(kEpsilon); 50 | } 51 | }; 52 | 53 | } // namespace LightGBM 54 | 55 | #endif // USE_CUDA 56 | 57 | #endif // LIGHTGBM_METRIC_CUDA_CUDA_BINARY_METRIC_HPP_ 58 | -------------------------------------------------------------------------------- /src/metric/cuda/cuda_pointwise_metric.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2022 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for 4 | * license information. 5 | */ 6 | 7 | #ifndef LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_ 8 | #define LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_ 9 | 10 | #ifdef USE_CUDA 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | #define NUM_DATA_PER_EVAL_THREAD (1024) 18 | 19 | namespace LightGBM { 20 | 21 | template 22 | class CUDAPointwiseMetricInterface: public CUDAMetricInterface { 23 | public: 24 | explicit CUDAPointwiseMetricInterface(const Config& config): CUDAMetricInterface(config), num_class_(config.num_class) {} 25 | 26 | virtual ~CUDAPointwiseMetricInterface() {} 27 | 28 | void Init(const Metadata& metadata, data_size_t num_data) override; 29 | 30 | protected: 31 | void LaunchEvalKernel(const double* score_convert, double* sum_loss, double* sum_weight) const; 32 | 33 | virtual double GetParamFromConfig() const { return 0.0; } 34 | 35 | mutable CUDAVector score_convert_buffer_; 36 | CUDAVector reduce_block_buffer_; 37 | CUDAVector reduce_block_buffer_inner_; 38 | const int num_class_; 39 | }; 40 | 41 | } // namespace LightGBM 42 | 43 | #endif // USE_CUDA 44 | 45 | #endif // LIGHTGBM_METRIC_CUDA_CUDA_POINTWISE_METRIC_HPP_ 46 | -------------------------------------------------------------------------------- /src/network/linkers_mpi.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2016 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | #ifdef USE_MPI 6 | 7 | #include "linkers.h" 8 | 9 | namespace LightGBM { 10 | 11 | Linkers::Linkers(Config) { 12 | is_init_ = false; 13 | int argc = 0; 14 | char**argv = nullptr; 15 | int flag = 0; 16 | MPI_SAFE_CALL(MPI_Initialized(&flag)); // test if MPI has been initialized 17 | if (!flag) { // if MPI not started, start it 18 | MPI_SAFE_CALL(MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &flag)); 19 | } 20 | MPI_SAFE_CALL(MPI_Comm_size(MPI_COMM_WORLD, &num_machines_)); 21 | MPI_SAFE_CALL(MPI_Comm_rank(MPI_COMM_WORLD, &rank_)); 22 | // wait for all client start up 23 | MPI_SAFE_CALL(MPI_Barrier(MPI_COMM_WORLD)); 24 | bruck_map_ = BruckMap::Construct(rank_, num_machines_); 25 | recursive_halving_map_ = RecursiveHalvingMap::Construct(rank_, num_machines_); 26 | is_init_ = true; 27 | } 28 | 29 | Linkers::~Linkers() { 30 | // Don't call MPI_Finalize() here: If the destructor was called because only this node had an exception, calling MPI_Finalize() will cause all nodes to hang. 31 | // Instead we will handle finalize/abort for MPI in main(). 32 | } 33 | 34 | bool Linkers::IsMpiInitialized() { 35 | int is_mpi_init; 36 | MPI_SAFE_CALL(MPI_Initialized(&is_mpi_init)); 37 | return is_mpi_init; 38 | } 39 | 40 | void Linkers::MpiFinalizeIfIsParallel() { 41 | if (IsMpiInitialized()) { 42 | Log::Debug("Finalizing MPI session."); 43 | MPI_SAFE_CALL(MPI_Finalize()); 44 | } 45 | } 46 | 47 | void Linkers::MpiAbortIfIsParallel() { 48 | try { 49 | if (IsMpiInitialized()) { 50 | std::cerr << "Aborting MPI communication." << std::endl << std::flush; 51 | MPI_SAFE_CALL(MPI_Abort(MPI_COMM_WORLD, -1));; 52 | } 53 | } 54 | catch (...) { 55 | std::cerr << "Exception was raised before aborting MPI. Aborting process..." << std::endl << std::flush; 56 | abort(); 57 | } 58 | } 59 | 60 | } // namespace LightGBM 61 | #endif // USE_MPI 62 | -------------------------------------------------------------------------------- /src/objective/cuda/cuda_binary_objective.hpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for 4 | * license information. 
5 | */ 6 | 7 | #ifndef LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_ 8 | #define LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_ 9 | 10 | #ifdef USE_CUDA 11 | 12 | #define GET_GRADIENTS_BLOCK_SIZE_BINARY (1024) 13 | #define CALC_INIT_SCORE_BLOCK_SIZE_BINARY (1024) 14 | 15 | #include 16 | 17 | #include 18 | #include 19 | 20 | #include "../binary_objective.hpp" 21 | 22 | namespace LightGBM { 23 | 24 | class CUDABinaryLogloss : public CUDAObjectiveInterface { 25 | public: 26 | explicit CUDABinaryLogloss(const Config& config); 27 | 28 | explicit CUDABinaryLogloss(const Config& config, const int ova_class_id); 29 | 30 | explicit CUDABinaryLogloss(const std::vector& strs); 31 | 32 | ~CUDABinaryLogloss(); 33 | 34 | void Init(const Metadata& metadata, data_size_t num_data) override; 35 | 36 | bool NeedConvertOutputCUDA() const override { return true; } 37 | 38 | private: 39 | void LaunchGetGradientsKernel(const double* scores, score_t* gradients, score_t* hessians) const override; 40 | 41 | double LaunchCalcInitScoreKernel(const int class_id) const override; 42 | 43 | const double* LaunchConvertOutputCUDAKernel(const data_size_t num_data, const double* input, double* output) const override; 44 | 45 | void LaunchResetOVACUDALabelKernel() const; 46 | 47 | // CUDA memory, held by other objects 48 | const label_t* cuda_label_; 49 | label_t* cuda_ova_label_; 50 | const label_t* cuda_weights_; 51 | 52 | // CUDA memory, held by this object 53 | double* cuda_boost_from_score_; 54 | double* cuda_sum_weights_; 55 | double* cuda_label_weights_; 56 | const int ova_class_id_ = -1; 57 | }; 58 | 59 | } // namespace LightGBM 60 | 61 | #endif // USE_CUDA 62 | 63 | #endif // LIGHTGBM_OBJECTIVE_CUDA_CUDA_BINARY_OBJECTIVE_HPP_ 64 | -------------------------------------------------------------------------------- /src/utils/openmp_wrapper.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2023 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | #include 6 | 7 | int LGBM_MAX_NUM_THREADS = -1; 8 | 9 | int LGBM_DEFAULT_NUM_THREADS = -1; 10 | 11 | #ifdef _OPENMP 12 | 13 | #include 14 | 15 | int OMP_NUM_THREADS() { 16 | int default_num_threads = 1; 17 | 18 | if (LGBM_DEFAULT_NUM_THREADS > 0) { 19 | // if LightGBM-specific default has been set, ignore OpenMP-global config 20 | default_num_threads = LGBM_DEFAULT_NUM_THREADS; 21 | } else { 22 | // otherwise, default to OpenMP-global config 23 | default_num_threads = omp_get_max_threads(); 24 | } 25 | 26 | // ensure that if LGBM_SetMaxThreads() was ever called, LightGBM doesn't 27 | // use more than that many threads 28 | if (LGBM_MAX_NUM_THREADS > 0 && default_num_threads > LGBM_MAX_NUM_THREADS) { 29 | return LGBM_MAX_NUM_THREADS; 30 | } 31 | 32 | return default_num_threads; 33 | } 34 | 35 | void OMP_SET_NUM_THREADS(int num_threads) { 36 | if (num_threads <= 0) { 37 | LGBM_DEFAULT_NUM_THREADS = -1; 38 | } else { 39 | LGBM_DEFAULT_NUM_THREADS = num_threads; 40 | } 41 | } 42 | 43 | #endif // _OPENMP 44 | -------------------------------------------------------------------------------- /swig/ChunkedArray_API_extensions.i: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 
4 | */ 5 | /** 6 | * Wrap chunked_array.hpp class for SWIG usage. 7 | * 8 | * Author: Alberto Ferreira 9 | */ 10 | 11 | %{ 12 | #include "../include/LightGBM/utils/chunked_array.hpp" 13 | %} 14 | 15 | %include "../include/LightGBM/utils/chunked_array.hpp" 16 | 17 | using LightGBM::ChunkedArray; 18 | 19 | %template(int32ChunkedArray) ChunkedArray; 20 | /* Unfortunately, for the time being, 21 | * SWIG has issues generating the overloads to coalesce_to() 22 | * for larger integral types 23 | * so we won't support that for now: 24 | */ 25 | //%template(int64ChunkedArray) ChunkedArray; 26 | %template(floatChunkedArray) ChunkedArray; 27 | %template(doubleChunkedArray) ChunkedArray; 28 | -------------------------------------------------------------------------------- /tests/cpp_tests/predict.conf: -------------------------------------------------------------------------------- 1 | data=../data/categorical.data 2 | 3 | input_model=LightGBM_model.txt 4 | 5 | task=predict 6 | -------------------------------------------------------------------------------- /tests/cpp_tests/test.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | 6 | preds = [np.loadtxt(str(name)) for name in Path(__file__).absolute().parent.glob("*.pred")] 7 | np.testing.assert_allclose(preds[0], preds[1]) 8 | -------------------------------------------------------------------------------- /tests/cpp_tests/test_array_args.cpp: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2021 Microsoft Corporation. All rights reserved. 3 | * Licensed under the MIT License. See LICENSE file in the project root for license information. 4 | */ 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | using LightGBM::data_size_t; 13 | using LightGBM::score_t; 14 | using LightGBM::ArrayArgs; 15 | 16 | 17 | TEST(Partition, JustWorks) { 18 | std::vector gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f}); 19 | data_size_t middle_begin, middle_end; 20 | 21 | ArrayArgs::Partition(&gradients, 0, static_cast(gradients.size()), &middle_begin, &middle_end); 22 | 23 | EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]); 24 | EXPECT_GT(gradients[0], gradients[middle_begin + 1]); 25 | EXPECT_GT(gradients[middle_begin + 1], gradients.back()); 26 | } 27 | 28 | TEST(Partition, PartitionOneElement) { 29 | std::vector gradients({0.5f}); 30 | data_size_t middle_begin, middle_end; 31 | ArrayArgs::Partition(&gradients, 0, static_cast(gradients.size()), &middle_begin, &middle_end); 32 | EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]); 33 | } 34 | 35 | TEST(Partition, Empty) { 36 | std::vector gradients; 37 | data_size_t middle_begin, middle_end; 38 | ArrayArgs::Partition(&gradients, 0, static_cast(gradients.size()), &middle_begin, &middle_end); 39 | 40 | EXPECT_EQ(middle_begin, -1); 41 | EXPECT_EQ(middle_end, 0); 42 | } 43 | 44 | TEST(Partition, AllEqual) { 45 | std::vector gradients({0.5f, 0.5f, 0.5f}); 46 | data_size_t middle_begin, middle_end; 47 | ArrayArgs::Partition(&gradients, 0, static_cast(gradients.size()), &middle_begin, &middle_end); 48 | 49 | EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]); 50 | EXPECT_EQ(middle_begin, -1); 51 | EXPECT_EQ(middle_end, 3); 52 | } 53 | -------------------------------------------------------------------------------- /tests/cpp_tests/test_main.cpp: 
--------------------------------------------------------------------------------
/tests/cpp_tests/predict.conf:
--------------------------------------------------------------------------------
data=../data/categorical.data

input_model=LightGBM_model.txt

task=predict

--------------------------------------------------------------------------------
/tests/cpp_tests/test.py:
--------------------------------------------------------------------------------
# coding: utf-8
from pathlib import Path

import numpy as np

preds = [np.loadtxt(str(name)) for name in Path(__file__).absolute().parent.glob("*.pred")]
np.testing.assert_allclose(preds[0], preds[1])

--------------------------------------------------------------------------------
/tests/cpp_tests/test_array_args.cpp:
--------------------------------------------------------------------------------
/*!
 * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */

#include <gtest/gtest.h>
#include <LightGBM/meta.h>
#include <LightGBM/utils/array_args.h>

#include <vector>

using LightGBM::data_size_t;
using LightGBM::score_t;
using LightGBM::ArrayArgs;


TEST(Partition, JustWorks) {
  std::vector<score_t> gradients({0.5f, 5.0f, 1.0f, 2.0f, 2.0f});
  data_size_t middle_begin, middle_end;

  ArrayArgs<score_t>::Partition(&gradients, 0, static_cast<data_size_t>(gradients.size()), &middle_begin, &middle_end);

  EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
  EXPECT_GT(gradients[0], gradients[middle_begin + 1]);
  EXPECT_GT(gradients[middle_begin + 1], gradients.back());
}

TEST(Partition, PartitionOneElement) {
  std::vector<score_t> gradients({0.5f});
  data_size_t middle_begin, middle_end;
  ArrayArgs<score_t>::Partition(&gradients, 0, static_cast<data_size_t>(gradients.size()), &middle_begin, &middle_end);
  EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
}

TEST(Partition, Empty) {
  std::vector<score_t> gradients;
  data_size_t middle_begin, middle_end;
  ArrayArgs<score_t>::Partition(&gradients, 0, static_cast<data_size_t>(gradients.size()), &middle_begin, &middle_end);

  EXPECT_EQ(middle_begin, -1);
  EXPECT_EQ(middle_end, 0);
}

TEST(Partition, AllEqual) {
  std::vector<score_t> gradients({0.5f, 0.5f, 0.5f});
  data_size_t middle_begin, middle_end;
  ArrayArgs<score_t>::Partition(&gradients, 0, static_cast<data_size_t>(gradients.size()), &middle_begin, &middle_end);

  EXPECT_EQ(gradients[middle_begin + 1], gradients[middle_end - 1]);
  EXPECT_EQ(middle_begin, -1);
  EXPECT_EQ(middle_end, 3);
}
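// Illustrative note, not part of the file above: Partition performs a
// three-way partition in descending order. For the JustWorks input
// {0.5, 5.0, 1.0, 2.0, 2.0}, one valid post-state is {5.0, 2.0, 2.0, 1.0, 0.5},
// with [middle_begin + 1, middle_end - 1] covering the pivot-equal run (the
// 2.0s): everything before that run is strictly larger and everything after it
// is strictly smaller. The three EXPECTs in JustWorks check exactly those
// invariants without pinning down the (unspecified) ordering inside each
// region; Empty and AllEqual pin down the sentinel values of middle_begin and
// middle_end at the boundaries.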
--------------------------------------------------------------------------------
/tests/cpp_tests/test_main.cpp:
--------------------------------------------------------------------------------
/*!
 * Copyright (c) 2021 Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See LICENSE file in the project root for license information.
 */
#include <gtest/gtest.h>

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  testing::FLAGS_gtest_death_test_style = "threadsafe";
  return RUN_ALL_TESTS();
}

--------------------------------------------------------------------------------
/tests/cpp_tests/train.conf:
--------------------------------------------------------------------------------
data=../data/categorical.data

app=binary

num_trees=10

categorical_column=0,1,4,5,6

--------------------------------------------------------------------------------
/tests/distributed/conftest.py:
--------------------------------------------------------------------------------
from pathlib import Path

default_exec_file = Path(__file__).absolute().parents[2] / "lightgbm"


def pytest_addoption(parser):
    parser.addoption("--execfile", action="store", default=str(default_exec_file))

--------------------------------------------------------------------------------
/tests/python_package_test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/LightGBM/72a39817c331ed5fe46b5a5b50f32f809e177c7a/tests/python_package_test/__init__.py

--------------------------------------------------------------------------------
/tests/python_package_test/conftest.py:
--------------------------------------------------------------------------------
import numpy as np
import pytest

import lightgbm


@pytest.fixture(scope="function")
def missing_module_cffi(monkeypatch):
    """Mock 'cffi' not being importable"""
    monkeypatch.setattr(lightgbm.compat, "CFFI_INSTALLED", False)
    monkeypatch.setattr(lightgbm.basic, "CFFI_INSTALLED", False)


@pytest.fixture(scope="function")
def rng():
    return np.random.default_rng()


@pytest.fixture(scope="function")
def rng_fixed_seed():
    return np.random.default_rng(seed=42)

--------------------------------------------------------------------------------
/tests/python_package_test/test_dual.py:
--------------------------------------------------------------------------------
# coding: utf-8
"""Tests for dual GPU+CPU support."""

import os
import platform

import pytest
from sklearn.metrics import log_loss

import lightgbm as lgb

from .utils import load_breast_cancer


@pytest.mark.skipif(
    os.environ.get("LIGHTGBM_TEST_DUAL_CPU_GPU", None) is None,
    reason="Only run if appropriate env variable is set",
)
def test_cpu_and_gpu_work():
    # If compiled appropriately, the same installation will support both GPU and CPU.
    X, y = load_breast_cancer(return_X_y=True)
    data = lgb.Dataset(X, y)

    params_cpu = {"verbosity": -1, "num_leaves": 31, "objective": "binary", "device": "cpu"}
    cpu_bst = lgb.train(params_cpu, data, num_boost_round=10)
    cpu_score = log_loss(y, cpu_bst.predict(X))

    params_gpu = params_cpu.copy()
    params_gpu["device"] = "gpu"
    # Double-precision floats are only supported on x86_64 with PoCL
    params_gpu["gpu_use_dp"] = platform.machine() == "x86_64"
    gpu_bst = lgb.train(params_gpu, data, num_boost_round=10)
    gpu_score = log_loss(y, gpu_bst.predict(X))

    rel = 1e-6 if params_gpu["gpu_use_dp"] else 1e-4
    assert cpu_score == pytest.approx(gpu_score, rel=rel)
    assert gpu_score < 0.242

--------------------------------------------------------------------------------
/windows/LightGBM.sln:
--------------------------------------------------------------------------------
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 14.0.23107.0
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "LightGBM", "LightGBM.vcxproj", "{F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}"
EndProject
Global
    GlobalSection(SolutionConfigurationPlatforms) = preSolution
        Debug_DLL|x64 = Debug_DLL|x64
        Debug_mpi|x64 = Debug_mpi|x64
        Debug|x64 = Debug|x64
        DLL|x64 = DLL|x64
        Release_mpi|x64 = Release_mpi|x64
        Release|x64 = Release|x64
    EndGlobalSection
    GlobalSection(ProjectConfigurationPlatforms) = postSolution
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Debug_DLL|x64.ActiveCfg = Debug_DLL|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Debug_DLL|x64.Build.0 = Debug_DLL|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Debug_mpi|x64.ActiveCfg = Debug_mpi|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Debug_mpi|x64.Build.0 = Debug_mpi|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Debug|x64.ActiveCfg = Debug|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Debug|x64.Build.0 = Debug|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.DLL|x64.ActiveCfg = DLL|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.DLL|x64.Build.0 = DLL|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Release_mpi|x64.ActiveCfg = Release_mpi|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Release_mpi|x64.Build.0 = Release_mpi|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Release|x64.ActiveCfg = Release|x64
        {F31C0B5D-715E-4953-AA1B-8D2AEEE4344C}.Release|x64.Build.0 = Release|x64
    EndGlobalSection
    GlobalSection(SolutionProperties) = preSolution
        HideSolutionNode = FALSE
    EndGlobalSection
EndGlobal

--------------------------------------------------------------------------------