├── .flake8 ├── .github └── workflows │ ├── gpu.yaml │ └── test.yaml ├── .gitignore ├── LICENSE ├── README.md ├── examples ├── format.sh ├── lightgbm_ray ├── __init__.py ├── callback.py ├── examples │ ├── __init__.py │ ├── create_test_data.py │ ├── higgs.py │ ├── higgs_parquet.py │ ├── readme.py │ ├── readme_sklearn_api.py │ ├── simple.py │ ├── simple_dask.py │ ├── simple_modin.py │ ├── simple_predict.py │ ├── simple_ray_dataset.py │ ├── simple_tune.py │ ├── train_on_test_data.py │ └── train_with_ml_dataset.py ├── main.py ├── sklearn.py ├── tests │ ├── __init__.py │ ├── env_info.sh │ ├── release │ │ ├── benchmark_cpu_gpu.py │ │ ├── cluster_cpu.yaml │ │ ├── cluster_gpu.yaml │ │ ├── create_learnable_data.py │ │ ├── create_test_data.py │ │ ├── custom_objective_metric.py │ │ ├── run_e2e_gpu.sh │ │ ├── setup_lightgbm.sh │ │ ├── start_cpu_cluster.sh │ │ ├── start_gpu_cluster.sh │ │ └── submit_cpu_gpu_benchmark.sh │ ├── test_client.py │ ├── test_end_to_end.py │ ├── test_fault_tolerance.py │ ├── test_lightgbm.py │ ├── test_lightgbm_api.py │ └── test_tune.py ├── tune.py └── util.py ├── requirements ├── lint-requirements.txt └── test-requirements.txt ├── run_ci_examples.sh ├── run_ci_tests.sh └── setup.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | inline-quotes = " 4 | ignore = 5 | C408 6 | C417 7 | E121 8 | E123 9 | E126 10 | E203 11 | E226 12 | E24 13 | E704 14 | W503 15 | W504 16 | W605 17 | I 18 | N 19 | B001 20 | B002 21 | B003 22 | B004 23 | B005 24 | B007 25 | B008 26 | B009 27 | B010 28 | B011 29 | B012 30 | B013 31 | B014 32 | B015 33 | B016 34 | B017 35 | avoid-escape = no 36 | # Error E731 is ignored because of the migration from YAPF to Black. 37 | # See https://github.com/ray-project/ray/issues/21315 for more information. 
38 | per-file-ignores = 39 | rllib/evaluation/worker_set.py:E731 40 | rllib/evaluation/sampler.py:E731 41 | -------------------------------------------------------------------------------- /.github/workflows/gpu.yaml: -------------------------------------------------------------------------------- 1 | name: GPU on manual trigger 2 | 3 | on: 4 | workflow_dispatch 5 | 6 | jobs: 7 | test_gpu: 8 | runs-on: ubuntu-latest 9 | timeout-minutes: 20 10 | steps: 11 | - uses: actions/checkout@v3 12 | - name: Set up Python 3.8 13 | uses: actions/setup-python@v3 14 | with: 15 | python-version: 3.8 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install --upgrade pip 19 | python -m pip install -U anyscale pyyaml 20 | - name: Print environment info 21 | run: | 22 | ./lightgbm_ray/tests/env_info.sh 23 | - name: Set anyscale project 24 | env: 25 | ANYSCALE_PROJECT: ${{ secrets.ANYSCALE_PROJECT }} 26 | run: | 27 | echo "project_id: ${ANYSCALE_PROJECT}" > ./lightgbm_ray/tests/release/.anyscale.yaml 28 | - name: Run end to end GPU test 29 | env: 30 | ANYSCALE_CLI_TOKEN: ${{ secrets.ANYSCALE_CLI_TOKEN }} 31 | run: | 32 | pushd ./lightgbm_ray/tests/release 33 | ./run_e2e_gpu.sh 34 | popd || true 35 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: pytest on push 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "0 5 * * *" 8 | 9 | jobs: 10 | test_lint: 11 | runs-on: ubuntu-latest 12 | timeout-minutes: 3 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python 3.8 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: 3.8 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install codecov 23 | if [ -f requirements/lint-requirements.txt ]; then python -m pip install -r requirements/lint-requirements.txt; fi 24 | - name: Print environment info 25 | run: | 26 | ./lightgbm_ray/tests/env_info.sh 27 | - name: Run format script 28 | run: | 29 | ls -alp 30 | ./format.sh --all 31 | 32 | test_linux_ray_master: 33 | runs-on: ubuntu-latest 34 | timeout-minutes: 160 35 | strategy: 36 | matrix: 37 | python-version: ["3.8", "3.9", "3.10"] 38 | include: 39 | - python-version: "3.8" 40 | ray-wheel: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl 41 | - python-version: "3.9" 42 | ray-wheel: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl 43 | - python-version: "3.10" 44 | ray-wheel: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl 45 | steps: 46 | - uses: actions/checkout@v3 47 | - name: Set up Python ${{ matrix.python-version }} 48 | uses: actions/setup-python@v3 49 | with: 50 | python-version: ${{ matrix.python-version }} 51 | - name: Install dependencies 52 | run: | 53 | python -m pip install --upgrade pip 54 | python -m pip install -U ${{ matrix.ray-wheel }} 55 | if [ -f requirements/test-requirements.txt ]; then python -m pip install -r requirements/test-requirements.txt; fi 56 | - name: Install package 57 | run: | 58 | python -m pip install -e . 
59 | - name: Print environment info 60 | run: | 61 | ./lightgbm_ray/tests/env_info.sh 62 | - name: Run tests 63 | uses: nick-invision/retry@v2 64 | with: 65 | timeout_minutes: 60 66 | max_attempts: 3 67 | command: bash ./run_ci_tests.sh 68 | - name: Run examples 69 | uses: nick-invision/retry@v2 70 | with: 71 | timeout_minutes: 40 72 | max_attempts: 3 73 | command: bash ./run_ci_examples.sh 74 | 75 | test_linux_ray_release: 76 | runs-on: ubuntu-latest 77 | timeout-minutes: 160 78 | strategy: 79 | matrix: 80 | python-version: ["3.8", "3.9", "3.10"] 81 | steps: 82 | - uses: actions/checkout@v3 83 | - name: Set up Python ${{ matrix.python-version }} 84 | uses: actions/setup-python@v3 85 | with: 86 | python-version: ${{ matrix.python-version }} 87 | - name: Install dependencies 88 | run: | 89 | python -m pip install --upgrade pip 90 | python -m pip install -U ray 91 | if [ -f requirements/test-requirements.txt ]; then python -m pip install -r requirements/test-requirements.txt; fi 92 | - name: Install package 93 | run: | 94 | python -m pip install -e . 95 | - name: Print environment info 96 | run: | 97 | ./lightgbm_ray/tests/env_info.sh 98 | - name: Run tests 99 | uses: nick-invision/retry@v2 100 | with: 101 | timeout_minutes: 60 102 | max_attempts: 3 103 | command: bash ./run_ci_tests.sh 104 | - name: Run examples 105 | uses: nick-invision/retry@v2 106 | with: 107 | timeout_minutes: 40 108 | max_attempts: 3 109 | command: bash ./run_ci_examples.sh 110 | 111 | test_linux_compat: 112 | # Test compatibility when some optional libraries are missing 113 | # Test runs on latest ray release 114 | runs-on: ubuntu-latest 115 | timeout-minutes: 160 116 | strategy: 117 | matrix: 118 | python-version: ["3.8", "3.9", "3.10"] 119 | steps: 120 | - uses: actions/checkout@v3 121 | - name: Set up Python ${{ matrix.python-version }} 122 | uses: actions/setup-python@v3 123 | with: 124 | python-version: ${{ matrix.python-version }} 125 | - name: Install dependencies 126 | run: | 127 | python -m pip install --upgrade pip 128 | python -m pip install -U ray 129 | if [ -f requirements/test-requirements.txt ]; then python -m pip install -r requirements/test-requirements.txt; fi 130 | - name: Uninstall unavailable dependencies 131 | # Disables modin and Ray Tune (via tabulate) 132 | run: | 133 | python -m pip uninstall -y modin 134 | python -m pip uninstall -y tabulate 135 | - name: Install package 136 | run: | 137 | python -m pip install -e . 138 | - name: Print environment info 139 | run: | 140 | ./lightgbm_ray/tests/env_info.sh 141 | - name: Run tests 142 | uses: nick-invision/retry@v2 143 | with: 144 | timeout_minutes: 60 145 | max_attempts: 3 146 | command: bash ./run_ci_tests.sh --no-tune 147 | - name: Run examples 148 | uses: nick-invision/retry@v2 149 | with: 150 | timeout_minutes: 40 151 | max_attempts: 3 152 | command: bash ./run_ci_examples.sh --no-tune 153 | 154 | test_linux_cutting_edge: 155 | # Tests on cutting edge, i.e. 
latest Ray master, latest LightGBM master 156 | runs-on: ubuntu-latest 157 | timeout-minutes: 160 158 | strategy: 159 | matrix: 160 | python-version: ["3.8", "3.9", "3.10"] 161 | include: 162 | - python-version: "3.8" 163 | ray-wheel: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp38-cp38-manylinux2014_x86_64.whl 164 | - python-version: "3.9" 165 | ray-wheel: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl 166 | - python-version: "3.10" 167 | ray-wheel: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl 168 | steps: 169 | - uses: actions/checkout@v3 170 | - name: Set up Python ${{ matrix.python-version }} 171 | uses: actions/setup-python@v3 172 | with: 173 | python-version: ${{ matrix.python-version }} 174 | - name: Install dependencies 175 | run: | 176 | python -m pip install --upgrade pip 177 | python -m pip install -U ${{ matrix.ray-wheel }} 178 | if [ -f requirements/test-requirements.txt ]; then python -m pip install -r requirements/test-requirements.txt; fi 179 | - name: Install Ubuntu system dependencies 180 | run: | 181 | sudo apt-get install -y --no-install-recommends ninja-build 182 | - name: Install package 183 | run: | 184 | python -m pip install -e . 185 | - name: Clone LightGBM repo 186 | uses: actions/checkout@v3 187 | with: 188 | repository: microsoft/LightGBM 189 | path: lightgbm 190 | submodules: true 191 | - name: Install LightGBM from source 192 | shell: bash -l {0} 193 | run: | 194 | pushd ${GITHUB_WORKSPACE}/lightgbm/python-package 195 | python --version 196 | python setup.py sdist 197 | pip install -v ./dist/lightgbm-*.tar.gz 198 | popd 199 | - name: Print environment info 200 | run: | 201 | ./lightgbm_ray/tests/env_info.sh 202 | - name: Run tests 203 | uses: nick-invision/retry@v2 204 | with: 205 | timeout_minutes: 60 206 | max_attempts: 3 207 | command: bash ./run_ci_tests.sh 208 | - name: Run examples 209 | uses: nick-invision/retry@v2 210 | with: 211 | timeout_minutes: 40 212 | max_attempts: 3 213 | command: bash ./run_ci_examples.sh 214 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | .vscode 132 | 133 | *.lgbm -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- 204 | 205 | Code in python/ray/rllib/{evolution_strategies, dqn} adapted from 206 | https://github.com/openai (MIT License) 207 | 208 | Copyright (c) 2016 OpenAI (http://openai.com) 209 | 210 | Permission is hereby granted, free of charge, to any person obtaining a copy 211 | of this software and associated documentation files (the "Software"), to deal 212 | in the Software without restriction, including without limitation the rights 213 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 214 | copies of the Software, and to permit persons to whom the Software is 215 | furnished to do so, subject to the following conditions: 216 | 217 | The above copyright notice and this permission notice shall be included in 218 | all copies or substantial portions of the Software. 219 | 220 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 221 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 222 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 223 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 224 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 225 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 226 | THE SOFTWARE. 227 | 228 | -------------------------------------------------------------------------------- 229 | 230 | Code in python/ray/rllib/impala/vtrace.py from 231 | https://github.com/deepmind/scalable_agent 232 | 233 | Copyright 2018 Google LLC 234 | 235 | Licensed under the Apache License, Version 2.0 (the "License"); 236 | you may not use this file except in compliance with the License. 237 | You may obtain a copy of the License at 238 | 239 | https://www.apache.org/licenses/LICENSE-2.0 240 | 241 | Unless required by applicable law or agreed to in writing, software 242 | distributed under the License is distributed on an "AS IS" BASIS, 243 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 244 | See the License for the specific language governing permissions and 245 | limitations under the License. 246 | 247 | -------------------------------------------------------------------------------- 248 | Code in python/ray/rllib/ars is adapted from https://github.com/modestyachts/ARS 249 | 250 | Copyright (c) 2018, ARS contributors (Horia Mania, Aurelia Guy, Benjamin Recht) 251 | All rights reserved. 252 | 253 | Redistribution and use of ARS in source and binary forms, with or without 254 | modification, are permitted provided that the following conditions are met: 255 | 256 | 1. Redistributions of source code must retain the above copyright notice, this 257 | list of conditions and the following disclaimer. 258 | 259 | 2. Redistributions in binary form must reproduce the above copyright notice, 260 | this list of conditions and the following disclaimer in the documentation and/or 261 | other materials provided with the distribution. 262 | 263 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 264 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 265 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 266 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 267 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 268 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 269 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 270 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 271 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 272 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 273 | 274 | ------------------ 275 | Code in python/ray/prometheus_exporter.py is adapted from https://github.com/census-instrumentation/opencensus-python/blob/master/contrib/opencensus-ext-prometheus/opencensus/ext/prometheus/stats_exporter/__init__.py 276 | 277 | # Copyright 2018, OpenCensus Authors 278 | # 279 | # Licensed under the Apache License, Version 2.0 (the "License"); 280 | # you may not use this file except in compliance with the License. 281 | # You may obtain a copy of the License at 282 | # 283 | # http://www.apache.org/licenses/LICENSE-2.0 284 | # 285 | # Unless required by applicable law or agreed to in writing, software 286 | # distributed under the License is distributed on an "AS IS" BASIS, 287 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 288 | # See the License for the specific language governing permissions and 289 | # limitations under the License. 290 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Distributed LightGBM on Ray 4 | 5 | ![Build Status](https://github.com/ray-project/lightgbm_ray/workflows/pytest%20on%20push/badge.svg) 6 | [![docs.ray.io](https://img.shields.io/badge/docs-ray.io-blue)](https://docs.ray.io/en/master/lightgbm-ray.html) 7 | 8 | LightGBM-Ray is a distributed backend for 9 | [LightGBM](https://lightgbm.readthedocs.io/), built 10 | on top of the 11 | distributed computing framework [Ray](https://ray.io). 12 | 13 | LightGBM-Ray 14 | 15 | - enables [multi-node](#usage) and [multi-GPU](#multi-gpu-training) training 16 | - integrates seamlessly with the distributed [hyperparameter optimization](#hyperparameter-tuning) library [Ray Tune](http://tune.io) 17 | - comes with [fault tolerance handling](#fault-tolerance) mechanisms, and 18 | - supports [distributed dataframes and distributed data loading](#distributed-data-loading) 19 | 20 | All releases are tested on large clusters and workloads. 21 | 22 | This package is based on [XGBoost-Ray](https://github.com/ray-project/xgboost_ray). As of now, XGBoost-Ray is a dependency for LightGBM-Ray. 23 | 24 | ## Installation 25 | 26 | You can install the latest LightGBM-Ray release from PyPI: 27 | 28 | ```bash 29 | pip install "lightgbm_ray" 30 | ``` 31 | 32 | If you'd like to install the latest master, use this command instead: 33 | 34 | ```bash 35 | pip install "git+https://github.com/ray-project/lightgbm_ray.git#egg=lightgbm_ray" 36 | ``` 37 | 38 | ## Usage 39 | 40 | LightGBM-Ray provides a drop-in replacement for LightGBM's `train` 41 | function. To pass data, a `RayDMatrix` object is required, the same 42 | data structure used by XGBoost-Ray. You can also use a scikit-learn 43 | interface - see the next section. 44 | 45 | Just as with the original `lgbm.train()` function, the 46 | [training parameters](https://lightgbm.readthedocs.io/en/latest/Parameters.html) 47 | are passed as the `params` dictionary.
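For example (a minimal sketch; the parameter values are illustrative, not tuned recommendations):

```python
# LightGBM parameters are passed through to the distributed workers
# exactly as they would be passed to `lgbm.train()`.
params = {
    "objective": "binary",
    "metric": ["binary_logloss", "binary_error"],
    "num_leaves": 31,
    "learning_rate": 0.05,
}
```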
48 | 49 | Ray-specific distributed training parameters are configured with a 50 | `lightgbm_ray.RayParams` object. For instance, you can set 51 | the `num_actors` property to specify how many distributed actors 52 | you would like to use. 53 | 54 | Here is a simplified example (which requires `sklearn`): 55 | 56 | **Training:** 57 | 58 | ```python 59 | from lightgbm_ray import RayDMatrix, RayParams, train 60 | from sklearn.datasets import load_breast_cancer 61 | 62 | train_x, train_y = load_breast_cancer(return_X_y=True) 63 | train_set = RayDMatrix(train_x, train_y) 64 | 65 | evals_result = {} 66 | bst = train( 67 | { 68 | "objective": "binary", 69 | "metric": ["binary_logloss", "binary_error"], 70 | }, 71 | train_set, 72 | evals_result=evals_result, 73 | valid_sets=[train_set], 74 | valid_names=["train"], 75 | verbose_eval=False, 76 | ray_params=RayParams(num_actors=2, cpus_per_actor=2)) 77 | 78 | bst.booster_.save_model("model.lgbm") 79 | print("Final training error: {:.4f}".format( 80 | evals_result["train"]["binary_error"][-1])) 81 | ``` 82 | 83 | **Prediction:** 84 | 85 | ```python 86 | from lightgbm_ray import RayDMatrix, RayParams, predict 87 | from sklearn.datasets import load_breast_cancer 88 | import lightgbm as lgbm 89 | 90 | data, labels = load_breast_cancer(return_X_y=True) 91 | 92 | dpred = RayDMatrix(data, labels) 93 | 94 | bst = lgbm.Booster(model_file="model.lgbm") 95 | pred_ray = predict(bst, dpred, ray_params=RayParams(num_actors=2)) 96 | 97 | print(pred_ray) 98 | ``` 99 | 100 | ### scikit-learn API 101 | 102 | LightGBM-Ray also features a scikit-learn API fully mirroring the pure 103 | LightGBM scikit-learn API, providing a complete drop-in 104 | replacement. The following estimators are available: 105 | 106 | - `RayLGBMClassifier` 107 | - `RayLGBMRegressor` 108 | 109 | Example usage of `RayLGBMClassifier`: 110 | 111 | ```python 112 | from lightgbm_ray import RayLGBMClassifier, RayParams 113 | from sklearn.datasets import load_breast_cancer 114 | from sklearn.model_selection import train_test_split 115 | 116 | seed = 42 117 | 118 | X, y = load_breast_cancer(return_X_y=True) 119 | X_train, X_test, y_train, y_test = train_test_split( 120 | X, y, train_size=0.25, random_state=42) 121 | 122 | clf = RayLGBMClassifier( 123 | n_jobs=2, # In LightGBM-Ray, n_jobs sets the number of actors 124 | random_state=seed) 125 | 126 | # scikit-learn API will automatically convert the data 127 | # to RayDMatrix format as needed. 128 | # You can also pass X as a RayDMatrix, in which case 129 | # y will be ignored. 130 | 131 | clf.fit(X_train, y_train) 132 | 133 | pred_ray = clf.predict(X_test) 134 | print(pred_ray) 135 | 136 | pred_proba_ray = clf.predict_proba(X_test) 137 | print(pred_proba_ray) 138 | 139 | # It is also possible to pass a RayParams object 140 | # to fit/predict/predict_proba methods - will override 141 | # n_jobs set during initialization 142 | 143 | clf.fit(X_train, y_train, ray_params=RayParams(num_actors=2)) 144 | 145 | pred_ray = clf.predict(X_test, ray_params=RayParams(num_actors=2)) 146 | print(pred_ray) 147 | ``` 148 |
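`RayLGBMRegressor` works the same way. A minimal sketch (the synthetic dataset and hyperparameter values here are illustrative only):

```python
from lightgbm_ray import RayLGBMRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=1_000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# As with the classifier, n_jobs sets the number of distributed actors
reg = RayLGBMRegressor(n_jobs=2, random_state=42)
reg.fit(X_train, y_train)

pred_ray = reg.predict(X_test)
print(pred_ray)
```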
149 | Things to keep in mind: 150 | 151 | - The `n_jobs` parameter controls the number of actors spawned. 152 | You can pass a `RayParams` object to the 153 | `fit`/`predict`/`predict_proba` methods as the `ray_params` argument 154 | for greater control over resource allocation. Doing 155 | so will override the value of `n_jobs` with the value of the 156 | `ray_params.num_actors` attribute. For more information, refer 157 | to the [Resources](#resources) section below. 158 | - By default `n_jobs` is set to `1`, which means the training 159 | will **not** be distributed. Make sure to either set `n_jobs` 160 | to a higher value or pass a `RayParams` object as outlined above 161 | in order to take advantage of LightGBM-Ray's functionality. 162 | - After calling `fit`, additional evaluation results (e.g. training time, 163 | number of rows, callback results) will be available under 164 | the `additional_results_` attribute. 165 | - `eval_` arguments are supported, but early stopping is not. 166 | - LightGBM-Ray's scikit-learn API is based on LightGBM 3.2.1. 167 | While we try to support older LightGBM versions, please note that 168 | this library is only fully tested and supported for LightGBM >= 3.2.1. 169 | 170 | For more information on the scikit-learn API, refer to the [LightGBM documentation](https://lightgbm.readthedocs.io/en/latest/Python-API.html#scikit-learn-api). 171 | 172 | ## Data loading 173 | 174 | Data is passed to LightGBM-Ray via a `RayDMatrix` object. 175 | 176 | The `RayDMatrix` lazily loads data and stores it sharded in the 177 | Ray object store. The Ray LightGBM actors then access these 178 | shards to run their training. 179 | 180 | A `RayDMatrix` supports various data and file types, like 181 | Pandas DataFrames, Numpy arrays, CSV files and Parquet files. 182 | 183 | Example loading multiple Parquet files: 184 | 185 | ```python 186 | import glob 187 | from lightgbm_ray import RayDMatrix, RayFileType 188 | 189 | # We can also pass a list of files 190 | path = list(sorted(glob.glob("/data/nyc-taxi/*/*/*.parquet"))) 191 | 192 | # This argument will be passed to `pd.read_parquet()` 193 | columns = [ 194 | "passenger_count", 195 | "trip_distance", "pickup_longitude", "pickup_latitude", 196 | "dropoff_longitude", "dropoff_latitude", 197 | "fare_amount", "extra", "mta_tax", "tip_amount", 198 | "tolls_amount", "total_amount" 199 | ] 200 | 201 | dtrain = RayDMatrix( 202 | path, 203 | label="passenger_count", # Will select this column as the label 204 | columns=columns, 205 | # ignore=["total_amount"], # Optional list of columns to ignore 206 | filetype=RayFileType.PARQUET) 207 | ``` 208 | 209 | 210 | 211 | ## Hyperparameter Tuning 212 | 213 | LightGBM-Ray integrates with [Ray Tune](https://tune.io) to provide distributed hyperparameter tuning for your 214 | distributed LightGBM models. You can run multiple LightGBM-Ray training runs in parallel, each with a different 215 | hyperparameter configuration, and each training run parallelized by itself. All you have to do is move your training 216 | code to a function, and pass the function to `tune.run`. Internally, `train` will detect if `tune` is being used and will 217 | automatically report results to Tune.
218 | 219 | Example using LightGBM-Ray with Ray Tune: 220 | 221 | ```python 222 | from lightgbm_ray import RayDMatrix, RayParams, train 223 | from sklearn.datasets import load_breast_cancer 224 | 225 | num_actors = 2 226 | num_cpus_per_actor = 2 227 | 228 | ray_params = RayParams( 229 | num_actors=num_actors, cpus_per_actor=num_cpus_per_actor) 230 | 231 | def train_model(config): 232 | train_x, train_y = load_breast_cancer(return_X_y=True) 233 | train_set = RayDMatrix(train_x, train_y) 234 | 235 | evals_result = {} 236 | bst = train( 237 | params=config, 238 | dtrain=train_set, 239 | evals_result=evals_result, 240 | valid_sets=[train_set], 241 | valid_names=["train"], 242 | verbose_eval=False, 243 | ray_params=ray_params) 244 | bst.booster_.save_model("model.lgbm") 245 | 246 | from ray import tune 247 | 248 | # Specify the hyperparameter search space. 249 | config = { 250 | "objective": "binary", 251 | "metric": ["binary_logloss", "binary_error"], 252 | "eta": tune.loguniform(1e-4, 1e-1), 253 | "subsample": tune.uniform(0.5, 1.0), 254 | "max_depth": tune.randint(1, 9) 255 | } 256 | 257 | # Make sure to use the `get_tune_resources` method to set the `resources_per_trial` 258 | analysis = tune.run( 259 | train_model, 260 | config=config, 261 | metric="train-binary_error", 262 | mode="min", 263 | num_samples=4, 264 | resources_per_trial=ray_params.get_tune_resources()) 265 | print("Best hyperparameters", analysis.best_config) 266 | ``` 267 | 268 | Also see `examples/simple_tune.py` for another example. 269 | 270 | ## Fault tolerance 271 | 272 | LightGBM-Ray leverages the stateful Ray actor model to 273 | enable fault-tolerant training. Currently, only non-elastic 274 | training is supported. 275 | 276 | ### Non-elastic training (warm restart) 277 | 278 | When an actor or node dies, LightGBM-Ray will retain the 279 | state of the remaining actors. In non-elastic training, 280 | the failed actors will be replaced as soon as resources 281 | are available again. Only these actors will reload their 282 | parts of the data. Training will resume once all actors 283 | are ready for training again. 284 | 285 | You can configure this mode in the `RayParams`: 286 | 287 | ```python 288 | from lightgbm_ray import RayParams 289 | 290 | ray_params = RayParams( 291 | max_actor_restarts=2, # How often actors are allowed to fail. Default = 0 292 | ) 293 | ``` 294 | 295 | ## Resources 296 | 297 | By default, LightGBM-Ray tries to determine the number of CPUs 298 | available and distributes them evenly across actors. 299 | 312 | 313 | It is important to note that distributed LightGBM needs at least 314 | two CPUs per actor to function efficiently (without blocking). 315 | Therefore, by default, at least two CPUs will be assigned to each actor, 316 | and an exception will be raised if an actor has less than two CPUs.
317 | It is possible to override this check by setting the 318 | `allow_less_than_two_cpus` argument to `True`, though it is not 319 | recommended, as it will negatively impact training performance. 320 | 321 | In the case of very large clusters or clusters with many different 322 | machine sizes, it makes sense to limit the number of CPUs per actor 323 | by setting the `cpus_per_actor` argument. Consider always 324 | setting this explicitly. 325 | 326 | The number of LightGBM actors always has to be set manually with 327 | the `num_actors` argument. 328 | 329 | ### Multi GPU training 330 | LightGBM-Ray enables multi-GPU training. The LightGBM core backend 331 | will automatically handle communication. 332 | All you have to do is start one actor per GPU and set LightGBM's 333 | `device_type` to a GPU-compatible option, e.g. `gpu` (see the LightGBM 334 | documentation for more details). 335 | 336 | For instance, if you have 2 machines with 4 GPUs each, you will want 337 | to start 8 remote actors, and set `gpus_per_actor=1`. There is usually 338 | no benefit in allocating less (e.g. 0.5) or more than one GPU per actor. 339 | 340 | You should divide the CPUs evenly across actors per machine, so if your 341 | machines have 16 CPUs in addition to the 4 GPUs, each actor should have 342 | 4 CPUs to use. 343 | 344 | ```python 345 | from lightgbm_ray import RayParams 346 | 347 | ray_params = RayParams( 348 | num_actors=8, 349 | gpus_per_actor=1, 350 | cpus_per_actor=4, # Divide evenly across actors per machine 351 | ) 352 | ``` 353 | 354 | ### How many remote actors should I use? 355 | 356 | This depends on your workload and your cluster setup. 357 | Generally there is no inherent benefit in running more than 358 | one remote actor per node for CPU-only training. This is because 359 | LightGBM core can already leverage multiple CPUs via threading. 360 | 361 | However, there are some cases when you should consider starting 362 | more than one actor per node: 363 | 364 | - For [**multi GPU training**](#multi-gpu-training), each GPU should have a separate 365 | remote actor. Thus, if your machine has 24 CPUs and 4 GPUs, 366 | you will want to start 4 remote actors with 6 CPUs and 1 GPU 367 | each. 368 | - In a **heterogeneous cluster**, you might want to find the 369 | [greatest common divisor](https://en.wikipedia.org/wiki/Greatest_common_divisor) 370 | for the number of CPUs. 371 | E.g. for a cluster with three nodes of 4, 8, and 12 CPUs, respectively, 372 | you should set the number of actors to 6 and the CPUs per 373 | actor to 4. 374 | 375 | ## Distributed data loading 376 | 377 | LightGBM-Ray can leverage both centralized and distributed data loading. 378 | 379 | In **centralized data loading**, the data is partitioned by the head node 380 | and stored in the object store. Each remote actor then retrieves its 381 | partitions by querying the Ray object store. Centralized loading is used 382 | when you pass centralized in-memory dataframes, such as Pandas dataframes 383 | or Numpy arrays, or when you pass a single source file, such as a single CSV 384 | or Parquet file. 385 | 386 | 387 | ```python 388 | from lightgbm_ray import RayDMatrix 389 | 390 | # This will use centralized data loading, as only one source file is specified 391 | # `label_col` is a column in the CSV, used as the target label 392 | dtrain = RayDMatrix("./source_file.csv", label="label_col") 393 | ``` 394 | 395 | In **distributed data loading**, each remote actor loads its data directly from 396 | the source (e.g.
local hard disk, NFS, HDFS, S3), 397 | without a central bottleneck. The data is still stored in the 398 | object store, but locally to each actor. This mode is used automatically 399 | when loading data from multiple CSV or Parquet files. Please note that 400 | we do not check or enforce partition sizes in this case - it is your job 401 | to make sure the data is evenly distributed across the source files. 402 | 403 | ```python 404 | from lightgbm_ray import RayDMatrix 405 | 406 | # This will use distributed data loading, as four source files are specified 407 | # Please note that you cannot schedule more than four actors in this case. 408 | # `label_col` is a column in the Parquet files, used as the target label 409 | dtrain = RayDMatrix([ 410 | "hdfs:///tmp/part1.parquet", 411 | "hdfs:///tmp/part2.parquet", 412 | "hdfs:///tmp/part3.parquet", 413 | "hdfs:///tmp/part4.parquet", 414 | ], label="label_col") 415 | ``` 416 | 417 | Lastly, LightGBM-Ray supports **distributed dataframe** representations, such 418 | as [Ray Datasets](https://docs.ray.io/en/latest/data/dataset.html), 419 | [Modin](https://modin.readthedocs.io/en/latest/) and 420 | [Dask dataframes](https://docs.dask.org/en/latest/dataframe.html) 421 | (used with [Dask on Ray](https://docs.ray.io/en/master/dask-on-ray.html)). 422 | Here, LightGBM-Ray will check on which nodes the distributed partitions 423 | are currently located, and will assign partitions to actors in order to 424 | minimize cross-node data transfer. Please note that we also assume here 425 | that partition sizes are uniform. 426 | 427 | ```python 428 | from lightgbm_ray import RayDMatrix 429 | 430 | # This will try to allocate the existing Modin partitions 431 | # to co-located Ray actors. If this is not possible, data will 432 | # be transferred across nodes 433 | dtrain = RayDMatrix(existing_modin_df) 434 | ``` 435 | 436 | ### Data sources 437 | 438 | The following data sources can be used with a `RayDMatrix` object. 439 | 440 | | Type | Centralized loading | Distributed loading | 441 | |------|---------------------|---------------------| 442 | | Numpy array | Yes | No | 443 | | Pandas dataframe | Yes | No | 444 | | Single CSV | Yes | No | 445 | | Multi CSV | Yes | Yes | 446 | | Single Parquet | Yes | No | 447 | | Multi Parquet | Yes | Yes | 448 | | [Ray Dataset](https://docs.ray.io/en/latest/data/dataset.html) | Yes | Yes | 449 | | [Petastorm](https://github.com/uber/petastorm) | Yes | Yes | 450 | | [Dask dataframe](https://docs.dask.org/en/latest/dataframe.html) | Yes | Yes | 451 | | [Modin dataframe](https://modin.readthedocs.io/en/latest/) | Yes | Yes | 452 | 453 | ## Memory usage 454 | 455 | Details coming soon. 456 | 457 | 494 | 495 | **Best practices** 496 | 497 | In order to reduce peak memory usage, consider the following 498 | suggestions: 499 | 500 | - Store data as `float32` or less. More precision is often 501 | not needed, and keeping data in a smaller format will 502 | help reduce peak memory usage for initial data loading. 503 | - Pass the `dtype` when loading data from CSV. Otherwise, 504 | floating point values will be loaded as `np.float64` 505 | by default, increasing peak memory usage by 33% (see the sketch below). 506 |
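A minimal sketch of both suggestions (the file and column names here are hypothetical, and we assume all columns in the CSV are numeric):

```python
import numpy as np
import pandas as pd
from lightgbm_ray import RayDMatrix

# Read the CSV with 32-bit floats instead of the default float64.
df = pd.read_csv("./train.csv", dtype=np.float32)

# The smaller dtype carries over to the distributed shards.
dtrain = RayDMatrix(df, label="label_col")
```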
507 | ## Placement Strategies 508 | 509 | LightGBM-Ray leverages Ray's Placement Group API (https://docs.ray.io/en/master/placement-group.html) 510 | to implement placement strategies for better fault tolerance. 511 | 512 | By default, a SPREAD strategy is used for training, which attempts to spread all of the training workers 513 | across the nodes in a cluster on a best-effort basis. This improves fault tolerance since it minimizes the 514 | number of worker failures when a node goes down, but comes at the cost of increased inter-node communication. 515 | To disable this strategy, set the `RXGB_USE_SPREAD_STRATEGY` environment variable to 0. If disabled, no 516 | particular placement strategy will be used. 517 | 518 | 520 | 521 | When LightGBM-Ray is used with Ray Tune for hyperparameter tuning, a PACK strategy is used. This strategy 522 | attempts to place all workers for each trial on the same node on a best-effort basis. This means that if a node 523 | goes down, it will be less likely to impact multiple trials. 524 | 525 | When placement strategies are used, LightGBM-Ray will wait for 100 seconds for the required resources 526 | to become available, and will fail if the required resources cannot be reserved and the cluster cannot autoscale 527 | to increase the number of resources. You can change the `RXGB_PLACEMENT_GROUP_TIMEOUT_S` environment variable to modify 528 | how long this timeout should be. 529 | 530 | ## More examples 531 | 532 | For complete end-to-end examples, please have a look at 533 | the [examples folder](https://github.com/ray-project/lightgbm_ray/tree/main/lightgbm_ray/examples/): 534 | 535 | * [Simple sklearn breast cancer dataset example](https://github.com/ray-project/lightgbm_ray/tree/main/lightgbm_ray/examples/simple.py) (requires `sklearn`) 536 | * [HIGGS classification example](https://github.com/ray-project/lightgbm_ray/tree/main/lightgbm_ray/examples/higgs.py) 537 | ([download dataset (2.6 GB)](https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz)) 538 | * [HIGGS classification example with Parquet](https://github.com/ray-project/lightgbm_ray/tree/main/lightgbm_ray/examples/higgs_parquet.py) (uses the same dataset) 539 | * [Test data classification](https://github.com/ray-project/lightgbm_ray/tree/main/lightgbm_ray/examples/train_on_test_data.py) (uses a self-generated dataset) 540 | 541 | ## Resources 542 | 543 | * [LightGBM-Ray documentation](https://docs.ray.io/en/master/lightgbm-ray.html) 544 | * [Ray community Slack](https://forms.gle/9TSdDYUgxYs8SA9e8) 545 | 546 | 581 | -------------------------------------------------------------------------------- /examples: -------------------------------------------------------------------------------- 1 | lightgbm_ray/examples/ -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Black + Clang formatter (if installed). This script formats all changed files from the last mergebase. 3 | # You are encouraged to run this locally before pushing changes for review. 4 | 5 | # Cause the script to exit if a single command fails 6 | set -euo pipefail 7 | 8 | FLAKE8_VERSION_REQUIRED="3.9.1" 9 | BLACK_VERSION_REQUIRED="22.10.0" 10 | SHELLCHECK_VERSION_REQUIRED="0.7.1" 11 | ISORT_VERSION_REQUIRED="5.10.1" 12 | 13 | check_python_command_exist() { 14 | VERSION="" 15 | case "$1" in 16 | black) 17 | VERSION=$BLACK_VERSION_REQUIRED 18 | ;; 19 | flake8) 20 | VERSION=$FLAKE8_VERSION_REQUIRED 21 | ;; 22 | isort) 23 | VERSION=$ISORT_VERSION_REQUIRED 24 | ;; 25 | *) 26 | echo "$1 is not a required dependency" 27 | exit 1 28 | esac 29 | if !
[ -x "$(command -v "$1")" ]; then 30 | echo "$1 not installed. Install the python package with: pip install $1==$VERSION" 31 | exit 1 32 | fi 33 | } 34 | 35 | check_docstyle() { 36 | echo "Checking docstyle..." 37 | violations=$(git ls-files | grep '.py$' | xargs grep -E '^[ ]+[a-z_]+ ?\([a-zA-Z]+\): ' | grep -v 'str(' | grep -v noqa || true) 38 | if [[ -n "$violations" ]]; then 39 | echo 40 | echo "=== Found Ray docstyle violations ===" 41 | echo "$violations" 42 | echo 43 | echo "Per the Google pydoc style, omit types from pydoc args as they are redundant: https://docs.ray.io/en/latest/ray-contribute/getting-involved.html#code-style " 44 | echo "If this is a false positive, you can add a '# noqa' comment to the line to ignore." 45 | exit 1 46 | fi 47 | return 0 48 | } 49 | 50 | check_python_command_exist black 51 | check_python_command_exist flake8 52 | check_python_command_exist isort 53 | 54 | # this stops git rev-parse from failing if we run this from the .git directory 55 | builtin cd "$(dirname "${BASH_SOURCE:-$0}")" 56 | 57 | ROOT="$(git rev-parse --show-toplevel)" 58 | builtin cd "$ROOT" || exit 1 59 | 60 | # NOTE(edoakes): black version differs based on installation method: 61 | # Option 1) 'black, 21.12b0 (compiled: no)' 62 | # Option 2) 'black, version 21.12b0' 63 | # For newer versions (at least 22.10.0), a second line is printed which must be dropped: 64 | # 65 | # black, 22.10.0 (compiled: yes) 66 | # Python (CPython) 3.9.13 67 | BLACK_VERSION_STR=$(black --version) 68 | if [[ "$BLACK_VERSION_STR" == *"compiled"* ]] 69 | then 70 | BLACK_VERSION=$(echo "$BLACK_VERSION_STR" | head -n 1 | awk '{print $2}') 71 | else 72 | BLACK_VERSION=$(echo "$BLACK_VERSION_STR" | head -n 1 | awk '{print $3}') 73 | fi 74 | FLAKE8_VERSION=$(flake8 --version | head -n 1 | awk '{print $1}') 75 | ISORT_VERSION=$(isort --version | grep VERSION | awk '{print $2}') 76 | 77 | # params: tool name, tool version, required version 78 | tool_version_check() { 79 | if [ "$2" != "$3" ]; then 80 | echo "WARNING: Ray uses $1 $3, You currently are using $2. This might generate different results." 81 | fi 82 | } 83 | 84 | tool_version_check "flake8" "$FLAKE8_VERSION" "$FLAKE8_VERSION_REQUIRED" 85 | tool_version_check "black" "$BLACK_VERSION" "$BLACK_VERSION_REQUIRED" 86 | tool_version_check "isort" "$ISORT_VERSION" "$ISORT_VERSION_REQUIRED" 87 | 88 | if command -v shellcheck >/dev/null; then 89 | SHELLCHECK_VERSION=$(shellcheck --version | awk '/^version:/ {print $2}') 90 | tool_version_check "shellcheck" "$SHELLCHECK_VERSION" "$SHELLCHECK_VERSION_REQUIRED" 91 | else 92 | echo "INFO: Ray uses shellcheck for shell scripts, which is not installed. You may install shellcheck=$SHELLCHECK_VERSION_REQUIRED with your system package manager." 93 | fi 94 | 95 | if command -v clang-format >/dev/null; then 96 | CLANG_FORMAT_VERSION=$(clang-format --version | awk '{print $3}') 97 | tool_version_check "clang-format" "$CLANG_FORMAT_VERSION" "12.0.0" 98 | else 99 | echo "WARNING: clang-format is not installed!" 100 | fi 101 | 102 | if [[ $(flake8 --version) != *"flake8_quotes"* ]]; then 103 | echo "WARNING: Ray uses flake8 with flake8_quotes. Might error without it. Install with: pip install flake8-quotes" 104 | fi 105 | 106 | if [[ $(flake8 --version) != *"flake8-bugbear"* ]]; then 107 | echo "WARNING: Ray uses flake8 with flake8-bugbear. Might error without it. Install with: pip install flake8-bugbear" 108 | fi 109 | 110 | SHELLCHECK_FLAGS=( 111 | --exclude=1090 # "Can't follow non-constant source. 
Use a directive to specify location." 112 | --exclude=1091 # "Not following {file} due to some error" 113 | --exclude=2207 # "Prefer mapfile or read -a to split command output (or quote to avoid splitting)." -- these aren't compatible with macOS's old Bash 114 | ) 115 | 116 | 117 | BLACK_EXCLUDES=( 118 | '--force-exclude' 119 | 'python/ray/cloudpickle/*|'` 120 | `'python/build/*|'` 121 | `'python/ray/core/src/ray/gcs/*|'` 122 | `'python/ray/thirdparty_files/*|'` 123 | `'python/ray/_private/thirdparty/*|'` 124 | `'python/ray/serve/tests/test_config_files/syntax_error\.py' 125 | ) 126 | 127 | GIT_LS_EXCLUDES=( 128 | ':(exclude)python/ray/cloudpickle/' 129 | ':(exclude)python/ray/_private/runtime_env/_clonevirtualenv.py' 130 | ) 131 | 132 | # TODO(barakmich): This should be cleaned up. I've at least excised the copies 133 | # of these arguments to this location, but the long-term answer is to actually 134 | # make a flake8 config file 135 | FLAKE8_PYX_IGNORES="--ignore=C408,E121,E123,E126,E211,E225,E226,E227,E24,E704,E999,W503,W504,W605" 136 | 137 | shellcheck_scripts() { 138 | shellcheck "${SHELLCHECK_FLAGS[@]}" "$@" 139 | } 140 | 141 | # Format specified files 142 | format_files() { 143 | local shell_files=() python_files=() bazel_files=() 144 | 145 | local name 146 | for name in "$@"; do 147 | local base="${name%.*}" 148 | local suffix="${name#"${base}"}" 149 | 150 | local shebang="" 151 | read -r shebang < "${name}" || true 152 | case "${shebang}" in 153 | '#!'*) 154 | shebang="${shebang#/usr/bin/env }" 155 | shebang="${shebang%% *}" 156 | shebang="${shebang##*/}" 157 | ;; 158 | esac 159 | 160 | if [ "${base}" = "WORKSPACE" ] || [ "${base}" = "BUILD" ] || [ "${suffix}" = ".BUILD" ] || [ "${suffix}" = ".bazel" ] || [ "${suffix}" = ".bzl" ]; then 161 | bazel_files+=("${name}") 162 | elif [ -z "${suffix}" ] && [ "${shebang}" != "${shebang#python}" ] || [ "${suffix}" != "${suffix#.py}" ]; then 163 | python_files+=("${name}") 164 | elif [ -z "${suffix}" ] && [ "${shebang}" != "${shebang%sh}" ] || [ "${suffix}" != "${suffix#.sh}" ]; then 165 | shell_files+=("${name}") 166 | else 167 | echo "error: failed to determine file type: ${name}" 1>&2 168 | return 1 169 | fi 170 | done 171 | 172 | if [ 0 -lt "${#python_files[@]}" ]; then 173 | isort "${python_files[@]}" 174 | black "${python_files[@]}" 175 | fi 176 | 177 | if command -v shellcheck >/dev/null; then 178 | if shellcheck --shell=sh --format=diff - < /dev/null; then 179 | if [ 0 -lt "${#shell_files[@]}" ]; then 180 | local difference 181 | difference="$(shellcheck_scripts --format=diff "${shell_files[@]}" || true && printf "-")" 182 | difference="${difference%-}" 183 | printf "%s" "${difference}" | patch -p1 184 | fi 185 | else 186 | echo "error: this version of shellcheck does not support diffs" 187 | fi 188 | fi 189 | } 190 | 191 | format_all_scripts() { 192 | command -v flake8 &> /dev/null; 193 | HAS_FLAKE8=$? 194 | 195 | # Run isort before black to fix imports and let black deal with file format. 196 | echo "$(date)" "isort...." 197 | git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \ 198 | isort 199 | echo "$(date)" "Black...." 200 | git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \ 201 | black "${BLACK_EXCLUDES[@]}" 202 | if [ "$HAS_FLAKE8" -eq 0 ]; then 203 | echo "$(date)" "Flake8...."
204 | git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 5 \ 205 | flake8 --config=.flake8 206 | fi 207 | 208 | if command -v shellcheck >/dev/null; then 209 | local shell_files non_shell_files 210 | non_shell_files=($(git ls-files -- ':(exclude)*.sh')) 211 | shell_files=($(git ls-files -- '*.sh')) 212 | if [ 0 -lt "${#non_shell_files[@]}" ]; then 213 | shell_files+=($(git --no-pager grep -l -- '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' "${non_shell_files[@]}" || true)) 214 | fi 215 | if [ 0 -lt "${#shell_files[@]}" ]; then 216 | echo "$(date)" "shellcheck scripts...." 217 | shellcheck_scripts "${shell_files[@]}" 218 | fi 219 | fi 220 | } 221 | 222 | # Format files that differ from main branch. Ignores dirs that are not slated 223 | # for autoformat yet. 224 | format_changed() { 225 | # The `if` guard ensures that the list of filenames is not empty; an empty 226 | # list would cause the formatter to receive 0 positional arguments and 227 | # make Black error. 228 | # 229 | # `diff-filter=ACRM` and $MERGEBASE ensure we only format files that 230 | # exist on both branches. 231 | MERGEBASE="$(git merge-base upstream/main HEAD)" 232 | 233 | if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.py' &>/dev/null; then 234 | git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \ 235 | isort 236 | fi 237 | 238 | if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.py' &>/dev/null; then 239 | git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \ 240 | black "${BLACK_EXCLUDES[@]}" 241 | if which flake8 >/dev/null; then 242 | git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \ 243 | flake8 --config=.flake8 244 | fi 245 | fi 246 | 247 | if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' &>/dev/null; then 248 | if which flake8 >/dev/null; then 249 | git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \ 250 | flake8 --config=.flake8 "$FLAKE8_PYX_IGNORES" 251 | fi 252 | fi 253 | 254 | if which clang-format >/dev/null; then 255 | if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.cc' '*.h' &>/dev/null; then 256 | git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.cc' '*.h' | xargs -P 5 \ 257 | clang-format -i 258 | fi 259 | fi 260 | 261 | if command -v shellcheck >/dev/null; then 262 | local shell_files non_shell_files 263 | non_shell_files=($(git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- ':(exclude)*.sh')) 264 | shell_files=($(git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.sh')) 265 | if [ 0 -lt "${#non_shell_files[@]}" ]; then 266 | shell_files+=($(git --no-pager grep -l -- '^#!\(/usr\)\?/bin/\(env \+\)\?\(ba\)\?sh' "${non_shell_files[@]}" || true)) 267 | fi 268 | if [ 0 -lt "${#shell_files[@]}" ]; then 269 | shellcheck_scripts "${shell_files[@]}" 270 | fi 271 | fi 272 | } 273 | 274 | # This flag formats individual files. --files *must* be the first command line 275 | # arg to use this option. 276 | if [ "${1-}" == '--files' ]; then 277 | format_files "${@:2}" 278 | # If `--all` or `--all-scripts` is passed, any further arguments are ignored. 279 | # Format the entire python directory and other scripts. 280 | elif [ "${1-}" == '--all-scripts' ]; then 281 | format_all_scripts "${@}" 282 | if [ -n "${FORMAT_SH_PRINT_DIFF-}" ]; then git --no-pager diff; fi 283 | # Format all Python, C++, Java and other script files.
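# `--all` is currently an alias for `--all-scripts`: both format every
# tracked file. With no flag at all, execution falls through to the
# default branch below, which only touches files that differ from
# upstream/main.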
284 | elif [ "${1-}" == '--all' ]; then 285 | format_all_scripts "${@}" 286 | if [ -n "${FORMAT_SH_PRINT_DIFF-}" ]; then git --no-pager diff; fi 287 | else 288 | # Add the upstream remote if it doesn't exist 289 | if ! git remote -v | grep -q upstream; then 290 | git remote add 'upstream' 'https://github.com/ray-project/lightgbm_ray.git' 291 | fi 292 | 293 | # Only fetch main since that's the branch we're diffing against. 294 | git fetch upstream main || true 295 | 296 | # Format only the files that changed in last commit. 297 | format_changed 298 | fi 299 | 300 | check_docstyle 301 | 302 | if ! git diff --quiet &>/dev/null; then 303 | echo 'Reformatted changed files. Please review and stage the changes.' 304 | echo 'Files updated:' 305 | echo 306 | 307 | git --no-pager diff --name-only 308 | 309 | exit 1 310 | fi 311 | -------------------------------------------------------------------------------- /lightgbm_ray/__init__.py: -------------------------------------------------------------------------------- 1 | from xgboost_ray.matrix import ( 2 | Data, 3 | RayDeviceQuantileDMatrix, 4 | RayDMatrix, 5 | RayFileType, 6 | RayShardingMode, 7 | combine_data, 8 | ) 9 | 10 | from lightgbm_ray.main import RayParams, predict, train 11 | from lightgbm_ray.sklearn import RayLGBMClassifier, RayLGBMRegressor 12 | 13 | __version__ = "0.1.10" 14 | 15 | __all__ = [ 16 | "__version__", 17 | "RayParams", 18 | "RayDMatrix", 19 | "RayDeviceQuantileDMatrix", 20 | "RayFileType", 21 | "RayShardingMode", 22 | "Data", 23 | "combine_data", 24 | "train", 25 | "predict", 26 | "RayLGBMClassifier", 27 | "RayLGBMRegressor", 28 | ] 29 | -------------------------------------------------------------------------------- /lightgbm_ray/callback.py: -------------------------------------------------------------------------------- 1 | from xgboost_ray.callback import ( 2 | DistributedCallback, 3 | DistributedCallbackContainer, 4 | EnvironmentCallback, 5 | ) 6 | 7 | __all__ = ["DistributedCallback", "DistributedCallbackContainer", "EnvironmentCallback"] 8 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/lightgbm_ray/4c4d3413f86db769bddb6d08e2480a04bc75d712/lightgbm_ray/examples/__init__.py -------------------------------------------------------------------------------- /lightgbm_ray/examples/create_test_data.py: -------------------------------------------------------------------------------- 1 | from xgboost_ray.tests.utils import create_parquet 2 | 3 | 4 | def main(): 5 | create_parquet( 6 | "example.parquet", 7 | num_rows=1_000_000, 8 | num_partitions=100, 9 | num_features=8, 10 | num_classes=2, 11 | ) 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/higgs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | from lightgbm_ray import RayDMatrix, RayParams, train 5 | 6 | FILENAME_CSV = "HIGGS.csv.gz" 7 | 8 | 9 | def download_higgs(target_file): 10 | url = ( 11 | "https://archive.ics.uci.edu/ml/machine-learning-databases/" 12 | "00280/HIGGS.csv.gz" 13 | ) 14 | 15 | try: 16 | import urllib.request 17 | except ImportError as e: 18 | raise ValueError( 19 | f"Automatic downloading of the HIGGS dataset requires `urllib`." 
20 | f"\nFIX THIS by running `pip install urllib` or manually " 21 | f"downloading the dataset from {url}." 22 | ) from e 23 | 24 | print(f"Downloading HIGGS dataset to {target_file}") 25 | urllib.request.urlretrieve(url, target_file) 26 | return os.path.exists(target_file) 27 | 28 | 29 | def main(): 30 | # Example adapted from this blog post: 31 | # https://medium.com/rapids-ai/a-new-official-dask-api-for-xgboost-e8b10f3d1eb7 32 | # This uses the HIGGS dataset. Download here: 33 | # https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz 34 | 35 | if not os.path.exists(FILENAME_CSV): 36 | assert download_higgs(FILENAME_CSV), "Downloading of HIGGS dataset failed." 37 | print("HIGGS dataset downloaded.") 38 | else: 39 | print("HIGGS dataset found locally.") 40 | 41 | colnames = ["label"] + ["feature-%02d" % i for i in range(1, 29)] 42 | 43 | dtrain = RayDMatrix(os.path.abspath(FILENAME_CSV), label="label", names=colnames) 44 | 45 | config = { 46 | "objective": "binary", 47 | "metric": ["binary_logloss", "binary_error"], 48 | } 49 | 50 | evals_result = {} 51 | 52 | start = time.time() 53 | bst = train( 54 | config, 55 | dtrain, 56 | evals_result=evals_result, 57 | ray_params=RayParams(max_actor_restarts=1, num_actors=2), 58 | num_boost_round=100, 59 | evals=[(dtrain, "train")], 60 | ) 61 | taken = time.time() - start 62 | print(f"TRAIN TIME TAKEN: {taken:.2f} seconds") 63 | 64 | bst.booster_.save_model("higgs.lgbm") 65 | print( 66 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 67 | ) 68 | 69 | 70 | if __name__ == "__main__": 71 | import ray 72 | 73 | ray.init() 74 | 75 | start = time.time() 76 | main() 77 | taken = time.time() - start 78 | print(f"TOTAL TIME TAKEN: {taken:.2f} seconds") 79 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/higgs_parquet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import pandas as pd 5 | import pyarrow as pa 6 | import pyarrow.parquet as pq 7 | 8 | from lightgbm_ray import RayDMatrix, RayParams, train 9 | 10 | from .higgs import download_higgs 11 | 12 | FILENAME_CSV = "HIGGS.csv.gz" 13 | FILENAME_PARQUET = "HIGGS.parquet" 14 | 15 | 16 | def csv_to_parquet(in_file, out_file, chunksize=100_000, **csv_kwargs): 17 | if os.path.exists(out_file): 18 | return False 19 | 20 | print(f"Converting CSV {in_file} to PARQUET {out_file}") 21 | csv_stream = pd.read_csv( 22 | in_file, sep=",", chunksize=chunksize, low_memory=False, **csv_kwargs 23 | ) 24 | 25 | parquet_schema = None 26 | parquet_writer = None 27 | for i, chunk in enumerate(csv_stream): 28 | print("Chunk", i) 29 | if not parquet_schema: 30 | # Guess the schema of the CSV file from the first chunk 31 | parquet_schema = pa.Table.from_pandas(df=chunk).schema 32 | # Open a Parquet file for writing 33 | parquet_writer = pq.ParquetWriter( 34 | out_file, parquet_schema, compression="snappy" 35 | ) 36 | # Write CSV chunk to the parquet file 37 | table = pa.Table.from_pandas(chunk, schema=parquet_schema) 38 | parquet_writer.write_table(table) 39 | 40 | parquet_writer.close() 41 | return True 42 | 43 | 44 | def main(): 45 | # Example adapted from this blog post: 46 | # https://medium.com/rapids-ai/a-new-official-dask-api-for-xgboost-e8b10f3d1eb7 47 | # This uses the HIGGS dataset. 
Download here: 48 | # https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz 49 | 50 | if not os.path.exists(FILENAME_PARQUET): 51 | if not os.path.exists(FILENAME_CSV): 52 | download_higgs(FILENAME_CSV) 53 | print("Downloaded HIGGS csv dataset") 54 | print("Converting HIGGS csv dataset to parquet") 55 | csv_to_parquet( 56 | FILENAME_CSV, 57 | FILENAME_PARQUET, 58 | names=[ 59 | "label", 60 | "feature-01", 61 | "feature-02", 62 | "feature-03", 63 | "feature-04", 64 | "feature-05", 65 | "feature-06", 66 | "feature-07", 67 | "feature-08", 68 | "feature-09", 69 | "feature-10", 70 | "feature-11", 71 | "feature-12", 72 | "feature-13", 73 | "feature-14", 74 | "feature-15", 75 | "feature-16", 76 | "feature-17", 77 | "feature-18", 78 | "feature-19", 79 | "feature-20", 80 | "feature-21", 81 | "feature-22", 82 | "feature-23", 83 | "feature-24", 84 | "feature-25", 85 | "feature-26", 86 | "feature-27", 87 | "feature-28", 88 | ], 89 | ) 90 | 91 | colnames = ["label"] + ["feature-%02d" % i for i in range(1, 29)] 92 | 93 | # Here we load the Parquet file 94 | dtrain = RayDMatrix( 95 | os.path.abspath(FILENAME_PARQUET), label="label", columns=colnames 96 | ) 97 | 98 | config = { 99 | "objective": "binary", 100 | "metric": ["binary_logloss", "binary_error"], 101 | } 102 | 103 | evals_result = {} 104 | 105 | start = time.time() 106 | bst = train( 107 | config, 108 | dtrain, 109 | evals_result=evals_result, 110 | ray_params=RayParams(max_actor_restarts=1, num_actors=2), 111 | num_boost_round=100, 112 | evals=[(dtrain, "train")], 113 | ) 114 | taken = time.time() - start 115 | print(f"TRAIN TIME TAKEN: {taken:.2f} seconds") 116 | 117 | bst.booster_.save_model("higgs.lgbm") 118 | print( 119 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 120 | ) 121 | 122 | 123 | if __name__ == "__main__": 124 | import ray 125 | 126 | ray.init() 127 | 128 | start = time.time() 129 | main() 130 | taken = time.time() - start 131 | print(f"TOTAL TIME TAKEN: {taken:.2f} seconds") 132 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/readme.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa E501 2 | 3 | 4 | def readme_simple(): 5 | from sklearn.datasets import load_breast_cancer 6 | 7 | from lightgbm_ray import RayDMatrix, RayParams, train 8 | 9 | train_x, train_y = load_breast_cancer(return_X_y=True) 10 | train_set = RayDMatrix(train_x, train_y) 11 | 12 | evals_result = {} 13 | bst = train( 14 | { 15 | "objective": "binary", 16 | "metric": ["binary_logloss", "binary_error"], 17 | }, 18 | train_set, 19 | evals_result=evals_result, 20 | valid_sets=[train_set], 21 | valid_names=["train"], 22 | verbose_eval=False, 23 | ray_params=RayParams(num_actors=2, cpus_per_actor=2), 24 | ) 25 | 26 | bst.booster_.save_model("model.lgbm") 27 | print( 28 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 29 | ) 30 | 31 | 32 | def readme_predict(): 33 | import lightgbm as lgbm 34 | from sklearn.datasets import load_breast_cancer 35 | 36 | from lightgbm_ray import RayDMatrix, RayParams, predict 37 | 38 | data, labels = load_breast_cancer(return_X_y=True) 39 | 40 | dpred = RayDMatrix(data, labels) 41 | 42 | bst = lgbm.Booster(model_file="model.lgbm") 43 | pred_ray = predict(bst, dpred, ray_params=RayParams(num_actors=2)) 44 | 45 | print(pred_ray) 46 | 47 | 48 | def readme_tune(): 49 | from sklearn.datasets import load_breast_cancer 50 | 51 | from 
lightgbm_ray import RayDMatrix, RayParams, train 52 | 53 | num_actors = 2 54 | num_cpus_per_actor = 2 55 | 56 | ray_params = RayParams(num_actors=num_actors, cpus_per_actor=num_cpus_per_actor) 57 | 58 | def train_model(config): 59 | train_x, train_y = load_breast_cancer(return_X_y=True) 60 | train_set = RayDMatrix(train_x, train_y) 61 | 62 | evals_result = {} 63 | bst = train( 64 | params=config, 65 | dtrain=train_set, 66 | evals_result=evals_result, 67 | valid_sets=[train_set], 68 | valid_names=["train"], 69 | verbose_eval=False, 70 | ray_params=ray_params, 71 | ) 72 | bst.booster_.save_model("model.lgbm") 73 | 74 | from ray import tune 75 | 76 | # Specify the hyperparameter search space. 77 | config = { 78 | "objective": "binary", 79 | "metric": ["binary_logloss", "binary_error"], 80 | "eta": tune.loguniform(1e-4, 1e-1), 81 | "subsample": tune.uniform(0.5, 1.0), 82 | "max_depth": tune.randint(1, 9), 83 | } 84 | 85 | # Make sure to use the `get_tune_resources` method to set the `resources_per_trial` 86 | analysis = tune.run( 87 | train_model, 88 | config=config, 89 | metric="train-binary_error", 90 | mode="min", 91 | num_samples=4, 92 | resources_per_trial=ray_params.get_tune_resources(), 93 | ) 94 | print("Best hyperparameters", analysis.best_config) 95 | 96 | 97 | if __name__ == "__main__": 98 | import ray 99 | 100 | ray.init(num_cpus=5) 101 | 102 | print("Readme: Simple example") 103 | readme_simple() 104 | readme_predict() 105 | try: 106 | print("Readme: Ray Tune example") 107 | readme_tune() 108 | except ImportError: 109 | print("Ray Tune not installed.") 110 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/readme_sklearn_api.py: -------------------------------------------------------------------------------- 1 | def readme_sklearn_api(): 2 | from sklearn.datasets import load_breast_cancer 3 | from sklearn.model_selection import train_test_split 4 | 5 | from lightgbm_ray import RayLGBMClassifier, RayParams 6 | 7 | seed = 42 8 | 9 | X, y = load_breast_cancer(return_X_y=True) 10 | X_train, X_test, y_train, y_test = train_test_split( 11 | X, y, train_size=0.25, random_state=42 12 | ) 13 | 14 | clf = RayLGBMClassifier( 15 | n_jobs=2, random_state=seed # In LightGBM-Ray, n_jobs sets the number of actors 16 | ) 17 | 18 | # scikit-learn API will automatically convert the data 19 | # to RayDMatrix format as needed. 20 | # You can also pass X as a RayDMatrix, in which case 21 | # y will be ignored. 
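    # For illustration (hypothetical call, not executed here), the
    # following would train on a pre-built matrix and skip the conversion:
    #   dtrain = RayDMatrix(X_train, y_train)
    #   clf.fit(dtrain, y=None)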
22 | 23 | clf.fit(X_train, y_train) 24 | 25 | pred_ray = clf.predict(X_test) 26 | print(pred_ray) 27 | 28 | pred_proba_ray = clf.predict_proba(X_test) 29 | print(pred_proba_ray) 30 | 31 | # It is also possible to pass a RayParams object 32 | # to fit/predict/predict_proba methods - will override 33 | # n_jobs set during initialization 34 | 35 | clf.fit(X_train, y_train, ray_params=RayParams(num_actors=2)) 36 | 37 | pred_ray = clf.predict(X_test, ray_params=RayParams(num_actors=2)) 38 | print(pred_ray) 39 | 40 | 41 | if __name__ == "__main__": 42 | import ray 43 | 44 | ray.init(num_cpus=5) 45 | 46 | print("Readme: scikit-learn API example") 47 | readme_sklearn_api() 48 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/simple.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import ray 4 | from sklearn import datasets 5 | from sklearn.model_selection import train_test_split 6 | 7 | from lightgbm_ray import RayDMatrix, RayParams, train 8 | 9 | 10 | def main(cpus_per_actor, num_actors): 11 | # Load dataset 12 | data, labels = datasets.load_breast_cancer(return_X_y=True) 13 | # Split into train and test set 14 | train_x, test_x, train_y, test_y = train_test_split(data, labels, test_size=0.25) 15 | 16 | train_set = RayDMatrix(train_x, train_y) 17 | test_set = RayDMatrix(test_x, test_y) 18 | 19 | evals_result = {} 20 | 21 | # Set LGBM config. 22 | lightgbm_params = { 23 | "objective": "binary", 24 | "metric": ["binary_logloss", "binary_error"], 25 | } 26 | 27 | # Train the classifier 28 | bst = train( 29 | params=lightgbm_params, 30 | dtrain=train_set, 31 | valid_sets=[test_set], 32 | valid_names=["eval"], 33 | evals_result=evals_result, 34 | ray_params=RayParams( 35 | max_actor_restarts=0, 36 | gpus_per_actor=0, 37 | cpus_per_actor=cpus_per_actor, 38 | num_actors=num_actors, 39 | ), 40 | verbose_eval=False, 41 | num_boost_round=10, 42 | ) 43 | 44 | model_path = "simple.lgbm" 45 | bst.booster_.save_model(model_path) 46 | print( 47 | "Final validation error: {:.4f}".format( 48 | evals_result["eval"]["binary_error"][-1] 49 | ) 50 | ) 51 | 52 | 53 | if __name__ == "__main__": 54 | parser = argparse.ArgumentParser() 55 | parser.add_argument( 56 | "--address", required=False, type=str, help="the address to use for Ray" 57 | ) 58 | parser.add_argument( 59 | "--server-address", 60 | required=False, 61 | type=str, 62 | help="Address of the remote server if using Ray Client.", 63 | ) 64 | parser.add_argument( 65 | "--cpus-per-actor", 66 | type=int, 67 | default=2, 68 | help="Sets number of CPUs per lightgbm training worker.", 69 | ) 70 | parser.add_argument( 71 | "--num-actors", 72 | type=int, 73 | default=2, 74 | help="Sets number of lightgbm workers to use.", 75 | ) 76 | parser.add_argument("--smoke-test", action="store_true", default=False, help="gpu") 77 | 78 | args, _ = parser.parse_known_args() 79 | 80 | if args.smoke_test: 81 | ray.init(num_cpus=args.num_actors * args.cpus_per_actor) 82 | elif args.server_address: 83 | ray.util.connect(args.server_address) 84 | else: 85 | ray.init(address=args.address) 86 | 87 | main(args.cpus_per_actor, args.num_actors) 88 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/simple_dask.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import ray 6 | from xgboost_ray.data_sources.dask 
import DASK_INSTALLED 7 | 8 | from lightgbm_ray import RayDMatrix, RayParams, train 9 | 10 | 11 | def main(cpus_per_actor, num_actors): 12 | if not DASK_INSTALLED: 13 | print("Dask is not installed. Install with `pip install dask`") 14 | return 15 | 16 | # Local import so the installation check comes first 17 | import dask 18 | import dask.dataframe as dd 19 | from ray.util.dask import ray_dask_get 20 | 21 | dask.config.set(scheduler=ray_dask_get) 22 | 23 | # Generate dataset 24 | x = np.repeat(range(8), 16).reshape((32, 4)) 25 | # Even numbers --> 0, odd numbers --> 1 26 | y = np.tile(np.repeat(range(2), 4), 4) 27 | 28 | # Flip some bits to reduce max accuracy 29 | bits_to_flip = np.random.choice(32, size=6, replace=False) 30 | y[bits_to_flip] = 1 - y[bits_to_flip] 31 | 32 | data = pd.DataFrame(x) 33 | data["label"] = y 34 | 35 | # Split into 4 partitions 36 | dask_df = dd.from_pandas(data, npartitions=4) 37 | 38 | train_set = RayDMatrix(dask_df, "label") 39 | 40 | evals_result = {} 41 | # Set XGBoost config. 42 | lightgbm_params = { 43 | "objective": "binary", 44 | "metric": ["binary_logloss", "binary_error"], 45 | } 46 | 47 | # Train the classifier 48 | bst = train( 49 | params=lightgbm_params, 50 | dtrain=train_set, 51 | valid_sets=[train_set], 52 | valid_names=["train"], 53 | evals_result=evals_result, 54 | ray_params=RayParams( 55 | max_actor_restarts=0, 56 | gpus_per_actor=0, 57 | cpus_per_actor=cpus_per_actor, 58 | num_actors=num_actors, 59 | ), 60 | verbose_eval=False, 61 | num_boost_round=10, 62 | ) 63 | 64 | model_path = "dask.lgbm" 65 | bst.booster_.save_model(model_path) 66 | print( 67 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | parser = argparse.ArgumentParser() 73 | parser.add_argument( 74 | "--address", required=False, type=str, help="the address to use for Ray" 75 | ) 76 | parser.add_argument( 77 | "--server-address", 78 | required=False, 79 | type=str, 80 | help="Address of the remote server if using Ray Client.", 81 | ) 82 | parser.add_argument( 83 | "--cpus-per-actor", 84 | type=int, 85 | default=2, 86 | help="Sets number of CPUs per lightgbm training worker.", 87 | ) 88 | parser.add_argument( 89 | "--num-actors", 90 | type=int, 91 | default=2, 92 | help="Sets number of lightgbm workers to use.", 93 | ) 94 | parser.add_argument("--smoke-test", action="store_true", default=False, help="gpu") 95 | 96 | args, _ = parser.parse_known_args() 97 | 98 | if args.smoke_test: 99 | ray.init(num_cpus=args.num_actors * args.cpus_per_actor) 100 | elif args.server_address: 101 | ray.util.connect(args.server_address) 102 | else: 103 | ray.init(address=args.address) 104 | 105 | main(args.cpus_per_actor, args.num_actors) 106 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/simple_modin.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import ray 6 | from packaging.version import Version 7 | from sklearn.utils import shuffle 8 | from xgboost_ray.data_sources.modin import MODIN_INSTALLED 9 | 10 | from lightgbm_ray import RayDMatrix, RayParams, train 11 | 12 | 13 | def main(cpus_per_actor, num_actors): 14 | if not MODIN_INSTALLED: 15 | print( 16 | "Modin is not installed or installed in a version that is not " 17 | "compatible with lightgbm_ray (< 0.9.0)." 
18 | ) 19 | return 20 | 21 | import modin 22 | 23 | if Version(modin.__version__) < Version("0.16.0") and Version( 24 | ray.__version__ 25 | ) >= Version("2.6.0"): 26 | print("modin<=0.16.0 is not compatible with ray>=2.6.0.") 27 | return 28 | 29 | # Import modin after initializing Ray 30 | from modin.distributed.dataframe.pandas import from_partitions 31 | 32 | # Generate dataset 33 | x = np.repeat(range(8), 16).reshape((32, 4)) 34 | # Even numbers --> 0, odd numbers --> 1 35 | y = np.tile(np.repeat(range(2), 4), 4) 36 | 37 | # Flip some bits to reduce max accuracy 38 | bits_to_flip = np.random.choice(32, size=6, replace=False) 39 | y[bits_to_flip] = 1 - y[bits_to_flip] 40 | 41 | # LightGBM requires well-shuffled data 42 | x, y = shuffle(x, y, random_state=1) 43 | 44 | data = pd.DataFrame(x) 45 | data["label"] = y 46 | 47 | # Split into 4 partitions 48 | partitions = [ray.put(part) for part in np.split(data, 4)] 49 | 50 | # Create modin df here 51 | modin_df = from_partitions(partitions, axis=0) 52 | 53 | train_set = RayDMatrix(modin_df, "label") 54 | 55 | evals_result = {} 56 | # Set LGBM config. 57 | lightgbm_params = { 58 | "objective": "binary", 59 | "metric": ["binary_logloss", "binary_error"], 60 | } 61 | 62 | # Train the classifier 63 | bst = train( 64 | params=lightgbm_params, 65 | dtrain=train_set, 66 | valid_sets=[train_set], 67 | valid_names=["train"], 68 | evals_result=evals_result, 69 | ray_params=RayParams( 70 | max_actor_restarts=0, 71 | gpus_per_actor=0, 72 | cpus_per_actor=cpus_per_actor, 73 | num_actors=num_actors, 74 | ), 75 | verbose_eval=False, 76 | num_boost_round=10, 77 | ) 78 | 79 | model_path = "modin.lgbm" 80 | bst.booster_.save_model(model_path) 81 | print( 82 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 83 | ) 84 | 85 | 86 | if __name__ == "__main__": 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument( 89 | "--address", required=False, type=str, help="the address to use for Ray" 90 | ) 91 | parser.add_argument( 92 | "--server-address", 93 | required=False, 94 | type=str, 95 | help="Address of the remote server if using Ray Client.", 96 | ) 97 | parser.add_argument( 98 | "--cpus-per-actor", 99 | type=int, 100 | default=2, 101 | help="Sets number of CPUs per lightgbm training worker.", 102 | ) 103 | parser.add_argument( 104 | "--num-actors", 105 | type=int, 106 | default=2, 107 | help="Sets number of lightgbm workers to use.", 108 | ) 109 | parser.add_argument("--smoke-test", action="store_true", default=False, help="gpu") 110 | 111 | args, _ = parser.parse_known_args() 112 | 113 | if args.smoke_test: 114 | ray.init(num_cpus=(args.num_actors * args.cpus_per_actor) + 1) 115 | elif args.server_address: 116 | ray.util.connect(args.server_address) 117 | else: 118 | ray.init(address=args.address) 119 | 120 | main(args.cpus_per_actor, args.num_actors) 121 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/simple_predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import lightgbm as lgbm 4 | import numpy as np 5 | from sklearn import datasets 6 | 7 | from lightgbm_ray import RayDMatrix, RayParams, predict 8 | 9 | 10 | def main(): 11 | if not os.path.exists("simple.lgbm"): 12 | raise ValueError( 13 | "Model file not found: `simple.lgbm`" 14 | "\nFIX THIS by running `python `simple.py` first to " 15 | "train the model." 
16 | ) 17 | 18 | # Load dataset 19 | data, labels = datasets.load_breast_cancer(return_X_y=True) 20 | 21 | dmat_ray = RayDMatrix(data, labels) 22 | 23 | bst = lgbm.Booster(model_file="simple.lgbm") 24 | 25 | pred_lgbm = bst.predict(data) 26 | pred_ray = predict(bst, dmat_ray, ray_params=RayParams(num_actors=2)) 27 | 28 | np.testing.assert_array_equal(pred_lgbm, pred_ray) 29 | print(pred_ray) 30 | 31 | 32 | if __name__ == "__main__": 33 | main() 34 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/simple_ray_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import ray 6 | from sklearn.utils import shuffle 7 | 8 | from lightgbm_ray import RayDMatrix, RayParams, train 9 | 10 | 11 | def main(cpus_per_actor, num_actors): 12 | # Generate dataset 13 | x = np.repeat(range(8), 16).reshape((32, 4)) 14 | # Even numbers --> 0, odd numbers --> 1 15 | y = np.tile(np.repeat(range(2), 4), 4) 16 | 17 | # Flip some bits to reduce max accuracy 18 | bits_to_flip = np.random.choice(32, size=6, replace=False) 19 | y[bits_to_flip] = 1 - y[bits_to_flip] 20 | 21 | # LightGBM requires well-shuffled data 22 | x, y = shuffle(x, y, random_state=1) 23 | 24 | data = pd.DataFrame(x) 25 | # Ray Datasets require all column names to be strings 26 | data.columns = [str(c) for c in data.columns] 27 | data["label"] = y 28 | 29 | # There was a recent API change - the first clause covers the new 30 | # and current Ray master API. 31 | if hasattr(ray.data, "from_pandas_refs"): 32 | # Generate Ray dataset from 4 partitions 33 | ray_ds = ray.data.from_pandas(np.split(data, 4)) 34 | else: 35 | # Split into 4 partitions 36 | partitions = [ray.put(part) for part in np.split(data, 4)] 37 | ray_ds = ray.data.from_pandas(partitions) 38 | 39 | 40 | 41 | 42 | train_set = RayDMatrix(ray_ds, "label") 43 | 44 | evals_result = {} 45 | # Set LightGBM config.
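    # The "binary" objective trains a logistic-loss classifier;
    # binary_error is the misclassification rate at the default 0.5
    # threshold, and both metrics are recorded per boosting round.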
46 | lightgbm_params = { 47 | "objective": "binary", 48 | "metric": ["binary_logloss", "binary_error"], 49 | } 50 | 51 | # Train the classifier 52 | bst = train( 53 | params=lightgbm_params, 54 | dtrain=train_set, 55 | valid_sets=[train_set], 56 | valid_names=["train"], 57 | evals_result=evals_result, 58 | ray_params=RayParams( 59 | max_actor_restarts=0, 60 | gpus_per_actor=0, 61 | cpus_per_actor=cpus_per_actor, 62 | num_actors=num_actors, 63 | ), 64 | verbose_eval=False, 65 | num_boost_round=10, 66 | ) 67 | 68 | model_path = "ray_datasets.lgbm" 69 | bst.booster_.save_model(model_path) 70 | print( 71 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 72 | ) 73 | 74 | 75 | if __name__ == "__main__": 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument( 78 | "--address", required=False, type=str, help="the address to use for Ray" 79 | ) 80 | parser.add_argument( 81 | "--server-address", 82 | required=False, 83 | type=str, 84 | help="Address of the remote server if using Ray Client.", 85 | ) 86 | parser.add_argument( 87 | "--cpus-per-actor", 88 | type=int, 89 | default=2, 90 | help="Sets number of CPUs per lightgbm training worker.", 91 | ) 92 | parser.add_argument( 93 | "--num-actors", 94 | type=int, 95 | default=2, 96 | help="Sets number of lightgbm workers to use.", 97 | ) 98 | parser.add_argument("--smoke-test", action="store_true", default=False, help="gpu") 99 | 100 | args, _ = parser.parse_known_args() 101 | 102 | if args.smoke_test: 103 | ray.init(num_cpus=(args.num_actors * args.cpus_per_actor) + 1) 104 | elif args.server_address: 105 | ray.util.connect(args.server_address) 106 | else: 107 | ray.init(address=args.address) 108 | 109 | main(args.cpus_per_actor, args.num_actors) 110 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/simple_tune.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import ray 5 | from ray import tune 6 | from sklearn import datasets 7 | from sklearn.model_selection import train_test_split 8 | 9 | import lightgbm_ray 10 | from lightgbm_ray import RayDMatrix, RayParams, train 11 | 12 | 13 | def train_breast_cancer(config, ray_params): 14 | # Load dataset 15 | data, labels = datasets.load_breast_cancer(return_X_y=True) 16 | # Split into train and test set 17 | train_x, test_x, train_y, test_y = train_test_split(data, labels, test_size=0.25) 18 | 19 | train_set = RayDMatrix(train_x, train_y) 20 | test_set = RayDMatrix(test_x, test_y) 21 | 22 | evals_result = {} 23 | 24 | bst = train( 25 | params=config, 26 | dtrain=train_set, 27 | valid_sets=[test_set], 28 | valid_names=["eval"], 29 | evals_result=evals_result, 30 | ray_params=ray_params, 31 | verbose_eval=False, 32 | num_boost_round=10, 33 | ) 34 | 35 | model_path = "tuned.lgbm" 36 | bst.booster_.save_model(model_path) 37 | print( 38 | "Final validation error: {:.4f}".format( 39 | evals_result["eval"]["binary_error"][-1] 40 | ) 41 | ) 42 | 43 | 44 | def main(cpus_per_actor, num_actors, num_samples): 45 | # Set LightGBM config. 
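    # The dict mixes fixed LightGBM params with Tune search spaces: each
    # trial samples eta log-uniformly from [1e-4, 1e-1], subsample
    # uniformly from [0.5, 1.0], and an integer max_depth from 1 to 8.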
46 | config = { 47 | "objective": "binary", 48 | "metric": ["binary_logloss", "binary_error"], 49 | "eta": tune.loguniform(1e-4, 1e-1), 50 | "subsample": tune.uniform(0.5, 1.0), 51 | "max_depth": tune.randint(1, 9), 52 | } 53 | 54 | ray_params = RayParams( 55 | max_actor_restarts=1, 56 | gpus_per_actor=0, 57 | cpus_per_actor=cpus_per_actor, 58 | num_actors=num_actors, 59 | ) 60 | 61 | analysis = tune.run( 62 | tune.with_parameters(train_breast_cancer, ray_params=ray_params), 63 | # Use the `get_tune_resources` helper function to set the resources. 64 | resources_per_trial=ray_params.get_tune_resources(), 65 | config=config, 66 | num_samples=num_samples, 67 | metric="eval-binary_error", 68 | mode="min", 69 | ) 70 | 71 | # Load the best model checkpoint. 72 | best_bst = lightgbm_ray.tune.load_model( 73 | os.path.join(analysis.best_trial.local_path, "tuned.lgbm") 74 | ) 75 | 76 | best_bst.save_model("best_model.lgbm") 77 | 78 | accuracy = 1.0 - analysis.best_result["eval-binary_error"] 79 | print(f"Best model parameters: {analysis.best_config}") 80 | print(f"Best model total accuracy: {accuracy:.4f}") 81 | 82 | 83 | if __name__ == "__main__": 84 | parser = argparse.ArgumentParser() 85 | parser.add_argument( 86 | "--address", required=False, type=str, help="the address to use for Ray" 87 | ) 88 | parser.add_argument( 89 | "--server-address", 90 | required=False, 91 | type=str, 92 | help="Address of the remote server if using Ray Client.", 93 | ) 94 | parser.add_argument( 95 | "--cpus-per-actor", 96 | type=int, 97 | default=2, 98 | help="Sets number of CPUs per LightGBM training worker.", 99 | ) 100 | parser.add_argument( 101 | "--num-actors", 102 | type=int, 103 | default=2, 104 | help="Sets number of LightGBM workers to use.", 105 | ) 106 | parser.add_argument( 107 | "--num-samples", type=int, default=4, help="Number of samples to use for Tune." 108 | ) 109 | parser.add_argument("--smoke-test", action="store_true", default=False) 110 | 111 | args, _ = parser.parse_known_args() 112 | 113 | if args.smoke_test: 114 | ray.init(num_cpus=args.num_actors * max(args.num_samples, args.cpus_per_actor)) 115 | elif args.server_address: 116 | ray.util.connect(args.server_address) 117 | else: 118 | ray.init(address=args.address) 119 | 120 | main(args.cpus_per_actor, args.num_actors, args.num_samples) 121 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/train_on_test_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | import time 5 | 6 | from xgboost_ray.tests.utils import create_parquet_in_tempdir 7 | 8 | from lightgbm_ray import RayDMatrix, RayParams, train 9 | 10 | #### 11 | # Run `create_test_data.py` first to create a large fake data set. 12 | # Alternatively, run with `--smoke-test` to create an ephemeral small fake 13 | # data set. 
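# With `--smoke-test`, a throwaway 1,000-row parquet dataset with two
# partitions is created in a temporary directory and removed again on exit.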
14 | #### 15 | 16 | 17 | def main(fname, num_actors=2): 18 | dtrain = RayDMatrix(os.path.abspath(fname), label="labels", ignore=["partition"]) 19 | 20 | config = { 21 | "objective": "binary", 22 | "metric": ["binary_logloss", "binary_error"], 23 | } 24 | 25 | evals_result = {} 26 | 27 | start = time.time() 28 | bst = train( 29 | config, 30 | dtrain, 31 | evals_result=evals_result, 32 | ray_params=RayParams(max_actor_restarts=1, num_actors=num_actors), 33 | num_boost_round=10, 34 | evals=[(dtrain, "train")], 35 | ) 36 | taken = time.time() - start 37 | print(f"TRAIN TIME TAKEN: {taken:.2f} seconds") 38 | 39 | bst.booster_.save_model("test_data.lgbm") 40 | print( 41 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 42 | ) 43 | 44 | 45 | if __name__ == "__main__": 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument( 48 | "--smoke-test", 49 | action="store_true", 50 | default=False, 51 | help="Finish quickly for testing", 52 | ) 53 | args = parser.parse_args() 54 | 55 | temp_dir, path = None, None 56 | if args.smoke_test: 57 | temp_dir, path = create_parquet_in_tempdir( 58 | "smoketest.parquet", 59 | num_rows=1_000, 60 | num_features=4, 61 | num_classes=2, 62 | num_partitions=2, 63 | ) 64 | else: 65 | path = os.path.join(os.path.dirname(__file__), "parted.parquet") 66 | 67 | import ray 68 | 69 | ray.init(num_cpus=5) 70 | 71 | start = time.time() 72 | main(path) 73 | taken = time.time() - start 74 | print(f"TOTAL TIME TAKEN: {taken:.2f} seconds") 75 | 76 | if args.smoke_test: 77 | shutil.rmtree(temp_dir) 78 | -------------------------------------------------------------------------------- /lightgbm_ray/examples/train_with_ml_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | import time 5 | 6 | from ray.util.data import read_parquet 7 | from xgboost_ray.tests.utils import create_parquet_in_tempdir 8 | 9 | from lightgbm_ray import RayDMatrix, RayParams, train 10 | 11 | #### 12 | # Run `create_test_data.py` first to create a large fake data set. 13 | # Alternatively, run with `--smoke-test` to create an ephemeral small fake 14 | # data set. 
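# Unlike train_on_test_data.py, this script wraps the parquet data in a
# Ray MLDataset via `read_parquet(fname, num_shards=num_actors)`, so the
# data arrives pre-sharded, one shard per training actor.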
15 | #### 16 | 17 | 18 | def main(fname, num_actors=2): 19 | ml_dataset = read_parquet(fname, num_shards=num_actors) 20 | 21 | dtrain = RayDMatrix(ml_dataset, label="labels", ignore=["partition"]) 22 | 23 | config = { 24 | "objective": "binary", 25 | "metric": ["binary_logloss", "binary_error"], 26 | } 27 | 28 | evals_result = {} 29 | 30 | start = time.time() 31 | bst = train( 32 | config, 33 | dtrain, 34 | evals_result=evals_result, 35 | ray_params=RayParams(max_actor_restarts=1, num_actors=num_actors), 36 | num_boost_round=10, 37 | evals=[(dtrain, "train")], 38 | ) 39 | taken = time.time() - start 40 | print(f"TRAIN TIME TAKEN: {taken:.2f} seconds") 41 | 42 | bst.save_model("test_data.lgbm") 43 | print( 44 | "Final training error: {:.4f}".format(evals_result["train"]["binary_error"][-1]) 45 | ) 46 | 47 | 48 | if __name__ == "__main__": 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument( 51 | "--smoke-test", 52 | action="store_true", 53 | default=False, 54 | help="Finish quickly for testing", 55 | ) 56 | args = parser.parse_args() 57 | 58 | temp_dir, path = None, None 59 | if args.smoke_test: 60 | temp_dir, path = create_parquet_in_tempdir( 61 | "smoketest.parquet", 62 | num_rows=1_000, 63 | num_features=4, 64 | num_classes=2, 65 | num_partitions=2, 66 | ) 67 | else: 68 | path = os.path.join(os.path.dirname(__file__), "parted.parquet") 69 | 70 | import ray 71 | 72 | ray.init() 73 | 74 | start = time.time() 75 | main(path) 76 | taken = time.time() - start 77 | print(f"TOTAL TIME TAKEN: {taken:.2f} seconds") 78 | 79 | if args.smoke_test: 80 | shutil.rmtree(temp_dir) 81 | -------------------------------------------------------------------------------- /lightgbm_ray/sklearn.py: -------------------------------------------------------------------------------- 1 | """scikit-learn wrapper for lightgbm-ray. Based on lightgbm.dask.""" 2 | 3 | # Portions of code used in this file and implementation logic are based 4 | # on lightgbm.dask. 5 | # https://github.com/microsoft/LightGBM/blob/b5502d19b2b462f665e3d1edbaa70c0d6472bca4/python-package/lightgbm/dask.py 6 | 7 | # The MIT License (MIT) 8 | 9 | # Copyright (c) Microsoft Corporation 10 | 11 | # Permission is hereby granted, free of charge, to any person obtaining a copy 12 | # of this software and associated documentation files (the "Software"), to deal 13 | # in the Software without restriction, including without limitation the rights 14 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | # copies of the Software, and to permit persons to whom the Software is 16 | # furnished to do so, subject to the following conditions: 17 | 18 | # The above copyright notice and this permission notice shall be included in 19 | # all copies or substantial portions of the Software. 20 | 21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | # SOFTWARE. 
28 | 29 | # License: 30 | # https://github.com/microsoft/LightGBM/blob/c3b9363d02564625332583e166e3ab3135f436e3/LICENSE 31 | 32 | import logging 33 | import warnings 34 | from typing import Any, Callable, Dict, List, Optional, Type, Union 35 | 36 | from lightgbm import LGBMClassifier, LGBMModel, LGBMRegressor # LGBMRanker 37 | from lightgbm.basic import _choose_param_value, _ConfigAliases 38 | from ray.util.annotations import PublicAPI 39 | from xgboost_ray.sklearn import ( 40 | RayXGBMixin, 41 | _check_if_params_are_ray_dmatrix, 42 | _wrap_evaluation_matrices, 43 | ) 44 | 45 | from lightgbm_ray.main import RayDMatrix, RayParams, predict, train 46 | 47 | logger = logging.getLogger(__name__) 48 | 49 | _RAY_PARAMS_DOC = """ 50 | ray_params : RayParams or dict, optional (default=None) 51 | Parameters to configure Ray-specific behavior. 52 | See :class:`RayParams` for a list of valid configuration parameters. 53 | Will override the ``n_jobs`` attribute with its own ``num_actors`` parameter. 54 | _remote : bool, optional (default=False) 55 | Whether to run the driver process in a remote function. 56 | This is enabled by default in Ray client mode. 57 | ray_dmatrix_params : dict, optional (default=None) 58 | Dict of parameters (such as sharding mode) passed to the internal 59 | RayDMatrix initialization.""" 60 | 61 | _N_JOBS_DOC_REPLACE = ( 62 | """ n_jobs : int, optional (default=-1) 63 | Number of parallel threads.""", # noqa: E501, W291 64 | """ n_jobs : int, optional (default=1) 65 | Number of Ray actors used to run LightGBM in parallel. 66 | In order to set the number of threads per actor, pass a :class:`RayParams` 67 | object to the relevant method as a ``ray_params`` argument. Will be 68 | overridden by the ``num_actors`` parameter of the ``ray_params`` argument 69 | should it be passed to a method.""", # noqa: E501, W291 70 | ) 71 | 72 | 73 | def _treat_estimator_doc(doc: str) -> str: 74 | """Helper function to make necessary changes in estimator docstrings""" 75 | doc = doc.replace(*_N_JOBS_DOC_REPLACE).replace( 76 | "Construct a gradient boosting model.", 77 | "Construct a gradient boosting model distributed on Ray.", 78 | ) 79 | return doc 80 | 81 | 82 | def _treat_method_doc(doc: str, insert_before: str) -> str: 83 | """Helper function to make changes in estimator method docstrings""" 84 | doc = ( 85 | doc[: doc.find(insert_before)] 86 | + _RAY_PARAMS_DOC 87 | + doc[doc.find(insert_before) :] 88 | ) 89 | return doc 90 | 91 | 92 | class _RayLGBMModel(RayXGBMixin): 93 | def _ray_get_wrap_evaluation_matrices_compat_kwargs( 94 | self, label_transform=None 95 | ) -> dict: 96 | self.enable_categorical = False 97 | self.feature_types = None 98 | return super()._ray_get_wrap_evaluation_matrices_compat_kwargs( 99 | label_transform=label_transform 100 | ) 101 | 102 | def _ray_set_ray_params_n_jobs( 103 | self, ray_params: Optional[Union[RayParams, dict]], n_jobs: Optional[int] 104 | ) -> RayParams: 105 | """Helper function to set num_actors in ray_params if not 106 | set by the user""" 107 | if ray_params is None: 108 | if not n_jobs or n_jobs < 1: 109 | n_jobs = 1 110 | ray_params = RayParams(num_actors=n_jobs) 111 | elif n_jobs is not None: 112 | warnings.warn( 113 | "`ray_params` is not `None` and will override " 114 | "the `n_jobs` attribute."
115 | ) 116 | return ray_params 117 | 118 | def _ray_fit( 119 | self, 120 | model_factory: Type[LGBMModel], 121 | X, 122 | y, 123 | sample_weight=None, 124 | init_score=None, 125 | group=None, 126 | eval_set=None, 127 | eval_names: Optional[List[str]] = None, 128 | eval_sample_weight=None, 129 | eval_init_score=None, 130 | eval_group=None, 131 | eval_metric: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, 132 | ray_params: Union[None, RayParams, Dict] = None, 133 | _remote: Optional[bool] = None, 134 | ray_dmatrix_params: Optional[Dict] = None, 135 | **kwargs: Any, 136 | ) -> "_RayLGBMModel": 137 | 138 | params = self.get_params(True) 139 | 140 | ray_params = self._ray_set_ray_params_n_jobs(ray_params, params["n_jobs"]) 141 | 142 | params = _choose_param_value( 143 | main_param_name="n_estimators", params=params, default_value=100 144 | ) 145 | 146 | num_boosting_round = params.pop("n_estimators") 147 | ray_dmatrix_params = ray_dmatrix_params or {} 148 | 149 | train_dmatrix, evals = _check_if_params_are_ray_dmatrix( 150 | X, sample_weight, init_score, eval_set, eval_sample_weight, eval_init_score 151 | ) 152 | 153 | if train_dmatrix is None: 154 | train_dmatrix, evals = _wrap_evaluation_matrices( 155 | missing=None, 156 | X=X, 157 | y=y, 158 | group=group, 159 | qid=None, 160 | sample_weight=sample_weight, 161 | base_margin=init_score, 162 | feature_weights=None, 163 | eval_set=eval_set, 164 | sample_weight_eval_set=eval_sample_weight, 165 | base_margin_eval_set=eval_init_score, 166 | eval_group=eval_group, 167 | eval_qid=None, 168 | # changed in xgboost-ray: 169 | create_dmatrix=lambda **kwargs: RayDMatrix( 170 | **{ 171 | **kwargs, 172 | **ray_dmatrix_params, 173 | } 174 | ), 175 | **self._ray_get_wrap_evaluation_matrices_compat_kwargs(), 176 | ) 177 | 178 | eval_names = eval_names or [] 179 | 180 | for i, _ in enumerate(evals): 181 | if len(eval_names) > i: 182 | evals[i] = (evals[i][0], eval_names[i]) 183 | else: 184 | # _wrap_evaluation_matrices sets default names to 185 | # `validation_`, but lgbm uses `valid_`, so 186 | # we fix that here 187 | evals[i] = (evals[i][0], f"valid_{i}") 188 | 189 | for param in _ConfigAliases.get("n_jobs"): 190 | params.pop(param, None) 191 | 192 | model = train( 193 | dtrain=train_dmatrix, 194 | num_boost_round=num_boosting_round, 195 | params=params, 196 | model_factory=model_factory, 197 | evals=evals, 198 | eval_metric=eval_metric, 199 | ray_params=ray_params, 200 | _remote=_remote, 201 | **kwargs, 202 | ) 203 | 204 | self.set_params(**model.get_params()) 205 | self._lgb_ray_copy_extra_params(model, self) 206 | 207 | return self 208 | 209 | def _ray_predict( 210 | self, 211 | X, 212 | model_factory: Type[LGBMModel], 213 | *, 214 | method: str = "predict", 215 | ray_params: Union[None, RayParams, Dict] = None, 216 | _remote: Optional[bool] = None, 217 | ray_dmatrix_params: Optional[Dict], 218 | **kwargs, 219 | ): 220 | params = self.get_params(True) 221 | ray_params = self._ray_set_ray_params_n_jobs(ray_params, params["n_jobs"]) 222 | 223 | ray_dmatrix_params = ray_dmatrix_params or {} 224 | if not isinstance(X, RayDMatrix): 225 | test = RayDMatrix(X, **ray_dmatrix_params) 226 | else: 227 | test = X 228 | return predict( 229 | self._lgb_ray_to_local(model_factory), 230 | data=test, 231 | method=method, 232 | ray_params=ray_params, 233 | _remote=_remote, 234 | **kwargs, 235 | ) 236 | 237 | def _lgb_ray_to_local(self, model_factory: Type[LGBMModel]) -> LGBMModel: 238 | params = self.get_params() 239 | model = model_factory(**params) 
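        # get_params() only covers constructor arguments, so fitted state
        # (e.g. the underlying booster) is copied onto the local model
        # separately below.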
240 | self._lgb_ray_copy_extra_params(self, model) 241 | return model 242 | 243 | @staticmethod 244 | def _lgb_ray_copy_extra_params( 245 | source: Union["_RayLGBMModel", LGBMModel], 246 | dest: Union["_RayLGBMModel", LGBMModel], 247 | ) -> None: 248 | params = source.get_params() 249 | attributes = source.__dict__ 250 | extra_param_names = set(attributes.keys()).difference(params.keys()) 251 | for name in extra_param_names: 252 | setattr(dest, name, attributes[name]) 253 | 254 | 255 | @PublicAPI(stability="beta") 256 | class RayLGBMClassifier(LGBMClassifier, _RayLGBMModel): 257 | def fit( 258 | self, 259 | X, 260 | y, 261 | sample_weight=None, 262 | init_score=None, 263 | eval_set=None, 264 | eval_names: Optional[List[str]] = None, 265 | eval_sample_weight=None, 266 | eval_class_weight: Optional[List[Union[dict, str]]] = None, 267 | eval_init_score=None, 268 | eval_metric: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, 269 | ray_params: Union[None, RayParams, Dict] = None, 270 | _remote: Optional[bool] = None, 271 | ray_dmatrix_params: Optional[Dict] = None, 272 | **kwargs: Any, 273 | ) -> "RayLGBMClassifier": 274 | return self._ray_fit( 275 | model_factory=LGBMClassifier, 276 | X=X, 277 | y=y, 278 | sample_weight=sample_weight, 279 | init_score=init_score, 280 | eval_set=eval_set, 281 | eval_names=eval_names, 282 | eval_sample_weight=eval_sample_weight, 283 | eval_class_weight=eval_class_weight, 284 | eval_init_score=eval_init_score, 285 | eval_metric=eval_metric, 286 | ray_params=ray_params, 287 | _remote=_remote, 288 | ray_dmatrix_params=ray_dmatrix_params, 289 | **kwargs, 290 | ) 291 | 292 | fit.__doc__ = _treat_method_doc(LGBMClassifier.fit.__doc__, "\n\n Returns") 293 | 294 | def predict_proba( 295 | self, 296 | X, 297 | *, 298 | ray_params: Union[None, RayParams, Dict] = None, 299 | _remote: Optional[bool] = None, 300 | ray_dmatrix_params: Optional[Dict] = None, 301 | **kwargs, 302 | ): 303 | return self._ray_predict( 304 | X, 305 | model_factory=LGBMClassifier, 306 | method="predict_proba", 307 | ray_params=ray_params, 308 | _remote=_remote, 309 | ray_dmatrix_params=ray_dmatrix_params, 310 | **kwargs, 311 | ) 312 | 313 | predict_proba.__doc__ = _treat_method_doc( 314 | LGBMClassifier.predict_proba.__doc__, "\n **kwargs" 315 | ) 316 | 317 | def predict( 318 | self, 319 | X, 320 | *, 321 | ray_params: Union[None, RayParams, Dict] = None, 322 | _remote: Optional[bool] = None, 323 | ray_dmatrix_params: Optional[Dict] = None, 324 | **kwargs, 325 | ): 326 | return self._ray_predict( 327 | X, 328 | model_factory=LGBMClassifier, 329 | method="predict", 330 | ray_params=ray_params, 331 | _remote=_remote, 332 | ray_dmatrix_params=ray_dmatrix_params, 333 | **kwargs, 334 | ) 335 | 336 | predict.__doc__ = _treat_method_doc( 337 | LGBMClassifier.predict.__doc__, "\n **kwargs" 338 | ) 339 | 340 | def to_local(self) -> LGBMClassifier: 341 | """Create regular version of lightgbm.LGBMClassifier from the 342 | distributed version. 343 | 344 | Returns 345 | ------- 346 | model : lightgbm.LGBMClassifier 347 | Local underlying model. 
348 | """ 349 | return self._lgb_ray_to_local(LGBMClassifier) 350 | 351 | 352 | RayLGBMClassifier.__init__.__doc__ = _treat_estimator_doc( 353 | LGBMClassifier.__init__.__doc__ 354 | ) 355 | 356 | 357 | @PublicAPI(stability="beta") 358 | class RayLGBMRegressor(LGBMRegressor, _RayLGBMModel): 359 | def fit( 360 | self, 361 | X, 362 | y, 363 | sample_weight=None, 364 | init_score=None, 365 | eval_set=None, 366 | eval_names: Optional[List[str]] = None, 367 | eval_sample_weight=None, 368 | eval_init_score=None, 369 | eval_metric: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, 370 | ray_params: Union[None, RayParams, Dict] = None, 371 | _remote: Optional[bool] = None, 372 | ray_dmatrix_params: Optional[Dict] = None, 373 | **kwargs: Any, 374 | ) -> "RayLGBMRegressor": 375 | return self._ray_fit( 376 | model_factory=LGBMRegressor, 377 | X=X, 378 | y=y, 379 | sample_weight=sample_weight, 380 | init_score=init_score, 381 | eval_set=eval_set, 382 | eval_names=eval_names, 383 | eval_sample_weight=eval_sample_weight, 384 | eval_init_score=eval_init_score, 385 | eval_metric=eval_metric, 386 | ray_params=ray_params, 387 | _remote=_remote, 388 | ray_dmatrix_params=ray_dmatrix_params, 389 | **kwargs, 390 | ) 391 | 392 | fit.__doc__ = _treat_method_doc(LGBMRegressor.fit.__doc__, "\n\n Returns") 393 | 394 | def predict( 395 | self, 396 | X, 397 | *, 398 | ray_params: Union[None, RayParams, Dict] = None, 399 | _remote: Optional[bool] = None, 400 | ray_dmatrix_params: Optional[Dict] = None, 401 | **kwargs, 402 | ): 403 | return self._ray_predict( 404 | X, 405 | model_factory=LGBMRegressor, 406 | method="predict", 407 | ray_params=ray_params, 408 | _remote=_remote, 409 | ray_dmatrix_params=ray_dmatrix_params, 410 | **kwargs, 411 | ) 412 | 413 | predict.__doc__ = _treat_method_doc(LGBMRegressor.predict.__doc__, "\n **kwargs") 414 | 415 | def to_local(self) -> LGBMRegressor: 416 | """Create regular version of lightgbm.LGBMRegressor from the 417 | distributed version. 418 | 419 | Returns 420 | ------- 421 | model : lightgbm.LGBMRegressor 422 | Local underlying model. 
423 | """ 424 | return self._lgb_ray_to_local(LGBMRegressor) 425 | 426 | 427 | RayLGBMRegressor.__init__.__doc__ = _treat_estimator_doc( 428 | RayLGBMRegressor.__init__.__doc__ 429 | ) 430 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ray-project/lightgbm_ray/4c4d3413f86db769bddb6d08e2480a04bc75d712/lightgbm_ray/tests/__init__.py -------------------------------------------------------------------------------- /lightgbm_ray/tests/env_info.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # shellcheck disable=SC2005 3 | 4 | echo "Test environment information" 5 | echo "----------------------------" 6 | echo "Python version: $(python --version 2>/dev/null || echo 'Python not installed')" 7 | echo "Ray version: $(ray --version 2>/dev/null || echo 'Ray not installed')" 8 | echo "Installed pip packages:" 9 | echo "$(python -m pip freeze 2>/dev/null || echo 'Pip not installed')" 10 | echo "----------------------------" 11 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/benchmark_cpu_gpu.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import glob 3 | import os 4 | import shutil 5 | import time 6 | 7 | import ray 8 | from xgboost_ray.tests.utils import create_parquet_in_tempdir 9 | 10 | from lightgbm_ray import ( 11 | RayDeviceQuantileDMatrix, 12 | RayDMatrix, 13 | RayFileType, 14 | RayParams, 15 | train, 16 | ) 17 | 18 | if "OMP_NUM_THREADS" in os.environ: 19 | del os.environ["OMP_NUM_THREADS"] 20 | 21 | 22 | def train_ray( 23 | path, 24 | num_workers, 25 | num_boost_rounds, 26 | num_files=0, 27 | regression=False, 28 | use_gpu=False, 29 | smoke_test=False, 30 | ray_params=None, 31 | lightgbm_params=None, 32 | **kwargs, 33 | ): 34 | if num_files: 35 | files = sorted(glob.glob(f"{path}/**/*.parquet")) 36 | while num_files > len(files): 37 | files = files + files 38 | path = files[0:num_files] 39 | 40 | use_device_matrix = False 41 | 42 | if use_device_matrix: 43 | dtrain = RayDeviceQuantileDMatrix( 44 | path, 45 | num_actors=num_workers, 46 | label="labels", 47 | ignore=["partition"], 48 | filetype=RayFileType.PARQUET, 49 | ) 50 | else: 51 | dtrain = RayDMatrix( 52 | path, 53 | num_actors=num_workers, 54 | label="labels", 55 | ignore=["partition"], 56 | filetype=RayFileType.PARQUET, 57 | ) 58 | 59 | config = lightgbm_params or {"device": "cpu" if not use_gpu else "gpu"} 60 | if not regression: 61 | # Classification 62 | config.update( 63 | { 64 | "objective": "binary", 65 | "metric": ["binary_logloss", "binary_error"], 66 | } 67 | ) 68 | else: 69 | # Regression 70 | config.update( 71 | { 72 | "objective": "regression", 73 | "metric": ["l2", "rmse"], 74 | } 75 | ) 76 | 77 | start = time.time() 78 | evals_result = {} 79 | bst = train( 80 | config, 81 | dtrain, 82 | evals_result=evals_result, 83 | num_boost_round=num_boost_rounds, 84 | ray_params=ray_params 85 | or RayParams( 86 | max_actor_restarts=2, 87 | num_actors=num_workers, 88 | cpus_per_actor=4 if not smoke_test else 2, 89 | gpus_per_actor=0 if not use_gpu else 1, 90 | ), 91 | evals=[(dtrain, "train")], 92 | **kwargs, 93 | ) 94 | taken = time.time() - start 95 | print(f"TRAIN TIME TAKEN: {taken:.2f} seconds") 96 | 97 | bst.booster_.save_model("benchmark_{}.lgbm".format("cpu" if not 
use_gpu else "gpu")) 98 | print( 99 | "Final training error: {:.4f}".format( 100 | evals_result["train"]["binary_error" if not regression else "l2"][-1] 101 | ) 102 | ) 103 | return bst, taken 104 | 105 | 106 | if __name__ == "__main__": 107 | parser = argparse.ArgumentParser(description="Process some integers.") 108 | 109 | parser.add_argument("num_workers", type=int, help="num workers") 110 | parser.add_argument("num_rounds", type=int, help="num boost rounds") 111 | parser.add_argument("num_files", type=int, help="num files") 112 | 113 | parser.add_argument( 114 | "--file", default="/data/parted.parquet", type=str, help="data file" 115 | ) 116 | 117 | parser.add_argument( 118 | "--regression", action="store_true", default=False, help="regression" 119 | ) 120 | 121 | parser.add_argument("--gpu", action="store_true", default=False, help="gpu") 122 | 123 | parser.add_argument( 124 | "--smoke-test", action="store_true", default=False, help="smoke test" 125 | ) 126 | 127 | args = parser.parse_args() 128 | 129 | num_workers = args.num_workers 130 | num_boost_rounds = args.num_rounds 131 | num_files = args.num_files 132 | use_gpu = args.gpu 133 | 134 | temp_dir = None 135 | if args.smoke_test: 136 | temp_dir, path = create_parquet_in_tempdir( 137 | filename="smoketest.parquet", 138 | num_rows=args.num_workers * 500, 139 | num_features=4, 140 | num_classes=2, 141 | num_partitions=args.num_workers * 10, 142 | ) 143 | use_gpu = False 144 | else: 145 | path = args.file 146 | if not os.path.exists(path): 147 | raise ValueError( 148 | f"Benchmarking data not found: {path}." 149 | f"\nFIX THIS by running `python create_test_data.py` first." 150 | ) 151 | 152 | init_start = time.time() 153 | if args.smoke_test: 154 | ray.init(num_cpus=num_workers) 155 | else: 156 | ray.init(address="auto") 157 | init_taken = time.time() - init_start 158 | 159 | full_start = time.time() 160 | bst, train_taken = train_ray( 161 | path=path, 162 | num_workers=num_workers, 163 | num_boost_rounds=num_boost_rounds, 164 | num_files=num_files, 165 | regression=args.regression, 166 | use_gpu=use_gpu, 167 | smoke_test=args.smoke_test, 168 | ) 169 | full_taken = time.time() - full_start 170 | print(f"TOTAL TIME TAKEN: {full_taken:.2f} seconds " f"({init_taken:.2f} for init)") 171 | 172 | if args.smoke_test: 173 | shutil.rmtree(temp_dir, ignore_errors=True) 174 | else: 175 | with open("res.csv", "at") as fp: 176 | fp.writelines( 177 | [ 178 | ",".join( 179 | [ 180 | str(e) 181 | for e in [ 182 | num_workers, 183 | num_files, 184 | int(use_gpu), 185 | num_boost_rounds, 186 | init_taken, 187 | full_taken, 188 | train_taken, 189 | ] 190 | ] 191 | ) 192 | + "\n" 193 | ] 194 | ) 195 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/cluster_cpu.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: lightgbm_ray_release_tests_cpu_{{env["NUM_WORKERS"] | default(0)}} 2 | 3 | max_workers: {{env["NUM_WORKERS"] | default(0)}} 4 | upscaling_speed: 9999 5 | 6 | idle_timeout_minutes: 15 7 | 8 | docker: 9 | image: anyscale/ray:nightly 10 | container_name: ray_container 11 | pull_before_run: true 12 | run_options: 13 | - --privileged 14 | 15 | provider: 16 | type: aws 17 | region: us-west-2 18 | availability_zone: us-west-2a 19 | cache_stopped_nodes: false 20 | 21 | available_node_types: 22 | cpu_4_ondemand: 23 | node_config: 24 | InstanceType: m5.xlarge 25 | resources: {"CPU": 4} 26 | min_workers: {{env["NUM_WORKERS"] | default(0)}} 27 | 
max_workers: {{env["NUM_WORKERS"] | default(0)}} 28 | 29 | auth: 30 | ssh_user: ubuntu 31 | 32 | head_node_type: cpu_4_ondemand 33 | worker_default_node_type: cpu_4_ondemand 34 | 35 | file_mounts_sync_continuously: false 36 | 37 | setup_commands: 38 | - pip install -U {{env["RAY_WHEEL"] | default("ray")}} 39 | - pip install dask pytest 40 | - pip install -U {{env["LIGHTGBM_RAY_PACKAGE"] | default("lightgbm_ray")}} 41 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/cluster_gpu.yaml: -------------------------------------------------------------------------------- 1 | cluster_name: lightgbm_ray_release_tests_gpu_{{env["NUM_WORKERS"] | default(0)}} 2 | 3 | max_workers: {{env["NUM_WORKERS"] | default(0)}} 4 | upscaling_speed: 9999 5 | 6 | idle_timeout_minutes: 15 7 | 8 | docker: 9 | image: anyscale/ray:nightly-gpu 10 | container_name: ray_container 11 | pull_before_run: true 12 | run_options: 13 | - --privileged 14 | 15 | provider: 16 | type: aws 17 | region: us-west-2 18 | availability_zone: us-west-2a 19 | cache_stopped_nodes: false 20 | 21 | available_node_types: 22 | gpu_4_ondemand: 23 | node_config: 24 | InstanceType: p2.xlarge 25 | resources: {"CPU": 4, "GPU": 1} 26 | min_workers: {{env["NUM_WORKERS"] | default(0)}} 27 | max_workers: {{env["NUM_WORKERS"] | default(0)}} 28 | 29 | auth: 30 | ssh_user: ubuntu 31 | 32 | head_node_type: gpu_4_ondemand 33 | worker_default_node_type: gpu_4_ondemand 34 | 35 | file_mounts: { 36 | "~/lightgbm_tests": "." 37 | } 38 | 39 | file_mounts_sync_continuously: false 40 | 41 | setup_commands: 42 | - pip uninstall -y lightgbm && pip install -U "lightgbm>=3.2.1" --install-option=--gpu 43 | - pip install -U pyarrow cupy-cuda101 44 | - pip install -U {{env["RAY_WHEEL"] | default("ray")}} 45 | - export LIGHTGBM_RAY_PACKAGE="{{env["LIGHTGBM_RAY_PACKAGE"] | default("lightgbm_ray")}}" && /bin/bash ~/lightgbm_tests/setup_lightgbm.sh 46 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/create_learnable_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn.datasets import make_classification, make_regression 7 | 8 | if __name__ == "__main__": 9 | if "OMP_NUM_THREADS" in os.environ: 10 | del os.environ["OMP_NUM_THREADS"] 11 | 12 | parser = argparse.ArgumentParser(description="Create fake data.") 13 | parser.add_argument("filename", type=str, default="/data/parted.parquet/") 14 | parser.add_argument( 15 | "-r", "--num-rows", required=False, type=int, default=1e8, help="num rows" 16 | ) 17 | parser.add_argument( 18 | "-p", 19 | "--num-partitions", 20 | required=False, 21 | type=int, 22 | default=100, 23 | help="num partitions", 24 | ) 25 | parser.add_argument( 26 | "-c", 27 | "--num-cols", 28 | required=False, 29 | type=int, 30 | default=4, 31 | help="num columns (features)", 32 | ) 33 | parser.add_argument( 34 | "-C", "--num-classes", required=False, type=int, default=2, help="num classes" 35 | ) 36 | parser.add_argument( 37 | "-s", "--seed", required=False, type=int, default=1234, help="random seed" 38 | ) 39 | parser.add_argument( 40 | "-T", 41 | "--target", 42 | required=False, 43 | type=float, 44 | default=0.8, 45 | help="target accuracy", 46 | ) 47 | 48 | args = parser.parse_args() 49 | 50 | seed = int(args.seed) 51 | np.random.seed(seed) 52 | 53 | num_rows = int(args.num_rows) 54 | num_cols = 
int(args.num_cols) 55 | num_classes = int(args.num_classes) 56 | target = float(args.target) 57 | 58 | if num_classes > 0: 59 | x, y = make_classification( 60 | n_samples=num_rows, 61 | n_features=num_cols, 62 | n_informative=num_cols // 2, 63 | n_redundant=num_cols // 10, 64 | n_repeated=0, 65 | n_classes=num_classes, 66 | n_clusters_per_class=2, 67 | flip_y=1 - target, 68 | random_state=seed, 69 | ) 70 | else: 71 | x, y = make_regression( 72 | n_samples=num_rows, 73 | n_features=num_cols, 74 | n_informative=num_cols // 2, 75 | n_targets=1, 76 | noise=0.1, 77 | random_state=seed, 78 | ) 79 | 80 | filename = args.filename 81 | num_partitions = args.num_partitions 82 | 83 | data = pd.DataFrame(x, columns=[f"feature_{i}" for i in range(num_cols)]) 84 | 85 | rows_per_partition = len(data) // num_partitions  # np.repeat requires integer repeats 86 | 87 | partition_arr = np.repeat(np.arange(num_partitions), repeats=rows_per_partition) 88 | if len(partition_arr) < len(data): 89 | # If this was not evenly divided, append 90 | missing = len(data) - len(partition_arr) 91 | partition_arr = np.append(partition_arr, np.arange(missing)) 92 | 93 | partition = pd.Series(partition_arr, copy=False, dtype=np.int32) 94 | 95 | data["labels"] = y 96 | data["partition"] = partition 97 | 98 | os.makedirs(filename, 0o755, exist_ok=True) 99 | 100 | # Write partition-wise to avoid OOM errors 101 | for i in range(num_partitions): 102 | part = data[partition_arr == i] 103 | part.to_parquet( 104 | filename, 105 | partition_cols=["partition"], 106 | engine="pyarrow", 107 | partition_filename_cb=lambda key: f"part_{key[0]}.parquet", 108 | ) 109 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/create_test_data.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from xgboost_ray.tests.utils import create_parquet 6 | 7 | if __name__ == "__main__": 8 | if "OMP_NUM_THREADS" in os.environ: 9 | del os.environ["OMP_NUM_THREADS"] 10 | 11 | parser = argparse.ArgumentParser(description="Create fake data.") 12 | parser.add_argument( 13 | "filename", type=str, default="/data/parted.parquet/", help="output path" 14 | ) 15 | parser.add_argument( 16 | "-r", "--num-rows", required=False, type=int, default=1e8, help="num rows" 17 | ) 18 | parser.add_argument( 19 | "-p", 20 | "--num-partitions", 21 | required=False, 22 | type=int, 23 | default=100, 24 | help="num partitions", 25 | ) 26 | parser.add_argument( 27 | "-c", 28 | "--num-cols", 29 | required=False, 30 | type=int, 31 | default=4, 32 | help="num columns (features)", 33 | ) 34 | parser.add_argument( 35 | "-C", "--num-classes", required=False, type=int, default=2, help="num classes" 36 | ) 37 | parser.add_argument( 38 | "-s", "--seed", required=False, type=int, default=1234, help="random seed" 39 | ) 40 | 41 | args = parser.parse_args() 42 | 43 | np.random.seed(args.seed) 44 | create_parquet( 45 | args.filename, 46 | num_rows=int(args.num_rows), 47 | num_partitions=int(args.num_partitions), 48 | num_features=int(args.num_cols), 49 | num_classes=int(args.num_classes), 50 | ) 51 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/custom_objective_metric.py: -------------------------------------------------------------------------------- 1 | import ray 2 | 3 | from lightgbm_ray.tests.test_lightgbm_api import LightGBMAPITest 4 | 5 | 6 | class LightGBMDistributedAPITest(LightGBMAPITest): 7 | def 
_init_ray(self): 8 | if not ray.is_initialized(): 9 | ray.init(address="auto") 10 | 11 | 12 | if __name__ == "__main__": 13 | import sys 14 | 15 | import pytest 16 | 17 | sys.exit(pytest.main(["-v", f"{__file__}::LightGBMDistributedAPITest"])) 18 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/run_e2e_gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -f "./.anyscale.yaml" ]; then 4 | echo "Anyscale project not initialized. Please run 'anyscale init'" 5 | exit 1 6 | fi 7 | 8 | NOW=$(date +%s) 9 | export SESSION_NAME="lightgbm_ray_ci_gpu_${NOW}" 10 | export NUM_WORKERS=3 11 | export LIGHTGBM_RAY_PACKAGE="git+https://github.com/ray-project/lightgbm_ray.git@${GITHUB_SHA:-master}#lightgbm_ray" 12 | export NO_TMUX=1 13 | 14 | ./start_gpu_cluster.sh 15 | ./submit_cpu_gpu_benchmark.sh 4 100 100 --gpu --file /data/classification.parquet 16 | anyscale down "${SESSION_NAME}" 17 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/setup_lightgbm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install pytest 4 | # Uninstall any existing lightgbm_ray installations 5 | pip uninstall -y lightgbm_ray || true 6 | 7 | # Install lightgbm_ray package 8 | pip install -U "${LIGHTGBM_RAY_PACKAGE:-lightgbm_ray}" 9 | 10 | # Create test dataset 11 | sudo mkdir -p /data || true 12 | sudo chown ray:1000 /data || true 13 | rm -rf /data/classification.parquet || true 14 | cp -R /tmp/ray_tmp_mount/lightgbm_tests ~/lightgbm_tests || echo "Copy failed" 15 | python ~/lightgbm_tests/create_test_data.py /data/classification.parquet --seed 1234 --num-rows 1000000 --num-cols 40 --num-partitions 100 --num-classes 2 16 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/start_cpu_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -f "./.anyscale.yaml" ]; then 4 | echo "Anyscale project not initialized. Please run 'anyscale init'" 5 | exit 1 6 | fi 7 | 8 | export LIGHTGBM_RAY_PACKAGE="${LIGHTGBM_RAY_PACKAGE:-lightgbm_ray}" 9 | export NUM_WORKERS="${NUM_WORKERS:-3}" 10 | 11 | SESSION_NAME=${SESSION_NAME:-lightgbm_ray_release_cpu_$(date +%s)} 12 | 13 | echo "Starting CPU cluster with ${NUM_WORKERS} worker nodes (plus the head node)" 14 | echo "This will install lightgbm_ray using the following package: ${LIGHTGBM_RAY_PACKAGE}" 15 | 16 | CMD="anyscale up --cloud-name anyscale_default_cloud --config cluster_cpu.yaml ${SESSION_NAME}" 17 | 18 | echo "Running: ${CMD}" 19 | ${CMD} 20 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/start_gpu_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -f "./.anyscale.yaml" ]; then 4 | echo "Anyscale project not initialized. 
Please run 'anyscale init'" 5 | exit 1 6 | fi 7 | 8 | export LIGHTGBM_RAY_PACKAGE="${LIGHTGBM_RAY_PACKAGE:-lightgbm_ray}" 9 | export NUM_WORKERS="${NUM_WORKERS:-3}" 10 | 11 | SESSION_NAME=${SESSION_NAME:-lightgbm_ray_release_gpu_$(date +%s)} 12 | 13 | echo "Starting GPU cluster with ${NUM_WORKERS} worker nodes (plus the head node)" 14 | echo "This will install lightgbm_ray using the following package: ${LIGHTGBM_RAY_PACKAGE}" 15 | 16 | CMD="anyscale up --cloud-name anyscale_default_cloud --config cluster_gpu.yaml ${SESSION_NAME}" 17 | 18 | echo "Running: ${CMD}" 19 | ${CMD} 20 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/release/submit_cpu_gpu_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ! -f "./.anyscale.yaml" ]; then 4 | echo "Anyscale project not initialized. Please run 'anyscale init'" 5 | exit 1 6 | fi 7 | 8 | ANYSCALE_CMD="python ~/lightgbm_tests/benchmark_cpu_gpu.py $*" 9 | 10 | SESSION_STR="" 11 | if [ -n "${SESSION_NAME}" ]; then 12 | SESSION_STR="--session-name ${SESSION_NAME}" 13 | fi 14 | 15 | TMUX="--tmux" 16 | if [ "${NO_TMUX}" = "1" ]; then 17 | TMUX="" 18 | fi 19 | 20 | CMD="anyscale exec ${TMUX} ${SESSION_STR} -- ${ANYSCALE_CMD}" 21 | 22 | echo "Running: ${CMD}" 23 | ${CMD} 24 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/test_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | import ray 5 | from ray.util.client.ray_client_helpers import ray_start_client_server 6 | 7 | 8 | @pytest.fixture 9 | def start_client_server_4_cpus(): 10 | ray.init(num_cpus=4) 11 | with ray_start_client_server() as client: 12 | yield client 13 | 14 | 15 | @pytest.fixture 16 | def start_client_server_5_cpus(): 17 | ray.init(num_cpus=5) 18 | with ray_start_client_server() as client: 19 | yield client 20 | 21 | 22 | @pytest.fixture 23 | def start_client_server_5_cpus_modin(monkeypatch): 24 | monkeypatch.setenv("__MODIN_AUTOIMPORT_PANDAS__", "1") 25 | ray.init(num_cpus=5, runtime_env={"env_vars": {"__MODIN_AUTOIMPORT_PANDAS__": "1"}}) 26 | with ray_start_client_server() as client: 27 | yield client 28 | 29 | 30 | def test_simple_train(start_client_server_4_cpus): 31 | assert ray.util.client.ray.is_connected() 32 | from lightgbm_ray.examples.simple import main 33 | 34 | main(num_actors=2, cpus_per_actor=2) 35 | 36 | 37 | @pytest.mark.skipif(os.environ.get("TUNE", "0") != "1", reason="Skipping Tune tests") 38 | def test_simple_tune(start_client_server_4_cpus): 39 | assert ray.util.client.ray.is_connected() 40 | from lightgbm_ray.examples.simple_tune import main 41 | 42 | main(cpus_per_actor=2, num_actors=1, num_samples=4) 43 | 44 | 45 | def test_simple_dask(start_client_server_5_cpus): 46 | assert ray.util.client.ray.is_connected() 47 | from lightgbm_ray.examples.simple_dask import main 48 | 49 | main(cpus_per_actor=2, num_actors=2) 50 | 51 | 52 | def test_simple_modin(start_client_server_5_cpus_modin): 53 | assert ray.util.client.ray.is_connected() 54 | from lightgbm_ray.examples.simple_modin import main 55 | 56 | main(cpus_per_actor=2, num_actors=2) 57 | 58 | 59 | if __name__ == "__main__": 60 | import sys 61 | 62 | import pytest # noqa: F811 63 | 64 | sys.exit(pytest.main(["-v", __file__])) 65 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/test_end_to_end.py: 
-------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shutil 4 | import tempfile 5 | import unittest 6 | 7 | import lightgbm as lgbm 8 | import numpy as np 9 | import ray 10 | from ray.exceptions import RayActorError, RayTaskError 11 | from xgboost_ray.callback import DistributedCallback 12 | 13 | from lightgbm_ray import RayDMatrix, RayParams, RayShardingMode, predict, train 14 | from lightgbm_ray.main import RayXGBoostTrainingError 15 | 16 | # from sklearn.utils import shuffle 17 | 18 | logging.getLogger("lightgbm_ray.main").setLevel(logging.DEBUG) 19 | 20 | 21 | def _make_callback(tmpdir: str) -> DistributedCallback: 22 | class TestDistributedCallback(DistributedCallback): 23 | logdir = tmpdir 24 | 25 | def on_init(self, actor, *args, **kwargs): 26 | log_file = os.path.join(self.logdir, f"rank_{actor.rank}.log") 27 | actor.log_fp = open(log_file, "at") 28 | actor.log_fp.write(f"Actor {actor.rank}: Init\n") 29 | actor.log_fp.flush() 30 | 31 | def before_data_loading(self, actor, data, *args, **kwargs): 32 | actor.log_fp.write(f"Actor {actor.rank}: Before loading\n") 33 | actor.log_fp.flush() 34 | 35 | def after_data_loading(self, actor, data, *args, **kwargs): 36 | actor.log_fp.write(f"Actor {actor.rank}: After loading\n") 37 | actor.log_fp.flush() 38 | 39 | def before_train(self, actor, *args, **kwargs): 40 | actor.log_fp.write(f"Actor {actor.rank}: Before train\n") 41 | actor.log_fp.flush() 42 | 43 | def after_train(self, actor, result_dict, *args, **kwargs): 44 | actor.log_fp.write(f"Actor {actor.rank}: After train\n") 45 | actor.log_fp.flush() 46 | 47 | def before_predict(self, actor, *args, **kwargs): 48 | actor.log_fp.write(f"Actor {actor.rank}: Before predict\n") 49 | actor.log_fp.flush() 50 | 51 | def after_predict(self, actor, predictions, *args, **kwargs): 52 | actor.log_fp.write(f"Actor {actor.rank}: After predict\n") 53 | actor.log_fp.flush() 54 | 55 | return TestDistributedCallback() 56 | 57 | 58 | class LGBMRayEndToEndTest(unittest.TestCase): 59 | """In this test suite we validate Ray-LightGBM multi-class prediction. 60 | 61 | First, we validate that LightGBM is able to achieve 100% accuracy on 62 | a simple training task. 63 | 64 | Then we split the dataset into two halves. These halves don't have access 65 | to all relevant data, so they overfit on their respective data. I.e. the first 66 | half always predicts feature 2 -> label 2, while the second half always 67 | predicts feature 2 -> label 3. 68 | 69 | We then train using Ray LightGBM. 
Again both halves will be trained 70 | separately, but because the distributed tree learner aggregates histograms across actors, they should end up being 71 | able to achieve 100% accuracy, again.""" 72 | 73 | def setUp(self): 74 | repeat = 64 # Repeat data a couple of times for stability 75 | self.x = np.array( 76 | [ 77 | [1, 0, 0, 0], # Feature 0 -> Label 0 78 | [0, 1, 0, 0], # Feature 1 -> Label 1 79 | [0, 0, 1, 1], # Feature 2+3 -> Label 2 80 | [0, 0, 1, 0], # Feature 2+!3 -> Label 3 81 | ] 82 | * repeat 83 | ) 84 | self.y = np.array([0, 1, 2, 3] * repeat) 85 | 86 | # self.x, self.y = shuffle(self.x, self.y, random_state=1) 87 | 88 | self.params = { 89 | "boosting": "gbdt", 90 | "objective": "multiclass", 91 | "num_class": 4, 92 | "random_state": 1, 93 | "tree_learner": "data", 94 | "deterministic": True, 95 | } 96 | 97 | def tearDown(self): 98 | ray.shutdown() 99 | 100 | def testSingleTraining(self): 101 | """Test that LightGBM learns to predict full matrix""" 102 | dtrain = lgbm.Dataset(self.x, self.y) 103 | bst = lgbm.train(self.params, dtrain, num_boost_round=2) 104 | 105 | pred_y = np.argmax(bst.predict(self.x), axis=1) 106 | self.assertSequenceEqual(list(self.y), list(pred_y)) 107 | 108 | def testHalfTraining(self): 109 | """Test that LightGBM learns to predict half matrices individually""" 110 | x_first = self.x[::2] 111 | y_first = self.y[::2] 112 | 113 | x_second = self.x[1::2] 114 | y_second = self.y[1::2] 115 | 116 | # Test case: The first model only sees feature 2 --> label 2 117 | # and the second model only sees feature 2 --> label 3 118 | test_X = np.array([[0, 0, 1, 1], [0, 0, 1, 0]]) 119 | test_y_first = [2, 2] 120 | test_y_second = [3, 3] 121 | 122 | # First half 123 | dtrain = lgbm.Dataset(x_first, y_first) 124 | bst = lgbm.train(self.params, dtrain, num_boost_round=2) 125 | 126 | pred_y = np.argmax(bst.predict(x_first), axis=1) 127 | self.assertSequenceEqual(list(y_first), list(pred_y)) 128 | 129 | pred_test = np.argmax(bst.predict(test_X), axis=1) 130 | self.assertSequenceEqual(test_y_first, list(pred_test)) 131 | 132 | # Second half 133 | dtrain = lgbm.Dataset(x_second, y_second) 134 | bst = lgbm.train(self.params, dtrain, num_boost_round=2) 135 | 136 | pred_y = np.argmax(bst.predict(x_second), axis=1) 137 | self.assertSequenceEqual(list(y_second), list(pred_y)) 138 | 139 | pred_test = np.argmax(bst.predict(test_X), axis=1) 140 | self.assertSequenceEqual(test_y_second, list(pred_test)) 141 | 142 | def _testJointTraining(self, cpus_per_actor): 143 | ray.init(num_cpus=4, num_gpus=0, include_dashboard=False) 144 | 145 | bst = train( 146 | self.params, 147 | RayDMatrix(self.x, self.y, sharding=RayShardingMode.BATCH), 148 | num_boost_round=50, 149 | ray_params=RayParams(num_actors=2, cpus_per_actor=cpus_per_actor), 150 | ) 151 | 152 | self.assertEqual(bst.booster_.current_iteration(), 50) 153 | 154 | pred_y = bst.predict(self.x) 155 | pred_y = np.argmax(pred_y, axis=1) 156 | self.assertSequenceEqual(list(self.y), list(pred_y)) 157 | 158 | pred_y = predict( 159 | bst, 160 | RayDMatrix(self.x), 161 | ray_params=RayParams(num_actors=2, cpus_per_actor=cpus_per_actor), 162 | ) 163 | pred_y = np.argmax(pred_y, axis=1) 164 | self.assertSequenceEqual(list(self.y), list(pred_y)) 165 | 166 | pred_y = predict( 167 | bst.booster_, 168 | RayDMatrix(self.x), 169 | ray_params=RayParams(num_actors=2, cpus_per_actor=cpus_per_actor), 170 | ) 171 | pred_y = np.argmax(pred_y, axis=1) 172 | self.assertSequenceEqual(list(self.y), list(pred_y)) 173 | 174 | def testJointTraining(self): 175 | """Train with Ray. 
The data will be split, but the trees 176 | should be combined together and find the true model.""" 177 | return self._testJointTraining(cpus_per_actor=2) 178 | 179 | def testJointTrainingDefaultRayParams(self): 180 | """Train with Ray. The data will be split, but the trees 181 | should be combined together and find the true model.""" 182 | return self._testJointTraining(cpus_per_actor=0) 183 | 184 | def testCpusPerActorEqualTo1RaisesException(self): 185 | ray.init(num_cpus=4, num_gpus=0, include_dashboard=False) 186 | with self.assertRaisesRegex(ValueError, "cpus_per_actor is set to less than 2"): 187 | train( 188 | self.params, 189 | RayDMatrix(self.x, self.y), 190 | num_boost_round=50, 191 | ray_params=RayParams(num_actors=2, cpus_per_actor=1), 192 | ) 193 | 194 | def testBothEvalsAndValidSetsRaisesException(self): 195 | ray.init(num_cpus=4, num_gpus=0, include_dashboard=False) 196 | with self.assertRaisesRegex( 197 | ValueError, "Specifying both `evals` and `valid_sets` is ambiguous" 198 | ): 199 | data = (RayDMatrix(self.x, self.y),) 200 | train( 201 | self.params, 202 | data, 203 | num_boost_round=50, 204 | ray_params=RayParams(num_actors=2), 205 | evals=[(data, "eval")], 206 | valid_sets=[data], 207 | ) 208 | 209 | def testTrainPredict(self, init=True, remote=None, **ray_param_dict): 210 | """Train with evaluation and predict""" 211 | if init: 212 | ray.init(num_cpus=8, num_gpus=0, include_dashboard=False) 213 | 214 | dtrain = RayDMatrix(self.x, self.y, sharding=RayShardingMode.BATCH) 215 | 216 | params = self.params 217 | 218 | evals_result = {} 219 | bst = train( 220 | params, 221 | dtrain, 222 | num_boost_round=38, 223 | ray_params=RayParams( 224 | num_actors=2, 225 | cpus_per_actor=1, 226 | allow_less_than_two_cpus=True, 227 | **ray_param_dict, 228 | ), 229 | evals=[(dtrain, "dtrain")], 230 | evals_result=evals_result, 231 | _remote=remote, 232 | ) 233 | 234 | self.assertTrue("dtrain" in evals_result) 235 | 236 | evals_result = {} 237 | bst = train( 238 | params, 239 | dtrain, 240 | num_boost_round=38, 241 | ray_params=RayParams( 242 | num_actors=2, 243 | cpus_per_actor=1, 244 | allow_less_than_two_cpus=True, 245 | **ray_param_dict, 246 | ), 247 | valid_sets=[dtrain], 248 | valid_names=["dtrain"], 249 | evals_result=evals_result, 250 | _remote=remote, 251 | ) 252 | 253 | self.assertTrue("dtrain" in evals_result) 254 | 255 | x_mat = RayDMatrix(self.x) 256 | pred_y = predict( 257 | bst, 258 | x_mat, 259 | ray_params=RayParams( 260 | num_actors=2, 261 | cpus_per_actor=1, 262 | allow_less_than_two_cpus=True, 263 | **ray_param_dict, 264 | ), 265 | _remote=remote, 266 | ) 267 | 268 | self.assertEqual(pred_y.shape[1], len(np.unique(self.y))) 269 | pred_y = np.argmax(pred_y, axis=1) 270 | 271 | self.assertSequenceEqual(list(self.y), list(pred_y)) 272 | 273 | def testTrainPredictRemote(self): 274 | """Train with evaluation and predict in a remote call""" 275 | self.testTrainPredict(init=True, remote=True) 276 | 277 | def testTrainPredictClient(self): 278 | """Train with evaluation and predict in a client session""" 279 | if ray.__version__ <= "1.2.0": 280 | self.skipTest("Ray client mocks do not work in Ray <= 1.2.0") 281 | from ray.util.client.ray_client_helpers import ray_start_client_server 282 | 283 | # (yard1) this hangs when num_cpus=2 284 | ray.init(num_cpus=8, num_gpus=0, include_dashboard=False) 285 | self.assertFalse(ray.util.client.ray.is_connected()) 286 | with ray_start_client_server(): 287 | self.assertTrue(ray.util.client.ray.is_connected()) 288 | 289 | 
self.testTrainPredict(init=False, remote=None) 290 | 291 | def testDistributedCallbacksTrainPredict(self, init=True, remote=False): 292 | """Test distributed callbacks for train/predict""" 293 | tmpdir = tempfile.mkdtemp() 294 | test_callback = _make_callback(tmpdir) 295 | 296 | self.testTrainPredict( 297 | init=init, remote=remote, distributed_callbacks=[test_callback] 298 | ) 299 | rank_0_log_file = os.path.join(tmpdir, "rank_0.log") 300 | rank_1_log_file = os.path.join(tmpdir, "rank_1.log") 301 | self.assertTrue(os.path.exists(rank_1_log_file)) 302 | 303 | rank_0_log = open(rank_0_log_file, "rt").read() 304 | self.assertEqual( 305 | rank_0_log, 306 | "Actor 0: Init\n" 307 | "Actor 0: Before loading\n" 308 | "Actor 0: After loading\n" 309 | "Actor 0: Before train\n" 310 | "Actor 0: After train\n" 311 | "Actor 0: Init\n" 312 | "Actor 0: Before loading\n" 313 | "Actor 0: After loading\n" 314 | "Actor 0: Before train\n" 315 | "Actor 0: After train\n" 316 | "Actor 0: Init\n" 317 | "Actor 0: Before loading\n" 318 | "Actor 0: After loading\n" 319 | "Actor 0: Before predict\n" 320 | "Actor 0: After predict\n", 321 | ) 322 | shutil.rmtree(tmpdir) 323 | 324 | def testDistributedCallbacksTrainPredictClient(self): 325 | """Test distributed callbacks for train/predict via Ray client""" 326 | 327 | if ray.__version__ <= "1.2.0": 328 | self.skipTest("Ray client mocks do not work in Ray <= 1.2.0") 329 | from ray.util.client.ray_client_helpers import ray_start_client_server 330 | 331 | ray.init(num_cpus=8, num_gpus=0, include_dashboard=False) 332 | self.assertFalse(ray.util.client.ray.is_connected()) 333 | with ray_start_client_server(): 334 | self.assertTrue(ray.util.client.ray.is_connected()) 335 | 336 | self.testDistributedCallbacksTrainPredict(init=False, remote=None) 337 | 338 | def testFailPrintErrors(self): 339 | """Test that LightGBM training errors are propagated""" 340 | x = np.random.uniform(0, 1, size=(100, 4)) 341 | y = np.random.randint(0, 2, size=100) 342 | 343 | train_set = RayDMatrix(x, y) 344 | 345 | try: 346 | train( 347 | { 348 | **self.params, 349 | **{"num_class": 2, "metric": ["multi_logloss", "multi_error"]}, 350 | }, # This will error 351 | train_set, 352 | evals=[(train_set, "train")], 353 | ray_params=RayParams( 354 | num_actors=1, cpus_per_actor=2, max_actor_restarts=0 355 | ), 356 | ) 357 | except RuntimeError as exc: 358 | self.assertTrue(exc.__cause__) 359 | self.assertTrue(isinstance(exc.__cause__, RayActorError)) 360 | 361 | self.assertTrue(exc.__cause__.__cause__) 362 | self.assertTrue(isinstance(exc.__cause__.__cause__, RayTaskError)) 363 | 364 | self.assertTrue(exc.__cause__.__cause__.cause) 365 | self.assertTrue( 366 | isinstance(exc.__cause__.__cause__.cause, RayXGBoostTrainingError) 367 | ) 368 | 369 | self.assertIn( 370 | "label and prediction size not match", str(exc.__cause__.__cause__) 371 | ) 372 | 373 | 374 | class LGBMRayEndToEndTestVoting(LGBMRayEndToEndTest): 375 | def setUp(self): 376 | super().setUp() 377 | self.params["tree_learner"] = "voting" 378 | 379 | 380 | if __name__ == "__main__": 381 | import sys 382 | 383 | import pytest 384 | 385 | sys.exit(pytest.main(["-v", __file__])) 386 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/test_fault_tolerance.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | import time 5 | import unittest 6 | from unittest.mock import DEFAULT, patch 7 | 8 | import lightgbm 9 | 
import numpy as np 10 | import ray 11 | from lightgbm import LGBMModel 12 | from sklearn.utils import shuffle 13 | from xgboost_ray.session import get_actor_rank, put_queue 14 | from xgboost_ray.tests.utils import flatten_obj 15 | 16 | from lightgbm_ray import RayDMatrix, RayParams, train 17 | 18 | 19 | def get_num_trees(model_or_booster): 20 | if isinstance(model_or_booster, LGBMModel): 21 | return model_or_booster.booster_.current_iteration() 22 | return model_or_booster.current_iteration() 23 | 24 | 25 | def _kill_callback(die_lock_file: str, actor_rank: int = 0, fail_iteration: int = 6): 26 | """Returns a callback to kill an actor process. 27 | 28 | Args: 29 | die_lock_file: A file lock used to prevent race conditions 30 | when killing the actor. 31 | actor_rank: The rank of the actor to kill. 32 | fail_iteration: The iteration after which the actor is killed. 33 | 34 | """ 35 | 36 | def _callback(env): 37 | if get_actor_rank() == actor_rank: 38 | put_queue((env.iteration, time.time())) 39 | if ( 40 | get_actor_rank() == actor_rank 41 | and env.iteration == fail_iteration 42 | and not os.path.exists(die_lock_file) 43 | ): 44 | 45 | # Get PID 46 | pid = os.getpid() 47 | print(f"Killing process: {pid}") 48 | with open(die_lock_file, "wt") as fp: 49 | fp.write("") 50 | 51 | time.sleep(2) 52 | print(f"Testing: Rank {get_actor_rank()} will now die.") 53 | os.kill(pid, 9) 54 | 55 | _callback.order = 10 # type: ignore 56 | return _callback 57 | 58 | 59 | def _checkpoint_callback(frequency: int = 1, before_iteration_=False): 60 | """Returns a callback to checkpoint a model. 61 | 62 | Args: 63 | frequency: The interval at which checkpointing occurs. If 64 | frequency is set to n, checkpointing occurs every n iterations. 65 | before_iteration_: If True, checkpoint before the iteration 66 | begins. Else, checkpoint after the iteration ends. 67 | 68 | """ 69 | 70 | def _callback(env): 71 | if env.iteration % frequency == 0: 72 | put_queue(env.model.model_to_string()) 73 | 74 | _callback.before_iteration = before_iteration_ 75 | return _callback 76 | 77 | 78 | def _fail_callback(die_lock_file: str, actor_rank: int = 0, fail_iteration: int = 6): 79 | """Returns a callback to cause a LightGBM actor to fail training. 80 | 81 | Args: 82 | die_lock_file: A file lock used to prevent race conditions 83 | when causing the actor to fail. 84 | actor_rank: The rank of the actor to fail. 85 | fail_iteration: The iteration after which the training for 86 | the specified actor fails. 87 | 88 | """ 89 | 90 | def _callback(env): 91 | if get_actor_rank() == actor_rank: 92 | put_queue((env.iteration, time.time())) 93 | if ( 94 | get_actor_rank() == actor_rank 95 | and env.iteration == fail_iteration 96 | and not os.path.exists(die_lock_file) 97 | ): 98 | 99 | with open(die_lock_file, "wt") as fp: 100 | fp.write("") 101 | time.sleep(2) 102 | import sys 103 | 104 | print(f"Testing: Rank {get_actor_rank()} will now fail.") 105 | sys.exit(1) 106 | 107 | return _callback 108 | 109 | 110 | class LightGBMRayFaultToleranceTest(unittest.TestCase): 111 | """In this test suite we validate fault tolerance when a Ray actor dies. 112 | 113 | For this, we set up a callback that makes one worker die exactly once. 
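The kill callback writes a lock file the first time it fires, so the restarted actor is not killed again and training can resume from its last checkpoint.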
114 | """ 115 | 116 | def setUp(self): 117 | repeat = 64 # Repeat data a couple of times for stability 118 | self.x = np.array( 119 | [ 120 | [1, 0, 0, 0], # Feature 0 -> Label 0 121 | [0, 1, 0, 0], # Feature 1 -> Label 1 122 | [0, 0, 1, 1], # Feature 2+3 -> Label 2 123 | [0, 0, 1, 0], # Feature 2+!3 -> Label 3 124 | ] 125 | * repeat 126 | ) 127 | self.y = np.array([0, 1, 2, 3] * repeat) 128 | 129 | self.x, self.y = shuffle(self.x, self.y, random_state=1) 130 | 131 | self.params = { 132 | "nthread": 2, 133 | "max_depth": 2, 134 | "num_leaves": 2, 135 | "tree_learner": "data", 136 | "objective": "multiclass", 137 | "num_class": 4, 138 | "random_state": 1, 139 | "deterministic": True, 140 | "time_out": 1, 141 | } 142 | 143 | self.tmpdir = str(tempfile.mkdtemp()) 144 | 145 | self.die_lock_file = "/tmp/died_worker.lock" 146 | if os.path.exists(self.die_lock_file): 147 | os.remove(self.die_lock_file) 148 | 149 | self.die_lock_file_2 = "/tmp/died_worker_2.lock" 150 | if os.path.exists(self.die_lock_file_2): 151 | os.remove(self.die_lock_file_2) 152 | 153 | def tearDown(self) -> None: 154 | if os.path.exists(self.tmpdir): 155 | shutil.rmtree(self.tmpdir) 156 | ray.shutdown() 157 | 158 | if os.path.exists(self.die_lock_file): 159 | os.remove(self.die_lock_file) 160 | 161 | if os.path.exists(self.die_lock_file_2): 162 | os.remove(self.die_lock_file_2) 163 | 164 | def testTrainingContinuationKilled(self): 165 | """This should continue after one actor died.""" 166 | ray.init(num_cpus=4, num_gpus=0, log_to_driver=True) 167 | additional_results = {} 168 | keep_actors = {} 169 | 170 | def keep(actors, *args, **kwargs): 171 | keep_actors["actors"] = actors.copy() 172 | return DEFAULT 173 | 174 | with patch("lightgbm_ray.main._shutdown") as mocked: 175 | mocked.side_effect = keep 176 | bst = train( 177 | self.params, 178 | RayDMatrix(self.x, self.y), 179 | callbacks=[_kill_callback(self.die_lock_file)], 180 | num_boost_round=50, 181 | ray_params=RayParams( 182 | max_actor_restarts=1, num_actors=2, cpus_per_actor=2 183 | ), 184 | additional_results=additional_results, 185 | ) 186 | 187 | self.assertEqual(50, get_num_trees(bst)) 188 | 189 | pred_y = bst.predict(self.x) 190 | pred_y = np.argmax(pred_y, axis=1) 191 | self.assertSequenceEqual(list(self.y), list(pred_y)) 192 | print(f"Got correct predictions: {pred_y}") 193 | 194 | actors = keep_actors["actors"] 195 | # End with two working actors 196 | self.assertTrue(actors[0]) 197 | self.assertTrue(actors[1]) 198 | 199 | # Two workers finished, so N=64*4 200 | self.assertEqual(additional_results["total_n"], 64 * 4) 201 | 202 | def testTrainingStop(self): 203 | """This should now stop training after one actor died.""" 204 | # The `train()` function raises a RuntimeError 205 | ray.init(num_cpus=4, num_gpus=0, log_to_driver=True) 206 | with self.assertRaises(RuntimeError): 207 | train( 208 | self.params, 209 | RayDMatrix(self.x, self.y), 210 | callbacks=[_kill_callback(self.die_lock_file)], 211 | num_boost_round=20, 212 | ray_params=RayParams(max_actor_restarts=0, num_actors=2), 213 | ) 214 | 215 | def testCheckpointContinuationValidity(self): 216 | """Test that checkpoints are stored and loaded correctly""" 217 | 218 | ray.init(num_cpus=4, num_gpus=0, log_to_driver=True) 219 | # Train once, get checkpoint via callback returns 220 | res_1 = {} 221 | train( 222 | self.params, 223 | RayDMatrix(self.x, self.y), 224 | callbacks=[_checkpoint_callback(frequency=1, before_iteration_=False)], 225 | num_boost_round=2, 226 | ray_params=RayParams(num_actors=2, 
cpus_per_actor=2), 227 | additional_results=res_1, 228 | ) 229 | last_checkpoint_1 = res_1["callback_returns"][0][-1] 230 | 231 | lc1 = lightgbm.Booster(model_str=last_checkpoint_1) 232 | 233 | # Start new training run, starting from existing model 234 | res_2 = {} 235 | train( 236 | self.params, 237 | RayDMatrix(self.x, self.y), 238 | callbacks=[ 239 | _checkpoint_callback(frequency=1, before_iteration_=True), 240 | _checkpoint_callback(frequency=1, before_iteration_=False), 241 | ], 242 | num_boost_round=4, 243 | ray_params=RayParams(num_actors=2, cpus_per_actor=2), 244 | additional_results=res_2, 245 | init_model=lc1, 246 | ) 247 | first_checkpoint_2 = res_2["callback_returns"][0][0] 248 | last_checkpoint_2 = res_2["callback_returns"][0][-1] 249 | 250 | fcp_bst = lightgbm.Booster(model_str=first_checkpoint_2) 251 | 252 | lcp_bst = lightgbm.Booster(model_str=last_checkpoint_2) 253 | 254 | # Training should not have proceeded for the first checkpoint, 255 | # so trees should be equal 256 | self.assertEqual(lc1.current_iteration(), fcp_bst.current_iteration()) 257 | 258 | # Training should have proceeded for the last checkpoint, 259 | # so trees should not be equal 260 | self.assertNotEqual(fcp_bst.model_to_string(), lcp_bst.model_to_string()) 261 | 262 | def testSameResultWithAndWithoutError(self): 263 | """Get the same model with and without errors during training.""" 264 | 265 | ray.init(num_cpus=5, num_gpus=0, log_to_driver=True) 266 | # Run training 267 | print("test no error") 268 | bst_noerror = train( 269 | self.params, 270 | RayDMatrix(self.x, self.y), 271 | num_boost_round=10, 272 | ray_params=RayParams(max_actor_restarts=0, num_actors=2, cpus_per_actor=2), 273 | ) 274 | 275 | print("test part 1") 276 | bst_2part_1 = train( 277 | self.params, 278 | RayDMatrix(self.x, self.y), 279 | num_boost_round=5, 280 | ray_params=RayParams(max_actor_restarts=0, num_actors=2, cpus_per_actor=2), 281 | ) 282 | 283 | print("test part 2") 284 | bst_2part_2 = train( 285 | self.params, 286 | RayDMatrix(self.x, self.y), 287 | num_boost_round=5, 288 | ray_params=RayParams(max_actor_restarts=0, num_actors=2, cpus_per_actor=2), 289 | init_model=bst_2part_1, 290 | ) 291 | 292 | print("test error") 293 | res_error = {} 294 | bst_error = train( 295 | self.params, 296 | RayDMatrix(self.x, self.y), 297 | callbacks=[_fail_callback(self.die_lock_file, fail_iteration=7)], 298 | num_boost_round=10, 299 | ray_params=RayParams( 300 | max_actor_restarts=1, 301 | num_actors=2, 302 | checkpoint_frequency=5, 303 | cpus_per_actor=2, 304 | ), 305 | additional_results=res_error, 306 | ) 307 | 308 | self.assertEqual( 309 | bst_error.booster_.current_iteration(), 310 | bst_noerror.booster_.current_iteration(), 311 | ) 312 | self.assertEqual( 313 | bst_2part_2.booster_.current_iteration(), 314 | bst_noerror.booster_.current_iteration(), 315 | ) 316 | 317 | flat_noerror = flatten_obj({"tree": bst_noerror.booster_.dump_model()}) 318 | flat_error = flatten_obj({"tree": bst_error.booster_.dump_model()}) 319 | flat_2part = flatten_obj({"tree": bst_2part_2.booster_.dump_model()}) 320 | 321 | for key in flat_noerror: 322 | self.assertAlmostEqual(flat_noerror[key], flat_error[key], places=4) 323 | self.assertAlmostEqual(flat_noerror[key], flat_2part[key], places=4) 324 | 325 | # We fail at iteration 7, but checkpoints are saved at iteration 5 326 | # Thus we have two additional returns here. 
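# (Worked example: with num_boost_round=10 the callback fires once per boosting iteration, i.e. 10 queue returns in the error-free case; the failure after iteration 7 restarts training from the checkpoint saved at iteration 5, so iterations 6 and 7 run twice, giving 10 + 2 = 12 returns.)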
327 | print("Callback returns:", res_error["callback_returns"][0]) 328 | self.assertEqual(len(res_error["callback_returns"][0]), 10 + 2) 329 | 330 | 331 | if __name__ == "__main__": 332 | import sys 333 | 334 | import pytest 335 | 336 | sys.exit(pytest.main(["-v", __file__])) 337 | -------------------------------------------------------------------------------- /lightgbm_ray/tests/test_lightgbm.py: -------------------------------------------------------------------------------- 1 | """Tests for lightgbm-ray, based on lightgbm.dask tests""" 2 | 3 | # The MIT License (MIT) 4 | 5 | # Copyright (c) Microsoft Corporation 6 | 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 16 | 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | # File based on: 26 | # https://github.com/microsoft/LightGBM/blob/c3b9363d02564625332583e166e3ab3135f436e3/tests/python_package_test/test_dask.py 27 | 28 | # License: 29 | # https://github.com/microsoft/LightGBM/blob/c3b9363d02564625332583e166e3ab3135f436e3/LICENSE 30 | 31 | import itertools 32 | import unittest 33 | 34 | import lightgbm as lgb 35 | import numpy as np 36 | import pandas as pd 37 | import ray 38 | import sklearn.utils.estimator_checks as sklearn_checks 39 | from parameterized import parameterized 40 | from sklearn.datasets import make_blobs, make_regression 41 | from sklearn.metrics import accuracy_score, r2_score 42 | from sklearn.model_selection import train_test_split 43 | from sklearn.utils import _safe_indexing 44 | 45 | from lightgbm_ray import RayDMatrix, RayParams, RayShardingMode 46 | from lightgbm_ray.sklearn import RayLGBMClassifier, RayLGBMRegressor 47 | 48 | data_output = [ 49 | "array", 50 | "dataframe", 51 | "dataframe-with-categorical", 52 | "raydmatrix-interleaved", # "raydmatrix-batch" 53 | ] 54 | data_output_local = [x for x in data_output if "raydmatrix" not in x] 55 | boosting_types = ["gbdt"] # "dart", "goss", "rf"] 56 | distributed_training_algorithms = ["data", "voting"] 57 | 58 | 59 | def sklearn_checks_to_run(): 60 | check_names = [ 61 | "check_estimator_get_tags_default_keys", 62 | "check_get_params_invariance", 63 | "check_set_params", 64 | ] 65 | checks = [] 66 | for check_name in check_names: 67 | check_func = getattr(sklearn_checks, check_name, None) 68 | if check_func: 69 | checks.append(check_func) 70 | return checks 71 | 72 | 73 | estimators_to_test = [RayLGBMClassifier, RayLGBMRegressor] 74 | 75 | 76 | def _create_data(objective, n_samples=2000, output="array", **kwargs): 77 | if 
objective.endswith("classification"): 78 | if objective == "binary-classification": 79 | centers = [[-4, -4], [4, 4]] 80 | elif objective == "multiclass-classification": 81 | centers = [[-4, -4], [4, 4], [-4, 4]] 82 | else: 83 | raise ValueError(f"Unknown classification task '{objective}'") 84 | X, y = make_blobs(n_samples=n_samples, centers=centers, random_state=42) 85 | elif objective == "regression": 86 | X, y = make_regression( 87 | n_samples=n_samples, n_features=4, n_informative=2, random_state=42 88 | ) 89 | # elif objective == "ranking": 90 | # return _create_ranking_data( 91 | # n_samples=n_samples, 92 | # output=output, 93 | # chunk_size=chunk_size, 94 | # **kwargs 95 | # ) 96 | else: 97 | raise ValueError(f"Unknown objective '{objective}'") 98 | rnd = np.random.RandomState(42) 99 | weights = rnd.random(X.shape[0]) * 0.01 100 | 101 | def convert_data(X, y, weights): 102 | if output == "array": 103 | dX = X 104 | dy = y 105 | dw = weights 106 | elif output.startswith("dataframe"): 107 | X_df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])]) 108 | if output == "dataframe-with-categorical": 109 | num_cat_cols = 2 110 | for i in range(num_cat_cols): 111 | col_name = f"cat_col{i}" 112 | cat_values = rnd.choice(["a", "b"], X.shape[0]) 113 | cat_series = pd.Series(cat_values, dtype="category") 114 | X_df[col_name] = cat_series 115 | X = np.hstack((X, cat_series.cat.codes.values.reshape(-1, 1))) 116 | 117 | # make one categorical feature relevant to the target 118 | cat_col_is_a = X_df["cat_col0"] == "a" 119 | if objective == "regression": 120 | y = np.where(cat_col_is_a, y, 2 * y) 121 | elif objective == "binary-classification": 122 | y = np.where(cat_col_is_a, y, 1 - y) 123 | elif objective == "multiclass-classification": 124 | n_classes = 3 125 | y = np.where(cat_col_is_a, y, (1 + y) % n_classes) 126 | y_df = pd.Series(y, name="target") 127 | dX = X_df 128 | dy = y_df 129 | dw = pd.Series(weights) 130 | elif output.startswith("raydmatrix"): 131 | sharding = { 132 | "raydmatrix-interleaved": RayShardingMode.INTERLEAVED, 133 | "raydmatrix-batch": RayShardingMode.BATCH, 134 | } 135 | dX = RayDMatrix(X, y, weights, sharding=sharding[output]) 136 | dy = None 137 | dw = None 138 | else: 139 | raise ValueError(f"Unknown output type '{output}'") 140 | return dX, dy, dw 141 | 142 | train_idx, test_idx = train_test_split( 143 | np.arange(0, len(X)), 144 | test_size=0.5, 145 | stratify=y if objective.endswith("classification") else None, 146 | random_state=42, 147 | shuffle=True, 148 | ) 149 | 150 | if output.startswith("raydmatrix"): 151 | dX, dy, dw = convert_data(X[train_idx], y[train_idx], weights[train_idx]) 152 | dX_test, dy_test, dw_test = convert_data( 153 | X[test_idx], y[test_idx], weights[test_idx] 154 | ) 155 | else: 156 | dX, dy, dw = convert_data(X, y, weights) 157 | dX_test = _safe_indexing(dX, test_idx) 158 | dy_test = _safe_indexing(dy, test_idx) 159 | dw_test = _safe_indexing(dw, test_idx) 160 | dX = _safe_indexing(dX, train_idx) 161 | dy = _safe_indexing(dy, train_idx) 162 | dw = _safe_indexing(dw, train_idx) 163 | 164 | return ( 165 | X[train_idx], 166 | y[train_idx], 167 | weights[train_idx], 168 | None, 169 | dX, 170 | dy, 171 | dw, 172 | None, 173 | dX_test, 174 | dy_test, 175 | dw_test, 176 | ) 177 | 178 | 179 | class LGBMRayTest(unittest.TestCase): 180 | def setUp(self): 181 | self.ray_params = RayParams(num_actors=2, cpus_per_actor=2) 182 | 183 | def tearDown(self): 184 | ray.shutdown() 185 | 186 | @parameterized.expand( 187 | list( 188 | 
itertools.product( 189 | data_output, 190 | ["binary-classification", "multiclass-classification"], 191 | boosting_types, 192 | distributed_training_algorithms, 193 | ) 194 | ) 195 | ) 196 | def testClassifier(self, output, task, boosting_type, tree_learner): 197 | ray.init(num_cpus=4, num_gpus=0) 198 | 199 | print(output, task, boosting_type, tree_learner) 200 | 201 | X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data( 202 | objective=task, output=output 203 | ) 204 | 205 | eval_weights = [dw_test] 206 | if dy_test is None: 207 | dy_test = "test" 208 | eval_weights = None 209 | eval_set = [(dX_test, dy_test)] 210 | 211 | if "raydmatrix" in output: 212 | lX = X 213 | ly = y 214 | lw = w 215 | else: 216 | lX = dX 217 | ly = dy 218 | lw = dw 219 | 220 | params = { 221 | "boosting_type": boosting_type, 222 | "tree_learner": tree_learner, 223 | "n_estimators": 50, 224 | "num_leaves": 31, 225 | "random_state": 1, 226 | "deterministic": True, 227 | } 228 | if boosting_type == "rf": 229 | params.update( 230 | { 231 | "bagging_freq": 1, 232 | "bagging_fraction": 0.9, 233 | } 234 | ) 235 | elif boosting_type == "goss": 236 | params["top_rate"] = 0.5 237 | 238 | ray_classifier = RayLGBMClassifier(**params) 239 | ray_classifier = ray_classifier.fit( 240 | dX, 241 | dy, 242 | sample_weight=dw, 243 | ray_params=self.ray_params, 244 | eval_set=eval_set, 245 | eval_sample_weight=eval_weights, 246 | ) 247 | ray_classifier = ray_classifier.fit( 248 | dX, 249 | dy, 250 | sample_weight=dw, 251 | ray_params=self.ray_params, 252 | ) 253 | p1 = ray_classifier.predict(dX, ray_params=self.ray_params) 254 | p1_proba = ray_classifier.predict_proba(dX, ray_params=self.ray_params) 255 | p1_pred_leaf = ray_classifier.predict( 256 | dX, pred_leaf=True, ray_params=self.ray_params 257 | ) 258 | p1_local = ray_classifier.to_local().predict(lX) 259 | s1 = accuracy_score(ly, p1) 260 | 261 | local_classifier = lgb.LGBMClassifier(**params) 262 | local_classifier.fit( 263 | lX, 264 | ly, 265 | sample_weight=lw, 266 | ) 267 | p2 = local_classifier.predict(lX) 268 | p2_proba = local_classifier.predict_proba(lX) 269 | s2 = local_classifier.score(lX, ly) 270 | 271 | if boosting_type == "rf": 272 | # https://github.com/microsoft/LightGBM/issues/4118 273 | self.assertTrue(np.allclose(s1, s2, atol=0.01)) 274 | self.assertTrue(np.allclose(p1_proba, p2_proba, atol=0.8)) 275 | else: 276 | self.assertTrue(np.allclose(s1, s2)) 277 | self.assertTrue(np.allclose(p1, p2)) 278 | self.assertTrue(np.allclose(p1, ly)) 279 | self.assertTrue(np.allclose(p2, ly)) 280 | self.assertTrue(np.allclose(p1_proba, p2_proba, atol=0.1)) 281 | self.assertTrue(np.allclose(p1_local, p2)) 282 | self.assertTrue(np.allclose(p1_local, ly)) 283 | 284 | # pref_leaf values should have the right shape 285 | # and values that look like valid tree nodes 286 | pred_leaf_vals = p1_pred_leaf 287 | assert pred_leaf_vals.shape == ( 288 | lX.shape[0], 289 | ray_classifier.booster_.num_trees(), 290 | ) 291 | assert np.max(pred_leaf_vals) <= params["num_leaves"] 292 | assert np.min(pred_leaf_vals) >= 0 293 | assert len(np.unique(pred_leaf_vals)) <= params["num_leaves"] 294 | 295 | # be sure LightGBM actually used at least one categorical column, 296 | # and that it was correctly treated as a categorical feature 297 | if output == "dataframe-with-categorical": 298 | cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"] 299 | tree_df = ray_classifier.booster_.trees_to_dataframe() 300 | node_uses_cat_col = 
tree_df["split_feature"].isin(cat_cols) 301 | assert node_uses_cat_col.sum() > 0 302 | assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "==" 303 | 304 | @parameterized.expand( 305 | list( 306 | itertools.product( 307 | data_output_local, 308 | ["binary-classification", "multiclass-classification"], 309 | ) 310 | ) 311 | ) 312 | def testClassifierEarlyStopping(self, output, task): 313 | ray.init(num_cpus=4, num_gpus=0) 314 | 315 | print(output, task) 316 | 317 | X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data( 318 | objective=task, output=output 319 | ) 320 | 321 | eval_weights = [dw_test] 322 | if dy_test is None: 323 | dy_test = "test" 324 | eval_weights = None 325 | eval_set = [(dX_test, dy_test)] 326 | 327 | if "raydmatrix" in output: 328 | lX = X 329 | ly = y 330 | lw = w 331 | else: 332 | lX = dX 333 | ly = dy 334 | lw = dw 335 | 336 | n_estimators = 400 337 | params = { 338 | "n_estimators": n_estimators, 339 | "num_leaves": 31, 340 | "random_state": 1, 341 | "deterministic": True, 342 | } 343 | 344 | callbacks = [lgb.early_stopping(1)] 345 | 346 | ray_classifier = RayLGBMClassifier(**params) 347 | ray_classifier = ray_classifier.fit( 348 | dX, 349 | dy, 350 | sample_weight=dw, 351 | ray_params=self.ray_params, 352 | eval_set=eval_set, 353 | eval_sample_weight=eval_weights, 354 | callbacks=callbacks, 355 | ) 356 | 357 | self.assertLess( 358 | len(list(ray_classifier.evals_result_["valid_0"].values())[0]), n_estimators 359 | ) 360 | 361 | p1 = ray_classifier.predict(dX, ray_params=self.ray_params) 362 | p1_proba = ray_classifier.predict_proba(dX, ray_params=self.ray_params) 363 | p1_pred_leaf = ray_classifier.predict( 364 | dX, pred_leaf=True, ray_params=self.ray_params 365 | ) 366 | p1_local = ray_classifier.to_local().predict(lX) 367 | s1 = accuracy_score(ly, p1) 368 | 369 | local_classifier = lgb.LGBMClassifier(**params) 370 | local_classifier.fit( 371 | lX, 372 | ly, 373 | sample_weight=lw, 374 | eval_set=eval_set, 375 | eval_sample_weight=eval_weights, 376 | callbacks=callbacks, 377 | ) 378 | p2 = local_classifier.predict(lX) 379 | p2_proba = local_classifier.predict_proba(lX) 380 | s2 = local_classifier.score(lX, ly) 381 | 382 | self.assertTrue(np.allclose(s1, s2)) 383 | self.assertTrue(np.allclose(p1, p2)) 384 | self.assertTrue(np.allclose(p1, ly)) 385 | self.assertTrue(np.allclose(p2, ly)) 386 | self.assertTrue(np.allclose(p1_proba, p2_proba, atol=0.1)) 387 | self.assertTrue(np.allclose(p1_local, p2)) 388 | self.assertTrue(np.allclose(p1_local, ly)) 389 | 390 | # pref_leaf values should have the right shape 391 | # and values that look like valid tree nodes 392 | pred_leaf_vals = p1_pred_leaf 393 | assert pred_leaf_vals.shape == ( 394 | lX.shape[0], 395 | ray_classifier.booster_.num_trees(), 396 | ) 397 | assert np.max(pred_leaf_vals) <= params["num_leaves"] 398 | assert np.min(pred_leaf_vals) >= 0 399 | assert len(np.unique(pred_leaf_vals)) <= params["num_leaves"] 400 | 401 | # be sure LightGBM actually used at least one categorical column, 402 | # and that it was correctly treated as a categorical feature 403 | if output == "dataframe-with-categorical": 404 | cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"] 405 | tree_df = ray_classifier.booster_.trees_to_dataframe() 406 | node_uses_cat_col = tree_df["split_feature"].isin(cat_cols) 407 | assert node_uses_cat_col.sum() > 0 408 | assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "==" 409 | 410 | @parameterized.expand( 411 | list( 412 | 
itertools.product( 413 | data_output, 414 | ["binary-classification", "multiclass-classification"], 415 | ) 416 | ) 417 | ) 418 | def testClassifierPredContrib(self, output, task): 419 | ray.init(num_cpus=4, num_gpus=0) 420 | 421 | X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data( 422 | objective=task, output=output 423 | ) 424 | 425 | params = { 426 | "n_estimators": 10, 427 | "num_leaves": 10, 428 | "random_state": 1, 429 | "deterministic": True, 430 | } 431 | 432 | ray_classifier = RayLGBMClassifier(tree_learner="data", **params) 433 | ray_classifier = ray_classifier.fit( 434 | dX, dy, sample_weight=dw, ray_params=self.ray_params 435 | ) 436 | preds_with_contrib = ray_classifier.predict( 437 | dX, pred_contrib=True, ray_params=self.ray_params 438 | ) 439 | 440 | local_classifier = lgb.LGBMClassifier(**params) 441 | if "raydmatrix" in output: 442 | lX = X 443 | ly = y 444 | lw = w 445 | else: 446 | lX = dX 447 | ly = dy 448 | lw = dw 449 | local_classifier.fit(lX, ly, sample_weight=lw) 450 | local_preds_with_contrib = local_classifier.predict(lX, pred_contrib=True) 451 | 452 | # be sure LightGBM actually used at least one categorical column, 453 | # and that it was correctly treated as a categorical feature 454 | if output == "dataframe-with-categorical": 455 | cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"] 456 | tree_df = ray_classifier.booster_.trees_to_dataframe() 457 | node_uses_cat_col = tree_df["split_feature"].isin(cat_cols) 458 | assert node_uses_cat_col.sum() > 0 459 | assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "==" 460 | 461 | # shape depends on whether it is binary or multiclass classification 462 | num_features = ray_classifier.n_features_ 463 | num_classes = ray_classifier.n_classes_ 464 | if num_classes == 2: 465 | expected_num_cols = num_features + 1 466 | else: 467 | expected_num_cols = (num_features + 1) * num_classes 468 | 469 | # * shape depends on whether it is binary or multiclass classification 470 | # * matrix for binary classification is of the form [feature_contrib, 471 | # base_value], 472 | # for multi-class it's [feat_contrib_class1, base_value_class1, 473 | # feat_contrib_class2, base_value_class2, etc.] 
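# * illustrative example (numbers not taken from this test's data): with 4 features and 3 classes the matrix has (4 + 1) * 3 = 15 columns, and the base value for class i sits at column num_features * (i + 1) + i, i.e. columns 4, 9 and 14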
474 | # * contrib outputs for distributed training are different than from 475 | # local training, so we can just test 476 | # that the output has the right shape and base values are in the 477 | # right position 478 | assert preds_with_contrib.shape[1] == expected_num_cols 479 | assert preds_with_contrib.shape == local_preds_with_contrib.shape 480 | 481 | if num_classes == 2: 482 | assert len(np.unique(preds_with_contrib[:, num_features])) == 1 483 | else: 484 | for i in range(num_classes): 485 | base_value_col = num_features * (i + 1) + i 486 | assert len(np.unique(preds_with_contrib[:, base_value_col])) == 1 487 | 488 | @parameterized.expand( 489 | list( 490 | itertools.product( 491 | data_output, 492 | boosting_types, 493 | distributed_training_algorithms, 494 | ) 495 | ) 496 | ) 497 | def testRegressor(self, output, boosting_type, tree_learner): 498 | ray.init(num_cpus=4, num_gpus=0) 499 | 500 | X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data( 501 | objective="regression", output=output 502 | ) 503 | 504 | eval_weights = [dw_test] 505 | if dy_test is None: 506 | dy_test = "test" 507 | eval_weights = None 508 | eval_set = [(dX_test, dy_test)] 509 | 510 | if "raydmatrix" in output: 511 | lX = X 512 | ly = y 513 | lw = w 514 | else: 515 | lX = dX 516 | ly = dy 517 | lw = dw 518 | 519 | params = { 520 | "boosting_type": boosting_type, 521 | "random_state": 42, 522 | "num_leaves": 31, 523 | "n_estimators": 20, 524 | "deterministic": True, 525 | } 526 | if boosting_type == "rf": 527 | params.update( 528 | { 529 | "bagging_freq": 1, 530 | "bagging_fraction": 0.9, 531 | } 532 | ) 533 | 534 | ray_regressor = RayLGBMRegressor(tree=tree_learner, **params) 535 | ray_regressor = ray_regressor.fit( 536 | dX, 537 | dy, 538 | sample_weight=dw, 539 | ray_params=self.ray_params, 540 | eval_set=eval_set, 541 | eval_sample_weight=eval_weights, 542 | ) 543 | ray_regressor = ray_regressor.fit( 544 | dX, 545 | dy, 546 | sample_weight=dw, 547 | ray_params=self.ray_params, 548 | ) 549 | p1 = ray_regressor.predict(dX, ray_params=self.ray_params) 550 | p1_pred_leaf = ray_regressor.predict( 551 | dX, pred_leaf=True, ray_params=self.ray_params 552 | ) 553 | 554 | s1 = r2_score(ly, p1) 555 | p1_local = ray_regressor.to_local().predict(lX) 556 | s1_local = ray_regressor.to_local().score(lX, ly) 557 | 558 | local_regressor = lgb.LGBMRegressor(**params) 559 | local_regressor.fit( 560 | lX, 561 | ly, 562 | sample_weight=lw, 563 | ) 564 | s2 = local_regressor.score(lX, ly) 565 | p2 = local_regressor.predict(lX) 566 | 567 | # Scores should be the same 568 | self.assertTrue(np.allclose(s1, s2, atol=0.01)) 569 | self.assertTrue(np.allclose(s1, s1_local)) 570 | 571 | # Predictions should be roughly the same. 
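# (p1 is the distributed prediction across Ray actors, while p1_local comes from the same booster pulled to the driver via to_local(), so both should agree within np.allclose's default tolerances.)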
572 | self.assertTrue(np.allclose(p1, p1_local)) 573 | 574 | # pref_leaf values should have the right shape 575 | # and values that look like valid tree nodes 576 | pred_leaf_vals = p1_pred_leaf 577 | assert pred_leaf_vals.shape == (lX.shape[0], ray_regressor.booster_.num_trees()) 578 | assert np.max(pred_leaf_vals) <= params["num_leaves"] 579 | assert np.min(pred_leaf_vals) >= 0 580 | assert len(np.unique(pred_leaf_vals)) <= params["num_leaves"] 581 | 582 | self.assertTrue(np.allclose(p2, ly, rtol=0.5, atol=50.0)) 583 | self.assertTrue(np.allclose(p1, ly, rtol=0.5, atol=50.0)) 584 | 585 | # be sure LightGBM actually used at least one categorical column, 586 | # and that it was correctly treated as a categorical feature 587 | if output == "dataframe-with-categorical": 588 | cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"] 589 | tree_df = ray_regressor.booster_.trees_to_dataframe() 590 | node_uses_cat_col = tree_df["split_feature"].isin(cat_cols) 591 | assert node_uses_cat_col.sum() > 0 592 | assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "==" 593 | 594 | @parameterized.expand(data_output_local) 595 | def testRegressorEarlyStopping(self, output): 596 | ray.init(num_cpus=4, num_gpus=0) 597 | 598 | X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data( 599 | objective="regression", output=output 600 | ) 601 | 602 | eval_weights = [dw_test] 603 | if dy_test is None: 604 | dy_test = "test" 605 | eval_weights = None 606 | eval_set = [(dX_test, dy_test)] 607 | 608 | if "raydmatrix" in output: 609 | lX = X 610 | ly = y 611 | lw = w 612 | else: 613 | lX = dX 614 | ly = dy 615 | lw = dw 616 | 617 | n_estimators = 400 618 | params = { 619 | "random_state": 42, 620 | "num_leaves": 31, 621 | "n_estimators": n_estimators, 622 | "deterministic": True, 623 | } 624 | 625 | callbacks = [lgb.early_stopping(1)] 626 | 627 | ray_regressor = RayLGBMRegressor(**params) 628 | ray_regressor = ray_regressor.fit( 629 | dX, 630 | dy, 631 | sample_weight=dw, 632 | ray_params=self.ray_params, 633 | eval_set=eval_set, 634 | eval_sample_weight=eval_weights, 635 | callbacks=callbacks, 636 | ) 637 | 638 | self.assertLess( 639 | len(list(ray_regressor.evals_result_["valid_0"].values())[0]), n_estimators 640 | ) 641 | 642 | p1 = ray_regressor.predict(dX, ray_params=self.ray_params) 643 | p1_pred_leaf = ray_regressor.predict( 644 | dX, pred_leaf=True, ray_params=self.ray_params 645 | ) 646 | 647 | s1 = r2_score(ly, p1) 648 | p1_local = ray_regressor.to_local().predict(lX) 649 | s1_local = ray_regressor.to_local().score(lX, ly) 650 | 651 | local_regressor = lgb.LGBMRegressor(**params) 652 | local_regressor.fit( 653 | lX, 654 | ly, 655 | sample_weight=lw, 656 | eval_set=eval_set, 657 | eval_sample_weight=eval_weights, 658 | callbacks=callbacks, 659 | ) 660 | s2 = local_regressor.score(lX, ly) 661 | p2 = local_regressor.predict(lX) 662 | 663 | # Scores should be the same 664 | self.assertTrue(np.allclose(s1, s2, atol=0.01)) 665 | self.assertTrue(np.allclose(s1, s1_local)) 666 | 667 | # Predictions should be roughly the same. 
668 |         self.assertTrue(np.allclose(p1, p1_local))
669 |
670 |         # pred_leaf values should have the right shape
671 |         # and values that look like valid tree nodes
672 |         pred_leaf_vals = p1_pred_leaf
673 |         assert pred_leaf_vals.shape == (lX.shape[0], ray_regressor.booster_.num_trees())
674 |         assert np.max(pred_leaf_vals) <= params["num_leaves"]
675 |         assert np.min(pred_leaf_vals) >= 0
676 |         assert len(np.unique(pred_leaf_vals)) <= params["num_leaves"]
677 |
678 |         self.assertTrue(np.allclose(p2, ly, rtol=0.5, atol=50.0))
679 |         self.assertTrue(np.allclose(p1, ly, rtol=0.5, atol=50.0))
680 |
681 |         # be sure LightGBM actually used at least one categorical column,
682 |         # and that it was correctly treated as a categorical feature
683 |         if output == "dataframe-with-categorical":
684 |             cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"]
685 |             tree_df = ray_regressor.booster_.trees_to_dataframe()
686 |             node_uses_cat_col = tree_df["split_feature"].isin(cat_cols)
687 |             assert node_uses_cat_col.sum() > 0
688 |             assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "=="
689 |
690 |     @parameterized.expand(data_output)
691 |     def testRegressorPredContrib(self, output):
692 |         ray.init(num_cpus=4, num_gpus=0)
693 |
694 |         X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data(
695 |             objective="regression", output=output
696 |         )
697 |
698 |         if "raydmatrix" in output:
699 |             lX = X
700 |             ly = y
701 |             lw = w
702 |         else:
703 |             lX = dX
704 |             ly = dy
705 |             lw = dw
706 |
707 |         params = {
708 |             "n_estimators": 10,
709 |             "num_leaves": 10,
710 |             "random_state": 1,
711 |             "deterministic": True,
712 |         }
713 |
714 |         ray_regressor = RayLGBMRegressor(tree_learner="data", **params)
715 |         ray_regressor = ray_regressor.fit(
716 |             dX, dy, sample_weight=dw, ray_params=self.ray_params
717 |         )
718 |         preds_with_contrib = ray_regressor.predict(
719 |             dX, pred_contrib=True, ray_params=self.ray_params
720 |         )
721 |
722 |         local_regressor = lgb.LGBMRegressor(**params)
723 |         local_regressor.fit(lX, ly, sample_weight=lw)
724 |         local_preds_with_contrib = local_regressor.predict(lX, pred_contrib=True)
725 |
726 |         # contrib outputs for distributed training are different from those
727 |         # from local training, so we just test
728 |         # that the output has the right shape and base values are in
729 |         # the right position
730 |         num_features = lX.shape[1]
731 |         assert preds_with_contrib.shape[1] == num_features + 1
732 |         assert preds_with_contrib.shape == local_preds_with_contrib.shape
733 |
734 |         # be sure LightGBM actually used at least one categorical column,
735 |         # and that it was correctly treated as a categorical feature
736 |         if output == "dataframe-with-categorical":
737 |             cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"]
738 |             tree_df = ray_regressor.booster_.trees_to_dataframe()
739 |             node_uses_cat_col = tree_df["split_feature"].isin(cat_cols)
740 |             assert node_uses_cat_col.sum() > 0
741 |             assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "=="
742 |
743 |     @parameterized.expand(list(itertools.product(data_output, [0.1, 0.5, 0.9])))
744 |     def testRegressorQuantile(self, output, alpha):
745 |         ray.init(num_cpus=4, num_gpus=0)
746 |
747 |         X, y, w, _, dX, dy, dw, _, dX_test, dy_test, dw_test = _create_data(
748 |             objective="regression", output=output
749 |         )
750 |
751 |         params = {
752 |             "objective": "quantile",
753 |             "alpha": alpha,
754 |             "random_state": 42,
755 |             "n_estimators": 10,
756 |             "num_leaves": 10,
757 |             "deterministic": True,
758 |         }
759 |
760 |         if "raydmatrix" in output:
761 |             lX = X
762 |             ly = y
763 |             lw = w
764 |         else:
765 |             lX = dX
766 |             ly = dy
767 |             lw = dw
768 |
769 |         ray_regressor = RayLGBMRegressor(tree_learner_type="data_parallel", **params)
770 |         ray_regressor = ray_regressor.fit(
771 |             dX, dy, sample_weight=dw, ray_params=self.ray_params
772 |         )
773 |         p1 = ray_regressor.predict(dX, ray_params=self.ray_params)
774 |         q1 = np.count_nonzero(ly < p1) / ly.shape[0]
775 |
776 |         local_regressor = lgb.LGBMRegressor(**params)
777 |         local_regressor.fit(lX, ly, sample_weight=lw)
778 |         p2 = local_regressor.predict(lX)
779 |         q2 = np.count_nonzero(ly < p2) / ly.shape[0]
780 |
781 |         # Quantiles should be right
782 |         np.testing.assert_allclose(q1, alpha, atol=0.2)
783 |         np.testing.assert_allclose(q2, alpha, atol=0.2)
784 |
785 |         # be sure LightGBM actually used at least one categorical column,
786 |         # and that it was correctly treated as a categorical feature
787 |         if output == "dataframe-with-categorical":
788 |             cat_cols = [col for col in dX.columns if dX.dtypes[col].name == "category"]
789 |             tree_df = ray_regressor.booster_.trees_to_dataframe()
790 |             node_uses_cat_col = tree_df["split_feature"].isin(cat_cols)
791 |             assert node_uses_cat_col.sum() > 0
792 |             assert tree_df.loc[node_uses_cat_col, "decision_type"].unique()[0] == "=="
793 |
794 |     @parameterized.expand(
795 |         list(
796 |             itertools.product(
797 |                 estimators_to_test,
798 |                 sklearn_checks_to_run(),
799 |             )
800 |         )
801 |     )
802 |     def testSklearnIntegration(self, estimator, check):
803 |         estimator = estimator()
804 |         estimator.set_params(local_listen_port=18000, time_out=5)
805 |         name = type(estimator).__name__
806 |         check(name, estimator)
807 |
808 |
809 | if __name__ == "__main__":
810 |     import sys
811 |
812 |     import pytest
813 |
814 |     sys.exit(pytest.main(["-v", __file__]))
815 |
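(Editor's note: the pred_contrib assertions in the file above rely on a general LightGBM invariant: with pred_contrib=True, each output row holds one contribution per feature plus a trailing base value, and contributions plus base value sum to the model's raw prediction. A minimal, local-only sketch of that invariant; the toy data and parameter values here are illustrative and not taken from this repo.)

import lightgbm as lgb
import numpy as np
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, random_state=0)
model = lgb.LGBMRegressor(n_estimators=10, num_leaves=10, random_state=0).fit(X, y)

contrib = model.predict(X, pred_contrib=True)
# One column per feature plus the base (expected) value in the last column.
assert contrib.shape == (X.shape[0], X.shape[1] + 1)
# The base value is constant across rows for single-output regression ...
assert len(np.unique(contrib[:, -1])) == 1
# ... and contributions plus base value reconstruct the prediction.
np.testing.assert_allclose(contrib.sum(axis=1), model.predict(X), rtol=1e-4, atol=1e-6)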
--------------------------------------------------------------------------------
/lightgbm_ray/tests/test_lightgbm_api.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from typing import Tuple
3 |
4 | import lightgbm
5 | import numpy as np
6 | import ray
7 | from lightgbm.basic import _ConfigAliases
8 | from lightgbm.callback import CallbackEnv
9 | from xgboost_ray.session import put_queue
10 |
11 | from lightgbm_ray import RayDMatrix, RayParams, RayShardingMode, train
12 | from lightgbm_ray.tune import _TuneLGBMRank0Mixin
13 |
14 |
15 | def gradient(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
16 |     return (np.log1p(y_pred) - np.log1p(y_true)) / (y_pred + 1)
17 |
18 |
19 | def hessian(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
20 |     return (-np.log1p(y_pred) + np.log1p(y_true) + 1) / np.power(y_pred + 1, 2)
21 |
22 |
23 | def squared_log(
24 |     y_true: np.ndarray, y_pred: np.ndarray
25 | ) -> Tuple[np.ndarray, np.ndarray]:
26 |     y_pred[y_pred < -1] = -1 + 1e-6
27 |     grad = gradient(y_pred, y_true)
28 |     hess = hessian(y_pred, y_true)
29 |     return grad, hess
30 |
31 |
32 | def rmsle(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[str, float, bool]:
33 |     y_pred[y_pred < -1] = -1 + 1e-6
34 |     elements = np.power(np.log1p(y_true) - np.log1p(y_pred), 2)
35 |     return "PyRMSLE", float(np.sqrt(np.sum(elements) / len(y_true))), False
36 |
37 |
38 | class LightGBMAPITest(unittest.TestCase):
39 |     """This test suite validates core LightGBM API functionality."""
40 |
41 |     def setUp(self):
42 |         repeat = 128  # Repeat data a couple of times for stability
43 |         self.x = np.array(
44 |             [
45 |                 [1, 0, 0, 0],  # Feature 0 -> Label 0
46 |                 [0, 1, 0, 0],  # Feature 1 -> Label 1
47 |                 [0, 0, 1, 1],  # Feature 2+3 -> Label 0
48 |                 [0, 0, 1, 0],  # Feature 2+!3 -> Label 1
49 |             ]
50 |             * repeat
51 |         )
52 |         self.y = np.array([0, 1, 0, 1] * repeat)
53 |
54 |         self.params = {
55 |             "nthread": 2,
56 |             "objective": "binary",
57 |             "random_state": 1000,
58 |             "deterministic": True,
59 |         }
60 |
61 |         self.kwargs = {}
62 |
63 |     def tearDown(self) -> None:
64 |         ray.shutdown()
65 |
66 |     def _init_ray(self):
67 |         ray.init(num_cpus=4, num_gpus=0)
68 |
69 |     def testNumBoostRoundsValidation(self):
70 |         """Ensure that an exception is thrown if num_iterations is passed
71 |         as a parameter."""
72 |         self._init_ray()
73 |
74 |         for param_alias in _ConfigAliases.get("num_iterations"):
75 |             with self.assertRaisesRegex(ValueError, "num_boost_round"):
76 |                 params = self.params.copy()
77 |                 params[param_alias] = 10
78 |                 train(
79 |                     params,
80 |                     RayDMatrix(self.x, self.y, sharding=RayShardingMode.BATCH),
81 |                     ray_params=RayParams(num_actors=2),
82 |                     **self.kwargs,
83 |                 )
84 |
85 |     def testCustomObjectiveFunction(self):
86 |         """Ensure that custom objective functions work.
87 |
88 |         Runs a custom objective function with pure LightGBM and
89 |         LightGBM on Ray and compares the prediction outputs."""
90 |         self._init_ray()
91 |
92 |         params = self.params.copy()
93 |         params["objective"] = squared_log
94 |
95 |         model_lgbm = lightgbm.LGBMModel(**params).fit(self.x, self.y)
96 |
97 |         model_ray = train(
98 |             params,
99 |             RayDMatrix(self.x, self.y, sharding=RayShardingMode.BATCH),
100 |             ray_params=RayParams(num_actors=2),
101 |             num_boost_round=100,
102 |             **self.kwargs,
103 |         )
104 |
105 |         pred_y_lgbm = np.round(model_lgbm.predict(self.x))
106 |         pred_y_ray = np.round(model_ray.predict(self.x))
107 |
108 |         self.assertSequenceEqual(list(pred_y_lgbm), list(pred_y_ray))
109 |         self.assertSequenceEqual(list(self.y.astype(float)), list(pred_y_ray * -1))
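        # (Editor's note: `squared_log` above calls `gradient`/`hessian`
        # with swapped arguments, which flips the sign of the objective, so
        # the booster effectively learns the negative of the labels; the
        # `* -1` in the assertion above accounts for that.)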
110 |
111 |     def testCustomMetricFunction(self):
112 |         """Ensure that custom metric functions work.
113 |
114 |         Runs a custom metric function with pure LightGBM and
115 |         LightGBM on Ray and compares the evaluation results."""
116 |         self._init_ray()
117 |
118 |         params = self.params.copy()
119 |         params["objective"] = squared_log
120 |
121 |         model_lgbm = lightgbm.LGBMModel(**params).fit(
122 |             self.x,
123 |             self.y,
124 |             eval_metric=[rmsle],
125 |             eval_set=[(self.x, self.y)],
126 |             eval_names=["dtrain"],
127 |         )
128 |         evals_result_lgbm = model_lgbm.evals_result_
129 |
130 |         dtrain_ray = RayDMatrix(self.x, self.y, sharding=RayShardingMode.BATCH)
131 |         evals_result_ray = {}
132 |         train(
133 |             params,
134 |             dtrain_ray,
135 |             ray_params=RayParams(num_actors=2),
136 |             eval_metric=[rmsle],
137 |             evals=[(dtrain_ray, "dtrain")],
138 |             evals_result=evals_result_ray,
139 |             num_boost_round=100,
140 |             **self.kwargs,
141 |         )
142 |
143 |         print(evals_result_ray["dtrain"]["PyRMSLE"])
144 |         print(evals_result_lgbm["dtrain"]["PyRMSLE"])
145 |
146 |         self.assertTrue(
147 |             np.allclose(
148 |                 evals_result_lgbm["dtrain"]["PyRMSLE"],
149 |                 evals_result_ray["dtrain"]["PyRMSLE"],
150 |                 atol=0.1,
151 |             )
152 |         )
153 |
154 |     def testCallbacks(self):
155 |         self._init_ray()
156 |
157 |         class _Callback(_TuneLGBMRank0Mixin):
158 |             def __call__(self, env: CallbackEnv) -> None:
159 |                 print(f"My rank: {self.is_rank_0}")
160 |                 put_queue(("rank", self.is_rank_0))
161 |
162 |         callback = _Callback()
163 |
164 |         additional_results = {}
165 |         train(
166 |             self.params,
167 |             RayDMatrix(self.x, self.y),
168 |             ray_params=RayParams(num_actors=2),
169 |             callbacks=[callback],
170 |             additional_results=additional_results,
171 |             **self.kwargs,
172 |         )
173 |
174 |         self.assertEqual(len(additional_results["callback_returns"]), 2)
175 |         self.assertTrue(
176 |             all(rank is True for (_, rank) in additional_results["callback_returns"][0])
177 |         )
178 |         self.assertTrue(
179 |             all(
180 |                 rank is False for (_, rank) in additional_results["callback_returns"][1]
181 |             )
182 |         )
183 |
184 |
185 | if __name__ == "__main__":
186 |     import sys
187 |
188 |     import pytest
189 |
190 |     sys.exit(pytest.main(["-v", __file__]))
191 |
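(Editor's note: the `squared_log` objective in the file above implements the derivatives of the loss L(y, p) = 0.5 * (log1p(p) - log1p(y))**2, where `gradient` is dL/dp and `hessian` is d2L/dp2. A standalone finite-difference check of that correspondence, for illustration only:)

import numpy as np


def loss(y_true, y_pred):
    return 0.5 * (np.log1p(y_pred) - np.log1p(y_true)) ** 2


def gradient(y_true, y_pred):
    return (np.log1p(y_pred) - np.log1p(y_true)) / (y_pred + 1)


def hessian(y_true, y_pred):
    return (-np.log1p(y_pred) + np.log1p(y_true) + 1) / np.power(y_pred + 1, 2)


y_true, y_pred, eps = 3.0, 2.0, 1e-6
# Central differences of the loss and of the gradient should match the
# analytic gradient and hessian, respectively.
num_grad = (loss(y_true, y_pred + eps) - loss(y_true, y_pred - eps)) / (2 * eps)
num_hess = (gradient(y_true, y_pred + eps) - gradient(y_true, y_pred - eps)) / (2 * eps)
assert abs(num_grad - gradient(y_true, y_pred)) < 1e-8
assert abs(num_hess - hessian(y_true, y_pred)) < 1e-8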
--------------------------------------------------------------------------------
/lightgbm_ray/tests/test_tune.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import tempfile
4 | import unittest
5 | from unittest.mock import MagicMock, patch
6 |
7 | import numpy as np
8 | import ray
9 | from ray import tune
10 | from ray.tune.integration.lightgbm import (
11 |     TuneReportCheckpointCallback as OrigTuneReportCheckpointCallback,
12 | )
13 |
14 | from lightgbm_ray import RayDMatrix, RayParams, RayShardingMode, train
15 | from lightgbm_ray.tune import TuneReportCheckpointCallback, _try_add_tune_callback
16 |
17 |
18 | class LightGBMRayTuneTest(unittest.TestCase):
19 |     def setUp(self):
20 |         repeat = 64  # Repeat data a couple of times for stability
21 |         x = np.array(
22 |             [
23 |                 [1, 0, 0, 0],  # Feature 0 -> Label 0
24 |                 [0, 1, 0, 0],  # Feature 1 -> Label 1
25 |                 [0, 0, 1, 1],  # Feature 2+3 -> Label 2
26 |                 [0, 0, 1, 0],  # Feature 2+!3 -> Label 3
27 |             ]
28 |             * repeat
29 |         )
30 |         y = np.array([0, 1, 2, 3] * repeat)
31 |
32 |         self.params = {
33 |             "lgbm": {
34 |                 "boosting": "gbdt",
35 |                 "objective": "multiclass",
36 |                 "num_class": 4,
37 |                 "random_state": 1,
38 |                 "tree_learner": "data",
39 |                 "metrics": ["multi_logloss", "multi_error"],
40 |             },
41 |             "num_boost_round": tune.choice([1, 3]),
42 |         }
43 |
44 |         def train_func(ray_params, callbacks=None):
45 |             def _inner_train(config):
46 |                 train_set = RayDMatrix(x, y, sharding=RayShardingMode.BATCH)
47 |                 train(
48 |                     config["lgbm"],
49 |                     dtrain=train_set,
50 |                     ray_params=ray_params,
51 |                     num_boost_round=config["num_boost_round"],
52 |                     evals=[(train_set, "train")],
53 |                     callbacks=callbacks,
54 |                 )
55 |
56 |             return _inner_train
57 |
58 |         self.train_func = train_func
59 |         self.experiment_dir = tempfile.mkdtemp()
60 |
61 |     def tearDown(self):
62 |         ray.shutdown()
63 |         shutil.rmtree(self.experiment_dir)
64 |
65 |     # noinspection PyTypeChecker
66 |     @patch.dict(os.environ, {"TUNE_RESULT_DELIM": "/"})
67 |     def testNumIters(self, init=True):
68 |         """Test that the number of reported tune results is correct"""
69 |         if init:
70 |             ray.init(num_cpus=8)
71 |         ray_params = RayParams(cpus_per_actor=2, num_actors=2)
72 |         params = self.params.copy()
73 |         params["num_boost_round"] = tune.grid_search([1, 3])
74 |         analysis = tune.run(
75 |             self.train_func(ray_params),
76 |             config=params,
77 |             resources_per_trial=ray_params.get_tune_resources(),
78 |             num_samples=1,
79 |         )
80 |
81 |         print(analysis.results_df.columns)
82 |         self.assertSequenceEqual(
83 |             list(analysis.results_df["training_iteration"]),
84 |             list(analysis.results_df["config/num_boost_round"]),
85 |         )
86 |
87 |     def testNumItersClient(self):
88 |         """Test ray client mode"""
89 |         ray.init(num_cpus=8)
90 |         if ray.__version__ <= "1.2.0":
91 |             self.skipTest("Ray client mocks do not work in Ray <= 1.2.0")
92 |
93 |         from ray.util.client.ray_client_helpers import ray_start_client_server
94 |
95 |         self.assertFalse(ray.util.client.ray.is_connected())
96 |         with ray_start_client_server():
97 |             self.assertTrue(ray.util.client.ray.is_connected())
98 |             self.testNumIters(init=False)
99 |
100 |     def testReplaceTuneCheckpoints(self):
101 |         """Test if ray.tune.integration.lightgbm callbacks are replaced"""
102 |         ray.init(num_cpus=4)
103 |
104 |         # Report and checkpointing callback
105 |         in_cp = [OrigTuneReportCheckpointCallback(metrics="met")]
106 |         in_dict = {"callbacks": in_cp}
107 |
108 |         with patch("ray.train.get_context") as mocked:
109 |             mocked.return_value = MagicMock(return_value=True)
110 |             _try_add_tune_callback(in_dict)
111 |
112 |         replaced = in_dict["callbacks"][0]
113 |         self.assertTrue(isinstance(replaced, TuneReportCheckpointCallback))
114 |
115 |         self.assertSequenceEqual(replaced._metrics, ["met"])
116 |
117 |     def testEndToEndCheckpointing(self):
118 |         ray.init(num_cpus=4)
119 |         ray_params = RayParams(cpus_per_actor=2, num_actors=1)
120 |         analysis = tune.run(
121 |             self.train_func(
122 |                 ray_params, callbacks=[TuneReportCheckpointCallback(frequency=1)]
123 |             ),
124 |             config=self.params,
125 |             resources_per_trial=ray_params.get_tune_resources(),
126 |             num_samples=1,
127 |             metric="train-multi_logloss",
128 |             mode="min",
129 |             log_to_file=True,
130 |             local_dir=self.experiment_dir,
131 |         )
132 |
133 |         self.assertTrue(os.path.exists(analysis.best_checkpoint.path))
134 |
135 |     def testEndToEndCheckpointingOrigTune(self):
136 |         ray.init(num_cpus=4)
137 |         ray_params = RayParams(cpus_per_actor=2, num_actors=1)
138 |         analysis = tune.run(
139 |             self.train_func(
140 |                 ray_params, callbacks=[OrigTuneReportCheckpointCallback(frequency=1)]
141 |             ),
142 |             config=self.params,
143 |             resources_per_trial=ray_params.get_tune_resources(),
144 |             num_samples=1,
145 |             metric="train-multi_logloss",
146 |             mode="min",
147 |             log_to_file=True,
148 |             local_dir=self.experiment_dir,
149 |         )
150 |
151 |         self.assertTrue(os.path.exists(analysis.best_checkpoint.path))
152 |
153 |
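# (Editor's note: the two end-to-end tests above are intentionally
# near-identical: they verify that checkpointing works both with the
# lightgbm_ray callback and with the original ray.tune callback, which
# `_try_add_tune_callback` replaces transparently at train time.)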
"__main__": 155 | import sys 156 | 157 | import pytest 158 | 159 | sys.exit(pytest.main(["-v", __file__])) 160 | -------------------------------------------------------------------------------- /lightgbm_ray/tune.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Dict 3 | 4 | import ray 5 | from lightgbm.basic import Booster 6 | from lightgbm.callback import CallbackEnv 7 | from ray.util.annotations import PublicAPI 8 | from xgboost_ray.session import put_queue 9 | from xgboost_ray.util import force_on_current_node 10 | 11 | try: 12 | import ray.train 13 | import ray.tune 14 | except (ImportError, ModuleNotFoundError) as e: 15 | raise RuntimeError( 16 | "Ray Train and Ray Tune are required dependencies of `lightgbm_ray.tune` " 17 | 'Please install with: `pip install "ray[train]"`' 18 | ) from e 19 | 20 | 21 | from ray.tune.integration.lightgbm import TuneReportCallback as OrigTuneReportCallback 22 | from ray.tune.integration.lightgbm import ( 23 | TuneReportCheckpointCallback as OrigTuneReportCheckpointCallback, 24 | ) 25 | 26 | 27 | class _TuneLGBMRank0Mixin: 28 | """Mixin to allow for dynamic setting of rank so that only 29 | one actor actually fires the callback""" 30 | 31 | @property 32 | def is_rank_0(self) -> bool: 33 | try: 34 | return self._is_rank_0 35 | except AttributeError: 36 | return True 37 | 38 | @is_rank_0.setter 39 | def is_rank_0(self, val: bool): 40 | self._is_rank_0 = val 41 | 42 | 43 | class TuneReportCheckpointCallback( 44 | _TuneLGBMRank0Mixin, OrigTuneReportCheckpointCallback 45 | ): 46 | def __call__(self, env: CallbackEnv): 47 | if self.is_rank_0: 48 | put_queue( 49 | lambda: super(TuneReportCheckpointCallback, self).__call__(env=env) 50 | ) 51 | 52 | 53 | class TuneReportCallback(_TuneLGBMRank0Mixin, OrigTuneReportCallback): 54 | def __new__(cls: type, *args, **kwargs): 55 | # TODO(justinvyu): [code_removal] Remove in Ray 2.11. 56 | raise DeprecationWarning( 57 | "`TuneReportCallback` is deprecated. " 58 | "Use `ray.tune.integration.lightgbm.TuneReportCheckpointCallback` instead." 59 | ) 60 | 61 | 62 | def _try_add_tune_callback(kwargs: Dict): 63 | ray_train_context_initialized = ( 64 | ray.train.get_context().get_trial_resources() is not None 65 | ) 66 | if ray_train_context_initialized: 67 | callbacks = kwargs.get("callbacks", []) or [] 68 | new_callbacks = [] 69 | has_tune_callback = False 70 | 71 | REPLACE_MSG = ( 72 | "Replaced `{orig}` with `{target}`. If you want to " 73 | "avoid this warning, pass `{target}` as a callback " 74 | "directly in your calls to `lightgbm_ray.train()`." 75 | ) 76 | 77 | for cb in callbacks: 78 | if isinstance(cb, TuneReportCheckpointCallback): 79 | has_tune_callback = True 80 | new_callbacks.append(cb) 81 | elif isinstance(cb, OrigTuneReportCheckpointCallback): 82 | orig_metrics = cb._metrics 83 | orig_frequency = cb._frequency 84 | 85 | replace_cb = TuneReportCheckpointCallback( 86 | metrics=orig_metrics, 87 | frequency=orig_frequency, 88 | ) 89 | new_callbacks.append(replace_cb) 90 | logging.warning( 91 | REPLACE_MSG.format( 92 | orig="ray.tune.integration.lightgbm." 
93 | "TuneReportCheckpointCallback", 94 | target="lightgbm_ray.tune.TuneReportCheckpointCallback", 95 | ) 96 | ) 97 | has_tune_callback = True 98 | else: 99 | new_callbacks.append(cb) 100 | 101 | if not has_tune_callback: 102 | new_callbacks.append(TuneReportCheckpointCallback(frequency=0)) 103 | 104 | kwargs["callbacks"] = new_callbacks 105 | return True 106 | else: 107 | return False 108 | 109 | 110 | @PublicAPI(stability="beta") 111 | def load_model(model_path): 112 | """Loads the model stored in the provided model_path. 113 | 114 | If using Ray Client, this will automatically handle loading the path on 115 | the server by using a Ray task. 116 | 117 | Returns: 118 | lightgbm.Booster object of the model stored in the provided model_path 119 | 120 | """ 121 | 122 | def load_model_fn(model_path): 123 | best_bst = Booster(model_file=model_path) 124 | return best_bst 125 | 126 | # Load the model checkpoint. 127 | if ray.util.client.ray.is_connected(): 128 | # If using Ray Client, the best model is saved on the server. 129 | # So we have to wrap the model loading in a ray task. 130 | remote_load = ray.remote(load_model_fn) 131 | remote_load = force_on_current_node(remote_load) 132 | bst = ray.get(remote_load.remote(model_path)) 133 | else: 134 | bst = load_model_fn(model_path) 135 | 136 | return bst 137 | -------------------------------------------------------------------------------- /lightgbm_ray/util.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import gc 3 | import socket 4 | from contextlib import closing 5 | 6 | from lightgbm.basic import _LIB, _safe_call 7 | 8 | 9 | class lgbm_network_free: 10 | """Context to ensure LGBM_NetworkFree() is called 11 | (makes sure network is cleaned and ports are 12 | opened even if training fails).""" 13 | 14 | def __init__(self, model) -> None: 15 | self.model = model 16 | return 17 | 18 | def __enter__(self) -> None: 19 | return 20 | 21 | def __exit__(self, type, value, traceback): 22 | try: 23 | self.model._Booster.free_network() 24 | except Exception: 25 | pass 26 | _safe_call(_LIB.LGBM_NetworkFree()) 27 | # doesn't clean up properly without gc collect 28 | gc.collect() 29 | 30 | 31 | def find_free_port() -> int: 32 | """Find random free port.""" 33 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 34 | s.bind(("", 0)) 35 | s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 36 | return s.getsockname()[1] 37 | 38 | 39 | def is_port_free(port: int) -> bool: 40 | """Check if port is free""" 41 | with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: 42 | try: 43 | s.bind(("", port)) 44 | except socket.error as e: 45 | if e.errno == errno.EADDRINUSE: 46 | return False 47 | raise e 48 | return True 49 | -------------------------------------------------------------------------------- /requirements/lint-requirements.txt: -------------------------------------------------------------------------------- 1 | flake8==3.9.1 2 | flake8-comprehensions==3.10.1 3 | flake8-quotes==2.0.0 4 | flake8-bugbear==21.9.2 5 | black==22.10.0 6 | isort==5.10.1 7 | importlib-metadata==4.13.0 8 | -------------------------------------------------------------------------------- /requirements/test-requirements.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | parameterized 3 | petastorm 4 | pytest 5 | pyarrow<15.0.0 6 | ray[tune, data, default] 7 | scikit-learn 8 | # modin==0.23.1.post0 is not compatible with lightgbm_ray py38 9 
| modin<=0.23.1; python_version == '3.8' 10 | # modin==0.26.0 is not compatible with lightgbm_ray py39+ 11 | modin<0.26.0; python_version > '3.8' 12 | git+https://github.com/ray-project/xgboost_ray.git 13 | 14 | #workaround for now 15 | protobuf<4.0.0 16 | tensorboardX==2.2 17 | -------------------------------------------------------------------------------- /run_ci_examples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | TUNE=1 6 | 7 | for i in "$@" 8 | do 9 | echo "$i" 10 | case "$i" in 11 | --no-tune) 12 | TUNE=0 13 | ;; 14 | *) 15 | echo "unknown arg, $i" 16 | exit 1 17 | ;; 18 | esac 19 | done 20 | 21 | pushd lightgbm_ray/examples/ || exit 1 22 | ray stop || true 23 | echo "================" 24 | echo "Running examples" 25 | echo "================" 26 | echo "running readme.py" && python readme.py 27 | echo "running readme_sklearn_api.py" && python readme_sklearn_api.py 28 | echo "running simple.py" && python simple.py --smoke-test 29 | echo "running simple_predict.py" && python simple_predict.py 30 | echo "running simple_dask.py" && python simple_dask.py --smoke-test 31 | echo "running simple_modin.py" && python simple_modin.py --smoke-test 32 | echo "running simple_ray_dataset.py" && python simple_ray_dataset.py --smoke-test 33 | 34 | if [ "$TUNE" = "1" ]; then 35 | echo "running simple_tune.py" && python simple_tune.py --smoke-test 36 | else 37 | echo "skipping tune example" 38 | fi 39 | 40 | echo "running train_on_test_data.py" && python train_on_test_data.py --smoke-test 41 | popd 42 | 43 | pushd lightgbm_ray/tests 44 | echo "running examples with Ray Client" 45 | python -m pytest -v --durations=0 -x test_client.py 46 | popd || exit 1 -------------------------------------------------------------------------------- /run_ci_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TUNE=1 4 | 5 | for i in "$@" 6 | do 7 | echo "$i" 8 | case "$i" in 9 | --no-tune) 10 | TUNE=0 11 | ;; 12 | *) 13 | echo "unknown arg, $i" 14 | exit 1 15 | ;; 16 | esac 17 | done 18 | 19 | pushd lightgbm_ray/tests || exit 1 20 | echo "=============" 21 | echo "Running tests" 22 | echo "=============" 23 | END_STATUS=0 24 | if ! python -m pytest -v --durations=0 -x "test_lightgbm_api.py" ; then exit 1; fi 25 | if ! python -m pytest -v --durations=0 -x "test_end_to_end.py" ; then exit 1; fi 26 | if ! python -m pytest -v -s --durations=0 -x "test_fault_tolerance.py" ; then exit 1; fi 27 | if ! python -m pytest -v --durations=0 -x "test_lightgbm.py" ; then exit 1; fi 28 | 29 | if [ "$TUNE" = "1" ]; then 30 | if ! python -m pytest -v --durations=0 -x "test_tune.py" ; then exit 1; fi 31 | else 32 | echo "skipping tune tests" 33 | fi 34 | 35 | #echo "running smoke test on benchmark_cpu_gpu.py" && if ! 
python release/benchmark_cpu_gpu.py 2 10 20 --smoke-test; then END_STATUS=1; fi
36 | popd || exit 1
37 |
38 | if [ "$END_STATUS" = "1" ]; then
39 |     echo "At least one test has failed, exiting with code 1"
40 | fi
41 | exit "$END_STATUS"
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | setup(
4 |     name="lightgbm_ray",
5 |     packages=find_packages(where=".", include=["lightgbm_ray*"]),
6 |     version="0.1.10",
7 |     author="Ray Team",
8 |     description="A Ray backend for distributed LightGBM",
9 |     license="Apache 2.0",
10 |     long_description="A distributed backend for LightGBM built on top of "
11 |     "the distributed computing framework Ray.",
12 |     url="https://github.com/ray-project/lightgbm_ray",
13 |     install_requires=["lightgbm>=3.2.1", "xgboost_ray>=0.1.12", "packaging"],
14 | )
15 |
--------------------------------------------------------------------------------
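(Editor's note: for orientation, a minimal end-to-end sketch of the package assembled above, using only the public API exercised in the tests; the synthetic data and parameter values are illustrative:)

import numpy as np
import ray

from lightgbm_ray import RayDMatrix, RayParams, train

ray.init(num_cpus=2)

# Toy binary classification data: label depends on the first feature.
x = np.random.rand(1000, 4)
y = (x[:, 0] > 0.5).astype(int)

model = train(
    {"objective": "binary", "random_state": 42},
    RayDMatrix(x, y),
    ray_params=RayParams(num_actors=2, cpus_per_actor=1),
    num_boost_round=10,
)
print(model.predict(x[:5]))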