├── .coveragerc ├── .github └── workflows │ ├── azureml_pipelines.yml │ ├── benchmark_scripts.yml │ ├── codeql-analysis.yml │ ├── component_release.yml │ ├── docker_images.yml │ └── docs.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── conf ├── aml │ └── custom.yaml ├── compute │ └── custom.yaml └── experiments │ ├── benchmarks │ ├── lightgbm-inferencing.yaml │ ├── training-cpu-num-trees.yaml │ └── training-cpu-vs-gpu.yaml │ ├── data-generation.yaml │ ├── lightgbm-inferencing.yaml │ └── lightgbm_training │ ├── cpu-custom.yaml │ ├── cpu.yaml │ ├── gpu.yaml │ ├── mpi.yaml │ ├── ray.yaml │ ├── raytune.yaml │ ├── raytune_distributed.yaml │ └── sweep.yaml ├── data └── sample │ ├── unittests-regression-test.yml │ └── unittests-regression-train.yml ├── docker ├── lightgbm-custom │ ├── v321_patch_cpu_mpi_build.dockerfile │ └── v330_patch_cpu_mpi_build.dockerfile ├── lightgbm-v3.2.1 │ ├── linux_cpu_mpi_build.dockerfile │ ├── linux_cpu_mpi_pip.dockerfile │ ├── linux_cuda_build.dockerfile │ ├── linux_gpu_build.dockerfile │ ├── linux_gpu_pip.dockerfile │ └── windows_cpu_pip.dockerfile └── lightgbm-v3.3.0 │ ├── linux_cpu_mpi_build.dockerfile │ ├── linux_cpu_mpi_pip.dockerfile │ └── windows_cpu_pip.dockerfile ├── docs ├── contribute │ ├── architecture-guide.md │ ├── developer-guide.md │ └── reporting-guide.md ├── img │ ├── architecture-script-classes.png │ ├── designer-ui-components-tab-create-final.png │ ├── designer-ui-components-tab-create-github.png │ ├── designer-ui-components-tag-create.png │ ├── designer-ui-pipelines-compose.gif │ ├── designer-ui-pipelines-new.png │ └── lightgbm-training-metrics.png ├── index.md ├── lightgbm-benchmark-project.md ├── references │ ├── common │ │ ├── aml.md │ │ ├── components.md │ │ ├── io.md │ │ ├── lightgbm_utils.md │ │ ├── metrics.md │ │ ├── perf.md │ │ └── tasks.md │ └── scripts │ │ ├── sample │ │ └── sample.md │ │ └── training │ │ └── lightgbm_python.md ├── results │ ├── 
inferencing.md │ └── manual.md └── run │ ├── azureml │ ├── azure-setup.md │ ├── benchmark-inferencing.md │ ├── benchmark-training.md │ ├── designer-ui.md │ ├── generate-synthetic-data.md │ ├── local-setup.md │ ├── train-on-your-data.md │ └── upload-your-data.md │ ├── install.md │ └── manual-benchmark.md ├── mkdocs.yml ├── requirements.txt ├── src ├── __init__.py ├── binaries │ ├── CMakeLists.txt │ ├── README.md │ ├── common │ │ ├── custom_loader.cpp │ │ └── custom_loader.hpp │ └── lightgbm_predict │ │ └── main.cc ├── common │ ├── __init__.py │ ├── aml.py │ ├── components.py │ ├── data.py │ ├── distributed.py │ ├── io.py │ ├── lightgbm_utils.py │ ├── math.py │ ├── metrics.py │ ├── paths.py │ ├── perf.py │ ├── pipelines.py │ ├── ray.py │ ├── raytune_param.py │ ├── sweep.py │ └── tasks.py ├── pipelines │ └── azureml │ │ ├── data_generation.py │ │ ├── lightgbm_inferencing.py │ │ └── lightgbm_training.py └── scripts │ ├── __init__.py │ ├── analysis │ ├── analyze.py │ └── templates │ │ └── inferencing.md │ ├── data_processing │ ├── generate_data │ │ ├── .amlignore │ │ ├── conda_env.yaml │ │ ├── generate.py │ │ ├── spec.additional_includes │ │ └── spec.yaml │ ├── lightgbm_data2bin │ │ ├── .amlignore │ │ ├── conda_env.yml │ │ ├── data2bin.py │ │ ├── spec.additional_includes │ │ └── spec.yaml │ └── partition_data │ │ ├── .amlignore │ │ ├── conda_env.yml │ │ ├── partition.py │ │ ├── spec.additional_includes │ │ └── spec.yaml │ ├── inferencing │ ├── custom_win_cli │ │ ├── .amlignore │ │ ├── conda_env.yaml │ │ ├── score.py │ │ ├── spec.additional_includes │ │ ├── spec.yaml │ │ └── static_binaries │ │ │ └── README.md │ ├── lightgbm_c_api │ │ ├── .amlignore │ │ ├── default.dockerfile │ │ ├── score.py │ │ ├── spec.additional_includes │ │ └── spec.yaml │ ├── lightgbm_python │ │ ├── .amlignore │ │ ├── default.dockerfile │ │ ├── score.py │ │ ├── spec.additional_includes │ │ └── spec.yaml │ ├── lightgbm_ray │ │ ├── .amlignore │ │ ├── default.dockerfile │ │ ├── score.py │ │ ├── 
spec.additional_includes │ │ └── spec.yaml │ └── treelite_python │ │ ├── conda_env.yaml │ │ ├── score.py │ │ ├── spec.additional_includes │ │ └── spec.yaml │ ├── model_transformation │ └── treelite_compile │ │ ├── .amlignore │ │ ├── compile_treelite.py │ │ ├── conda_env.yaml │ │ ├── spec.additional_includes │ │ └── spec.yaml │ ├── sample │ ├── .amlignore │ ├── __init__.py │ ├── conda_env.yaml │ ├── sample.py │ ├── spec.additional_includes │ └── spec.yaml │ └── training │ ├── lightgbm_python │ ├── .amlignore │ ├── default.dockerfile │ ├── spec.additional_includes │ ├── spec.yaml │ ├── sweep_spec.yaml │ └── train.py │ ├── lightgbm_ray │ ├── .amlignore │ ├── default.dockerfile │ ├── spec.additional_includes │ ├── spec.yaml │ └── train.py │ ├── ray_tune │ ├── .amlignore │ ├── default.dockerfile │ ├── raytune.py │ ├── spec.additional_includes │ └── spec.yaml │ └── ray_tune_distributed │ ├── default.dockerfile │ ├── raytune_distributed.py │ ├── spec.additional_includes │ └── spec.yaml └── tests ├── __init__.py ├── aml └── test_components.py ├── common ├── test_aml.py ├── test_component.py ├── test_data.py ├── test_distributed.py ├── test_io.py ├── test_lightgbm_utils.py ├── test_math.py ├── test_metrics.py ├── test_perf.py ├── test_pipelines.py ├── test_raytune.py └── test_sweep.py ├── conftest.py ├── data └── regression │ ├── inference │ └── inference.txt │ ├── model │ └── model.txt │ ├── test │ └── test.txt │ └── train │ └── train.txt ├── pipelines ├── test_data_generation.py ├── test_lightgbm_inferencing.py └── test_lightgbm_training.py └── scripts ├── test_generate_data.py ├── test_inferencing_custom_win_cli.py ├── test_lightgbm_data2bin.py ├── test_lightgbm_inferencing_c_api.py ├── test_lightgbm_inferencing_ray.py ├── test_lightgbm_python.py ├── test_partition_data.py ├── test_sample_sample.py ├── test_training_lightgbm_ray.py ├── test_training_raytune.py └── test_treelite_python.py /.coveragerc: 
-------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | omit = 5 | src/common/tasks.py 6 | -------------------------------------------------------------------------------- /.github/workflows/azureml_pipelines.yml: -------------------------------------------------------------------------------- 1 | name: AzureML Pipelines Validation 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths-ignore: 7 | - 'docs/**' 8 | pull_request: 9 | branches: [ main ] 10 | paths-ignore: 11 | - 'docs/**' 12 | workflow_dispatch: 13 | inputs: 14 | name: 15 | description: 'Reason' 16 | required: false 17 | default: '...' 18 | 19 | jobs: 20 | build: 21 | environment: mlops 22 | 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | 27 | - name: check out repo 28 | uses: actions/checkout@v2 29 | 30 | - name: Set up Python 3.8 31 | uses: actions/setup-python@v2 32 | with: 33 | python-version: 3.8 34 | 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip==21.3.1 38 | pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11 39 | sudo apt-get install libopenmpi-dev 40 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 41 | # hotfix for azurecli issue 42 | pip install --upgrade --force-reinstall --no-cache-dir "azure-cli<2.30.0" 43 | 44 | - name: azure login 45 | uses: azure/login@v1 46 | with: 47 | creds: ${{secrets.AZURE_CREDS}} 48 | 49 | - name : Validate pipelines [data generation] 50 | run: >- 51 | python src/pipelines/azureml/data_generation.py 52 | --exp-config ./conf/experiments/data-generation.yaml 53 | aml.subscription_id=${{secrets.SUBSCRIPTION}} 54 | aml.resource_group=${{secrets.RESOURCE_GROUP}} 55 | aml.workspace_name=${{secrets.WORKSPACE_NAME}} 56 | aml.auth="azurecli" 57 | 58 | - name : Validate pipelines [inferencing] 59 | run: >- 60 | python src/pipelines/azureml/lightgbm_inferencing.py 61 | --exp-config ./conf/experiments/lightgbm-inferencing.yaml 
62 | lightgbm_inferencing_config.tasks="[{data:{name:"dummy_dataset"},model:{name:"dummy_dataset"}}]" 63 | aml.subscription_id=${{secrets.SUBSCRIPTION}} 64 | aml.resource_group=${{secrets.RESOURCE_GROUP}} 65 | aml.workspace_name=${{secrets.WORKSPACE_NAME}} 66 | aml.auth="azurecli" 67 | 68 | - name : Validate pipelines [training] 69 | run: >- 70 | python src/pipelines/azureml/lightgbm_training.py 71 | --exp-config ./conf/experiments/lightgbm_training/cpu.yaml 72 | lightgbm_training_config.tasks="[{train:{name:"dummy_dataset"},test:{name:"dummy_dataset"}}]" 73 | aml.subscription_id=${{secrets.SUBSCRIPTION}} 74 | aml.resource_group=${{secrets.RESOURCE_GROUP}} 75 | aml.workspace_name=${{secrets.WORKSPACE_NAME}} 76 | aml.auth="azurecli" 77 | 78 | - name : Validate pipelines [training+sweep] 79 | run: >- 80 | python src/pipelines/azureml/lightgbm_training.py 81 | --exp-config ./conf/experiments/lightgbm_training/sweep.yaml 82 | lightgbm_training_config.tasks="[{train:{name:"dummy_dataset"},test:{name:"dummy_dataset"}}]" 83 | aml.subscription_id=${{secrets.SUBSCRIPTION}} 84 | aml.resource_group=${{secrets.RESOURCE_GROUP}} 85 | aml.workspace_name=${{secrets.WORKSPACE_NAME}} 86 | aml.auth="azurecli" 87 | -------------------------------------------------------------------------------- /.github/workflows/benchmark_scripts.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Benchmark scripts gated build 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | paths-ignore: 10 | - 'docs/**' 11 | pull_request: 12 | branches: [ main ] 13 | paths-ignore: 14 | - 'docs/**' 15 | workflow_dispatch: 16 | inputs: 17 | name: 18 | description: 'Reason' 19 | required: false 20 | default: '...' 
21 | 22 | jobs: 23 | build: 24 | environment: mlops 25 | 26 | runs-on: ubuntu-latest 27 | 28 | steps: 29 | 30 | - uses: fauguste/auto-cancellation-running-action@0.1.4 31 | # auto-cancellation-running-action 32 | with: 33 | githubToken: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | - uses: actions/checkout@v2 36 | 37 | - name: Set up Python 3.8 38 | uses: actions/setup-python@v2 39 | with: 40 | python-version: 3.8 41 | 42 | - name: Install dependencies 43 | run: | 44 | sudo apt-get install libopenmpi-dev 45 | python -m pip install --upgrade pip==21.3.1 46 | pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11 47 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 48 | # hotfix for azurecli issue 49 | pip install --upgrade --force-reinstall --no-cache-dir "azure-cli<2.30.0" 50 | # Fix: force protobuf downgrade to avoid exception 51 | pip install protobuf==3.19.4 52 | 53 | - name: azure login 54 | uses: azure/login@v1 55 | with: 56 | creds: ${{secrets.AZURE_CREDS}} 57 | 58 | - name: Lint with flake8 59 | run: | 60 | # stop the build if there are Python syntax errors or undefined names 61 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 62 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 63 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 64 | 65 | - name: Test with pytest 66 | run: >- 67 | python -m pytest tests/ 68 | --junitxml=test-build-result.xml 69 | --cov=src/ --cov-report xml:coverage.xml --cov-report term 70 | --aml_subscription_id=${{secrets.SUBSCRIPTION}} 71 | --aml_resource_group=${{secrets.RESOURCE_GROUP}} 72 | --aml_workspace_name=${{secrets.WORKSPACE_NAME}} 73 | --aml_auth="azurecli" 74 | 75 | - name: Publish test results 76 | uses: EnricoMi/publish-unit-test-result-action/composite@v1 77 | with: 78 | check_name: Unit Test Results for Build 79 | github_token: ${{ secrets.GITHUB_TOKEN }} 80 | files: test-build-result.xml 81 | 82 | - name: Code Coverage Summary Report 83 | uses: irongut/CodeCoverageSummary@v1.0.2 84 | # uses: joshjohanning/CodeCoverageSummary@v1.0.2 85 | with: 86 | filename: coverage.xml 87 | badge: true 88 | hide_complexity: true 89 | format: 'markdown' 90 | output: 'both' 91 | 92 | - name: Add Coverage PR Comment 93 | uses: marocchino/sticky-pull-request-comment@v2 94 | if: github.event_name == 'pull_request' 95 | with: 96 | recreate: true 97 | path: code-coverage-results.md 98 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ main ] 17 | paths-ignore: 18 | - 'docs/**' 19 | pull_request: 20 | # The branches below must be a subset of the branches above 21 | branches: [ main ] 22 | schedule: 23 | - cron: '16 13 * * 0' 24 | 25 | jobs: 26 | analyze: 27 | name: Analyze 28 | runs-on: ubuntu-latest 29 | permissions: 30 | actions: read 31 | contents: read 32 | security-events: write 33 | 34 | strategy: 35 | fail-fast: false 36 | matrix: 37 | language: [ 'python' ] 38 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 39 | # Learn more: 40 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 41 | 42 | steps: 43 | - name: Checkout repository 44 | uses: actions/checkout@v2 45 | 46 | # Initializes the CodeQL tools for scanning. 47 | - name: Initialize CodeQL 48 | uses: github/codeql-action/init@v1 49 | with: 50 | languages: ${{ matrix.language }} 51 | # If you wish to specify custom queries, you can do so here or in a config file. 52 | # By default, queries listed here will override any specified in a config file. 53 | # Prefix the list here with "+" to use these queries and those in the config file. 54 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 55 | 56 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 57 | # If this step fails, then you should remove it and run the build manually (see below) 58 | - name: Autobuild 59 | uses: github/codeql-action/autobuild@v1 60 | 61 | # ℹ️ Command-line programs to run using the OS shell. 
62 | # 📚 https://git.io/JvXDl 63 | 64 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 65 | # and modify them (or add more) to build your code if your project 66 | # uses a compiled language 67 | 68 | #- run: | 69 | # make bootstrap 70 | # make release 71 | 72 | - name: Perform CodeQL Analysis 73 | uses: github/codeql-action/analyze@v1 74 | -------------------------------------------------------------------------------- /.github/workflows/component_release.yml: -------------------------------------------------------------------------------- 1 | name: Publish components as release assets 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | 8 | jobs: 9 | build_release: 10 | name: Build components for release 11 | environment: mlops 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | include: 17 | - component_folder: src/scripts/training/lightgbm_python/ 18 | component_name: "lightgbm_python_training" 19 | - component_folder: src/scripts/data_processing/generate_data/ 20 | component_name: "generate_data" 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Set up Python 3.8 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: 3.8 29 | 30 | - name: Install dependencies 31 | run: | 32 | sudo apt install zip 33 | 34 | # Install remote version of azure-cli-ml (which includes `az ml component` commands) 35 | # This might take several minutes depending on your network, thanks for your patience. 36 | # Install specific stable version (recommended) 37 | echo "Install remote version of azure-cli-ml..." 
38 | az extension add -n ml --yes --verbose 39 | 40 | - name: azure login 41 | uses: azure/login@v1 42 | with: 43 | creds: ${{secrets.AZURE_CREDS}} 44 | 45 | - name: Build ${{matrix.component_name}} 46 | run: | 47 | # copy sources of the component 48 | mkdir -p dist/${{matrix.component_name}}/ 49 | cp ${{matrix.component_folder}}/* dist/${{matrix.component_name}}/ 50 | 51 | # process "additional includes" 52 | rm dist/${{matrix.component_name}}/*.additional_includes 53 | mkdir dist/${{matrix.component_name}}/common/ 54 | cp src/common/* dist/${{matrix.component_name}}/common/ 55 | 56 | # compress as zip 57 | cd dist/ 58 | zip -r component-${{matrix.component_name}}.zip ${{matrix.component_name}}/ 59 | cd .. 60 | 61 | # clean up 62 | rm -rf dist/${{matrix.component_name}}/ 63 | 64 | - name: Upload ${{matrix.component_name}} as build artifact 65 | uses: actions/upload-artifact@v2.3.1 66 | with: 67 | name: ${{matrix.component_name}} 68 | path: dist/component-${{matrix.component_name}}.zip 69 | 70 | - name: Upload Release Asset 71 | uses: actions/upload-release-asset@v1 72 | env: 73 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 74 | with: 75 | upload_url: ${{ github.event.release.upload_url }} # This pulls from the CREATE RELEASE step above, referencing its ID to get its outputs object, which includes an `upload_url`. 
See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps 76 | asset_path: dist/component-${{matrix.component_name}}.zip 77 | asset_name: component-${{matrix.component_name}}.zip 78 | asset_content_type: application/zip 79 | -------------------------------------------------------------------------------- /.github/workflows/docker_images.yml: -------------------------------------------------------------------------------- 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - 'docker/**' 8 | - '.github/workflows/docker_images.yml' 9 | pull_request: 10 | branches: [ main ] 11 | paths: 12 | - 'docker/**' 13 | - '.github/workflows/docker_images.yml' 14 | workflow_dispatch: 15 | inputs: 16 | name: 17 | description: 'Reason' 18 | required: false 19 | default: '...' 20 | 21 | jobs: 22 | 23 | linux_container_build: 24 | 25 | runs-on: ubuntu-latest 26 | 27 | strategy: 28 | matrix: 29 | dockerfile: 30 | - docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile 31 | - docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile 32 | - docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile 33 | - docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile 34 | - docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile 35 | - docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile 36 | - docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile 37 | - docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile 38 | - src/scripts/training/lightgbm_python/default.dockerfile 39 | - src/scripts/inferencing/lightgbm_python/default.dockerfile 40 | - src/scripts/inferencing/lightgbm_c_api/default.dockerfile 41 | 42 | steps: 43 | - uses: actions/checkout@v2 44 | with: 45 | fetch-depth: 0 46 | 47 | - name: Query Git branch name 48 | uses: petehouston/github-actions-query-branch-name@v1.2 49 | 50 | - name: Check branch name 51 | run: |- 52 | echo "GIT_BRANCH_NAME = $GIT_BRANCH_NAME" 53 | echo "GIT_BRANCH_NAME_HEAD = 
$GIT_BRANCH_NAME_HEAD" 54 | echo "GIT_BRANCH_NAME_BASE = $GIT_BRANCH_NAME_BASE" 55 | echo "Branch name: ${{ steps.queryBranch.outputs.git_branch_name }}" 56 | echo "Branch name: ${{ steps.queryBranch.outputs.git_branch_name_head }}" 57 | echo "Branch name: ${{ steps.queryBranch.outputs.git_branch_name_base }}" 58 | 59 | - uses: marceloprado/has-changed-path@v1 60 | id: dockerfile-changed 61 | with: 62 | paths: ${{ matrix.dockerfile}} 63 | 64 | - name: Build the Docker image 65 | if: steps.dockerfile-changed.outputs.changed == 'true' 66 | run: docker build . --file ${{ matrix.dockerfile }} --tag temp:$(date +%s) --build-arg lightgbm_benchmark_branch=$GIT_BRANCH_NAME 67 | 68 | windows_container_build: 69 | 70 | runs-on: windows-latest 71 | 72 | strategy: 73 | matrix: 74 | dockerfile: 75 | - docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile 76 | - docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile 77 | 78 | steps: 79 | - uses: actions/checkout@v2 80 | with: 81 | fetch-depth: 0 82 | 83 | - uses: marceloprado/has-changed-path@v1 84 | id: dockerfile-changed 85 | with: 86 | paths: ${{ matrix.dockerfile}} 87 | 88 | - name: Build the Docker image 89 | if: steps.dockerfile-changed.outputs.changed == 'true' 90 | run: docker build . 
--file ${{ matrix.dockerfile }} --tag temp:$(date +%s) 91 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'docs/**' 9 | - '.github/workflows/docs.yml' 10 | - 'requirements.txt' 11 | pull_request: 12 | branches: 13 | - main 14 | paths: 15 | - 'docs/**' 16 | - '.github/workflows/docs.yml' 17 | - 'requirements.txt' 18 | 19 | jobs: 20 | build: 21 | name: Deploy docs 22 | runs-on: ubuntu-latest 23 | steps: 24 | 25 | - name: Checkout main 26 | uses: actions/checkout@v1 27 | 28 | - name: Setup Python 3.7 29 | uses: actions/setup-python@v2 30 | with: 31 | python-version: 3.7 32 | 33 | - name: pip install 34 | run: | 35 | python -m pip install --upgrade pip==21.3.1 36 | 37 | python -m pip install markdown-include==0.7.0 mkdocstrings==0.19.0 mkdocstrings-python==0.7.1 mkdocs-material==8.4.2 livereload==2.6.3 38 | 39 | # NOTE: we need requirements to be able to parse reference docs scripts 40 | sudo apt-get install libopenmpi-dev 41 | python -m pip install -r ./requirements.txt 42 | 43 | - name: build docs 44 | run: | 45 | mkdocs build 46 | 47 | - name: deploy docs 48 | if: github.ref == 'refs/heads/main' 49 | env: 50 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 51 | run: | 52 | remote_repo="https://x-access-token:${GITHUB_TOKEN}@${GITHUB_DOMAIN:-"github.com"}/${GITHUB_REPOSITORY}.git" 53 | git remote rm origin 54 | git remote add origin "${remote_repo}" 55 | mkdocs gh-deploy --config-file "${GITHUB_WORKSPACE}/mkdocs.yml" --force 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / 
packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # ignore local data 132 | data/ 133 | 134 | # ignore mlflow local dumps 135 | mlruns/ 136 | 137 | # ignore hydra outputs (sdk 1.5 + shrike) 138 | outputs/ 139 | 140 | # ignore aml references 141 | conf/aml/ 142 | conf/compute/ 143 | conf/experiments/prod/ 144 | src/scripts/inferencing/custom_win_cli/static_binaries/ 145 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LightGBM benchmarking suite 2 | 3 | [![AzureML Pipelines Validation](https://github.com/microsoft/lightgbm-benchmark/actions/workflows/azureml_pipelines.yml/badge.svg)](https://github.com/microsoft/lightgbm-benchmark/actions/workflows/azureml_pipelines.yml) 4 | [![Benchmark scripts gated build](https://github.com/microsoft/lightgbm-benchmark/actions/workflows/benchmark_scripts.yml/badge.svg)](https://github.com/microsoft/lightgbm-benchmark/actions/workflows/benchmark_scripts.yml) 5 | 6 | The LightGBM benchmark aims at providing tools and automation to compare implementations of lightgbm and other boosting-tree-based algorithms for both training and inferencing. 
The focus is on production use cases, and the evaluation on both model quality (validation metrics) and computing performance (training speed, compute hours, inferencing latency, etc). 7 | 8 | The goal is to support the community of developers of LightGBM by providing tools and a methodology for evaluating new releases of LightGBM on a standard and reproducible benchmark. 9 | 10 | ## Documentation 11 | 12 | Please find the full documentation of this project at [microsoft.github.io/lightgbm-benchmark](https://microsoft.github.io/lightgbm-benchmark) 13 | 14 | ## Contributing 15 | 16 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 17 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 18 | the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. 19 | 20 | When you submit a pull request, a CLA bot will automatically determine whether you need to provide 21 | a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions 22 | provided by the bot. You will only need to do this once across all repos using our CLA. 23 | 24 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 25 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 26 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 27 | 28 | ## Trademarks 29 | 30 | This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft 31 | trademarks or logos is subject to and must follow 32 | [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). 
33 | Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. 34 | Any use of third-party trademarks or logos are subject to those third-party's policies. 35 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. 
Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. 
More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /conf/aml/custom.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | subscription_id: TODO 3 | resource_group: TODO 4 | workspace_name: TODO 5 | tenant: TODO 6 | auth: "interactive" 7 | -------------------------------------------------------------------------------- /conf/compute/custom.yaml: -------------------------------------------------------------------------------- 1 | # @package _group_ 2 | linux_cpu: "cpu-cluster" 3 | linux_gpu: "linux-gpu-nv6" 4 | windows_cpu: "win-cpu" 5 | -------------------------------------------------------------------------------- /conf/experiments/benchmarks/lightgbm-inferencing.yaml: -------------------------------------------------------------------------------- 1 | # This experiment runs multiple variants of lightgbm inferencing + treelite 2 | # on a given user-defined dataset and model 3 | # 4 | # to execute: 5 | # > python 
src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/benchmarks/lightgbm-inferencing.yaml 6 | 7 | defaults: 8 | - aml: custom 9 | - compute: custom 10 | 11 | ### CUSTOM PARAMETERS ### 12 | 13 | experiment: 14 | name: "lightgbm_inferencing_dev" 15 | description: "something interesting to say about this" 16 | 17 | lightgbm_inferencing_config: 18 | # name of your particular benchmark 19 | benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime! 20 | 21 | # list all the data/model pairs to run inferencing with 22 | tasks: 23 | - data: 24 | name: "data-synthetic-regression-10cols-10000samples-inference" 25 | model: 26 | name: "model-synthetic-regression-10cols-10trees-31leaves" 27 | - data: 28 | name: "data-synthetic-regression-10cols-10000samples-inference" 29 | model: 30 | name: "model-synthetic-regression-10cols-100trees-31leaves" 31 | - data: 32 | name: "data-synthetic-regression-10cols-10000samples-inference" 33 | model: 34 | name: "model-synthetic-regression-10cols-1000trees-31leaves" 35 | - data: 36 | name: "data-synthetic-regression-10cols-10000samples-inference" 37 | model: 38 | name: "model-synthetic-regression-10cols-5000trees-31leaves" 39 | - data: 40 | name: "data-synthetic-regression-100cols-10000samples-inference" 41 | model: 42 | name: "model-synthetic-regression-100cols-10trees-31leaves" 43 | - data: 44 | name: "data-synthetic-regression-100cols-10000samples-inference" 45 | model: 46 | name: "model-synthetic-regression-100cols-100trees-31leaves" 47 | - data: 48 | name: "data-synthetic-regression-100cols-10000samples-inference" 49 | model: 50 | name: "model-synthetic-regression-100cols-1000trees-31leaves" 51 | - data: 52 | name: "data-synthetic-regression-100cols-10000samples-inference" 53 | model: 54 | name: "model-synthetic-regression-100cols-5000trees-31leaves" 55 | - data: 56 | name: "data-synthetic-regression-1000cols-10000samples-inference" 57 | model: 58 | name: 
"model-synthetic-regression-1000cols-10trees-31leaves" 59 | - data: 60 | name: "data-synthetic-regression-1000cols-10000samples-inference" 61 | model: 62 | name: "model-synthetic-regression-1000cols-100trees-31leaves" 63 | - data: 64 | name: "data-synthetic-regression-1000cols-10000samples-inference" 65 | model: 66 | name: "model-synthetic-regression-1000cols-1000trees-31leaves" 67 | - data: 68 | name: "data-synthetic-regression-1000cols-10000samples-inference" 69 | model: 70 | name: "model-synthetic-regression-1000cols-5000trees-31leaves" 71 | 72 | # list all inferencing frameworks and their builds 73 | variants: 74 | - framework: lightgbm_python # v3.3.0 via pypi 75 | - framework: lightgbm_c_api # v3.3.0 with C API prediction 76 | - framework: lightgbm_c_api # v3.3.0 with C API prediction 77 | build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile 78 | - framework: lightgbm_c_api # v3.2.1 with C API prediction 79 | build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile 80 | - framework: lightgbm_c_api # v3.2.1 with C API prediction 81 | build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile 82 | - framework: lightgbm_ray # ray implementation 83 | - framework: treelite_python # v1.3.0 84 | 85 | # to use custom_win_cli, you need to compile your own binaries 86 | # see src/scripts/inferencing/custom_win_cli/static_binaries/README.md 87 | #- framework: custom_win_cli 88 | -------------------------------------------------------------------------------- /conf/experiments/benchmarks/training-cpu-num-trees.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/benchmarks/training-cpu-num-trees.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_benchmark" 12 | description: "something interesting to say about this" 
13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "benchmark-cpu-num-trees-20211216.1" # need to be provided at runtime! 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-10cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-10cols-10000samples-test" 24 | task_key: "synthetic-regression-10cols" # optional, used to register outputs 25 | - train: 26 | name: "data-synthetic-regression-100cols-100000samples-train" 27 | test: 28 | name: "data-synthetic-regression-100cols-10000samples-test" 29 | task_key: "synthetic-regression-100cols" # optional, used to register outputs 30 | - train: 31 | name: "data-synthetic-regression-1000cols-100000samples-train" 32 | test: 33 | name: "data-synthetic-regression-1000cols-10000samples-test" 34 | task_key: "synthetic-regression-1000cols" # optional, used to register outputs 35 | 36 | # reference settings for the benchmark 37 | # all variants are defined as diffs of these parameters 38 | reference: 39 | # input parameters 40 | data: 41 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 42 | pre_convert_to_binary: False # inserts conversion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 43 | header: false 44 | label_column: "0" 45 | group_column: null 46 | 47 | # lightgbm training parameters 48 | training: 49 | objective: "regression" 50 | metric: "rmse" 51 | boosting: "gbdt" 52 | tree_learner: "data" 53 | num_iterations: 100 54 | num_leaves: 31 55 | min_data_in_leaf: 20 56 | learning_rate: 0.1 57 | max_bin: 255 58 | feature_fraction: 1.0 59 | 60 | # compute parameters 61 | device_type: "cpu" 62 | 63 | # you can add anything under custom_params, it will be sent as a dictionary 64 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 65 | custom_params: 
66 | deterministic: True 67 | use_two_round_loading: True 68 | 69 | # compute parameters 70 | runtime: 71 | #target: null # optional: force target for this training job 72 | nodes: 1 73 | processes: 1 74 | 75 | # model registration 76 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 77 | output: 78 | register_model: False 79 | #register_model_prefix: "model" 80 | #register_model_suffix: null 81 | 82 | # variant settings override specific parameters of reference_training 83 | variants: 84 | - training: 85 | num_iterations: 10 86 | - training: 87 | num_iterations: 1000 88 | - training: 89 | num_iterations: 5000 90 | -------------------------------------------------------------------------------- /conf/experiments/benchmarks/training-cpu-vs-gpu.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/benchmarks/training-cpu-vs-gpu.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_benchmark" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "benchmark-cpu-vs-gpu-20211216.1" # need to be provided at runtime! 
17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-10cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-10cols-10000samples-test" 24 | task_key: "synthetic-regression-10cols" # optional, user to register outputs 25 | - train: 26 | name: "data-synthetic-regression-100cols-100000samples-train" 27 | test: 28 | name: "data-synthetic-regression-100cols-10000samples-test" 29 | task_key: "synthetic-regression-100cols" # optional, user to register outputs 30 | - train: 31 | name: "data-synthetic-regression-1000cols-100000samples-train" 32 | test: 33 | name: "data-synthetic-regression-1000cols-10000samples-test" 34 | task_key: "synthetic-regression-1000cols" # optional, user to register outputs 35 | 36 | # reference settings for the benchmark 37 | # all variants are defined as diffs of these parameters 38 | reference: 39 | # input parameters 40 | data: 41 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 42 | pre_convert_to_binary: False # inserts convertion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 43 | header: false 44 | label_column: "0" 45 | group_column: null 46 | 47 | # lightgbm training parameters 48 | training: 49 | objective: "regression" 50 | metric: "rmse" 51 | boosting: "gbdt" 52 | tree_learner: "data" 53 | num_iterations: 100 54 | num_leaves: 31 55 | min_data_in_leaf: 20 56 | learning_rate: 0.1 57 | max_bin: 255 58 | feature_fraction: 1.0 59 | 60 | # compute parameters 61 | device_type: "cpu" 62 | 63 | # you can add anything under custom_params, it will be sent as a dictionary 64 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 65 | custom_params: 66 | deterministic: True 67 | use_two_round_loading: True 68 | 69 | # compute parameters 70 | runtime: 71 | #target: null # optional: force target for this training job 
72 | nodes: 1 73 | processes: 1 74 | 75 | # model registration 76 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 77 | output: 78 | register_model: False 79 | #register_model_prefix: "model" 80 | #register_model_suffix: null 81 | 82 | # variant settings override specific parameters of reference_training 83 | variants: 84 | - training: 85 | device_type: "gpu" 86 | runtime: 87 | build: "docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile" # relative to lightgbm_python folder 88 | -------------------------------------------------------------------------------- /conf/experiments/data-generation.yaml: -------------------------------------------------------------------------------- 1 | # This experiment generates multiple synthetic datasets for regression 2 | # with varying number of features 3 | # 4 | # to execute: 5 | # > python src/pipelines/azureml/data_generation.py --exp-config conf/experiments/data-generation.yaml 6 | 7 | defaults: 8 | - aml: custom 9 | - compute: custom 10 | 11 | ### CUSTOM PARAMETERS ### 12 | 13 | experiment: 14 | name: "data_generation_dev" 15 | description: "something interesting to say about this" 16 | 17 | data_generation_config: 18 | # name of your particular benchmark 19 | benchmark_name: "benchmark-dev" # override this with a unique name 20 | 21 | # DATA 22 | tasks: 23 | - task: "regression" 24 | train_samples: 100000 25 | test_samples: 10000 26 | inferencing_samples: 10000 27 | n_features: 10 28 | n_informative: 10 29 | - task: "lambdarank" 30 | train_samples: 100 31 | test_samples: 100 32 | inferencing_samples: 100 33 | n_features: 10 34 | n_informative: 10 35 | n_label_classes: 5 36 | docs_per_query: 10 37 | train_partitions: 7 38 | - task: "classification" 39 | train_samples: 100 40 | test_samples: 100 41 | inferencing_samples: 100 42 | n_features: 10 43 | n_informative: 10 44 | n_label_classes: 3 45 | 46 | register_outputs: false 47 | register_outputs_prefix: 
"data-synthetic" # "{prefix}-{task}-{n_features}cols-{samples}samples-{train|test|inference}" 48 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm-inferencing.yaml: -------------------------------------------------------------------------------- 1 | # This experiment runs multiple variants of lightgbm inferencing + treelite 2 | # on a given user-defined dataset and model 3 | # 4 | # to execute: 5 | # > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml 6 | 7 | defaults: 8 | - aml: custom 9 | - compute: custom 10 | 11 | ### CUSTOM PARAMETERS ### 12 | 13 | experiment: 14 | name: "lightgbm_inferencing_dev" 15 | description: "something interesting to say about this" 16 | 17 | lightgbm_inferencing_config: 18 | # name of your particular benchmark 19 | benchmark_name: "benchmark-dev" # override this with a unique name 20 | 21 | # list all the data/model pairs to run inferencing with 22 | tasks: 23 | - data: 24 | name: "data-synthetic-regression-100cols-10000samples-inference" 25 | model: 26 | name: "model-synthetic-regression-100cols-10trees-31leaves" 27 | 28 | # list all inferencing frameworks and their builds 29 | variants: 30 | - framework: lightgbm_python # v3.3.0 via pypi 31 | - framework: lightgbm_c_api # v3.3.0 with C API prediction 32 | - framework: lightgbm_c_api # v3.3.0 with C API prediction 33 | build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile 34 | - framework: lightgbm_c_api # v3.2.1 with C API prediction 35 | build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile 36 | - framework: lightgbm_c_api # v3.2.1 with C API prediction 37 | build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile 38 | - framework: lightgbm_ray # ray implementation 39 | - framework: treelite_python # v1.3.0 40 | 41 | # to use custom_win_cli, you need to compile your own binaries 42 | # see 
src/scripts/inferencing/custom_win_cli/static_binaries/README.md 43 | #- framework: custom_win_cli 44 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/cpu-custom.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/cpu-custom.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_dev" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "benchmark-dev" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-100cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-100cols-10000samples-test" 24 | task_key: "synthetic-regression-100cols" # optional, user to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_python 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts convertion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: false 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | objective: "regression" 42 | metric: "rmse" 43 | boosting: "gbdt" 44 | tree_learner: "data" 45 | num_iterations: 100 46 | num_leaves: 31 47 | min_data_in_leaf: 20 48 | learning_rate: 0.1 49 | max_bin: 255 50 | feature_fraction: 1.0 51 | 52 | # compute parameters 53 | 
device_type: "cpu" 54 | 55 | # you can add anything under custom_params, it will be sent as a dictionary 56 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 57 | custom_params: 58 | deterministic: True 59 | use_two_round_loading: True 60 | 61 | # compute parameters 62 | runtime: 63 | #target: null # optional: force target for this training job 64 | nodes: 1 65 | processes: 1 66 | build: "docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile" # relative to lightgbm_python folder 67 | 68 | # model registration 69 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 70 | output: 71 | register_model: False 72 | #register_model_prefix: "model" 73 | #register_model_suffix: "cpu-custom" 74 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/cpu.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/cpu.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_dev" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "benchmark-dev" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-100cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-100cols-10000samples-test" 24 | task_key: "synthetic-regression-100cols" # optional, user to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_python 30 | 31 
| # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts convertion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: false 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | objective: "regression" 42 | metric: "rmse" 43 | boosting: "gbdt" 44 | tree_learner: "data" 45 | num_iterations: 100 46 | num_leaves: 31 47 | min_data_in_leaf: 20 48 | learning_rate: 0.1 49 | max_bin: 255 50 | feature_fraction: 1.0 51 | 52 | # compute parameters 53 | device_type: "cpu" 54 | 55 | # you can add anything under custom_params, it will be sent as a dictionary 56 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 57 | custom_params: 58 | deterministic: True 59 | use_two_round_loading: True 60 | 61 | # compute parameters 62 | runtime: 63 | #target: null # optional: force target for this training job 64 | nodes: 1 65 | processes: 1 66 | 67 | # model registration 68 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 69 | output: 70 | register_model: False 71 | #register_model_prefix: "model" 72 | #register_model_suffix: null 73 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/gpu.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/gpu.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_dev" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your 
particular benchmark 16 | benchmark_name: "benchmark-dev" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-100cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-100cols-10000samples-test" 24 | task_key: "synthetic-regression-100cols" # optional, user to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_python 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts convertion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: false 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | objective: "regression" 42 | metric: "rmse" 43 | boosting: "gbdt" 44 | tree_learner: "data" 45 | num_iterations: 100 46 | num_leaves: 31 47 | min_data_in_leaf: 20 48 | learning_rate: 0.1 49 | max_bin: 255 50 | feature_fraction: 1.0 51 | 52 | # compute parameters 53 | device_type: "gpu" 54 | 55 | # you can add anything under custom_params, it will be sent as a dictionary 56 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 57 | custom_params: 58 | deterministic: True 59 | use_two_round_loading: True 60 | 61 | # compute parameters 62 | runtime: 63 | #target: null # optional: force target for this training job 64 | nodes: 1 65 | processes: 1 66 | build: "docker/lightgbm-v3.3.0/linux_gpu_pip.dockerfile" # relative to lightgbm_python folder 67 | 68 | # model registration 69 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 70 | output: 71 | register_model: False 72 | 
#register_model_prefix: "model" 73 | #register_model_suffix: "gpu" 74 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/mpi.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/mpi.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_dev" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "benchmark-dev" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-100cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-100cols-10000samples-test" 24 | task_key: "synthetic-regression-100cols" # optional, user to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_python 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts convertion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: false 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | objective: "regression" 42 | metric: "rmse" 43 | boosting: "gbdt" 44 | tree_learner: "data" 45 | num_iterations: 100 46 | num_leaves: 31 47 | min_data_in_leaf: 20 48 | learning_rate: 0.1 49 | max_bin: 255 50 | feature_fraction: 1.0 51 | 52 | # compute parameters 53 | device_type: "cpu" 54 | multinode_driver: "mpi" 
55 | 56 | # you can add anything under custom_params, it will be sent as a dictionary 57 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 58 | custom_params: 59 | deterministic: True 60 | use_two_round_loading: True 61 | 62 | # compute parameters 63 | runtime: 64 | #target: null # optional: force target for this training job 65 | build: "docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile" 66 | nodes: 2 67 | processes: 1 68 | 69 | # model registration 70 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 71 | output: 72 | register_model: False 73 | #register_model_prefix: "model" 74 | #register_model_suffix: null 75 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/ray.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/ray.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "dev_lightgbm_ray" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "lightgbm-ray" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-headercsv-regression-10cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-headercsv-regression-10cols-10000samples-test" 24 | task_key: "dev_ray" # optional, user to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_ray 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning 
to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts conversion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: true # IMPORTANT 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | objective: "regression" 42 | metric: "rmse" 43 | boosting: "gbdt" 44 | tree_learner: "data" 45 | num_iterations: 5 46 | num_leaves: 31 47 | min_data_in_leaf: 20 48 | learning_rate: 0.1 49 | max_bin: 255 50 | feature_fraction: 1.0 51 | 52 | # compute parameters 53 | device_type: "cpu" 54 | 55 | # you can add anything under custom_params, it will be sent as a dictionary 56 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 57 | custom_params: 58 | deterministic: True 59 | use_two_round_loading: True 60 | 61 | # compute parameters 62 | runtime: 63 | target: "linux-cpu-ds14v2" # optional: force target for this training job 64 | nodes: 1 65 | processes: 1 66 | 67 | # model registration 68 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 69 | output: 70 | register_model: False 71 | #register_model_prefix: "model" 72 | #register_model_suffix: null 73 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/raytune.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/raytune.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "dev_lightgbm_ray_tune" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 
| benchmark_name: "lightgbm-ray-tune" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-headercsv-regression-10cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-headercsv-regression-10cols-10000samples-test" 24 | task_key: "dev_ray" # optional, user to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_ray_tune 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts convertion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: true # IMPORTANT 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | objective: "regression" 42 | metric: "rmse" 43 | boosting: "gbdt" 44 | tree_learner: "data" 45 | num_iterations: "choice([30,40,50,60])" 46 | num_leaves: "31" 47 | min_data_in_leaf: "20" 48 | learning_rate: "0.1" 49 | max_bin: "255" 50 | feature_fraction: "1.0" 51 | 52 | # compute parameters 53 | device_type: "cpu" 54 | 55 | # you can add anything under custom_params, it will be sent as a dictionary 56 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 57 | custom_params: 58 | deterministic: True 59 | use_two_round_loading: True 60 | 61 | # compute parameters 62 | runtime: 63 | target: "linux-cpu-ds14v2" # optional: force target for this training job 64 | nodes: 1 65 | processes: 1 66 | 67 | # model registration 68 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 69 | output: 70 | register_model: False 71 | #register_model_prefix: "model" 72 | #register_model_suffix: null 
73 | 74 | raytune: 75 | mode: "min" 76 | search_alg: "BasicVariantGenerator" 77 | scheduler: "FIFOScheduler" 78 | num_samples: 5 79 | time_budget: 1800 80 | concurrent_trials: 0 81 | cpus_per_trial: 16 82 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/raytune_distributed.yaml: -------------------------------------------------------------------------------- 1 | # to execute: 2 | # > python src/pipelines/azureml/lightgbm_training.py --exp-config conf/experiments/lightgbm_training/raytune_distributed.yaml 3 | 4 | defaults: 5 | - aml: lightgbm-benchmark-eus2 6 | - compute: lightgbm-benchmark-eus2 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "dev_lightgbm_ray_tune" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "lightgbm-ray-tune" # override this with a unique name 17 | 18 | # list all the train/test pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-headercsv-regression-10cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-headercsv-regression-10cols-10000samples-test" 24 | task_key: "dev_ray" # optional, used to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_ray_tune_distributed 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts conversion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: true # IMPORTANT 36 | label_column: "0" 37 | group_column: null 38 | train_data_format: 'CSV' 39 | test_data_format: 'CSV' 40 | 41 | # lightgbm training parameters 42 | training: 43 | objective: "regression" 44 | 
metric: "rmse" 45 | boosting: "gbdt" 46 | tree_learner: "data" 47 | num_iterations: "choice([30,40,50,60])" 48 | num_leaves: "31" 49 | min_data_in_leaf: "20" 50 | learning_rate: "0.1" 51 | max_bin: "255" 52 | feature_fraction: "1.0" 53 | 54 | # compute parameters 55 | device_type: "cpu" 56 | 57 | # you can add anything under custom_params, it will be sent as a dictionary 58 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 59 | custom_params: 60 | deterministic: True 61 | use_two_round_loading: True 62 | 63 | # compute parameters 64 | runtime: 65 | target: "linux-cpu-ds14v2" # optional: force target for this training job 66 | nodes: 4 67 | processes: 1 68 | 69 | # model registration 70 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 71 | output: 72 | register_model: False 73 | #register_model_prefix: "model" 74 | #register_model_suffix: null 75 | 76 | raytune: 77 | mode: "min" 78 | search_alg: "BasicVariantGenerator" 79 | scheduler: "FIFOScheduler" 80 | num_samples: 5 81 | time_budget: 1800 82 | concurrent_trials: 2 83 | lightgbm_ray_actors: 2 84 | cpus_per_actor: 16 85 | -------------------------------------------------------------------------------- /conf/experiments/lightgbm_training/sweep.yaml: -------------------------------------------------------------------------------- 1 | # to execute, run from /pipelines/azureml/ 2 | # > python pipelines/azureml/pipelines/lightgbm_training.py --exp-config pipelines/azureml/conf/experiments/lightgbm_training/sweep.yaml 3 | 4 | defaults: 5 | - aml: custom 6 | - compute: custom 7 | 8 | ### CUSTOM PARAMETERS ### 9 | 10 | experiment: 11 | name: "lightgbm_training_dev" 12 | description: "something interesting to say about this" 13 | 14 | lightgbm_training_config: 15 | # name of your particular benchmark 16 | benchmark_name: "benchmark-dev" # override this with a unique name 17 | 18 | # list all the train/test 
pairs to train on 19 | tasks: 20 | - train: 21 | name: "data-synthetic-regression-100cols-100000samples-train" 22 | test: 23 | name: "data-synthetic-regression-100cols-10000samples-test" 24 | task_key: "synthetic-regression-100cols" # optional, used to register outputs 25 | 26 | # NOTE: this example uses only 1 training (reference) 27 | # see other config files for creating training variants 28 | reference: 29 | framework: lightgbm_python 30 | 31 | # input parameters 32 | data: 33 | auto_partitioning: True # inserts partitioning to match expected number of partitions (if nodes*processes > 1) 34 | pre_convert_to_binary: False # inserts conversion of train/test data into binary to speed up training (not compatible with auto_partitioning yet) 35 | header: false 36 | label_column: "0" 37 | group_column: null 38 | 39 | # lightgbm training parameters 40 | training: 41 | # fixed values 42 | objective: "regression" 43 | metric: "rmse" 44 | boosting: "gbdt" 45 | tree_learner: "data" 46 | 47 | # "sweepable" training parameters 48 | num_iterations: "choice(100, 200)" 49 | num_leaves: "choice(10,20,30)" 50 | min_data_in_leaf: 20 51 | learning_rate: 0.1 52 | max_bin: 255 53 | feature_fraction: 1.0 54 | 55 | # compute parameters (fixed) 56 | device_type: "cpu" 57 | 58 | # you can add anything under custom_params, it will be sent as a dictionary 59 | # to the lightgbm training module to override its parameters (see lightgbm docs for list) 60 | custom_params: 61 | deterministic: True 62 | use_two_round_loading: True 63 | 64 | # compute parameters 65 | runtime: 66 | #target: null # optional: force target for this training job 67 | nodes: 1 68 | processes: 1 69 | 70 | # model registration 71 | # naming convention: "{register_model_prefix}-{task_key}-{num_iterations}trees-{num_leaves}leaves-{register_model_suffix}" 72 | output: 73 | register_model: False 74 | #register_model_prefix: "model" 75 | #register_model_suffix: "cpu-sweep" 76 | 77 | # SWEEP 78 | sweep: 79 | #primary_metric: 
"node_0/valid_0.rmse" # if you comment it out, will use "node_0/valid_0.METRIC" 80 | goal: "minimize" 81 | algorithm: "random" 82 | early_termination: 83 | policy_type: "median_stopping" 84 | evaluation_interval: 1 85 | delay_evaluation: 5 86 | truncation_percentage: 20 87 | limits: 88 | max_total_trials: 100 89 | max_concurrent_trials: 10 90 | timeout_minutes: 60 91 | -------------------------------------------------------------------------------- /data/sample/unittests-regression-test.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/dataset.schema.json 2 | name: unittests-regression-sample-test 3 | local_path: ../../tests/data/regression/test/ 4 | description: Data used in lightgbm-benchmark repo as unittest sample for regression (test data) 5 | -------------------------------------------------------------------------------- /data/sample/unittests-regression-train.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/dataset.schema.json 2 | name: unittests-regression-sample-train 3 | local_path: ../../tests/data/regression/train/ 4 | description: Data used in lightgbm-benchmark repo as unittest sample for regression (train data) 5 | -------------------------------------------------------------------------------- /docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1-patch/20211109.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.2.1" 8 | ARG lightgbm_benchmark_branch=main 9 | 10 | RUN apt-get 
update && \ 11 | apt-get -y install build-essential cmake 12 | 13 | # LIGHTGBM EXEC AND LIBRARY 14 | 15 | # Clone lightgbm official repository (master branch) 16 | RUN git clone --recursive https://github.com/microsoft/LightGBM && \ 17 | cd LightGBM && \ 18 | git checkout tags/v${lightgbm_version} 19 | 20 | # Download and apply a particular patch 21 | RUN cd /LightGBM && \ 22 | wget https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/32cbb007b61f5bed89af1423c7da250607726a35/pipelines/azureml_sdk15/components/lightgbm_python_custom/lightgbm_custom.python.patch && \ 23 | git apply --whitespace=fix ./lightgbm_custom.python.patch 24 | 25 | # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm 26 | RUN cd /LightGBM && \ 27 | mkdir build && \ 28 | cd build && \ 29 | cmake -DUSE_MPI=ON .. && \ 30 | make -j$(nproc) 31 | 32 | # Prepend path to LightGBM LIB 33 | ENV PATH /LightGBM:$PATH 34 | 35 | # building lightgbm-benchmark binaries 36 | RUN git clone --recursive https://github.com/microsoft/lightgbm-benchmark.git && \ 37 | cd lightgbm-benchmark && \ 38 | git checkout ${lightgbm_benchmark_branch} 39 | 40 | # assuming lightgbm lib+includes are installed on the system 41 | RUN cd /lightgbm-benchmark/src/binaries/ && \ 42 | mkdir build && \ 43 | cd build && \ 44 | cmake -DLIGHTGBM_CLONE=/LightGBM -DUSE_LIGHTGBM_V321_PARSER=ON .. && \ 45 | cmake --build . 
--target lightgbm_predict --config Release 46 | 47 | # provide env variable with path to built binaries 48 | ENV LIGHTGBM_BENCHMARK_BINARIES_PATH /lightgbm-benchmark/src/binaries/build 49 | RUN ls -l $LIGHTGBM_BENCHMARK_BINARIES_PATH 50 | 51 | ## ANACONDA ENVIRONMENT 52 | 53 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 54 | 55 | # Create conda environment 56 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 57 | python=3.8 pip=20.2.4 58 | 59 | # Prepend path to AzureML conda environment 60 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 61 | 62 | # Install pip dependencies 63 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 64 | pip install 'pandas>=1.1,<1.2' \ 65 | 'numpy>=1.10,<1.20' \ 66 | 'matplotlib==3.4.3' \ 67 | 'scipy~=1.5.0' \ 68 | 'scikit-learn~=0.24.1' \ 69 | 'azureml-core==1.35.0' \ 70 | 'azureml-defaults==1.35.0' \ 71 | 'azureml-mlflow==1.35.0' \ 72 | 'azureml-telemetry==1.35.0' \ 73 | 'mpi4py==3.1.1' 74 | 75 | RUN pip install --upgrade pip setuptools wheel && \ 76 | pip install 'cmake==3.21.0' 77 | 78 | # Install LightGBM Python API from build 79 | RUN cd /LightGBM/python-package/ && \ 80 | python setup.py install --precompile 81 | 82 | # This is needed for mpi to locate libpython 83 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 84 | -------------------------------------------------------------------------------- /docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0-patch/20211109.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.3.0" 8 | ARG lightgbm_benchmark_branch=main 9 | 10 | RUN apt-get update && \ 11 | apt-get -y 
install build-essential cmake 12 | 13 | # LIGHTGBM EXEC AND LIBRARY 14 | 15 | # Clone lightgbm official repository (master branch) 16 | RUN git clone --recursive https://github.com/microsoft/LightGBM && \ 17 | cd LightGBM && \ 18 | git checkout tags/v${lightgbm_version} 19 | 20 | # Download and apply a particular patch 21 | RUN cd /LightGBM && \ 22 | wget https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/32cbb007b61f5bed89af1423c7da250607726a35/pipelines/azureml_sdk15/components/lightgbm_python_custom/lightgbm_custom.python.patch && \ 23 | git apply --whitespace=fix ./lightgbm_custom.python.patch 24 | 25 | # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm 26 | RUN cd /LightGBM && \ 27 | mkdir build && \ 28 | cd build && \ 29 | cmake -DUSE_MPI=ON .. && \ 30 | make -j$(nproc) 31 | 32 | # Prepend path to LightGBM LIB 33 | ENV PATH /LightGBM:$PATH 34 | 35 | # building lightgbm-benchmark binaries 36 | RUN git clone --recursive https://github.com/microsoft/lightgbm-benchmark.git && \ 37 | cd lightgbm-benchmark && \ 38 | git checkout ${lightgbm_benchmark_branch} 39 | 40 | # assuming lightgbm lib+includes are installed on the system 41 | RUN cd /lightgbm-benchmark/src/binaries/ && \ 42 | mkdir build && \ 43 | cd build && \ 44 | cmake -DLIGHTGBM_CLONE=/LightGBM .. && \ 45 | cmake --build . 
--target lightgbm_predict --config Release 46 | 47 | # provide env variable with path to built binaries 48 | ENV LIGHTGBM_BENCHMARK_BINARIES_PATH /lightgbm-benchmark/src/binaries/build 49 | RUN ls -l $LIGHTGBM_BENCHMARK_BINARIES_PATH 50 | 51 | ## ANACONDA ENVIRONMENT 52 | 53 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 54 | 55 | # Create conda environment 56 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 57 | python=3.8 pip=20.2.4 58 | 59 | # Prepend path to AzureML conda environment 60 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 61 | 62 | # Install pip dependencies 63 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 64 | pip install 'pandas>=1.1,<1.2' \ 65 | 'numpy>=1.10,<1.20' \ 66 | 'matplotlib==3.4.3' \ 67 | 'scipy~=1.5.0' \ 68 | 'scikit-learn~=0.24.1' \ 69 | 'azureml-core==1.35.0' \ 70 | 'azureml-defaults==1.35.0' \ 71 | 'azureml-mlflow==1.35.0' \ 72 | 'azureml-telemetry==1.35.0' \ 73 | 'mpi4py==3.1.1' 74 | 75 | RUN pip install --upgrade pip setuptools wheel && \ 76 | pip install 'cmake==3.21.0' 77 | 78 | # Install LightGBM Python API from build 79 | RUN cd /LightGBM/python-package/ && \ 80 | python setup.py install --precompile 81 | 82 | # This is needed for mpi to locate libpython 83 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 84 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1/20211109.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.2.1" 8 | ARG lightgbm_benchmark_branch=main 9 | 10 | RUN apt-get update && \ 11 | apt-get -y install 
build-essential cmake 12 | 13 | # LIGHTGBM EXEC AND LIBRARY 14 | 15 | # Clone lightgbm official repository (master branch) 16 | RUN git clone --recursive https://github.com/microsoft/LightGBM && \ 17 | cd LightGBM && \ 18 | git checkout tags/v${lightgbm_version} 19 | 20 | # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm 21 | RUN cd /LightGBM && \ 22 | mkdir build && \ 23 | cd build && \ 24 | cmake -DUSE_MPI=ON .. && \ 25 | make -j$(nproc) 26 | 27 | # Prepend path to LightGBM LIB 28 | ENV PATH /LightGBM:$PATH 29 | 30 | # building lightgbm-benchmark binaries 31 | RUN git clone --recursive https://github.com/microsoft/lightgbm-benchmark.git && \ 32 | cd lightgbm-benchmark && \ 33 | git checkout ${lightgbm_benchmark_branch} 34 | 35 | # assuming lightgbm lib+includes are installed on the system 36 | RUN cd /lightgbm-benchmark/src/binaries/ && \ 37 | mkdir build && \ 38 | cd build && \ 39 | cmake -DLIGHTGBM_CLONE=/LightGBM -DUSE_LIGHTGBM_V321_PARSER=ON .. && \ 40 | cmake --build . 
--target lightgbm_predict --config Release 41 | 42 | # provide env variable with path to built binaries 43 | ENV LIGHTGBM_BENCHMARK_BINARIES_PATH /lightgbm-benchmark/src/binaries/build 44 | RUN ls -l $LIGHTGBM_BENCHMARK_BINARIES_PATH 45 | 46 | ## ANACONDA ENVIRONMENT 47 | 48 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 49 | 50 | # Create conda environment 51 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 52 | python=3.8 pip=20.2.4 53 | 54 | # Prepend path to AzureML conda environment 55 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 56 | 57 | # Install pip dependencies 58 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 59 | pip install 'pandas>=1.1,<1.2' \ 60 | 'numpy>=1.10,<1.20' \ 61 | 'matplotlib==3.4.3' \ 62 | 'scipy~=1.5.0' \ 63 | 'scikit-learn~=0.24.1' \ 64 | 'azureml-core==1.35.0' \ 65 | 'azureml-defaults==1.35.0' \ 66 | 'azureml-mlflow==1.35.0' \ 67 | 'azureml-telemetry==1.35.0' \ 68 | 'mpi4py==3.1.1' 69 | 70 | RUN pip install --upgrade pip setuptools wheel && \ 71 | pip install 'cmake==3.21.0' 72 | 73 | # Install LightGBM Python API from build 74 | RUN cd /LightGBM/python-package/ && \ 75 | python setup.py install --precompile 76 | 77 | # This is needed for mpi to locate libpython 78 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 79 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.2.1/20211108.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.2.1" 8 | 9 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 10 | 11 | # Create conda environment 12 | 
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 13 | python=3.8 pip=20.2.4 14 | 15 | # Prepend path to AzureML conda environment 16 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 17 | 18 | # Install pip dependencies 19 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 20 | pip install 'pandas>=1.1,<1.2' \ 21 | 'numpy>=1.10,<1.20' \ 22 | 'matplotlib==3.4.3' \ 23 | 'scipy~=1.5.0' \ 24 | 'scikit-learn~=0.24.1' \ 25 | 'azureml-core==1.35.0' \ 26 | 'azureml-defaults==1.35.0' \ 27 | 'azureml-mlflow==1.35.0' \ 28 | 'azureml-telemetry==1.35.0' \ 29 | 'mpi4py==3.1.1' 30 | 31 | # install lightgbm with mpi 32 | RUN pip install --upgrade pip setuptools wheel && \ 33 | pip install 'cmake==3.21.0' && \ 34 | pip install lightgbm==${lightgbm_version} --install-option=--mpi 35 | 36 | # This is needed for mpi to locate libpython 37 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 38 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 2 | LABEL lightgbmbenchmark.linux.gpu.build.version="3.2.1/20211108.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.2.1" 8 | 9 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 10 | 11 | # Create conda environment 12 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 13 | python=3.8 pip=20.2.4 14 | 15 | # Prepend path to AzureML conda environment 16 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 17 | 18 | RUN apt-get update -y 19 | 20 | RUN apt-get install --no-install-recommends nvidia-375 -y && \ 21 | apt-get install --no-install-recommends nvidia-opencl-icd-375 nvidia-opencl-dev opencl-headers -y 22 | 23 
| RUN apt-get install --no-install-recommends git cmake build-essential libboost-dev libboost-system-dev libboost-filesystem-dev -y 24 | 25 | # Clone lightgbm official repository (master branch) 26 | RUN git clone --recursive https://github.com/microsoft/LightGBM && \ 27 | cd LightGBM && \ 28 | git checkout tags/v${lightgbm_version} 29 | 30 | # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm 31 | RUN cd /LightGBM && \ 32 | mkdir build && \ 33 | cd build && \ 34 | cmake -DUSE_GPU=ON -DUSE_MPI=ON .. && \ 35 | make -j$(nproc) 36 | 37 | # Install pip dependencies 38 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 39 | pip install 'pandas>=1.1,<1.2' \ 40 | 'numpy>=1.10,<1.20' \ 41 | 'scipy~=1.5.0' \ 42 | 'scikit-learn~=0.24.1' \ 43 | 'azureml-core==1.35.0' \ 44 | 'azureml-defaults==1.35.0' \ 45 | 'azureml-mlflow==1.35.0' \ 46 | 'azureml-telemetry==1.35.0' \ 47 | 'mpi4py==3.1.1' 48 | 49 | RUN pip install --upgrade pip setuptools wheel && \ 50 | pip install 'cmake==3.21.0' 51 | 52 | # Install LightGBM Python API from build 53 | RUN cd /LightGBM/python-package/ && \ 54 | python setup.py install --precompile 55 | 56 | # This is needed for mpi to locate libpython 57 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 58 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 2 | LABEL lightgbmbenchmark.linux.gpu.pip.version="3.2.1/20211108.1" 3 | # Those arguments will NOT be used by AzureML 4 | # they are here just to allow for lightgbm-benchmark build to actually check 5 | # dockerfiles in a PR against their actual branch 6 | ARG lightgbm_version="3.2.1" 7 | 8 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 9 | 10 | # Create conda environment 11 | RUN conda create -p 
$AZUREML_CONDA_ENVIRONMENT_PATH \ 12 | python=3.8 pip=20.2.4 13 | 14 | # Prepend path to AzureML conda environment 15 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 16 | 17 | RUN apt-get update -y 18 | 19 | RUN apt-get install --no-install-recommends nvidia-375 -y && \ 20 | apt-get install --no-install-recommends nvidia-opencl-icd-375 nvidia-opencl-dev opencl-headers -y 21 | 22 | RUN apt-get install --no-install-recommends git cmake build-essential libboost-dev libboost-system-dev libboost-filesystem-dev -y 23 | 24 | # Install pip dependencies 25 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 26 | pip install 'pandas>=1.1,<1.2' \ 27 | 'numpy>=1.10,<1.20' \ 28 | 'scipy~=1.5.0' \ 29 | 'scikit-learn~=0.24.1' \ 30 | 'azureml-core==1.35.0' \ 31 | 'azureml-defaults==1.35.0' \ 32 | 'azureml-mlflow==1.35.0' \ 33 | 'azureml-telemetry==1.35.0' \ 34 | 'mpi4py==3.1.1' 35 | 36 | # install lightgbm with mpi 37 | RUN pip install --upgrade pip setuptools wheel && \ 38 | pip install 'cmake==3.21.0' && \ 39 | pip install lightgbm==${lightgbm_version} --install-option=--gpu 40 | 41 | # This is needed for mpi to locate libpython 42 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 43 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/windows-servercore-1809:latest 2 | LABEL lightgbmbenchmark.windows.cpu.mpi.pip.version="3.2.1/20211108.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.2.1" 8 | 9 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 10 | 11 | # Create conda environment 12 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 13 | python=3.8 pip=20.2.4 14 | 15 | # Prepend path to 
AzureML conda environment 16 | #ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 17 | 18 | # Install pip dependencies 19 | RUN pip install 'pandas>=1.1,<1.2' \ 20 | 'numpy>=1.10,<1.20' \ 21 | 'scipy~=1.5.0' \ 22 | 'scikit-learn~=0.24.1' \ 23 | 'azureml-core==1.35.0' \ 24 | 'azureml-defaults==1.35.0' \ 25 | 'azureml-mlflow==1.35.0' \ 26 | 'azureml-telemetry==1.35.0' \ 27 | lightgbm==$($Env:lightgbm_version) 28 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.3.0" 8 | ARG lightgbm_benchmark_branch=main 9 | 10 | RUN apt-get update && \ 11 | apt-get -y install build-essential cmake 12 | 13 | # LIGHTGBM EXEC AND LIBRARY 14 | 15 | # Clone lightgbm official repository (master branch) 16 | RUN git clone --recursive https://github.com/microsoft/LightGBM && \ 17 | cd LightGBM && \ 18 | git checkout tags/v${lightgbm_version} 19 | 20 | # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm 21 | RUN cd /LightGBM && \ 22 | mkdir build && \ 23 | cd build && \ 24 | cmake -DUSE_MPI=ON .. 
&& \ 25 | make -j$(nproc) 26 | 27 | # Prepend path to LightGBM LIB 28 | ENV PATH /LightGBM:$PATH 29 | 30 | # building lightgbm-benchmark binaries 31 | RUN git clone --recursive https://github.com/microsoft/lightgbm-benchmark.git && \ 32 | cd lightgbm-benchmark && \ 33 | git checkout ${lightgbm_benchmark_branch} 34 | 35 | # assuming lightgbm lib+includes are installed on the system 36 | RUN cd /lightgbm-benchmark/src/binaries/ && \ 37 | mkdir build && \ 38 | cd build && \ 39 | cmake -DLIGHTGBM_CLONE=/LightGBM .. && \ 40 | cmake --build . --target lightgbm_predict --config Release 41 | 42 | # provide env variable with path to built binaries 43 | ENV LIGHTGBM_BENCHMARK_BINARIES_PATH /lightgbm-benchmark/src/binaries/build 44 | RUN ls -l $LIGHTGBM_BENCHMARK_BINARIES_PATH 45 | 46 | ## ANACONDA ENVIRONMENT 47 | 48 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 49 | 50 | # Create conda environment 51 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 52 | python=3.8 pip=20.2.4 53 | 54 | # Prepend path to AzureML conda environment 55 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 56 | 57 | # Install pip dependencies 58 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 59 | pip install 'pandas>=1.1,<1.2' \ 60 | 'numpy>=1.10,<1.20' \ 61 | 'matplotlib==3.4.3' \ 62 | 'scipy~=1.5.0' \ 63 | 'scikit-learn~=0.24.1' \ 64 | 'azureml-core==1.35.0' \ 65 | 'azureml-defaults==1.35.0' \ 66 | 'azureml-mlflow==1.35.0' \ 67 | 'azureml-telemetry==1.35.0' \ 68 | 'mpi4py==3.1.1' 69 | 70 | RUN pip install --upgrade pip setuptools wheel && \ 71 | pip install 'cmake==3.21.0' 72 | 73 | # Install LightGBM Python API from build 74 | RUN cd /LightGBM/python-package/ && \ 75 | python setup.py install --precompile 76 | 77 | # This is needed for mpi to locate libpython 78 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 79 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20220114.1" 3 | 4 | # Those arguments will NOT be used by AzureML when building the image 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.3.0" 8 | 9 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 10 | 11 | # Create conda environment 12 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 13 | python=3.8 pip=20.2.4 14 | 15 | # Prepend path to AzureML conda environment 16 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 17 | 18 | # Install pip dependencies 19 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 20 | pip install 'pandas>=1.1,<1.2' \ 21 | 'numpy>=1.10,<1.20' \ 22 | 'matplotlib==3.4.3' \ 23 | 'scipy~=1.5.0' \ 24 | 'scikit-learn~=0.24.1' \ 25 | 'azureml-core==1.35.0' \ 26 | 'azureml-defaults==1.35.0' \ 27 | 'azureml-mlflow==1.35.0' \ 28 | 'azureml-telemetry==1.35.0' \ 29 | 'mpi4py==3.1.1' 30 | 31 | # install lightgbm with mpi 32 | RUN pip install --upgrade pip setuptools wheel && \ 33 | pip install 'cmake==3.21.0' && \ 34 | pip install lightgbm==${lightgbm_version} --install-option=--mpi 35 | 36 | # This is needed for mpi to locate libpython 37 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 38 | -------------------------------------------------------------------------------- /docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/windows-servercore-1809:latest 2 | LABEL lightgbmbenchmark.windows.cpu.mpi.pip.version="3.3.0/20211108.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR 
against their actual branch 7 | ARG lightgbm_version="3.3.0" 8 | 9 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 10 | 11 | # Create conda environment 12 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 13 | python=3.8 pip=20.2.4 14 | 15 | # Prepend path to AzureML conda environment 16 | #ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 17 | 18 | # Install pip dependencies 19 | RUN pip install 'pandas>=1.1,<1.2' \ 20 | 'numpy>=1.10,<1.20' \ 21 | 'scipy~=1.5.0' \ 22 | 'scikit-learn~=0.24.1' \ 23 | 'azureml-core==1.35.0' \ 24 | 'azureml-defaults==1.35.0' \ 25 | 'azureml-mlflow==1.35.0' \ 26 | 'azureml-telemetry==1.35.0' \ 27 | lightgbm==$($Env:lightgbm_version) 28 | -------------------------------------------------------------------------------- /docs/img/architecture-script-classes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/architecture-script-classes.png -------------------------------------------------------------------------------- /docs/img/designer-ui-components-tab-create-final.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/designer-ui-components-tab-create-final.png -------------------------------------------------------------------------------- /docs/img/designer-ui-components-tab-create-github.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/designer-ui-components-tab-create-github.png -------------------------------------------------------------------------------- /docs/img/designer-ui-components-tag-create.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/designer-ui-components-tag-create.png -------------------------------------------------------------------------------- /docs/img/designer-ui-pipelines-compose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/designer-ui-pipelines-compose.gif -------------------------------------------------------------------------------- /docs/img/designer-ui-pipelines-new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/designer-ui-pipelines-new.png -------------------------------------------------------------------------------- /docs/img/lightgbm-training-metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/docs/img/lightgbm-training-metrics.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | {!README.md!} -------------------------------------------------------------------------------- /docs/references/common/aml.md: -------------------------------------------------------------------------------- 1 | ::: src.common.aml 2 | -------------------------------------------------------------------------------- /docs/references/common/components.md: -------------------------------------------------------------------------------- 1 | ::: src.common.components 2 | 
-------------------------------------------------------------------------------- /docs/references/common/io.md: -------------------------------------------------------------------------------- 1 | ::: src.common.io 2 | -------------------------------------------------------------------------------- /docs/references/common/lightgbm_utils.md: -------------------------------------------------------------------------------- 1 | ::: src.common.lightgbm_utils 2 | -------------------------------------------------------------------------------- /docs/references/common/metrics.md: -------------------------------------------------------------------------------- 1 | ::: src.common.metrics 2 | -------------------------------------------------------------------------------- /docs/references/common/perf.md: -------------------------------------------------------------------------------- 1 | Each component in this repository is reporting performance metrics in MLFlow. 2 | 3 | The list of available metrics is detailed below: 4 | 5 | | Metric | Level | Description | 6 | | :-- | :-- | :-- | 7 | | `cpu_avg_utilization_over20_pct` | One value per node | How much time every cpu of that node are utilized more than 20%, over the total job time. | 8 | | `cpu_avg_utilization_over40_pct` | One value per node | How much time are all cpus utilized more than 40%, over the total job time. | 9 | | `cpu_avg_utilization_over80_pct` | One value per node | How much time are all cpus utilized more than 80%, over the total job time. | 10 | | `cpu_avg_utilization_at100_pct` | One value per node | How much time are all cpus fully utilized at 100%, over the total job time. | 11 | | `cpu_avg_utilization_pct` | One value per node | How much are every cpu utilized on average during the entire job. | 12 | | `max_t_cpu_pct_per_cpu_avg` | One value per node | Maximum of **average cpu utilization** over the entire job time.
| 13 | | `max_t_cpu_pct_per_cpu_max` | One value per node | Maximum of **maximum cpu utilization** over the entire job time. | 14 | | `max_t_cpu_pct_per_cpu_min` | One value per node | Maximum of **minimum cpu utilization** over the entire job time. | 15 | | `node_cpu_hours` | One value per node | `time * #cpus` | 16 | | `node_unused_cpu_hours` | One value per node | `time * #cpus * (1 - cpu_avg_utilization_pct)` | 17 | | `max_t_mem_percent` | One value per node | Maximum of **memory utilization** over the entire job time. | 18 | | `max_t_disk_usage_percent` | One value per node | Maximum of **disk usage** over the entire job time. | 19 | | `total_disk_io_read_mb` | One value per node | Total disk **data read** in MB (max value at the end of job). | 20 | | `total_disk_io_write_mb` | One value per node | Total disk **data write** in MB (max value at the end of job). | 21 | | `total_net_io_lo_sent_mb` | One value per node | Total net data **sent on loopback** device (max value at the end of job). | 22 | | `total_net_io_ext_sent_mb` | One value per node | Total net data **sent on external** device (max value at the end of job). | 23 | | `total_net_io_lo_recv_mb` | One value per node | Total net data **received on loopback** device (max value at the end of job). | 24 | | `total_net_io_ext_recv_mb` | One value per node | Total net data **received on external** device (max value at the end of job). | 25 | 26 | ::: src.common.perf 27 | -------------------------------------------------------------------------------- /docs/references/common/tasks.md: -------------------------------------------------------------------------------- 1 | This contains all the configuration dataclasses needed to configure AzureML pipelines. 
2 | 3 | ```python 4 | {!./src/common/tasks.py!} 5 | ``` -------------------------------------------------------------------------------- /docs/references/scripts/sample/sample.md: -------------------------------------------------------------------------------- 1 | ## Usage 2 | 3 | ::: src.scripts.sample.sample 4 | -------------------------------------------------------------------------------- /docs/references/scripts/training/lightgbm_python.md: -------------------------------------------------------------------------------- 1 | WORK IN PROGRESS 2 | -------------------------------------------------------------------------------- /docs/results/manual.md: -------------------------------------------------------------------------------- 1 | # Latest Benchmark results 2 | 3 | ## STANDARD_DS14_V2 4 | 5 | Here's some manual results for [Standard DS14-4_v2](https://docs.microsoft.com/en-us/azure/virtual-machines/dv2-dsv2-series-memory#dsv2-series-11-15) (4 vcpus, 112 GiB memory), Linux (ubuntu 20.04) with Premium SSD LRS. 
6 | 7 | ``` 8 | Train data shape: (100000, 4001) 9 | Test data shape: (10000, 4001) 10 | Inference data shape: (100000, 4000) 11 | --- time elapsed: data_generation = 48.887274 s [tags: {'task': 'generate'}] 12 | --- time elapsed: data_saving = 202.063839 s [tags: {'task': 'generate'}] 13 | --- time elapsed: data_loading = 64.472545 s [tags: {'framework': 'lightgbm_python', 'task': 'train', 'lightgbm_version': '3.2.1'}] 14 | --- time elapsed: training = 186.153282 s [tags: {'framework': 'lightgbm_python', 'task': 'train', 'lightgbm_version': '3.2.1'}] 15 | --- time elapsed: data_loading = 174.565443 s [tags: {'framework': 'lightgbm_python', 'task': 'score', 'lightgbm_version': '3.2.1'}] 16 | --- time elapsed: inferencing = 7.100806 s [tags: {'framework': 'lightgbm_python', 'task': 'score', 'lightgbm_version': '3.2.1'}] 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/run/azureml/azure-setup.md: -------------------------------------------------------------------------------- 1 | # Provision an AzureML workspace to run the LightGBM benchmark 2 | 3 | **Objectives** - By following this tutorial, you will be able to setup the Azure resources you need to run the pipelines in this repo. 4 | 5 | **Requirements** - To enjoy this tutorial, you need to have a working Azure account and subscription (see [how to create one](https://azure.microsoft.com/en-us/free/), or how to get [Visual Studio Enterprise benefits](https://docs.microsoft.com/en-us/visualstudio/subscriptions/vs-azure)). 6 | 7 | ## Option A. Create an AzureML workspace (manual route) 8 | 9 | 1. If you don't have one already, [create an AzureML workspace](https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources). 10 | 11 | 2. In that workspace, you will need to [create compute clusters](https://docs.microsoft.com/en-us/azure/machine-learning/quickstart-create-resources#cluster). 
Here's what we recommend to get you started with LightGBM training using the pipelines of this repo. The names below are indicative, but we'll keep refering to those in our docs. 12 | 13 | | Cluster Name | SKU | Node count | Description | 14 | | :-- | :-- | :-- | :-- | 15 | | `cpu-cluster` | Standard_DS3_v2 | 4 | A cluster for simple jobs, running on cheap VMs. | 16 | | `linux-cpu-d32sv3` | Standard_D32s_v3 | 10 | A cluster for LightGBM itself, with a more powerful yet affordable VM. Feel free to provision more or less. | 17 | | `linux-gpu-nv6` | Standard_NV6 (gpu) | 1 | Optional: for trying out gpu lightgbm training (work in progress) | 18 | 19 | IMPORTANT: Whenever you create those, set the minimum number of nodes to 0 so that unused clusters will automatically size down and reduce costs. 20 | 21 | ## Option B. Create an AzureML workspace for LightGBM using an ARM template 22 | 23 | Work in progress, feel free to contribute to the [discussion on this topic in the github repo](https://github.com/microsoft/lightgbm-benchmark/discussions/133). 24 | -------------------------------------------------------------------------------- /docs/run/azureml/benchmark-training.md: -------------------------------------------------------------------------------- 1 | WORK IN PROGRESS -------------------------------------------------------------------------------- /docs/run/azureml/local-setup.md: -------------------------------------------------------------------------------- 1 | # Local Setup: run a sample benchmark pipeline on AzureML 2 | 3 | **Objectives** - By following this tutorial, you will be able to setup resources in Azure to be able to run the pipelines in this repo.: 4 | 5 | **Requirements** - To enjoy this tutorial, you first need to: 6 | - install the [local python requirements](../install.md). 7 | - provision [Azure resources first](azure-setup.md), and have a working AzureML workspace. 8 | 9 | ## A. 
Edit config files to point to your AzureML workspace 10 | 11 | To be able to submit the benchmark pipelines in AzureML, you need to edit some configuration files with the right references to connect to your AzureML resources. 12 | 13 | 1. Edit file under `conf/aml/custom.yaml` to match with your AzureML workspace references: 14 | 15 | ```yaml 16 | # @package _group_ 17 | subscription_id: TODO 18 | resource_group: TODO 19 | workspace_name: TODO 20 | tenant: TODO 21 | auth: "interactive" 22 | ``` 23 | 24 | 2. Edit file under `conf/compute/custom.yaml` to match with the name of your compute targets in AzureML. Check below for reference. If you haven't created a gpu cluster, you can leave the config file as is for the gpu lines. 25 | 26 | ```yaml 27 | # @package _group_ 28 | linux_cpu: "cpu-cluster" 29 | linux_gpu: "linux-gpu-nv6" 30 | windows_cpu: "win-cpu" 31 | ``` 32 | 33 | !!! note 34 | Configs in the repo assume you use `custom` as name to find your aml/compute config. If in the future you have multiple aml/compute configs (ex: `myotheraml.yaml`), when you'll want to run a pipeline, use arguments `aml=myotheraml compute=myotheraml` to override. 35 | 36 | ## B. Verify your setup: run a sample pipeline in your workspace 37 | 38 | Running a pipeline consists in launching a python script with a pipeline configuration file. 39 | 40 | For instance, when you run: 41 | ```bash 42 | python pipelines/azureml/pipelines/data_generation.py --exp-config pipelines/azureml/conf/experiments/data-generation.yaml 43 | ``` 44 | 45 | The python script will build a pipeline based on the collection of manual scripts, each running in its own python environment. The configuration for the parameters from each script will be provided from the configuration file in `conf/experiments/data-generation.yaml`. 46 | 47 | ```yaml 48 | {!./conf/experiments/data-generation.yaml!} 49 | ``` 50 | 51 | Running the python command should open a browser to your workspace opening the experiment view.
52 | -------------------------------------------------------------------------------- /docs/run/azureml/upload-your-data.md: -------------------------------------------------------------------------------- 1 | # Upload data sources into AzureML to run the benchmark 2 | 3 | **Objectives** - By following this tutorial, you will be able to: 4 | 5 | - upload sample or custom data into AzureML 6 | - have a train/test dataset ready to run a LightGBM training 7 | 8 | **Requirements** - To enjoy this tutorial, you need to: 9 | - have an existing [AzureML workspace with relevant compute resource](azure-setup.md). 10 | - have installed the [az ml cli](../install.md) (python and az ml cli). 11 | 12 | ## Get data into AzureML 13 | 14 | There are multiple ways to get your data into your AzureML workspace. Here's a couple: 15 | 16 | - Option A: use the `az ml cli` to upload files from the commandline 17 | - Option B: use the AzureML UI to [upload your local data](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-connect-data-ui) 18 | - Option C: use the AzurEML UI to [create a dataset from an existing storage](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-connect-data-ui) 19 | 20 | Options B and C are documented in the AzureML documentation (links above). 21 | 22 | We'll show option A in the following, as **we provide some yaml templates** to upload standard datasets into your provisioned AzureML workspace for running our benchmark. 23 | 24 | !!! warning 25 | The `data/` folder of our repository has been added to `.gitignore` to avoid uploading your own data in git. But **please be careful** when adding your own data into the repository folder to not commit is mistakenly. 26 | 27 | ## Upload a sample dataset using `az ml cli` 28 | 29 | Our repo has a minimal set of sample data we use for unit testing. We'll demo how to add those to your workspace as a way to show how to upload your own files using the command line. 
30 | 31 | In a terminal: 32 | 33 | 1\. If you haven't already, connect by typing 34 | 35 | ```bash 36 | az login 37 | ``` 38 | 39 | To avoid having to add your workspace/resource group every time, set those as defaults: 40 | 41 | ```bash 42 | az account set --subscription 43 | az configure --defaults workspace= group= 44 | ``` 45 | 46 | 2\. From the repository root, type: 47 | 48 | ```bash 49 | # to upload dummy train dataset 50 | az ml dataset create --file data/sample/unittests-regression-train.yml 51 | 52 | # to upload dummy test dataset 53 | az ml dataset create --file data/sample/unittests-regression-test.yml 54 | ``` 55 | 56 | This will use the sample config file below to create a dataset uploading the file specified in `local_path` into your workspace. 57 | 58 | ``` yaml 59 | {!./data/sample/unittests-regression-train.yml!} 60 | ``` 61 | 62 | 3\. To find it in the AzureML UI, get into your workspace under the **Datasets** tab. 63 | 64 | You'll now be able to consume this data as an input of [lightgbm training](train-on-your-data.md) or inferencing pipelines. 65 | 66 | Feel free to edit this sample file to upload your own data into AzureML from local files and folders. 67 | 68 | ## Upload standard benchmark datasets into AzureML 69 | 70 | Work in progress, feel free to contribute to the [discussion on this topic in the github repo](https://github.com/microsoft/lightgbm-benchmark/discussions/131). 71 | -------------------------------------------------------------------------------- /docs/run/install.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 | ## Install Python dependencies 4 | 5 | To enjoy this repository, you need to have an existing installation of `python>=3.8` ([Miniconda](https://docs.conda.io/en/latest/miniconda.html) or equivalent). 
6 | 7 | Then, we suggest you create a conda environment and install dependencies for this benchmark: 8 | 9 | ```ps 10 | # create conda environment 11 | conda create --name lightgbmbenchmark python=3.8 -y 12 | 13 | # activate conda environment 14 | conda activate lightgbmbenchmark 15 | 16 | # install shrike library 17 | python -m pip install -r requirements.txt 18 | ``` 19 | 20 | ## Install `az ml cli` 21 | 22 | To be able to provision azure resources, or upload data from the command line, we recommend you to use the Azure CLI v2 with the ml extension. Follow the instructions to [install and set up the CLI (v2)](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-configure-cli). 23 | 24 | ## Build local dependencies 25 | 26 | The benchmark occasionaly relies on locally built dependencies. We will name those here. 27 | 28 | 29 | ### Run lightgbm train locally (requires mpi) 30 | 31 | Our lightgbm training script is distributed-ready, and currently using mpi. To be able to use this locally, either for debugging or for benchmarking, you'll need to [install LightGBM with mpi support](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html#build-mpi-version). 32 | 33 | One easy way is to install lightgbm with mpi option (requires cmake and other build tools): 34 | 35 | ``` 36 | pip install --upgrade pip setuptools wheel 37 | pip install cmake==3.21.0 38 | pip install lightgbm==3.2.1 --install-option=--mpi 39 | ``` 40 | 41 | ### Run scripts under `/src/scripts/lightgbm_cli/` 42 | 43 | Those scripts are intended to run LightGBM from the command line. Using them requires providing the path to the lightgbm executables (ex: `lightgbm.exe`). 44 | 45 | To build those locally, use [instructions from LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html#installation-guide). 46 | 47 | !!! note 48 | The `/build/` directory has been added to `.gitignore` to allow you to build local dependencies without pushing them in git. 
-------------------------------------------------------------------------------- /docs/run/manual-benchmark.md: -------------------------------------------------------------------------------- 1 | # Run benchmark manually 2 | 3 | **Objectives** - By following this tutorial, you will be able to: 4 | 5 | - generate synthetic data for running lightgbm 6 | - run lightgbm training and inferencing scripts to measure wall time 7 | 8 | **Requirements** - To enjoy this tutorial, you need to have installed python dependencies locally (see [instructions](../run/install.md)). 9 | 10 | ## Generate synthetic data 11 | 12 | To generate a synthetic dataset based on sklearn: 13 | 14 | === "Bash" 15 | 16 | ``` bash 17 | python src/scripts/data_processing/generate_data/generate.py \ 18 | --train_samples 30000 \ 19 | --test_samples 3000 \ 20 | --inferencing_samples 30000 \ 21 | --n_features 4000 \ 22 | --n_informative 400 \ 23 | --random_state 5 \ 24 | --output_train ./data/synthetic/train/ \ 25 | --output_test ./data/synthetic/test/ \ 26 | --output_inference ./data/synthetic/inference/ \ 27 | --type regression 28 | ``` 29 | 30 | === "Powershell" 31 | 32 | ``` powershell 33 | python src/scripts/data_processing/generate_data/generate.py ` 34 | --train_samples 30000 ` 35 | --test_samples 3000 ` 36 | --inferencing_samples 30000 ` 37 | --n_features 4000 ` 38 | --n_informative 400 ` 39 | --random_state 5 ` 40 | --output_train ./data/synthetic/train/ ` 41 | --output_test ./data/synthetic/test/ ` 42 | --output_inference ./data/synthetic/inference/ ` 43 | --type regression 44 | ``` 45 | 46 | 47 | !!! note 48 | Running the synthetic data generation script with these parameter values requires at least 4 GB of RAM available and generates a 754 MB training, a 75 MB testing, and a 744 MB inferencing dataset. 
49 | 50 | ## Run training on synthetic data 51 | 52 | === "Bash" 53 | 54 | ``` bash 55 | python src/scripts/training/lightgbm_python/train.py \ 56 | --train ./data/synthetic/train/ \ 57 | --test ./data/synthetic/test/ \ 58 | --export_model ./data/models/synthetic-100trees-4000cols/ \ 59 | --objective regression \ 60 | --boosting_type gbdt \ 61 | --tree_learner serial \ 62 | --metric rmse \ 63 | --num_trees 100 \ 64 | --num_leaves 100 \ 65 | --min_data_in_leaf 400 \ 66 | --learning_rate 0.3 \ 67 | --max_bin 16 \ 68 | --feature_fraction 0.15 \ 69 | --device_type cpu 70 | ``` 71 | 72 | === "Powershell" 73 | 74 | ``` powershell 75 | python src/scripts/training/lightgbm_python/train.py ` 76 | --train ./data/synthetic/train/ ` 77 | --test ./data/synthetic/test/ ` 78 | --export_model ./data/models/synthetic-100trees-4000cols/ ` 79 | --objective regression ` 80 | --boosting_type gbdt ` 81 | --tree_learner serial ` 82 | --metric rmse ` 83 | --num_trees 100 ` 84 | --num_leaves 100 ` 85 | --min_data_in_leaf 400 ` 86 | --learning_rate 0.3 ` 87 | --max_bin 16 ` 88 | --feature_fraction 0.15 ` 89 | --device_type cpu 90 | ``` 91 | 92 | !!! note 93 | `--device_type cpu` is optional here, if you're running on gpu you can use `--device_type gpu` instead. 
94 | 95 | ## Run inferencing on synthetic data (lightgbm python) 96 | 97 | === "Bash" 98 | 99 | ```bash 100 | python src/scripts/inferencing/lightgbm_python/score.py \ 101 | --data ./data/synthetic/inference/ \ 102 | --model ./data/models/synthetic-100trees-4000cols/ \ 103 | --output ./data/outputs/predictions/ \ 104 | --num_threads 1 105 | ``` 106 | 107 | === "Powershell" 108 | 109 | ``` powershell 110 | python src/scripts/inferencing/lightgbm_python/score.py ` 111 | --data ./data/synthetic/inference/ ` 112 | --model ./data/models/synthetic-100trees-4000cols/ ` 113 | --output ./data/outputs/predictions/ ` 114 | --num_threads 1 115 | ``` 116 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: LightGBM-Benchmark 2 | 3 | # https://www.mkdocs.org/user-guide/configuration/ 4 | repo_url: https://github.com/Microsoft/lightgbm-benchmark 5 | edit_uri: edit/main/docs 6 | 7 | nav: 8 | - Home: index.md 9 | - About: lightgbm-benchmark-project.md 10 | - Contribute: 11 | Developer Guide: contribute/developer-guide.md 12 | Architecture Guide: contribute/architecture-guide.md 13 | Reporting Guide: contribute/reporting-guide.md 14 | - Run: 15 | Install: run/install.md 16 | Run manually: run/manual-benchmark.md 17 | Run in AzureML: 18 | Azure Setup: run/azureml/azure-setup.md 19 | Local Setup: run/azureml/local-setup.md 20 | Benchmark Pipelines: 21 | Generate data: run/azureml/generate-synthetic-data.md 22 | Training: run/azureml/benchmark-training.md 23 | Inferencing: run/azureml/benchmark-inferencing.md 24 | Custom Pipelines: 25 | Upload your data: run/azureml/upload-your-data.md 26 | Train on your data: run/azureml/train-on-your-data.md 27 | Designer UI: run/azureml/designer-ui.md 28 | - Results: 29 | Inferencing: results/inferencing.md 30 | Manual: results/manual.md 31 | - Reference Docs: 32 | - src/common/: 33 | - aml.py: 
references/common/aml.md 34 | - components.py: references/common/components.md 35 | - io.py: references/common/io.md 36 | - lightgbm_utils.py: references/common/lightgbm_utils.md 37 | - metrics.py: references/common/metrics.md 38 | - perf.py: references/common/perf.md 39 | - tasks.py: references/common/tasks.md 40 | - src/scripts/: 41 | - sample/sample.py: references/scripts/sample/sample.md 42 | - training/lightgbm_python/: references/scripts/training/lightgbm_python.md 43 | 44 | theme: 45 | # name: readthedocs 46 | name: material 47 | 48 | markdown_extensions: 49 | - pymdownx.tabbed 50 | - pymdownx.superfences 51 | - pymdownx.snippets 52 | - admonition 53 | - pymdownx.highlight: 54 | use_pygments: true 55 | linenums: true 56 | linenums_style: pymdownx-inline 57 | # https://github.com/mkdocs/mkdocs/issues/777 58 | - markdown_include.include: 59 | base_path: . 60 | 61 | plugins: 62 | - search 63 | - mkdocstrings: 64 | default_handler: python 65 | handlers: 66 | python: 67 | setup_commands: 68 | - import sys 69 | - sys.path.append("src") 70 | rendering: 71 | show_source: true 72 | show_if_no_docstring: false 73 | members_order: "source" 74 | show_root_toc_entry: false 75 | show_root_heading: false 76 | #custom_templates: templates 77 | watch: 78 | - src/ 79 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # benchmark common code 2 | mlflow==1.23.1 3 | omegaconf~=2.1 4 | mpi4py==3.1.1 5 | matplotlib==3.4.3 6 | psutil==5.8.0 7 | 8 | # frameworks 9 | ray==1.9.2 10 | lightgbm-ray==0.1.2 11 | lightgbm==3.3.1 12 | treelite==2.1.0 13 | treelite_runtime==2.1.0 14 | flaml==0.9.6 15 | hpbandster==0.7.4 16 | ConfigSpace==0.5.0 17 | optuna==2.8.0 18 | 19 | # pipelines 20 | shrike[pipeline]==1.14.7 21 | azure-ml-component==0.9.4.post1 # for component dsl 22 | azureml-train-core==1.36.0 # for azureml.train.hyperdrive 23 |
azureml-dataset-runtime==1.36.0 # to register dataset 24 | hydra-core~=1.0.3 25 | typing_extensions==4.0.1 # for hydra 26 | numpy==1.20.3 27 | protobuf<=3.20.1 28 | 29 | # unit testing 30 | pytest==6.2.4 31 | pytest-cov==2.12.1 32 | pytest-mock==3.6.1 33 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/src/__init__.py -------------------------------------------------------------------------------- /src/binaries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0.0) 2 | project(lightgbm_benchmark_utils VERSION 0.3.0) 3 | 4 | if(LIGHTGBM_CLONE) 5 | if(NOT DEFINED LIGHTGBM_SRC) 6 | set(LIGHTGBM_SRC ${LIGHTGBM_CLONE}/src) 7 | endif() 8 | if(NOT DEFINED LIGHTGBM_LIB) 9 | set(LIGHTGBM_LIB ${LIGHTGBM_CLONE}/Release) 10 | endif() 11 | if(NOT DEFINED LIGHTGBM_INC) 12 | set(LIGHTGBM_INC ${LIGHTGBM_CLONE}/include) 13 | endif() 14 | endif() 15 | 16 | if(LIGHTGBM_SRC) 17 | message(STATUS "Looking for lightgbm/src/ at ${LIGHTGBM_SRC}") 18 | else() 19 | message(SEND_ERROR "We could not find lightgbm/src/. Please provide -DLIGHTGBM_SRC=... to cmake command") 20 | endif() 21 | 22 | if(LIGHTGBM_LIB) 23 | message(STATUS "Looking for lib_lightgbm at ${LIGHTGBM_LIB}") 24 | find_library(LIGHTGBM_LIBRARIES NAMES _lightgbm lib_lightgbm PATHS "${LIGHTGBM_LIB}" REQUIRED) 25 | else() 26 | message(STATUS "Looking for lib_lightgbm in PATH(win) or LD_LIBRARY_PATH(linux)") 27 | find_library(LIGHTGBM_LIBRARIES NAMES _lightgbm lib_lightgbm REQUIRED) 28 | endif() 29 | 30 | if(NOT LIGHTGBM_LIBRARIES) 31 | message(SEND_ERROR "We could not find lib_lightgbm. Please provide -DLIGHTGBM_LIB=... 
to cmake command") 32 | else() 33 | message(STATUS "Found lib_lightgbm at ${LIGHTGBM_LIBRARIES}") 34 | endif() 35 | 36 | if(LIGHTGBM_INC) 37 | if(EXISTS "${LIGHTGBM_INC}/LightGBM/c_api.h") 38 | message(STATUS "Found LightGBM/c_api.h in ${LIGHTGBM_INC} dir") 39 | else() 40 | message(SEND_ERROR "Could not find LightGBM/c_api.h in ${LIGHTGBM_INC} dir") 41 | endif() 42 | include_directories("${LIGHTGBM_INC}") 43 | else() 44 | message(WARNING "If you don't provide -DLIGHTGBM_INC=... you better hope LightGBM/c_api.h is in your includes dir.") 45 | endif() 46 | 47 | # NOTE LightGBM::Parser <3.2.1 uses 4 arguments, not 5 48 | if (USE_LIGHTGBM_V321_PARSER) 49 | message(WARNING "You specified -DDUSE_LIGHTGBM_V321_PARSER, using LightGBM v3.2.1 Parser with only 4 arguments.") 50 | ADD_DEFINITIONS(-DUSE_LIGHTGBM_V321_PARSER) 51 | endif() 52 | 53 | include_directories("./common") 54 | 55 | set(COMMON_CODE 56 | ./common/custom_loader.hpp 57 | ./common/custom_loader.cpp 58 | ${LIGHTGBM_SRC}/io/parser.hpp 59 | ${LIGHTGBM_SRC}/io/parser.cpp 60 | ${LIGHTGBM_SRC}/io/file_io.cpp) 61 | 62 | ## LIGHTGBM PREDICT 63 | add_executable(lightgbm_predict ./lightgbm_predict/main.cc ${COMMON_CODE}) 64 | target_link_libraries(lightgbm_predict "${LIGHTGBM_LIBRARIES}") 65 | -------------------------------------------------------------------------------- /src/binaries/README.md: -------------------------------------------------------------------------------- 1 | # Lightgbm Predict Executable 2 | 3 | A simple executable using LightGBM C API calls to run predictions and simulate production inferencing scenarios. 4 | 5 | Those building instructions matter only when running the executable locally. You should not need to follow those instructions if you plan to use the executable from AzureML. 
6 | 7 | ## To build locally on Windows 8 | 9 | ### Build LightGBM first 10 | 11 | If you don't already have an existing build of LightGBM: 12 | 13 | ```bash 14 | git clone --recursive https://github.com/microsoft/LightGBM.git 15 | mkdir build 16 | cd build 17 | cmake -A x64 .. 18 | cmake --build . --target _lightgbm --config Release 19 | ``` 20 | 21 | Do not build with `ALL_BUILD`, as you don't need all the artefacts, just the lightgbm lib. Once built, the directory `LightGBM/Release/` should contain `lib_lightgbm.lib`, `lib_lightgbm.dll` and `lib_lightgbm.exp` (only). 22 | 23 | ### Build lightgbm-benchmark binaries 24 | 25 | (For now, only lightgbm_predict) 26 | 27 | Run the following in `src/binaries/`. You will need to provide the path to the cloned repository built above using `-DLIGHTGBM_CLONE=...`. 28 | 29 | ```bash 30 | mkdir build 31 | cd build 32 | cmake -A x64 -DLIGHTGBM_CLONE=___ .. 33 | cmake --build . --target lightgbm_predict --config Release 34 | ``` 35 | 36 | **Note**: to compile for LightGBM v3.2.1, you need to add `-DUSE_LIGHTGBM_V321_PARSER=ON` -------------------------------------------------------------------------------- /src/binaries/common/custom_loader.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) Microsoft Corporation. 3 | Licensed under the MIT license.
4 | */ 5 | 6 | #ifndef LIGHTGBM_BENCHMARK_COMMON_CUSTOM_LOADER_H_ 7 | #define LIGHTGBM_BENCHMARK_COMMON_CUSTOM_LOADER_H_ 8 | 9 | #include 10 | #include "LightGBM/dataset.h" 11 | 12 | 13 | // struct to organize arguments for call to LGBM_BoosterPredictForCSRSingleRow() 14 | // see https://lightgbm.readthedocs.io/en/latest/C-API.html#c.LGBM_BoosterPredictForCSRSingleRow 15 | struct CSRDataRow_t { 16 | // array of size nindptr (number of rows) 17 | // row_headers[n] = mem index start of row n 18 | // row_headers[n+1] = mem index end of row n 19 | int32_t * row_headers = nullptr; 20 | 21 | // array of size maximum num_features 22 | // sparse indices in the row 23 | int32_t * indices = nullptr; 24 | 25 | // array of size maximum num_features 26 | // sparse values in the row 27 | float * row = nullptr; 28 | 29 | // maximum number of features (total) 30 | size_t num_features; 31 | 32 | // number of rows 33 | int64_t nindptr; 34 | 35 | // number of null elements in the row (?) 36 | int64_t null_elem; 37 | 38 | // some metadata for debugging 39 | int32_t file_line_index; 40 | float row_label; 41 | }; 42 | 43 | // class to read libsvm file and iterate on each line 44 | class LightGBMDataReader { 45 | private: 46 | // counts how many rows have been processed so far 47 | int row_counter; 48 | int num_features; 49 | std::ifstream * file_handler; 50 | LightGBM::Parser * lightgbm_parser; 51 | 52 | public: 53 | // Constructor 54 | LightGBMDataReader(); 55 | 56 | // Destructor 57 | ~LightGBMDataReader(); 58 | 59 | // open the file for parsing 60 | int open(const std::string file_path, int32_t init_num_features); 61 | 62 | // close the file handler (duh) 63 | void close(); 64 | 65 | // allocate ONE new row of data in the given struct 66 | CSRDataRow_t * init_row(CSRDataRow_t * row, int32_t num_features); 67 | 68 | // Iterates on the svm file and returns ONE row ready to predict 69 | // returns nullptr when finished 70 | CSRDataRow_t * iter(CSRDataRow_t * replace_row = nullptr); 71 | 
}; 72 | 73 | #endif /* LIGHTGBM_BENCHMARK_COMMON_CUSTOM_LOADER_H_ */ 74 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/src/common/__init__.py -------------------------------------------------------------------------------- /src/common/lightgbm_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | """ 5 | This classes provide help to integrate lightgbm 6 | """ 7 | import lightgbm 8 | import logging 9 | 10 | class LightGBMCallbackHandler(): 11 | """ This class handles LightGBM callbacks for recording metrics. """ 12 | def __init__(self, metrics_logger, metrics_prefix=None, metrics_suffix=None): 13 | """ 14 | Args: 15 | metrics_logger (common.metrics.MetricsLogger): class to log metrics using MLFlow 16 | """ 17 | self.metrics = {} 18 | self.metrics_logger = metrics_logger 19 | self.metrics_prefix = metrics_prefix 20 | self.metrics_suffix = metrics_suffix 21 | self.logger = logging.getLogger(__name__) 22 | 23 | def _format_metric_key(self, data_name, eval_name): 24 | """Builds a metric key with prefix and suffix""" 25 | key = f"{data_name}.{eval_name}" 26 | 27 | if self.metrics_prefix: 28 | key = self.metrics_prefix + key 29 | if self.metrics_suffix: 30 | key = key + self.metrics_suffix 31 | 32 | return key 33 | 34 | def callback(self, env: lightgbm.callback.CallbackEnv) -> None: 35 | """Callback method to collect metrics produced by LightGBM. 
def bootstrap_ci(data, iterations=1000, operators={'mean':np.mean}, confidence_level=0.95, seed=None):
    """Computes bootstrapped confidence intervals for a set of statistics.

    Args:
        data (np.array) : input data
        iterations (int) : how many bootstrapped samples to generate
        operators (Dict[str->func]) : map of functions to produce CI for
        confidence_level (float) : confidence_level = 1-alpha
        seed (int, optional) : seed for reproducible resampling.
            FIX: this parameter was previously accepted but silently ignored.

    Returns:
        operators_ci: Dict[str->tuple], operator_key -> (ci_left, ci_mean, ci_right)
    """
    # NOTE: the mutable default for `operators` is kept for interface
    # compatibility; it is only read here, never mutated.

    # dedicated generator so `seed` actually controls the resampling
    rng = np.random.default_rng(seed)

    # one list of bootstrapped statistic values per operator
    bootstrap_runs = {operator_key: [] for operator_key in operators}

    sample_size = len(data)
    for _ in range(iterations):
        # standard bootstrap: resample with replacement at original size
        bootstrap = rng.choice(data, size=sample_size, replace=True)
        for operator_key, operator_func in operators.items():
            bootstrap_runs[operator_key].append(operator_func(bootstrap))

    alpha = 1 - confidence_level
    operators_ci = {}
    for operator_key in operators:
        values = np.array(bootstrap_runs[operator_key])
        # two-sided interval: alpha/2 mass cut from each tail
        ci_left = np.percentile(values, alpha / 2 * 100)
        ci_right = np.percentile(values, 100 - alpha / 2 * 100)
        ci_mean = np.mean(values)  # center of the bootstrap distribution
        operators_ci[operator_key] = (ci_left, ci_mean, ci_right)

    return operators_ci
Those are reported by computing inferencing latency per request batch (currently, batch size = 1, and number of threads = 1). Not all variants provide those (work in progress). 20 | 21 | {% for entry in percentile_metrics_reports %} 22 | ### {{entry.variant_id}} 23 | 24 | {{entry.report}} 25 | {% endfor %} -------------------------------------------------------------------------------- /src/scripts/data_processing/generate_data/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/data_processing/generate_data/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: treelite_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 7 | - pip: 8 | - numpy==1.21.2 9 | - scikit-learn==0.24.2 10 | - azureml-defaults==1.35.0 11 | - azureml-mlflow==1.35.0 12 | - psutil==5.8.0 13 | -------------------------------------------------------------------------------- /src/scripts/data_processing/generate_data/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/data_processing/lightgbm_data2bin/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/data_processing/lightgbm_data2bin/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: lightgbm_data2bin_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 
7 | - pip: 8 | - numpy==1.21.2 9 | - scikit-learn==0.24.2 10 | - azureml-defaults==1.35.0 11 | - azureml-mlflow==1.35.0 12 | - psutil==5.8.0 13 | - lightgbm==3.2.1 14 | -------------------------------------------------------------------------------- /src/scripts/data_processing/lightgbm_data2bin/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/data_processing/lightgbm_data2bin/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: lightgbm_python_data2bin 3 | version: 1.0.1 4 | display_name: "Save LightGBM data to binary (Python API)" 5 | type: CommandComponent 6 | description: "Uses lightgbm.Dataset() construct and save_binary to save training/testing data in binary format." 7 | is_deterministic: true 8 | 9 | inputs: 10 | # Inputs 11 | train: 12 | type: AnyDirectory 13 | description: directory to the training data 14 | optional: false 15 | test: 16 | type: AnyDirectory 17 | description: directory to the testing data 18 | optional: false 19 | 20 | # Input Parameters 21 | header: 22 | type: Boolean 23 | optional: true 24 | description: "does data have a header, see https://lightgbm.readthedocs.io/en/latest/Parameters.html#header" 25 | label_column: 26 | type: String 27 | optional: true 28 | description: "specify label column, default 0, see https://lightgbm.readthedocs.io/en/latest/Parameters.html#label_column" 29 | group_column: 30 | type: String 31 | optional: true 32 | description: "specify group/query column, default '', see https://lightgbm.readthedocs.io/en/latest/Parameters.html#group_column" 33 | 34 | max_bin: 35 | type: Integer 36 | min: 1 37 | default: 255 38 | description: "https://lightgbm.readthedocs.io/en/latest/Parameters.html#max_bin" 39 | custom_params: 40 | type: String 
41 | optional: true 42 | description: "any lgbm param provided as json dictionary" 43 | 44 | # generic benchmark parameters 45 | verbose: 46 | type: Boolean 47 | optional: true 48 | custom_properties: 49 | type: String 50 | description: additional custom tags for the job 51 | optional: true 52 | 53 | outputs: 54 | output_train: 55 | type: AnyDirectory 56 | output_test: 57 | type: AnyDirectory 58 | 59 | command: >- 60 | python data2bin.py 61 | --train {inputs.train} 62 | --test {inputs.test} 63 | --output_train {outputs.output_train} 64 | --output_test {outputs.output_test} 65 | [--header {inputs.header}] 66 | [--label_column {inputs.label_column}] 67 | [--group_column {inputs.group_column}] 68 | --max_bin {inputs.max_bin} 69 | [--custom_params {inputs.custom_params}] 70 | [--verbose {inputs.verbose}] 71 | [--custom_properties {inputs.custom_properties}] 72 | 73 | environment: 74 | docker: 75 | image: mcr.microsoft.com/azureml/base:latest 76 | conda: 77 | conda_dependencies_file: conda_env.yml 78 | os: Linux 79 | -------------------------------------------------------------------------------- /src/scripts/data_processing/partition_data/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/data_processing/partition_data/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: partition_data_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 7 | - pip: 8 | - numpy==1.21.2 9 | - scikit-learn==0.24.2 10 | - azureml-defaults==1.35.0 11 | - azureml-mlflow==1.35.0 12 | - psutil==5.8.0 13 | 14 | -------------------------------------------------------------------------------- /src/scripts/data_processing/partition_data/partition.py: 
"""
Partitions input data (text/lines) into chunks for parallel processing.

NOTE: current script assumes all records are independent.
"""
import os
import sys
import argparse
import logging
from distutils.util import strtobool

# Make common.* importable by putting the sources root on PYTHONPATH
COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))

if COMMON_ROOT not in sys.path:
    print(f"Adding {COMMON_ROOT} to PYTHONPATH")
    sys.path.append(str(COMMON_ROOT))

# useful imports from common
from common.io import PartitioningEngine
from common.components import RunnableScript


class PartitionDataScript(RunnableScript):
    """Benchmark component wrapping PartitioningEngine to split a dataset."""

    def __init__(self):
        super().__init__(
            task="partition",
            framework="python",
            framework_version="n/a",
        )

    @classmethod
    def get_arg_parser(cls, parser=None):
        """Adds component/module arguments to a given argument parser.

        Args:
            parser (argparse.ArgumentParser): an argument parser instance

        Returns:
            ArgumentParser: the argument parser instance

        Notes:
            if parser is None, creates a new parser instance
        """
        # start from the generic benchmark arguments
        parser = RunnableScript.get_arg_parser(parser)

        # then register the script-specific arguments
        group = parser.add_argument_group(f"Partitioning arguments [{__name__}:{cls.__name__}]")
        group.add_argument("--input", dest="input", type=str, required=True, help="file/directory to split")
        group.add_argument("--output", dest="output", type=str, help="location to store partitioned files", required=True)
        group.add_argument("--mode", type=str, choices=PartitioningEngine.PARTITION_MODES, required=True, help="Partitioning mode")
        group.add_argument("--number", type=int, required=True, help="If roundrobin number of partition, if chunk number of records per partition")
        group.add_argument("--header", type=strtobool, required=False, default=False, help="Should we preserve firstline into each partition?")

        return parser

    def run(self, args, logger, metrics_logger, unknown_args):
        """Run script with arguments (the core of the component)

        Args:
            args (argparse.namespace): command line arguments provided to script
            logger (logging.getLogger() for this script)
            metrics_logger (common.metrics.MetricLogger)
            unknown_args (list[str]): list of arguments not recognized during argparse
        """
        # make sure the destination exists before writing partitions
        os.makedirs(args.output, exist_ok=True)

        # configure the partitioner from CLI arguments
        engine = PartitioningEngine(
            mode=args.mode,
            number=args.number,
            header=args.header,
            logger=logger,
        )

        logger.info("Running partitioning...")
        # time the whole partitioning pass as a benchmark metric
        with metrics_logger.log_time_block("time_partitioning"):
            engine.run(args.input, args.output)
def get_arg_parser(parser=None): 86 | """ To ensure compatibility with shrike unit tests """ 87 | return PartitionDataScript.get_arg_parser(parser) 88 | 89 | def main(cli_args=None): 90 | """ To ensure compatibility with shrike unit tests """ 91 | PartitionDataScript.main(cli_args) 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /src/scripts/data_processing/partition_data/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/data_processing/partition_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: partition_data 3 | version: 1.0.1 4 | display_name: "Partition Data" 5 | type: CommandComponent 6 | description: Split one dataset into multiple files 7 | is_deterministic: true 8 | 9 | inputs: 10 | input_data: 11 | type: AnyDirectory 12 | description: Directory of input data (flat text file) 13 | mode: 14 | type: Enum 15 | description: How data is partition 16 | enum: 17 | - roundrobin 18 | - chunk 19 | - append 20 | default: roundrobin 21 | number: 22 | type: Integer 23 | description: If roundrobin number of partition, if chunk number of records per 24 | partition 25 | min: 1 26 | header: 27 | type: Boolean 28 | description : "Should we preserve firstline into each partition?" 
29 | optional: True 30 | 31 | # generic benchmark parameters 32 | verbose: 33 | type: Boolean 34 | optional: true 35 | custom_properties: 36 | type: String 37 | description: additional custom tags for the job 38 | optional: true 39 | 40 | outputs: 41 | output_data: 42 | type: AnyDirectory 43 | description: partitioned dataset 44 | 45 | command: >- 46 | python partition.py 47 | --input {inputs.input_data} 48 | --output {outputs.output_data} 49 | --mode {inputs.mode} 50 | --number {inputs.number} 51 | [--header {inputs.header}] 52 | [--verbose {inputs.verbose}] 53 | [--custom_properties {inputs.custom_properties}] 54 | 55 | environment: 56 | docker: 57 | image: mcr.microsoft.com/azureml/base:latest 58 | conda: 59 | conda_dependencies_file: conda_env.yml 60 | os: Linux 61 | -------------------------------------------------------------------------------- /src/scripts/inferencing/custom_win_cli/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | build/ 6 | -------------------------------------------------------------------------------- /src/scripts/inferencing/custom_win_cli/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: custom_win_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 7 | - pip: 8 | - azureml-defaults==1.35.0 9 | - azureml-mlflow==1.35.0 10 | - psutil==5.8.0 11 | -------------------------------------------------------------------------------- /src/scripts/inferencing/custom_win_cli/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/inferencing/custom_win_cli/spec.yaml: -------------------------------------------------------------------------------- 1 | 
$schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: custom_win_cli_score 3 | version: 1.0.0 4 | display_name: "Custom Binaries (Windows)" 5 | type: CommandComponent 6 | description: "Running custom windows binaries for inferencing" 7 | is_deterministic: true 8 | inputs: 9 | data: 10 | type: AnyDirectory 11 | description: directory to the inference data 12 | optional: false 13 | model: 14 | type: AnyDirectory 15 | description: directory to the model 16 | optional: false 17 | n_threads: 18 | type: Integer 19 | optional: true 20 | verbose: 21 | type: Boolean 22 | optional: true 23 | custom_properties: 24 | type: String 25 | description: additional custom tags for the job 26 | optional: true 27 | 28 | outputs: 29 | predictions: 30 | type: AnyDirectory 31 | 32 | command: >- 33 | python score.py 34 | --data {inputs.data} 35 | --model {inputs.model} 36 | --output {outputs.predictions} 37 | [--num_threads {inputs.n_threads}] 38 | [--verbose {inputs.verbose}] 39 | [--custom_properties {inputs.custom_properties}] 40 | 41 | environment: 42 | docker: 43 | image: mcr.microsoft.com/azureml/windows-servercore-1809:latest 44 | conda: 45 | # conda file path is resolved after additional includes 46 | conda_dependencies_file: conda_env.yaml 47 | os: "Windows" 48 | -------------------------------------------------------------------------------- /src/scripts/inferencing/custom_win_cli/static_binaries/README.md: -------------------------------------------------------------------------------- 1 | # Static Custom Windows Binaries for Inferencing Benchmark 2 | 3 | Use this directory to store the binaries you will use for custom inferencing on Windows compute targets. 4 | 5 | It is highly recommended to compile your binaries statically. 6 | 7 | NOTE: This directory has been added to `.gitignore`. 
-------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_c_api/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | build/ -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_c_api/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.3.0" 8 | ARG lightgbm_benchmark_branch=main 9 | 10 | RUN apt-get update && \ 11 | apt-get -y install build-essential cmake 12 | 13 | # LIGHTGBM EXEC AND LIBRARY 14 | 15 | # Clone lightgbm official repository (master branch) 16 | RUN git clone --recursive https://github.com/microsoft/LightGBM && \ 17 | cd LightGBM && \ 18 | git checkout tags/v${lightgbm_version} 19 | 20 | # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm 21 | RUN cd /LightGBM && \ 22 | mkdir build && \ 23 | cd build && \ 24 | cmake .. && \ 25 | make -j$(nproc) 26 | 27 | # Prepend path to LightGBM LIB 28 | ENV PATH /LightGBM:$PATH 29 | 30 | # building lightgbm-benchmark binaries 31 | RUN git clone --recursive https://github.com/microsoft/lightgbm-benchmark.git && \ 32 | cd lightgbm-benchmark && \ 33 | git checkout ${lightgbm_benchmark_branch} 34 | 35 | # assuming lightgbm lib+includes are installed on the system 36 | RUN cd /lightgbm-benchmark/src/binaries/ && \ 37 | mkdir build && \ 38 | cd build && \ 39 | cmake -DLIGHTGBM_CLONE=/LightGBM .. && \ 40 | cmake --build . 
--target lightgbm_predict --config Release 41 | 42 | # provide env variable with path to built binaries 43 | ENV LIGHTGBM_BENCHMARK_BINARIES_PATH /lightgbm-benchmark/src/binaries/build 44 | RUN ls -l $LIGHTGBM_BENCHMARK_BINARIES_PATH 45 | 46 | ## ANACONDA ENVIRONMENT 47 | 48 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 49 | 50 | # Create conda environment 51 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 52 | python=3.8 pip=20.2.4 53 | 54 | # Prepend path to AzureML conda environment 55 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 56 | 57 | # Install pip dependencies 58 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 59 | pip install 'pandas>=1.1,<1.2' \ 60 | 'numpy>=1.10,<1.20' \ 61 | 'matplotlib==3.4.3' \ 62 | 'scipy~=1.5.0' \ 63 | 'scikit-learn~=0.24.1' \ 64 | 'azureml-core==1.35.0' \ 65 | 'azureml-defaults==1.35.0' \ 66 | 'azureml-mlflow==1.35.0' \ 67 | 'azureml-telemetry==1.35.0' \ 68 | 'mpi4py==3.1.1' 69 | 70 | RUN pip install --upgrade pip setuptools wheel && \ 71 | pip install 'cmake==3.21.0' 72 | 73 | # Install LightGBM Python API from build 74 | RUN cd /LightGBM/python-package/ && \ 75 | python setup.py install --precompile 76 | 77 | # This is needed for mpi to locate libpython 78 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 79 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_c_api/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | ../../../../docker/ 3 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_c_api/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: lightgbm_c_api_score 3 | version: 1.0.1 4 | display_name: "LightGBM Inferencing Probe (C API)" 5 | type: CommandComponent 6 | 
description: "LightGBM inferencing using the C API, this component is intended to measure latency, not to use for production inferencing scenarios." 7 | is_deterministic: true 8 | inputs: 9 | data: 10 | type: AnyDirectory 11 | description: directory to the inference data 12 | optional: false 13 | model: 14 | type: AnyDirectory 15 | description: directory to the model 16 | optional: false 17 | predict_disable_shape_check: 18 | type: Boolean 19 | description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data" 20 | default: False 21 | n_threads: 22 | type: Integer 23 | default: 1 24 | verbose: 25 | type: Boolean 26 | default: False 27 | custom_properties: 28 | type: String 29 | description: additional custom tags for the job 30 | optional: true 31 | 32 | outputs: 33 | predictions: 34 | type: AnyDirectory 35 | 36 | command: >- 37 | python score.py 38 | --data {inputs.data} 39 | --model {inputs.model} 40 | --num_threads {inputs.n_threads} 41 | --output {outputs.predictions} 42 | --predict_disable_shape_check {inputs.predict_disable_shape_check} 43 | --verbose {inputs.verbose} 44 | [--custom_properties {inputs.custom_properties}] 45 | 46 | environment: 47 | docker: 48 | build: 49 | # file path is resolved after additional includes 50 | dockerfile: file:./default.dockerfile 51 | conda: 52 | userManagedDependencies: true 53 | os: "Linux" 54 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_python/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_python/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM 
mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20211210.1" 3 | 4 | # Those arguments will NOT be used by AzureML 5 | # they are here just to allow for lightgbm-benchmark build to actually check 6 | # dockerfiles in a PR against their actual branch 7 | ARG lightgbm_version="3.3.0" 8 | 9 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 10 | 11 | # Create conda environment 12 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 13 | python=3.8 pip=20.2.4 14 | 15 | # Prepend path to AzureML conda environment 16 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 17 | 18 | # Install pip dependencies 19 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 20 | pip install 'pandas>=1.1,<1.2' \ 21 | 'numpy>=1.10,<1.20' \ 22 | 'matplotlib==3.4.3' \ 23 | 'scipy~=1.5.0' \ 24 | 'scikit-learn~=0.24.1' \ 25 | 'azureml-core==1.35.0' \ 26 | 'azureml-defaults==1.35.0' \ 27 | 'azureml-mlflow==1.35.0' \ 28 | 'azureml-telemetry==1.35.0' \ 29 | 'mpi4py==3.1.1' 30 | 31 | # install lightgbm with mpi 32 | RUN pip install --upgrade pip setuptools wheel && \ 33 | pip install 'cmake==3.21.0' && \ 34 | pip install lightgbm==${lightgbm_version} 35 | 36 | # This is needed for mpi to locate libpython 37 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 38 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_python/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | ../../../../docker/ 3 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_python/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: lightgbm_python_score 3 | version: 1.0.1 4 | display_name: "LightGBM Inferencing (Python API)" 
5 | type: CommandComponent 6 | description: "LightGBM inferencing using the Python API." 7 | is_deterministic: true 8 | inputs: 9 | data: 10 | type: AnyDirectory 11 | description: directory to the inference data 12 | optional: false 13 | model: 14 | type: AnyDirectory 15 | description: directory to the model 16 | optional: false 17 | predict_disable_shape_check: 18 | type: Boolean 19 | description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data" 20 | default: False 21 | n_threads: 22 | type: Integer 23 | optional: true 24 | verbose: 25 | type: Boolean 26 | default: False 27 | custom_properties: 28 | type: String 29 | description: additional custom tags for the job 30 | optional: true 31 | 32 | outputs: 33 | predictions: 34 | type: AnyDirectory 35 | 36 | command: >- 37 | python score.py 38 | --data {inputs.data} 39 | --model {inputs.model} 40 | --output {outputs.predictions} 41 | [--num_threads {inputs.n_threads}] 42 | --predict_disable_shape_check {inputs.predict_disable_shape_check} 43 | --verbose {inputs.verbose} 44 | [--custom_properties {inputs.custom_properties}] 45 | 46 | environment: 47 | docker: 48 | build: 49 | # file path is resolved after additional includes 50 | dockerfile: file:./default.dockerfile 51 | conda: 52 | userManagedDependencies: true 53 | os: Linux 54 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_ray/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_ray/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest 2 | LABEL 
lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" 3 | 4 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 5 | 6 | # Create conda environment 7 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 8 | python=3.8 pip=20.2.4 9 | 10 | # Prepend path to AzureML conda environment 11 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 12 | 13 | # Install pip dependencies 14 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 15 | pip install 'pandas>=1.1,<1.2' \ 16 | 'numpy>=1.10,<1.20' \ 17 | 'matplotlib==3.4.3' \ 18 | 'scipy~=1.5.0' \ 19 | 'scikit-learn~=0.24.1' \ 20 | 'azureml-core==1.35.0' \ 21 | 'azureml-defaults==1.35.0' \ 22 | 'azureml-mlflow==1.35.0' \ 23 | 'azureml-telemetry==1.35.0' \ 24 | 'mpi4py==3.1.1' \ 25 | 'ray==1.9.2' \ 26 | 'lightgbm-ray==0.1.2' 27 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_ray/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | ../../../../docker/ 3 | -------------------------------------------------------------------------------- /src/scripts/inferencing/lightgbm_ray/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: lightgbm_ray_score 3 | version: 1.0.1 4 | display_name: "LightGBM Inferencing (Ray)" 5 | type: CommandComponent 6 | description: "LightGBM inferencing using the Ray Python API." 
7 | is_deterministic: true 8 | inputs: 9 | data: 10 | type: AnyDirectory 11 | description: directory to the inference data 12 | optional: false 13 | model: 14 | type: AnyDirectory 15 | description: directory to the model 16 | optional: false 17 | predict_disable_shape_check: 18 | type: Boolean 19 | description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data" 20 | default: False 21 | n_threads: 22 | type: Integer 23 | optional: true 24 | verbose: 25 | type: Boolean 26 | default: False 27 | custom_properties: 28 | type: String 29 | description: additional custom tags for the job 30 | optional: true 31 | 32 | outputs: 33 | predictions: 34 | type: AnyDirectory 35 | 36 | command: >- 37 | python score.py 38 | --data {inputs.data} 39 | --model {inputs.model} 40 | --output {outputs.predictions} 41 | [--num_threads {inputs.n_threads}] 42 | --predict_disable_shape_check {inputs.predict_disable_shape_check} 43 | --verbose {inputs.verbose} 44 | [--custom_properties {inputs.custom_properties}] 45 | --cluster_auto_setup True 46 | 47 | environment: 48 | docker: 49 | build: 50 | # file path is resolved after additional includes 51 | dockerfile: file:./default.dockerfile 52 | conda: 53 | userManagedDependencies: true 54 | os: Linux 55 | -------------------------------------------------------------------------------- /src/scripts/inferencing/treelite_python/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: treelite_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 7 | - pip: 8 | - azureml-defaults==1.35.0 9 | - azureml-mlflow==1.35.0 10 | - psutil==5.8.0 11 | - treelite==2.1.0 12 | - treelite_runtime==2.1.0 13 | - pandas>=1.1,<1.2 14 | - numpy>=1.10,<1.20 15 | -------------------------------------------------------------------------------- 
/src/scripts/inferencing/treelite_python/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/inferencing/treelite_python/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: treelite_score 3 | version: 1.0.1 4 | display_name: "Treelite Inferencing (Python API)" 5 | type: CommandComponent 6 | description: "Use Treelite Python API for inferencing" 7 | is_deterministic: true 8 | 9 | inputs: 10 | data: 11 | type: AnyDirectory 12 | description: directory to the inference data 13 | optional: false 14 | compiled_model: 15 | type: AnyDirectory 16 | description: directory to the model 17 | optional: false 18 | n_threads: 19 | type: Integer 20 | optional: true 21 | verbose: 22 | type: Boolean 23 | default: False 24 | custom_properties: 25 | type: String 26 | description: additional custom tags for the job 27 | optional: true 28 | 29 | # path for running command is resolved after additional includes 30 | command: >- 31 | python score.py 32 | --data {inputs.data} 33 | --so_path {inputs.compiled_model} 34 | [--num_threads {inputs.n_threads}] 35 | --verbose {inputs.verbose} 36 | [--custom_properties {inputs.custom_properties}] 37 | 38 | environment: 39 | conda: 40 | # conda file path is resolved after additional includes 41 | conda_dependencies_file: conda_env.yaml 42 | os: Linux 43 | -------------------------------------------------------------------------------- /src/scripts/model_transformation/treelite_compile/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- 
/src/scripts/model_transformation/treelite_compile/compile_treelite.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | """ 5 | TreeLite/Python inferencing script 6 | """ 7 | import os 8 | import sys 9 | import argparse 10 | import logging 11 | import numpy 12 | from distutils.util import strtobool 13 | import pandas as pd 14 | import treelite, treelite_runtime 15 | 16 | # Add the right path to PYTHONPATH 17 | # so that you can import from common.* 18 | COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) 19 | 20 | if COMMON_ROOT not in sys.path: 21 | print(f"Adding {COMMON_ROOT} to PYTHONPATH") 22 | sys.path.append(str(COMMON_ROOT)) 23 | 24 | # useful imports from common 25 | from common.components import RunnableScript 26 | from common.io import input_file_path 27 | 28 | 29 | class TreeLightCompileScript(RunnableScript): 30 | def __init__(self): 31 | super().__init__( 32 | task = 'compile', 33 | framework = 'treelite_python', 34 | framework_version = treelite.__version__ 35 | ) 36 | 37 | @classmethod 38 | def get_arg_parser(cls, parser=None): 39 | """Adds component/module arguments to a given argument parser. 
40 | 41 | Args: 42 | parser (argparse.ArgumentParser): an argument parser instance 43 | 44 | Returns: 45 | ArgumentParser: the argument parser instance 46 | 47 | Notes: 48 | if parser is None, creates a new parser instance 49 | """ 50 | # add generic arguments 51 | parser = RunnableScript.get_arg_parser(parser) 52 | 53 | group_i = parser.add_argument_group(f"Input Data [{__name__}:{cls.__name__}]") 54 | group_i.add_argument("--model", 55 | required=False, type=input_file_path, help="Exported model location (file path)") 56 | 57 | group_treelite = parser.add_argument_group(f"Treelite parameters [{__name__}:{cls.__name__}]") 58 | group_treelite.add_argument("--model_format", 59 | required=False, default="lightgbm", type=str, help="format of the input --model") 60 | group_treelite.add_argument("--so_path", 61 | required=False, default="./mymodel.so", type=str, help="full path to the saved model") 62 | group_treelite.add_argument("--toolchain", 63 | required=False, default="gcc", type=str, help="toolchain for compiling model") 64 | 65 | return parser 66 | 67 | 68 | def run(self, args, logger, metrics_logger, unknown_args): 69 | """Run script with arguments (the core of the component) 70 | 71 | Args: 72 | args (argparse.namespace): command line arguments provided to script 73 | logger (logging.getLogger() for this script) 74 | metrics_logger (common.metrics.MetricLogger) 75 | unknown_args (list[str]): list of arguments not recognized during argparse 76 | """ 77 | logger.info(f"Converting model to Treelite") 78 | with metrics_logger.log_time_block("model_compilation"): 79 | model = treelite.Model.load( 80 | args.model, 81 | model_format=args.model_format 82 | ) 83 | model.export_lib( 84 | toolchain=args.toolchain, 85 | libpath=args.so_path, 86 | verbose=True, 87 | params={'parallel_comp':16} 88 | ) 89 | 90 | 91 | def get_arg_parser(parser=None): 92 | """ To ensure compatibility with shrike unit tests """ 93 | return TreeLightCompileScript.get_arg_parser(parser) 94 | 95 | 
def main(cli_args=None): 96 | """ To ensure compatibility with shrike unit tests """ 97 | TreeLightCompileScript.main(cli_args) 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /src/scripts/model_transformation/treelite_compile/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: treelite_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 7 | - pip: 8 | - azureml-defaults==1.35.0 9 | - azureml-mlflow==1.35.0 10 | - psutil==5.8.0 11 | - treelite==2.1.0 12 | - treelite_runtime==2.1.0 13 | - pandas>=1.1,<1.2 14 | - numpy>=1.10,<1.20 15 | -------------------------------------------------------------------------------- /src/scripts/model_transformation/treelite_compile/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/model_transformation/treelite_compile/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | name: treelite_compile 3 | version: 1.0.1 4 | display_name: "Treelite Compile" 5 | type: CommandComponent 6 | description: treelite compile 7 | is_deterministic: true 8 | 9 | inputs: 10 | model: 11 | type: AnyDirectory 12 | description: directory to the model 13 | optional: false 14 | toolchain: 15 | type: Enum 16 | default: "gcc" 17 | enum: 18 | - gcc 19 | - msvc 20 | - clang 21 | model_format: 22 | type: Enum 23 | default: "lightgbm" 24 | enum: 25 | - lightgbm 26 | - xgboost 27 | - xgboost_json 28 | verbose: 29 | type: Boolean 30 | optional: true 31 | custom_properties: 32 | type: String 33 | description: additional custom tags for the job 34 | optional: true 35 | 36 | outputs: 37 | compiled_model: 38 | type: 
AnyDirectory 39 | description: compiled model binary 40 | 41 | # path for running command is resolved after additional includes 42 | command: >- 43 | python compile_treelite.py 44 | --model {inputs.model} 45 | --so_path {outputs.compiled_model} 46 | [--verbose {inputs.verbose}] 47 | [--custom_properties {inputs.custom_properties}] 48 | 49 | environment: 50 | conda: 51 | # conda file path is resolved after additional includes 52 | conda_dependencies_file: conda_env.yaml 53 | os: Linux 54 | -------------------------------------------------------------------------------- /src/scripts/sample/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/sample/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/src/scripts/sample/__init__.py -------------------------------------------------------------------------------- /src/scripts/sample/conda_env.yaml: -------------------------------------------------------------------------------- 1 | name: sample_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=20.0 7 | - pip: 8 | - azureml-defaults==1.35.0 9 | - azureml-mlflow==1.35.0 10 | - psutil==5.8.0 11 | -------------------------------------------------------------------------------- /src/scripts/sample/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/sample/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-2-0/CommandComponent.json 2 | 
name: sample_module 3 | version: 1.0.1 4 | display_name: "Sample Module" 5 | type: CommandComponent 6 | description: A sample module to implement your own 7 | is_deterministic: true 8 | 9 | inputs: 10 | data: 11 | type: AnyDirectory 12 | description: directory to the inference data 13 | optional: false 14 | model: 15 | type: AnyDirectory 16 | description: directory to the model 17 | optional: false 18 | verbose: 19 | type: Boolean 20 | optional: true 21 | custom_properties: 22 | type: String 23 | description: additional custom tags for the job 24 | optional: true 25 | 26 | outputs: 27 | predictions: 28 | type: AnyDirectory 29 | description: output of the sample 30 | 31 | command: >- 32 | python sample.py 33 | --data {inputs.data} 34 | --model {inputs.model} 35 | --output {outputs.predictions} 36 | [--verbose {inputs.verbose}] 37 | [--custom_properties {inputs.custom_properties}] 38 | 39 | environment: 40 | conda: 41 | # conda file path is resolved after additional includes 42 | conda_dependencies_file: conda_env.yaml 43 | os: Linux 44 | -------------------------------------------------------------------------------- /src/scripts/training/lightgbm_python/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/training/lightgbm_python/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220516.v1 2 | LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.1/20211210.1" 3 | 4 | # https://github.com/microsoft/lightgbm-transform/blob/main/docs/Installation-Guide.rst 5 | # Install CMake, gcc, g++, boost. 
6 | ENV ACCEPT_EULA=Y 7 | RUN apt-get update && apt-get -y upgrade && DEBIAN_FRONTEND="noninteractive" apt-get install -y libboost-all-dev gcc g++ wget cmake git curl libtinfo5 8 | 9 | # Install LLVM with RTTI feature. 10 | WORKDIR /root 11 | RUN wget https://github.com/llvm/llvm-project/archive/refs/tags/llvmorg-3.5.1.tar.gz && tar zxvf llvmorg-3.5.1.tar.gz 12 | WORKDIR /root/llvm-project-llvmorg-3.5.1/llvm 13 | RUN mkdir build && cd build && cmake -DLLVM_REQUIRES_RTTI=1 .. && make -j4 && make install 14 | 15 | # Install bond. 16 | WORKDIR /root 17 | RUN git clone --recursive https://github.com/microsoft/bond.git 18 | RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y clang zlib1g-dev 19 | RUN curl -sSL https://get.haskellstack.org/ | sh 20 | WORKDIR /root/bond 21 | RUN mkdir build && cd build && cmake -DBOND_ENABLE_GRPC=FALSE .. && make -j4 && make install 22 | 23 | 24 | # Those arguments will NOT be used by AzureML 25 | # they are here just to allow for lightgbm-benchmark build to actually check 26 | # dockerfiles in a PR against their actual branch 27 | ARG lightgbm_version="3.3.1" 28 | ARG lightgbm_transform_version="3.3.1.post1" 29 | 30 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 31 | 32 | # Create conda environment 33 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 34 | python=3.8 pip=20.2.4 35 | 36 | # Prepend path to AzureML conda environment 37 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 38 | 39 | # Install pip dependencies 40 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 41 | pip install 'pandas>=1.1,<1.2' \ 42 | 'numpy>=1.10,<1.20' \ 43 | 'matplotlib==3.4.3' \ 44 | 'scipy~=1.5.0' \ 45 | 'scikit-learn~=0.24.1' \ 46 | 'azureml-core==1.35.0' \ 47 | 'azureml-defaults==1.35.0' \ 48 | 'azureml-mlflow==1.35.0' \ 49 | 'azureml-telemetry==1.35.0' \ 50 | 'mpi4py==3.1.1' \ 51 | 'omegaconf' 52 | 53 | # install lightgbm with mpi 54 | RUN pip install --upgrade pip setuptools wheel && \ 55 | pip install 'cmake==3.21.0' && \ 56 | pip install 
lightgbm==${lightgbm_version} --install-option=--mpi &&\ 57 | pip install lightgbm-transform==${lightgbm_transform_version} 58 | 59 | # This is needed for mpi to locate libpython 60 | ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH 61 | -------------------------------------------------------------------------------- /src/scripts/training/lightgbm_python/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/training/lightgbm_python/sweep_spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: http://azureml/sdk-1-5/SweepComponent.json 2 | name: lightgbm.train.sweep.python.basic 3 | version: 1.0.6 4 | display_name: LightGBM Training Sweep 5 | type: SweepComponent 6 | description: "Hyperparameter optimization for LightGBM training, capable of distributed training (MPI)" 7 | is_deterministic: false 8 | 9 | #outputs: 10 | # metrics: 11 | # type: metrics 12 | # description: metrics data 13 | # saved_model: 14 | # type: path 15 | # description: path of the saved_model of trial run 16 | # training_stats: 17 | # type: path 18 | # description: writes some stats file of the trial component. 
19 | 20 | trial: file:./spec.yaml 21 | 22 | algorithm: random 23 | 24 | search_space: 25 | num_iterations: 26 | type: choice 27 | values: [50] 28 | num_leaves: 29 | type: choice 30 | values: [30] 31 | min_data_in_leaf: 32 | type: choice 33 | values: [20] # default value from lightgbm docs https://testlightgbm.readthedocs.io/en/latest/Parameters.html#learning-control-parameters 34 | learning_rate: 35 | type: choice 36 | values: [0.1] # default value from lightgbm docs https://testlightgbm.readthedocs.io/en/latest/Parameters.html#learning-control-parameters 37 | max_bin: 38 | type: choice 39 | values: [255] # default value from lightgbm docs https://testlightgbm.readthedocs.io/en/latest/Parameters.html#learning-control-parameters 40 | feature_fraction: 41 | type: choice 42 | values: [1.0] # default value from lightgbm docs https://testlightgbm.readthedocs.io/en/latest/Parameters.html#learning-control-parameters 43 | 44 | objective: 45 | primary_metric: 46 | default: node_0/valid_0.rmse 47 | enum: 48 | # NOTE: we have to declare all the possible metrics names here 49 | - node_0/valid_0.l1 50 | - node_0/valid_0.l2 51 | - node_0/valid_0.rmse 52 | - node_0/valid_0.quantile 53 | - node_0/valid_0.mape 54 | - node_0/valid_0.huber 55 | - node_0/valid_0.fair 56 | - node_0/valid_0.poisson 57 | - node_0/valid_0.gamma 58 | - node_0/valid_0.gamma_deviance 59 | - node_0/valid_0.tweedie 60 | - node_0/valid_0.ndcg 61 | - node_0/valid_0.map 62 | - node_0/valid_0.auc 63 | - node_0/valid_0.average_precision 64 | - node_0/valid_0.binary_logloss 65 | - node_0/valid_0.binary_error 66 | - node_0/valid_0.auc_mu 67 | - node_0/valid_0.multi_logloss 68 | - node_0/valid_0.multi_error 69 | - node_0/valid_0.cross_entropy 70 | - node_0/valid_0.cross_entropy_lambda 71 | - node_0/valid_0.kullback_leibler 72 | goal: minimize 73 | 74 | limits: 75 | max_total_trials: 4 76 | max_concurrent_trials: 4 77 | timeout_minutes: 40 78 | 
-------------------------------------------------------------------------------- /src/scripts/training/lightgbm_ray/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /src/scripts/training/lightgbm_ray/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest 2 | LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" 3 | 4 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 5 | 6 | # Create conda environment 7 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 8 | python=3.8 pip=20.2.4 9 | 10 | # Prepend path to AzureML conda environment 11 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 12 | 13 | # Install pip dependencies 14 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 15 | pip install 'pandas>=1.1,<1.2' \ 16 | 'numpy>=1.10,<1.20' \ 17 | 'matplotlib==3.4.3' \ 18 | 'scipy~=1.5.0' \ 19 | 'scikit-learn~=0.24.1' \ 20 | 'azureml-core==1.35.0' \ 21 | 'azureml-defaults==1.35.0' \ 22 | 'azureml-mlflow==1.35.0' \ 23 | 'azureml-telemetry==1.35.0' \ 24 | 'mpi4py==3.1.1' \ 25 | 'ray==1.9.2' \ 26 | 'lightgbm-ray==0.1.2' 27 | -------------------------------------------------------------------------------- /src/scripts/training/lightgbm_ray/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/training/ray_tune/.amlignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 
-------------------------------------------------------------------------------- /src/scripts/training/ray_tune/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest 2 | LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" 3 | 4 | ARG lightgbm_version="3.3.0" 5 | 6 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 7 | 8 | # Create conda environment 9 | RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ 10 | python=3.8 pip=20.2.4 11 | 12 | # Prepend path to AzureML conda environment 13 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 14 | 15 | # Install pip dependencies 16 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 17 | pip install 'pandas>=1.1,<1.2' \ 18 | 'numpy>=1.10,<1.20' \ 19 | 'matplotlib==3.4.3' \ 20 | 'scipy~=1.5.0' \ 21 | 'scikit-learn~=0.24.1' \ 22 | 'azureml-core==1.35.0' \ 23 | 'azureml-defaults==1.35.0' \ 24 | 'azureml-mlflow==1.35.0' \ 25 | 'azureml-telemetry==1.35.0' \ 26 | 'ray==1.9.2' \ 27 | 'flaml==0.9.6' \ 28 | 'mpi4py==3.1.1' \ 29 | 'hpbandster==0.7.4' \ 30 | 'ConfigSpace==0.5.0' \ 31 | 'optuna==2.8.0' \ 32 | 'protobuf==3.20.1' \ 33 | lightgbm==${lightgbm_version} -------------------------------------------------------------------------------- /src/scripts/training/ray_tune/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /src/scripts/training/ray_tune_distributed/default.dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest 2 | LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" 3 | 4 | ARG lightgbm_version="3.3.0" 5 | 6 | ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm 7 | 8 | # Create conda environment 9 | RUN conda create 
-p $AZUREML_CONDA_ENVIRONMENT_PATH \ 10 | python=3.8 pip=20.2.4 11 | 12 | # Prepend path to AzureML conda environment 13 | ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH 14 | 15 | # Install pip dependencies 16 | RUN HOROVOD_WITH_TENSORFLOW=1 \ 17 | pip install 'pandas>=1.1,<1.2' \ 18 | 'numpy>=1.10,<1.20' \ 19 | 'matplotlib==3.4.3' \ 20 | 'scipy~=1.5.0' \ 21 | 'scikit-learn~=0.24.1' \ 22 | 'azureml-core==1.35.0' \ 23 | 'azureml-defaults==1.35.0' \ 24 | 'azureml-mlflow==1.35.0' \ 25 | 'azureml-telemetry==1.35.0' \ 26 | 'ray==1.9.2' \ 27 | 'flaml==0.9.6' \ 28 | 'lightgbm-ray==0.1.4' \ 29 | 'mpi4py==3.1.1' \ 30 | 'protobuf==3.20.1' -------------------------------------------------------------------------------- /src/scripts/training/ray_tune_distributed/spec.additional_includes: -------------------------------------------------------------------------------- 1 | ../../../common/ 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/lightgbm-benchmark/3d124a7591e580b5e39e9078cf83b711205f27dc/tests/__init__.py -------------------------------------------------------------------------------- /tests/aml/test_components.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | """ 5 | PyTest suite for testing if each run.py is aligned with module specification: 6 | 7 | > Status: this code relates to the _recipe_ and is a _proposition_ 8 | """ 9 | import pytest 10 | import os 11 | 12 | from shrike.pipeline.testing.components import ( 13 | component_spec_yaml_exists_and_is_parsable, 14 | ) 15 | from shrike.pipeline.testing.components import component_uses_private_acr 16 | from shrike.pipeline.testing.components import component_uses_private_python_feed 17 | from shrike.pipeline.testing.components import component_run_py_import 18 | from shrike.pipeline.testing.components import component_run_get_arg_parser 19 | from shrike.pipeline.testing.components import ( 20 | if_arguments_from_component_spec_match_script_argparse, 21 | ) 22 | 23 | COMPONENT_ROOT_FOLDER = os.path.abspath( 24 | os.path.join(os.path.dirname(__file__), "..", "..", "src", "scripts") 25 | ) 26 | 27 | # modules that should ALSO pass advanced tests (design pattern) 28 | COMPONENT_SPEC_FILES = [ 29 | "sample/spec.yaml", 30 | 31 | "data_processing/generate_data/spec.yaml", 32 | "data_processing/lightgbm_data2bin/spec.yaml", 33 | "data_processing/partition_data/spec.yaml", 34 | 35 | "training/lightgbm_python/spec.yaml", 36 | "training/lightgbm_ray/spec.yaml", 37 | "training/ray_tune/spec.yaml", 38 | 39 | "model_transformation/treelite_compile/spec.yaml", 40 | 41 | "inferencing/lightgbm_python/spec.yaml", 42 | "inferencing/lightgbm_c_api/spec.yaml", 43 | "inferencing/lightgbm_ray/spec.yaml", 44 | "inferencing/custom_win_cli/spec.yaml", 45 | "inferencing/treelite_python/spec.yaml", 46 | ] 47 | 48 | 49 | ### BASIC TESTS ### 50 | # for basic module designs (minimal wrappers) 51 | 52 | @pytest.mark.parametrize("component_spec_path", COMPONENT_SPEC_FILES) 53 | def test_component_run_py_import(component_spec_path): 54 | """Try importing run.py, just to check if basic script passes syntax/imports checks""" 55 | component_run_py_import( 56 | os.path.join(COMPONENT_ROOT_FOLDER, 
component_spec_path) 57 | ) 58 | 59 | 60 | @pytest.mark.parametrize("component_spec_path", COMPONENT_SPEC_FILES) 61 | def test_component_spec_yaml_exists_and_is_parsable(component_spec_path): 62 | """Try loading and parsing the component spec yaml file""" 63 | component_spec_yaml_exists_and_is_parsable( 64 | os.path.join(COMPONENT_ROOT_FOLDER, component_spec_path) 65 | ) 66 | 67 | 68 | ### ADVANCED TESTS ### 69 | # for module implementing full design pattern (get_arg_parser()) 70 | 71 | @pytest.mark.parametrize("component_spec_path", COMPONENT_SPEC_FILES) 72 | def test_component_run_get_arg_parser(component_spec_path): 73 | """Tests if component run.py has function get_arg_parser(parser)""" 74 | component_run_get_arg_parser( 75 | os.path.join(COMPONENT_ROOT_FOLDER, component_spec_path) 76 | ) 77 | 78 | 79 | @pytest.mark.parametrize("component_spec_path", COMPONENT_SPEC_FILES) 80 | def test_if_arguments_from_component_spec_match_script_argparse(component_spec_path): 81 | """Tests alignment between module_spec arguments and script parser arguments""" 82 | if_arguments_from_component_spec_match_script_argparse( 83 | os.path.join(COMPONENT_ROOT_FOLDER, component_spec_path) 84 | ) 85 | 86 | 87 | # NOTE: this test has been disabled because it requires exception re-throw in compliant_handle() 88 | # @pytest.mark.parametrize("module", MODULE_MANIFEST_ADVANCED) 89 | # def test_script_main_with_synthetic_arguments(mocker, module): 90 | # """Tests alignment between module_spec arguments and script parser arguments""" 91 | # script_main_with_synthetic_arguments(module, mocker) 92 | -------------------------------------------------------------------------------- /tests/common/test_aml.py: -------------------------------------------------------------------------------- 1 | """Tests src/common/aml.py""" 2 | import os 3 | import pytest 4 | from unittest.mock import call, Mock, patch 5 | import time 6 | import json 7 | 8 | from common.aml import apply_sweep_settings 9 | from 
common.aml import format_run_name 10 | from common.aml import load_dataset_from_data_input_spec 11 | from common.aml import dataset_from_dstore_path 12 | 13 | 14 | def test_format_run_name(): 15 | """ Tests format_run_name() """ 16 | test_cases = [ 17 | { 18 | 'input': "run name foo", 19 | 'expected':"run_name_foo" 20 | }, 21 | { 22 | 'input': "abcd01234", 23 | 'expected':"abcd01234" 24 | }, 25 | { 26 | 'input': "gen 1000samples+3train*foo", 27 | 'expected':"gen_1000samples_3train_foo" 28 | }, 29 | { 30 | 'input': "a"*1000, 31 | 'expected': "a"*255 32 | } 33 | ] 34 | 35 | for test_case in test_cases: 36 | assert format_run_name(test_case['input']) == test_case['expected'] 37 | -------------------------------------------------------------------------------- /tests/common/test_data.py: -------------------------------------------------------------------------------- 1 | """Tests src/common/data.py""" 2 | import os 3 | import pytest 4 | from unittest.mock import call, Mock, patch 5 | 6 | from common.data import RegressionDataGenerator 7 | 8 | def test_regression_data_generator(): 9 | """Tests format of outputs of RegressionDataGenerator""" 10 | generator = RegressionDataGenerator( 11 | batch_size=64, 12 | n_features=100, 13 | n_informative=50, 14 | bias=1.0, 15 | noise=1.0, 16 | seed=4 17 | ) 18 | 19 | for i in range(10): 20 | batch = generator.generate() 21 | 22 | assert batch is not None 23 | assert isinstance(batch, tuple) 24 | assert len(batch) == 2 25 | 26 | X, y = batch 27 | assert X is not None 28 | assert y is not None 29 | 30 | assert X.shape == (64, 100) 31 | assert y.shape == (64,) 32 | 33 | def test_regression_data_generator_reproducibility(): 34 | """Tests initializing generator with seeds""" 35 | generator1 = RegressionDataGenerator( 36 | batch_size=64, 37 | n_features=100, 38 | n_informative=50, 39 | bias=1.0, 40 | noise=1.0, 41 | seed=4 42 | ) 43 | X1,y1 = generator1.generate() 44 | 45 | generator2 = RegressionDataGenerator( 46 | batch_size=64, 47 | 
n_features=100, 48 | n_informative=50, 49 | bias=1.0, 50 | noise=1.0, 51 | seed=5 52 | ) 53 | X2,y2 = generator2.generate() 54 | 55 | generator3 = RegressionDataGenerator( 56 | batch_size=64, 57 | n_features=100, 58 | n_informative=50, 59 | bias=1.0, 60 | noise=1.0, 61 | seed=4 # <<< Equal to generator 1 62 | ) 63 | X3,y3 = generator3.generate() 64 | 65 | # if using same seed twice, should be equal strictly 66 | assert (X1 == X3).all() 67 | assert (y1 == y3).all() 68 | 69 | # if using different seeds, likely to be different 70 | assert (X1 != X2).all() 71 | assert (y1 != y2).all() 72 | -------------------------------------------------------------------------------- /tests/common/test_io.py: -------------------------------------------------------------------------------- 1 | """Tests src/common/io.py""" 2 | import os 3 | import pytest 4 | 5 | from common.io import input_file_path 6 | 7 | 8 | def test_input_file_path_success(temporary_dir): 9 | """Tests input_file_path function with a unique file""" 10 | test1_dir = os.path.join(temporary_dir, "test1") 11 | os.makedirs(test1_dir, exist_ok=False) 12 | with open(os.path.join(test1_dir, "random.out"), "w") as out_file: 13 | out_file.write("something") 14 | 15 | full_file_path = os.path.join(test1_dir, "random.out") 16 | assert input_file_path(test1_dir) == full_file_path 17 | assert input_file_path(full_file_path) == full_file_path 18 | 19 | def test_input_file_path_failure(temporary_dir): 20 | """Tests input_file_path function when 2 files are provided (should except)""" 21 | test2_dir = os.path.join(temporary_dir, "test2") 22 | os.makedirs(test2_dir, exist_ok=False) 23 | 24 | with open(os.path.join(test2_dir, "random1.out"), "w") as out_file: 25 | out_file.write("something") 26 | with open(os.path.join(test2_dir, "random2.out"), "w") as out_file: 27 | out_file.write("something else") 28 | 29 | with pytest.raises(Exception): 30 | assert input_file_path(test2_dir) 31 | 
-------------------------------------------------------------------------------- /tests/common/test_lightgbm_utils.py: -------------------------------------------------------------------------------- 1 | """Tests src/common/io.py""" 2 | import os 3 | import pytest 4 | from unittest.mock import call, Mock, patch 5 | 6 | from common.lightgbm_utils import LightGBMCallbackHandler 7 | from lightgbm.callback import CallbackEnv 8 | 9 | def test_lightgbm_callback_handler(): 10 | metrics_logger = Mock() 11 | 12 | callback_handler = LightGBMCallbackHandler( 13 | metrics_logger, metrics_prefix=None, metrics_suffix=None 14 | ) 15 | 16 | # namedtuple 17 | # see https://lightgbm.readthedocs.io/en/latest/_modules/lightgbm/callback.html 18 | callback_env = CallbackEnv( 19 | None, # model 20 | {"foo_param": 0.32}, # params 21 | 3, # iteration 22 | 0, # begin_iteration 23 | 5, # end_iteration 24 | [ 25 | # list of tuples 26 | ( 27 | "valid_0", # dataset name 28 | "rmse", # evaluation name 29 | 12345.0, # result 30 | None, # _ 31 | ), 32 | ( 33 | "valid_0", # dataset name 34 | "l2", # evaluation name 35 | 3456.0, # result 36 | None, # _ 37 | ) 38 | ] 39 | ) 40 | callback_handler.callback(callback_env) 41 | 42 | metrics_logger.log_metric.assert_has_calls( 43 | [ 44 | call(key="valid_0.rmse", value=12345.0, step=3), 45 | call(key="valid_0.l2", value=3456.0, step=3) 46 | ], 47 | any_order=True 48 | ) 49 | -------------------------------------------------------------------------------- /tests/common/test_math.py: -------------------------------------------------------------------------------- 1 | """Tests src/common/math.py""" 2 | import os 3 | import pytest 4 | import numpy as np 5 | 6 | from common.math import bootstrap_ci 7 | 8 | def test_bootstrap_ci_fixed_seed(): 9 | """Testing the bootstrap_ci method, but we can't have a non-deterministic test here. 
""" 10 | sample_data = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0]) 11 | operators={ 12 | 'mean':np.mean, 13 | 'p90': (lambda x : np.percentile(x, 90)), 14 | 'p99': (lambda x : np.percentile(x, 99)), 15 | } 16 | 17 | np.random.seed(404) # fixed const 18 | 19 | # because we're fixing the seed, we can actually go deeper 20 | expected_values = { 21 | 'mean': (0.30000000000000004, 0.99395, 2.1624999999999996), 22 | 'p90': (0.5, 2.36413, 5.0), 23 | 'p99': (0.593, 3.469213, 5.0), 24 | } 25 | 26 | returned_values = bootstrap_ci( 27 | sample_data, 28 | iterations=1000, 29 | operators=operators, 30 | confidence_level=0.95 31 | ) 32 | 33 | assert returned_values == expected_values 34 | 35 | 36 | def test_bootstrap_ci_no_seed(): 37 | """Testing the bootstrap_ci method, but we can't have a non-deterministic test here. """ 38 | np.random.seed(None) # not const 39 | 40 | sample_data = np.random.rand(100) 41 | operators={ 42 | 'mean':np.mean, 43 | 'p90': (lambda x : np.percentile(x, 90)), 44 | 'p99': (lambda x : np.percentile(x, 99)), 45 | } 46 | 47 | returned_values = bootstrap_ci( 48 | sample_data, 49 | iterations=1000, 50 | operators=operators, 51 | confidence_level=0.95 52 | ) 53 | 54 | for key in operators: 55 | # check type 56 | assert key in returned_values 57 | assert isinstance(returned_values[key], tuple) 58 | assert len(returned_values[key]) == 3 59 | 60 | # basic interval ordering 61 | ci_left, ci_mean, ci_right = returned_values[key] 62 | assert ci_left <= ci_mean 63 | assert ci_mean <= ci_right 64 | 65 | # because it's a bootstrap, these are supposed to be true 66 | assert min(sample_data) <= ci_left 67 | assert ci_right <= max(sample_data) 68 | 69 | # tests that are specific to the operators 70 | assert returned_values['p90'][0] <= returned_values['p99'][0] # p90 < p99 so left CI also 71 | assert returned_values['p90'][1] <= returned_values['p99'][1] # p90 < p99 so mean also 72 | assert returned_values['p90'][2] <= returned_values['p99'][2] # p90 < p99 so 
right CI also 73 | -------------------------------------------------------------------------------- /tests/common/test_perf.py: -------------------------------------------------------------------------------- 1 | """Tests src/common/metrics.py""" 2 | import os 3 | import pytest 4 | from unittest.mock import call, Mock, patch 5 | import time 6 | 7 | from common.perf import PerformanceReportingThread, PerformanceMetricsCollector 8 | 9 | def verify_all_perf_report_keys(perf_report): 10 | """Helper test function, tests all keys in perf report""" 11 | assert isinstance(perf_report, dict) 12 | 13 | assert "timestamp" in perf_report, "perf report should have a timestamp key" 14 | 15 | required_keys = [ 16 | "cpu_pct_per_cpu_avg", 17 | "cpu_pct_per_cpu_min", 18 | "cpu_pct_per_cpu_max", 19 | "mem_percent", 20 | "disk_usage_percent", 21 | "disk_io_read_mb", 22 | "disk_io_write_mb", 23 | "net_io_lo_sent_mb", 24 | "net_io_ext_sent_mb", 25 | "net_io_lo_recv_mb", 26 | "net_io_ext_recv_mb" 27 | ] 28 | 29 | for key in required_keys: 30 | assert key in perf_report, f"key {key} should be in the perf report, but instead we find: {list(perf_report.keys())}" 31 | assert isinstance(perf_report[key], float) # all metrics are float so far\ 32 | 33 | assert "not_in_perf_report" not in perf_report 34 | 35 | 36 | def test_perf_report_run_as_thread(): 37 | """ Tests PerformanceReportingThread() as a thread """ 38 | # creating a mock to provide as callback 39 | call_on_loop_method = Mock() 40 | call_on_exit_method = Mock() 41 | 42 | perf_report_thread = PerformanceReportingThread( 43 | initial_time_increment=2.0, 44 | callback_on_loop=call_on_loop_method, 45 | callback_on_exit=call_on_exit_method 46 | ) 47 | 48 | perf_report_thread.start() # will engage in first loop and sleep 2.0 49 | time.sleep(0.5) # will wait to be in the middle of that loop 50 | perf_report_thread.finalize() 51 | 52 | # on exit not called in this one 53 | call_on_exit_method.assert_called_once() 54 | 55 | # get all mock 
def test_perf_report_collector_run_as_thread():
    """ Tests PerformanceMetricsCollector() """
    # creating a mock to provide as callback
    test_max_length = 10

    perf_collector = PerformanceMetricsCollector(max_length=test_max_length)

    # hack internal values to make the test faster
    # (sub-second intervals so each _run_loop() call returns quickly)
    perf_collector.report_thread.cpu_interval = 0.01
    perf_collector.report_thread.time_increment = 0.02

    # fake the loop in the internal thread: call _run_loop() directly instead
    # of start()ing the thread, keeping the test deterministic

    # if we run the exact times we can hold
    for i in range(test_max_length):
        perf_collector.report_thread._run_loop()
    # we expect to have internal list full
    assert len(perf_collector.perf_reports) > 0
    assert len(perf_collector.perf_reports) == test_max_length

    # 1 more time...
    perf_collector.report_thread._run_loop()
    # and length should be half
    # NOTE(review): presumably the collector decimates its buffer once full —
    # confirm against PerformanceMetricsCollector in src/common/perf.py
    assert len(perf_collector.perf_reports) == (test_max_length // 2)
    # and frequency increased
    assert perf_collector.perf_reports_freqs == 2

    # then every other report should be skipped
    for i in range(4):
        perf_collector.report_thread._run_loop()
    # and length should be half + 2 new values (4 loops at frequency 2)
    assert len(perf_collector.perf_reports) == (test_max_length // 2) + 2

    # every stored report must carry the full set of perf metric keys
    for report in perf_collector.perf_reports:
        verify_all_perf_report_keys(report)
def test_parse_pipeline_config():
    """Creates a config dataclass and tests parsing it from CLI"""
    @dataclass
    class test_config:
        # single custom field, overridden from the CLI arguments below
        test_param: str = "default_str"

    # hydra-style overrides ('+' prefix adds a key missing from the schema)
    script_args = [
        "test_pipelines.py",
        "+aml.subscription_id=test_subscription",
        "+aml.resource_group=test_resource_group",
        "+aml.workspace_name=test_workspace_name",
        "+aml.tenant=test_tenant",
        "+experiment.name=test_experiment_name",
        "+compute.linux_cpu=test-cluster",
        "+compute.linux_gpu=test-gpu-cluster",
        "+compute.windows_cpu=test-win-cpu",
        "test_config.test_param=test_str_value",
        "run.submit=True"
    ]

    # replaces sys.argv with test arguments and run main
    with patch.object(sys, "argv", script_args):
        pipeline_config = parse_pipeline_config(test_config)

    # test return value type
    assert isinstance(pipeline_config, DictConfig)

    # test some custom value
    assert pipeline_config.test_config.test_param == "test_str_value"

    # checking config fields (see dataclass above)
    # aml connect
    assert pipeline_config.aml.subscription_id == "test_subscription"
    assert pipeline_config.aml.resource_group == "test_resource_group"
    assert pipeline_config.aml.workspace_name == "test_workspace_name"
    assert pipeline_config.aml.tenant == "test_tenant"

    # compute
    assert pipeline_config.compute.linux_cpu == "test-cluster"
    assert pipeline_config.compute.linux_gpu == "test-gpu-cluster"
    assert pipeline_config.compute.windows_cpu == "test-win-cpu"

    # NOTE(review): returning a value from a test function is unusual (pytest
    # warns about it); the value is reused by test_pipeline_submit() as a
    # fixture-like helper.
    return pipeline_config
def test_pipeline_submit():
    """Tests pipeline_submit(): validates and submits a mocked pipeline
    instance, checking the arguments forwarded to validate()/submit()."""
    # need a mock pipeline mock (Pipeline)
    pipeline_instance_mock = Mock()
    workspace_mock = "fake_workspace"

    # reusing config from previous test
    pipeline_config = test_parse_pipeline_config()

    pipeline_run = pipeline_submit(
        workspace_mock,
        pipeline_config,
        pipeline_instance_mock,

        # test hardcoded overrides
        experiment_description="test_description",
        display_name="test_display_name",
        tags={'foo':'bar'}
    )

    # validate() must be called exactly once against the provided workspace
    pipeline_instance_mock.validate.assert_called_once()
    pipeline_instance_mock.validate.assert_called_with(
        workspace="fake_workspace" # what's returned by aml_connect mock
    )

    # submit() must be called once with config + hardcoded overrides
    pipeline_instance_mock.submit.assert_called_once()
    pipeline_instance_mock.submit.assert_called_with(
        workspace="fake_workspace", # what's returned by aml_connect mock
        experiment_name="test_experiment_name",
        description="test_description",
        display_name="test_display_name",
        tags={'foo':'bar'},
        # NOTE(review): "cpu-cluster" does not match the "test-cluster" value
        # parsed into the config above — presumably pipeline_submit() falls back
        # to an internal default; confirm in src/common/pipelines.py
        default_compute_target="cpu-cluster",
        regenerate_outputs=False, # default
        continue_on_step_failure=False # default
    )
def assert_dist_equal(parsed, expected):
    """Asserts two search-space distributions are equivalent by drawing the
    same number of samples from each under an identical fixed seed."""
    fixed_seed = 0
    sample_count = 100

    np.random.seed(fixed_seed)
    samples_from_parsed = parsed.sample(sample_count)

    # reset to the same seed so both draws use identical random streams
    np.random.seed(fixed_seed)
    samples_from_expected = expected.sample(sample_count)

    assert samples_from_parsed == samples_from_expected
@pytest.fixture()
def temporary_dir():
    """Yields a temporary directory path, removed once the test is done."""
    # context-manager form: cleanup runs when the fixture is finalized
    with tempfile.TemporaryDirectory() as tmp_path:
        yield tmp_path
@pytest.fixture(scope='session')
def aml_config(request):
    """
    Creates some aml config for unit tests that require connectivity (tests/pipelines)

    Args:
        request: pytest request object, used to read the --aml_* CLI options.

    Returns:
        aml_connection_config: connection settings assembled from CLI options
        or environment variables (CLI takes precedence).

    NOTE: will except with AssertionError and fail test if not provided properly
    """
    # CLI option wins; environment variable is the fallback
    subscription_id = request.config.option.aml_subscription_id or os.environ.get('AML_SUBSCRIPTION_ID')
    resource_group = request.config.option.aml_resource_group or os.environ.get('AML_RESOURCE_GROUP')
    workspace_name = request.config.option.aml_workspace_name or os.environ.get('AML_WORKSPACE_NAME')
    auth = request.config.option.aml_auth or os.environ.get('AML_AUTH') or "interactive"
    tenant = request.config.option.aml_tenant or os.environ.get('AML_TENANT')

    # collect every missing required setting so the failure reports all of them at once
    test_config = []
    if subscription_id is None:
        test_config.append("To run this unit test, you need to provide a subscription through --aml_subscription_id or env var AML_SUBSCRIPTION_ID")
    if resource_group is None:
        # fixed copy-paste: message previously said "a subscription" for this option
        test_config.append("To run this unit test, you need to provide a resource group through --aml_resource_group or env var AML_RESOURCE_GROUP")
    if workspace_name is None:
        # fixed copy-paste: message previously said "a subscription" for this option
        test_config.append("To run this unit test, you need to provide a workspace name through --aml_workspace_name or env var AML_WORKSPACE_NAME")
    assert (not test_config), "\n".join(test_config)

    return aml_connection_config(
        subscription_id,
        resource_group,
        workspace_name,
        auth,
        tenant,
        False  # force auth
    )
-------------------------------------------------------------------------------- /tests/data/regression/test/test.txt: -------------------------------------------------------------------------------- 1 | label,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9 2 | -0.180,0.205,0.939,0.659,-0.482,-0.700,-1.034,-0.688,-0.084,0.526,-56.995 3 | -0.706,0.499,-0.814,-0.059,0.468,1.222,1.444,0.101,-2.387,0.443,197.238 4 | 0.129,0.426,-0.557,2.064,0.801,-0.875,0.563,-0.223,0.013,1.097,247.005 5 | 1.050,0.074,0.237,0.547,0.925,0.870,1.444,-2.394,1.105,0.355,219.847 6 | -0.332,0.608,0.800,2.040,0.862,0.499,0.763,0.353,-1.204,1.124,274.270 7 | 0.061,2.854,-0.694,2.112,1.110,-1.924,0.390,-0.642,2.446,-1.022,272.634 8 | 1.446,-0.431,0.092,-0.094,-1.496,-0.459,-0.881,-0.125,0.757,-0.673,-213.986 9 | -1.180,0.936,-0.172,-0.754,1.317,-1.196,1.364,0.578,2.170,-1.083,132.367 10 | 0.549,-0.916,-0.221,-0.668,1.081,0.076,0.592,-1.219,-0.515,-1.081,-67.271 11 | -0.478,-0.306,-0.101,1.139,0.270,-0.336,0.101,1.292,0.494,0.355,31.194 12 | -------------------------------------------------------------------------------- /tests/pipelines/test_data_generation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Executes the series of scripts end-to-end 3 | to test LightGBM (python) manual benchmark 4 | """ 5 | import os 6 | import sys 7 | import tempfile 8 | import pytest 9 | from unittest.mock import patch 10 | 11 | from pipelines.azureml.data_generation import main 12 | 13 | def test_data_generation_main(aml_config, config_directory): 14 | # create test arguments for the script 15 | script_args = [ 16 | "src/pipelines/data_generation.py", 17 | "--exp-config", os.path.join(config_directory, "experiments", "data-generation.yaml"), 18 | f"aml.subscription_id={aml_config.subscription_id}", 19 | f"aml.resource_group={aml_config.resource_group}", 20 | f"aml.workspace_name={aml_config.workspace_name}", 21 | f"aml.tenant={aml_config.tenant}", 22 | 
def test_lightgbm_inferencing(aml_config, config_directory):
    """Builds and validates (without submitting) the lightgbm inferencing pipeline."""
    config_file = os.path.join(config_directory, "experiments", "lightgbm-inferencing.yaml")

    # forward AzureML connection settings from the session fixture
    aml_overrides = [
        f"aml.{field}={getattr(aml_config, field)}"
        for field in ("subscription_id", "resource_group", "workspace_name", "tenant", "auth")
    ]

    cli_arguments = (
        ["src/pipelines/lightgbm_inferencing.py", "--exp-config", config_file]
        + aml_overrides
        + ["+run.validate=True", "+run.submit=False"]  # validate only, no submission
    )

    # run the pipeline main() under a patched sys.argv
    with patch.object(sys, "argv", cli_arguments):
        main()
def test_lightgbm_training_sweep(aml_config, config_directory):
    """Builds and validates (without submitting) the lightgbm training sweep pipeline."""
    config_file = os.path.join(config_directory, "experiments", "lightgbm_training", "sweep.yaml")

    # forward AzureML connection settings from the session fixture
    aml_overrides = [
        f"aml.{field}={getattr(aml_config, field)}"
        for field in ("subscription_id", "resource_group", "workspace_name", "tenant", "auth")
    ]

    cli_arguments = (
        ["src/pipelines/lightgbm_training.py", "--exp-config", config_file]
        + aml_overrides
        + ["+run.validate=True", "+run.submit=False"]  # validate only, no submission
    )

    # run the pipeline main() under a patched sys.argv
    with patch.object(sys, "argv", cli_arguments):
        main()
@patch('mlflow.log_metric')
@patch('scripts.inferencing.custom_win_cli.score.subprocess_run')
def test_lightgbm_c_api_score(subprocess_run_mock, mlflow_log_metric_mock, temporary_dir, regression_model_sample, regression_inference_sample):
    """Tests src/scripts/inferencing/custom_win_cli/score.py"""
    # NOTE(review): docstring corrected — it previously referenced
    # lightgbm_c_api/score.py, but this test imports and patches custom_win_cli
    predictions_dir = os.path.join(temporary_dir, "predictions")

    # create a first mock for the return of subprocess
    subprocess_call_handle_mock = Mock()
    subprocess_call_handle_mock.returncode = 0  # simulate a successful CLI run
    subprocess_call_handle_mock.stderr = "# empty logs"
    subprocess_call_handle_mock.stdout = "# empty logs"

    # feed that mock into a subprocess.run() mock
    subprocess_run_mock.return_value = subprocess_call_handle_mock

    # create test arguments for the script
    script_args = [
        "score.py",
        "--data", regression_inference_sample,
        "--model", regression_model_sample,
        "--output", predictions_dir
    ]

    # replaces sys.argv with test arguments and run main
    with patch.object(sys, "argv", script_args):
        score.main()

    # test arguments
    assert isinstance(subprocess_run_mock.call_args.args[0], list), "first argument of subprocess.run() should be a list"
    assert "lightgbm.exe" in subprocess_run_mock.call_args.args[0][0], "first element in subprocess.run() command should contain lightgbm.exe"

    # no outputs (yet?)
    # assert os.path.isfile(os.path.join(predictions_dir, "predictions.txt"))

    metric_calls = mlflow_log_metric_mock.call_args_list
    # 19 = 18 perf metrics + 1 time_inferencing
    assert mlflow_log_metric_mock.call_count == 19
    assert len(metric_calls) == 19
def test_lightgbm_data2bin(temporary_dir, regression_train_sample, regression_test_sample):
    """Tests src/scripts/data_processing/lightgbm_data2bin/data2bin.py"""
    train_bin_dir = os.path.join(temporary_dir, "binary_train_data")
    test_bin_dir = os.path.join(temporary_dir, "binary_test_data")

    # flag/value pairs, flattened into the CLI argument list below
    argument_pairs = [
        ("--train", regression_train_sample),
        ("--test", regression_test_sample),
        ("--output_train", train_bin_dir),
        ("--output_test", test_bin_dir),
        ("--header", "True"),
        ("--label_column", "name:label"),
        ("--max_bin", "255"),
    ]
    cli_arguments = ["data2bin.py"] + [token for pair in argument_pairs for token in pair]

    # run the script main() under a patched sys.argv
    with patch.object(sys, "argv", cli_arguments):
        data2bin.main()

    # the script must write one binary file per input set
    assert os.path.isfile(os.path.join(train_bin_dir, "train.bin"))
    assert os.path.isfile(os.path.join(test_bin_dir, "test_0.bin"))
@patch('mlflow.log_metric')
@patch('mlflow.set_tags')
@patch('scripts.inferencing.lightgbm_c_api.score.subprocess_run')
@patch('scripts.inferencing.lightgbm_c_api.score.locate_lightgbm_benchmark_binaries')
def test_lightgbm_c_api_score(locate_binaries_mock, subprocess_run_mock, mlflow_set_tags_mock, mlflow_log_metric_mock, temporary_dir, regression_model_sample, regression_inference_sample):
    """Tests src/scripts/inferencing/lightgbm_c_api/score.py"""
    predictions_dir = os.path.join(temporary_dir, "predictions")
    # the script will "run" this fake binary instead of a real build
    locate_binaries_mock.return_value = "fake_cli.exe"

    # create a first mock for the return of subprocess
    subprocess_call_handle_mock = Mock()
    subprocess_call_handle_mock.returncode = 0  # simulate a successful run
    subprocess_call_handle_mock.stderr = "# empty logs"
    subprocess_call_handle_mock.stdout = """
# fake logs for parsing metrics from C API binaries (cli)
ROW line=0 label=0.42 null_elem=3 prediction=0.45 time_usecs=45.2
ROW line=1 label=0.42 null_elem=3 prediction=0.45 time_usecs=45.3
ROW line=3 label=0.42 null_elem=3 prediction=0.45 time_usecs=45.4
METRIC foo=342.0
PROPRETY foo2=bar2
"""
    # NOTE(review): "PROPRETY" above looks like a typo of PROPERTY — presumably
    # it exercises tolerance of malformed log lines; confirm against the
    # stdout parser in score.py before "fixing" it
    # feed that mock into a subprocess.run() mock
    subprocess_run_mock.return_value = subprocess_call_handle_mock

    # create test arguments for the script
    script_args = [
        "score.py",
        "--data", regression_inference_sample,
        "--model", regression_model_sample,
        "--output", predictions_dir
    ]

    # replaces sys.argv with test arguments and run main
    with patch.object(sys, "argv", script_args):
        score.main()

    # test arguments
    assert isinstance(subprocess_run_mock.call_args.args[0], list), "first argument of subprocess.run() should be a list"
    assert "fake_cli.exe" in subprocess_run_mock.call_args.args[0][0], "first element in subprocess.run() command should contain return value of locate_lightgbm_benchmark_binaries()"

    # test expected outputs
    assert os.path.isfile(os.path.join(predictions_dir, "predictions.txt"))

    metric_calls = mlflow_log_metric_mock.call_args_list
    # 32 = 18 perf metrics + 13 inference metrics + 1 custom (foo)
    assert mlflow_log_metric_mock.call_count == 32
    assert len(metric_calls) == 32
@patch('ray.shutdown') # patching this to avoid ray.shutdown() call, using ray_init_fixture instead
@patch('ray.init') # patching this to avoid ray.init() call, using ray_init_fixture instead
@patch('common.distributed.MultiNodeMPIDriver') # patching this to avoid mpi.init() call
def test_lightgbm_ray_score(mpi_driver_mock, ray_init_mock, ray_shutdown_mock, ray_init_fixture, temporary_dir, regression_model_sample, regression_inference_sample):
    """Tests src/scripts/inferencing/lightgbm_ray/score.py"""
    # fake mpi initialization + config: pretend to be a single-node, rank-0
    # process without MPI available
    mpi_driver_mock().get_multinode_config.return_value = multinode_config_class(
        1, # world_size
        0, # world_rank
        False, # mpi_available
        True, # main_node
    )

    predictions_dir = os.path.join(temporary_dir, "predictions")

    # create test arguments for the script
    script_args = [
        "score.py",
        "--data", regression_inference_sample,
        "--model", regression_model_sample,
        "--output", predictions_dir,
        "--cluster_auto_setup", "False"  # don't try to stand up a ray cluster in tests
    ]

    # replaces sys.argv with test arguments and run main
    with patch.object(sys, "argv", script_args):
        score.main()

    # test expected outputs
    assert os.path.isfile(os.path.join(predictions_dir, "predictions.txt"))
def test_lightgbm_python_train(temporary_dir, regression_train_sample, regression_test_sample):
    """Tests src/scripts/training/lightgbm_python/train.py"""
    model_dir = os.path.join(temporary_dir, "model")
    model_filename = 'custom_model_filename.txt'
    # create test arguments for the script
    objective_argument = "regression"

    script_args = [
        "train.py",
        "--train", regression_train_sample,
        "--label_column", "name:label",  # label column referenced by name (needs --header True)
        "--header", "True",
        "--test", regression_test_sample,
        "--export_model", model_dir,
        "--model_filename", model_filename,
        "--objective", objective_argument,
        "--boosting_type", "gbdt",
        "--tree_learner", "serial",
        "--metric", "rmse",
        # tiny model + coarse binning to keep the unit test fast
        "--num_trees", "5",
        "--num_leaves", "10",
        "--min_data_in_leaf", "255",
        "--learning_rate", "0.3",
        "--max_bin", "16",
        "--feature_fraction", "0.15",
        "--device_type", "cpu",
        "--multinode_driver", "socket"  # presumably selects the socket-based driver — confirm in train.py
    ]

    # replaces sys.argv with test arguments and run main
    with patch.object(sys, "argv", script_args):
        train.main()

    # test expected outputs
    assert os.path.isfile(
        os.path.join(model_dir, model_filename)
    ), f"Script train.py should generate a {model_filename} output file but did not"
def test_lightgbm_python_score(temporary_dir, regression_model_sample, regression_inference_sample):
    """Tests src/scripts/inferencing/lightgbm_python/score.py"""
    output_dir = os.path.join(temporary_dir, "predictions")

    # CLI arguments for the scoring script
    cli_arguments = [
        "score.py",
        "--data", regression_inference_sample,
        "--header", "True",
        "--model", regression_model_sample,
        "--output", output_dir,
    ]

    # run the script main() under a patched sys.argv
    with patch.object(sys, "argv", cli_arguments):
        score.main()

    # the script must have written a predictions file
    expected_output = os.path.join(output_dir, "predictions.txt")
    assert os.path.isfile(expected_output)
def verify_partitioned_files(partitioned_data_dir, expected_file_count, expected_file_length):
    """Utility for testing outputs.

    Checks that partition.py produced exactly the expected part_NNNNNN files
    and that each one has the expected number of lines.

    Args:
        partitioned_data_dir (str): directory the script wrote into
        expected_file_count (int): number of part_NNNNNN files expected
        expected_file_length (int): line count expected in each file

    Raises:
        AssertionError: if a file is missing or has the wrong line count
    """
    assert os.path.isdir(partitioned_data_dir)

    for expected_file in [os.path.join(partitioned_data_dir, "part_{:06d}".format(i)) for i in range(expected_file_count)]:
        # fixed: message was missing the f prefix, so {expected_file} never interpolated
        assert os.path.isfile(
            expected_file
        ), f"Script partition.py should generate partitioned data file {expected_file} in --output, but no output files were found"

        # count lines; fixed: the previous enumerate-then-pass loop left `count`
        # unbound (NameError) when a partition file was empty
        with open(expected_file, 'r') as i_file:
            line_count = sum(1 for _ in i_file)

        assert line_count == expected_file_length # expected size of each chunk
def test_sample_inferencing_script(temporary_dir, regression_inference_sample, regression_model_sample):
    """Runs the sample inferencing script end to end on the regression fixtures."""
    # one directory per script output
    predictions_dir = os.path.join(temporary_dir, "predictions")

    cli_arguments = [
        "score.py",
        "--data", regression_inference_sample,
        "--model", regression_model_sample,
        "--output", predictions_dir,
    ]

    # run the sample script main() under a patched sys.argv
    with patch.object(sys, "argv", cli_arguments):
        sample.main()

    # NOTE: the sample script currently writes no predictions file,
    # so there is no output assertion here (only that main() completes).
@patch('common.distributed.MultiNodeMPIDriver') # patching this to avoid mpi.init() call 19 | def test_lightgbm_ray_train(mpi_driver_mock, ray_init_mock, ray_shutdown_mock, ray_init_fixture, temporary_dir, regression_train_sample, regression_test_sample): 20 | """Tests src/scripts/training/lightgbm_ray/train.py""" 21 | # fake mpi initialization + config 22 | mpi_driver_mock().get_multinode_config.return_value = multinode_config_class( 23 | 1, # world_size 24 | 0, # world_rank 25 | False, # mpi_available 26 | True, # main_node 27 | ) 28 | 29 | model_dir = os.path.join(temporary_dir, "model") 30 | model_filename = 'custom_model_filename.txt' 31 | # create test arguments for the script 32 | objective_argument = "regression" 33 | 34 | script_args = [ 35 | "train.py", 36 | "--train", regression_train_sample, 37 | "--label_column", "label", 38 | "--test", regression_test_sample, 39 | "--export_model", model_dir, 40 | "--model_filename", model_filename, 41 | "--objective", objective_argument, 42 | "--boosting_type", "gbdt", 43 | "--tree_learner", "serial", 44 | "--metric", "rmse", 45 | "--num_iterations", "5", 46 | "--num_leaves", "10", 47 | "--min_data_in_leaf", "255", 48 | "--learning_rate", "0.3", 49 | "--max_bin", "16", 50 | "--feature_fraction", "0.15", 51 | "--device_type", "cpu", 52 | "--cluster_auto_setup", "False" 53 | ] 54 | 55 | # replaces sys.argv with test arguments and run main 56 | with patch.object(sys, "argv", script_args): 57 | train.main() 58 | 59 | # test expected outputs 60 | assert os.path.isfile( 61 | os.path.join(model_dir, model_filename) 62 | ), f"Script train.py should generate a {model_filename} output file but did not" 63 | -------------------------------------------------------------------------------- /tests/scripts/test_treelite_python.py: -------------------------------------------------------------------------------- 1 | """ 2 | Executes the series of scripts end-to-end 3 | to test LightGBM (python) manual benchmark 4 | """ 5 | import os 
6 | import sys 7 | import tempfile 8 | from unittest.mock import patch 9 | 10 | from scripts.model_transformation.treelite_compile import compile_treelite 11 | from scripts.inferencing.treelite_python import score 12 | 13 | # IMPORTANT: see conftest.py for fixtures 14 | 15 | def test_treelist_inferencing_script(temporary_dir, regression_inference_sample, regression_model_sample): 16 | # create a directory for each output 17 | predictions_dir = os.path.join(temporary_dir, "predictions") 18 | 19 | if sys.platform == "linux" or sys.platform == "linux2": 20 | toolchain = "gcc" 21 | elif sys.platform == "win32": 22 | toolchain = "msvc" 23 | else: 24 | raise NotImplementedError(f"unit test doesn't know how to handle toolchain for platform {sys.platform}") 25 | 26 | script_args = [ 27 | "compile_treelite.py", 28 | "--model", regression_model_sample, 29 | "--model_format", "lightgbm", 30 | "--toolchain", toolchain, 31 | "--so_path", os.path.join(temporary_dir, "mymodel.so") 32 | ] 33 | 34 | # replaces sys.argv with test arguments and run main 35 | with patch.object(sys, "argv", script_args): 36 | compile_treelite.main() 37 | 38 | script_args = [ 39 | "score.py", 40 | "--so_path", os.path.join(temporary_dir, "mymodel.so"), 41 | "--data", regression_inference_sample, 42 | "--output", predictions_dir, 43 | "--nthreads", "1", 44 | ] 45 | 46 | # replaces sys.argv with test arguments and run main 47 | with patch.object(sys, "argv", script_args): 48 | score.main() 49 | 50 | # test expected outputs 51 | #assert os.path.isfile(os.path.join(predictions_dir, "predictions.txt")) 52 | --------------------------------------------------------------------------------