├── .clang-format ├── .clang-tidy ├── .editorconfig ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE.md ├── lock.yml ├── runs-on.yml └── workflows │ ├── doc.yml │ ├── freebsd.yml │ ├── i386.yml │ ├── jvm_tests.yml │ ├── lint.yml │ ├── main.yml │ ├── misc.yml │ ├── python_tests.yml │ ├── python_wheels_macos.yml │ ├── r_nold.yml │ ├── r_tests.yml │ ├── scorecards.yml │ ├── sycl_tests.yml │ └── windows.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── CITATION ├── CMakeLists.txt ├── CONTRIBUTORS.md ├── LICENSE ├── NEWS.md ├── R-package ├── .Rbuildignore ├── .gitignore ├── CMakeLists.txt ├── DESCRIPTION ├── NAMESPACE ├── R │ ├── callbacks.R │ ├── utils.R │ ├── xgb.Booster.R │ ├── xgb.DMatrix.R │ ├── xgb.DMatrix.save.R │ ├── xgb.config.R │ ├── xgb.create.features.R │ ├── xgb.cv.R │ ├── xgb.dump.R │ ├── xgb.ggplot.R │ ├── xgb.importance.R │ ├── xgb.load.R │ ├── xgb.load.raw.R │ ├── xgb.model.dt.tree.R │ ├── xgb.plot.deepness.R │ ├── xgb.plot.importance.R │ ├── xgb.plot.multi.trees.R │ ├── xgb.plot.shap.R │ ├── xgb.plot.tree.R │ ├── xgb.save.R │ ├── xgb.save.raw.R │ ├── xgb.train.R │ └── xgboost.R ├── README.md ├── bootstrap.R ├── cleanup ├── config.h.in ├── configure ├── configure.ac ├── configure.win ├── data │ ├── agaricus.test.rda │ └── agaricus.train.rda ├── inst │ └── make-r-def.R ├── man │ ├── a-compatibility-note-for-saveRDS-save.Rd │ ├── agaricus.test.Rd │ ├── agaricus.train.Rd │ ├── coef.xgb.Booster.Rd │ ├── dim.xgb.DMatrix.Rd │ ├── dimnames.xgb.DMatrix.Rd │ ├── getinfo.Rd │ ├── predict.xgb.Booster.Rd │ ├── predict.xgboost.Rd │ ├── print.xgb.Booster.Rd │ ├── print.xgb.DMatrix.Rd │ ├── print.xgb.cv.Rd │ ├── print.xgboost.Rd │ ├── variable.names.xgb.Booster.Rd │ ├── xgb.Callback.Rd │ ├── xgb.DMatrix.Rd │ ├── xgb.DMatrix.hasinfo.Rd │ ├── xgb.DMatrix.save.Rd │ ├── xgb.DataBatch.Rd │ ├── xgb.DataIter.Rd │ ├── xgb.ExtMemDMatrix.Rd │ ├── xgb.QuantileDMatrix.from_iterator.Rd │ ├── xgb.attr.Rd │ ├── xgb.cb.cv.predict.Rd │ ├── 
xgb.cb.early.stop.Rd │ ├── xgb.cb.evaluation.log.Rd │ ├── xgb.cb.gblinear.history.Rd │ ├── xgb.cb.print.evaluation.Rd │ ├── xgb.cb.reset.parameters.Rd │ ├── xgb.cb.save.model.Rd │ ├── xgb.config.Rd │ ├── xgb.copy.Booster.Rd │ ├── xgb.create.features.Rd │ ├── xgb.cv.Rd │ ├── xgb.dump.Rd │ ├── xgb.gblinear.history.Rd │ ├── xgb.get.DMatrix.data.Rd │ ├── xgb.get.DMatrix.num.non.missing.Rd │ ├── xgb.get.DMatrix.qcut.Rd │ ├── xgb.get.num.boosted.rounds.Rd │ ├── xgb.importance.Rd │ ├── xgb.is.same.Booster.Rd │ ├── xgb.load.Rd │ ├── xgb.load.raw.Rd │ ├── xgb.model.dt.tree.Rd │ ├── xgb.model.parameters.Rd │ ├── xgb.params.Rd │ ├── xgb.plot.deepness.Rd │ ├── xgb.plot.importance.Rd │ ├── xgb.plot.multi.trees.Rd │ ├── xgb.plot.shap.Rd │ ├── xgb.plot.shap.summary.Rd │ ├── xgb.plot.tree.Rd │ ├── xgb.save.Rd │ ├── xgb.save.raw.Rd │ ├── xgb.slice.Booster.Rd │ ├── xgb.slice.DMatrix.Rd │ ├── xgb.train.Rd │ ├── xgbConfig.Rd │ ├── xgboost-options.Rd │ └── xgboost.Rd ├── pkgdown │ └── _pkgdown.yml ├── remove_warning_suppression_pragma.sh ├── src │ ├── Makevars.in │ ├── Makevars.win.in │ ├── init.c │ ├── xgboost-win.def │ ├── xgboost_R.cc │ ├── xgboost_R.h │ └── xgboost_custom.cc ├── tests │ ├── helper_scripts │ │ ├── generate_models.R │ │ ├── install_deps.R │ │ └── run-examples.R │ ├── testthat.R │ └── testthat │ │ ├── test_basic.R │ │ ├── test_booster_slicing.R │ │ ├── test_callbacks.R │ │ ├── test_config.R │ │ ├── test_custom_objective.R │ │ ├── test_dmatrix.R │ │ ├── test_feature_weights.R │ │ ├── test_glm.R │ │ ├── test_helpers.R │ │ ├── test_interaction_constraints.R │ │ ├── test_interactions.R │ │ ├── test_io.R │ │ ├── test_model_compatibility.R │ │ ├── test_monotone.R │ │ ├── test_parameter_exposure.R │ │ ├── test_poisson_regression.R │ │ ├── test_ranking.R │ │ ├── test_unicode.R │ │ ├── test_update.R │ │ └── test_xgboost.R └── vignettes │ ├── xgboost_introduction.Rmd │ └── xgboostfromJSON.Rmd ├── README.md ├── SECURITY.md ├── amalgamation └── dmlc-minimum0.cc ├── cmake ├── 
Doc.cmake ├── FindOpenMPMacOS.cmake ├── PrefetchIntrinsics.cmake ├── RPackageInstall.cmake.in ├── RPackageInstallTargetSetup.cmake ├── Sanitizer.cmake ├── Utils.cmake ├── Version.cmake ├── modules │ ├── FindASan.cmake │ ├── FindLSan.cmake │ ├── FindLibR.cmake │ ├── FindNVML.cmake │ ├── FindNccl.cmake │ ├── FindTSan.cmake │ └── FindUBSan.cmake ├── version_config.h.in ├── xgboost-config.cmake.in └── xgboost.pc.in ├── demo ├── .gitignore ├── CLI │ ├── README.rst │ ├── binary_classification │ │ ├── README.md │ │ ├── agaricus-lepiota.data │ │ ├── agaricus-lepiota.fmap │ │ ├── agaricus-lepiota.names │ │ ├── mapfeat.py │ │ ├── mknfold.py │ │ ├── mushroom.conf │ │ └── runexp.sh │ ├── distributed-training │ │ ├── README.md │ │ ├── mushroom.aws.conf │ │ ├── plot_model.ipynb │ │ └── run_aws.sh │ ├── regression │ │ ├── README.md │ │ ├── machine.conf │ │ ├── machine.data │ │ ├── machine.names │ │ ├── mapfeat.py │ │ ├── mknfold.py │ │ └── runexp.sh │ └── yearpredMSD │ │ ├── README.md │ │ ├── csv2libsvm.py │ │ ├── runexp.sh │ │ └── yearpredMSD.conf ├── README.md ├── aft_survival │ ├── README.rst │ ├── aft_survival_demo.py │ ├── aft_survival_demo_with_optuna.py │ └── aft_survival_viz_demo.py ├── c-api │ ├── .gitignore │ ├── CMakeLists.txt │ ├── basic │ │ ├── CMakeLists.txt │ │ ├── Makefile │ │ ├── README.md │ │ └── c-api-demo.c │ ├── external-memory │ │ ├── CMakeLists.txt │ │ ├── README.md │ │ └── external_memory.c │ └── inference │ │ ├── CMakeLists.txt │ │ └── inference.c ├── dask │ ├── README.rst │ ├── cpu_survival.py │ ├── cpu_training.py │ ├── dask_callbacks.py │ ├── dask_learning_to_rank.py │ ├── forward_logging.py │ ├── gpu_training.py │ ├── sklearn_cpu_training.py │ └── sklearn_gpu_training.py ├── data │ ├── README.md │ ├── agaricus.txt.test │ ├── agaricus.txt.train │ ├── featmap.txt │ ├── gen_autoclaims.R │ └── veterans_lung_cancer.csv ├── gpu_acceleration │ ├── README.rst │ ├── cover_type.py │ └── tree_shap.py ├── guide-python │ ├── README.rst │ ├── basic_walkthrough.py │ 
├── boost_from_prediction.py │ ├── callbacks.py │ ├── cat_in_the_dat.py │ ├── cat_pipeline.py │ ├── categorical.py │ ├── continuation.py │ ├── cross_validation.py │ ├── custom_rmsle.py │ ├── custom_softmax.py │ ├── distributed_extmem_basic.py │ ├── evals_result.py │ ├── external_memory.py │ ├── feature_weights.py │ ├── gamma_regression.py │ ├── generalized_linear_model.py │ ├── individual_trees.py │ ├── learning_to_rank.py │ ├── model_parser.py │ ├── multioutput_regression.py │ ├── predict_first_ntree.py │ ├── predict_leaf_indices.py │ ├── quantile_data_iterator.py │ ├── quantile_regression.py │ ├── sklearn_evals_result.py │ ├── sklearn_examples.py │ ├── sklearn_parallel.py │ ├── spark_estimator_examples.py │ └── update_process.py ├── kaggle-higgs │ ├── README.md │ ├── higgs-cv.py │ ├── higgs-numpy.py │ ├── higgs-pred.R │ ├── higgs-pred.py │ ├── higgs-train.R │ ├── run.sh │ ├── speedtest.R │ └── speedtest.py ├── kaggle-otto │ ├── README.MD │ ├── otto_train_pred.R │ └── understandingXGBoostModel.Rmd ├── multiclass_classification │ ├── README.md │ ├── runexp.sh │ ├── train.R │ └── train.py ├── nvflare │ ├── .gitignore │ ├── README.md │ ├── config │ │ ├── config_fed_client.json │ │ └── config_fed_server.json │ ├── horizontal │ │ ├── README.md │ │ ├── custom │ │ │ ├── controller.py │ │ │ └── trainer.py │ │ └── prepare_data.sh │ └── vertical │ │ ├── README.md │ │ ├── custom │ │ ├── controller.py │ │ └── trainer.py │ │ └── prepare_data.sh └── rmm_plugin │ ├── README.rst │ ├── rmm_mgpu_with_dask.py │ └── rmm_singlegpu.py ├── dev ├── prepare_jvm_release.py └── query_contributors.py ├── doc ├── .gitignore ├── Doxyfile.in ├── Makefile ├── R-package │ ├── .gitignore │ ├── Makefile │ ├── adding_parameters.rst │ ├── index.rst │ ├── index_base.rst │ ├── migration_guide.rst │ └── r_docs │ │ └── index.rst ├── README ├── _static │ ├── cn.svg │ ├── custom.css │ ├── js │ │ └── auto_module_index.js │ └── us.svg ├── build.rst ├── c++.rst ├── c.rst ├── changes │ ├── index.rst │ ├── 
v2.1.0.rst │ └── v3.0.0.rst ├── cli.rst ├── conf.py ├── contrib │ ├── ci.rst │ ├── coding_guide.rst │ ├── community.rst │ ├── consistency.rst │ ├── docs.rst │ ├── donate.rst │ ├── featuremap.rst │ ├── git_guide.rst │ ├── index.rst │ ├── python_packaging.rst │ ├── release.rst │ └── unit_tests.rst ├── dump.schema ├── faq.rst ├── get_started.rst ├── gpu │ └── index.rst ├── index.rst ├── install.rst ├── julia.rst ├── jvm │ ├── api.rst │ ├── index.rst │ ├── java_intro.rst │ ├── javadocs │ │ └── index.rst │ ├── scaladocs │ │ ├── xgboost4j-flink │ │ │ └── index.rst │ │ ├── xgboost4j-spark │ │ │ └── index.rst │ │ └── xgboost4j │ │ │ └── index.rst │ ├── xgboost4j_spark_gpu_tutorial.rst │ ├── xgboost4j_spark_tutorial.rst │ └── xgboost_spark_migration.rst ├── model.schema ├── parameter.rst ├── prediction.rst ├── python │ ├── .gitignore │ ├── callbacks.rst │ ├── index.rst │ ├── python_api.rst │ ├── python_intro.rst │ └── sklearn_estimator.rst ├── requirements.txt ├── sphinx_util.py ├── treemethod.rst ├── tutorials │ ├── advanced_custom_obj.rst │ ├── aft_survival_analysis.rst │ ├── c_api_tutorial.rst │ ├── categorical.rst │ ├── custom_metric_obj.rst │ ├── dart.rst │ ├── dask.rst │ ├── external_memory.rst │ ├── feature_interaction_constraint.rst │ ├── index.rst │ ├── input_format.rst │ ├── intercept.rst │ ├── kubernetes.rst │ ├── learning_to_rank.rst │ ├── model.rst │ ├── monotonic.rst │ ├── multioutput.rst │ ├── param_tuning.rst │ ├── privacy_preserving.rst │ ├── ray.rst │ ├── rf.rst │ ├── saving_model.rst │ ├── slicing_model.rst │ └── spark_estimator.rst └── xgboost_doc.yml ├── include └── xgboost │ ├── base.h │ ├── c_api.h │ ├── cache.h │ ├── collective │ ├── poll_utils.h │ ├── result.h │ └── socket.h │ ├── context.h │ ├── data.h │ ├── feature_map.h │ ├── gbm.h │ ├── global_config.h │ ├── host_device_vector.h │ ├── intrusive_ptr.h │ ├── json.h │ ├── json_io.h │ ├── learner.h │ ├── linalg.h │ ├── linear_updater.h │ ├── logging.h │ ├── metric.h │ ├── model.h │ ├── 
multi_target_tree_model.h │ ├── objective.h │ ├── parameter.h │ ├── predictor.h │ ├── span.h │ ├── string_view.h │ ├── task.h │ ├── tree_model.h │ ├── tree_updater.h │ ├── version_config.h │ └── windefs.h ├── jvm-packages ├── .gitignore ├── CMakeLists.txt ├── README.md ├── checkstyle-suppressions.xml ├── checkstyle.xml ├── create_jni.py ├── pom.xml ├── scalastyle-config.xml ├── xgboost4j-example │ ├── LICENSE │ ├── README.md │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── ml │ │ │ │ └── dmlc │ │ │ │ └── xgboost4j │ │ │ │ └── java │ │ │ │ └── example │ │ │ │ ├── BasicWalkThrough.java │ │ │ │ ├── BoostFromPrediction.java │ │ │ │ ├── CrossValidation.java │ │ │ │ ├── CustomObjective.java │ │ │ │ ├── EarlyStopping.java │ │ │ │ ├── ExternalMemory.java │ │ │ │ ├── GeneralizedLinearModel.java │ │ │ │ ├── PredictFirstNtree.java │ │ │ │ ├── PredictLeafIndices.java │ │ │ │ ├── flink │ │ │ │ └── DistTrainWithFlinkExample.java │ │ │ │ └── util │ │ │ │ ├── CustomEval.java │ │ │ │ └── DataLoader.java │ │ └── scala │ │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── scala │ │ │ └── example │ │ │ ├── BasicWalkThrough.scala │ │ │ ├── BoostFromPrediction.scala │ │ │ ├── CrossValidation.scala │ │ │ ├── CustomObjective.scala │ │ │ ├── ExternalMemory.scala │ │ │ ├── GeneralizedLinearModel.scala │ │ │ ├── PredictFirstNTree.scala │ │ │ ├── PredictLeafIndices.scala │ │ │ ├── flink │ │ │ └── DistTrainWithFlink.scala │ │ │ ├── spark │ │ │ ├── SparkMLlibPipeline.scala │ │ │ └── SparkTraining.scala │ │ │ └── util │ │ │ └── CustomEval.scala │ │ └── test │ │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── java │ │ │ └── example │ │ │ └── JavaExamplesTest.java │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ ├── java │ │ └── example │ │ │ └── flink │ │ │ └── DistTrainWithFlinkExampleTest.scala │ │ └── scala │ │ └── example │ │ ├── ScalaExamplesTest.scala │ │ ├── flink │ │ └── DistTrainWithFlinkSuite.scala │ │ └── spark │ │ └── SparkExamplesTest.scala 
├── xgboost4j-flink │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── java │ │ └── flink │ │ ├── XGBoost.java │ │ └── XGBoostModel.java ├── xgboost4j-spark-gpu │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── ml │ │ │ │ └── dmlc │ │ │ │ └── xgboost4j │ │ │ │ └── java │ │ │ │ ├── CudfColumn.java │ │ │ │ ├── CudfColumnBatch.java │ │ │ │ ├── ExtMemQuantileDMatrix.java │ │ │ │ └── QuantileDMatrix.java │ │ ├── resources │ │ │ └── META-INF │ │ │ │ └── services │ │ │ │ └── ml.dmlc.xgboost4j.scala.spark.XGBoostPlugin │ │ └── scala │ │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── scala │ │ │ ├── ExtMemQuantileDMatrix.scala │ │ │ ├── QuantileDMatrix.scala │ │ │ └── spark │ │ │ ├── ExternalMemory.scala │ │ │ └── GpuXGBoostPlugin.scala │ │ └── test │ │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── java │ │ │ ├── BoosterTest.java │ │ │ └── DMatrixTest.java │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ ├── ml │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── scala │ │ │ ├── ExtMemQuantileDMatrixSuite.scala │ │ │ ├── QuantileDMatrixSuite.scala │ │ │ └── spark │ │ │ ├── ExternalMemorySuite.scala │ │ │ ├── GpuTestSuite.scala │ │ │ ├── GpuXGBoostPluginSuite.scala │ │ │ └── TrainTestData.scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── GpuTestUtils.scala ├── xgboost4j-spark │ ├── pom.xml │ └── src │ │ ├── main │ │ └── scala │ │ │ ├── ml │ │ │ └── dmlc │ │ │ │ └── xgboost4j │ │ │ │ └── scala │ │ │ │ └── spark │ │ │ │ ├── Utils.scala │ │ │ │ ├── XGBoost.scala │ │ │ │ ├── XGBoostClassifier.scala │ │ │ │ ├── XGBoostEstimator.scala │ │ │ │ ├── XGBoostPlugin.scala │ │ │ │ ├── XGBoostRanker.scala │ │ │ │ ├── XGBoostRegressor.scala │ │ │ │ ├── XGBoostTrainingSummary.scala │ │ │ │ ├── package.scala │ │ │ │ └── params │ │ │ │ ├── CustomParams.scala │ │ │ │ ├── DartBoosterParams.scala │ │ │ │ ├── GeneralParams.scala │ │ │ │ ├── LearningTaskParams.scala │ │ │ │ ├── ParamMapConversion.scala │ │ 
│ │ ├── RabitParams.scala │ │ │ │ ├── TreeBoosterParams.scala │ │ │ │ └── XGBoostParams.scala │ │ │ └── org │ │ │ └── apache │ │ │ └── spark │ │ │ └── ml │ │ │ └── xgboost │ │ │ └── SparkUtils.scala │ │ └── test │ │ ├── resources │ │ ├── dermatology.data │ │ ├── log4j.properties │ │ ├── model │ │ │ └── 0.82 │ │ │ │ └── model │ │ │ │ ├── data │ │ │ │ └── XGBoostClassificationModel │ │ │ │ └── metadata │ │ │ │ ├── _SUCCESS │ │ │ │ └── part-00000 │ │ ├── rank.test.csv │ │ ├── rank.test.txt │ │ └── rank.train.csv │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── spark │ │ ├── CommunicatorRobustnessSuite.scala │ │ ├── CustomObj.scala │ │ ├── EvalError.scala │ │ ├── PerTest.scala │ │ ├── TmpFolderPerSuite.scala │ │ ├── TrainTestData.scala │ │ ├── XGBoostClassifierSuite.scala │ │ ├── XGBoostEstimatorSuite.scala │ │ ├── XGBoostRankerSuite.scala │ │ ├── XGBoostRegressorSuite.scala │ │ └── XGBoostSuite.scala └── xgboost4j │ ├── LICENSE │ ├── pom.xml │ └── src │ ├── main │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── java │ │ │ ├── Booster.java │ │ │ ├── Column.java │ │ │ ├── ColumnBatch.java │ │ │ ├── Communicator.java │ │ │ ├── ConfigContext.java │ │ │ ├── DMatrix.java │ │ │ ├── DataBatch.java │ │ │ ├── ExternalCheckpointManager.java │ │ │ ├── IEvaluation.java │ │ │ ├── IObjective.java │ │ │ ├── ITracker.java │ │ │ ├── NativeLibLoader.java │ │ │ ├── RabitTracker.java │ │ │ ├── TrackerProperties.java │ │ │ ├── XGBoost.java │ │ │ ├── XGBoostError.java │ │ │ ├── XGBoostJNI.java │ │ │ └── util │ │ │ ├── BigDenseMatrix.java │ │ │ └── UtilUnsafe.java │ ├── resources │ │ └── xgboost4j-version.properties │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ ├── LabeledPoint.scala │ │ └── scala │ │ ├── Booster.scala │ │ ├── DMatrix.scala │ │ ├── EvalTrait.scala │ │ ├── ExternalCheckpointManager.scala │ │ ├── ObjectiveTrait.scala │ │ └── XGBoost.scala │ ├── native │ ├── jvm_utils.h │ ├── xgboost4j-gpu.cpp │ ├── xgboost4j-gpu.cu │ 
├── xgboost4j.cpp │ └── xgboost4j.h │ └── test │ ├── java │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── java │ │ ├── ArchDetectionTest.java │ │ ├── BoosterImplTest.java │ │ ├── ConfigContextTest.java │ │ ├── DMatrixTest.java │ │ ├── LibraryPathProviderTest.java │ │ ├── OsDetectionTest.java │ │ └── XGBoostTest.java │ └── scala │ └── ml │ └── dmlc │ └── xgboost4j │ └── scala │ ├── DMatrixSuite.scala │ └── ScalaBoosterImplSuite.scala ├── ops ├── conda_env │ ├── aarch64_test.yml │ ├── cpp_test.yml │ ├── linux_cpu_test.yml │ ├── linux_sycl_test.yml │ ├── macos_cpu_test.yml │ ├── minimal.yml │ ├── python_lint.yml │ ├── sdist_test.yml │ └── win64_test.yml ├── docker_run.py ├── packer │ ├── linux │ │ ├── bootstrap.sh │ │ ├── install_drivers.sh │ │ ├── linux.pkr.hcl │ │ └── setup_ssh.sh │ └── windows │ │ ├── bootstrap.ps1 │ │ ├── install_choco.ps1 │ │ ├── setup_ssh.ps1 │ │ ├── sysprep.ps1 │ │ └── windows.pkr.hcl ├── pipeline │ ├── build-cpu-impl.sh │ ├── build-cpu.sh │ ├── build-cuda-impl.sh │ ├── build-cuda.sh │ ├── build-gpu-rpkg-impl.sh │ ├── build-gpu-rpkg.sh │ ├── build-jvm-doc-impl.sh │ ├── build-jvm-doc.sh │ ├── build-jvm-gpu.sh │ ├── build-jvm-macos-apple-silicon.sh │ ├── build-jvm-macos-intel.sh │ ├── build-jvm-manylinux2014.sh │ ├── build-python-wheels-arm64-impl.sh │ ├── build-python-wheels-arm64.sh │ ├── build-python-wheels-cpu.sh │ ├── build-python-wheels-macos.sh │ ├── build-python-wheels-manylinux2014.sh │ ├── build-r-docs-impl.sh │ ├── build-r-docs.sh │ ├── build-test-cpu-nonomp.sh │ ├── build-test-jvm-packages-impl.sh │ ├── build-test-jvm-packages.sh │ ├── build-test-sycl.sh │ ├── build-win64-cpu.ps1 │ ├── build-win64-gpu.ps1 │ ├── classify-git-branch.sh │ ├── deploy-jvm-packages-impl.sh │ ├── deploy-jvm-packages.sh │ ├── enforce-ci.ps1 │ ├── enforce-ci.sh │ ├── get-docker-registry-details.sh │ ├── get-image-tag.sh │ ├── login-docker-registry.sh │ ├── manage-artifacts.py │ ├── run-clang-tidy.sh │ ├── test-c-api-demo.sh │ ├── test-cpp-gpu.sh │ ├── 
test-cpp-i386-impl.sh │ ├── test-cpp-i386.sh │ ├── test-freebsd.sh │ ├── test-jvm-gpu.sh │ ├── test-python-macos.sh │ ├── test-python-sdist.sh │ ├── test-python-wheel-impl.sh │ ├── test-python-wheel.sh │ ├── test-python-with-sysprefix.sh │ ├── test-win64-gpu.ps1 │ ├── trigger-rtd-impl.py │ └── trigger-rtd.sh └── script │ ├── change_scala_version.py │ ├── change_version.py │ ├── changelog.py │ ├── format_wheel_meta.py │ ├── inject_jvm_lib.sh │ ├── lint_cmake.sh │ ├── lint_cpp.py │ ├── lint_python.py │ ├── lint_r.R │ ├── pypi_variants.py │ ├── release_artifacts.py │ ├── run_clang_tidy.py │ ├── test_r_package.py │ ├── test_tidy.cc │ ├── test_utils.py │ └── verify_link.sh ├── plugin ├── CMakeLists.txt ├── README.md ├── example │ ├── README.md │ └── custom_obj.cc ├── federated │ ├── CMakeLists.txt │ ├── README.md │ ├── federated.proto │ ├── federated_coll.cc │ ├── federated_coll.cu │ ├── federated_coll.cuh │ ├── federated_coll.h │ ├── federated_comm.cc │ ├── federated_comm.cu │ ├── federated_comm.cuh │ ├── federated_comm.h │ ├── federated_tracker.cc │ └── federated_tracker.h ├── sycl │ ├── README.md │ ├── common │ │ ├── hist_util.cc │ │ ├── hist_util.h │ │ ├── host_device_vector.cc │ │ ├── linalg_op.h │ │ ├── partition_builder.h │ │ ├── row_set.h │ │ └── transform.h │ ├── data.h │ ├── data │ │ ├── gradient_index.cc │ │ └── gradient_index.h │ ├── device_manager.cc │ ├── device_manager.h │ ├── predictor │ │ └── predictor.cc │ └── tree │ │ ├── expand_entry.h │ │ ├── hist_row_adder.h │ │ ├── hist_synchronizer.h │ │ ├── hist_updater.cc │ │ ├── hist_updater.h │ │ ├── param.h │ │ ├── split_evaluator.h │ │ ├── updater_quantile_hist.cc │ │ └── updater_quantile_hist.h └── updater_gpu │ └── README.md ├── python-package ├── .gitignore ├── README.cpu.rst ├── README.dft.rst ├── README.rst ├── hatch_build.py ├── packager │ ├── __init__.py │ ├── build_config.py │ ├── nativelib.py │ ├── pep517.py │ ├── sdist.py │ └── util.py ├── pyproject.toml ├── pyproject.toml.in └── xgboost │ ├── 
VERSION │ ├── __init__.py │ ├── _data_utils.py │ ├── _typing.py │ ├── callback.py │ ├── collective.py │ ├── compat.py │ ├── config.py │ ├── core.py │ ├── dask │ ├── __init__.py │ ├── data.py │ └── utils.py │ ├── data.py │ ├── federated.py │ ├── libpath.py │ ├── plotting.py │ ├── py.typed │ ├── sklearn.py │ ├── spark │ ├── __init__.py │ ├── core.py │ ├── data.py │ ├── estimator.py │ ├── params.py │ ├── summary.py │ └── utils.py │ ├── testing │ ├── __init__.py │ ├── basic_models.py │ ├── callbacks.py │ ├── collective.py │ ├── continuation.py │ ├── dask.py │ ├── data.py │ ├── data_iter.py │ ├── federated.py │ ├── interaction_constraints.py │ ├── metrics.py │ ├── monotone_constraints.py │ ├── ordinal.py │ ├── params.py │ ├── parse_tree.py │ ├── plotting.py │ ├── quantile_dmatrix.py │ ├── ranking.py │ ├── shared.py │ ├── updater.py │ ├── utils.py │ └── with_skl.py │ ├── tracker.py │ └── training.py ├── src ├── CMakeLists.txt ├── c_api │ ├── c_api.cc │ ├── c_api.cu │ ├── c_api_error.cc │ ├── c_api_error.h │ ├── c_api_utils.h │ └── coll_c_api.cc ├── cli_main.cc ├── collective │ ├── aggregator.cuh │ ├── aggregator.h │ ├── allgather.cc │ ├── allgather.h │ ├── allreduce.cc │ ├── allreduce.h │ ├── broadcast.cc │ ├── broadcast.h │ ├── coll.cc │ ├── coll.cu │ ├── coll.cuh │ ├── coll.h │ ├── comm.cc │ ├── comm.cu │ ├── comm.cuh │ ├── comm.h │ ├── comm_group.cc │ ├── comm_group.h │ ├── communicator-inl.h │ ├── in_memory_communicator.h │ ├── in_memory_handler.cc │ ├── in_memory_handler.h │ ├── loop.cc │ ├── loop.h │ ├── nccl_stub.cc │ ├── nccl_stub.h │ ├── protocol.h │ ├── result.cc │ ├── socket.cc │ ├── tracker.cc │ └── tracker.h ├── common │ ├── algorithm.cuh │ ├── algorithm.h │ ├── api_entry.h │ ├── base64.h │ ├── bitfield.h │ ├── categorical.h │ ├── charconv.cc │ ├── charconv.h │ ├── cleanup.h │ ├── column_matrix.cc │ ├── column_matrix.h │ ├── common.cc │ ├── common.cu │ ├── common.h │ ├── compressed_iterator.h │ ├── config.h │ ├── cuda_context.cuh │ ├── cuda_dr_utils.cc │ ├── 
cuda_dr_utils.h │ ├── cuda_pinned_allocator.cu │ ├── cuda_pinned_allocator.h │ ├── cuda_rt_utils.cc │ ├── cuda_rt_utils.h │ ├── cuda_stream_pool.cuh │ ├── deterministic.cuh │ ├── device_compression.cu │ ├── device_compression.cuh │ ├── device_compression.h │ ├── device_helpers.cu │ ├── device_helpers.cuh │ ├── device_vector.cu │ ├── device_vector.cuh │ ├── error_msg.cc │ ├── error_msg.h │ ├── group_data.h │ ├── hist_util.cc │ ├── hist_util.cu │ ├── hist_util.cuh │ ├── hist_util.h │ ├── host_device_vector.cc │ ├── host_device_vector.cu │ ├── io.cc │ ├── io.h │ ├── json.cc │ ├── json_utils.h │ ├── linalg_op.cuh │ ├── linalg_op.h │ ├── math.h │ ├── numeric.cc │ ├── numeric.cu │ ├── numeric.h │ ├── nvtx_utils.h │ ├── observer.h │ ├── optional_weight.h │ ├── partition_builder.h │ ├── probability_distribution.h │ ├── pseudo_huber.cc │ ├── pseudo_huber.h │ ├── quantile.cc │ ├── quantile.cu │ ├── quantile.cuh │ ├── quantile.h │ ├── quantile_loss_utils.cc │ ├── quantile_loss_utils.h │ ├── random.cc │ ├── random.cu │ ├── random.h │ ├── ranking_utils.cc │ ├── ranking_utils.cu │ ├── ranking_utils.cuh │ ├── ranking_utils.h │ ├── ref_resource_view.cuh │ ├── ref_resource_view.h │ ├── resource.cu │ ├── resource.cuh │ ├── row_set.h │ ├── stats.cc │ ├── stats.cu │ ├── stats.cuh │ ├── stats.h │ ├── survival_util.cc │ ├── survival_util.h │ ├── threading_utils.cc │ ├── threading_utils.cuh │ ├── threading_utils.h │ ├── threadpool.h │ ├── timer.cc │ ├── timer.h │ ├── transform.h │ ├── transform_iterator.h │ ├── type.h │ ├── version.cc │ └── version.h ├── context.cc ├── context.cu ├── data │ ├── adapter.cc │ ├── adapter.h │ ├── array_interface.cc │ ├── array_interface.cu │ ├── array_interface.h │ ├── batch_utils.cc │ ├── batch_utils.h │ ├── cat_container.cc │ ├── cat_container.cu │ ├── cat_container.cuh │ ├── cat_container.h │ ├── data.cc │ ├── data.cu │ ├── device_adapter.cu │ ├── device_adapter.cuh │ ├── ellpack_page.cc │ ├── ellpack_page.cu │ ├── ellpack_page.cuh │ ├── ellpack_page.h │ 
├── ellpack_page_raw_format.cu │ ├── ellpack_page_raw_format.h │ ├── ellpack_page_source.cu │ ├── ellpack_page_source.h │ ├── extmem_quantile_dmatrix.cc │ ├── extmem_quantile_dmatrix.cu │ ├── extmem_quantile_dmatrix.h │ ├── file_iterator.cc │ ├── file_iterator.h │ ├── gradient_index.cc │ ├── gradient_index.cu │ ├── gradient_index.h │ ├── gradient_index_format.cc │ ├── gradient_index_format.h │ ├── gradient_index_page_source.cc │ ├── gradient_index_page_source.h │ ├── iterative_dmatrix.cc │ ├── iterative_dmatrix.cu │ ├── iterative_dmatrix.h │ ├── proxy_dmatrix.cc │ ├── proxy_dmatrix.cu │ ├── proxy_dmatrix.cuh │ ├── proxy_dmatrix.h │ ├── quantile_dmatrix.cc │ ├── quantile_dmatrix.cu │ ├── quantile_dmatrix.h │ ├── simple_batch_iterator.h │ ├── simple_dmatrix.cc │ ├── simple_dmatrix.cu │ ├── simple_dmatrix.cuh │ ├── simple_dmatrix.h │ ├── sparse_page_dmatrix.cc │ ├── sparse_page_dmatrix.cu │ ├── sparse_page_dmatrix.h │ ├── sparse_page_raw_format.cc │ ├── sparse_page_source.cc │ ├── sparse_page_source.cu │ ├── sparse_page_source.h │ ├── sparse_page_writer.h │ ├── validation.cc │ ├── validation.cu │ └── validation.h ├── encoder │ ├── ordinal.cuh │ ├── ordinal.h │ └── types.h ├── gbm │ ├── gblinear.cc │ ├── gblinear_model.cc │ ├── gblinear_model.h │ ├── gbm.cc │ ├── gbtree.cc │ ├── gbtree.cu │ ├── gbtree.h │ ├── gbtree_model.cc │ └── gbtree_model.h ├── global_config.cc ├── learner.cc ├── linear │ ├── coordinate_common.h │ ├── linear_updater.cc │ ├── param.h │ ├── updater_coordinate.cc │ ├── updater_gpu_coordinate.cu │ └── updater_shotgun.cc ├── logging.cc ├── metric │ ├── auc.cc │ ├── auc.cu │ ├── auc.h │ ├── elementwise_metric.cc │ ├── elementwise_metric.cu │ ├── metric.cc │ ├── metric_common.h │ ├── multiclass_metric.cc │ ├── multiclass_metric.cu │ ├── rank_metric.cc │ ├── rank_metric.cu │ ├── rank_metric.h │ ├── survival_metric.cc │ └── survival_metric.cu ├── objective │ ├── adaptive.cc │ ├── adaptive.cu │ ├── adaptive.h │ ├── aft_obj.cc │ ├── aft_obj.cu │ ├── hinge.cc 
│ ├── hinge.cu │ ├── init_estimation.cc │ ├── init_estimation.h │ ├── lambdarank_obj.cc │ ├── lambdarank_obj.cu │ ├── lambdarank_obj.cuh │ ├── lambdarank_obj.h │ ├── multiclass_obj.cc │ ├── multiclass_obj.cu │ ├── multiclass_param.h │ ├── objective.cc │ ├── quantile_obj.cc │ ├── quantile_obj.cu │ ├── regression_loss.h │ ├── regression_obj.cc │ ├── regression_obj.cu │ └── regression_param.h ├── predictor │ ├── cpu_predictor.cc │ ├── cpu_treeshap.cc │ ├── cpu_treeshap.h │ ├── gpu_predictor.cu │ ├── predict_fn.h │ └── predictor.cc └── tree │ ├── common_row_partitioner.h │ ├── constraints.cc │ ├── constraints.cu │ ├── constraints.cuh │ ├── constraints.h │ ├── driver.h │ ├── fit_stump.cc │ ├── fit_stump.cu │ ├── fit_stump.h │ ├── gpu_hist │ ├── evaluate_splits.cu │ ├── evaluate_splits.cuh │ ├── evaluator.cu │ ├── expand_entry.cuh │ ├── feature_groups.cu │ ├── feature_groups.cuh │ ├── gradient_based_sampler.cu │ ├── gradient_based_sampler.cuh │ ├── histogram.cu │ ├── histogram.cuh │ ├── quantiser.cuh │ ├── row_partitioner.cu │ └── row_partitioner.cuh │ ├── hist │ ├── evaluate_splits.h │ ├── expand_entry.h │ ├── hist_cache.h │ ├── hist_param.cc │ ├── hist_param.h │ ├── histogram.cc │ ├── histogram.h │ └── sampler.h │ ├── io_utils.h │ ├── multi_target_tree_model.cc │ ├── param.cc │ ├── param.h │ ├── sample_position.h │ ├── split_evaluator.h │ ├── tree_model.cc │ ├── tree_updater.cc │ ├── updater_approx.cc │ ├── updater_colmaker.cc │ ├── updater_gpu_common.cuh │ ├── updater_gpu_hist.cu │ ├── updater_prune.cc │ ├── updater_quantile_hist.cc │ ├── updater_refresh.cc │ └── updater_sync.cc └── tests ├── README.md ├── cli └── machine.conf.in ├── cpp ├── CMakeLists.txt ├── c_api │ └── test_c_api.cc ├── categorical_helpers.h ├── collective │ ├── test_allgather.cc │ ├── test_allgather.cu │ ├── test_allreduce.cc │ ├── test_allreduce.cu │ ├── test_broadcast.cc │ ├── test_coll_c_api.cc │ ├── test_comm.cc │ ├── test_comm_group.cc │ ├── test_loop.cc │ ├── test_result.cc │ ├── 
test_socket.cc │ ├── test_tracker.cc │ ├── test_worker.cuh │ └── test_worker.h ├── common │ ├── test_algorithm.cc │ ├── test_algorithm.cu │ ├── test_bitfield.cc │ ├── test_bitfield.cu │ ├── test_categorical.cc │ ├── test_charconv.cc │ ├── test_column_matrix.cc │ ├── test_common.cc │ ├── test_compressed_iterator.cc │ ├── test_config.cc │ ├── test_cuda_dr_utils.cc │ ├── test_cuda_host_allocator.cu │ ├── test_cuda_rt_utils.cu │ ├── test_device_compression.cu │ ├── test_device_helpers.cu │ ├── test_device_vector.cu │ ├── test_gpu_compressed_iterator.cu │ ├── test_group_data.cc │ ├── test_hist_util.cc │ ├── test_hist_util.cu │ ├── test_hist_util.h │ ├── test_host_device_vector.cu │ ├── test_intrusive_ptr.cc │ ├── test_io.cc │ ├── test_json.cc │ ├── test_linalg.cc │ ├── test_linalg.cu │ ├── test_monitor.cc │ ├── test_numeric.cc │ ├── test_optional_weight.cc │ ├── test_parameter.cc │ ├── test_partition_builder.cc │ ├── test_probability_distribution.cc │ ├── test_quantile.cc │ ├── test_quantile.cu │ ├── test_quantile.h │ ├── test_quantile_utils.cc │ ├── test_random.cc │ ├── test_ranking_utils.cc │ ├── test_ranking_utils.cu │ ├── test_ranking_utils.h │ ├── test_ref_resource_view.cc │ ├── test_ref_resource_view.cu │ ├── test_span.cc │ ├── test_span.cu │ ├── test_span.h │ ├── test_stats.cc │ ├── test_stats.cu │ ├── test_string_view.cc │ ├── test_survival_util.cc │ ├── test_threading_utils.cc │ ├── test_threading_utils.cu │ ├── test_threadpool.cc │ ├── test_transform_iterator.cc │ ├── test_transform_range.cc │ ├── test_transform_range.cu │ └── test_version.cc ├── data │ ├── test_adapter.cc │ ├── test_array_interface.cc │ ├── test_array_interface.cu │ ├── test_array_interface.h │ ├── test_batch_utils.cu │ ├── test_cat_container.cc │ ├── test_cat_container.cu │ ├── test_cat_container.h │ ├── test_data.cc │ ├── test_device_adapter.cu │ ├── test_ellpack_page.cu │ ├── test_ellpack_page_raw_format.cu │ ├── test_extmem_quantile_dmatrix.cc │ ├── test_extmem_quantile_dmatrix.cu │ ├── 
test_extmem_quantile_dmatrix.h │ ├── test_file_iterator.cc │ ├── test_gradient_index.cc │ ├── test_gradient_index_page_raw_format.cc │ ├── test_iterative_dmatrix.cc │ ├── test_iterative_dmatrix.cu │ ├── test_iterative_dmatrix.h │ ├── test_metainfo.cc │ ├── test_metainfo.cu │ ├── test_metainfo.h │ ├── test_proxy_dmatrix.cc │ ├── test_proxy_dmatrix.cu │ ├── test_simple_dmatrix.cc │ ├── test_simple_dmatrix.cu │ ├── test_sparse_page_dmatrix.cc │ ├── test_sparse_page_dmatrix.cu │ └── test_sparse_page_raw_format.cc ├── encoder │ ├── df_mock.cuh │ ├── df_mock.h │ ├── test_ordinal.cc │ ├── test_ordinal.cu │ └── test_ordinal.h ├── filesystem.h ├── gbm │ ├── test_gblinear.cc │ ├── test_gblinear.cu │ ├── test_gbtree.cc │ └── test_gbtree.cu ├── helpers.cc ├── helpers.cu ├── helpers.h ├── histogram_helpers.h ├── linear │ ├── test_json_io.h │ ├── test_linear.cc │ └── test_linear.cu ├── metric │ ├── test_auc.h │ ├── test_distributed_metric.cc │ ├── test_elementwise_metric.h │ ├── test_metric.cc │ ├── test_multiclass_metric.h │ ├── test_rank_metric.cc │ ├── test_rank_metric.h │ ├── test_survival_metric.cc │ ├── test_survival_metric.cu │ └── test_survival_metric.h ├── objective │ ├── test_aft_obj.cc │ ├── test_aft_obj.cu │ ├── test_aft_obj.h │ ├── test_aft_obj_cpu.cc │ ├── test_hinge.cc │ ├── test_hinge.cu │ ├── test_hinge.h │ ├── test_hinge_cpu.cc │ ├── test_lambdarank_obj.cc │ ├── test_lambdarank_obj.cu │ ├── test_lambdarank_obj.h │ ├── test_multiclass_obj.cc │ ├── test_multiclass_obj.h │ ├── test_multiclass_obj_cpu.cc │ ├── test_multiclass_obj_gpu.cu │ ├── test_objective.cc │ ├── test_quantile_obj.cc │ ├── test_quantile_obj.h │ ├── test_quantile_obj_cpu.cc │ ├── test_quantile_obj_gpu.cu │ ├── test_regression_obj.cc │ ├── test_regression_obj.h │ ├── test_regression_obj_cpu.cc │ └── test_regression_obj_gpu.cu ├── objective_helpers.cc ├── objective_helpers.h ├── plugin │ ├── federated │ │ ├── CMakeLists.txt │ │ ├── test_federated_coll.cc │ │ ├── test_federated_coll.cu │ │ ├── 
test_federated_comm.cc │ │ ├── test_federated_comm_group.cc │ │ ├── test_federated_comm_group.cu │ │ ├── test_federated_data.cc │ │ ├── test_federated_learner.cc │ │ ├── test_federated_tracker.cc │ │ └── test_worker.h │ ├── sycl_helpers.h │ ├── test_example_objective.cc │ ├── test_sycl_aft_obj.cc │ ├── test_sycl_ghist_builder.cc │ ├── test_sycl_gradient_index.cc │ ├── test_sycl_hinge.cc │ ├── test_sycl_hist_updater.cc │ ├── test_sycl_host_device_vector.cc │ ├── test_sycl_lambdarank_obj.cc │ ├── test_sycl_multiclass_obj.cc │ ├── test_sycl_partition_builder.cc │ ├── test_sycl_prediction_cache.cc │ ├── test_sycl_predictor.cc │ ├── test_sycl_quantile_hist_builder.cc │ ├── test_sycl_quantile_obj.cc │ ├── test_sycl_regression_obj.cc │ ├── test_sycl_row_set_collection.cc │ ├── test_sycl_split_evaluator.cc │ └── test_sycl_transform_range.cc ├── predictor │ ├── test_cpu_predictor.cc │ ├── test_gpu_predictor.cu │ ├── test_predictor.cc │ └── test_predictor.h ├── test_cache.cc ├── test_context.cc ├── test_context.cu ├── test_global_config.cc ├── test_helpers.cc ├── test_learner.cc ├── test_learner.cu ├── test_logging.cc ├── test_main.cc ├── test_multi_target.cc ├── test_serialization.cc └── tree │ ├── gpu_hist │ ├── test_driver.cu │ ├── test_evaluate_splits.cu │ ├── test_expand_entry.cu │ ├── test_gradient_based_sampler.cu │ ├── test_histogram.cu │ └── test_row_partitioner.cu │ ├── hist │ ├── test_evaluate_splits.cc │ ├── test_expand_entry.cc │ ├── test_histogram.cc │ └── test_sampler.cc │ ├── test_approx.cc │ ├── test_column_split.cc │ ├── test_column_split.h │ ├── test_common_partitioner.cc │ ├── test_constraints.cc │ ├── test_constraints.cu │ ├── test_evaluate_splits.h │ ├── test_fit_stump.cc │ ├── test_gpu_approx.cu │ ├── test_gpu_hist.cu │ ├── test_multi_target_tree_model.cc │ ├── test_node_partition.cc │ ├── test_param.cc │ ├── test_partitioner.h │ ├── test_prediction_cache.cc │ ├── test_prediction_cache.h │ ├── test_prune.cc │ ├── test_quantile_hist.cc │ ├── 
test_refresh.cc │ ├── test_regen.cc │ ├── test_tree_model.cc │ ├── test_tree_policy.cc │ └── test_tree_stat.cc ├── pytest.ini ├── python-gpu ├── conftest.py ├── load_pickle.py ├── test_device_quantile_dmatrix.py ├── test_from_cudf.py ├── test_from_cupy.py ├── test_gpu_basic_models.py ├── test_gpu_callbacks.py ├── test_gpu_data_iterator.py ├── test_gpu_demos.py ├── test_gpu_eval_metrics.py ├── test_gpu_interaction_constraints.py ├── test_gpu_linear.py ├── test_gpu_ordinal.py ├── test_gpu_parse_tree.py ├── test_gpu_pickling.py ├── test_gpu_plotting.py ├── test_gpu_prediction.py ├── test_gpu_ranking.py ├── test_gpu_training_continuation.py ├── test_gpu_updaters.py ├── test_gpu_with_sklearn.py ├── test_large_input.py └── test_monotonic_constraints.py ├── python-sycl ├── test_sycl_prediction.py ├── test_sycl_simple_dask.py ├── test_sycl_training_continuation.py ├── test_sycl_updaters.py └── test_sycl_with_sklearn.py ├── python ├── generate_models.py ├── test_basic.py ├── test_basic_models.py ├── test_callback.py ├── test_cli.py ├── test_collective.py ├── test_config.py ├── test_data_iterator.py ├── test_demos.py ├── test_dmatrix.py ├── test_early_stopping.py ├── test_eval_metrics.py ├── test_interaction_constraints.py ├── test_linear.py ├── test_model_compatibility.py ├── test_model_io.py ├── test_monotone_constraints.py ├── test_multi_target.py ├── test_objectives.py ├── test_openmp.py ├── test_ordinal.py ├── test_parse_tree.py ├── test_pickling.py ├── test_plotting.py ├── test_predict.py ├── test_quantile_dmatrix.py ├── test_ranking.py ├── test_shap.py ├── test_survival.py ├── test_tracker.py ├── test_training_continuation.py ├── test_tree_regularization.py ├── test_updaters.py ├── test_with_arrow.py ├── test_with_modin.py ├── test_with_pandas.py ├── test_with_polars.py ├── test_with_scipy.py ├── test_with_shap.py ├── test_with_sklearn.py └── with_omp_limit.py └── test_distributed ├── __init__.py ├── test_federated └── test_federated.py ├── test_gpu_federated └── 
test_gpu_federated.py ├── test_gpu_with_dask ├── __init__.py ├── conftest.py ├── test_gpu_demos.py ├── test_gpu_external_memory.py ├── test_gpu_ranking.py └── test_gpu_with_dask.py ├── test_gpu_with_spark ├── __init__.py ├── conftest.py ├── discover_gpu.sh ├── test_data.py └── test_gpu_spark.py ├── test_with_dask ├── __init__.py ├── test_demos.py ├── test_external_memory.py ├── test_ranking.py └── test_with_dask.py └── test_with_spark ├── __init__.py ├── test_data.py ├── test_spark_local.py ├── test_spark_local_cluster.py └── utils.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset=utf-8 5 | indent_style = space 6 | indent_size = 2 7 | insert_final_newline = true 8 | 9 | [*.py] 10 | indent_style = space 11 | indent_size = 4 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | *.c text eol=lf 4 | *.h text eol=lf 5 | *.cc text eol=lf 6 | *.cuh text eol=lf 7 | *.cu text eol=lf 8 | *.py text eol=lf 9 | *.txt text eol=lf 10 | *.R text eol=lf 11 | *.scala text eol=lf 12 | *.java text eol=lf 13 | 14 | *.sh text eol=lf 15 | 16 | *.rst text eol=lf 17 | *.md text eol=lf 18 | *.csv text eol=lf -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | open_collective: xgboost 2 | custom: https://xgboost.ai/sponsors 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Thanks for participating in the XGBoost community! The issue tracker is used for actionable items such as feature proposals discussion, roadmaps, and bug tracking. 
2 | 3 | Issues that are inactive for a period of time may get closed. We adopt this policy so that we won't lose track of actionable issues that may fall to the bottom of the pile. Feel free to open a new one if you feel there is an additional problem that needs attention when an old one gets closed. 4 | 5 | For bug reports, to help the developers act on the issues, please include a description of your environment, preferably a minimal script to reproduce the problem. 6 | 7 | For feature proposals, list clear, small actionable items so we can track the progress of the change. 8 | -------------------------------------------------------------------------------- /.github/lock.yml: -------------------------------------------------------------------------------- 1 | # Configuration for lock-threads - https://github.com/dessant/lock-threads 2 | 3 | # Number of days of inactivity before a closed issue or pull request is locked 4 | daysUntilLock: 90 5 | 6 | # Issues and pull requests with these labels will not be locked. Set to `[]` to disable 7 | exemptLabels: 8 | - feature-request 9 | 10 | # Label to add before locking, such as `outdated`. Set to `false` to disable 11 | lockLabel: false 12 | 13 | # Comment to post before locking. Set to `false` to disable 14 | lockComment: false 15 | 16 | # Assign `resolved` as the reason for locking. 
Set to `false` to disable 17 | setLockReason: true 18 | 19 | # Limit to only `issues` or `pulls` 20 | # only: issues 21 | 22 | # Optionally, specify configuration settings just for `issues` or `pulls` 23 | # issues: 24 | # exemptLabels: 25 | # - help-wanted 26 | # lockLabel: outdated 27 | 28 | # pulls: 29 | # daysUntilLock: 30 30 | 31 | # Repository to extend settings from 32 | # _extends: repo 33 | -------------------------------------------------------------------------------- /.github/workflows/freebsd.yml: -------------------------------------------------------------------------------- 1 | name: FreeBSD 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read # to fetch code (actions/checkout) 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | timeout-minutes: 20 16 | name: A job to run test in FreeBSD 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | submodules: 'true' 21 | - name: Test in FreeBSD 22 | id: test 23 | uses: vmactions/freebsd-vm@v1 24 | with: 25 | usesh: true 26 | prepare: | 27 | pkg install -y cmake git ninja googletest bash 28 | run: | 29 | bash ops/pipeline/test-freebsd.sh 30 | -------------------------------------------------------------------------------- /.github/workflows/i386.yml: -------------------------------------------------------------------------------- 1 | name: XGBoost-i386-test 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read # to fetch code (actions/checkout) 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build-32bit: 14 | name: Build 32-bit 15 | runs-on: 16 | - runs-on=${{ github.run_id }} 17 | - runner=linux-amd64-cpu 18 | - tag=i386-build-32bit 19 | steps: 20 | # Restart Docker daemon so that it recognizes the 
ephemeral disks 21 | - run: sudo systemctl restart docker 22 | - uses: actions/checkout@v4 23 | with: 24 | submodules: "true" 25 | - name: Log into Docker registry (AWS ECR) 26 | run: bash ops/pipeline/login-docker-registry.sh 27 | - run: bash ops/pipeline/test-cpp-i386.sh 28 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dmlc-core"] 2 | path = dmlc-core 3 | url = https://github.com/dmlc/dmlc-core 4 | branch = main 5 | [submodule "gputreeshap"] 6 | path = gputreeshap 7 | url = https://github.com/rapidsai/gputreeshap.git 8 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | submodules: 9 | include: all 10 | 11 | # Set the version of Python and other tools you might need 12 | build: 13 | os: ubuntu-22.04 14 | tools: 15 | python: "3.10" 16 | apt_packages: 17 | - graphviz 18 | - cmake 19 | - g++ 20 | - doxygen 21 | - ninja-build 22 | 23 | # Build documentation in the docs/ directory with Sphinx 24 | sphinx: 25 | configuration: doc/conf.py 26 | 27 | # If using Sphinx, optionally build your docs in additional formats such as PDF 28 | formats: 29 | - pdf 30 | 31 | # Optionally declare the Python requirements required to build your docs 32 | python: 33 | install: 34 | - requirements: doc/requirements.txt 35 | -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- 1 | @inproceedings{Chen:2016:XST:2939672.2939785, 2 | author = {Chen, Tianqi and Guestrin, Carlos}, 3 | title = {{XGBoost}: A Scalable Tree 
Boosting System}, 4 | booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, 5 | series = {KDD '16}, 6 | year = {2016}, 7 | isbn = {978-1-4503-4232-2}, 8 | location = {San Francisco, California, USA}, 9 | pages = {785--794}, 10 | numpages = {10}, 11 | url = {http://doi.acm.org/10.1145/2939672.2939785}, 12 | doi = {10.1145/2939672.2939785}, 13 | acmid = {2939785}, 14 | publisher = {ACM}, 15 | address = {New York, NY, USA}, 16 | keywords = {large-scale machine learning}, 17 | } 18 | -------------------------------------------------------------------------------- /R-package/.Rbuildignore: -------------------------------------------------------------------------------- 1 | \.o$ 2 | \.so$ 3 | \.dll$ 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | README.md 7 | ^doc$ 8 | ^Meta$ 9 | ^_pkgdown\.yml$ 10 | ^docs$ 11 | ^pkgdown$ 12 | -------------------------------------------------------------------------------- /R-package/.gitignore: -------------------------------------------------------------------------------- 1 | docs 2 | -------------------------------------------------------------------------------- /R-package/R/xgb.DMatrix.save.R: -------------------------------------------------------------------------------- 1 | #' Save xgb.DMatrix object to binary file 2 | #' 3 | #' Save xgb.DMatrix object to binary file 4 | #' 5 | #' @param dmatrix the `xgb.DMatrix` object 6 | #' @param fname the name of the file to write. 
7 | #' 8 | #' @examples 9 | #' \dontshow{RhpcBLASctl::omp_set_num_threads(1)} 10 | #' data(agaricus.train, package = "xgboost") 11 | #' 12 | #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) 13 | #' fname <- file.path(tempdir(), "xgb.DMatrix.data") 14 | #' xgb.DMatrix.save(dtrain, fname) 15 | #' dtrain <- xgb.DMatrix(fname, nthread = 1) 16 | #' @export 17 | xgb.DMatrix.save <- function(dmatrix, fname) { 18 | if (typeof(fname) != "character") 19 | stop("fname must be character") 20 | if (!inherits(dmatrix, "xgb.DMatrix")) 21 | stop("dmatrix must be xgb.DMatrix") 22 | 23 | fname <- path.expand(fname) 24 | .Call(XGDMatrixSaveBinary_R, dmatrix, fname[1], 0L) 25 | return(TRUE) 26 | } 27 | -------------------------------------------------------------------------------- /R-package/R/xgb.load.raw.R: -------------------------------------------------------------------------------- 1 | #' Load serialised XGBoost model from R's raw vector 2 | #' 3 | #' User can generate raw memory buffer by calling [xgb.save.raw()]. 4 | #' 5 | #' @param buffer The buffer returned by [xgb.save.raw()]. 
6 | #' @export 7 | xgb.load.raw <- function(buffer) { 8 | cachelist <- list() 9 | bst <- .Call(XGBoosterCreate_R, cachelist) 10 | .Call(XGBoosterLoadModelFromRaw_R, xgb.get.handle(bst), buffer) 11 | return(bst) 12 | } 13 | -------------------------------------------------------------------------------- /R-package/cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -f src/Makevars 4 | -------------------------------------------------------------------------------- /R-package/configure.win: -------------------------------------------------------------------------------- 1 | R_EXE="${R_HOME}/bin${R_ARCH_BIN}/R.exe" 2 | CXX=`"${R_EXE}" CMD config CXX` 3 | 4 | cat > test.cpp <<EOL 5 | #include <xmmintrin.h> 6 | int main() { 7 | char data = 0; 8 | const char* address = &data; 9 | _mm_prefetch(address, _MM_HINT_NTA); 10 | return 0; 11 | } 12 | EOL 13 | 14 | XGBOOST_MM_PREFETCH_PRESENT="" 15 | ${CXX} -o test test.cpp 2>/dev/null && ./test && XGBOOST_MM_PREFETCH_PRESENT="-DXGBOOST_MM_PREFETCH_PRESENT=1" 16 | rm -f ./test 17 | rm -f ./test.cpp 18 | 19 | sed \ 20 | -e "s/@XGBOOST_MM_PREFETCH_PRESENT@/$XGBOOST_MM_PREFETCH_PRESENT/" \ 21 | < src/Makevars.win.in > src/Makevars.win 22 | -------------------------------------------------------------------------------- /R-package/data/agaricus.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/R-package/data/agaricus.test.rda -------------------------------------------------------------------------------- /R-package/data/agaricus.train.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/R-package/data/agaricus.train.rda -------------------------------------------------------------------------------- /R-package/man/agaricus.test.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgboost.R 3 | \docType{data} 4 | \name{agaricus.test} 5 | \alias{agaricus.test} 6 | \title{Test part from Mushroom Data Set} 7 | \format{ 8 | A list containing a label vector, and a dgCMatrix object with 1611 9 | rows and 126 variables 10 | } 11 | \usage{ 12 | data(agaricus.test) 13 | } 14 | \description{ 15 | This data set is originally from the Mushroom data set, 16 | UCI Machine Learning Repository. 17 | } 18 | \details{ 19 | It includes the following fields: 20 | \itemize{ 21 | \item \code{label}: The label for each record. 22 | \item \code{data}: A sparse Matrix of 'dgCMatrix' class with 126 columns. 23 | } 24 | } 25 | \references{ 26 | \url{https://archive.ics.uci.edu/ml/datasets/Mushroom} 27 | 28 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository 29 | \url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California, 30 | School of Information and Computer Science. 31 | } 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /R-package/man/agaricus.train.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgboost.R 3 | \docType{data} 4 | \name{agaricus.train} 5 | \alias{agaricus.train} 6 | \title{Training part from Mushroom Data Set} 7 | \format{ 8 | A list containing a label vector, and a dgCMatrix object with 6513 9 | rows and 127 variables 10 | } 11 | \usage{ 12 | data(agaricus.train) 13 | } 14 | \description{ 15 | This data set is originally from the Mushroom data set, 16 | UCI Machine Learning Repository. 17 | } 18 | \details{ 19 | It includes the following fields: 20 | \itemize{ 21 | \item \code{label}: The label for each record. 
22 | \item \code{data}: A sparse Matrix of 'dgCMatrix' class with 126 columns. 23 | } 24 | } 25 | \references{ 26 | \url{https://archive.ics.uci.edu/ml/datasets/Mushroom} 27 | 28 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository 29 | \url{http://archive.ics.uci.edu/ml}. Irvine, CA: University of California, 30 | School of Information and Computer Science. 31 | } 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /R-package/man/dim.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{dim.xgb.DMatrix} 4 | \alias{dim.xgb.DMatrix} 5 | \title{Dimensions of xgb.DMatrix} 6 | \usage{ 7 | \method{dim}{xgb.DMatrix}(x) 8 | } 9 | \arguments{ 10 | \item{x}{Object of class \code{xgb.DMatrix}} 11 | } 12 | \description{ 13 | Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}. 14 | } 15 | \details{ 16 | Note: since \code{\link[=nrow]{nrow()}} and \code{\link[=ncol]{ncol()}} internally use \code{\link[=dim]{dim()}}, they can also 17 | be directly used with an \code{xgb.DMatrix} object. 
18 | } 19 | \examples{ 20 | data(agaricus.train, package = "xgboost") 21 | 22 | train <- agaricus.train 23 | dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2) 24 | 25 | stopifnot(nrow(dtrain) == nrow(train$data)) 26 | stopifnot(ncol(dtrain) == ncol(train$data)) 27 | stopifnot(all(dim(dtrain) == dim(train$data))) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /R-package/man/print.xgb.Booster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{print.xgb.Booster} 4 | \alias{print.xgb.Booster} 5 | \title{Print xgb.Booster} 6 | \usage{ 7 | \method{print}{xgb.Booster}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An \code{xgb.Booster} object.} 11 | 12 | \item{...}{Not used.} 13 | } 14 | \value{ 15 | The same \code{x} object, returned invisibly 16 | } 17 | \description{ 18 | Print information about \code{xgb.Booster}. 19 | } 20 | \examples{ 21 | data(agaricus.train, package = "xgboost") 22 | train <- agaricus.train 23 | 24 | bst <- xgb.train( 25 | data = xgb.DMatrix(train$data, label = train$label, nthread = 1), 26 | nrounds = 2, 27 | params = xgb.params( 28 | max_depth = 2, 29 | nthread = 2, 30 | objective = "binary:logistic" 31 | ) 32 | ) 33 | 34 | attr(bst, "myattr") <- "memo" 35 | 36 | print(bst) 37 | } 38 | -------------------------------------------------------------------------------- /R-package/man/print.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{print.xgb.DMatrix} 4 | \alias{print.xgb.DMatrix} 5 | \title{Print xgb.DMatrix} 6 | \usage{ 7 | \method{print}{xgb.DMatrix}(x, verbose = FALSE, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{An xgb.DMatrix object.} 11 | 12 | \item{verbose}{Whether to print colnames (when present).} 13 | 14 | \item{...}{Not currently used.} 15 | } 16 | \description{ 17 | Print information about xgb.DMatrix. 18 | Currently it displays dimensions and presence of info-fields and colnames. 19 | } 20 | \examples{ 21 | data(agaricus.train, package = "xgboost") 22 | 23 | dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) 24 | dtrain 25 | 26 | print(dtrain, verbose = TRUE) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /R-package/man/print.xgb.cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.cv.R 3 | \name{print.xgb.cv.synchronous} 4 | \alias{print.xgb.cv.synchronous} 5 | \title{Print xgb.cv result} 6 | \usage{ 7 | \method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An \code{xgb.cv.synchronous} object.} 11 | 12 | \item{verbose}{Whether to print detailed data.} 13 | 14 | \item{...}{Passed to \code{data.table.print()}.} 15 | } 16 | \description{ 17 | Prints formatted results of \code{\link[=xgb.cv]{xgb.cv()}}. 18 | } 19 | \details{ 20 | When not verbose, it would only print the evaluation results, 21 | including the best iteration (when available). 
22 | } 23 | \examples{ 24 | data(agaricus.train, package = "xgboost") 25 | 26 | train <- agaricus.train 27 | cv <- xgb.cv( 28 | data = xgb.DMatrix(train$data, label = train$label, nthread = 1), 29 | nfold = 5, 30 | nrounds = 2, 31 | params = xgb.params( 32 | max_depth = 2, 33 | nthread = 2, 34 | objective = "binary:logistic" 35 | ) 36 | ) 37 | print(cv) 38 | print(cv, verbose = TRUE) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /R-package/man/print.xgboost.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgboost.R 3 | \name{print.xgboost} 4 | \alias{print.xgboost} 5 | \title{Print info from XGBoost model} 6 | \usage{ 7 | \method{print}{xgboost}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An XGBoost model object of class \code{xgboost}, as produced by function \code{\link[=xgboost]{xgboost()}}.} 11 | 12 | \item{...}{Not used.} 13 | } 14 | \value{ 15 | Same object \code{x}, after printing its info. 16 | } 17 | \description{ 18 | Prints basic properties of an XGBoost model object. 19 | } 20 | -------------------------------------------------------------------------------- /R-package/man/variable.names.xgb.Booster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{variable.names.xgb.Booster} 4 | \alias{variable.names.xgb.Booster} 5 | \title{Get Features Names from Booster} 6 | \usage{ 7 | \method{variable.names}{xgb.Booster}(object, ...) 
8 | } 9 | \arguments{ 10 | \item{object}{An \code{xgb.Booster} object.} 11 | 12 | \item{...}{Not used.} 13 | } 14 | \description{ 15 | Returns the feature / variable / column names from a fitted 16 | booster object, which are set automatically during the call to \code{\link[=xgb.train]{xgb.train()}} 17 | from the DMatrix names, or which can be set manually through \code{\link[=setinfo]{setinfo()}}. 18 | 19 | If the object doesn't have feature names, will return \code{NULL}. 20 | 21 | It is equivalent to calling \code{getinfo(object, "feature_name")}. 22 | } 23 | -------------------------------------------------------------------------------- /R-package/man/xgb.DMatrix.hasinfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{xgb.DMatrix.hasinfo} 4 | \alias{xgb.DMatrix.hasinfo} 5 | \title{Check whether DMatrix object has a field} 6 | \usage{ 7 | xgb.DMatrix.hasinfo(object, info) 8 | } 9 | \arguments{ 10 | \item{object}{The DMatrix object to check for the given \code{info} field.} 11 | 12 | \item{info}{The field to check for presence or absence in \code{object}.} 13 | } 14 | \description{ 15 | Checks whether an xgb.DMatrix object has a given field assigned to 16 | it, such as weights, labels, etc. 
17 | } 18 | \examples{ 19 | x <- matrix(1:10, nrow = 5) 20 | dm <- xgb.DMatrix(x, nthread = 1) 21 | 22 | # 'dm' so far does not have any fields set 23 | xgb.DMatrix.hasinfo(dm, "label") 24 | 25 | # Fields can be added after construction 26 | setinfo(dm, "label", 1:5) 27 | xgb.DMatrix.hasinfo(dm, "label") 28 | } 29 | \seealso{ 30 | \code{\link[=xgb.DMatrix]{xgb.DMatrix()}}, \code{\link[=getinfo.xgb.DMatrix]{getinfo.xgb.DMatrix()}}, \code{\link[=setinfo.xgb.DMatrix]{setinfo.xgb.DMatrix()}} 31 | } 32 | -------------------------------------------------------------------------------- /R-package/man/xgb.DMatrix.save.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.save.R 3 | \name{xgb.DMatrix.save} 4 | \alias{xgb.DMatrix.save} 5 | \title{Save xgb.DMatrix object to binary file} 6 | \usage{ 7 | xgb.DMatrix.save(dmatrix, fname) 8 | } 9 | \arguments{ 10 | \item{dmatrix}{the \code{xgb.DMatrix} object} 11 | 12 | \item{fname}{the name of the file to write.} 13 | } 14 | \description{ 15 | Save xgb.DMatrix object to binary file 16 | } 17 | \examples{ 18 | \dontshow{RhpcBLASctl::omp_set_num_threads(1)} 19 | data(agaricus.train, package = "xgboost") 20 | 21 | dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) 22 | fname <- file.path(tempdir(), "xgb.DMatrix.data") 23 | xgb.DMatrix.save(dtrain, fname) 24 | dtrain <- xgb.DMatrix(fname, nthread = 1) 25 | } 26 | -------------------------------------------------------------------------------- /R-package/man/xgb.cb.evaluation.log.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{xgb.cb.evaluation.log} 4 | \alias{xgb.cb.evaluation.log} 5 | \title{Callback for logging the evaluation history} 6 | \usage{ 7 | xgb.cb.evaluation.log() 8 | } 9 | 
\value{ 10 | An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}. 11 | } 12 | \description{ 13 | Callback for logging the evaluation history 14 | } 15 | \details{ 16 | This callback creates a table with per-iteration evaluation metrics (see parameters 17 | \code{evals} and \code{custom_metric} in \code{\link[=xgb.train]{xgb.train()}}). 18 | 19 | Note: in the column names of the final data.table, the dash '-' character is replaced with 20 | the underscore '_' in order to make the column names more like regular R identifiers. 21 | } 22 | \seealso{ 23 | \link{xgb.cb.print.evaluation} 24 | } 25 | -------------------------------------------------------------------------------- /R-package/man/xgb.cb.print.evaluation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{xgb.cb.print.evaluation} 4 | \alias{xgb.cb.print.evaluation} 5 | \title{Callback for printing the result of evaluation} 6 | \usage{ 7 | xgb.cb.print.evaluation(period = 1, showsd = TRUE) 8 | } 9 | \arguments{ 10 | \item{period}{Results would be printed every number of periods.} 11 | 12 | \item{showsd}{Whether standard deviations should be printed (when available).} 13 | } 14 | \value{ 15 | An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}. 16 | } 17 | \description{ 18 | The callback function prints the result of evaluation at every \code{period} iterations. 19 | The initial and the last iteration's evaluations are always printed. 20 | 21 | Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that). 
22 | } 23 | \seealso{ 24 | \link{xgb.Callback} 25 | } 26 | -------------------------------------------------------------------------------- /R-package/man/xgb.cb.save.model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{xgb.cb.save.model} 4 | \alias{xgb.cb.save.model} 5 | \title{Callback for saving a model file} 6 | \usage{ 7 | xgb.cb.save.model(save_period = 0, save_name = "xgboost.ubj") 8 | } 9 | \arguments{ 10 | \item{save_period}{Save the model to disk after every \code{save_period} iterations; 11 | 0 means save the model at the end.} 12 | 13 | \item{save_name}{The name or path for the saved model file. 14 | It can contain a \code{\link[=sprintf]{sprintf()}} formatting specifier to include the integer 15 | iteration number in the file name. E.g., with \code{save_name = 'xgboost_\%04d.model'}, 16 | the file saved at iteration 50 would be named "xgboost_0050.model".} 17 | } 18 | \value{ 19 | An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}}, 20 | but \strong{not} to \code{\link[=xgb.cv]{xgb.cv()}}. 21 | } 22 | \description{ 23 | This callback function allows to save an xgb-model file, either periodically 24 | after each \code{save_period}'s or at the end. 25 | 26 | Does not leave any attribute in the booster. 
27 | } 28 | -------------------------------------------------------------------------------- /R-package/man/xgb.get.DMatrix.data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{xgb.get.DMatrix.data} 4 | \alias{xgb.get.DMatrix.data} 5 | \title{Get DMatrix Data} 6 | \usage{ 7 | xgb.get.DMatrix.data(dmat) 8 | } 9 | \arguments{ 10 | \item{dmat}{An \code{xgb.DMatrix} object, as returned by \code{\link[=xgb.DMatrix]{xgb.DMatrix()}}.} 11 | } 12 | \value{ 13 | The data held in the DMatrix, as a sparse CSR matrix (class \code{dgRMatrix} 14 | from package \code{Matrix}). If it had feature names, these will be added as column names 15 | in the output. 16 | } 17 | \description{ 18 | Get DMatrix Data 19 | } 20 | -------------------------------------------------------------------------------- /R-package/man/xgb.get.DMatrix.num.non.missing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{xgb.get.DMatrix.num.non.missing} 4 | \alias{xgb.get.DMatrix.num.non.missing} 5 | \title{Get Number of Non-Missing Entries in DMatrix} 6 | \usage{ 7 | xgb.get.DMatrix.num.non.missing(dmat) 8 | } 9 | \arguments{ 10 | \item{dmat}{An \code{xgb.DMatrix} object, as returned by \code{\link[=xgb.DMatrix]{xgb.DMatrix()}}.} 11 | } 12 | \value{ 13 | The number of non-missing entries in the DMatrix. 
14 | } 15 | \description{ 16 | Get Number of Non-Missing Entries in DMatrix 17 | } 18 | -------------------------------------------------------------------------------- /R-package/man/xgb.get.num.boosted.rounds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{xgb.get.num.boosted.rounds} 4 | \alias{xgb.get.num.boosted.rounds} 5 | \alias{length.xgb.Booster} 6 | \title{Get number of boosting in a fitted booster} 7 | \usage{ 8 | xgb.get.num.boosted.rounds(model) 9 | 10 | \method{length}{xgb.Booster}(x) 11 | } 12 | \arguments{ 13 | \item{model, x}{A fitted \code{xgb.Booster} model.} 14 | } 15 | \value{ 16 | The number of rounds saved in the model as an integer. 17 | } 18 | \description{ 19 | Get number of boosting in a fitted booster 20 | } 21 | \details{ 22 | Note that setting booster parameters related to training 23 | continuation / updates through \code{\link[=xgb.model.parameters<-]{xgb.model.parameters<-()}} will reset the 24 | number of rounds to zero. 25 | } 26 | -------------------------------------------------------------------------------- /R-package/man/xgb.load.raw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.load.raw.R 3 | \name{xgb.load.raw} 4 | \alias{xgb.load.raw} 5 | \title{Load serialised XGBoost model from R's raw vector} 6 | \usage{ 7 | xgb.load.raw(buffer) 8 | } 9 | \arguments{ 10 | \item{buffer}{The buffer returned by \code{\link[=xgb.save.raw]{xgb.save.raw()}}.} 11 | } 12 | \description{ 13 | User can generate raw memory buffer by calling \code{\link[=xgb.save.raw]{xgb.save.raw()}}. 
14 | } 15 | -------------------------------------------------------------------------------- /R-package/remove_warning_suppression_pragma.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # remove all #pragma's that suppress compiler warnings 3 | set -e 4 | set -x 5 | for file in xgboost/src/dmlc-core/include/dmlc/*.h 6 | do 7 | sed -i.bak -e 's/^.*#pragma GCC diagnostic.*$//' -e 's/^.*#pragma clang diagnostic.*$//' -e 's/^.*#pragma warning.*$//' "${file}" 8 | done 9 | for file in xgboost/src/dmlc-core/include/dmlc/*.h.bak 10 | do 11 | rm "${file}" 12 | done 13 | set +x 14 | set +e 15 | -------------------------------------------------------------------------------- /R-package/src/xgboost-win.def: -------------------------------------------------------------------------------- 1 | LIBRARY xgboost.dll 2 | EXPORTS 3 | R_init_xgboost 4 | -------------------------------------------------------------------------------- /R-package/tests/helper_scripts/run-examples.R: -------------------------------------------------------------------------------- 1 | ## Helper script for running individual examples. 
2 | library(pkgload) 3 | library(xgboost) 4 | 5 | files <- list.files("./man") 6 | 7 | 8 | run_example_timeit <- function(f) { 9 | path <- paste("./man/", f, sep = "") 10 | print(paste("Test", f)) 11 | flush.console() 12 | t0 <- proc.time() 13 | run_example(path) 14 | t1 <- proc.time() 15 | list(file = f, time = t1 - t0) 16 | } 17 | 18 | timings <- lapply(files, run_example_timeit) 19 | 20 | for (t in timings) { 21 | ratio <- t$time[1] / t$time[3] 22 | if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) { 23 | print(paste("Offending example:", t$file, ratio)) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /R-package/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(xgboost) 3 | library(Matrix) 4 | 5 | test_check("xgboost", reporter = ProgressReporter) 6 | RhpcBLASctl::omp_set_num_threads(1) 7 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_config.R: -------------------------------------------------------------------------------- 1 | context('Test global configuration') 2 | 3 | test_that('Global configuration works with verbosity', { 4 | old_verbosity <- xgb.get.config()$verbosity 5 | for (v in c(0, 1, 2, 3)) { 6 | xgb.set.config(verbosity = v) 7 | expect_equal(xgb.get.config()$verbosity, v) 8 | } 9 | xgb.set.config(verbosity = old_verbosity) 10 | expect_equal(xgb.get.config()$verbosity, old_verbosity) 11 | }) 12 | 13 | test_that('Global configuration works with use_rmm flag', { 14 | old_use_rmm_flag <- xgb.get.config()$use_rmm 15 | for (v in c(TRUE, FALSE)) { 16 | xgb.set.config(use_rmm = v) 17 | expect_equal(xgb.get.config()$use_rmm, v) 18 | } 19 | xgb.set.config(use_rmm = old_use_rmm_flag) 20 | expect_equal(xgb.get.config()$use_rmm, old_use_rmm_flag) 21 | }) 22 | -------------------------------------------------------------------------------- 
/R-package/tests/testthat/test_feature_weights.R: -------------------------------------------------------------------------------- 1 | context("feature weights") 2 | 3 | n_threads <- 2 4 | 5 | test_that("training with feature weights works", { 6 | nrows <- 1000 7 | ncols <- 9 8 | set.seed(2022) 9 | x <- matrix(rnorm(nrows * ncols), nrow = nrows) 10 | y <- rowSums(x) 11 | weights <- seq(from = 1, to = ncols) 12 | 13 | test <- function(tm) { 14 | names <- paste0("f", 1:ncols) 15 | xy <- xgb.DMatrix( 16 | data = x, label = y, feature_weights = weights, nthread = n_threads 17 | ) 18 | params <- list( 19 | colsample_bynode = 0.4, tree_method = tm, nthread = n_threads 20 | ) 21 | model <- xgb.train(params = params, data = xy, nrounds = 32) 22 | importance <- xgb.importance(model = model, feature_names = names) 23 | expect_equal(dim(importance), c(ncols, 4)) 24 | importance <- importance[order(importance$Feature)] 25 | expect_lt(importance[1, Frequency], importance[9, Frequency]) 26 | } 27 | 28 | for (tm in c("hist", "approx")) { 29 | test(tm) 30 | } 31 | }) 32 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_monotone.R: -------------------------------------------------------------------------------- 1 | context("monotone constraints") 2 | 3 | set.seed(1024) 4 | x <- rnorm(1000, 10) 5 | y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x) 6 | train <- matrix(x, ncol = 1) 7 | 8 | 9 | test_that("monotone constraints for regression", { 10 | bst <- xgb.train( 11 | data = xgb.DMatrix(train, label = y, nthread = 1), 12 | nrounds = 100, verbose = 0, 13 | params = xgb.params( 14 | max_depth = 2, 15 | learning_rate = 0.1, 16 | nthread = 2, 17 | monotone_constraints = -1 18 | ) 19 | ) 20 | 21 | pred <- predict(bst, train) 22 | 23 | ind <- order(train[, 1]) 24 | pred.ord <- pred[ind] 25 | expect_true({ 26 | !any(diff(pred.ord) > 0) 27 | }, "Monotone constraint satisfied") 28 | }) 29 | 
-------------------------------------------------------------------------------- /R-package/tests/testthat/test_parameter_exposure.R: -------------------------------------------------------------------------------- 1 | context('Test model params and call are exposed to R') 2 | 3 | data(agaricus.train, package = 'xgboost') 4 | data(agaricus.test, package = 'xgboost') 5 | 6 | dtrain <- xgb.DMatrix( 7 | agaricus.train$data, label = agaricus.train$label, nthread = 2 8 | ) 9 | dtest <- xgb.DMatrix( 10 | agaricus.test$data, label = agaricus.test$label, nthread = 2 11 | ) 12 | 13 | bst <- xgb.train( 14 | data = dtrain, 15 | verbose = 0, 16 | nrounds = 10, 17 | params = xgb.params( 18 | max_depth = 2, 19 | learning_rate = 1, 20 | nthread = 1, 21 | objective = "binary:logistic" 22 | ) 23 | ) 24 | 25 | test_that("call is exposed to R", { 26 | expect_false(is.null(attributes(bst)$call)) 27 | expect_is(attributes(bst)$call, "call") 28 | }) 29 | 30 | test_that("params is exposed to R", { 31 | model_params <- attributes(bst)$params 32 | expect_is(model_params, "list") 33 | expect_equal(model_params$learning_rate, 1) 34 | expect_equal(model_params$max_depth, 2) 35 | expect_equal(model_params$objective, "binary:logistic") 36 | }) 37 | -------------------------------------------------------------------------------- /R-package/tests/testthat/test_unicode.R: -------------------------------------------------------------------------------- 1 | context("Test Unicode handling") 2 | 3 | data(agaricus.train, package = 'xgboost') 4 | data(agaricus.test, package = 'xgboost') 5 | train <- agaricus.train 6 | test <- agaricus.test 7 | set.seed(1994) 8 | 9 | test_that("Can save and load models with Unicode paths", { 10 | nrounds <- 2 11 | bst <- xgb.train( 12 | data = xgb.DMatrix(train$data, label = train$label, nthread = 1), 13 | nrounds = nrounds, 14 | params = xgb.params( 15 | max_depth = 2, 16 | nthread = 2, 17 | objective = "binary:logistic" 18 | ) 19 | ) 20 | tmpdir <- tempdir() 21 | 
lapply(c("모델.json", "がうる・ぐら.json", "类继承.ubj"), function(x) { 22 | path <- file.path(tmpdir, x) 23 | xgb.save(bst, path) 24 | bst2 <- xgb.load(path) 25 | xgb.model.parameters(bst2) <- list(nthread = 2) 26 | expect_equal(predict(bst, test$data), predict(bst2, test$data)) 27 | }) 28 | }) 29 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | 7 | Security updates are applied only to the most recent release. 8 | 9 | ## Reporting a Vulnerability 10 | 11 | 16 | 17 | To report a security issue, please email 18 | [security@xgboost-ci.net](mailto:security@xgboost-ci.net) 19 | with a description of the issue, the steps you took to create the issue, 20 | affected versions, and, if known, mitigations for the issue. 21 | 22 | All support will be provided on a best-effort basis, so please indicate the "urgency level" of the vulnerability as Critical, High, Medium or Low. 23 | -------------------------------------------------------------------------------- /amalgamation/dmlc-minimum0.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2015 by Contributors. 3 | * \brief Minimum DMLC library Amalgamation, used for easy plugin of dmlc lib. 4 | * Normally this is not needed. 
5 | */ 6 | #include "../dmlc-core/src/io/line_split.cc" 7 | #include "../dmlc-core/src/io/recordio_split.cc" 8 | #include "../dmlc-core/src/io/input_split_base.cc" 9 | #include "../dmlc-core/src/io/local_filesys.cc" 10 | #include "../dmlc-core/src/io/filesys.cc" 11 | #include "../dmlc-core/src/io/indexed_recordio_split.cc" 12 | #include "../dmlc-core/src/data.cc" 13 | #include "../dmlc-core/src/io.cc" 14 | #include "../dmlc-core/src/recordio.cc" 15 | -------------------------------------------------------------------------------- /cmake/Doc.cmake: -------------------------------------------------------------------------------- 1 | function(run_doxygen) 2 | find_package(Doxygen REQUIRED) 3 | 4 | if(NOT DOXYGEN_DOT_FOUND) 5 | message(FATAL_ERROR "Command `dot` not found. Please install graphviz.") 6 | endif() 7 | 8 | configure_file( 9 | ${xgboost_SOURCE_DIR}/doc/Doxyfile.in 10 | ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY) 11 | add_custom_target( 12 | doc_doxygen ALL 13 | COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile 14 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 15 | COMMENT "Generate C APIs documentation." 
16 | VERBATIM) 17 | endfunction() 18 | -------------------------------------------------------------------------------- /cmake/PrefetchIntrinsics.cmake: -------------------------------------------------------------------------------- 1 | function(find_prefetch_intrinsics) 2 | include(CheckCXXSourceCompiles) 3 | check_cxx_source_compiles(" 4 | #include <xmmintrin.h> 5 | int main() { 6 | char data = 0; 7 | const char* address = &data; 8 | _mm_prefetch(address, _MM_HINT_NTA); 9 | return 0; 10 | } 11 | " XGBOOST_MM_PREFETCH_PRESENT) 12 | check_cxx_source_compiles(" 13 | int main() { 14 | char data = 0; 15 | const char* address = &data; 16 | __builtin_prefetch(address, 0, 0); 17 | return 0; 18 | } 19 | " XGBOOST_BUILTIN_PREFETCH_PRESENT) 20 | set(XGBOOST_MM_PREFETCH_PRESENT ${XGBOOST_MM_PREFETCH_PRESENT} PARENT_SCOPE) 21 | set(XGBOOST_BUILTIN_PREFETCH_PRESENT ${XGBOOST_BUILTIN_PREFETCH_PRESENT} PARENT_SCOPE) 22 | endfunction() 23 | -------------------------------------------------------------------------------- /cmake/RPackageInstallTargetSetup.cmake: -------------------------------------------------------------------------------- 1 | # Assembles the R-package files in build_dir; 2 | # if necessary, installs the main R package dependencies; 3 | # runs R CMD INSTALL. 
4 | function(setup_rpackage_install_target rlib_target build_dir) 5 | configure_file(${PROJECT_SOURCE_DIR}/cmake/RPackageInstall.cmake.in ${PROJECT_BINARY_DIR}/RPackageInstall.cmake @ONLY) 6 | install( 7 | DIRECTORY "${xgboost_SOURCE_DIR}/R-package" 8 | DESTINATION "${build_dir}" 9 | PATTERN "src/*" EXCLUDE 10 | PATTERN "R-package/configure" EXCLUDE 11 | ) 12 | install(TARGETS ${rlib_target} 13 | LIBRARY DESTINATION "${build_dir}/R-package/src/" 14 | RUNTIME DESTINATION "${build_dir}/R-package/src/") 15 | install(SCRIPT ${PROJECT_BINARY_DIR}/RPackageInstall.cmake) 16 | endfunction() 17 | -------------------------------------------------------------------------------- /cmake/Version.cmake: -------------------------------------------------------------------------------- 1 | function(write_version) 2 | message(STATUS "xgboost VERSION: ${xgboost_VERSION}") 3 | configure_file( 4 | ${xgboost_SOURCE_DIR}/cmake/version_config.h.in 5 | ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h 6 | @ONLY 7 | NEWLINE_STYLE UNIX) 8 | endfunction() 9 | -------------------------------------------------------------------------------- /cmake/modules/FindASan.cmake: -------------------------------------------------------------------------------- 1 | set(ASan_LIB_NAME ASan) 2 | 3 | find_library(ASan_LIBRARY 4 | NAMES libasan.so libasan.so.6 libasan.so.5 libasan.so.4 libasan.so.3 libasan.so.2 libasan.so.1 libasan.so.0 5 | PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib) 6 | 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(ASan DEFAULT_MSG 9 | ASan_LIBRARY) 10 | 11 | mark_as_advanced( 12 | ASan_LIBRARY 13 | ASan_LIB_NAME) 14 | -------------------------------------------------------------------------------- /cmake/modules/FindLSan.cmake: -------------------------------------------------------------------------------- 1 | set(LSan_LIB_NAME lsan) 2 | 3 | find_library(LSan_LIBRARY 4 | NAMES liblsan.so 
liblsan.so.0 liblsan.so.0.0.0 5 | PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib) 6 | 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(LSan DEFAULT_MSG 9 | LSan_LIBRARY) 10 | 11 | mark_as_advanced( 12 | LSan_LIBRARY 13 | LSan_LIB_NAME) 14 | -------------------------------------------------------------------------------- /cmake/modules/FindNVML.cmake: -------------------------------------------------------------------------------- 1 | if(NVML_LIBRARY) 2 | unset(NVML_LIBRARY CACHE) 3 | endif() 4 | 5 | set(NVML_LIB_NAME nvml) 6 | 7 | find_path(NVML_INCLUDE_DIR 8 | NAMES nvml.h 9 | PATHS ${CUDA_HOME}/include ${CUDA_INCLUDE} /usr/local/cuda/include) 10 | 11 | find_library(NVML_LIBRARY 12 | NAMES nvidia-ml) 13 | 14 | message(STATUS "Using nvml library: ${NVML_LIBRARY}") 15 | 16 | include(FindPackageHandleStandardArgs) 17 | find_package_handle_standard_args(NVML DEFAULT_MSG 18 | NVML_INCLUDE_DIR NVML_LIBRARY) 19 | 20 | mark_as_advanced( 21 | NVML_INCLUDE_DIR 22 | NVML_LIBRARY 23 | ) 24 | -------------------------------------------------------------------------------- /cmake/modules/FindTSan.cmake: -------------------------------------------------------------------------------- 1 | set(TSan_LIB_NAME tsan) 2 | 3 | find_library(TSan_LIBRARY 4 | NAMES libtsan.so libtsan.so.0 libtsan.so.0.0.0 5 | PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib) 6 | 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(TSan DEFAULT_MSG 9 | TSan_LIBRARY) 10 | 11 | mark_as_advanced( 12 | TSan_LIBRARY 13 | TSan_LIB_NAME) 14 | -------------------------------------------------------------------------------- /cmake/modules/FindUBSan.cmake: -------------------------------------------------------------------------------- 1 | set(UBSan_LIB_NAME UBSan) 2 | 3 | find_library(UBSan_LIBRARY 4 | NAMES libubsan.so libubsan.so.5 libubsan.so.4 
libubsan.so.3 libubsan.so.2 libubsan.so.1 libubsan.so.0 5 | PATHS ${SANITIZER_PATH} /usr/lib64 /usr/lib /usr/local/lib64 /usr/local/lib ${CMAKE_PREFIX_PATH}/lib) 6 | 7 | include(FindPackageHandleStandardArgs) 8 | find_package_handle_standard_args(UBSan DEFAULT_MSG 9 | UBSan_LIBRARY) 10 | 11 | mark_as_advanced( 12 | UBSan_LIBRARY 13 | UBSan_LIB_NAME) 14 | -------------------------------------------------------------------------------- /cmake/version_config.h.in: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2023 by XGBoost contributors 3 | */ 4 | #ifndef XGBOOST_VERSION_CONFIG_H_ 5 | #define XGBOOST_VERSION_CONFIG_H_ 6 | 7 | #define XGBOOST_VER_MAJOR @xgboost_VERSION_MAJOR@ /* NOLINT */ 8 | #define XGBOOST_VER_MINOR @xgboost_VERSION_MINOR@ /* NOLINT */ 9 | #define XGBOOST_VER_PATCH @xgboost_VERSION_PATCH@ /* NOLINT */ 10 | 11 | #endif // XGBOOST_VERSION_CONFIG_H_ 12 | -------------------------------------------------------------------------------- /cmake/xgboost-config.cmake.in: -------------------------------------------------------------------------------- 1 | @PACKAGE_INIT@ 2 | 3 | set(USE_OPENMP @USE_OPENMP@) 4 | set(USE_CUDA @USE_CUDA@) 5 | set(USE_NCCL @USE_NCCL@) 6 | set(XGBOOST_BUILD_STATIC_LIB @BUILD_STATIC_LIB@) 7 | 8 | include(CMakeFindDependencyMacro) 9 | 10 | if (XGBOOST_BUILD_STATIC_LIB) 11 | find_dependency(Threads) 12 | if(USE_OPENMP) 13 | find_dependency(OpenMP) 14 | endif() 15 | if(USE_CUDA) 16 | find_dependency(CUDA) 17 | endif() 18 | # nccl should be linked statically if xgboost is built as static library. 
19 | endif (XGBOOST_BUILD_STATIC_LIB) 20 | 21 | if(NOT TARGET xgboost::xgboost) 22 | include(${CMAKE_CURRENT_LIST_DIR}/XGBoostTargets.cmake) 23 | endif() 24 | 25 | message(STATUS "Found XGBoost (found version \"${xgboost_VERSION}\")") 26 | -------------------------------------------------------------------------------- /cmake/xgboost.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | version=@xgboost_VERSION@ 3 | exec_prefix=${prefix}/bin 4 | libdir=${prefix}/lib 5 | includedir=${prefix}/include 6 | 7 | Name: xgboost 8 | Description: XGBoost - Scalable and Flexible Gradient Boosting. 9 | Version: ${version} 10 | 11 | Cflags: -I${includedir} 12 | Libs: -L${libdir} -lxgboost 13 | -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | *.libsvm 2 | *.pkl 3 | -------------------------------------------------------------------------------- /demo/CLI/README.rst: -------------------------------------------------------------------------------- 1 | XGBoost Command Line Interface Walkthrough 2 | ========================================== 3 | 4 | Please note that the command line interface is deprecated in 2.1.0, use other language bindings instead. 
For a list of available bindings, see https://xgboost.readthedocs.io/en/stable/ 5 | -------------------------------------------------------------------------------- /demo/CLI/binary_classification/mknfold.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import random 4 | import sys 5 | 6 | if len(sys.argv) < 2: 7 | print ('Usage: [nfold = 5]') 8 | exit(0) 9 | 10 | random.seed( 10 ) 11 | 12 | k = int( sys.argv[2] ) 13 | if len(sys.argv) > 3: 14 | nfold = int( sys.argv[3] ) 15 | else: 16 | nfold = 5 17 | 18 | fi = open( sys.argv[1], 'r' ) 19 | ftr = open( sys.argv[1]+'.train', 'w' ) 20 | fte = open( sys.argv[1]+'.test', 'w' ) 21 | for l in fi: 22 | if random.randint( 1 , nfold ) == k: 23 | fte.write( l ) 24 | else: 25 | ftr.write( l ) 26 | 27 | fi.close() 28 | ftr.close() 29 | fte.close() 30 | -------------------------------------------------------------------------------- /demo/CLI/binary_classification/mushroom.conf: -------------------------------------------------------------------------------- 1 | # General Parameters, see comment for each definition 2 | # choose the booster, can be gbtree or gblinear 3 | booster = gbtree 4 | # choose logistic regression loss function for binary classification 5 | objective = binary:logistic 6 | 7 | # Tree Booster Parameters 8 | # step size shrinkage 9 | eta = 1.0 10 | # minimum loss reduction required to make a further partition 11 | gamma = 1.0 12 | # minimum sum of instance weight(hessian) needed in a child 13 | min_child_weight = 1 14 | # maximum depth of a tree 15 | max_depth = 3 16 | 17 | # Task Parameters 18 | # the number of round to do boosting 19 | num_round = 2 20 | # 0 means do not save any model except the final round model 21 | save_period = 2 22 | # The path of training data 23 | data = "agaricus.txt.train?format=libsvm" 24 | # The path of validation data, used to monitor training process, here [test] sets name of the validation set 25 | 
eval[test] = "agaricus.txt.test?format=libsvm" 26 | # evaluate on training data as well each round 27 | eval_train = 1 28 | # The path of test data 29 | test:data = "agaricus.txt.test?format=libsvm" 30 | -------------------------------------------------------------------------------- /demo/CLI/binary_classification/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # map feature using indicator encoding, also produce featmap.txt 3 | python mapfeat.py 4 | # split train and test 5 | python mknfold.py agaricus.txt 1 6 | 7 | XGBOOST=../../../xgboost 8 | 9 | # training and output the models 10 | $XGBOOST mushroom.conf 11 | # output prediction task=pred 12 | $XGBOOST mushroom.conf task=pred model_in=0002.model 13 | # print the boosters of 0002.model in dump.raw.txt 14 | $XGBOOST mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt 15 | # use the feature map in printing for better visualization 16 | $XGBOOST mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 17 | cat dump.nice.txt 18 | -------------------------------------------------------------------------------- /demo/CLI/distributed-training/mushroom.aws.conf: -------------------------------------------------------------------------------- 1 | # General Parameters, see comment for each definition 2 | # choose the booster, can be gbtree or gblinear 3 | booster = gbtree 4 | # choose logistic regression loss function for binary classification 5 | objective = binary:logistic 6 | 7 | # Tree Booster Parameters 8 | # step size shrinkage 9 | eta = 1.0 10 | # minimum loss reduction required to make a further partition 11 | gamma = 1.0 12 | # minimum sum of instance weight(hessian) needed in a child 13 | min_child_weight = 1 14 | # maximum depth of a tree 15 | max_depth = 3 16 | 17 | # Task Parameters 18 | # the number of round to do boosting 19 | num_round = 2 20 | # 0 means do not save any model except the final round 
model 21 | save_period = 0 22 | # The path of training data 23 | data = "s3://mybucket/xgb-demo/train" 24 | # The path of validation data, used to monitor training process, here [test] sets name of the validation set 25 | # evaluate on training data as well each round 26 | eval_train = 1 27 | 28 | -------------------------------------------------------------------------------- /demo/CLI/distributed-training/run_aws.sh: -------------------------------------------------------------------------------- 1 | # This is the example script to run distributed xgboost on AWS. 2 | # Change the following line for configuration 3 | 4 | export BUCKET=mybucket 5 | 6 | # submit the job to YARN 7 | ../../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2\ 8 | ../../../xgboost mushroom.aws.conf nthread=2\ 9 | data=s3://${BUCKET}/xgb-demo/train\ 10 | eval[test]=s3://${BUCKET}/xgb-demo/test\ 11 | model_dir=s3://${BUCKET}/xgb-demo/model 12 | -------------------------------------------------------------------------------- /demo/CLI/regression/machine.conf: -------------------------------------------------------------------------------- 1 | # General Parameters, see comment for each definition 2 | # choose the tree booster, can also change to gblinear 3 | booster = gbtree 4 | # this is the only difference with classification, use reg:squarederror to do regression 5 | # when labels are in [0,1] we can also use reg:logistic 6 | objective = reg:squarederror 7 | 8 | # Tree Booster Parameters 9 | # step size shrinkage 10 | eta = 1.0 11 | # minimum loss reduction required to make a further partition 12 | gamma = 1.0 13 | # minimum sum of instance weight(hessian) needed in a child 14 | min_child_weight = 1 15 | # maximum depth of a tree 16 | max_depth = 3 17 | 18 | # Task parameters 19 | # the number of round to do boosting 20 | num_round = 2 21 | # 0 means do not save any model except the final round model 22 | save_period = 0 23 | # The path of 
training data 24 | data = "machine.txt.train?format=libsvm" 25 | # The path of validation data, used to monitor training process, here [test] sets name of the validation set 26 | eval[test] = "machine.txt.test?format=libsvm" 27 | # The path of test data 28 | test:data = "machine.txt.test?format=libsvm" 29 | -------------------------------------------------------------------------------- /demo/CLI/regression/mapfeat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | fo = open('machine.txt', 'w') 4 | cnt = 6 5 | fmap = {} 6 | for l in open('machine.data'): 7 | arr = l.split(',') 8 | fo.write(arr[8]) 9 | for i in range(0, 6): 10 | fo.write(' %d:%s' % (i, arr[i + 2])) 11 | 12 | if arr[0] not in fmap: 13 | fmap[arr[0]] = cnt 14 | cnt += 1 15 | 16 | fo.write(' %d:1' % fmap[arr[0]]) 17 | fo.write('\n') 18 | 19 | fo.close() 20 | 21 | # create feature map for machine data 22 | fo = open('featmap.txt', 'w') 23 | # list from machine.names 24 | names = [ 25 | 'vendor', 'MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' 26 | ] 27 | 28 | for i in range(0, 6): 29 | fo.write('%d\t%s\tint\n' % (i, names[i + 1])) 30 | 31 | for v, k in sorted(fmap.items(), key=lambda x: x[1]): 32 | fo.write('%d\tvendor=%s\ti\n' % (k, v)) 33 | fo.close() 34 | -------------------------------------------------------------------------------- /demo/CLI/regression/mknfold.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import random 4 | import sys 5 | 6 | if len(sys.argv) < 2: 7 | print('Usage: [nfold = 5]') 8 | exit(0) 9 | 10 | random.seed(10) 11 | 12 | k = int(sys.argv[2]) 13 | if len(sys.argv) > 3: 14 | nfold = int(sys.argv[3]) 15 | else: 16 | nfold = 5 17 | 18 | fi = open(sys.argv[1], 'r') 19 | ftr = open(sys.argv[1] + '.train', 'w') 20 | fte = open(sys.argv[1] + '.test', 'w') 21 | for l in fi: 22 | if random.randint(1, nfold) == k: 23 | fte.write(l) 
24 | else: 25 | ftr.write(l) 26 | 27 | fi.close() 28 | ftr.close() 29 | fte.close() 30 | -------------------------------------------------------------------------------- /demo/CLI/regression/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # map the data to features. For convenience we only use 7 original attributes and encode them as features in a trivial way 3 | python mapfeat.py 4 | # split train and test 5 | python mknfold.py machine.txt 1 6 | # training and output the models 7 | ../../xgboost machine.conf 8 | # output predictions of test data 9 | ../../xgboost machine.conf task=pred model_in=0002.model 10 | # print the boosters of 0002.model in dump.raw.txt 11 | ../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt 12 | # print the boosters of 0002.model in dump.nice.txt with feature map 13 | ../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 14 | 15 | # cat the result 16 | cat dump.nice.txt 17 | -------------------------------------------------------------------------------- /demo/CLI/yearpredMSD/README.md: -------------------------------------------------------------------------------- 1 | Demonstrating how to use XGBoost on [Year Prediction task of Million Song Dataset](https://archive.ics.uci.edu/ml/datasets/YearPredictionMSD) 2 | 3 | 1. Run runexp.sh 4 | ```bash 5 | ./runexp.sh 6 | ``` 7 | 8 | You can also use the script to prepare LIBSVM format, and run the [Distributed Version](../../multi-node). 9 | Note, though, that a single machine is normally sufficient for a dataset at this scale; use the distributed version for larger datasets. 
10 | -------------------------------------------------------------------------------- /demo/CLI/yearpredMSD/csv2libsvm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | 5 | fo = open(sys.argv[2], 'w') 6 | 7 | for l in open(sys.argv[1]): 8 | arr = l.split(',') 9 | fo.write('%s' % arr[0]) 10 | for i in range(len(arr) - 1): 11 | fo.write(' %d:%s' % (i, arr[i+1])) 12 | fo.close() 13 | -------------------------------------------------------------------------------- /demo/CLI/yearpredMSD/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f YearPredictionMSD.txt ] 4 | then 5 | echo "use existing data to run experiment" 6 | else 7 | echo "getting data from uci, make sure you are connected to internet" 8 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip 9 | unzip YearPredictionMSD.txt.zip 10 | fi 11 | echo "start making data.." 
12 | # convert the CSV data to LIBSVM format 13 | python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm 14 | head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train 15 | tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test 16 | echo "finish making the data" 17 | ../../../xgboost yearpredMSD.conf 18 | -------------------------------------------------------------------------------- /demo/CLI/yearpredMSD/yearpredMSD.conf: -------------------------------------------------------------------------------- 1 | # General Parameters, see comment for each definition 2 | # choose the tree booster, can also change to gblinear 3 | booster = gbtree 4 | # this is the only difference with classification, use reg:squarederror to do regression 5 | # when labels are in [0,1] we can also use reg:logistic 6 | objective = reg:squarederror 7 | 8 | # Tree Booster Parameters 9 | # step size shrinkage 10 | eta = 1.0 11 | # minimum loss reduction required to make a further partition 12 | gamma = 1.0 13 | # minimum sum of instance weight(hessian) needed in a child 14 | min_child_weight = 1 15 | # maximum depth of a tree 16 | max_depth = 5 17 | 18 | base_score = 2001 19 | # Task parameters 20 | # the number of round to do boosting 21 | num_round = 100 22 | # 0 means do not save any model except the final round model 23 | save_period = 0 24 | # The path of training data 25 | data = "yearpredMSD.libsvm.train" 26 | # The path of validation data, used to monitor training process, here [test] sets name of the validation set 27 | eval[test] = "yearpredMSD.libsvm.test" 28 | # The path of test data 29 | #test:data = "yearpredMSD.libsvm.test" 30 | -------------------------------------------------------------------------------- /demo/aft_survival/README.rst: -------------------------------------------------------------------------------- 1 | Survival Analysis Walkthrough 2 | ============================= 3 | 4 | This is a collection of examples for 
using the XGBoost Python package for training 5 | survival models. For an introduction, see :doc:`/tutorials/aft_survival_analysis` 6 | -------------------------------------------------------------------------------- /demo/c-api/.gitignore: -------------------------------------------------------------------------------- 1 | c-api-demo 2 | -------------------------------------------------------------------------------- /demo/c-api/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(xgboost-c-examples) 3 | 4 | add_subdirectory(basic) 5 | add_subdirectory(external-memory) 6 | add_subdirectory(inference) 7 | 8 | enable_testing() 9 | add_test( 10 | NAME test_xgboost_demo_c_basic 11 | COMMAND api-demo 12 | WORKING_DIRECTORY ${xgboost-c-examples_BINARY_DIR} 13 | ) 14 | add_test( 15 | NAME test_xgboost_demo_c_external_memory 16 | COMMAND external-memory-demo 17 | WORKING_DIRECTORY ${xgboost-c-examples_BINARY_DIR} 18 | ) 19 | add_test( 20 | NAME test_xgboost_demo_c_inference 21 | COMMAND inference-demo 22 | WORKING_DIRECTORY ${xgboost-c-examples_BINARY_DIR} 23 | ) 24 | -------------------------------------------------------------------------------- /demo/c-api/basic/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(api-demo LANGUAGES C VERSION 0.0.1) 2 | find_package(xgboost REQUIRED) 3 | 4 | # xgboost is built as static libraries, all cxx dependencies need to be linked into the 5 | # executable. 6 | if(XGBOOST_BUILD_STATIC_LIB) 7 | enable_language(CXX) 8 | # find again for those cxx libraries. 
# Builds and runs the basic C API demo against an XGBoost checkout.
#
# Targets:
#   build  compile $(TGT) from $(SRC)
#   run    build if needed, then run the demo with the XGBoost lib directory
#          on LD_LIBRARY_PATH
#   clean  remove the compiled demo
SRC=c-api-demo.c
TGT=c-api-demo

cc=cc
CFLAGS ?=-O3
# This Makefile lives in demo/c-api/basic, so the repository root (which holds
# include/, dmlc-core/ and lib/) is three levels up. Override on the command
# line when building against another checkout.
XGBOOST_ROOT ?=../../..
INCLUDE_DIR=-I$(XGBOOST_ROOT)/include -I$(XGBOOST_ROOT)/dmlc-core/include
LIB_DIR=-L$(XGBOOST_ROOT)/lib

# These targets do not produce files with the same name.
.PHONY: build run clean

build: $(TGT)

# Depending on the Makefile itself forces a rebuild when flags or paths change.
$(TGT): $(SRC) Makefile
	$(cc) $(CFLAGS) $(INCLUDE_DIR) $(LIB_DIR) -o $(TGT) $(SRC) -lxgboost

run: $(TGT)
	LD_LIBRARY_PATH=$(XGBOOST_ROOT)/lib ./$(TGT)

clean:
	rm -f $(TGT)
To use 13 | `find_package()`, put the following in your **CMakeLists.txt**: 14 | 15 | ``` CMake 16 | find_package(xgboost REQUIRED) 17 | add_executable(api-demo c-api-demo.c) 18 | target_link_libraries(api-demo xgboost::xgboost) 19 | ``` 20 | 21 | If you want to put XGBoost inside your project (like git submodule), use this 22 | instead: 23 | ``` CMake 24 | add_subdirectory(xgboost) 25 | add_executable(api-demo c-api-demo.c) 26 | target_link_libraries(api-demo xgboost) 27 | ``` 28 | 29 | # make 30 | You can start by modifying the makefile in this directory to fit your need. 31 | -------------------------------------------------------------------------------- /demo/c-api/external-memory/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(external-memory-demo LANGUAGES C VERSION 0.0.1) 3 | 4 | find_package(xgboost REQUIRED) 5 | 6 | add_executable(external-memory-demo external_memory.c) 7 | target_link_libraries(external-memory-demo PRIVATE xgboost::xgboost) 8 | -------------------------------------------------------------------------------- /demo/c-api/external-memory/README.md: -------------------------------------------------------------------------------- 1 | Defining a Custom Data Iterator to Load Data from External Memory 2 | ================================================================= 3 | 4 | A simple demo for using custom data iterator with XGBoost. The feature is still 5 | **experimental** and not ready for production use. If you are not familiar with C API, 6 | please read its introduction in our tutorials and visit the basic demo first. 7 | 8 | Defining Data Iterator 9 | ---------------------- 10 | 11 | In the example, we define a custom data iterator with 2 methods: `reset` and `next`. The 12 | `next` method passes data into XGBoost and tells XGBoost whether the iterator has reached 13 | its end, and the `reset` method resets iterations. 
One important detail when using the C 14 | API for data iterator is users need to make sure that the data passed into `next` method 15 | must be kept in memory until the next iteration or `reset` is called. The external memory 16 | DMatrix is not limited to training, but also valid for other features like prediction. -------------------------------------------------------------------------------- /demo/c-api/inference/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.18) 2 | project(inference-demo LANGUAGES C VERSION 0.0.1) 3 | find_package(xgboost REQUIRED) 4 | 5 | # xgboost is built as static libraries, all cxx dependencies need to be linked into the 6 | # executable. 7 | if(XGBOOST_BUILD_STATIC_LIB) 8 | enable_language(CXX) 9 | # find again for those cxx libraries. 10 | find_package(xgboost REQUIRED) 11 | endif() 12 | 13 | add_executable(inference-demo inference.c) 14 | target_link_libraries(inference-demo PRIVATE xgboost::xgboost) 15 | -------------------------------------------------------------------------------- /demo/dask/README.rst: -------------------------------------------------------------------------------- 1 | .. _dask-examples: 2 | 3 | XGBoost Dask Feature Walkthrough 4 | ================================ 5 | 6 | This directory contains some demonstrations for using `dask` with `XGBoost`. For an 7 | overview, see :doc:`/tutorials/dask` 8 | -------------------------------------------------------------------------------- /demo/data/README.md: -------------------------------------------------------------------------------- 1 | This folder contains processed example dataset used by the demos. 
2 | Copyright of the dataset belongs to the original copyright holder 3 | -------------------------------------------------------------------------------- /demo/data/gen_autoclaims.R: -------------------------------------------------------------------------------- 1 | site <- 'http://cran.r-project.org' 2 | if (!require('dummies')) { 3 | install.packages('dummies', repos = site) 4 | } 5 | if (!require('insuranceData')) { 6 | install.packages('insuranceData', repos = site) 7 | } 8 | 9 | library(dummies) 10 | library(insuranceData) 11 | 12 | data(AutoClaims) 13 | data <- AutoClaims 14 | 15 | data$STATE <- as.factor(data$STATE) 16 | data$CLASS <- as.factor(data$CLASS) 17 | data$GENDER <- as.factor(data$GENDER) 18 | 19 | data.dummy <- dummy.data.frame( 20 | data 21 | , dummy.class = 'factor' 22 | , omit.constants = TRUE 23 | ) 24 | write.table( 25 | data.dummy 26 | , 'autoclaims.csv' 27 | , sep = ',' 28 | , row.names = FALSE 29 | , col.names = FALSE 30 | , quote = FALSE 31 | ) 32 | -------------------------------------------------------------------------------- /demo/gpu_acceleration/README.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | GPU Acceleration Demo 4 | ===================== 5 | 6 | This is a collection of demonstration scripts to showcase the basic usage of GPU. Please 7 | see :doc:`/gpu/index` for more info. There are other demonstrations for distributed GPU 8 | training using dask or spark. 9 | -------------------------------------------------------------------------------- /demo/guide-python/README.rst: -------------------------------------------------------------------------------- 1 | XGBoost Python Feature Walkthrough 2 | ================================== 3 | 4 | 5 | This is a collection of examples for using the XGBoost Python package. 
"""
Demo for obtaining leaf index
=============================
"""
import os

import xgboost as xgb

CURRENT_DIR = os.path.dirname(__file__)


def _load(uri):
    """Build a DMatrix from a URI given relative to this script's directory."""
    return xgb.DMatrix(os.path.join(CURRENT_DIR, uri))


# Load the training and evaluation data, then train a small model.
dtrain = _load("../data/agaricus.txt.train?format=libsvm")
dtest = _load("../data/agaricus.txt.test?format=libsvm")
param = dict(max_depth=2, eta=1, objective="binary:logistic")
watchlist = [(dtest, "eval"), (dtrain, "train")]
num_round = 3
bst = xgb.train(param, dtrain, num_boost_round=num_round, evals=watchlist)

print("start testing predict the leaf indices")
# Leaf indices restricted to the first two trees, requested with strict_shape.
leafindex = bst.predict(
    dtest, pred_leaf=True, strict_shape=True, iteration_range=(0, 2)
)
for item in (leafindex.shape, leafindex):
    print(item)
# Leaf indices from every tree in the model.
leafindex = bst.predict(dtest, pred_leaf=True)
print(leafindex.shape)
17 | xgb_model = xgb.XGBRegressor( 18 | n_jobs=multiprocessing.cpu_count() // 2, tree_method="hist" 19 | ) 20 | clf = GridSearchCV( 21 | xgb_model, 22 | {"max_depth": [2, 4, 6], "n_estimators": [50, 100, 200]}, 23 | verbose=1, 24 | n_jobs=2, 25 | ) 26 | clf.fit(X, y) 27 | print(clf.best_score_) 28 | print(clf.best_params_) 29 | -------------------------------------------------------------------------------- /demo/kaggle-higgs/higgs-pred.R: -------------------------------------------------------------------------------- 1 | # install xgboost package, see R-package in root folder 2 | require(xgboost) 3 | require(methods) 4 | 5 | modelfile <- "higgs.model" 6 | outfile <- "higgs.pred.csv" 7 | dtest <- read.csv("data/test.csv", header = TRUE) 8 | data <- as.matrix(dtest[2:31]) 9 | idx <- dtest[[1]] 10 | 11 | xgmat <- xgb.DMatrix(data, missing = -999.0) 12 | bst <- xgb.load(modelfile = modelfile) 13 | ypred <- predict(bst, xgmat) 14 | 15 | rorder <- rank(ypred, ties.method = "first") 16 | 17 | threshold <- 0.15 18 | # to be completed 19 | ntop <- length(rorder) - as.integer(threshold * length(rorder)) 20 | plabel <- ifelse(rorder > ntop, "s", "b") 21 | outdata <- list("EventId" = idx, 22 | "RankOrder" = rorder, 23 | "Class" = plabel) 24 | write.csv(outdata, file = outfile, quote = FALSE, row.names = FALSE) 25 | -------------------------------------------------------------------------------- /demo/kaggle-higgs/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -u higgs-numpy.py 4 | ret=$? 5 | if [[ $ret != 0 ]]; then 6 | echo "ERROR in higgs-numpy.py" 7 | exit $ret 8 | fi 9 | python -u higgs-pred.py 10 | ret=$? 
11 | if [[ $ret != 0 ]]; then 12 | echo "ERROR in higgs-pred.py" 13 | exit $ret 14 | fi 15 | -------------------------------------------------------------------------------- /demo/kaggle-otto/README.MD: -------------------------------------------------------------------------------- 1 | Benchmark for Otto Group Competition 2 | ========= 3 | 4 | This is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge). 5 | 6 | ## Getting started 7 | 8 | 1. Put `train.csv` and `test.csv` under the `data` folder 9 | 2. Run the script 10 | 3. Submit the `submission.csv` 11 | 12 | The parameter `nthread` controls the number of cores to run on; please set it to suit your machine. 13 | 14 | ## R-package 15 | 16 | To install the R-package of xgboost, please run 17 | 18 | ```r 19 | install.packages("xgboost", repos = "https://cran.r-project.org") 20 | ``` 21 | 22 | Windows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. 23 | -------------------------------------------------------------------------------- /demo/multiclass_classification/README.md: -------------------------------------------------------------------------------- 1 | Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology) 2 | 3 | Make sure you have built the xgboost Python module in ../../python 4 | 5 | 1. Run runexp.sh 6 | ```bash 7 | ./runexp.sh 8 | ``` 9 | 10 | For the **R version**, please see `train.R`. 
11 | -------------------------------------------------------------------------------- /demo/multiclass_classification/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f dermatology.data ] 3 | then 4 | echo "use existing data to run multi class classification" 5 | else 6 | echo "getting data from uci, make sure you are connected to internet" 7 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data 8 | fi 9 | python train.py 10 | -------------------------------------------------------------------------------- /demo/nvflare/.gitignore: -------------------------------------------------------------------------------- 1 | !config 2 | -------------------------------------------------------------------------------- /demo/nvflare/README.md: -------------------------------------------------------------------------------- 1 | # Experimental Support of Federated XGBoost using NVFlare 2 | 3 | This directory contains a demo of Federated Learning using 4 | [NVFlare](https://nvidia.github.io/NVFlare/). 5 | 6 | ## Horizontal Federated XGBoost 7 | 8 | For horizontal federated learning using XGBoost (data is split row-wise), check out the `horizontal` directory 9 | (see the [README](horizontal/README.md)). 10 | 11 | ## Vertical Federated XGBoost 12 | 13 | For vertical federated learning using XGBoost (data is split column-wise), check out the `vertical` directory 14 | (see the [README](vertical/README.md)). 
15 | -------------------------------------------------------------------------------- /demo/nvflare/config/config_fed_client.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_version": 2, 3 | "executors": [ 4 | { 5 | "tasks": [ 6 | "train" 7 | ], 8 | "executor": { 9 | "path": "trainer.XGBoostTrainer", 10 | "args": { 11 | "server_address": "localhost:9091", 12 | "world_size": 2, 13 | "server_cert_path": "server-cert.pem", 14 | "client_key_path": "client-key.pem", 15 | "client_cert_path": "client-cert.pem", 16 | "use_gpus": false 17 | } 18 | } 19 | } 20 | ], 21 | "task_result_filters": [], 22 | "task_data_filters": [] 23 | } 24 | -------------------------------------------------------------------------------- /demo/nvflare/config/config_fed_server.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_version": 2, 3 | "server": { 4 | "heart_beat_timeout": 600 5 | }, 6 | "task_data_filters": [], 7 | "task_result_filters": [], 8 | "workflows": [ 9 | { 10 | "id": "server_workflow", 11 | "path": "controller.XGBoostController", 12 | "args": { 13 | "port": 9091, 14 | "world_size": 2, 15 | "server_key_path": "server-key.pem", 16 | "server_cert_path": "server-cert.pem", 17 | "client_cert_path": "client-cert.pem" 18 | } 19 | } 20 | ], 21 | "components": [] 22 | } 23 | -------------------------------------------------------------------------------- /demo/rmm_plugin/rmm_singlegpu.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using rmm on a single node device 3 | ================================= 4 | """ 5 | 6 | import rmm 7 | from sklearn.datasets import make_classification 8 | 9 | import xgboost as xgb 10 | 11 | # Initialize RMM pool allocator 12 | rmm.reinitialize(pool_allocator=True) 13 | # Optionally force XGBoost to use RMM for all GPU memory allocation, see ./README.md 14 | # xgb.set_config(use_rmm=True) 15 | 16 | X, y 
= make_classification(n_samples=10000, n_informative=5, n_classes=3) 17 | dtrain = xgb.DMatrix(X, label=y) 18 | 19 | params = { 20 | "max_depth": 8, 21 | "eta": 0.01, 22 | "objective": "multi:softprob", 23 | "num_class": 3, 24 | "tree_method": "hist", 25 | "device": "cuda", 26 | } 27 | # XGBoost will automatically use the RMM pool allocator 28 | bst = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtrain, "train")]) 29 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | html 2 | latex 3 | *.sh 4 | _* 5 | sg_execution_times.rst 6 | doxygen 7 | parser.py 8 | *.pyc 9 | web-data 10 | # generated by doxygen 11 | tmp -------------------------------------------------------------------------------- /doc/R-package/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.md 3 | -------------------------------------------------------------------------------- /doc/R-package/Makefile: -------------------------------------------------------------------------------- 1 | # This is the makefile for compiling Rmarkdown files into the md file with results. 
2 | PKGROOT=../../R-package 3 | 4 | # ADD The Markdown to be built here, with suffix md 5 | xgboostfromJSON.md: $(PKGROOT)/vignettes/xgboostfromJSON.Rmd 6 | xgboost_introduction.md: $(PKGROOT)/vignettes/xgboost_introduction.Rmd 7 | 8 | all: xgboostfromJSON.md xgboost_introduction.md 9 | 10 | # General Rules for build rmarkdowns, need knitr 11 | %.md: 12 | Rscript -e \ 13 | "require(methods);"\ 14 | "require(knitr);"\ 15 | "knitr::opts_knit\$$set(root.dir=\".\");"\ 16 | "knitr::opts_chunk\$$set(fig.path=\"../web-data/xgboost/knitr/$(basename $@)-\");"\ 17 | "knitr::knit(\"$+\")" 18 | -------------------------------------------------------------------------------- /doc/R-package/r_docs/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | ============= 4 | XGBoost R API 5 | ============= 6 | -------------------------------------------------------------------------------- /doc/README: -------------------------------------------------------------------------------- 1 | The documentation of xgboost is generated with recommonmark and sphinx. 2 | 3 | You can build it locally by typing "make html" in this folder. 4 | 5 | Checkout https://recommonmark.readthedocs.org for guide on how to write markdown with extensions used in this doc, such as math formulas and table of content. 
6 | -------------------------------------------------------------------------------- /doc/_static/cn.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Flag of the People's Republic of China 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /doc/_static/custom.css: -------------------------------------------------------------------------------- 1 | @import url('theme.css'); 2 | 3 | /* Logo background */ 4 | .wy-side-nav-search, .wy-side-nav-search img { 5 | background-color: #ffffff !important; 6 | } 7 | 8 | .highlight { 9 | background: #f1f3f4; 10 | } 11 | 12 | .navbar { 13 | background: #ffffff; 14 | } 15 | 16 | .navbar-nav { 17 | background: #ffffff; 18 | } 19 | 20 | /* side bar */ 21 | .wy-nav-side { 22 | background: #f1f3f4; 23 | } 24 | 25 | .wy-menu-vertical a { 26 | color: #707070; 27 | } 28 | 29 | .wy-side-nav-search div.version { 30 | color: #404040; 31 | } 32 | -------------------------------------------------------------------------------- /doc/_static/js/auto_module_index.js: -------------------------------------------------------------------------------- 1 | function auto_index(module) { 2 | $(document).ready(function () { 3 | // find all classes or functions 4 | var div_query = "div[class='section'][id='module-" + module + "']"; 5 | var class_query = div_query + " dl[class='class'] > dt"; 6 | var func_query = div_query + " dl[class='function'] > dt"; 7 | var targets = $(class_query + ',' + func_query); 8 | 9 | var li_node = $("li a[href='#module-" + module + "']").parent(); 10 | var html = "
    "; 11 | 12 | for (var i = 0; i < targets.length; ++i) { 13 | var id = $(targets[i]).attr('id'); 14 | // remove 'mxnet.' prefix to make menus shorter 15 | var id_simple = id.replace(/^mxnet\./, ''); 16 | html += "
  • " + id_simple + "
  • "; 19 | } 20 | 21 | html += "
"; 22 | li_node.append(html); 23 | }); 24 | } 25 | 26 | -------------------------------------------------------------------------------- /doc/c++.rst: -------------------------------------------------------------------------------- 1 | ############### 2 | XGBoost C++ API 3 | ############### 4 | 5 | Starting from 1.0 release, CMake will generate installation rules to export all C++ headers. But 6 | the c++ interface is much closer to the internal of XGBoost than other language bindings. 7 | As a result it's changing quite often and we don't maintain its stability. Along with the 8 | plugin system (see ``plugin/example`` in XGBoost's source tree), users can utilize some 9 | existing c++ headers for gaining more access to the internal of XGBoost. 10 | 11 | * `C++ interface documentation (latest master branch) <./dev/files.html>`_ 12 | * `C++ interface documentation (last stable release) `_ 13 | -------------------------------------------------------------------------------- /doc/changes/index.rst: -------------------------------------------------------------------------------- 1 | ############# 2 | Release Notes 3 | ############# 4 | 5 | For release notes prior to the 2.1 release, please see `news `__ . 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :caption: Contents: 10 | 11 | v3.0.0 12 | v2.1.0 -------------------------------------------------------------------------------- /doc/cli.rst: -------------------------------------------------------------------------------- 1 | ############################ 2 | XGBoost Command Line version 3 | ############################ 4 | 5 | See `XGBoost Command Line walkthrough `_. 6 | -------------------------------------------------------------------------------- /doc/contrib/index.rst: -------------------------------------------------------------------------------- 1 | ##################### 2 | Contribute to XGBoost 3 | ##################### 4 | 5 | XGBoost has been developed by community members. Everyone is welcome to contribute. 
We value all forms of contributions, including, but not limited to: 6 | 7 | * Code reviews for pull requests 8 | * Documentation and usage examples 9 | * Community participation in forums and issues 10 | * Code readability and developer guide 11 | 12 | - We welcome contributions that add code comments to improve readability. 13 | - We also welcome contributions to docs to explain the design choices of the XGBoost internals. 14 | 15 | * Test cases to make the codebase more robust. 16 | * Tutorials, blog posts, talks that promote the project. 17 | 18 | Here are guidelines for contributing to various aspects of the XGBoost project: 19 | 20 | .. toctree:: 21 | :maxdepth: 2 22 | 23 | Community Guideline 24 | donate 25 | coding_guide 26 | consistency 27 | python_packaging 28 | unit_tests 29 | Docs and Examples 30 | featuremap 31 | git_guide 32 | release 33 | ci 34 | -------------------------------------------------------------------------------- /doc/julia.rst: -------------------------------------------------------------------------------- 1 | ########## 2 | XGBoost.jl 3 | ########## 4 | 5 | See `XGBoost.jl Project page `_. 
6 | -------------------------------------------------------------------------------- /doc/jvm/api.rst: -------------------------------------------------------------------------------- 1 | ############################# 2 | API Docs for the JVM packages 3 | ############################# 4 | 5 | * `XGBoost4J Java API <../jvm_docs/javadocs/index.html>`_ 6 | * `XGBoost4J Scala API <../jvm_docs/scaladocs/xgboost4j/index.html>`_ 7 | * `XGBoost4J-Spark Scala API <../jvm_docs/scaladocs/xgboost4j-spark/index.html>`_ 8 | * `XGBoost4J-Spark-GPU Scala API <../jvm_docs/scaladocs/xgboost4j-spark-gpu/index.html>`_ 9 | * `XGBoost4J-Flink Scala API <../jvm_docs/scaladocs/xgboost4j-flink/index.html>`_ 10 | -------------------------------------------------------------------------------- /doc/jvm/javadocs/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | ================== 4 | XGBoost4J Java API 5 | ================== 6 | -------------------------------------------------------------------------------- /doc/jvm/scaladocs/xgboost4j-flink/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | ========================= 4 | XGBoost4J-Flink Scala API 5 | ========================= 6 | -------------------------------------------------------------------------------- /doc/jvm/scaladocs/xgboost4j-spark/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | ========================= 4 | XGBoost4J-Spark Scala API 5 | ========================= 6 | -------------------------------------------------------------------------------- /doc/jvm/scaladocs/xgboost4j/index.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | =================== 4 | XGBoost4J Scala API 5 | =================== 6 | -------------------------------------------------------------------------------- 
# -*- coding: utf-8 -*-
"""Helper utility function for customization.

Importing this module clones https://github.com/dmlc/web-data into
``web-data`` under the current working directory, or updates that checkout
with ``git pull`` when it already exists. It then reports on stderr whether
the ``READTHEDOCS`` environment variable is set. Git failures are reported on
stderr but never abort the import.
"""
import os
import subprocess
import sys

# True when the READTHEDOCS environment variable is set to any value.
READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None)


def _run_git(args, cwd=None):
    """Run ``git`` with ``args`` in ``cwd``; report failures instead of raising.

    The command is passed as an argument list, so no shell is involved.
    """
    command = ['git'] + list(args)
    try:
        returncode = subprocess.call(command, cwd=cwd)
    except OSError as err:
        # Raised when git is not installed or ``cwd`` is not a directory.
        sys.stderr.write('Failed to run %s: %s\n' % (' '.join(command), err))
        return
    if returncode != 0:
        sys.stderr.write(
            '%s exited with status %d\n' % (' '.join(command), returncode))


if not os.path.exists('web-data'):
    # os.path.exists() is also False for a dangling symlink; remove it so the
    # clone can create the directory.
    if os.path.islink('web-data'):
        os.unlink('web-data')
    _run_git(['clone', 'https://github.com/dmlc/web-data'])
else:
    # Running with cwd='web-data' guarantees the pull can only ever touch the
    # web-data checkout, never the repository in the current directory.
    _run_git(['pull'], cwd='web-data')

sys.stderr.write('READTHEDOCS=%s\n' % (READTHEDOCS_BUILD))
/**
 * Copyright 2024, XGBoost Contributors
 *
 * @brief Macro for Windows.
 *
 * Defines xgboost_IS_WIN when compiling with MSVC or MinGW and, in that case,
 * also defines NOMINMAX, WIN32_LEAN_AND_MEAN, _WINSOCKAPI_ and (for MinGW)
 * xgboost_IS_MINGW. Every macro is only defined if it is not defined already.
 */
#pragma once

// Detect a Windows toolchain unless xgboost_IS_WIN was already defined.
#if !defined(xgboost_IS_WIN)

#if defined(_MSC_VER) || defined(__MINGW32__)
#define xgboost_IS_WIN 1
#endif // defined(_MSC_VER) || defined(__MINGW32__)

#endif // !defined(xgboost_IS_WIN)

// Everything below applies to Windows builds only.
#if defined(xgboost_IS_WIN)

#if !defined(NOMINMAX)
#define NOMINMAX
#endif // !defined(NOMINMAX)

// A macro used inside `windows.h` to avoid conflicts with `winsock2.h`
#if !defined(WIN32_LEAN_AND_MEAN)
#define WIN32_LEAN_AND_MEAN
#endif // !defined(WIN32_LEAN_AND_MEAN)
// Stop windows.h from including winsock.h
#if !defined(_WINSOCKAPI_)
#define _WINSOCKAPI_
#endif // !defined(_WINSOCKAPI_)

// Flag MinGW builds separately unless xgboost_IS_MINGW was already defined.
#if !defined(xgboost_IS_MINGW)

#if defined(__MINGW32__)
#define xgboost_IS_MINGW 1
#endif // defined(__MINGW32__)

#endif // !defined(xgboost_IS_MINGW)

#endif // defined(xgboost_IS_WIN)
${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j.cpp 5 | ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cpp) 6 | 7 | if(USE_CUDA) 8 | list(APPEND JVM_SOURCES 9 | ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cu) 10 | endif() 11 | 12 | add_library(xgboost4j SHARED ${JVM_SOURCES} ${XGBOOST_OBJ_SOURCES}) 13 | 14 | if(ENABLE_ALL_WARNINGS) 15 | target_compile_options(xgboost4j PUBLIC -Wall -Wextra) 16 | endif() 17 | 18 | target_link_libraries(xgboost4j PRIVATE objxgboost) 19 | target_include_directories(xgboost4j 20 | PRIVATE 21 | ${JNI_INCLUDE_DIRS} 22 | ${PROJECT_SOURCE_DIR}/jvm-packages/xgboost4j/src/native 23 | ${PROJECT_SOURCE_DIR}/include 24 | ${PROJECT_SOURCE_DIR}/dmlc-core/include) 25 | 26 | set_output_directory(xgboost4j ${PROJECT_SOURCE_DIR}/lib) 27 | 28 | # MacOS: Patch libxgboost4j.dylib to use @rpath/libomp.dylib 29 | if(USE_OPENMP AND APPLE) 30 | patch_openmp_path_macos(xgboost4j libxgboost4j) 31 | endif() 32 | -------------------------------------------------------------------------------- /jvm-packages/README.md: -------------------------------------------------------------------------------- 1 | # XGBoost4J: Distributed XGBoost for Scala/Java 2 | [![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci) 3 | [![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org/en/latest/jvm/index.html) 4 | [![GitHub license](http://dmlc.github.io/img/apache2.svg)](../LICENSE) 5 | 6 | [Documentation](https://xgboost.readthedocs.org/en/stable/jvm/index.html) | 7 | [Resources](../demo/README.md) | 8 | [Release Notes](../NEWS.md) 9 | 10 | XGBoost4J is the JVM package of xgboost. It brings all the optimizations and power of xgboost 11 | into the JVM ecosystem. 12 | 13 | - Train XGBoost models in Scala and Java with easy customization. 
14 | - Run distributed xgboost natively on jvm frameworks such as Apache Flink and Apache 15 | Spark. 16 | 17 | You can find more about XGBoost on [Documentation](https://xgboost.readthedocs.org/en/stable/jvm/index.html) and [Resource Page](../demo/README.md). -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-example/LICENSE: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark-gpu/src/main/resources/META-INF/services/ml.dmlc.xgboost4j.scala.spark.XGBoostPlugin: -------------------------------------------------------------------------------- 1 | ml.dmlc.xgboost4j.scala.spark.GpuXGBoostPlugin 2 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark-gpu/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.logger.org.apache.spark=INFO 2 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark-gpu/src/test/scala/org/apache/spark/GpuTestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2023 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package org.apache.spark 18 | 19 | import org.apache.spark.sql.SparkSession 20 | 21 | object GpuTestUtils { 22 | 23 | def cleanupAnyExistingSession(): Unit = { 24 | SparkSession.cleanupAnyExistingSession() 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.logger.org.apache.spark=ERROR 2 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/data/XGBoostClassificationModel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/data/XGBoostClassificationModel -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/metadata/_SUCCESS -------------------------------------------------------------------------------- /jvm-packages/xgboost4j-spark/src/test/resources/model/0.82/model/metadata/part-00000: -------------------------------------------------------------------------------- 1 | 
{"class":"ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel","timestamp":1555350539033,"sparkVersion":"2.3.2-uber-109","uid":"xgbc_5e7bec215a4c","paramMap":{"useExternalMemory":false,"trainTestRatio":1.0,"alpha":0.0,"seed":0,"numWorkers":100,"skipDrop":0.0,"treeLimit":0,"silent":0,"trackerConf":{"workerConnectionTimeout":0,"trackerImpl":"python"},"missing":"NaN","colsampleBylevel":1.0,"probabilityCol":"probability","checkpointPath":"","lambda":1.0,"rawPredictionCol":"rawPrediction","eta":0.3,"numEarlyStoppingRounds":0,"growPolicy":"depthwise","gamma":0.0,"sampleType":"uniform","maxDepth":6,"rateDrop":0.0,"objective":"reg:linear","customObj":null,"lambdaBias":0.0,"baseScore":0.5,"labelCol":"label","minChildWeight":1.0,"customEval":null,"normalizeType":"tree","maxBin":16,"nthread":4,"numRound":20,"colsampleBytree":1.0,"predictionCol":"prediction","subsample":1.0,"timeoutRequestWorkers":1800000,"featuresCol":"features","evalMetric":"error","sketchEps":0.03,"scalePosWeight":1.0,"checkpointInterval":-1,"maxDeltaStep":0.0,"treeMethod":"approx"}} 2 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j/LICENSE: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ -------------------------------------------------------------------------------- /jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ColumnBatch.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2021-2024 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package ml.dmlc.xgboost4j.java; 18 | 19 | /** 20 | * This class wraps multiple Column and provides the array interface json 21 | * for all columns. 22 | */ 23 | public abstract class ColumnBatch extends Column { 24 | 25 | /** Get features cuda array interface json string */ 26 | public abstract String toFeaturesJson(); 27 | } 28 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostError.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package ml.dmlc.xgboost4j.java; 17 | 18 | /** 19 | * custom error class for xgboost 20 | * 21 | * @author hzx 22 | */ 23 | public class XGBoostError extends Exception { 24 | public XGBoostError(String message) { 25 | super(message); 26 | } 27 | 28 | public XGBoostError(String message, Throwable cause) { 29 | super(message, cause); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j/src/main/resources/xgboost4j-version.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} -------------------------------------------------------------------------------- /jvm-packages/xgboost4j/src/native/jvm_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2014-2025, XGBoost Contributors 3 | */ 4 | #ifndef JVM_UTILS_H_ 5 | #define JVM_UTILS_H_ 6 | 7 | #include 8 | 9 | #include "xgboost/logging.h" // for Check 10 | 11 | #define JVM_CHECK_CALL(__expr) \ 12 | { \ 13 | int __errcode = (__expr); \ 14 | if (__errcode != 0) { \ 15 | return __errcode; \ 16 | } \ 17 | } 18 | 19 | JavaVM *&GlobalJvm(); 20 | void setHandle(JNIEnv *jenv, jlongArray jhandle, void *handle); 21 | 22 | template 23 | T CheckJvmCall(T const &v, JNIEnv *jenv) { 24 | if (!v) { 25 | CHECK(jenv->ExceptionOccurred()); 26 | jenv->ExceptionDescribe(); 27 | } 28 | return v; 29 | } 30 | 31 | #endif // JVM_UTILS_H_ 32 | -------------------------------------------------------------------------------- /jvm-packages/xgboost4j/src/native/xgboost4j-gpu.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021-2024, XGBoost Contributors 3 | */ 4 | #ifndef XGBOOST_USE_CUDA 5 | 6 | #include 7 | 8 | #include "../../../../src/c_api/c_api_error.h" 9 | #include 
"../../../../src/common/common.h" 10 | 11 | namespace xgboost::jni { 12 | int QdmFromCallback(JNIEnv *, jobject, jlongArray, char const *, bool, jlongArray) { 13 | API_BEGIN(); 14 | common::AssertGPUSupport(); 15 | API_END(); 16 | } 17 | } // namespace xgboost::jni 18 | #endif // XGBOOST_USE_CUDA 19 | -------------------------------------------------------------------------------- /ops/conda_env/aarch64_test.yml: -------------------------------------------------------------------------------- 1 | name: aarch64_test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10 6 | - pip 7 | - wheel 8 | - pytest 9 | - pytest-cov 10 | - numpy 11 | - scipy 12 | - scikit-learn 13 | - pandas 14 | - matplotlib 15 | - dask 16 | - distributed 17 | - hypothesis 18 | - graphviz 19 | - python-graphviz 20 | - codecov 21 | - cmake 22 | - ninja 23 | - boto3 24 | - jsonschema 25 | - boto3 26 | - awscli 27 | - numba 28 | - llvmlite 29 | - loky>=3.5.1 30 | - pyarrow 31 | - pyspark>=3.4.0 32 | - cloudpickle 33 | - pip: 34 | - awscli 35 | - auditwheel 36 | -------------------------------------------------------------------------------- /ops/conda_env/cpp_test.yml: -------------------------------------------------------------------------------- 1 | # conda environment for CPP test on Linux distributions 2 | name: cpp_test 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - cmake 7 | - ninja 8 | - c-compiler 9 | - cxx-compiler 10 | - gtest 11 | - protobuf 12 | - libgrpc 13 | -------------------------------------------------------------------------------- /ops/conda_env/linux_cpu_test.yml: -------------------------------------------------------------------------------- 1 | name: linux_cpu_test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10 6 | - cmake>=3.26.4 7 | - c-compiler 8 | - cxx-compiler 9 | - ninja 10 | - pip 11 | - wheel 12 | - pyyaml 13 | - cpplint 14 | - pylint 15 | - numpy 16 | - scipy 17 | - scikit-learn>=1.4.1 18 | - pandas 19 | - polars 20 | 
- matplotlib 21 | - dask<=2024.10.0 22 | - distributed<=2024.10.0 23 | - python-graphviz 24 | - hypothesis>=6.46 25 | - astroid 26 | - sh 27 | - mock 28 | - pytest 29 | - pytest-timeout 30 | - pytest-cov 31 | - python-kubernetes 32 | - urllib3 33 | - jsonschema 34 | - boto3 35 | - awscli 36 | - py-ubjson 37 | - loky>=3.5.1 38 | - pyarrow 39 | - protobuf 40 | - cloudpickle 41 | - modin 42 | - pyspark>=3.4.0 43 | -------------------------------------------------------------------------------- /ops/conda_env/linux_sycl_test.yml: -------------------------------------------------------------------------------- 1 | name: linux_sycl_test 2 | channels: 3 | - conda-forge 4 | - https://software.repos.intel.com/python/conda/ 5 | dependencies: 6 | - python=3.10 7 | - cmake>=3.26.4 8 | - c-compiler 9 | - cxx-compiler 10 | - gtest 11 | - pip 12 | - wheel 13 | - numpy 14 | - scipy 15 | - scikit-learn 16 | - pandas 17 | - hypothesis>=6.46 18 | - pytest 19 | - pytest-timeout 20 | - pytest-cov 21 | - dask=2024.11 22 | - ninja 23 | - dpcpp_linux-64 24 | - onedpl-devel 25 | - intel-openmp 26 | -------------------------------------------------------------------------------- /ops/conda_env/macos_cpu_test.yml: -------------------------------------------------------------------------------- 1 | name: macos_test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10 6 | - pip 7 | - wheel 8 | - pyyaml 9 | - numpy 10 | - scipy 11 | - llvm-openmp 12 | - scikit-learn>=1.4.1 13 | - pandas 14 | - matplotlib 15 | - dask<=2024.10.0 16 | - distributed<=2024.10.0 17 | - graphviz 18 | - python-graphviz 19 | - hypothesis 20 | - astroid 21 | - sh 22 | - pytest 23 | - pytest-cov 24 | - pytest-timeout 25 | - python-kubernetes 26 | - urllib3 27 | - jsonschema 28 | - boto3 29 | - awscli 30 | - loky>=3.5.1 31 | - pyarrow 32 | - cloudpickle 33 | - pip: 34 | - setuptools 35 | -------------------------------------------------------------------------------- /ops/conda_env/minimal.yml: 
-------------------------------------------------------------------------------- 1 | name: minimal 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10 6 | - awscli 7 | -------------------------------------------------------------------------------- /ops/conda_env/python_lint.yml: -------------------------------------------------------------------------------- 1 | name: python_lint 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10 6 | - pylint 7 | - wheel 8 | - setuptools 9 | - mypy 10 | - numpy 11 | - scipy 12 | - pandas 13 | - pyarrow 14 | - scikit-learn 15 | - dask 16 | - distributed 17 | - black 18 | - isort 19 | - cloudpickle 20 | - pytest 21 | - hypothesis 22 | - hatchling 23 | - pyspark>=3.4.0 24 | -------------------------------------------------------------------------------- /ops/conda_env/sdist_test.yml: -------------------------------------------------------------------------------- 1 | # conda environment for source distribution test. 2 | name: sdist_test 3 | channels: 4 | - conda-forge 5 | dependencies: 6 | - python=3.10 7 | - pip 8 | - wheel 9 | - cmake 10 | - ninja 11 | - python-build 12 | -------------------------------------------------------------------------------- /ops/conda_env/win64_test.yml: -------------------------------------------------------------------------------- 1 | name: win64_env 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.10 6 | - numpy 7 | - scipy 8 | - matplotlib 9 | - scikit-learn 10 | - pandas 11 | - pytest 12 | - boto3 13 | - hypothesis 14 | - jsonschema 15 | - cupy>=13.2 16 | - python-graphviz 17 | - pip 18 | - py-ubjson 19 | - loky>=3.5.1 20 | - pyarrow 21 | -------------------------------------------------------------------------------- /ops/packer/linux/install_drivers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | ## Install basic tools 5 | echo 'debconf debconf/frontend select 
Noninteractive' | sudo debconf-set-selections 6 | sudo apt-get update 7 | sudo apt-get install -y cmake git build-essential wget ca-certificates curl unzip 8 | 9 | ## Install CUDA Toolkit 12.6 (Driver will be installed later) 10 | wget -nv https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb 11 | sudo dpkg -i cuda-keyring_1.1-1_all.deb 12 | sudo apt-get update 13 | sudo apt-get -y install cuda-toolkit-12-6 cuda-drivers-565 14 | rm cuda-keyring_1.1-1_all.deb 15 | -------------------------------------------------------------------------------- /ops/packer/linux/setup_ssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | systemctl start ssh 3 | -------------------------------------------------------------------------------- /ops/packer/windows/install_choco.ps1: -------------------------------------------------------------------------------- 1 | ## Adopted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/InstallChoco.ps1 2 | ## Author: Christopher Horrell (https://github.com/chorrell) 3 | 4 | $ErrorActionPreference = "Stop" 5 | 6 | # Install Chocolatey 7 | # See https://chocolatey.org/install#individual 8 | Set-ExecutionPolicy Bypass -Scope Process -Force 9 | [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072 10 | Invoke-Expression ((New-Object System.Net.WebClient).DownloadString("https://community.chocolatey.org/install.ps1")) 11 | 12 | # Globally Auto confirm every action 13 | # See: https://docs.chocolatey.org/en-us/faqs#why-do-i-have-to-confirm-packages-now-is-there-a-way-to-remove-this 14 | choco feature enable -n allowGlobalConfirmation 15 | -------------------------------------------------------------------------------- /ops/packer/windows/sysprep.ps1: -------------------------------------------------------------------------------- 1 
| ## Adopted from https://github.com/chorrell/packer-aws-windows-openssh/blob/20c40aa60b54469b3d85650a2e2e45e35ed83bc7/files/PrepareImage.ps1 2 | ## Author: Christopher Horrell (https://github.com/chorrell) 3 | 4 | $ErrorActionPreference = "Stop" 5 | 6 | Write-Output "Cleaning up keys" 7 | $openSSHAuthorizedKeys = Join-Path $env:ProgramData "ssh\administrators_authorized_keys" 8 | Remove-Item -Recurse -Force -Path $openSSHAuthorizedKeys 9 | 10 | # Make sure task is enabled 11 | Enable-ScheduledTask "DownloadKey" 12 | 13 | Write-Output "Running Sysprep" 14 | & "$Env:Programfiles\Amazon\EC2Launch\ec2launch.exe" sysprep 15 | -------------------------------------------------------------------------------- /ops/pipeline/build-gpu-rpkg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | if [[ -z "${GITHUB_SHA:-}" ]] 6 | then 7 | echo "Make sure to set environment variable GITHUB_SHA" 8 | exit 1 9 | fi 10 | 11 | source ops/pipeline/classify-git-branch.sh 12 | source ops/pipeline/get-docker-registry-details.sh 13 | source ops/pipeline/get-image-tag.sh 14 | 15 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.gpu_build_r_rockylinux8:${IMAGE_TAG} 16 | 17 | echo "--- Build XGBoost R package with CUDA" 18 | set -x 19 | python3 ops/docker_run.py \ 20 | --image-uri ${IMAGE_URI} \ 21 | -- ops/pipeline/build-gpu-rpkg-impl.sh \ 22 | ${GITHUB_SHA} 23 | 24 | if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] 25 | then 26 | python3 ops/pipeline/manage-artifacts.py upload \ 27 | --s3-bucket xgboost-nightly-builds \ 28 | --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ 29 | xgboost_r_gpu_linux.tar.gz 30 | fi 31 | -------------------------------------------------------------------------------- /ops/pipeline/build-jvm-doc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build docs for the JVM packages and package it in a tarball 3 | ## Note: 
this script assumes that the user has already built libxgboost4j.so 4 | ## and place it in the lib/ directory. 5 | 6 | set -euo pipefail 7 | 8 | if [[ -z ${BRANCH_NAME:-} ]] 9 | then 10 | echo "Make sure to define environment variable BRANCH_NAME." 11 | exit 1 12 | fi 13 | 14 | if [[ ! -f lib/libxgboost4j.so ]] 15 | then 16 | echo "Must place libxgboost4j.so in lib/ first" 17 | exit 2 18 | fi 19 | 20 | source ops/pipeline/get-docker-registry-details.sh 21 | source ops/pipeline/get-image-tag.sh 22 | 23 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.jvm_gpu_build:${IMAGE_TAG} 24 | 25 | echo "--- Build JVM packages doc" 26 | set -x 27 | python3 ops/docker_run.py \ 28 | --image-uri ${IMAGE_URI} \ 29 | -- ops/pipeline/build-jvm-doc-impl.sh ${BRANCH_NAME} 30 | -------------------------------------------------------------------------------- /ops/pipeline/build-jvm-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build libxgboost4j.so with CUDA 3 | 4 | set -euo pipefail 5 | 6 | source ops/pipeline/classify-git-branch.sh 7 | source ops/pipeline/get-docker-registry-details.sh 8 | source ops/pipeline/get-image-tag.sh 9 | 10 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.jvm_gpu_build:${IMAGE_TAG} 11 | 12 | echo "--- Build libxgboost4j.so with CUDA" 13 | 14 | if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] 15 | then 16 | arch_flag="-DGPU_COMPUTE_VER=75" 17 | else 18 | arch_flag="" 19 | fi 20 | 21 | COMMAND=$( 22 | cat <<-EOF 23 | cd build-gpu/ && \ 24 | cmake .. 
-GNinja -DUSE_CUDA=ON -DUSE_NCCL=ON \ 25 | -DJVM_BINDINGS=ON -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ${arch_flag} && \ 26 | ninja 27 | EOF 28 | ) 29 | 30 | set -x 31 | mkdir -p build-gpu/ 32 | python3 ops/docker_run.py \ 33 | --image-uri ${IMAGE_URI} \ 34 | -- bash -c "${COMMAND}" 35 | -------------------------------------------------------------------------------- /ops/pipeline/build-jvm-macos-apple-silicon.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build libxgboost4j.dylib targeting MacOS (Apple Silicon) 3 | 4 | set -euox pipefail 5 | 6 | # Display system info 7 | echo "--- Display system information" 8 | set -x 9 | system_profiler SPSoftwareDataType 10 | sysctl -n machdep.cpu.brand_string 11 | uname -m 12 | set +x 13 | 14 | brew install ninja libomp 15 | 16 | # Build XGBoost4J binary 17 | echo "--- Build libxgboost4j.dylib" 18 | set -x 19 | mkdir build 20 | pushd build 21 | export JAVA_HOME=$(/usr/libexec/java_home) 22 | cmake .. -GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 23 | ninja -v 24 | popd 25 | rm -rf build 26 | otool -L lib/libxgboost.dylib 27 | set +x 28 | -------------------------------------------------------------------------------- /ops/pipeline/build-jvm-macos-intel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build libxgboost4j.dylib targeting MacOS (Intel) 3 | 4 | set -euox pipefail 5 | 6 | # Display system info 7 | echo "--- Display system information" 8 | set -x 9 | system_profiler SPSoftwareDataType 10 | sysctl -n machdep.cpu.brand_string 11 | uname -m 12 | set +x 13 | 14 | brew install ninja libomp 15 | 16 | # Build XGBoost4J binary 17 | echo "--- Build libxgboost4j.dylib" 18 | set -x 19 | mkdir build 20 | pushd build 21 | export JAVA_HOME=$(/usr/libexec/java_home) 22 | cmake .. 
-GNinja -DJVM_BINDINGS=ON -DUSE_OPENMP=ON -DCMAKE_OSX_DEPLOYMENT_TARGET=10.15 23 | ninja -v 24 | popd 25 | rm -rf build 26 | otool -L lib/libxgboost.dylib 27 | -------------------------------------------------------------------------------- /ops/pipeline/build-python-wheels-arm64-impl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build and test XGBoost with ARM64 CPU 3 | ## Companion script for ops/pipeline/build-cpu-arm64.sh 4 | 5 | set -euox pipefail 6 | 7 | source activate aarch64_test 8 | 9 | echo "--- Build libxgboost from the source" 10 | mkdir -p build 11 | pushd build 12 | 13 | cmake .. \ 14 | -GNinja \ 15 | -DCMAKE_PREFIX_PATH="${CONDA_PREFIX}" \ 16 | -DUSE_OPENMP=ON \ 17 | -DHIDE_CXX_SYMBOLS=ON \ 18 | -DGOOGLE_TEST=ON \ 19 | -DUSE_DMLC_GTEST=ON \ 20 | -DENABLE_ALL_WARNINGS=ON \ 21 | -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ 22 | -DBUILD_DEPRECATED_CLI=ON 23 | time ninja -v 24 | 25 | echo "--- Run Google Test" 26 | ctest --extra-verbose 27 | popd 28 | 29 | echo "--- Build binary wheel" 30 | pushd python-package 31 | rm -rfv dist/* 32 | pip wheel --no-deps -v . --wheel-dir dist/ 33 | popd 34 | -------------------------------------------------------------------------------- /ops/pipeline/build-r-docs-impl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ $# -ne 1 ]] 4 | then 5 | echo "Usage: $0 [branch name]" 6 | exit 1 7 | fi 8 | 9 | branch_name=$1 10 | 11 | if [[ -z "${R_LIBS_USER}" ]]; 12 | then 13 | export R_LIBS_USER=/tmp/rtmpdir 14 | fi 15 | 16 | set -euo pipefail 17 | 18 | echo "R_LIBS_USER: ${R_LIBS_USER}" 19 | 20 | if [[ ! -d ${R_LIBS_USER} ]] 21 | then 22 | echo "Make ${R_LIBS_USER} for installing temporary R packages." 
23 | mkdir ${R_LIBS_USER} 24 | fi 25 | 26 | # Used only in container environment: when gosu is available, hand the library dir to the current user 27 | if command -v gosu >/dev/null 2>&1 28 | then 29 | gosu root chown -R $UID:$GROUPS ${R_LIBS_USER} 30 | fi 31 | 32 | cd R-package 33 | 34 | MAKEFLAGS=-j$(nproc) Rscript ./tests/helper_scripts/install_deps.R 35 | # Some examples are failing 36 | MAKEFLAGS=-j$(nproc) Rscript -e "pkgdown::build_site(examples=FALSE)" 37 | # Install the package for vignettes 38 | MAKEFLAGS=-j$(nproc) R CMD INSTALL . 39 | 40 | cd - 41 | 42 | cd doc/R-package 43 | make -j$(nproc) all 44 | 45 | cd ../../ # back to project root 46 | 47 | tar cvjf r-docs-${branch_name}.tar.bz2 R-package/docs doc/R-package/xgboost_introduction.md doc/R-package/xgboostfromJSON.md 48 | -------------------------------------------------------------------------------- /ops/pipeline/build-r-docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | if [[ -z ${BRANCH_NAME:-} ]] 6 | then 7 | echo "Make sure to define environment variable BRANCH_NAME." 8 | exit 1 9 | fi 10 | 11 | source ops/pipeline/get-docker-registry-details.sh 12 | 13 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.cpu_build_r_doc:main 14 | 15 | echo "--- Build R package doc" 16 | set -x 17 | python3 ops/docker_run.py \ 18 | --image-uri ${IMAGE_URI} \ 19 | -- ops/pipeline/build-r-docs-impl.sh ${BRANCH_NAME} 20 | -------------------------------------------------------------------------------- /ops/pipeline/build-test-cpu-nonomp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Ensure that XGBoost can function with OpenMP disabled 3 | 4 | set -euox pipefail 5 | 6 | mkdir -p build 7 | pushd build 8 | cmake .. 
\ 9 | -GNinja \ 10 | -DUSE_OPENMP=OFF \ 11 | -DHIDE_CXX_SYMBOLS=ON \ 12 | -DGOOGLE_TEST=ON \ 13 | -DUSE_DMLC_GTEST=ON \ 14 | -DENABLE_ALL_WARNINGS=ON \ 15 | -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ 16 | -DBUILD_DEPRECATED_CLI=ON 17 | time ninja -v 18 | ctest --extra-verbose 19 | popd 20 | -------------------------------------------------------------------------------- /ops/pipeline/build-test-jvm-packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build and test JVM packages. 3 | ## 4 | ## Note. This script takes in all inputs via environment variables. 5 | 6 | INPUT_DOC=$( 7 | cat <<-EOF 8 | Inputs 9 | - SCALA_VERSION: Scala version, either 2.12 or 2.13 (Required) 10 | EOF 11 | ) 12 | 13 | set -euo pipefail 14 | 15 | source ops/pipeline/get-docker-registry-details.sh 16 | source ops/pipeline/get-image-tag.sh 17 | 18 | for arg in "SCALA_VERSION" 19 | do 20 | if [[ -z "${!arg:-}" ]] 21 | then 22 | echo -e "Error: $arg must be set.\n${INPUT_DOC}" 23 | exit 1 24 | fi 25 | done 26 | 27 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.jvm:${IMAGE_TAG} 28 | 29 | set -x 30 | 31 | python3 ops/docker_run.py --image-uri ${IMAGE_URI} \ 32 | --run-args "-e SCALA_VERSION=${SCALA_VERSION}" \ 33 | -- ops/pipeline/build-test-jvm-packages-impl.sh 34 | -------------------------------------------------------------------------------- /ops/pipeline/build-test-sycl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Build and test oneAPI 3 | 4 | set -euox pipefail 5 | 6 | if [[ "$#" -lt 1 ]] 7 | then 8 | echo "Usage: $0 {gtest,pytest}" 9 | exit 1 10 | fi 11 | 12 | suite="$1" 13 | 14 | mkdir build 15 | pushd build 16 | cmake .. 
-DGOOGLE_TEST=ON -DPLUGIN_SYCL=ON -DCMAKE_CXX_COMPILER=g++ \ 17 | -DCMAKE_C_COMPILER=gcc -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ 18 | -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -GNinja 19 | ninja 20 | popd 21 | 22 | case "$suite" in 23 | gtest) 24 | ./build/testxgboost 25 | ;; 26 | pytest) 27 | cd python-package 28 | python --version 29 | pip install -v . 30 | cd .. 31 | pytest -s -v -rxXs --durations=0 ./tests/python-sycl/ 32 | ;; 33 | esac 34 | -------------------------------------------------------------------------------- /ops/pipeline/classify-git-branch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Detect whether the current git branch is a pull request or a release branch 3 | 4 | set -euo pipefail 5 | 6 | if [[ -n ${GITHUB_BASE_REF:-} ]] 7 | then 8 | is_pull_request=1 9 | else 10 | is_pull_request=0 11 | fi 12 | 13 | if [[ ${BRANCH_NAME:-} == "master" || ${BRANCH_NAME:-} == "release_"* || ${BRANCH_NAME:-} == "federated-secure" ]] 14 | then 15 | is_release_branch=1 16 | enforce_daily_budget=0 17 | else 18 | is_release_branch=0 19 | enforce_daily_budget=1 20 | fi 21 | 22 | if [[ -n ${DISABLE_RELEASE:-} ]] 23 | then 24 | is_release_branch=0 25 | fi 26 | -------------------------------------------------------------------------------- /ops/pipeline/deploy-jvm-packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Deploy JVM packages to S3 bucket 3 | 4 | set -euo pipefail 5 | 6 | source ops/pipeline/enforce-ci.sh 7 | source ops/pipeline/get-docker-registry-details.sh 8 | source ops/pipeline/get-image-tag.sh 9 | 10 | if [[ "$#" -lt 3 ]] 11 | then 12 | echo "Usage: $0 {cpu,gpu} [image_repo] [scala_version]" 13 | exit 1 14 | fi 15 | 16 | variant="$1" 17 | image_repo="$2" 18 | scala_version="$3" 19 | 20 | IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" 21 | 22 | set -x 23 | 24 | if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] 25 
| then 26 | echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" 27 | python3 ops/docker_run.py --image-uri "${IMAGE_URI}" \ 28 | -- ops/pipeline/deploy-jvm-packages-impl.sh "${variant}" "${scala_version}" 29 | fi 30 | -------------------------------------------------------------------------------- /ops/pipeline/enforce-ci.ps1: -------------------------------------------------------------------------------- 1 | ## Ensure that a script is running inside the CI. 2 | ## Usage: . ops/pipeline/enforce-ci.ps1 3 | 4 | if ( -Not $Env:GITHUB_ACTIONS ) { 5 | $script_name = (Split-Path -Path $PSCommandPath -Leaf) 6 | Write-Host "$script_name is not meant to run locally; it should run inside GitHub Actions." 7 | Write-Host "Please inspect the content of $script_name and locate the desired command manually." 8 | exit 1 9 | } 10 | 11 | if ( -Not $Env:BRANCH_NAME ) { 12 | Write-Host "Make sure to define environment variable BRANCH_NAME." 13 | exit 2 14 | } 15 | 16 | if ( $Env:GITHUB_BASE_REF ) { 17 | $is_pull_request = 1 18 | } else { 19 | $is_pull_request = 0 20 | } 21 | 22 | if ( ($Env:BRANCH_NAME -eq "master") -or ($Env:BRANCH_NAME -match "release_.+") ) { 23 | $is_release_branch = 1 24 | $enforce_daily_budget = 0 25 | } else { 26 | $is_release_branch = 0 27 | $enforce_daily_budget = 1 28 | } 29 | -------------------------------------------------------------------------------- /ops/pipeline/enforce-ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Ensure that a script is running inside the CI. 4 | ## Usage: source ops/pipeline/enforce-ci.sh 5 | 6 | set -euo pipefail 7 | 8 | if [[ -z ${GITHUB_ACTIONS:-} ]] 9 | then 10 | echo "$0 is not meant to run locally; it should run inside GitHub Actions." 11 | echo "Please inspect the content of $0 and locate the desired command manually." 
12 | exit 1 13 | fi 14 | 15 | if [[ -z ${BRANCH_NAME:-} ]] 16 | then 17 | echo "Make sure to define environment variable BRANCH_NAME." 18 | exit 2 19 | fi 20 | 21 | source ops/pipeline/classify-git-branch.sh 22 | -------------------------------------------------------------------------------- /ops/pipeline/get-docker-registry-details.sh: -------------------------------------------------------------------------------- 1 | ## Get details for AWS ECR (Elastic Container Registry) in environment variables 2 | 3 | ECR_AWS_ACCOUNT_ID="492475357299" 4 | ECR_AWS_REGION="us-west-2" 5 | DOCKER_REGISTRY_URL="${ECR_AWS_ACCOUNT_ID}.dkr.ecr.${ECR_AWS_REGION}.amazonaws.com" 6 | -------------------------------------------------------------------------------- /ops/pipeline/get-image-tag.sh: -------------------------------------------------------------------------------- 1 | ## Update the following line to test changes to CI images 2 | ## See https://xgboost.readthedocs.io/en/latest/contrib/ci.html#making-changes-to-ci-containers 3 | 4 | IMAGE_TAG=main 5 | -------------------------------------------------------------------------------- /ops/pipeline/login-docker-registry.sh: -------------------------------------------------------------------------------- 1 | ## Log into AWS ECR (Elastic Container Registry) to be able to pull containers from it 2 | ## Note. 
Requires valid AWS credentials 3 | 4 | set -euo pipefail 5 | 6 | source ops/pipeline/get-docker-registry-details.sh 7 | 8 | echo "aws ecr get-login-password --region ${ECR_AWS_REGION} |" \ 9 | "docker login --username AWS --password-stdin ${DOCKER_REGISTRY_URL}" 10 | aws ecr get-login-password --region ${ECR_AWS_REGION} \ 11 | | docker login --username AWS --password-stdin ${DOCKER_REGISTRY_URL} 12 | -------------------------------------------------------------------------------- /ops/pipeline/run-clang-tidy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | source ops/pipeline/get-docker-registry-details.sh 6 | source ops/pipeline/get-image-tag.sh 7 | 8 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.clang_tidy:${IMAGE_TAG} 9 | 10 | echo "--- Run clang-tidy" 11 | set -x 12 | python3 ops/docker_run.py \ 13 | --image-uri ${IMAGE_URI} \ 14 | -- python3 ops/script/run_clang_tidy.py --cuda-archs 75 15 | -------------------------------------------------------------------------------- /ops/pipeline/test-cpp-i386-impl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Run C++ tests for i386 3 | ## Companion script for ops/pipeline/test-cpp-i386.sh 4 | 5 | set -euox pipefail 6 | 7 | export CXXFLAGS='-Wno-error=overloaded-virtual -Wno-error=maybe-uninitialized -Wno-error=redundant-move -Wno-narrowing' 8 | 9 | mkdir -p build 10 | pushd build 11 | 12 | cmake .. 
\ 13 | -GNinja \ 14 | -DGOOGLE_TEST=ON \ 15 | -DUSE_DMLC_GTEST=ON \ 16 | -DENABLE_ALL_WARNINGS=ON \ 17 | -DCMAKE_COMPILE_WARNING_AS_ERROR=ON 18 | time ninja -v 19 | # TODO(hcho3): Run gtest for i386 20 | # ./testxgboost 21 | 22 | popd 23 | -------------------------------------------------------------------------------- /ops/pipeline/test-cpp-i386.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Run C++ tests for i386 3 | 4 | set -euo pipefail 5 | 6 | source ops/pipeline/get-docker-registry-details.sh 7 | source ops/pipeline/get-image-tag.sh 8 | 9 | IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.i386:${IMAGE_TAG}" 10 | 11 | set -x 12 | python3 ops/docker_run.py \ 13 | --image-uri ${IMAGE_URI} \ 14 | -- bash ops/pipeline/test-cpp-i386-impl.sh 15 | -------------------------------------------------------------------------------- /ops/pipeline/test-freebsd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Run tests on FreeBSD 3 | 4 | set -euox pipefail 5 | 6 | mkdir build 7 | cd build 8 | cmake .. -GNinja -DGOOGLE_TEST=ON 9 | ninja -v 10 | ./testxgboost 11 | -------------------------------------------------------------------------------- /ops/pipeline/test-jvm-gpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Test JVM packages with CUDA. Note: this script assumes that 3 | ## the user has already built libxgboost4j.so with CUDA support 4 | ## and placed it in the lib/ directory. 5 | 6 | ## Note. This script takes in all inputs via environment variables.
7 | 8 | INPUT_DOC=$( 9 | cat <<-EOF 10 | Inputs 11 | - SCALA_VERSION: Scala version, either 2.12 or 2.13 (Required) 12 | EOF 13 | ) 14 | 15 | set -euo pipefail 16 | 17 | for arg in "SCALA_VERSION" 18 | do 19 | if [[ -z "${!arg:-}" ]] 20 | then 21 | echo -e "Error: $arg must be set.\n${INPUT_DOC}" 22 | exit 1 23 | fi 24 | done 25 | 26 | source ops/pipeline/get-docker-registry-details.sh 27 | source ops/pipeline/get-image-tag.sh 28 | 29 | IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.jvm_gpu_build:${IMAGE_TAG} 30 | 31 | set -x 32 | 33 | python3 ops/docker_run.py --image-uri ${IMAGE_URI} --use-gpus \ 34 | --run-args "-e SCALA_VERSION=${SCALA_VERSION} -e USE_CUDA=1 -e SKIP_NATIVE_BUILD=1 --shm-size=4g --privileged" \ 35 | -- ops/pipeline/build-test-jvm-packages-impl.sh 36 | -------------------------------------------------------------------------------- /ops/pipeline/test-python-macos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Test XGBoost Python wheel on MacOS 3 | 4 | set -euox pipefail 5 | 6 | brew install ninja 7 | 8 | mkdir build 9 | pushd build 10 | # Set prefix, to use OpenMP library from Conda env 11 | # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 12 | # to learn why we don't use libomp from Homebrew. 13 | cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX -DBUILD_DEPRECATED_CLI=ON 14 | ninja 15 | popd 16 | 17 | cd python-package 18 | python --version 19 | pip install -v . 20 | 21 | cd .. 
22 | pytest -s -v -rxXs --durations=0 ./tests/python 23 | pytest -s -v -rxXs --durations=0 ./tests/test_distributed/test_with_dask 24 | -------------------------------------------------------------------------------- /ops/pipeline/test-python-sdist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Test installing Python XGBoost from source distribution 3 | 4 | set -euox pipefail 5 | 6 | cd python-package 7 | python --version 8 | python -m build --sdist 9 | pip install -v ./dist/xgboost-*.tar.gz 10 | cd .. 11 | python -c 'import xgboost' 12 | -------------------------------------------------------------------------------- /ops/pipeline/test-python-wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Test XGBoost Python wheel on the Linux platform 3 | 4 | set -euo pipefail 5 | 6 | if [[ "$#" -lt 2 ]] 7 | then 8 | echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64} [image_repo]" 9 | exit 1 10 | fi 11 | 12 | suite="$1" 13 | image_repo="$2" 14 | 15 | if [[ "$suite" == "gpu" || "$suite" == "mgpu" ]] 16 | then 17 | gpu_option="--use-gpus" 18 | else 19 | gpu_option="" 20 | fi 21 | 22 | source ops/pipeline/get-docker-registry-details.sh 23 | source ops/pipeline/get-image-tag.sh 24 | 25 | IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" 26 | 27 | set -x 28 | python3 ops/docker_run.py --image-uri "${IMAGE_URI}" ${gpu_option} \ 29 | --run-args='--shm-size=4g --privileged' \ 30 | -- bash ops/pipeline/test-python-wheel-impl.sh "${suite}" 31 | -------------------------------------------------------------------------------- /ops/pipeline/test-python-with-sysprefix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Test if Python XGBoost can be configured to use libxgboost.so from the system prefix 3 | 4 | set -euox pipefail 5 | 6 | sudo apt-get update && sudo apt-get install -y ninja-build 7 | 8 | mkdir 
build 9 | pushd build 10 | cmake .. -GNinja 11 | ninja 12 | popd 13 | 14 | # Copy libxgboost.so to system prefix 15 | cp -v lib/* "$(python -c 'import sys; print(sys.base_prefix)')/lib" 16 | 17 | # Now configure Python XGBoost to use libxgboost.so from the system prefix 18 | cd python-package 19 | pip install virtualenv 20 | virtualenv venv 21 | source venv/bin/activate && \ 22 | pip install -v . --config-settings use_system_libxgboost=True && \ 23 | python -c 'import xgboost' 24 | -------------------------------------------------------------------------------- /ops/pipeline/test-win64-gpu.ps1: -------------------------------------------------------------------------------- 1 | $ErrorActionPreference = "Stop" 2 | 3 | Write-Host "--- Test XGBoost on Windows with CUDA" 4 | 5 | nvcc --version 6 | 7 | Write-Host "--- Run Google Tests" 8 | build/testxgboost.exe 9 | if ($LASTEXITCODE -ne 0) { throw "Last command failed" } 10 | 11 | Write-Host "--- Set up Python env" 12 | conda activate 13 | $env_name = -join("win64_", (New-Guid).ToString().replace("-", "")) 14 | mamba env create -n ${env_name} --file=ops/conda_env/win64_test.yml 15 | conda activate ${env_name} 16 | python -m pip install ` 17 | (Get-ChildItem python-package/dist/*.whl | Select-Object -Expand FullName) 18 | if ($LASTEXITCODE -ne 0) { throw "Last command failed" } 19 | 20 | Write-Host "--- Run Python tests" 21 | python -X faulthandler -m pytest -v -s -rxXs tests/python 22 | if ($LASTEXITCODE -ne 0) { throw "Last command failed" } 23 | Write-Host "--- Run Python tests with GPU" 24 | python -X faulthandler -m pytest -v -s -rxXs -m "(not slow) and (not mgpu)"` 25 | tests/python-gpu 26 | if ($LASTEXITCODE -ne 0) { throw "Last command failed" } 27 | -------------------------------------------------------------------------------- /ops/pipeline/trigger-rtd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ## Trigger a new build on ReadTheDocs service. 
3 | 4 | set -euo pipefail 5 | 6 | if [[ -z ${BRANCH_NAME:-} ]] 7 | then 8 | echo "Make sure to define environment variable BRANCH_NAME." 9 | exit 1 10 | fi 11 | 12 | echo "Branch name: ${BRANCH_NAME}" 13 | export RTD_AUTH_TOKEN=$(aws secretsmanager get-secret-value \ 14 | --secret-id runs-on/readthedocs-auth-token --output text \ 15 | --region us-west-2 --query SecretString || echo -n '') 16 | python3 ops/pipeline/trigger-rtd-impl.py 17 | -------------------------------------------------------------------------------- /ops/script/changelog.py: -------------------------------------------------------------------------------- 1 | """Helper script for creating links to PRs for changelog. This should be used with the 2 | `sphinx-issues` extension. 3 | 4 | """ 5 | 6 | import argparse 7 | import os 8 | import re 9 | 10 | from test_utils import ROOT 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument( 15 | "--version", 16 | type=str, 17 | required=True, 18 | help="Major version of the changelog, e.g., 3.0.0 .", 19 | ) 20 | args = parser.parse_args() 21 | version = args.version 22 | 23 | fname = os.path.join(ROOT, f"doc/changes/v{version}.rst") 24 | 25 | with open(fname) as fd: 26 | note = fd.read() 27 | 28 | # E.g. #11285 -> :pr:`11285`. 29 | regex = re.compile(r"(#)(\d+)") 30 | note = re.sub(regex, r":pr:`\2`", note) 31 | with open(fname, "w") as fd: 32 | fd.write(note) 33 | -------------------------------------------------------------------------------- /ops/script/lint_cmake.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -euo pipefail 4 | 5 | cmake_files=$( 6 | find . 
-name CMakeLists.txt -o -path "./cmake/*.cmake" \ 7 | | grep -v dmlc-core \ 8 | | grep -v gputreeshap 9 | ) 10 | cmakelint \ 11 | --linelength=120 \ 12 | --filter=-convention/filename,-package/stdargs,-readability/wonkycase \ 13 | ${cmake_files} \ 14 | || exit 1 15 | -------------------------------------------------------------------------------- /ops/script/test_tidy.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | struct Foo { 5 | int bar_; 6 | }; 7 | 8 | int main() { 9 | std::vector values; 10 | values.push_back(Foo()); 11 | } 12 | -------------------------------------------------------------------------------- /ops/script/verify_link.sh: -------------------------------------------------------------------------------- 1 | # Make sure the dependencies of XGBoost don't appear in a directly downstream project. 2 | # Pass the executable as the argument for this script. 3 | # Exits non-zero if the executable links OpenMP or pthread directly, 0 otherwise. 4 | 5 | if readelf -d "$1" | grep "omp"; 6 | then 7 | echo "Found openmp in direct dependency" 8 | exit 1 9 | fi 10 | 11 | # Fall through instead of exiting, so that the pthread check below actually runs. 12 | if readelf -d "$1" | grep "pthread"; 13 | then 14 | echo "Found pthread in direct dependency" 15 | exit 1 16 | fi 17 | 18 | exit 0 19 | -------------------------------------------------------------------------------- /plugin/example/README.md: -------------------------------------------------------------------------------- 1 | XGBoost Plugin Example 2 | ====================== 3 | This folder provides an example of implementing xgboost plugin.
4 | 5 | There are three steps you need to do to add a plugin to xgboost 6 | - Create your source .cc file, implement a new extension 7 | - In this example [custom_obj.cc](custom_obj.cc) 8 | - Register this extension to xgboost via a registration macro 9 | - In this example ```XGBOOST_REGISTER_OBJECTIVE``` in [this line](custom_obj.cc#L78) 10 | - Add a line to `xgboost/plugin/CMakeLists.txt`: 11 | ``` 12 | target_sources(objxgboost PRIVATE ${xgboost_SOURCE_DIR}/plugin/example/custom_obj.cc) 13 | ``` 14 | 15 | Then you can test this plugin by using ```objective=mylogistic``` parameter. 16 | 17 | 19 | -------------------------------------------------------------------------------- /plugin/federated/README.md: -------------------------------------------------------------------------------- 1 | XGBoost Plugin for Federated Learning 2 | ===================================== 3 | 4 | This folder contains the plugin for federated learning. 5 | 6 | See [build instruction](../../doc/build.rst) for how to build the plugin. 7 | 8 | 9 | Test Federated XGBoost 10 | ---------------------- 11 | ```shell 12 | # Under xgboost source tree. 13 | cd tests/distributed/test_federated 14 | # This tests both CPU training (`hist`) and GPU training (`gpu_hist`). 
15 | ./runtests-federated.sh 16 | ``` 17 | -------------------------------------------------------------------------------- /plugin/federated/federated_comm.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023, XGBoost Contributors 3 | */ 4 | #include // for shared_ptr 5 | 6 | #include "../../src/common/cuda_context.cuh" 7 | #include "federated_comm.cuh" 8 | #include "xgboost/context.h" // for Context 9 | 10 | namespace xgboost::collective { 11 | CUDAFederatedComm::CUDAFederatedComm(Context const* ctx, std::shared_ptr impl) 12 | : FederatedComm{impl}, stream_{ctx->CUDACtx()->Stream()} { 13 | CHECK(impl); 14 | CHECK(ctx->IsCUDA()); 15 | dh::safe_cuda(cudaSetDevice(ctx->Ordinal())); 16 | } 17 | 18 | Comm* FederatedComm::MakeCUDAVar(Context const* ctx, std::shared_ptr) const { 19 | return new CUDAFederatedComm{ 20 | ctx, std::dynamic_pointer_cast(this->shared_from_this())}; 21 | } 22 | } // namespace xgboost::collective 23 | -------------------------------------------------------------------------------- /plugin/federated/federated_comm.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023-2024, XGBoost Contributors 3 | */ 4 | #pragma once 5 | 6 | #include // for shared_ptr 7 | 8 | #include "../../src/collective/coll.h" // for Coll 9 | #include "../../src/common/device_helpers.cuh" // for CUDAStreamView 10 | #include "federated_comm.h" // for FederatedComm 11 | #include "xgboost/context.h" // for Context 12 | 13 | namespace xgboost::collective { 14 | class CUDAFederatedComm : public FederatedComm { 15 | dh::CUDAStreamView stream_; 16 | 17 | public: 18 | explicit CUDAFederatedComm(Context const* ctx, std::shared_ptr impl); 19 | [[nodiscard]] auto Stream() const { return stream_; } 20 | Comm* MakeCUDAVar(Context const*, std::shared_ptr) const override { 21 | LOG(FATAL) << "[Internal Error]: Invalid request for CUDA variant."; 22 | return nullptr; 23 
| } 24 | }; 25 | } // namespace xgboost::collective 26 | -------------------------------------------------------------------------------- /plugin/sycl/common/transform.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021-2024, XGBoost Contributors 3 | * \file transform.h 4 | */ 5 | #ifndef PLUGIN_SYCL_COMMON_TRANSFORM_H_ 6 | #define PLUGIN_SYCL_COMMON_TRANSFORM_H_ 7 | 8 | #include "../device_manager.h" 9 | 10 | #include 11 | 12 | namespace xgboost { 13 | namespace sycl { 14 | namespace common { 15 | 16 | template 17 | void LaunchSyclKernel(DeviceOrd device, Functor&& _func, xgboost::common::Range _range, 18 | SpanType... _spans) { 19 | sycl::DeviceManager device_manager; 20 | auto* qu = device_manager.GetQueue(device); 21 | 22 | size_t size = *(_range.end()); 23 | qu->submit([&](::sycl::handler& cgh) { 24 | cgh.parallel_for<>(::sycl::range<1>(size), 25 | [=](::sycl::id<1> pid) { 26 | const size_t idx = pid[0]; 27 | const_cast(_func)(idx, _spans...); 28 | }); 29 | }).wait(); 30 | } 31 | 32 | } // namespace common 33 | } // namespace sycl 34 | } // namespace xgboost 35 | #endif // PLUGIN_SYCL_COMMON_TRANSFORM_H_ 36 | -------------------------------------------------------------------------------- /plugin/updater_gpu/README.md: -------------------------------------------------------------------------------- 1 | # XGBoost GPU algorithms 2 | 3 | GPU algorithms are no longer a plugin and are included in official releases. [See documentation for more details](https://xgboost.readthedocs.io/en/latest/gpu/). 
4 | -------------------------------------------------------------------------------- /python-package/.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | *.egg* -------------------------------------------------------------------------------- /python-package/README.dft.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | XGBoost Python Package 3 | ====================== 4 | 5 | |PyPI version| 6 | 7 | Installation 8 | ============ 9 | 10 | From `PyPI `_ 11 | --------------------------------------------------- 12 | 13 | For a stable version, install using ``pip``:: 14 | 15 | pip install xgboost 16 | 17 | .. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg 18 | :target: http://badge.fury.io/py/xgboost 19 | 20 | For building from source, see `build `_. 21 | -------------------------------------------------------------------------------- /python-package/README.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | XGBoost Python Package 3 | ====================== 4 | 5 | |PyPI version| 6 | 7 | Installation 8 | ============ 9 | 10 | From `PyPI `_ 11 | --------------------------------------------------- 12 | 13 | For a stable version, install using ``pip``:: 14 | 15 | pip install xgboost 16 | 17 | .. |PyPI version| image:: https://badge.fury.io/py/xgboost.svg 18 | :target: http://badge.fury.io/py/xgboost 19 | 20 | For building from source, see `build `_. 21 | -------------------------------------------------------------------------------- /python-package/hatch_build.py: -------------------------------------------------------------------------------- 1 | """ 2 | Custom hook to customize the behavior of Hatchling. 3 | Here, we customize the tag of the generated wheels. 
4 | """ 5 | 6 | from typing import Any, Dict 7 | 8 | from hatchling.builders.hooks.plugin.interface import BuildHookInterface 9 | from packaging.tags import platform_tags 10 | 11 | 12 | def get_tag() -> str: 13 | """Get appropriate wheel tag according to system""" 14 | platform_tag = next(platform_tags()) 15 | return f"py3-none-{platform_tag}" 16 | 17 | 18 | class CustomBuildHook(BuildHookInterface): 19 | """A custom build hook""" 20 | 21 | def initialize(self, version: str, build_data: Dict[str, Any]) -> None: 22 | """This step occurs immediately before each build.""" 23 | build_data["tag"] = get_tag() 24 | -------------------------------------------------------------------------------- /python-package/packager/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/python-package/packager/__init__.py -------------------------------------------------------------------------------- /python-package/packager/sdist.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for building sdist 3 | """ 4 | 5 | import logging 6 | import pathlib 7 | 8 | from .util import copy_with_logging, copytree_with_logging 9 | 10 | 11 | def copy_cpp_src_tree( 12 | cpp_src_dir: pathlib.Path, target_dir: pathlib.Path, logger: logging.Logger 13 | ) -> None: 14 | """Copy C++ source tree into build directory""" 15 | 16 | for subdir in [ 17 | "src", 18 | "include", 19 | "dmlc-core", 20 | "gputreeshap", 21 | "cmake", 22 | "plugin", 23 | ]: 24 | copytree_with_logging(cpp_src_dir / subdir, target_dir / subdir, logger=logger) 25 | 26 | for filename in ["CMakeLists.txt", "LICENSE"]: 27 | copy_with_logging(cpp_src_dir.joinpath(filename), target_dir, logger=logger) 28 | -------------------------------------------------------------------------------- /python-package/packager/util.py:
-------------------------------------------------------------------------------- 1 | """ 2 | Utility functions for implementing PEP 517 backend 3 | """ 4 | 5 | import logging 6 | import pathlib 7 | import shutil 8 | 9 | 10 | def copytree_with_logging( 11 | src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger 12 | ) -> None: 13 | """Call shutil.copytree() with logging""" 14 | logger.info("Copying %s -> %s", str(src), str(dest)) 15 | shutil.copytree(src, dest) 16 | 17 | 18 | def copy_with_logging( 19 | src: pathlib.Path, dest: pathlib.Path, logger: logging.Logger 20 | ) -> None: 21 | """Call shutil.copy() with logging""" 22 | if dest.is_dir(): 23 | logger.info("Copying %s -> %s", str(src), str(dest / src.name)) 24 | else: 25 | logger.info("Copying %s -> %s", str(src), str(dest)) 26 | shutil.copy(src, dest) 27 | -------------------------------------------------------------------------------- /python-package/xgboost/VERSION: -------------------------------------------------------------------------------- 1 | 3.1.0-dev 2 | -------------------------------------------------------------------------------- /python-package/xgboost/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/python-package/xgboost/py.typed -------------------------------------------------------------------------------- /python-package/xgboost/spark/__init__.py: -------------------------------------------------------------------------------- 1 | """PySpark XGBoost integration interface""" 2 | 3 | try: 4 | import pyspark 5 | except ImportError as e: 6 | raise ImportError("pyspark package needs to be installed to use this module") from e 7 | 8 | from .estimator import ( 9 | SparkXGBClassifier, 10 | SparkXGBClassifierModel, 11 | SparkXGBRanker, 12 | SparkXGBRankerModel, 13 | SparkXGBRegressor, 14 | SparkXGBRegressorModel, 15 | ) 16 | 17 | __all__ = [ 18 | 
"SparkXGBClassifier", 19 | "SparkXGBClassifierModel", 20 | "SparkXGBRegressor", 21 | "SparkXGBRegressorModel", 22 | "SparkXGBRanker", 23 | "SparkXGBRankerModel", 24 | ] 25 | -------------------------------------------------------------------------------- /python-package/xgboost/testing/collective.py: -------------------------------------------------------------------------------- 1 | """Collective module related utilities.""" 2 | 3 | import socket 4 | 5 | 6 | def get_avail_port() -> int: 7 | """Returns a port that's available during the function call. It doesn't prevent the 8 | port from being used after the function returns as we can't reserve the port. The 9 | utility makes a test more likely to pass. 10 | 11 | """ 12 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server: 13 | server.bind(("127.0.0.1", 0)) 14 | port = server.getsockname()[1] 15 | return port 16 | -------------------------------------------------------------------------------- /python-package/xgboost/testing/plotting.py: -------------------------------------------------------------------------------- 1 | """Test plotting functions for XGBoost.""" 2 | 3 | import json 4 | 5 | from graphviz import Source 6 | from matplotlib.axes import Axes 7 | 8 | from ..plotting import plot_tree, to_graphviz 9 | from ..sklearn import XGBRegressor 10 | from .data import make_categorical 11 | from .utils import Device 12 | 13 | 14 | def run_categorical(tree_method: str, device: Device) -> None: 15 | """Tests plotting functions for categorical features.""" 16 | X, y = make_categorical(1000, 31, 19, onehot=False) 17 | reg = XGBRegressor( 18 | enable_categorical=True, n_estimators=10, tree_method=tree_method, device=device 19 | ) 20 | reg.fit(X, y) 21 | trees = reg.get_booster().get_dump(dump_format="json") 22 | for tree in trees: 23 | j_tree = json.loads(tree) 24 | assert "leaf" in j_tree.keys() or isinstance(j_tree["split_condition"], list) 25 | 26 | graph = to_graphviz(reg, tree_idx=len(j_tree) - 1) 27 | 
assert isinstance(graph, Source) 28 | ax = plot_tree(reg, tree_idx=len(j_tree) - 1) 29 | assert isinstance(ax, Axes) 30 | -------------------------------------------------------------------------------- /python-package/xgboost/testing/utils.py: -------------------------------------------------------------------------------- 1 | """Helpers for test code.""" 2 | 3 | from typing import Literal, TypeAlias 4 | 5 | Device: TypeAlias = Literal["cpu", "cuda"] 6 | -------------------------------------------------------------------------------- /src/c_api/c_api_error.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2015-2023, XGBoost Contributors 3 | * \file c_api_error.cc 4 | * \brief C error handling 5 | */ 6 | #include "./c_api_error.h" 7 | 8 | #include 9 | 10 | #include "xgboost/c_api.h" 11 | #include "../collective/comm.h" 12 | #include "../collective/comm_group.h" 13 | 14 | struct XGBAPIErrorEntry { 15 | std::string last_error; 16 | std::int32_t code{-1}; 17 | }; 18 | 19 | using XGBAPIErrorStore = dmlc::ThreadLocalStore; 20 | 21 | XGB_DLL const char* XGBGetLastError() { return XGBAPIErrorStore::Get()->last_error.c_str(); } 22 | 23 | void XGBAPISetLastError(const char* msg) { 24 | XGBAPIErrorStore::Get()->last_error = msg; 25 | XGBAPIErrorStore::Get()->code = -1; 26 | } 27 | 28 | XGB_DLL int XGBGetLastErrorCode() { return XGBAPIErrorStore::Get()->code; } 29 | -------------------------------------------------------------------------------- /src/common/cleanup.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024, XGBoost Contributors 3 | * 4 | * @brief RAII guard, simplified version of absl::Cleanup 5 | */ 6 | #pragma once 7 | #include // for function 8 | #include // for forward 9 | 10 | namespace xgboost::common { 11 | class Cleanup { 12 | std::function cb_; 13 | 14 | public: 15 | template 16 | explicit Cleanup(Callback&& cb) : cb_{std::forward(cb)} {} 
17 | 18 | ~Cleanup() { this->cb_(); } 19 | }; 20 | 21 | template 22 | auto MakeCleanup(Callback&& cb) { 23 | return Cleanup{std::forward(cb)}; 24 | } 25 | } // namespace xgboost::common 26 | -------------------------------------------------------------------------------- /src/common/common.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-2024, XGBoost contributors 3 | */ 4 | #include 5 | #include 6 | 7 | #include "common.h" 8 | 9 | namespace dh { 10 | void ThrowOnCudaError(cudaError_t code, const char *file, int line) { 11 | if (code != cudaSuccess) { 12 | std::string f; 13 | if (file != nullptr) { 14 | f = file; 15 | } 16 | LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(), 17 | f + ": " + std::to_string(line)) 18 | .what(); 19 | } 20 | } 21 | } // namespace dh 22 | -------------------------------------------------------------------------------- /src/common/cuda_stream_pool.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2025, XGBoost contributors 3 | */ 4 | #pragma once 5 | #include // for atomic 6 | #include // for size_t 7 | #include // for vector 8 | 9 | #include "device_helpers.cuh" // for CUDAStreamView, CUDAStream 10 | 11 | namespace xgboost::curt { 12 | // rmm cuda_stream_pool 13 | class StreamPool { 14 | mutable std::atomic next_{0}; 15 | std::vector stream_; 16 | 17 | public: 18 | explicit StreamPool(std::size_t n) : stream_(n) {} 19 | ~StreamPool() = default; 20 | StreamPool(StreamPool const& that) = delete; 21 | StreamPool& operator=(StreamPool const& that) = delete; 22 | 23 | [[nodiscard]] dh::CUDAStreamView operator[](std::size_t i) const { return stream_[i].View(); } 24 | [[nodiscard]] dh::CUDAStreamView Next() const { 25 | return stream_[(next_++) % stream_.size()].View(); 26 | } 27 | [[nodiscard]] std::size_t Size() const { return stream_.size(); } 28 | }; 29 | } // namespace xgboost::curt 30 | 
-------------------------------------------------------------------------------- /src/common/numeric.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2022-2024, XGBoost Contributors 3 | */ 4 | #include "numeric.h" 5 | 6 | #include // std::is_same_v 7 | 8 | #include "xgboost/context.h" // Context 9 | #include "xgboost/host_device_vector.h" // HostDeviceVector 10 | 11 | namespace xgboost { 12 | namespace common { 13 | double Reduce(Context const* ctx, HostDeviceVector const& values) { 14 | if (ctx->IsCUDA()) { 15 | return cuda_impl::Reduce(ctx, values); 16 | } else { 17 | auto const& h_values = values.ConstHostVector(); 18 | auto result = cpu_impl::Reduce(ctx, h_values.cbegin(), h_values.cend(), 0.0); 19 | static_assert(std::is_same_v); 20 | return result; 21 | } 22 | } 23 | } // namespace common 24 | } // namespace xgboost 25 | -------------------------------------------------------------------------------- /src/common/numeric.cu: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright 2022 by XGBoost Contributors 3 | */ 4 | #include 5 | 6 | #include "device_helpers.cuh" // dh::Reduce, dh::XGBCachingDeviceAllocator 7 | #include "numeric.h" 8 | #include "xgboost/context.h" // Context 9 | #include "xgboost/host_device_vector.h" // HostDeviceVector 10 | 11 | namespace xgboost::common::cuda_impl { 12 | double Reduce(Context const* ctx, HostDeviceVector const& values) { 13 | values.SetDevice(ctx->Device()); 14 | auto const d_values = values.ConstDeviceSpan(); 15 | dh::XGBCachingDeviceAllocator alloc; 16 | return dh::Reduce(thrust::cuda::par(alloc), dh::tcbegin(d_values), dh::tcend(d_values), 0.0, 17 | thrust::plus{}); 18 | } 19 | } // namespace xgboost::common::cuda_impl 20 | -------------------------------------------------------------------------------- /src/common/pseudo_huber.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2022, by XGBoost Contributors 3 | */ 4 | #include "pseudo_huber.h" 5 | namespace xgboost { 6 | DMLC_REGISTER_PARAMETER(PesudoHuberParam); 7 | } 8 | -------------------------------------------------------------------------------- /src/common/pseudo_huber.h: -------------------------------------------------------------------------------- 1 | #ifndef XGBOOST_COMMON_PSEUDO_HUBER_H_ 2 | #define XGBOOST_COMMON_PSEUDO_HUBER_H_ 3 | /*! 
4 | * Copyright 2022, by XGBoost Contributors 5 | */ 6 | #include "xgboost/parameter.h" 7 | 8 | namespace xgboost { 9 | struct PesudoHuberParam : public XGBoostParameter { 10 | float huber_slope{1.0}; 11 | 12 | DMLC_DECLARE_PARAMETER(PesudoHuberParam) { 13 | DMLC_DECLARE_FIELD(huber_slope) 14 | .set_default(1.0f) 15 | .describe("The delta term in Pseudo-Huber loss."); 16 | } 17 | }; 18 | } // namespace xgboost 19 | #endif // XGBOOST_COMMON_PSEUDO_HUBER_H_ 20 | -------------------------------------------------------------------------------- /src/common/survival_util.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2019-2020 by Contributors 3 | * \file survival_util.cc 4 | * \brief Utility functions, useful for implementing objective and metric functions for survival 5 | * analysis 6 | * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking 7 | */ 8 | 9 | #include 10 | #include "survival_util.h" 11 | 12 | namespace xgboost { 13 | namespace common { 14 | 15 | DMLC_REGISTER_PARAMETER(AFTParam); 16 | 17 | } // namespace common 18 | } // namespace xgboost 19 | -------------------------------------------------------------------------------- /src/common/type.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023, XGBoost Contributors 3 | */ 4 | #pragma once 5 | #include // for int8_t 6 | #include // for is_const_v, add_const_t, conditional_t, add_pointer_t 7 | 8 | #include "xgboost/span.h" // for Span 9 | namespace xgboost::common { 10 | template , 11 | std::add_const_t, std::int8_t>> 12 | common::Span EraseType(common::Span data) { 13 | auto n_total_bytes = data.size_bytes(); 14 | auto erased = common::Span{reinterpret_cast>(data.data()), n_total_bytes}; 15 | return erased; 16 | } 17 | 18 | template 19 | common::Span RestoreType(common::Span data) { 20 | auto n_total_bytes = data.size_bytes(); 21 | auto restored = common::Span{reinterpret_cast(data.data()), 
n_total_bytes / sizeof(T)}; 22 | return restored; 23 | } 24 | } // namespace xgboost::common 25 | -------------------------------------------------------------------------------- /src/common/version.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2019 XGBoost contributors 3 | */ 4 | #ifndef XGBOOST_COMMON_VERSION_H_ 5 | #define XGBOOST_COMMON_VERSION_H_ 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "xgboost/base.h" 12 | 13 | namespace xgboost { 14 | class Json; 15 | // a static class for handling version info 16 | struct Version { 17 | using TripletT = std::tuple; 18 | static const TripletT kInvalid; 19 | 20 | // Save/Load version info to JSON document 21 | static TripletT Load(Json const& in); 22 | static void Save(Json* out); 23 | 24 | // Save/Load version info to dmlc::Stream 25 | static Version::TripletT Load(dmlc::Stream* fi); 26 | static void Save(dmlc::Stream* fo); 27 | 28 | static std::string String(TripletT const& version); 29 | static TripletT Self(); 30 | 31 | static bool Same(TripletT const& triplet); 32 | }; 33 | 34 | } // namespace xgboost 35 | #endif // XGBOOST_COMMON_VERSION_H_ 36 | -------------------------------------------------------------------------------- /src/context.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2022 by XGBoost Contributors 3 | */ 4 | #include "common/cuda_context.cuh" // CUDAContext 5 | #include "xgboost/context.h" 6 | 7 | namespace xgboost { 8 | CUDAContext const* Context::CUDACtx() const { 9 | if (!cuctx_) { 10 | cuctx_.reset(new CUDAContext{}); 11 | } 12 | return cuctx_.get(); 13 | } 14 | } // namespace xgboost 15 | -------------------------------------------------------------------------------- /src/data/array_interface.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2024, XGBoost Contributors 3 | */ 4 | #include 
"array_interface.h" 5 | 6 | #include "../common/common.h" // for AssertGPUSupport 7 | 8 | namespace xgboost { 9 | #if !defined(XGBOOST_USE_CUDA) 10 | void ArrayInterfaceHandler::SyncCudaStream(int64_t) { common::AssertGPUSupport(); } 11 | bool ArrayInterfaceHandler::IsCudaPtr(void const *) { return false; } 12 | #endif // !defined(XGBOOST_USE_CUDA) 13 | } // namespace xgboost 14 | -------------------------------------------------------------------------------- /src/data/gradient_index_format.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021-2024, XGBoost contributors 3 | */ 4 | #pragma once 5 | 6 | #include // for size_t 7 | #include // for move 8 | 9 | #include "../common/hist_util.h" // for HistogramCuts 10 | #include "../common/io.h" // for AlignedFileWriteStream 11 | #include "gradient_index.h" // for GHistIndexMatrix 12 | #include "sparse_page_writer.h" // for SparsePageFormat 13 | 14 | namespace xgboost::common { 15 | class HistogramCuts; 16 | } 17 | 18 | namespace xgboost::data { 19 | class GHistIndexRawFormat : public SparsePageFormat { 20 | common::HistogramCuts cuts_; 21 | 22 | public: 23 | [[nodiscard]] bool Read(GHistIndexMatrix* page, common::AlignedResourceReadStream* fi) override; 24 | [[nodiscard]] std::size_t Write(GHistIndexMatrix const& page, 25 | common::AlignedFileWriteStream* fo) override; 26 | 27 | explicit GHistIndexRawFormat(common::HistogramCuts cuts) : cuts_{std::move(cuts)} {} 28 | }; 29 | } // namespace xgboost::data 30 | -------------------------------------------------------------------------------- /src/data/simple_batch_iterator.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2019-2024, XGBoost contributors 3 | */ 4 | #ifndef XGBOOST_DATA_SIMPLE_BATCH_ITERATOR_H_ 5 | #define XGBOOST_DATA_SIMPLE_BATCH_ITERATOR_H_ 6 | 7 | #include // for shared_ptr 8 | #include // for move 9 | 10 | #include "xgboost/data.h" // for 
BatchIteratorImpl 11 | 12 | namespace xgboost::data { 13 | template 14 | class SimpleBatchIteratorImpl : public BatchIteratorImpl { 15 | public: 16 | explicit SimpleBatchIteratorImpl(std::shared_ptr page) : page_(std::move(page)) {} 17 | const T& operator*() const override { 18 | CHECK(page_ != nullptr); 19 | return *page_; 20 | } 21 | SimpleBatchIteratorImpl& operator++() override { 22 | page_ = nullptr; 23 | return *this; 24 | } 25 | bool AtEnd() const override { return page_ == nullptr; } 26 | 27 | std::shared_ptr Page() const override { return page_; } 28 | 29 | private: 30 | std::shared_ptr page_{nullptr}; 31 | }; 32 | } // namespace xgboost::data 33 | #endif // XGBOOST_DATA_SIMPLE_BATCH_ITERATOR_H_ 34 | -------------------------------------------------------------------------------- /src/data/sparse_page_source.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021-2024, XGBoost contributors 3 | */ 4 | #include "../common/device_helpers.cuh" // for CurrentDevice 5 | #include "proxy_dmatrix.cuh" // for Dispatch, DMatrixProxy 6 | #include "simple_dmatrix.cuh" // for CopyToSparsePage 7 | #include "sparse_page_source.h" 8 | #include "xgboost/data.h" // for SparsePage 9 | 10 | namespace xgboost::data { 11 | void DevicePush(DMatrixProxy *proxy, float missing, SparsePage *page) { 12 | auto device = proxy->Device(); 13 | if (!device.IsCUDA()) { 14 | device = DeviceOrd::CUDA(dh::CurrentDevice()); 15 | } 16 | CHECK(device.IsCUDA()); 17 | auto ctx = Context{}.MakeCUDA(device.ordinal); 18 | 19 | cuda_impl::Dispatch( 20 | proxy, [&](auto const &value) { CopyToSparsePage(&ctx, value, device, missing, page); }); 21 | } 22 | } // namespace xgboost::data 23 | -------------------------------------------------------------------------------- /src/data/validation.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024, XGBoost Contributors 3 | */ 4 | #include // 
for equal 5 | 6 | #include "../common/device_helpers.cuh" // for tcbegin 7 | #include "../common/error_msg.h" // for InconsistentFeatureTypes 8 | #include "validation.h" 9 | 10 | namespace xgboost::data::cuda_impl { 11 | void CheckFeatureTypes(HostDeviceVector const& lhs, 12 | HostDeviceVector const& rhs) { 13 | auto device = lhs.DeviceCanRead() ? lhs.Device() : rhs.Device(); 14 | CHECK(device.IsCUDA()); 15 | lhs.SetDevice(device), rhs.SetDevice(device); 16 | auto const& d_lhs = lhs.ConstDeviceSpan(); 17 | auto const& d_rhs = rhs.ConstDeviceSpan(); 18 | auto ft_is_same = thrust::equal(dh::tcbegin(d_lhs), dh::tcend(d_lhs), dh::tcbegin(d_rhs)); 19 | CHECK(ft_is_same) << error::InconsistentFeatureTypes(); 20 | } 21 | } // namespace xgboost::data::cuda_impl 22 | -------------------------------------------------------------------------------- /src/global_config.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2025, XGBoost Contributors 3 | * \file global_config.cc 4 | * \brief Global configuration for XGBoost 5 | * \author Hyunsu Cho 6 | */ 7 | 8 | #include "xgboost/global_config.h" 9 | 10 | #include 11 | 12 | #include "common/cuda_rt_utils.h" // for SetDevice 13 | 14 | namespace xgboost { 15 | DMLC_REGISTER_PARAMETER(GlobalConfiguration); 16 | 17 | InitNewThread::InitNewThread() 18 | : config{*GlobalConfigThreadLocalStore::Get()}, device{curt::CurrentDevice(false)} {} 19 | 20 | void InitNewThread::operator()() const { 21 | *GlobalConfigThreadLocalStore::Get() = config; 22 | if (config.nthread > 0) { 23 | omp_set_num_threads(config.nthread); 24 | } 25 | if (device >= 0) { 26 | curt::SetDevice(this->device); 27 | } 28 | } 29 | } // namespace xgboost 30 | -------------------------------------------------------------------------------- /src/linear/linear_updater.cc: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright 2018 3 | */ 4 | #include 5 | #include 6 | #include "./param.h" 7 | 8 | namespace dmlc { 9 | DMLC_REGISTRY_ENABLE(::xgboost::LinearUpdaterReg); 10 | } // namespace dmlc 11 | 12 | namespace xgboost { 13 | 14 | LinearUpdater* LinearUpdater::Create(const std::string& name, Context const* ctx) { 15 | auto *e = ::dmlc::Registry< ::xgboost::LinearUpdaterReg>::Get()->Find(name); 16 | if (e == nullptr) { 17 | LOG(FATAL) << "Unknown linear updater " << name; 18 | } 19 | auto p_linear = (e->body)(); 20 | p_linear->ctx_ = ctx; 21 | return p_linear; 22 | } 23 | 24 | } // namespace xgboost 25 | 26 | namespace xgboost { 27 | namespace linear { 28 | DMLC_REGISTER_PARAMETER(LinearTrainParam); 29 | 30 | // List of files that will be force linked in static links. 31 | DMLC_REGISTRY_LINK_TAG(updater_shotgun); 32 | DMLC_REGISTRY_LINK_TAG(updater_coordinate); 33 | #ifdef XGBOOST_USE_CUDA 34 | DMLC_REGISTRY_LINK_TAG(updater_gpu_coordinate); 35 | #endif // XGBOOST_USE_CUDA 36 | } // namespace linear 37 | } // namespace xgboost 38 | -------------------------------------------------------------------------------- /src/metric/elementwise_metric.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2018 XGBoost contributors 3 | */ 4 | // Dummy file to keep the CUDA conditional compile trick. 5 | 6 | #if !defined(XGBOOST_USE_CUDA) 7 | #include "elementwise_metric.cu" 8 | #endif // !defined(XGBOOST_USE_CUDA) 9 | -------------------------------------------------------------------------------- /src/metric/multiclass_metric.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2019 XGBoost contributors 3 | */ 4 | // Dummy file to keep the CUDA conditional compile trick. 
5 | 6 | #if !defined(XGBOOST_USE_CUDA) 7 | #include "multiclass_metric.cu" 8 | #endif // !defined(XGBOOST_USE_CUDA) 9 | -------------------------------------------------------------------------------- /src/metric/survival_metric.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2019-2020 by Contributors 3 | * \file survival_metric.cc 4 | * \brief Metrics for survival analysis 5 | * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking 6 | */ 7 | 8 | // Dummy file to keep the CUDA conditional compile trick. 9 | #if !defined(XGBOOST_USE_CUDA) 10 | #include "survival_metric.cu" 11 | #endif // !defined(XGBOOST_USE_CUDA) 12 | -------------------------------------------------------------------------------- /src/objective/aft_obj.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2019-2020 by Contributors 3 | * \file aft_obj.cc 4 | * \brief Definition of AFT loss for survival analysis. 5 | * \author Avinash Barnwal, Hyunsu Cho and Toby Hocking 6 | */ 7 | 8 | // Dummy file to keep the CUDA conditional compile trick. 9 | 10 | #include 11 | namespace xgboost { 12 | namespace obj { 13 | 14 | DMLC_REGISTRY_FILE_TAG(aft_obj); 15 | 16 | } // namespace obj 17 | } // namespace xgboost 18 | 19 | #ifndef XGBOOST_USE_CUDA 20 | #include "aft_obj.cu" 21 | #endif // XGBOOST_USE_CUDA 22 | -------------------------------------------------------------------------------- /src/objective/hinge.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2018 XGBoost contributors 3 | */ 4 | 5 | // Dummy file to keep the CUDA conditional compile trick. 
6 | 7 | #include 8 | namespace xgboost { 9 | namespace obj { 10 | 11 | DMLC_REGISTRY_FILE_TAG(hinge_obj); 12 | 13 | } // namespace obj 14 | } // namespace xgboost 15 | 16 | #ifndef XGBOOST_USE_CUDA 17 | #include "hinge.cu" 18 | #endif // XGBOOST_USE_CUDA 19 | -------------------------------------------------------------------------------- /src/objective/init_estimation.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2022-2023 by XGBoost contributors 3 | */ 4 | #ifndef XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ 5 | #define XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ 6 | #include "xgboost/data.h" // MetaInfo 7 | #include "xgboost/linalg.h" // Tensor 8 | #include "xgboost/objective.h" // ObjFunction 9 | 10 | namespace xgboost::obj { 11 | class FitIntercept : public ObjFunction { 12 | public: 13 | void InitEstimation(MetaInfo const& info, linalg::Vector* base_score) const override; 14 | }; 15 | 16 | class FitInterceptGlmLike : public FitIntercept { 17 | public: 18 | void InitEstimation(MetaInfo const& info, linalg::Vector* base_score) const override; 19 | }; 20 | 21 | inline void CheckInitInputs(MetaInfo const& info) { 22 | CHECK_EQ(info.labels.Shape(0), info.num_row_) << "Invalid shape of labels."; 23 | if (!info.weights_.Empty()) { 24 | CHECK_EQ(info.weights_.Size(), info.num_row_) 25 | << "Number of weights should be equal to number of data points."; 26 | } 27 | } 28 | } // namespace xgboost::obj 29 | #endif // XGBOOST_OBJECTIVE_INIT_ESTIMATION_H_ 30 | -------------------------------------------------------------------------------- /src/objective/multiclass_obj.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2018 XGBoost contributors 3 | */ 4 | 5 | // Dummy file to keep the CUDA conditional compile trick. 
6 | 7 | #include 8 | namespace xgboost { 9 | namespace obj { 10 | 11 | DMLC_REGISTRY_FILE_TAG(multiclass_obj); 12 | 13 | } // namespace obj 14 | } // namespace xgboost 15 | 16 | #ifndef XGBOOST_USE_CUDA 17 | #include "multiclass_obj.cu" 18 | #endif // XGBOOST_USE_CUDA 19 | -------------------------------------------------------------------------------- /src/objective/multiclass_param.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2015-2023 by Contributors 3 | * \file multiclass_param.h 4 | * \brief Definition of multi-class classification parameters. 5 | */ 6 | #ifndef XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_ 7 | #define XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_ 8 | 9 | #include "xgboost/parameter.h" 10 | 11 | namespace xgboost { 12 | namespace obj { 13 | 14 | struct SoftmaxMultiClassParam : public XGBoostParameter { 15 | int num_class; 16 | // declare parameters 17 | DMLC_DECLARE_PARAMETER(SoftmaxMultiClassParam) { 18 | DMLC_DECLARE_FIELD(num_class).set_lower_bound(1) 19 | .describe("Number of output class in the multi-class classification."); 20 | } 21 | }; 22 | 23 | } // namespace obj 24 | } // namespace xgboost 25 | #endif // XGBOOST_OBJECTIVE_MULTICLASS_PARAM_H_ 26 | -------------------------------------------------------------------------------- /src/objective/quantile_obj.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 by XGBoost Contributors 3 | */ 4 | 5 | // Dummy file to enable the CUDA conditional compile trick. 
6 | 7 | #include 8 | namespace xgboost { 9 | namespace obj { 10 | 11 | DMLC_REGISTRY_FILE_TAG(quantile_obj); 12 | 13 | } // namespace obj 14 | } // namespace xgboost 15 | 16 | #ifndef XGBOOST_USE_CUDA 17 | #include "quantile_obj.cu" 18 | #endif // !defined(XGBOOST_USE_CUDA) 19 | -------------------------------------------------------------------------------- /src/objective/regression_obj.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2018 XGBoost contributors 3 | */ 4 | 5 | // Dummy file to keep the CUDA conditional compile trick. 6 | 7 | #include 8 | namespace xgboost { 9 | namespace obj { 10 | 11 | DMLC_REGISTRY_FILE_TAG(regression_obj); 12 | 13 | } // namespace obj 14 | } // namespace xgboost 15 | 16 | #ifndef XGBOOST_USE_CUDA 17 | #include "regression_obj.cu" 18 | #endif // XGBOOST_USE_CUDA 19 | -------------------------------------------------------------------------------- /src/objective/regression_param.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2015-2023 by Contributors 3 | * \file regression_param.h 4 | * \brief Definition of single-value regression and classification parameters. 
5 | */ 6 | #ifndef XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_ 7 | #define XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_ 8 | 9 | #include "xgboost/parameter.h" 10 | 11 | namespace xgboost { 12 | namespace obj { 13 | 14 | struct RegLossParam : public XGBoostParameter { 15 | float scale_pos_weight; 16 | // declare parameters 17 | DMLC_DECLARE_PARAMETER(RegLossParam) { 18 | DMLC_DECLARE_FIELD(scale_pos_weight).set_default(1.0f).set_lower_bound(0.0f) 19 | .describe("Scale the weight of positive examples by this factor"); 20 | } 21 | }; 22 | 23 | } // namespace obj 24 | } // namespace xgboost 25 | #endif // XGBOOST_OBJECTIVE_REGRESSION_PARAM_H_ 26 | -------------------------------------------------------------------------------- /src/predictor/cpu_treeshap.h: -------------------------------------------------------------------------------- 1 | #ifndef XGBOOST_PREDICTOR_CPU_TREESHAP_H_ 2 | #define XGBOOST_PREDICTOR_CPU_TREESHAP_H_ 3 | /** 4 | * Copyright by XGBoost Contributors 2017-2022 5 | */ 6 | #include // vector 7 | 8 | #include "xgboost/tree_model.h" // RegTree 9 | 10 | namespace xgboost { 11 | /** 12 | * \brief calculate the feature contributions (https://arxiv.org/abs/1706.06060) for the tree 13 | * \param feat dense feature vector, if the feature is missing the field is set to NaN 14 | * \param out_contribs output vector to hold the contributions 15 | * \param condition fix one feature to either off (-1) on (1) or not fixed (0 default) 16 | * \param condition_feature the index of the feature to fix 17 | */ 18 | void CalculateContributions(RegTree const &tree, const RegTree::FVec &feat, 19 | std::vector *mean_values, bst_float *out_contribs, int condition, 20 | unsigned condition_feature); 21 | } // namespace xgboost 22 | #endif // XGBOOST_PREDICTOR_CPU_TREESHAP_H_ 23 | -------------------------------------------------------------------------------- /src/tree/fit_stump.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 
2022-2024, XGBoost Contributors 3 | * 4 | * \brief Utilities for estimating initial score. 5 | */ 6 | 7 | #ifndef XGBOOST_TREE_FIT_STUMP_H_ 8 | #define XGBOOST_TREE_FIT_STUMP_H_ 9 | 10 | #include // std::max 11 | 12 | #include "xgboost/base.h" // GradientPair 13 | #include "xgboost/context.h" // Context 14 | #include "xgboost/data.h" // MetaInfo 15 | #include "xgboost/linalg.h" // TensorView 16 | 17 | namespace xgboost { 18 | namespace tree { 19 | 20 | template 21 | XGBOOST_DEVICE inline double CalcUnregularizedWeight(T sum_grad, T sum_hess) { 22 | return -sum_grad / std::max(sum_hess, static_cast(kRtEps)); 23 | } 24 | 25 | /** 26 | * @brief Fit a tree stump as an estimation of base_score. 27 | */ 28 | void FitStump(Context const* ctx, MetaInfo const& info, linalg::Matrix const& gpair, 29 | bst_target_t n_targets, linalg::Vector* out); 30 | } // namespace tree 31 | } // namespace xgboost 32 | #endif // XGBOOST_TREE_FIT_STUMP_H_ 33 | -------------------------------------------------------------------------------- /src/tree/sample_position.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024, XGBoost Contributors 3 | */ 4 | #pragma once 5 | #include "xgboost/base.h" // for bst_node_t 6 | 7 | namespace xgboost::tree { 8 | // Utility for manipulating the node index. This is used by the tree methods and the 9 | // adaptive objectives to share the node index. A row is invalid if it's not used in the 10 | // last iteration (due to sampling). For these rows, the corresponding tree node index is 11 | // stored as its bitwise complement (~nidx), which is negative for any non-negative index. 12 | struct SamplePosition { 13 | [[nodiscard]] bst_node_t static XGBOOST_HOST_DEV_INLINE Encode(bst_node_t nidx, bool is_valid) { 14 | return is_valid ? nidx : ~nidx; 15 | } 16 | [[nodiscard]] bst_node_t static XGBOOST_HOST_DEV_INLINE Decode(bst_node_t nidx) { 17 | return IsValid(nidx) ? 
nidx : ~nidx; 18 | } 19 | [[nodiscard]] bool static XGBOOST_HOST_DEV_INLINE IsValid(bst_node_t nidx) { return nidx >= 0; } 20 | }; 21 | } // namespace xgboost::tree 22 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | This folder contains test cases for the XGBoost C++ core, the Python package, and some other CI 2 | facilities. 3 | 4 | # Directories 5 | * ci_build: Test facilities for Jenkins CI and GitHub Actions. 6 | * cli: Basic test for the command line executable `xgboost`. Most of the other command line 7 | specific tests are in the Python test `test_cli.py`. 8 | * cpp: Tests for the C++ core, using the Google Test framework. 9 | * python: Tests for the Python package, demonstrations and CLI. For how to set up the 10 | dependencies for tests, see the conda files in `ci_build`. 11 | * python-gpu: Similar to the python tests, but for GPU. 12 | * travis: CI facilities for Travis. 13 | * test_distributed: Tests for distributed systems, including Spark and Dask. 14 | 15 | # Others 16 | * pytest.ini: Describes the `pytest` markers for Python tests; some markers are generated 17 | by the `conftest.py` file. 
18 | -------------------------------------------------------------------------------- /tests/cli/machine.conf.in: -------------------------------------------------------------------------------- 1 | # Originally an example in demo/regression/ 2 | booster = gbtree 3 | objective = reg:squarederror 4 | eta = 1.0 5 | gamma = 1.0 6 | seed = 0 7 | min_child_weight = 0 8 | max_depth = 3 9 | 10 | num_round = 2 11 | save_period = 0 12 | data = "@PROJECT_SOURCE_DIR@/demo/data/agaricus.txt.train?format=libsvm" 13 | eval[test] = "@PROJECT_SOURCE_DIR@/demo/data/agaricus.txt.test?format=libsvm" -------------------------------------------------------------------------------- /tests/cpp/categorical_helpers.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2021 by XGBoost Contributors 3 | * 4 | * \brief Utilities for testing categorical data support. 5 | */ 6 | #include 7 | #include 8 | 9 | #include "xgboost/span.h" 10 | #include "helpers.h" 11 | #include "../../src/common/categorical.h" 12 | 13 | namespace xgboost { 14 | inline std::vector OneHotEncodeFeature(std::vector x, 15 | size_t num_cat) { 16 | std::vector ret(x.size() * num_cat, 0); 17 | size_t n_rows = x.size(); 18 | for (size_t r = 0; r < n_rows; ++r) { 19 | bst_cat_t cat = common::AsCat(x[r]); 20 | ret.at(num_cat * r + cat) = 1; 21 | } 22 | return ret; 23 | } 24 | 25 | } // namespace xgboost 26 | -------------------------------------------------------------------------------- /tests/cpp/collective/test_result.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024, XGBoost Contributors 3 | */ 4 | #include 5 | #include 6 | 7 | namespace xgboost::collective { 8 | TEST(Result, Concat) { 9 | auto rc0 = Fail("foo"); 10 | auto rc1 = Fail("bar"); 11 | auto rc = std::move(rc0) + std::move(rc1); 12 | ASSERT_NE(rc.Report().find("foo"), std::string::npos); 13 | ASSERT_NE(rc.Report().find("bar"), std::string::npos); 
14 | 15 | auto rc2 = Fail("Another", std::move(rc)); 16 | auto assert_that = [](Result const& rc) { 17 | ASSERT_NE(rc.Report().find("Another"), std::string::npos); 18 | ASSERT_NE(rc.Report().find("foo"), std::string::npos); 19 | ASSERT_NE(rc.Report().find("bar"), std::string::npos); 20 | }; 21 | assert_that(rc2); 22 | 23 | auto empty = Success(); 24 | auto rc3 = std::move(empty) + std::move(rc2); 25 | assert_that(rc3); 26 | 27 | empty = Success(); 28 | auto rc4 = std::move(rc3) + std::move(empty); 29 | assert_that(rc4); 30 | } 31 | } // namespace xgboost::collective 32 | -------------------------------------------------------------------------------- /tests/cpp/collective/test_worker.cuh: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023, XGBoost Contributors 3 | */ 4 | #pragma once 5 | #include // for shared_ptr 6 | 7 | #include "../../../src/collective/coll.h" // for Coll 8 | #include "../../../src/collective/comm.h" // for Comm 9 | #include "test_worker.h" 10 | #include "xgboost/context.h" // for Context 11 | 12 | namespace xgboost::collective { 13 | class NCCLWorkerForTest : public WorkerForTest { 14 | protected: 15 | std::shared_ptr coll_; 16 | std::shared_ptr nccl_comm_; 17 | std::shared_ptr nccl_coll_; 18 | Context ctx_; 19 | 20 | public: 21 | using WorkerForTest::WorkerForTest; 22 | 23 | void Setup() { 24 | ctx_ = MakeCUDACtx(comm_.Rank()); 25 | coll_.reset(new Coll{}); 26 | nccl_comm_.reset(this->comm_.MakeCUDAVar(&ctx_, coll_)); 27 | nccl_coll_.reset(coll_->MakeCUDAVar()); 28 | ASSERT_EQ(comm_.World(), nccl_comm_->World()); 29 | ASSERT_EQ(comm_.Rank(), nccl_comm_->Rank()); 30 | } 31 | }; 32 | } // namespace xgboost::collective 33 | -------------------------------------------------------------------------------- /tests/cpp/common/test_algorithm.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2023 by XGBoost Contributors 3 | */ 4 | 
#include 5 | #include // Context 6 | #include 7 | 8 | #include // is_sorted 9 | 10 | #include "../../../src/common/algorithm.h" 11 | 12 | namespace xgboost { 13 | namespace common { 14 | TEST(Algorithm, ArgSort) { 15 | Context ctx; 16 | std::vector inputs{3.0, 2.0, 1.0}; 17 | auto ret = ArgSort(&ctx, inputs.cbegin(), inputs.cend()); 18 | std::vector sol{2, 1, 0}; 19 | ASSERT_EQ(ret, sol); 20 | } 21 | 22 | TEST(Algorithm, Sort) { 23 | Context ctx; 24 | ctx.Init(Args{{"nthread", "8"}}); 25 | std::vector inputs{3.0, 1.0, 2.0}; 26 | 27 | Sort(&ctx, inputs.begin(), inputs.end(), std::less<>{}); 28 | ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend())); 29 | 30 | inputs = {3.0, 1.0, 2.0}; 31 | StableSort(&ctx, inputs.begin(), inputs.end(), std::less<>{}); 32 | ASSERT_TRUE(std::is_sorted(inputs.cbegin(), inputs.cend())); 33 | } 34 | } // namespace common 35 | } // namespace xgboost 36 | -------------------------------------------------------------------------------- /tests/cpp/common/test_common.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024-2025, XGBoost Contributors 3 | */ 4 | #include 5 | 6 | #include "../../../src/common/common.h" 7 | 8 | namespace xgboost::common { 9 | TEST(Common, HumanMemUnit) { 10 | auto name = HumanMemUnit(1024 * 1024 * 1024ul); 11 | ASSERT_EQ(name, "1GB"); 12 | name = HumanMemUnit(1024 * 1024ul); 13 | ASSERT_EQ(name, "1MB"); 14 | name = HumanMemUnit(1024); 15 | ASSERT_EQ(name, "1KB"); 16 | name = HumanMemUnit(1); 17 | ASSERT_EQ(name, "1B"); 18 | } 19 | 20 | TEST(Common, TrimLast) { 21 | { 22 | std::string in{"foobar "}; 23 | auto out = TrimLast(in); 24 | ASSERT_EQ(out, "foobar"); 25 | } 26 | { 27 | std::string in{R"(foobar 28 | )"}; 29 | auto out = TrimLast(in); 30 | ASSERT_EQ(out, "foobar"); 31 | } 32 | } 33 | } // namespace xgboost::common 34 | -------------------------------------------------------------------------------- /tests/cpp/common/test_cuda_host_allocator.cu: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024, XGBoost Contributors 3 | */ 4 | #include 5 | #include // for Context 6 | 7 | #include 8 | 9 | #include "../../../src/common/cuda_pinned_allocator.h" 10 | #include "../../../src/common/device_helpers.cuh" // for DefaultStream 11 | #include "../../../src/common/numeric.h" // for Iota 12 | 13 | namespace xgboost { 14 | TEST(CudaHostMalloc, Pinned) { 15 | std::vector> vec; 16 | vec.resize(10); 17 | ASSERT_EQ(vec.size(), 10); 18 | Context ctx; 19 | common::Iota(&ctx, vec.begin(), vec.end(), 0); 20 | float k = 0; 21 | for (auto v : vec) { 22 | ASSERT_EQ(v, k); 23 | ++k; 24 | } 25 | } 26 | 27 | TEST(CudaHostMalloc, Managed) { 28 | std::vector> vec; 29 | vec.resize(10); 30 | #if defined(__linux__) 31 | dh::safe_cuda( 32 | cudaMemPrefetchAsync(vec.data(), vec.size() * sizeof(float), 0, dh::DefaultStream())); 33 | #endif 34 | dh::DefaultStream().Sync(); 35 | } 36 | } // namespace xgboost 37 | -------------------------------------------------------------------------------- /tests/cpp/common/test_cuda_rt_utils.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2025, XGBoost contributors 3 | */ 4 | 5 | #include 6 | 7 | #include // for int32_t 8 | #include // for set 9 | 10 | #include "../../../src/common/cuda_stream_pool.cuh" 11 | 12 | namespace xgboost::curt { 13 | TEST(RtUtils, StreamPool) { 14 | auto n_streams = 16; 15 | auto pool = std::make_unique(n_streams); 16 | std::set hdls; 17 | 18 | for (std::int32_t i = 0; i < n_streams; ++i) { 19 | hdls.insert(cudaStream_t{pool->Next()}); 20 | } 21 | 22 | ASSERT_EQ(hdls.size(), n_streams); 23 | ASSERT_EQ(hdls.size(), pool->Size()); 24 | 25 | for (std::int32_t i = 0; i < n_streams; ++i) { 26 | hdls.insert(cudaStream_t{pool->Next()}); 27 | } 28 | ASSERT_EQ(hdls.size(), n_streams); 29 | ASSERT_EQ(hdls.size(), pool->Size()); 30 | } 31 | } // namespace xgboost::curt 32 | 
-------------------------------------------------------------------------------- /tests/cpp/common/test_optional_weight.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 by XGBoost Contributors 3 | */ 4 | #include 5 | #include // Context 6 | #include // HostDeviceVector 7 | 8 | #include "../../../src/common/optional_weight.h" 9 | namespace xgboost { 10 | namespace common { 11 | TEST(OptionalWeight, Basic) { 12 | HostDeviceVector weight{{2.0f, 3.0f, 4.0f}}; 13 | Context ctx; 14 | auto opt_w = MakeOptionalWeights(&ctx, weight); 15 | ASSERT_EQ(opt_w[0], 2.0f); 16 | ASSERT_FALSE(opt_w.Empty()); 17 | 18 | weight.HostVector().clear(); 19 | opt_w = MakeOptionalWeights(&ctx, weight); 20 | ASSERT_EQ(opt_w[0], 1.0f); 21 | ASSERT_TRUE(opt_w.Empty()); 22 | } 23 | } // namespace common 24 | } // namespace xgboost 25 | -------------------------------------------------------------------------------- /tests/cpp/common/test_quantile_utils.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 by XGBoost contributors 3 | */ 4 | #include 5 | 6 | #include "../../../src/common/quantile_loss_utils.h" 7 | #include "xgboost/base.h" // Args 8 | 9 | namespace xgboost { 10 | namespace common { 11 | TEST(QuantileLossParam, Basic) { 12 | QuantileLossParam param; 13 | auto& ref = param.quantile_alpha.Get(); 14 | 15 | param.UpdateAllowUnknown(Args{{"quantile_alpha", "0.3"}}); 16 | ASSERT_EQ(ref.size(), 1); 17 | ASSERT_NEAR(ref[0], 0.3, kRtEps); 18 | 19 | param.UpdateAllowUnknown(Args{{"quantile_alpha", "[0.3, 0.6]"}}); 20 | ASSERT_EQ(param.quantile_alpha.Get().size(), 2); 21 | ASSERT_NEAR(ref[0], 0.3, kRtEps); 22 | ASSERT_NEAR(ref[1], 0.6, kRtEps); 23 | 24 | param.UpdateAllowUnknown(Args{{"quantile_alpha", "(0.6, 0.3)"}}); 25 | ASSERT_EQ(param.quantile_alpha.Get().size(), 2); 26 | ASSERT_NEAR(ref[0], 0.6, kRtEps); 27 | ASSERT_NEAR(ref[1], 0.3, kRtEps); 28 | } 29 | } // 
namespace common 30 | } // namespace xgboost 31 | -------------------------------------------------------------------------------- /tests/cpp/common/test_ranking_utils.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 by XGBoost Contributors 3 | */ 4 | #pragma once 5 | #include // for Context 6 | 7 | namespace xgboost::ltr { 8 | void TestNDCGCache(Context const* ctx); 9 | 10 | void TestMAPCache(Context const* ctx); 11 | } // namespace xgboost::ltr 12 | -------------------------------------------------------------------------------- /tests/cpp/common/test_transform_iterator.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2022 by XGBoost Contributors 3 | */ 4 | #include 5 | 6 | #include // std::size_t 7 | 8 | #include "../../../src/common/transform_iterator.h" 9 | 10 | namespace xgboost { 11 | namespace common { 12 | TEST(IndexTransformIter, Basic) { 13 | auto sqr = [](std::size_t i) { return i * i; }; 14 | auto iter = MakeIndexTransformIter(sqr); 15 | for (std::size_t i = 0; i < 4; ++i) { 16 | ASSERT_EQ(iter[i], sqr(i)); 17 | } 18 | } 19 | } // namespace common 20 | } // namespace xgboost 21 | -------------------------------------------------------------------------------- /tests/cpp/common/test_transform_range.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 XGBoost contributors 3 | */ 4 | // Dummy file to keep the CUDA tests. 
5 | #include "test_transform_range.cc" 6 | -------------------------------------------------------------------------------- /tests/cpp/data/test_cat_container.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2025, XGBoost contributors 3 | */ 4 | 5 | #include "test_cat_container.h" 6 | 7 | #include 8 | 9 | #include "../encoder/df_mock.h" 10 | 11 | namespace xgboost { 12 | using DfTest = enc::cpu_impl::DfTest; 13 | 14 | auto eq_check = [](common::Span sorted_idx, std::vector const& sol) { 15 | ASSERT_EQ(sorted_idx, common::Span{sol}); 16 | }; 17 | 18 | TEST(CatContainer, Str) { 19 | Context ctx; 20 | TestCatContainerStr(&ctx, eq_check); 21 | } 22 | 23 | TEST(CatContainer, Mixed) { 24 | Context ctx; 25 | TestCatContainerMixed(&ctx, eq_check); 26 | } 27 | } // namespace xgboost 28 | -------------------------------------------------------------------------------- /tests/cpp/data/test_proxy_dmatrix.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021-2023, XGBoost contributors 3 | */ 4 | #include 5 | 6 | #include "../../../src/data/adapter.h" 7 | #include "../../../src/data/proxy_dmatrix.h" 8 | #include "../helpers.h" 9 | 10 | namespace xgboost::data { 11 | TEST(ProxyDMatrix, HostData) { 12 | DMatrixProxy proxy; 13 | size_t constexpr kRows = 100, kCols = 10; 14 | std::vector> label_storage(1); 15 | 16 | HostDeviceVector storage; 17 | auto data = 18 | RandomDataGenerator(kRows, kCols, 0.5).Device(FstCU()).GenerateArrayInterface(&storage); 19 | 20 | proxy.SetArrayData(data.c_str()); 21 | 22 | auto n_samples = HostAdapterDispatch(&proxy, [](auto const &value) { return value.Size(); }); 23 | ASSERT_EQ(n_samples, kRows); 24 | auto n_features = HostAdapterDispatch(&proxy, [](auto const &value) { return value.NumCols(); }); 25 | ASSERT_EQ(n_features, kCols); 26 | } 27 | } // namespace xgboost::data 28 | 
-------------------------------------------------------------------------------- /tests/cpp/filesystem.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2022-2024, XGBoost Contributors 3 | */ 4 | #ifndef XGBOOST_TESTS_CPP_FILESYSTEM_H 5 | #define XGBOOST_TESTS_CPP_FILESYSTEM_H 6 | 7 | #include 8 | 9 | #include "dmlc/filesystem.h" 10 | 11 | #endif // XGBOOST_TESTS_CPP_FILESYSTEM_H 12 | -------------------------------------------------------------------------------- /tests/cpp/helpers.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2024, XGBoost contributors 3 | */ 4 | #include 5 | 6 | #include "helpers.h" 7 | 8 | namespace xgboost { 9 | CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows, 10 | size_t cols, size_t batches) 11 | : ArrayIterForTest{sparsity, rows, cols, batches} { 12 | rng_->Device(FstCU()); 13 | std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_); 14 | this->Reset(); 15 | } 16 | 17 | int CudaArrayIterForTest::Next() { 18 | if (iter_ == n_batches_) { 19 | return 0; 20 | } 21 | XGProxyDMatrixSetDataCudaArrayInterface(proxy_, batches_[iter_].c_str()); 22 | iter_++; 23 | return 1; 24 | } 25 | } // namespace xgboost 26 | -------------------------------------------------------------------------------- /tests/cpp/metric/test_metric.cc: -------------------------------------------------------------------------------- 1 | // Copyright by Contributors 2 | #include 3 | 4 | #include "../helpers.h" 5 | namespace xgboost { 6 | TEST(Metric, UnknownMetric) { 7 | auto ctx = MakeCUDACtx(GPUIDX); 8 | xgboost::Metric* metric = nullptr; 9 | EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name", &ctx)); 10 | EXPECT_NO_THROW(metric = xgboost::Metric::Create("rmse", &ctx)); 11 | delete metric; 12 | metric = nullptr; 13 | EXPECT_ANY_THROW(metric = xgboost::Metric::Create("unknown_name@1", 
&ctx)); 14 | EXPECT_NO_THROW(metric = xgboost::Metric::Create("error@0.5f", &ctx)); 15 | delete metric; 16 | } 17 | } // namespace xgboost 18 | -------------------------------------------------------------------------------- /tests/cpp/metric/test_survival_metric.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) by Contributors 2020 3 | */ 4 | #include 5 | #include "test_survival_metric.h" 6 | #include "xgboost/metric.h" 7 | 8 | /** Tests for Survival metrics that should run both on CPU and GPU **/ 9 | 10 | namespace xgboost::common { 11 | // Test configuration of AFT metric 12 | TEST(AFTNegLogLikMetric, DeclareUnifiedTest(Configuration)) { 13 | auto ctx = MakeCUDACtx(GPUIDX); 14 | std::unique_ptr metric(Metric::Create("aft-nloglik", &ctx)); 15 | metric->Configure({{"aft_loss_distribution", "normal"}, {"aft_loss_distribution_scale", "10"}}); 16 | 17 | // Configuration round-trip test 18 | Json j_obj{ Object() }; 19 | metric->SaveConfig(&j_obj); 20 | auto aft_param_json = j_obj["aft_loss_param"]; 21 | EXPECT_EQ(get(aft_param_json["aft_loss_distribution"]), "normal"); 22 | EXPECT_EQ(get(aft_param_json["aft_loss_distribution_scale"]), "10"); 23 | 24 | CheckDeterministicMetricElementWise(StringView{"aft-nloglik"}, GPUIDX); 25 | } 26 | } // namespace xgboost::common 27 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_aft_obj.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2020 XGBoost contributors 3 | */ 4 | // Dummy file to keep the CUDA tests. 
5 | 6 | #include "test_aft_obj_cpu.cc" 7 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_aft_obj.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2024 by XGBoost Contributors 3 | */ 4 | #ifndef XGBOOST_TEST_AFT_OBJ_H_ 5 | #define XGBOOST_TEST_AFT_OBJ_H_ 6 | 7 | #include // for Context 8 | 9 | namespace xgboost::common { 10 | 11 | void TestAFTObjConfiguration(const Context* ctx); 12 | 13 | void TestAFTObjGPairUncensoredLabels(const Context* ctx); 14 | 15 | void TestAFTObjGPairLeftCensoredLabels(const Context* ctx); 16 | 17 | void TestAFTObjGPairRightCensoredLabels(const Context* ctx); 18 | 19 | void TestAFTObjGPairIntervalCensoredLabels(const Context* ctx); 20 | 21 | } // namespace xgboost::common 22 | 23 | #endif // XGBOOST_TEST_AFT_OBJ_H_ 24 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_hinge.cu: -------------------------------------------------------------------------------- 1 | #include "test_hinge_cpu.cc" 2 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_hinge.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2024 by XGBoost Contributors 3 | */ 4 | #ifndef XGBOOST_TEST_HINGE_H_ 5 | #define XGBOOST_TEST_HINGE_H_ 6 | 7 | #include // for Context 8 | 9 | namespace xgboost { 10 | 11 | void TestHingeObj(const Context* ctx); 12 | 13 | } // namespace xgboost 14 | 15 | #endif // XGBOOST_TEST_HINGE_H_ 16 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_hinge_cpu.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2018-2023, XGBoost Contributors 3 | */ 4 | #include 5 | #include 6 | #include 7 | 8 | #include "../helpers.h" 9 | #include
"test_hinge.h" 10 | #include "../../../src/common/linalg_op.h" 11 | 12 | namespace xgboost { 13 | 14 | TEST(Objective, DeclareUnifiedTest(HingeObj)) { 15 | Context ctx = MakeCUDACtx(GPUIDX); 16 | TestHingeObj(&ctx); 17 | } 18 | } // namespace xgboost 19 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_multiclass_obj.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2023 by XGBoost Contributors 3 | */ 4 | #ifndef XGBOOST_TEST_MULTICLASS_OBJ_H_ 5 | #define XGBOOST_TEST_MULTICLASS_OBJ_H_ 6 | 7 | #include // for Context 8 | 9 | namespace xgboost { 10 | 11 | void TestSoftmaxMultiClassObjGPair(const Context* ctx); 12 | 13 | void TestSoftmaxMultiClassBasic(const Context* ctx); 14 | 15 | void TestSoftprobMultiClassBasic(const Context* ctx); 16 | 17 | } // namespace xgboost 18 | 19 | #endif // XGBOOST_TEST_MULTICLASS_OBJ_H_ 20 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_multiclass_obj_cpu.cc: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright 2018-2023 XGBoost contributors 3 | */ 4 | #include 5 | #include 6 | 7 | #include "../helpers.h" 8 | #include "test_multiclass_obj.h" 9 | 10 | namespace xgboost { 11 | TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassObjGPair)) { 12 | Context ctx = MakeCUDACtx(GPUIDX); 13 | TestSoftmaxMultiClassObjGPair(&ctx); 14 | } 15 | 16 | TEST(Objective, DeclareUnifiedTest(SoftmaxMultiClassBasic)) { 17 | auto ctx = MakeCUDACtx(GPUIDX); 18 | TestSoftmaxMultiClassBasic(&ctx); 19 | } 20 | 21 | TEST(Objective, DeclareUnifiedTest(SoftprobMultiClassBasic)) { 22 | Context ctx = MakeCUDACtx(GPUIDX); 23 | TestSoftprobMultiClassBasic(&ctx); 24 | } 25 | } // namespace xgboost 26 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_multiclass_obj_gpu.cu: -------------------------------------------------------------------------------- 1 | #include "test_multiclass_obj_cpu.cc" 2 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_quantile_obj.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2024 by XGBoost Contributors 3 | */ 4 | #ifndef XGBOOST_TEST_QUANTILE_OBJ_H_ 5 | #define XGBOOST_TEST_QUANTILE_OBJ_H_ 6 | 7 | #include // for Context 8 | 9 | namespace xgboost { 10 | 11 | void TestQuantile(const Context* ctx); 12 | 13 | void TestQuantileIntercept(const Context* ctx); 14 | 15 | } // namespace xgboost 16 | 17 | #endif // XGBOOST_TEST_QUANTILE_OBJ_H_ 18 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_quantile_obj_cpu.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 by XGBoost contributors 3 | */ 4 | #include 5 | #include 6 | 7 | #include "../helpers.h" 8 | #include "test_quantile_obj.h" 9 | 10 | namespace xgboost { 11 | TEST(Objective, DeclareUnifiedTest(Quantile)) { 12 |
Context ctx = MakeCUDACtx(GPUIDX); 13 | TestQuantile(&ctx); 14 | } 15 | 16 | TEST(Objective, DeclareUnifiedTest(QuantileIntercept)) { 17 | Context ctx = MakeCUDACtx(GPUIDX); 18 | TestQuantileIntercept(&ctx); 19 | } 20 | } // namespace xgboost 21 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_quantile_obj_gpu.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 XGBoost contributors 3 | */ 4 | // Dummy file to enable the CUDA tests. 5 | #include "test_quantile_obj_cpu.cc" 6 | -------------------------------------------------------------------------------- /tests/cpp/objective/test_regression_obj_gpu.cu: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2018 XGBoost contributors 3 | */ 4 | // Dummy file to keep the CUDA tests. 5 | 6 | #include "test_regression_obj_cpu.cc" 7 | -------------------------------------------------------------------------------- /tests/cpp/plugin/federated/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | target_sources( 2 | testxgboost PRIVATE 3 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_coll.cc 4 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_comm.cc 5 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_comm_group.cc 6 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_tracker.cc 7 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_learner.cc 8 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_data.cc 9 | ) 10 | 11 | if(USE_CUDA) 12 | target_sources( 13 | testxgboost PRIVATE 14 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_coll.cu 15 | ${xgboost_SOURCE_DIR}/tests/cpp/plugin/federated/test_federated_comm_group.cu 16 | ) 17 | endif() 18 | 19 | target_include_directories(testxgboost PRIVATE 
${xgboost_SOURCE_DIR}/plugin/federated) 20 | target_link_libraries(testxgboost PRIVATE federated_client) 21 | -------------------------------------------------------------------------------- /tests/cpp/plugin/federated/test_federated_comm_group.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023-2024, XGBoost Contributors 3 | */ 4 | #include 5 | #include // for Json 6 | 7 | #include "../../../../src/collective/comm_group.h" 8 | #include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs 9 | #include "test_worker.h" 10 | 11 | namespace xgboost::collective { 12 | TEST(CommGroup, Federated) { 13 | std::int32_t n_workers = curt::AllVisibleGPUs(); 14 | TestFederatedGroup(n_workers, [&](std::shared_ptr comm_group, std::int32_t r) { 15 | Context ctx; 16 | ASSERT_EQ(comm_group->Rank(), r); 17 | auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CPU()); 18 | ASSERT_EQ(comm.TaskID(), std::to_string(r)); 19 | ASSERT_EQ(comm.Retry(), 2); 20 | }); 21 | } 22 | } // namespace xgboost::collective 23 | -------------------------------------------------------------------------------- /tests/cpp/plugin/federated/test_federated_comm_group.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023-2024, XGBoost Contributors 3 | */ 4 | #include 5 | #include // for Json 6 | 7 | #include "../../../../src/collective/comm_group.h" 8 | #include "../../../../src/common/cuda_rt_utils.h" // for AllVisibleGPUs 9 | #include "../../helpers.h" 10 | #include "test_worker.h" 11 | 12 | namespace xgboost::collective { 13 | TEST(CommGroup, FederatedGPU) { 14 | std::int32_t n_workers = curt::AllVisibleGPUs(); 15 | TestFederatedGroup(n_workers, [&](std::shared_ptr comm_group, std::int32_t r) { 16 | Context ctx = MakeCUDACtx(0); 17 | auto const& comm = comm_group->Ctx(&ctx, DeviceOrd::CUDA(0)); 18 | ASSERT_EQ(comm_group->Rank(), r); 19 | ASSERT_EQ(comm.TaskID(), 
std::to_string(r)); 20 | ASSERT_EQ(comm.Retry(), 2); 21 | }); 22 | } 23 | } // namespace xgboost::collective 24 | -------------------------------------------------------------------------------- /tests/cpp/plugin/federated/test_federated_tracker.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023-2024, XGBoost Contributors 3 | */ 4 | #include 5 | 6 | #include // for make_unique 7 | #include // for string 8 | 9 | #include "../../../../src/collective/tracker.h" // for GetHostAddress 10 | #include "federated_tracker.h" 11 | #include "xgboost/json.h" // for Json 12 | 13 | namespace xgboost::collective { 14 | TEST(FederatedTrackerTest, Basic) { 15 | Json config{Object()}; 16 | config["federated_secure"] = Boolean{false}; 17 | config["n_workers"] = Integer{3}; 18 | 19 | auto tracker = std::make_unique(config); 20 | ASSERT_FALSE(tracker->Ready()); 21 | auto fut = tracker->Run(); 22 | auto args = tracker->WorkerArgs(); 23 | ASSERT_TRUE(tracker->Ready()); 24 | 25 | ASSERT_GE(tracker->Port(), 1); 26 | std::string host; 27 | auto rc = GetHostAddress(&host); 28 | SafeColl(rc); // check the lookup result before it is overwritten by Shutdown() below 29 | ASSERT_EQ(get(args["dmlc_tracker_uri"]), host); 30 | 31 | rc = tracker->Shutdown(); 32 | SafeColl(rc); 33 | SafeColl(fut.get()); 34 | ASSERT_FALSE(tracker->Ready()); 35 | } 36 | } // namespace xgboost::collective 37 | -------------------------------------------------------------------------------- /tests/cpp/plugin/test_example_objective.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../helpers.h" 5 | 6 | namespace xgboost { 7 | TEST(Plugin, ExampleObjective) { 8 | xgboost::Context ctx = MakeCUDACtx(GPUIDX); 9 | auto* obj = xgboost::ObjFunction::Create("mylogistic", &ctx); 10 | ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"logloss"}); 11 | delete obj; 12 | } 13 | } // namespace xgboost 14 |
-------------------------------------------------------------------------------- /tests/cpp/plugin/test_sycl_hinge.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 by XGBoost contributors 3 | */ 4 | #include 5 | #pragma GCC diagnostic push 6 | #pragma GCC diagnostic ignored "-Wtautological-constant-compare" 7 | #pragma GCC diagnostic ignored "-W#pragma-messages" 8 | #include 9 | #pragma GCC diagnostic pop 10 | #include 11 | 12 | #include "../helpers.h" 13 | #include "../objective/test_hinge.h" 14 | 15 | namespace xgboost { 16 | TEST(SyclObjective, DeclareUnifiedTest(HingeObj)) { 17 | Context ctx; 18 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 19 | TestHingeObj(&ctx); 20 | } 21 | 22 | } // namespace xgboost 23 | -------------------------------------------------------------------------------- /tests/cpp/plugin/test_sycl_multiclass_obj.cc: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright 2018-2023 XGBoost contributors 3 | */ 4 | #include 5 | #pragma GCC diagnostic push 6 | #pragma GCC diagnostic ignored "-Wtautological-constant-compare" 7 | #pragma GCC diagnostic ignored "-W#pragma-messages" 8 | #include 9 | #pragma GCC diagnostic pop 10 | 11 | #include "../objective/test_multiclass_obj.h" 12 | 13 | namespace xgboost { 14 | 15 | TEST(SyclObjective, SoftmaxMultiClassObjGPair) { 16 | Context ctx; 17 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 18 | TestSoftmaxMultiClassObjGPair(&ctx); 19 | } 20 | 21 | TEST(SyclObjective, SoftmaxMultiClassBasic) { 22 | Context ctx; 23 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 24 | TestSoftmaxMultiClassBasic(&ctx); // the Basic check from test_multiclass_obj.h, matching this test's name 25 | } 26 | 27 | TEST(SyclObjective, SoftprobMultiClassBasic) { 28 | Context ctx; 29 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 30 | TestSoftprobMultiClassBasic(&ctx); 31 | } 32 | } // namespace xgboost 33 | -------------------------------------------------------------------------------- /tests/cpp/plugin/test_sycl_prediction_cache.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2020-2024 by XGBoost contributors 3 | */ 4 | #include 5 | 6 | #pragma GCC diagnostic push 7 | #pragma GCC diagnostic ignored "-Wtautological-constant-compare" 8 | #pragma GCC diagnostic ignored "-W#pragma-messages" 9 | #include "../tree/test_prediction_cache.h" 10 | #pragma GCC diagnostic pop 11 | 12 | namespace xgboost::sycl::tree { 13 | 14 | class SyclPredictionCache : public xgboost::TestPredictionCache {}; 15 | 16 | TEST_F(SyclPredictionCache, Hist) { 17 | Context ctx; 18 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 19 | 20 | this->RunTest(&ctx, "grow_quantile_histmaker_sycl", "one_output_per_tree"); 21 | } 22 | 23 | } // namespace xgboost::sycl::tree 24 | -------------------------------------------------------------------------------- /tests/cpp/plugin/test_sycl_quantile_obj.cc:
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2024 by XGBoost contributors 3 | */ 4 | #include 5 | #pragma GCC diagnostic push 6 | #pragma GCC diagnostic ignored "-Wtautological-constant-compare" 7 | #pragma GCC diagnostic ignored "-W#pragma-messages" 8 | #include 9 | #pragma GCC diagnostic pop 10 | #include 11 | 12 | #include "../helpers.h" 13 | #include "../objective/test_quantile_obj.h" 14 | 15 | namespace xgboost { 16 | TEST(SyclObjective, DeclareUnifiedTest(Quantile)) { 17 | Context ctx; 18 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 19 | TestQuantile(&ctx); 20 | } 21 | 22 | TEST(SyclObjective, DeclareUnifiedTest(QuantileIntercept)) { 23 | Context ctx; 24 | ctx.UpdateAllowUnknown(Args{{"device", "sycl"}}); 25 | TestQuantileIntercept(&ctx); 26 | } 27 | } // namespace xgboost 28 | -------------------------------------------------------------------------------- /tests/cpp/test_main.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2016-2024, XGBoost Contributors 3 | */ 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | #include "helpers.h" 11 | 12 | int main(int argc, char** argv) { 13 | xgboost::Args args{{"verbosity", "2"}}; 14 | xgboost::ConsoleLogger::Configure(args); 15 | 16 | testing::InitGoogleTest(&argc, argv); 17 | testing::FLAGS_gtest_death_test_style = "threadsafe"; 18 | auto rmm_alloc = xgboost::SetUpRMMResourceForCppTests(argc, argv); 19 | return RUN_ALL_TESTS(); 20 | } 21 | -------------------------------------------------------------------------------- /tests/cpp/tree/gpu_hist/test_expand_entry.cu: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023, XGBoost Contributors 3 | */ 4 | #include 5 | #include 6 | #include // for RegTree 7 | 8 | #include "../../../../src/tree/gpu_hist/expand_entry.cuh" 9 | 10 | namespace xgboost::tree { 11 | TEST(ExpandEntry, 
IOGPU) { 12 | DeviceSplitCandidate split; 13 | GPUExpandEntry entry{RegTree::kRoot, 0, split, 3.0, 1.0, 2.0}; 14 | 15 | Json je{Object{}}; 16 | entry.Save(&je); 17 | 18 | GPUExpandEntry loaded; 19 | loaded.Load(je); 20 | 21 | ASSERT_EQ(entry.base_weight, loaded.base_weight); 22 | ASSERT_EQ(entry.left_weight, loaded.left_weight); 23 | ASSERT_EQ(entry.right_weight, loaded.right_weight); 24 | 25 | ASSERT_EQ(entry.GetDepth(), loaded.GetDepth()); 26 | ASSERT_EQ(entry.GetLossChange(), loaded.GetLossChange()); 27 | } 28 | } // namespace xgboost::tree 29 | -------------------------------------------------------------------------------- /tests/cpp/tree/test_node_partition.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2023 by XGBoost contributors 3 | */ 4 | #include 5 | #include // for Context 6 | #include // for ObjInfo 7 | #include // for TreeUpdater 8 | 9 | #include // for unique_ptr 10 | 11 | #include "../helpers.h" 12 | 13 | namespace xgboost { 14 | TEST(Updater, HasNodePosition) { 15 | Context ctx; 16 | ObjInfo task{ObjInfo::kRegression, true, true}; 17 | std::unique_ptr up{TreeUpdater::Create("grow_histmaker", &ctx, &task)}; 18 | ASSERT_TRUE(up->HasNodePosition()); 19 | 20 | up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)); 21 | ASSERT_TRUE(up->HasNodePosition()); 22 | 23 | #if defined(XGBOOST_USE_CUDA) 24 | ctx = MakeCUDACtx(0); 25 | up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, &task)); 26 | ASSERT_TRUE(up->HasNodePosition()); 27 | #endif // defined(XGBOOST_USE_CUDA) 28 | } 29 | } // namespace xgboost 30 | -------------------------------------------------------------------------------- /tests/cpp/tree/test_prediction_cache.cc: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2021-2023 by XGBoost contributors 3 | */ 4 | #include 5 | 6 | #include "test_prediction_cache.h" 7 | 8 | namespace xgboost { 9 | 
TEST_F(TestPredictionCache, Approx) { 10 | Context ctx; 11 | this->RunTest(&ctx, "grow_histmaker", "one_output_per_tree"); 12 | } 13 | 14 | TEST_F(TestPredictionCache, Hist) { 15 | Context ctx; 16 | this->RunTest(&ctx, "grow_quantile_histmaker", "one_output_per_tree"); 17 | } 18 | 19 | TEST_F(TestPredictionCache, HistMulti) { 20 | Context ctx; 21 | this->RunTest(&ctx, "grow_quantile_histmaker", "multi_output_tree"); 22 | } 23 | 24 | #if defined(XGBOOST_USE_CUDA) 25 | TEST_F(TestPredictionCache, GpuHist) { 26 | auto ctx = MakeCUDACtx(0); 27 | this->RunTest(&ctx, "grow_gpu_hist", "one_output_per_tree"); 28 | } 29 | 30 | TEST_F(TestPredictionCache, GpuApprox) { 31 | auto ctx = MakeCUDACtx(0); 32 | this->RunTest(&ctx, "grow_gpu_approx", "one_output_per_tree"); 33 | } 34 | #endif // defined(XGBOOST_USE_CUDA) 35 | } // namespace xgboost -------------------------------------------------------------------------------- /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | markers = 3 | mgpu: Mark a test that requires multiple GPUs to run. 4 | ci: Mark a test that runs only on CI. 
5 | -------------------------------------------------------------------------------- /tests/python-gpu/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from xgboost import testing as tm 4 | 5 | 6 | def has_rmm(): 7 | return tm.no_rmm()["condition"] 8 | 9 | 10 | @pytest.fixture(scope="session", autouse=True) 11 | def setup_rmm_pool(request, pytestconfig): 12 | tm.setup_rmm_pool(request, pytestconfig) 13 | 14 | 15 | def pytest_addoption(parser: pytest.Parser) -> None: 16 | parser.addoption( 17 | "--use-rmm-pool", action="store_true", default=False, help="Use RMM pool" 18 | ) 19 | 20 | 21 | def pytest_collection_modifyitems(config, items): 22 | if config.getoption("--use-rmm-pool"): 23 | blocklist = [ 24 | "python-gpu/test_gpu_demos.py::test_dask_training", 25 | "python-gpu/test_gpu_prediction.py::TestGPUPredict::test_shap", 26 | "python-gpu/test_gpu_linear.py::TestGPULinear", 27 | ] 28 | skip_mark = pytest.mark.skip( 29 | reason="This test is not run when --use-rmm-pool flag is active" 30 | ) 31 | for item in items: 32 | if any(item.nodeid.startswith(x) for x in blocklist): 33 | item.add_marker(skip_mark) 34 | -------------------------------------------------------------------------------- /tests/python-gpu/test_gpu_callbacks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from xgboost import testing as tm 4 | from xgboost.testing.callbacks import ( 5 | run_eta_decay, 6 | run_eta_decay_leaf_output, 7 | tree_methods_objs, 8 | ) 9 | 10 | 11 | @pytest.mark.parametrize("tree_method", ["approx", "hist"]) 12 | def test_eta_decay(tree_method: str) -> None: 13 | dtrain, dtest = tm.load_agaricus(__file__) 14 | run_eta_decay(tree_method, dtrain, dtest, "cuda") 15 | 16 | 17 | @pytest.mark.parametrize("tree_method,objective", tree_methods_objs()) 18 | def test_eta_decay_leaf_output(tree_method: str, objective: str) -> None: 19 | dtrain, dtest = 
tm.load_agaricus(__file__) 20 | run_eta_decay_leaf_output(tree_method, objective, dtrain, dtest, "cuda") 21 | -------------------------------------------------------------------------------- /tests/python-gpu/test_gpu_parse_tree.py: -------------------------------------------------------------------------------- 1 | from xgboost.testing.parse_tree import ( 2 | run_split_value_histograms, 3 | run_tree_to_df_categorical, 4 | ) 5 | 6 | 7 | def test_tree_to_df_categorical() -> None: 8 | run_tree_to_df_categorical("hist", "cuda") 9 | 10 | 11 | def test_split_value_histograms() -> None: 12 | run_split_value_histograms("hist", "cuda") 13 | -------------------------------------------------------------------------------- /tests/python-gpu/test_gpu_plotting.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from xgboost import testing as tm 4 | 5 | 6 | class TestPlotting: 7 | @pytest.mark.skipif(**tm.no_multiple(tm.no_matplotlib(), tm.no_graphviz())) 8 | def test_categorical(self) -> None: 9 | from xgboost.testing.plotting import run_categorical 10 | 11 | run_categorical("hist", "cuda") 12 | -------------------------------------------------------------------------------- /tests/python-gpu/test_gpu_training_continuation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from xgboost.testing.continuation import run_training_continuation_model_output 5 | 6 | rng = np.random.RandomState(1994) 7 | 8 | 9 | class TestGPUTrainingContinuation: 10 | @pytest.mark.parametrize("tree_method", ["hist", "approx"]) 11 | def test_model_output(self, tree_method: str) -> None: 12 | run_training_continuation_model_output("cuda", tree_method) 13 | -------------------------------------------------------------------------------- /tests/python-gpu/test_large_input.py: -------------------------------------------------------------------------------- 1 | import 
cupy as cp 2 | import numpy as np 3 | import pytest 4 | 5 | import xgboost as xgb 6 | 7 | 8 | # Test for integer overflow or out of memory exceptions 9 | def test_large_input(): 10 | available_bytes, _ = cp.cuda.runtime.memGetInfo() 11 | # 15 GB 12 | required_bytes = 1.5e10 13 | if available_bytes < required_bytes: 14 | pytest.skip("Not enough memory on this device") 15 | n = 1000 16 | m = ((1 << 31) + n - 1) // n 17 | assert np.log2(m * n) > 31 18 | X = cp.ones((m, n), dtype=np.float32) 19 | y = cp.ones(m) 20 | w = cp.ones(m) 21 | dmat = xgb.QuantileDMatrix(X, y, weight=w) 22 | booster = xgb.train( 23 | {"tree_method": "hist", "max_depth": 1, "device": "cuda"}, dmat, 1 24 | ) 25 | del y 26 | booster.inplace_predict(X) 27 | -------------------------------------------------------------------------------- /tests/python/test_with_shap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | import xgboost as xgb 5 | 6 | try: 7 | import shap 8 | except Exception: 9 | shap = None 10 | pass 11 | 12 | 13 | pytestmark = pytest.mark.skipif(shap is None, reason="Requires shap package") 14 | 15 | 16 | # xgboost removed ntree_limit in 2.0, which breaks the SHAP package. 
17 | @pytest.mark.xfail 18 | def test_with_shap() -> None: 19 | from sklearn.datasets import fetch_california_housing 20 | 21 | X, y = fetch_california_housing(return_X_y=True) 22 | dtrain = xgb.DMatrix(X, label=y) 23 | model = xgb.train({"learning_rate": 0.01}, dtrain, 10) 24 | explainer = shap.TreeExplainer(model) 25 | shap_values = explainer.shap_values(X) 26 | margin = model.predict(dtrain, output_margin=True) 27 | assert np.allclose( 28 | np.sum(shap_values, axis=len(shap_values.shape) - 1), 29 | margin - explainer.expected_value, 30 | 1e-3, 31 | 1e-3, 32 | ) 33 | -------------------------------------------------------------------------------- /tests/python/with_omp_limit.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from sklearn.datasets import make_classification 4 | from sklearn.metrics import roc_auc_score 5 | 6 | import xgboost as xgb 7 | 8 | 9 | def run_omp(output_path: str): 10 | X, y = make_classification( 11 | n_samples=200, n_features=32, n_classes=3, n_informative=8 12 | ) 13 | Xy = xgb.DMatrix(X, y, nthread=16) 14 | booster = xgb.train( 15 | {"num_class": 3, "objective": "multi:softprob", "n_jobs": 16}, 16 | Xy, 17 | num_boost_round=8, 18 | ) 19 | score = booster.predict(Xy) 20 | auc = roc_auc_score(y, score, average="weighted", multi_class="ovr") 21 | with open(output_path, "w") as fd: 22 | fd.write(str(auc)) 23 | 24 | 25 | if __name__ == "__main__": 26 | out = sys.argv[1] 27 | run_omp(out) 28 | -------------------------------------------------------------------------------- /tests/test_distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/tests/test_distributed/__init__.py -------------------------------------------------------------------------------- /tests/test_distributed/test_federated/test_federated.py: 
--------------------------------------------------------------------------------
import pytest

from xgboost.testing.federated import run_federated_learning


@pytest.mark.parametrize("with_ssl", [True, False])
def test_federated_learning(with_ssl: bool) -> None:
    """Run the shared federated learning check with and without SSL."""
    # NOTE(review): the second argument is False here and True in the GPU
    # variant of this test; presumably it selects the device -- confirm
    # against run_federated_learning.
    run_federated_learning(with_ssl, False, __file__)
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_federated/test_gpu_federated.py:
--------------------------------------------------------------------------------
import pytest

from xgboost.testing.federated import run_federated_learning


@pytest.mark.parametrize("with_ssl", [True, False])
@pytest.mark.mgpu
def test_federated_learning(with_ssl: bool) -> None:
    """Run the shared federated learning check with and without SSL (mgpu)."""
    # NOTE(review): the second argument is True here and False in the
    # non-GPU variant; presumably it selects the device -- confirm against
    # run_federated_learning.
    run_federated_learning(with_ssl, True, __file__)
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_dask/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_dask/test_gpu_demos.py:
--------------------------------------------------------------------------------
import os
import subprocess

import pytest

from xgboost import testing as tm

# Applied to every test in this module: two skip conditions built from
# tm.no_dask() / tm.no_dask_cuda(), plus tm.timeout(60).
pytestmark = [
    pytest.mark.skipif(**tm.no_dask()),
    pytest.mark.skipif(**tm.no_dask_cuda()),
    tm.timeout(60),
]


@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_dask_training() -> None:
    """Run the ``dask/gpu_training.py`` demo script and require exit code 0."""
    script = os.path.join(tm.demo_dir(__file__), "dask", "gpu_training.py")
    # NOTE(review): `python` is resolved through PATH and may differ from the
    # interpreter running pytest; sys.executable would pin it.
    cmd = ["python", script]
    subprocess.check_call(cmd)


@pytest.mark.mgpu
def test_dask_sklearn_demo() -> None:
    """Run the ``dask/sklearn_gpu_training.py`` demo and require exit code 0."""
    script = os.path.join(tm.demo_dir(__file__), "dask", "sklearn_gpu_training.py")
    cmd = ["python", script]
    subprocess.check_call(cmd)


@pytest.mark.mgpu
@pytest.mark.skipif(**tm.no_cupy())
def test_forward_logging_demo() -> None:
    """Run the ``dask/forward_logging.py`` demo and require exit code 0."""
    script = os.path.join(tm.demo_dir(__file__), "dask", "forward_logging.py")
    cmd = ["python", script]
    subprocess.check_call(cmd)
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_dask/test_gpu_ranking.py:
--------------------------------------------------------------------------------
"""Copyright 2024, XGBoost contributors"""

import dask
import pytest
from distributed import Client

from xgboost import testing as tm
from xgboost.testing import dask as dtm

# Applied to every test in this module: two skip conditions built from
# tm.no_dask() / tm.no_dask_cuda(), plus tm.timeout(120).
pytestmark = [
    pytest.mark.skipif(**tm.no_dask()),
    pytest.mark.skipif(**tm.no_dask_cuda()),
    tm.timeout(120),
]


@pytest.mark.filterwarnings("error")
def test_no_group_split(local_cuda_client: Client) -> None:
    """Run ``check_no_group_split`` on "cuda" with warnings raised as errors.

    The dask array and dataframe backends are switched to cupy and cudf for
    the duration of the check.
    """
    with dask.config.set(
        {
            "array.backend": "cupy",
            "dataframe.backend": "cudf",
        }
    ):
        dtm.check_no_group_split(local_cuda_client, "cuda")
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_spark/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/tests/test_distributed/test_gpu_with_spark/__init__.py
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_spark/conftest.py:
--------------------------------------------------------------------------------
from typing import Sequence

import pytest


def pytest_collection_modifyitems(config: pytest.Config, items: Sequence) -> None:
    """Pytest collection hook: add the ``mgpu`` marker to every item given."""
    # mark every collected item passed to this hook as `mgpu`.
    mgpu_mark = pytest.mark.mgpu
    for item in items:
        item.add_marker(mgpu_mark)
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_spark/discover_gpu.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# This script is only made for running XGBoost tests on official CI where we have access
# to a 4-GPU cluster. The discovery command is for running tests on a local machine where
# the driver and the GPU worker might be the same machine for the ease of development.

if ! command -v nvidia-smi &> /dev/null
then
    # default to 4 GPUs
    echo "{\"name\":\"gpu\",\"addresses\":[\"0\",\"1\",\"2\",\"3\"]}"
    exit
else
    # https://github.com/apache/spark/blob/master/examples/src/main/scripts/getGpusResources.sh
    # Join the GPU indices printed by nvidia-smi (one per line) with `","`.
    ADDRS=`nvidia-smi --query-gpu=index --format=csv,noheader | sed -e ':a' -e 'N' -e'$!ba' -e 's/\n/","/g'`
    echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]}
fi
--------------------------------------------------------------------------------
/tests/test_distributed/test_gpu_with_spark/test_data.py:
--------------------------------------------------------------------------------
import pytest

from xgboost import testing as tm

# Applied to every test in this module: a skip condition built from
# tm.no_spark(), plus tm.timeout(120).
pytestmark = [
    pytest.mark.skipif(**tm.no_spark()),
    tm.timeout(120),
]

# Imported after ``pytestmark`` is defined, reusing the runner from the
# sibling test_with_spark package.
from ..test_with_spark.test_data import run_dmatrix_ctor


@pytest.mark.skipif(**tm.no_cudf())
@pytest.mark.parametrize(
    "is_feature_cols,is_qdm",
    [(True, True), (True, False), (False, True), (False, False)],
)
def test_dmatrix_ctor(is_feature_cols: bool, is_qdm: bool) -> None:
    """Run ``run_dmatrix_ctor`` with ``on_gpu=True`` for all four flag pairs."""
    run_dmatrix_ctor(is_feature_cols, is_qdm, on_gpu=True)
--------------------------------------------------------------------------------
/tests/test_distributed/test_with_dask/__init__.py:
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/test_distributed/test_with_dask/test_external_memory.py: -------------------------------------------------------------------------------- 1 | """Copyright 2024, XGBoost contributors""" 2 | 3 | import pytest 4 | from distributed import Client, Scheduler, Worker 5 | from distributed.utils_test import gen_cluster 6 | 7 | from xgboost import testing as tm 8 | from xgboost.testing.dask import check_external_memory, get_rabit_args 9 | 10 | 11 | @pytest.mark.parametrize("is_qdm", [True, False]) 12 | @gen_cluster(client=True) 13 | async def test_external_memory( 14 | client: Client, s: Scheduler, a: Worker, b: Worker, is_qdm: bool 15 | ) -> None: 16 | workers = tm.dask.get_client_workers(client) 17 | n_workers = len(workers) 18 | args = await get_rabit_args(client, n_workers) 19 | 20 | futs = client.map( 21 | check_external_memory, 22 | range(n_workers), 23 | n_workers=n_workers, 24 | device="cpu", 25 | comm_args=args, 26 | is_qdm=is_qdm, 27 | ) 28 | await client.gather(futs) 29 | -------------------------------------------------------------------------------- /tests/test_distributed/test_with_spark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dmlc/xgboost/614cd5478bb3c7ef15683ea30c5796b01d41ffbd/tests/test_distributed/test_with_spark/__init__.py --------------------------------------------------------------------------------