├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── binding.gyp ├── build.sh ├── changelog.md ├── doc ├── api.md ├── intro.md └── intro_zh.md ├── index.d.ts ├── index.js ├── index.ts ├── package-lock.json ├── package.json ├── readme.md ├── src ├── base.h ├── index.cc ├── xgmatrix.cc ├── xgmatrix.h ├── xgmodel.cc └── xgmodel.h ├── test ├── base.js └── data │ ├── iris.xg.model │ └── xgmatrix.bin ├── tsconfig.json └── xgboost ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTORS.md ├── ISSUE_TEMPLATE.md ├── Jenkinsfile ├── LICENSE ├── Makefile ├── NEWS.md ├── R-package ├── .Rbuildignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R │ ├── callbacks.R │ ├── utils.R │ ├── xgb.Booster.R │ ├── xgb.DMatrix.R │ ├── xgb.DMatrix.save.R │ ├── xgb.create.features.R │ ├── xgb.cv.R │ ├── xgb.dump.R │ ├── xgb.ggplot.R │ ├── xgb.importance.R │ ├── xgb.load.R │ ├── xgb.model.dt.tree.R │ ├── xgb.plot.deepness.R │ ├── xgb.plot.importance.R │ ├── xgb.plot.multi.trees.R │ ├── xgb.plot.tree.R │ ├── xgb.save.R │ ├── xgb.save.raw.R │ ├── xgb.train.R │ └── xgboost.R ├── README.md ├── cleanup ├── configure ├── configure.ac ├── data │ ├── agaricus.test.rda │ └── agaricus.train.rda ├── demo │ ├── 00Index │ ├── README.md │ ├── basic_walkthrough.R │ ├── boost_from_prediction.R │ ├── caret_wrapper.R │ ├── create_sparse_matrix.R │ ├── cross_validation.R │ ├── custom_objective.R │ ├── early_stopping.R │ ├── generalized_linear_model.R │ ├── poisson_regression.R │ ├── predict_first_ntree.R │ ├── predict_leaf_indices.R │ ├── runall.R │ └── tweedie_regression.R ├── man │ ├── agaricus.test.Rd │ ├── agaricus.train.Rd │ ├── callbacks.Rd │ ├── cb.cv.predict.Rd │ ├── cb.early.stop.Rd │ ├── cb.evaluation.log.Rd │ ├── cb.print.evaluation.Rd │ ├── cb.reset.parameters.Rd │ ├── cb.save.model.Rd │ ├── dim.xgb.DMatrix.Rd │ ├── dimnames.xgb.DMatrix.Rd │ ├── getinfo.Rd │ ├── predict.xgb.Booster.Rd │ ├── print.xgb.Booster.Rd │ ├── print.xgb.DMatrix.Rd │ ├── print.xgb.cv.Rd │ ├── setinfo.Rd │ ├── slice.xgb.DMatrix.Rd │ ├── xgb.Booster.complete.Rd │ ├── xgb.DMatrix.Rd │ ├── xgb.DMatrix.save.Rd │ ├── xgb.attr.Rd │ ├── xgb.create.features.Rd │ ├── xgb.cv.Rd │ ├── xgb.dump.Rd │ ├── xgb.importance.Rd │ ├── xgb.load.Rd │ ├── xgb.model.dt.tree.Rd │ ├── xgb.parameters.Rd │ ├── xgb.plot.deepness.Rd │ ├── xgb.plot.importance.Rd │ ├── xgb.plot.multi.trees.Rd │ ├── xgb.plot.tree.Rd │ ├── xgb.save.Rd │ ├── xgb.save.raw.Rd │ ├── xgb.train.Rd │ └── xgboost-deprecated.Rd ├── src │ ├── Makevars.in │ ├── Makevars.win │ ├── init.c │ ├── xgboost_R.cc │ ├── xgboost_R.h │ ├── xgboost_assert.c │ └── xgboost_custom.cc ├── tests │ ├── testthat.R │ └── testthat │ │ ├── test_basic.R │ │ ├── test_callbacks.R │ │ ├── test_custom_objective.R │ │ ├── test_dmatrix.R │ │ ├── test_gc_safety.R │ │ ├── test_glm.R │ │ ├── test_helpers.R │ │ ├── test_lint.R │ │ ├── test_monotone.R │ │ ├── test_parameter_exposure.R │ │ ├── test_poisson_regression.R │ │ └── test_update.R └── vignettes │ ├── discoverYourData.Rmd │ ├── vignette.css │ ├── xgboost.Rnw │ ├── xgboost.bib │ └── xgboostPresentation.Rmd ├── README.md ├── amalgamation ├── dmlc-minimum0.cc └── xgboost-all0.cc ├── appveyor.yml ├── build.sh ├── cmake └── Utils.cmake ├── cub ├── CHANGE_LOG.TXT ├── LICENSE.TXT ├── README.md ├── common.mk ├── cub │ ├── agent │ │ ├── agent_histogram.cuh │ │ ├── agent_radix_sort_downsweep.cuh │ │ ├── agent_radix_sort_upsweep.cuh │ │ ├── agent_reduce.cuh │ │ ├── agent_reduce_by_key.cuh │ │ ├── agent_rle.cuh │ 
│ ├── agent_scan.cuh │ │ ├── agent_segment_fixup.cuh │ │ ├── agent_select_if.cuh │ │ ├── agent_spmv_csrt.cuh │ │ ├── agent_spmv_orig.cuh │ │ ├── agent_spmv_row_based.cuh │ │ └── single_pass_scan_operators.cuh │ ├── block │ │ ├── block_adjacent_difference.cuh │ │ ├── block_discontinuity.cuh │ │ ├── block_exchange.cuh │ │ ├── block_histogram.cuh │ │ ├── block_load.cuh │ │ ├── block_radix_rank.cuh │ │ ├── block_radix_sort.cuh │ │ ├── block_raking_layout.cuh │ │ ├── block_reduce.cuh │ │ ├── block_scan.cuh │ │ ├── block_shuffle.cuh │ │ ├── block_store.cuh │ │ └── specializations │ │ │ ├── block_histogram_atomic.cuh │ │ │ ├── block_histogram_sort.cuh │ │ │ ├── block_reduce_raking.cuh │ │ │ ├── block_reduce_raking_commutative_only.cuh │ │ │ ├── block_reduce_warp_reductions.cuh │ │ │ ├── block_scan_raking.cuh │ │ │ ├── block_scan_warp_scans.cuh │ │ │ ├── block_scan_warp_scans2.cuh │ │ │ └── block_scan_warp_scans3.cuh │ ├── cub.cuh │ ├── device │ │ ├── device_histogram.cuh │ │ ├── device_partition.cuh │ │ ├── device_radix_sort.cuh │ │ ├── device_reduce.cuh │ │ ├── device_run_length_encode.cuh │ │ ├── device_scan.cuh │ │ ├── device_segmented_radix_sort.cuh │ │ ├── device_segmented_reduce.cuh │ │ ├── device_select.cuh │ │ ├── device_spmv.cuh │ │ └── dispatch │ │ │ ├── dispatch_histogram.cuh │ │ │ ├── dispatch_radix_sort.cuh │ │ │ ├── dispatch_reduce.cuh │ │ │ ├── dispatch_reduce_by_key.cuh │ │ │ ├── dispatch_rle.cuh │ │ │ ├── dispatch_scan.cuh │ │ │ ├── dispatch_select_if.cuh │ │ │ ├── dispatch_spmv_csrt.cuh │ │ │ ├── dispatch_spmv_orig.cuh │ │ │ └── dispatch_spmv_row_based.cuh │ ├── grid │ │ ├── grid_barrier.cuh │ │ ├── grid_even_share.cuh │ │ ├── grid_mapping.cuh │ │ └── grid_queue.cuh │ ├── host │ │ └── mutex.cuh │ ├── iterator │ │ ├── arg_index_input_iterator.cuh │ │ ├── cache_modified_input_iterator.cuh │ │ ├── cache_modified_output_iterator.cuh │ │ ├── constant_input_iterator.cuh │ │ ├── counting_input_iterator.cuh │ │ ├── discard_output_iterator.cuh │ │ ├── tex_obj_input_iterator.cuh │ │ ├── tex_ref_input_iterator.cuh │ │ └── transform_input_iterator.cuh │ ├── thread │ │ ├── thread_load.cuh │ │ ├── thread_operators.cuh │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ ├── thread_search.cuh │ │ └── thread_store.cuh │ ├── util_allocator.cuh │ ├── util_arch.cuh │ ├── util_debug.cuh │ ├── util_device.cuh │ ├── util_macro.cuh │ ├── util_namespace.cuh │ ├── util_ptx.cuh │ ├── util_type.cuh │ └── warp │ │ ├── specializations │ │ ├── warp_reduce_shfl.cuh │ │ ├── warp_reduce_smem.cuh │ │ ├── warp_scan_shfl.cuh │ │ └── warp_scan_smem.cuh │ │ ├── warp_reduce.cuh │ │ └── warp_scan.cuh ├── eclipse code style profile.xml ├── examples │ ├── block │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── example_block_radix_sort.cu │ │ ├── example_block_reduce.cu │ │ ├── example_block_scan.cu │ │ └── reduce_by_key.cu │ └── device │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── example_device_partition_flagged.cu │ │ ├── example_device_partition_if.cu │ │ ├── example_device_radix_sort.cu │ │ ├── example_device_reduce.cu │ │ ├── example_device_scan.cu │ │ ├── example_device_select_flagged.cu │ │ ├── example_device_select_if.cu │ │ ├── example_device_select_unique.cu │ │ └── example_device_sort_find_non_trivial_runs.cu ├── experimental │ ├── .gitignore │ ├── Makefile │ ├── defunct │ │ ├── example_coo_spmv.cu │ │ └── test_device_seg_reduce.cu │ ├── histogram │ │ ├── histogram_cub.h │ │ ├── histogram_gmem_atomics.h │ │ └── histogram_smem_atomics.h │ ├── histogram_compare.cu │ ├── sparse_matrix.h │ ├── spmv_compare.cu │ └── 
spmv_script.sh ├── test │ ├── .gitignore │ ├── Makefile │ ├── link_a.cu │ ├── link_b.cu │ ├── link_main.cpp │ ├── mersenne.h │ ├── test_allocator.cu │ ├── test_block_histogram.cu │ ├── test_block_load_store.cu │ ├── test_block_radix_sort.cu │ ├── test_block_reduce.cu │ ├── test_block_scan.cu │ ├── test_device_histogram.cu │ ├── test_device_radix_sort.cu │ ├── test_device_reduce.cu │ ├── test_device_reduce_by_key.cu │ ├── test_device_run_length_encode.cu │ ├── test_device_scan.cu │ ├── test_device_select_if.cu │ ├── test_device_select_unique.cu │ ├── test_grid_barrier.cu │ ├── test_iterator.cu │ ├── test_util.h │ ├── test_warp_reduce.cu │ └── test_warp_scan.cu └── tune │ ├── .gitignore │ ├── Makefile │ └── tune_device_reduce.cu ├── demo ├── .gitignore ├── README.md ├── binary_classification │ ├── README.md │ ├── agaricus-lepiota.fmap │ ├── agaricus-lepiota.names │ ├── mapfeat.py │ ├── mknfold.py │ └── runexp.sh ├── data │ ├── README.md │ ├── featmap.txt │ └── gen_autoclaims.R ├── distributed-training │ ├── README.md │ ├── plot_model.ipynb │ └── run_aws.sh ├── gpu_acceleration │ ├── README.md │ └── bosch.py ├── guide-python │ ├── README.md │ ├── basic_walkthrough.py │ ├── boost_from_prediction.py │ ├── cross_validation.py │ ├── custom_objective.py │ ├── evals_result.py │ ├── external_memory.py │ ├── gamma_regression.py │ ├── generalized_linear_model.py │ ├── predict_first_ntree.py │ ├── predict_leaf_indices.py │ ├── runall.sh │ ├── sklearn_evals_result.py │ ├── sklearn_examples.py │ └── sklearn_parallel.py ├── kaggle-higgs │ ├── README.md │ ├── higgs-cv.py │ ├── higgs-numpy.py │ ├── higgs-pred.R │ ├── higgs-pred.py │ ├── higgs-train.R │ ├── run.sh │ ├── speedtest.R │ └── speedtest.py ├── kaggle-otto │ ├── README.MD │ ├── otto_train_pred.R │ └── understandingXGBoostModel.Rmd ├── multiclass_classification │ ├── README.md │ ├── runexp.sh │ └── train.py ├── rank │ ├── README.md │ ├── runexp.sh │ ├── trans_data.py │ └── wgetdata.sh ├── regression │ ├── README.md │ ├── machine.names │ ├── mapfeat.py │ ├── mknfold.py │ └── runexp.sh └── yearpredMSD │ ├── README.md │ ├── csv2libsvm.py │ └── runexp.sh ├── dmlc-core ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── cmake │ ├── Modules │ │ ├── FindCrypto.cmake │ │ └── FindHDFS.cmake │ ├── Utils.cmake │ └── lint.cmake ├── doc │ ├── .gitignore │ ├── Doxyfile │ ├── Makefile │ ├── README │ ├── conf.py │ ├── index.md │ ├── parameter.md │ └── sphinx_util.py ├── example │ ├── dmlc_example.mk │ └── parameter.cc ├── include │ └── dmlc │ │ ├── any.h │ │ ├── array_view.h │ │ ├── base.h │ │ ├── common.h │ │ ├── concurrency.h │ │ ├── config.h │ │ ├── data.h │ │ ├── input_split_shuffle.h │ │ ├── io.h │ │ ├── json.h │ │ ├── logging.h │ │ ├── lua.h │ │ ├── memory.h │ │ ├── memory_io.h │ │ ├── omp.h │ │ ├── optional.h │ │ ├── parameter.h │ │ ├── recordio.h │ │ ├── registry.h │ │ ├── serializer.h │ │ ├── thread_local.h │ │ ├── threadediter.h │ │ ├── timer.h │ │ └── type_traits.h ├── make │ └── dmlc.mk ├── scripts │ ├── lint.py │ ├── packages.mk │ ├── setup_nvcc.sh │ └── travis │ │ ├── travis_before_cache.sh │ │ ├── travis_osx_install.sh │ │ ├── travis_script.sh │ │ └── travis_setup_env.sh ├── src │ ├── config.cc │ ├── data.cc │ ├── data │ │ ├── basic_row_iter.h │ │ ├── csv_parser.h │ │ ├── disk_row_iter.h │ │ ├── libfm_parser.h │ │ ├── libsvm_parser.h │ │ ├── parser.h │ │ ├── row_block.h │ │ ├── strtonum.h │ │ └── text_parser.h │ ├── io.cc │ ├── io │ │ ├── azure_filesys.cc │ │ ├── azure_filesys.h │ │ ├── cached_input_split.h │ 
│ ├── filesys.h │ │ ├── hdfs_filesys.cc │ │ ├── hdfs_filesys.h │ │ ├── input_split_base.cc │ │ ├── input_split_base.h │ │ ├── line_split.cc │ │ ├── line_split.h │ │ ├── local_filesys.cc │ │ ├── local_filesys.h │ │ ├── recordio_split.cc │ │ ├── recordio_split.h │ │ ├── s3_filesys.cc │ │ ├── s3_filesys.h │ │ ├── single_file_split.h │ │ ├── threaded_input_split.h │ │ └── uri_spec.h │ └── recordio.cc ├── test │ ├── .gitignore │ ├── README.md │ ├── csv_parser_test.cc │ ├── dataiter_test.cc │ ├── dmlc_test.mk │ ├── filesys_test.cc │ ├── iostream_test.cc │ ├── libfm_parser_test.cc │ ├── libsvm_parser_test.cc │ ├── logging_test.cc │ ├── parameter_test.cc │ ├── recordio_test.cc │ ├── registry_test.cc │ ├── split_read_test.cc │ ├── split_repeat_read_test.cc │ ├── split_test.cc │ ├── stream_read_test.cc │ ├── strtonum_test.cc │ └── unittest │ │ ├── .gitignore │ │ ├── dmlc_unittest.mk │ │ ├── unittest_any.cc │ │ ├── unittest_array_view.cc │ │ ├── unittest_config.cc │ │ ├── unittest_json.cc │ │ ├── unittest_logging.cc │ │ ├── unittest_main.cc │ │ ├── unittest_optional.cc │ │ ├── unittest_serializer.cc │ │ └── unittest_threaditer.cc ├── tracker │ ├── README.md │ ├── dmlc-submit │ ├── dmlc_tracker │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── local.py │ │ ├── mpi.py │ │ ├── opts.py │ │ ├── sge.py │ │ ├── ssh.py │ │ ├── submit.py │ │ ├── tracker.py │ │ └── yarn.py │ └── yarn │ │ ├── .gitignore │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── pom.xml │ │ └── src │ │ └── main │ │ └── java │ │ └── org │ │ └── apache │ │ └── hadoop │ │ └── yarn │ │ └── dmlc │ │ ├── ApplicationMaster.java │ │ ├── Client.java │ │ └── TaskRecord.java └── windows │ ├── .gitignore │ ├── README.md │ ├── dmlc.sln │ └── dmlc │ └── dmlc.vcxproj ├── doc ├── .gitignore ├── Doxyfile ├── Makefile ├── R-package │ ├── .gitignore │ ├── Makefile │ ├── discoverYourData.md │ ├── index.md │ └── xgboostPresentation.md ├── README ├── build.md ├── cli │ └── index.md ├── conf.py ├── faq.md ├── get_started │ └── index.md ├── how_to │ ├── contribute.md │ ├── external_memory.md │ ├── index.md │ └── param_tuning.md ├── index.md ├── input_format.md ├── julia │ └── index.md ├── jvm │ ├── index.md │ ├── java_intro.md │ ├── xgboost4j-intro.md │ └── xgboost4j_full_integration.md ├── model.md ├── parameter.md ├── python │ ├── index.md │ ├── python_api.rst │ └── python_intro.md ├── sphinx_util.py └── tutorials │ ├── aws_yarn.md │ ├── dart.md │ ├── index.md │ └── monotonic.md ├── include └── xgboost │ ├── base.h │ ├── c_api.h │ ├── data.h │ ├── feature_map.h │ ├── gbm.h │ ├── learner.h │ ├── logging.h │ ├── metric.h │ ├── objective.h │ ├── tree_model.h │ └── tree_updater.h ├── jvm-packages ├── .gitignore ├── README.md ├── checkstyle-suppressions.xml ├── checkstyle.xml ├── create_jni.py ├── pom.xml ├── scalastyle-config.xml ├── xgboost4j-example │ ├── LICENSE │ ├── README.md │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── java │ │ │ └── example │ │ │ ├── BasicWalkThrough.java │ │ │ ├── BoostFromPrediction.java │ │ │ ├── CrossValidation.java │ │ │ ├── CustomObjective.java │ │ │ ├── ExternalMemory.java │ │ │ ├── GeneralizedLinearModel.java │ │ │ ├── PredictFirstNtree.java │ │ │ ├── PredictLeafIndices.java │ │ │ └── util │ │ │ ├── CustomEval.java │ │ │ └── DataLoader.java │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── example │ │ ├── BasicWalkThrough.scala │ │ ├── BoostFromPrediction.scala │ │ ├── CrossValidation.scala │ │ ├── CustomObjective.scala │ │ ├── 
ExternalMemory.scala │ │ ├── GeneralizedLinearModel.scala │ │ ├── PredictFirstNTree.scala │ │ ├── PredictLeafIndices.scala │ │ ├── flink │ │ └── DistTrainWithFlink.scala │ │ ├── spark │ │ ├── SparkModelTuningTool.scala │ │ ├── SparkWithDataFrame.scala │ │ └── SparkWithRDD.scala │ │ └── util │ │ └── CustomEval.scala ├── xgboost4j-flink │ ├── pom.xml │ └── src │ │ └── main │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── flink │ │ ├── XGBoost.scala │ │ └── XGBoostModel.scala ├── xgboost4j-spark │ ├── pom.xml │ └── src │ │ ├── main │ │ └── scala │ │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── scala │ │ │ └── spark │ │ │ ├── DataUtils.scala │ │ │ ├── XGBoost.scala │ │ │ ├── XGBoostClassificationModel.scala │ │ │ ├── XGBoostEstimator.scala │ │ │ ├── XGBoostModel.scala │ │ │ ├── XGBoostRegressionModel.scala │ │ │ └── params │ │ │ ├── BoosterParams.scala │ │ │ ├── CustomParams.scala │ │ │ ├── DefaultXGBoostParamsReader.scala │ │ │ ├── DefaultXGBoostParamsWriter.scala │ │ │ ├── GeneralParams.scala │ │ │ ├── LearningTaskParams.scala │ │ │ └── Utils.scala │ │ └── test │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── spark │ │ ├── EvalError.scala │ │ ├── RabitTrackerRobustnessSuite.scala │ │ ├── SharedSparkContext.scala │ │ ├── Utils.scala │ │ ├── XGBoostConfigureSuite.scala │ │ ├── XGBoostDFSuite.scala │ │ ├── XGBoostGeneralSuite.scala │ │ └── XGBoostSparkPipelinePersistence.scala └── xgboost4j │ ├── LICENSE │ ├── pom.xml │ └── src │ ├── main │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ ├── LabeledPoint.java │ │ │ └── java │ │ │ ├── Booster.java │ │ │ ├── DMatrix.java │ │ │ ├── DataBatch.java │ │ │ ├── IEvaluation.java │ │ │ ├── IObjective.java │ │ │ ├── IRabitTracker.java │ │ │ ├── NativeLibLoader.java │ │ │ ├── Rabit.java │ │ │ ├── RabitTracker.java │ │ │ ├── XGBoost.java │ │ │ ├── XGBoostError.java │ │ │ └── XGBoostJNI.java │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ ├── Booster.scala │ │ ├── DMatrix.scala │ │ ├── EvalTrait.scala │ │ ├── ObjectiveTrait.scala │ │ ├── XGBoost.scala │ │ └── rabit │ │ ├── RabitTracker.scala │ │ ├── handler │ │ ├── RabitTrackerHandler.scala │ │ └── RabitWorkerHandler.scala │ │ └── util │ │ ├── LinkMap.scala │ │ └── RabitTrackerHelpers.scala │ ├── native │ ├── xgboost4j.cpp │ └── xgboost4j.h │ └── test │ ├── java │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── java │ │ ├── BoosterImplTest.java │ │ └── DMatrixTest.java │ └── scala │ └── ml │ └── dmlc │ └── xgboost4j │ └── scala │ ├── DMatrixSuite.scala │ ├── ScalaBoosterImplSuite.scala │ └── rabit │ └── RabitTrackerConnectionHandlerTest.scala ├── make ├── config.mk ├── mingw64.mk ├── minimum.mk ├── minimum_parallel.mk └── travis.mk ├── nccl ├── .gitignore ├── CMakeLists.txt ├── LICENSE.txt ├── Makefile ├── README.md ├── debian │ ├── .gitignore │ ├── changelog.in │ ├── compat │ ├── control.in │ ├── copyright │ ├── libnccl-dev.install │ ├── libnccl-dev.manpages │ ├── libnccl1.install.in │ ├── nccl.7 │ ├── rules │ ├── shlibs.local.in │ └── source │ │ └── format ├── fortran │ ├── Makefile │ ├── src │ │ ├── cudafor.f90 │ │ └── ncclfor.f90 │ └── test │ │ ├── allgather_arr_out.f90 │ │ ├── allgather_ptr_out.f90 │ │ ├── allreduce_arr_out.f90 │ │ ├── allreduce_ptr_out.f90 │ │ ├── broadcast_arr.f90 │ │ ├── broadcast_ptr.f90 │ │ ├── reduce_arr_out.f90 │ │ ├── reduce_ptr_out.f90 │ │ ├── reducescatter_arr_out.f90 │ │ └── reducescatter_ptr_out.f90 ├── src │ ├── all_gather.cu │ ├── 
all_reduce.cu │ ├── broadcast.cu │ ├── common_coll.h │ ├── common_kernel.h │ ├── copy_kernel.h │ ├── core.cu │ ├── core.h │ ├── enqueue.h │ ├── libwrap.cu │ ├── libwrap.h │ ├── nccl.h │ ├── primitives.h │ ├── reduce.cu │ ├── reduce_kernel.h │ └── reduce_scatter.cu └── test │ ├── include │ └── test_utilities.h │ ├── mpi │ └── mpi_test.cu │ └── single │ ├── all_gather_scan.cu │ ├── all_gather_test.cu │ ├── all_reduce_scan.cu │ ├── all_reduce_test.cu │ ├── broadcast_scan.cu │ ├── broadcast_test.cu │ ├── reduce_scan.cu │ ├── reduce_scatter_scan.cu │ ├── reduce_scatter_test.cu │ └── reduce_test.cu ├── plugin ├── README.md ├── dense_parser │ ├── dense_libsvm.cc │ └── plugin.mk ├── example │ ├── README.md │ ├── custom_obj.cc │ └── plugin.mk ├── lz4 │ ├── plugin.mk │ └── sparse_page_lz4_format.cc └── updater_gpu │ ├── README.md │ ├── benchmark │ └── benchmark.py │ ├── gitshallow_submodules.sh │ ├── plugin.mk │ ├── src │ ├── common.cuh │ ├── device_helpers.cuh │ ├── exact │ │ ├── argmax_by_key.cuh │ │ ├── fused_scan_reduce_by_key.cuh │ │ ├── gpu_builder.cuh │ │ ├── node.cuh │ │ └── split2node.cuh │ ├── gpu_data.cuh │ ├── gpu_hist_builder.cu │ ├── gpu_hist_builder.cuh │ ├── register_updater_gpu.cc │ ├── types.cuh │ ├── updater_gpu.cu │ └── updater_gpu.cuh │ └── test │ ├── cpp │ ├── argmax_by_key.cu │ ├── fused_reduce_scan_by_key.cu │ ├── generate_data.sh │ ├── gpu_builder.cu │ ├── node.cu │ ├── utils.cu │ └── utils.cuh │ └── python │ └── test.py ├── python-package ├── .gitignore ├── .pylintrc ├── MANIFEST.in ├── README.rst ├── build_trouble_shooting.md ├── prep_pip.sh ├── setup.cfg ├── setup.py └── setup_pip.py ├── rabit ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── doc │ ├── .gitignore │ ├── Doxyfile │ ├── Makefile │ ├── conf.py │ ├── cpp_api.md │ ├── guide.md │ ├── index.md │ ├── parameters.md │ ├── python_api.md │ └── sphinx_util.py ├── guide │ ├── Makefile │ ├── README │ ├── basic.cc │ ├── basic.py │ ├── broadcast.cc │ ├── broadcast.py │ ├── lazy_allreduce.cc │ └── lazy_allreduce.py ├── include │ ├── dmlc │ │ ├── README.md │ │ ├── base.h │ │ ├── io.h │ │ ├── logging.h │ │ ├── serializer.h │ │ └── type_traits.h │ └── rabit │ │ ├── c_api.h │ │ ├── internal │ │ ├── engine.h │ │ ├── io.h │ │ ├── rabit-inl.h │ │ ├── timer.h │ │ └── utils.h │ │ ├── rabit.h │ │ └── serializable.h ├── lib │ ├── flag │ └── readme.md ├── python │ └── rabit.py ├── scripts │ ├── travis_runtest.sh │ └── travis_script.sh ├── src │ ├── README.md │ ├── allreduce_base.cc │ ├── allreduce_base.h │ ├── allreduce_mock.h │ ├── allreduce_robust-inl.h │ ├── allreduce_robust.cc │ ├── allreduce_robust.h │ ├── c_api.cc │ ├── engine.cc │ ├── engine_base.cc │ ├── engine_empty.cc │ ├── engine_mock.cc │ ├── engine_mpi.cc │ ├── socket.h │ └── thread_local.h └── test │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── lazy_recover.cc │ ├── local_recover.cc │ ├── local_recover.py │ ├── model_recover.cc │ ├── speed_runner.py │ ├── speed_test.cc │ └── test.mk ├── src ├── c_api │ ├── c_api.cc │ ├── c_api_error.cc │ └── c_api_error.h ├── cli_main.cc ├── common │ ├── base64.h │ ├── bitmap.h │ ├── column_matrix.h │ ├── common.cc │ ├── common.h │ ├── config.h │ ├── group_data.h │ ├── hist_util.cc │ ├── hist_util.h │ ├── io.h │ ├── math.h │ ├── quantile.h │ ├── random.h │ ├── row_set.h │ └── sync.h ├── data │ ├── data.cc │ ├── simple_csr_source.cc │ ├── simple_csr_source.h │ ├── simple_dmatrix.cc │ ├── simple_dmatrix.h │ ├── sparse_batch_page.h │ ├── sparse_page_dmatrix.cc │ ├── sparse_page_dmatrix.h │ ├── 
sparse_page_raw_format.cc │ ├── sparse_page_source.cc │ ├── sparse_page_source.h │ └── sparse_page_writer.cc ├── gbm │ ├── gblinear.cc │ ├── gbm.cc │ └── gbtree.cc ├── learner.cc ├── logging.cc ├── metric │ ├── elementwise_metric.cc │ ├── metric.cc │ ├── multiclass_metric.cc │ └── rank_metric.cc ├── objective │ ├── multiclass_obj.cc │ ├── objective.cc │ ├── rank_obj.cc │ └── regression_obj.cc └── tree │ ├── fast_hist_param.h │ ├── param.h │ ├── tree_model.cc │ ├── tree_updater.cc │ ├── updater_basemaker-inl.h │ ├── updater_colmaker.cc │ ├── updater_fast_hist.cc │ ├── updater_histmaker.cc │ ├── updater_prune.cc │ ├── updater_refresh.cc │ ├── updater_skmaker.cc │ └── updater_sync.cc └── tests ├── README.md ├── ci_build ├── Dockerfile.gpu ├── build_gpu_cmake.sh ├── ci_build.sh ├── test_gpu.sh └── with_the_same_user ├── cpp ├── data │ ├── test_metainfo.cc │ ├── test_simple_csr_source.cc │ ├── test_simple_dmatrix.cc │ └── test_sparse_page_dmatrix.cc ├── helpers.cc ├── helpers.h ├── metric │ ├── test_elementwise_metric.cc │ ├── test_metric.cc │ └── test_rank_metric.cc ├── objective │ ├── test_multiclass_metric.cc │ ├── test_objective.cc │ └── test_regression_obj.cc ├── test_main.cc ├── tree │ └── test_param.cc └── xgboost_test.mk ├── distributed ├── runtests.sh └── test_basic.py ├── python ├── test_basic.py ├── test_basic_models.py ├── test_early_stopping.py ├── test_eval_metrics.py ├── test_fast_hist.py ├── test_openmp.py ├── test_plotting.py ├── test_sparse_dmatrix.py ├── test_training_continuation.py ├── test_with_pandas.py ├── test_with_sklearn.py └── testing.py └── travis ├── run_test.sh ├── setup.sh └── travis_after_failure.sh /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for your interest to contribute to the XGBoost-Node. 4 | 5 | Please submit an issue to suggest new features before submitting pull requests. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | short introduction 4 | 5 | ## Platform, Node.js version and C++ compiler version 6 | 7 | example: macOS 10.12 with XCode 8 8 | 9 | ## Steps to reproduce this issue 10 | 11 | code or steps to reproduce this issue 12 | 13 | ## Related issues 14 | 15 | other issues related to this one. 
16 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | short introduction 4 | 5 | ## Tested platforms 6 | 7 | list tested platforms 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .DS_Store 4 | lib/libxgboost.a 5 | lib/libxgboost.dylib 6 | .vscode/settings.json 7 | .vscode/tags 8 | .vscode/c_cpp_properties.json 9 | xgboost/xgboost 10 | coverage 11 | readme.html 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | addons: 4 | apt: 5 | sources: 6 | - ubuntu-toolchain-r-test 7 | packages: 8 | - valgrind 9 | - libgtest-dev 10 | - libboost-dev 11 | 12 | node_js: 13 | - "6" 14 | - "8" 15 | 16 | sudo: false 17 | dist: trusty 18 | osx_image: xcode8.3 19 | 20 | before_script: 21 | - npm install -g codecov 22 | - npm install -g istanbul 23 | 24 | after_success: 25 | - istanbul cover ./node_modules/mocha/bin/_mocha 26 | - codecov --disable=gcov 27 | - if [[ "$TRAVIS_OS_NAME" != "osx" ]]; then valgrind -v --tool=memcheck --leak-check=full --show-leak-kinds=all --track-origins=yes node node_modules/mocha/bin/mocha; fi 28 | 29 | os: 30 | - linux 31 | - osx 32 | 33 | notifications: 34 | email: 35 | on_success: change 36 | on_failure: change 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 by Contributors 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | 
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | cd ./xgboost;
2 | sh build.sh;
3 | touch ./rabit/lib/flag
4 | [ -e ./rabit/lib/librabit.a ] && cp ./rabit/lib/librabit.a ./rabit/lib/librabit_empty.a && echo -fopenmp > ./rabit/lib/flag;
5 | cd ../;
6 | echo done building library;
7 | 
--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
1 | # 1.0.0 2017-09-09
2 | 
3 | + Initial release
4 | 
5 | # 1.1.0 2017-10-30
6 | 
7 | + Added async predict function
8 | + Updated documentation
9 | 
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "xgboost",
3 |   "version": "1.1.0",
4 |   "description": "XGBoost in Node.js",
5 |   "main": "index.js",
6 |   "types": "index.d.ts",
7 |   "scripts": {
8 |     "test": "npm run compile; mocha",
9 |     "cover": "istanbul cover _mocha",
10 |     "install": "bash ./build.sh; node-gyp rebuild",
11 |     "compile": "tsc --p tsconfig.json"
12 |   },
13 |   "repository": {
14 |     "type": "git",
15 |     "url": "https://github.com/nuanio/xgboost-node"
16 |   },
17 |   "bugs": {
18 |     "url": "https://github.com/nuanio/xgboost-node/issues"
19 |   },
20 |   "keywords": [
21 |     "xgboost",
22 |     "machine learning",
23 |     "classifier",
24 |     "gbm",
25 |     "algorithm",
26 |     "ensemble",
27 |     "kaggle",
28 |     "kaggle competition",
29 |     "python",
30 |     "dmlc"
31 |   ],
32 |   "author": "nuan.io",
33 |   "license": "Apache-2.0",
34 |   "dependencies": {
35 |     "bindings": "^1.2.1",
36 |     "nan": "^2.6.2",
37 |     "async": "^2.5.0"
38 |   },
39 |   "devDependencies": {
40 |     "@types/node": "^8.0.9",
41 |     "chai": "^4.0.2",
42 |     "istanbul": "^0.4.5",
43 |     "mocha": "^3.4.2",
44 |     "typescript": "^2.4.1"
45 |   }
46 | }
47 | 
--------------------------------------------------------------------------------
/src/base.h:
--------------------------------------------------------------------------------
1 | #ifndef XGBASE_H
2 | #define XGBASE_H
3 | 
4 | #include <node.h>
5 | #include <v8.h>
6 | #include <nan.h>
7 | 
8 | #include <string>
9 | #include <vector>
10 | #include <iostream>
11 | 
12 | #include "c_api.h"
13 | #include "dmlc/logging.h"
14 | 
15 | using namespace std;
16 | using namespace v8;
17 | 
18 | #endif
--------------------------------------------------------------------------------
/src/index.cc:
--------------------------------------------------------------------------------
1 | #include "xgmatrix.h"
2 | #include "xgmodel.h"
3 | 
4 | void InitAll(v8::Local<v8::Object> exports)
5 | {
6 |   XGModel::Init(exports);
7 |   XGMatrix::Init(exports);
8 | }
9 | 
10 | NODE_MODULE(xgboost, InitAll)
11 | 
--------------------------------------------------------------------------------
/src/xgmatrix.h:
--------------------------------------------------------------------------------
1 | #ifndef XGMAT_H
2 | #define XGMAT_H
3 | 
4 | #include "base.h"
5 | 
6 | class XGMatrix : public Nan::ObjectWrap
7 | {
8 | public:
9 |   static void Init(v8::Local<v8::Object> exports);
10 |   DMatrixHandle GetHandle();
11 | 
12 | private:
13 |   explicit XGMatrix(DMatrixHandle result);
14 |   ~XGMatrix();
15 | 
16 |   static NAN_METHOD(NewMatrix);
17 | 
18 |   static int FromDense(const Nan::FunctionCallbackInfo<v8::Value> &info, DMatrixHandle &res);
19 |   static int FromCSCR(const Nan::FunctionCallbackInfo<v8::Value> &info, DMatrixHandle &res, bool C);
20 |   static int FromFile(const Nan::FunctionCallbackInfo<v8::Value> &info, DMatrixHandle &res);
21 |   static NAN_METHOD(GetCol);
22 |   static NAN_METHOD(GetRow);
23 |   static Nan::Persistent<v8::Function> constructor;
24 |   DMatrixHandle handle;
25 | };
26 | 
27 | #endif
--------------------------------------------------------------------------------
/test/data/iris.xg.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/test/data/iris.xg.model
--------------------------------------------------------------------------------
/test/data/xgmatrix.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/test/data/xgmatrix.bin
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "es5",
4 |     "module": "commonjs",
5 |     "noImplicitAny": false,
6 |     "removeComments": false,
7 |     "noLib": false,
8 |     "preserveConstEnums": true,
9 |     "declaration": true,
10 |     "suppressImplicitAnyIndexErrors": true,
11 |     "outDir": "./",
12 |     "lib": [
13 |       "es6",
14 |       "es7"
15 |     ]
16 |   },
17 |   "files": [
18 |     "index.ts"
19 |   ],
20 |   "exclude": [
21 |     "node_modules"
22 |   ]
23 | }
--------------------------------------------------------------------------------
/xgboost/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.slo
3 | *.lo
4 | *.o
5 | *.page
6 | # Compiled Dynamic libraries
7 | *.so
8 | *.dylib
9 | *.page
10 | # Compiled Static libraries
11 | *.lai
12 | *.la
13 | *.a
14 | *~
15 | *.Rcheck
16 | *.rds
17 | *.tar.gz
18 | #*txt*
19 | *conf
20 | *buffer
21 | *model
22 | *pyc
23 | *.train
24 | *.test
25 | *.tar
26 | *group
27 | *rar
28 | *vali
29 | *sdf
30 | Release
31 | *exe*
32 | *exp
33 | ipch
34 | *.filters
35 | *.user
36 | *log
37 | Debug
38 | *suo
39 | .Rhistory
40 | *.dll
41 | *i386
42 | *x64
43 | *dump
44 | *save
45 | *csv
46 | .Rproj.user
47 | *.cpage.col
48 | *.cpage
49 | *.Rproj
50 | ./xgboost
51 | ./xgboost.mpi
52 | ./xgboost.mock
53 | #.Rbuildignore
54 | R-package.Rproj
55 | *.cache*
56 | #java
57 | java/xgboost4j/target
58 | java/xgboost4j/tmp
59 | java/xgboost4j-demo/target
60 | java/xgboost4j-demo/data/
61 | java/xgboost4j-demo/tmp/
62 | java/xgboost4j-demo/model/
63 | nb-configuration*
64 | # Eclipse
65 | .project
66 | .cproject
67 | .pydevproject
68 | .settings/
69 | build
70 | *.data
71 | build_plugin
72 | .idea
73 | recommonmark/
74 | tags
75 | *.iml
76 | *.class
77 | target
78 | *.swp
79 | 
80 | # cpp tests and gcov generated files
81 | *.gcov
82 | *.gcda
83 | *.gcno
84 | build_tests
85 | /tests/cpp/xgboost_test
86 | 
87 | .DS_Store
88 | 
89 | # spark
90 | metastore_db
91 | 
92 | plugin/updater_gpu/test/cpp/data
93 | 
--------------------------------------------------------------------------------
/xgboost/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | For bugs or installation issues, please provide the following information.
2 | The more information you provide, the more easily we will be able to offer
3 | help and advice.
4 | 
5 | ## Environment info
6 | Operating System:
7 | 
8 | Compiler:
9 | 
10 | Package used (python/R/jvm/C++):
11 | 
12 | `xgboost` version used:
13 | 
14 | If installing from source, please provide
15 | 
16 | 1. The commit hash (`git rev-parse HEAD`)
17 | 2. Logs, which will be helpful (if logs are large, please upload them as an attachment).
18 | 
19 | If you are using the jvm package, please
20 | 
21 | 1. add [jvm-packages] to the title so the issue can be quickly identified
22 | 2. provide the gcc version and distribution
23 | 
24 | If you are using the python package, please provide
25 | 
26 | 1. The python version and distribution
27 | 2. The command to install `xgboost` if you are not installing from source
28 | 
29 | If you are using the R package, please provide
30 | 
31 | 1. The R `sessionInfo()`
32 | 2. The command to install `xgboost` if you are not installing from source
33 | 
34 | ## Steps to reproduce
35 | 
36 | 1.
37 | 2.
38 | 3.
39 | 
40 | ## What have you tried?
41 | 
42 | 1.
43 | 2.
44 | 3.
45 | 
--------------------------------------------------------------------------------
/xgboost/Jenkinsfile:
--------------------------------------------------------------------------------
1 | // -*- mode: groovy -*-
2 | // Jenkins pipeline
3 | // See documentation at https://jenkins.io/doc/book/pipeline/jenkinsfile/
4 | 
5 | // command to start a docker container
6 | docker_run = 'tests/ci_build/ci_build.sh'
7 | 
8 | // timeout in minutes
9 | max_time = 60
10 | 
11 | // initialize source code
12 | def init_git() {
13 |   retry(5) {
14 |     try {
15 |       timeout(time: 2, unit: 'MINUTES') {
16 |         checkout scm
17 |         sh 'git submodule update --init'
18 |       }
19 |     } catch (exc) {
20 |       deleteDir()
21 |       error "Failed to fetch source codes"
22 |     }
23 |   }
24 | }
25 | 
26 | stage('Build') {
27 |   node('GPU' && 'linux') {
28 |     ws('workspace/xgboost/build-gpu-cmake') {
29 |       init_git()
30 |       timeout(time: max_time, unit: 'MINUTES') {
31 |         sh "${docker_run} gpu tests/ci_build/build_gpu_cmake.sh"
32 |       }
33 |     }
34 |   }
35 |   node('GPU' && 'linux') {
36 |     ws('workspace/xgboost/build-gpu-make') {
37 |       init_git()
38 |       timeout(time: max_time, unit: 'MINUTES') {
39 |         sh "${docker_run} gpu make PLUGIN_UPDATER_GPU=ON"
40 |       }
41 |     }
42 |   }
43 | }
44 | 
45 | 
46 | stage('Unit Test') {
47 |   node('GPU' && 'linux') {
48 |     ws('workspace/xgboost/unit-test') {
49 |       init_git()
50 |       timeout(time: max_time, unit: 'MINUTES') {
51 |         sh "${docker_run} gpu tests/ci_build/test_gpu.sh"
52 |       }
53 |     }
54 |   }
55 | }
56 | 
--------------------------------------------------------------------------------
/xgboost/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016 by Contributors
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 
--------------------------------------------------------------------------------
/xgboost/R-package/.Rbuildignore:
--------------------------------------------------------------------------------
1 | \.o$
2 | \.so$
3 | \.dll$
4 | ^.*\.Rproj$
5 | ^\.Rproj\.user$
6 | README.md
7 | 
--------------------------------------------------------------------------------
/xgboost/R-package/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 by Tianqi Chen and Contributors
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 
--------------------------------------------------------------------------------
/xgboost/R-package/R/xgb.DMatrix.save.R:
--------------------------------------------------------------------------------
1 | #' Save xgb.DMatrix object to binary file
2 | #'
3 | #' Save xgb.DMatrix object to binary file
4 | #'
5 | #' @param dmatrix the \code{xgb.DMatrix} object
6 | #' @param fname the name of the file to write.
7 | #'
8 | #' @examples
9 | #' data(agaricus.train, package='xgboost')
10 | #' train <- agaricus.train
11 | #' dtrain <- xgb.DMatrix(train$data, label=train$label)
12 | #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
13 | #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
14 | #' @export
15 | xgb.DMatrix.save <- function(dmatrix, fname) {
16 |   if (typeof(fname) != "character")
17 |     stop("fname must be character")
18 |   if (!inherits(dmatrix, "xgb.DMatrix"))
19 |     stop("dmatrix must be xgb.DMatrix")
20 | 
21 |   .Call(XGDMatrixSaveBinary_R, dmatrix, fname[1], 0L)
22 |   return(TRUE)
23 | }
24 | 
--------------------------------------------------------------------------------
/xgboost/R-package/R/xgb.save.raw.R:
--------------------------------------------------------------------------------
1 | #' Save xgboost model to R's raw vector;
2 | #' users can call xgb.load to load the model back from the raw vector
3 | #'
4 | #' Save xgboost model from xgboost or xgb.train
5 | #'
6 | #' @param model the model object.
7 | #'
8 | #' @examples
9 | #' data(agaricus.train, package='xgboost')
10 | #' data(agaricus.test, package='xgboost')
11 | #' train <- agaricus.train
12 | #' test <- agaricus.test
13 | #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
14 | #'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
15 | #' raw <- xgb.save.raw(bst)
16 | #' bst <- xgb.load(raw)
17 | #' pred <- predict(bst, test$data)
18 | #'
19 | #' @export
20 | xgb.save.raw <- function(model) {
21 |   model <- xgb.get.handle(model)
22 |   .Call(XGBoosterModelToRaw_R, model)
23 | }
24 | 
--------------------------------------------------------------------------------
/xgboost/R-package/cleanup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | rm -f src/Makevars
4 | 
--------------------------------------------------------------------------------
/xgboost/R-package/configure.ac:
--------------------------------------------------------------------------------
1 | ### configure.ac -*- Autoconf -*-
2 | 
3 | AC_PREREQ(2.62)
4 | 
5 | AC_INIT([xgboost],[0.6-3],[],[xgboost],[])
6 | 
7 | OPENMP_CXXFLAGS=""
8 | 
9 | if test `uname -s` = "Linux"
10 | then
11 |   OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
12 | fi
13 | 
14 | if test `uname -s` = "Darwin"
15 | then
16 |   OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
17 |   ac_pkg_openmp=no
18 |   AC_MSG_CHECKING([whether OpenMP will work in a package])
19 |   AC_LANG_CONFTEST(
20 |   [AC_LANG_PROGRAM([[#include <omp.h>]], [[ return omp_get_num_threads (); ]])])
21 |   PKG_CFLAGS="${OPENMP_CFLAGS}" PKG_LIBS="${OPENMP_CFLAGS}" "$RBIN" CMD SHLIB conftest.c 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD && "$RBIN" --vanilla -q -e "dyn.load(paste('conftest',.Platform\$dynlib.ext,sep=''))" 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD && ac_pkg_openmp=yes
22 |   AC_MSG_RESULT([${ac_pkg_openmp}])
23 |   if test "${ac_pkg_openmp}" = no; then
24 |     OPENMP_CXXFLAGS=''
25 |   fi
26 | fi
27 | 
28 | AC_SUBST(OPENMP_CXXFLAGS)
29 | AC_CONFIG_FILES([src/Makevars])
30 | AC_OUTPUT
31 | 
32 | 
--------------------------------------------------------------------------------
/xgboost/R-package/data/agaricus.test.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/R-package/data/agaricus.test.rda
--------------------------------------------------------------------------------
/xgboost/R-package/data/agaricus.train.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/R-package/data/agaricus.train.rda
--------------------------------------------------------------------------------
/xgboost/R-package/demo/00Index:
--------------------------------------------------------------------------------
1 | basic_walkthrough   Basic feature walkthrough
2 | caret_wrapper   Use xgboost to train in caret library
3 | custom_objective   Customize loss function, and evaluation metric
4 | boost_from_prediction   Boosting from existing prediction
5 | predict_first_ntree   Predicting using first n trees
6 | generalized_linear_model   Generalized Linear Model
7 | cross_validation   Cross validation
8 | create_sparse_matrix   Create Sparse Matrix
9 | predict_leaf_indices   Predicting the corresponding leaves
10 | early_stopping   Early Stop in training
11 | poisson_regression   Poisson Regression on count data
12 | tweedie_regression   Tweedie Regression
13 | 
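The 00Index file above is the index that R's `demo()` facility reads: each row maps a demo name to a one-line description. A minimal sketch of how the demos listed there are discovered and run from an R session (assuming the xgboost R package is installed):

```r
# List the demos that 00Index registers for the installed package
demo(package = 'xgboost')

# Run one demo by the name in the left-hand column of 00Index
demo('poisson_regression', package = 'xgboost')
```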
--------------------------------------------------------------------------------
/xgboost/R-package/demo/README.md:
--------------------------------------------------------------------------------
1 | XGBoost R Feature Walkthrough
2 | ====
3 | * [Basic walkthrough of wrappers](basic_walkthrough.R)
4 | * [Train an xgboost model from the caret library](caret_wrapper.R)
5 | * [Customize loss function, and evaluation metric](custom_objective.R)
6 | * [Boosting from existing prediction](boost_from_prediction.R)
7 | * [Predicting using first n trees](predict_first_ntree.R)
8 | * [Generalized Linear Model](generalized_linear_model.R)
9 | * [Cross validation](cross_validation.R)
10 | * [Create a sparse matrix from a dense one](create_sparse_matrix.R)
11 | 
12 | Benchmarks
13 | ====
14 | * [Starter script for Kaggle Higgs Boson](../../demo/kaggle-higgs)
15 | 
16 | Notes
17 | ====
18 | * Contributions of examples and benchmarks are more than welcome!
19 | * If you'd like to share how you use xgboost to solve your problem, send a pull request :)
20 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/boost_from_prediction.R:
--------------------------------------------------------------------------------
1 | require(xgboost)
2 | # load in the agaricus dataset
3 | data(agaricus.train, package='xgboost')
4 | data(agaricus.test, package='xgboost')
5 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
6 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
7 | 
8 | watchlist <- list(eval = dtest, train = dtrain)
9 | ###
10 | # advanced: start from an initial base prediction
11 | #
12 | print('start running example to start from an initial prediction')
13 | # train xgboost for 1 round
14 | param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic')
15 | bst <- xgb.train(param, dtrain, 1, watchlist)
16 | # Note: we need the margin value instead of transformed prediction in set_base_margin
17 | # predicting with outputmargin=TRUE will always give you margin values before the logistic transformation
18 | ptrain <- predict(bst, dtrain, outputmargin=TRUE)
19 | ptest <- predict(bst, dtest, outputmargin=TRUE)
20 | # set the base_margin property of dtrain and dtest
21 | # base margin is the base prediction we will boost from
22 | setinfo(dtrain, "base_margin", ptrain)
23 | setinfo(dtest, "base_margin", ptest)
24 | 
25 | print('this is result of boost from initial prediction')
26 | bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
27 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/poisson_regression.R:
--------------------------------------------------------------------------------
1 | data(mtcars)
2 | head(mtcars)
3 | bst = xgboost(data=as.matrix(mtcars[,-11]),label=mtcars[,11],
4 |               objective='count:poisson',nrounds=5)
5 | pred = predict(bst,as.matrix(mtcars[,-11]))
6 | sqrt(mean((pred-mtcars[,11])^2))
7 | 
8 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/predict_first_ntree.R:
--------------------------------------------------------------------------------
1 | require(xgboost)
2 | # load in the agaricus dataset
3 | data(agaricus.train, package='xgboost')
4 | data(agaricus.test, package='xgboost')
5 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
6 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
7 | 
8 | param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
9 | watchlist <- list(eval = dtest, train = dtrain)
10 | nround = 2
11 | 
12 | # training the model for two rounds
13 | bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
14 | cat('start testing prediction from first n trees\n')
15 | labels <- getinfo(dtest,'label')
16 | 
17 | ### predict using first 1 tree
18 | ypred1 = predict(bst, dtest, ntreelimit=1)
19 | # by default, we predict using all the trees
20 | ypred2 = predict(bst, dtest)
21 | 
22 | cat('error of ypred1=', mean(as.numeric(ypred1>0.5)!=labels),'\n')
23 | cat('error of ypred2=', mean(as.numeric(ypred2>0.5)!=labels),'\n')
24 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/runall.R:
--------------------------------------------------------------------------------
1 | # running all scripts in demo folder
2 | demo(basic_walkthrough)
3 | demo(custom_objective)
4 | demo(boost_from_prediction)
5 | demo(predict_first_ntree)
6 | demo(generalized_linear_model)
7 | demo(cross_validation)
8 | demo(create_sparse_matrix)
9 | demo(predict_leaf_indices)
10 | demo(early_stopping)
11 | demo(poisson_regression)
12 | demo(caret_wrapper)
13 | demo(tweedie_regression)
--------------------------------------------------------------------------------
/xgboost/R-package/demo/tweedie_regression.R:
--------------------------------------------------------------------------------
1 | library(xgboost)
2 | library(data.table)
3 | library(cplm)
4 | 
5 | data(AutoClaim)
6 | 
7 | # auto insurance dataset analyzed by Yip and Yau (2005)
8 | dt <- data.table(AutoClaim)
9 | 
10 | # exclude these columns from the model matrix
11 | exclude <- c('POLICYNO', 'PLCYDATE', 'CLM_FREQ5', 'CLM_AMT5', 'CLM_FLAG', 'IN_YY')
12 | 
13 | # retain the missing values
14 | # NOTE: this dataset comes ready out of the box
15 | options(na.action = 'na.pass')
16 | x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = F])
17 | options(na.action = 'na.omit')
18 | 
19 | # response
20 | y <- dt[, CLM_AMT5]
21 | 
22 | d_train <- xgb.DMatrix(data = x, label = y, missing = NA)
23 | 
24 | # the tweedie_variance_power parameter determines the shape of the
25 | # distribution
26 | # - closer to 1 is more poisson like and the mass
27 | #   is more concentrated near zero
28 | # - closer to 2 is more gamma like and the mass spreads to
29 | #   the right with less concentration near zero
30 | 
31 | params <- list(
32 |   objective = 'reg:tweedie',
33 |   eval_metric = 'rmse',
34 |   tweedie_variance_power = 1.4,
35 |   max_depth = 6,
36 |   eta = 1)
37 | 
38 | bst <- xgb.train(
39 |   data = d_train,
40 |   params = params,
41 |   maximize = FALSE,
42 |   watchlist = list(train = d_train),
43 |   nrounds = 20)
44 | 
45 | var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
46 | 
47 | preds <- predict(bst, d_train)
48 | 
49 | rmse <- sqrt(mean((y - preds)^2))
--------------------------------------------------------------------------------
/xgboost/R-package/man/agaricus.test.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xgboost.R
3 | \docType{data}
4 | \name{agaricus.test}
5 | \alias{agaricus.test}
6 | \title{Test part from Mushroom Data Set}
7 | \format{A list containing a label vector, and a dgCMatrix object with 1611
8 | rows and 126 variables}
9 | \usage{
10 | data(agaricus.test)
11 | }
12 | \description{
13 | This data set is originally from the Mushroom data set,
14 | UCI Machine Learning Repository.
15 | }
16 | \details{
17 | This data set includes the following fields:
18 | 
19 | \itemize{
20 |   \item \code{label} the label for each record
21 |   \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
22 | }
23 | }
24 | \references{
25 | https://archive.ics.uci.edu/ml/datasets/Mushroom
26 | 
27 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
28 | [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
29 | School of Information and Computer Science.
30 | }
31 | \keyword{datasets}
32 | 
--------------------------------------------------------------------------------
/xgboost/R-package/man/agaricus.train.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xgboost.R
3 | \docType{data}
4 | \name{agaricus.train}
5 | \alias{agaricus.train}
6 | \title{Training part from Mushroom Data Set}
7 | \format{A list containing a label vector, and a dgCMatrix object with 6513
8 | rows and 127 variables}
9 | \usage{
10 | data(agaricus.train)
11 | }
12 | \description{
13 | This data set is originally from the Mushroom data set,
14 | UCI Machine Learning Repository.
15 | }
16 | \details{
17 | This data set includes the following fields:
18 | 
19 | \itemize{
20 |   \item \code{label} the label for each record
21 |   \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
22 | }
23 | }
24 | \references{
25 | https://archive.ics.uci.edu/ml/datasets/Mushroom
26 | 
27 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
28 | [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
29 | School of Information and Computer Science.
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.evaluation.log.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.evaluation.log} 4 | \alias{cb.evaluation.log} 5 | \title{Callback closure for logging the evaluation history} 6 | \usage{ 7 | cb.evaluation.log() 8 | } 9 | \description{ 10 | Callback closure for logging the evaluation history 11 | } 12 | \details{ 13 | This callback function appends the current iteration evaluation results \code{bst_evaluation} 14 | available in the calling parent frame to the \code{evaluation_log} list in a calling frame. 15 | 16 | The finalizer callback (called with \code{finalize = TURE} in the end) converts 17 | the \code{evaluation_log} list into a final data.table. 18 | 19 | The iteration evaluation result \code{bst_evaluation} must be a named numeric vector. 20 | 21 | Note: in the column names of the final data.table, the dash '-' character is replaced with 22 | the underscore '_' in order to make the column names more like regular R identifiers. 23 | 24 | Callback function expects the following values to be set in its calling frame: 25 | \code{evaluation_log}, 26 | \code{bst_evaluation}, 27 | \code{iteration}. 28 | } 29 | \seealso{ 30 | \code{\link{callbacks}} 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.print.evaluation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.print.evaluation} 4 | \alias{cb.print.evaluation} 5 | \title{Callback closure for printing the result of evaluation} 6 | \usage{ 7 | cb.print.evaluation(period = 1, showsd = TRUE) 8 | } 9 | \arguments{ 10 | \item{period}{results would be printed every number of periods} 11 | 12 | \item{showsd}{whether standard deviations should be printed (when available)} 13 | } 14 | \description{ 15 | Callback closure for printing the result of evaluation 16 | } 17 | \details{ 18 | The callback function prints the result of evaluation at every \code{period} iterations. 19 | The initial and the last iteration's evaluations are always printed. 20 | 21 | Callback function expects the following values to be set in its calling frame: 22 | \code{bst_evaluation} (also \code{bst_evaluation_err} when available), 23 | \code{iteration}, 24 | \code{begin_iteration}, 25 | \code{end_iteration}. 26 | } 27 | \seealso{ 28 | \code{\link{callbacks}} 29 | } 30 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.reset.parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.reset.parameters} 4 | \alias{cb.reset.parameters} 5 | \title{Callback closure for restetting the booster's parameters at each iteration.} 6 | \usage{ 7 | cb.reset.parameters(new_params) 8 | } 9 | \arguments{ 10 | \item{new_params}{a list where each element corresponds to a parameter that needs to be reset. 
11 | Each element's value must be either a vector of values of length \code{nrounds} 12 | to be set at each iteration, 13 | or a function of two parameters \code{learning_rates(iteration, nrounds)} 14 | which returns a new parameter value by using the current iteration number 15 | and the total number of boosting rounds.} 16 | } 17 | \description{ 18 | Callback closure for restetting the booster's parameters at each iteration. 19 | } 20 | \details{ 21 | This is a "pre-iteration" callback function used to reset booster's parameters 22 | at the beginning of each iteration. 23 | 24 | Note that when training is resumed from some previous model, and a function is used to 25 | reset a parameter value, the \code{nround} argument in this function would be the 26 | the number of boosting rounds in the current training. 27 | 28 | Callback function expects the following values to be set in its calling frame: 29 | \code{bst} or \code{bst_folds}, 30 | \code{iteration}, 31 | \code{begin_iteration}, 32 | \code{end_iteration}. 33 | } 34 | \seealso{ 35 | \code{\link{callbacks}} 36 | } 37 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.save.model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.save.model} 4 | \alias{cb.save.model} 5 | \title{Callback closure for saving a model file.} 6 | \usage{ 7 | cb.save.model(save_period = 0, save_name = "xgboost.model") 8 | } 9 | \arguments{ 10 | \item{save_period}{save the model to disk after every 11 | \code{save_period} iterations; 0 means save the model at the end.} 12 | 13 | \item{save_name}{the name or path for the saved model file. 14 | It can contain a \code{\link[base]{sprintf}} formatting specifier 15 | to include the integer iteration number in the file name. 16 | E.g., with \code{save_name} = 'xgboost_%04d.model', 17 | the file saved at iteration 50 would be named "xgboost_0050.model".} 18 | } 19 | \description{ 20 | Callback closure for saving a model file. 21 | } 22 | \details{ 23 | This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end. 24 | 25 | Callback function expects the following values to be set in its calling frame: 26 | \code{bst}, 27 | \code{iteration}, 28 | \code{begin_iteration}, 29 | \code{end_iteration}. 30 | } 31 | \seealso{ 32 | \code{\link{callbacks}} 33 | } 34 | -------------------------------------------------------------------------------- /xgboost/R-package/man/dim.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{dim.xgb.DMatrix} 4 | \alias{dim.xgb.DMatrix} 5 | \title{Dimensions of xgb.DMatrix} 6 | \usage{ 7 | \method{dim}{xgb.DMatrix}(x) 8 | } 9 | \arguments{ 10 | \item{x}{Object of class \code{xgb.DMatrix}} 11 | } 12 | \description{ 13 | Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}. 14 | } 15 | \details{ 16 | Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also 17 | be directly used with an \code{xgb.DMatrix} object. 
18 | } 19 | \examples{ 20 | data(agaricus.train, package='xgboost') 21 | train <- agaricus.train 22 | dtrain <- xgb.DMatrix(train$data, label=train$label) 23 | 24 | stopifnot(nrow(dtrain) == nrow(train$data)) 25 | stopifnot(ncol(dtrain) == ncol(train$data)) 26 | stopifnot(all(dim(dtrain) == dim(train$data))) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /xgboost/R-package/man/dimnames.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{dimnames.xgb.DMatrix} 4 | \alias{dimnames.xgb.DMatrix} 5 | \alias{dimnames<-.xgb.DMatrix} 6 | \title{Handling of column names of \code{xgb.DMatrix}} 7 | \usage{ 8 | \method{dimnames}{xgb.DMatrix}(x) 9 | 10 | \method{dimnames}{xgb.DMatrix}(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{object of class \code{xgb.DMatrix}} 14 | 15 | \item{value}{a list of two elements: the first one is ignored 16 | and the second one is column names} 17 | } 18 | \description{ 19 | Only column names are supported for \code{xgb.DMatrix}, thus setting 20 | row names would have no effect and the returned row names would be NULL. 21 | } 22 | \details{ 23 | Generic \code{dimnames} methods are used by \code{colnames}. 24 | Since row names are irrelevant, it is recommended to use \code{colnames} directly. 25 | } 26 | \examples{ 27 | data(agaricus.train, package='xgboost') 28 | train <- agaricus.train 29 | dtrain <- xgb.DMatrix(train$data, label=train$label) 30 | dimnames(dtrain) 31 | colnames(dtrain) 32 | colnames(dtrain) <- make.names(1:ncol(train$data)) 33 | print(dtrain, verbose=TRUE) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /xgboost/R-package/man/getinfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{getinfo} 4 | \alias{getinfo} 5 | \alias{getinfo.xgb.DMatrix} 6 | \title{Get information of an xgb.DMatrix object} 7 | \usage{ 8 | getinfo(object, ...) 9 | 10 | \method{getinfo}{xgb.DMatrix}(object, name, ...) 11 | } 12 | \arguments{ 13 | \item{object}{Object of class \code{xgb.DMatrix}} 14 | 15 | \item{...}{other parameters} 16 | 17 | \item{name}{the name of the information field to get (see details)} 18 | } 19 | \description{ 20 | Get information of an xgb.DMatrix object 21 | } 22 | \details{ 23 | The \code{name} field can be one of the following: 24 | 25 | \itemize{ 26 | \item \code{label}: the label XGBoost learns from; 27 | \item \code{weight}: instance weights used to rescale each row's contribution; 28 | \item \code{base_margin}: the base prediction XGBoost will boost from; 29 | \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. 30 | 31 | } 32 | 33 | \code{group} can be set by \code{setinfo} but can't be retrieved by \code{getinfo}.
34 | } 35 | \examples{ 36 | data(agaricus.train, package='xgboost') 37 | train <- agaricus.train 38 | dtrain <- xgb.DMatrix(train$data, label=train$label) 39 | 40 | labels <- getinfo(dtrain, 'label') 41 | setinfo(dtrain, 'label', 1-labels) 42 | 43 | labels2 <- getinfo(dtrain, 'label') 44 | stopifnot(all(labels2 == 1-labels)) 45 | } 46 | -------------------------------------------------------------------------------- /xgboost/R-package/man/print.xgb.Booster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{print.xgb.Booster} 4 | \alias{print.xgb.Booster} 5 | \title{Print xgb.Booster} 6 | \usage{ 7 | \method{print}{xgb.Booster}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an xgb.Booster object} 11 | 12 | \item{verbose}{whether to print detailed data (e.g., attribute values)} 13 | 14 | \item{...}{not currently used} 15 | } 16 | \description{ 17 | Print information about xgb.Booster. 18 | } 19 | \examples{ 20 | data(agaricus.train, package='xgboost') 21 | train <- agaricus.train 22 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 23 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 24 | attr(bst, 'myattr') <- 'memo' 25 | 26 | print(bst) 27 | print(bst, verbose=TRUE) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /xgboost/R-package/man/print.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{print.xgb.DMatrix} 4 | \alias{print.xgb.DMatrix} 5 | \title{Print xgb.DMatrix} 6 | \usage{ 7 | \method{print}{xgb.DMatrix}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an xgb.DMatrix object} 11 | 12 | \item{verbose}{whether to print colnames (when present)} 13 | 14 | \item{...}{not currently used} 15 | } 16 | \description{ 17 | Print information about xgb.DMatrix. 18 | Currently it displays dimensions and presence of info-fields and colnames. 19 | } 20 | \examples{ 21 | data(agaricus.train, package='xgboost') 22 | train <- agaricus.train 23 | dtrain <- xgb.DMatrix(train$data, label=train$label) 24 | 25 | dtrain 26 | print(dtrain, verbose=TRUE) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /xgboost/R-package/man/print.xgb.cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.cv.R 3 | \name{print.xgb.cv.synchronous} 4 | \alias{print.xgb.cv.synchronous} 5 | \title{Print xgb.cv result} 6 | \usage{ 7 | \method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an \code{xgb.cv.synchronous} object} 11 | 12 | \item{verbose}{whether to print detailed data} 13 | 14 | \item{...}{passed to \code{print.data.table}} 15 | } 16 | \description{ 17 | Prints formatted results of \code{xgb.cv}. 18 | } 19 | \details{ 20 | When not verbose, only the evaluation results are printed, 21 | including the best iteration (when available).
22 | } 23 | \examples{ 24 | data(agaricus.train, package='xgboost') 25 | train <- agaricus.train 26 | cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2, 27 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 28 | print(cv) 29 | print(cv, verbose=TRUE) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/setinfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{setinfo} 4 | \alias{setinfo} 5 | \alias{setinfo.xgb.DMatrix} 6 | \title{Set information of an xgb.DMatrix object} 7 | \usage{ 8 | setinfo(object, ...) 9 | 10 | \method{setinfo}{xgb.DMatrix}(object, name, info, ...) 11 | } 12 | \arguments{ 13 | \item{object}{Object of class "xgb.DMatrix"} 14 | 15 | \item{...}{other parameters} 16 | 17 | \item{name}{the name of the field to set} 18 | 19 | \item{info}{the information to set in the specified field} 20 | } 21 | \description{ 22 | Set information of an xgb.DMatrix object 23 | } 24 | \details{ 25 | The \code{name} field can be one of the following: 26 | 27 | \itemize{ 28 | \item \code{label}: the label XGBoost learns from; 29 | \item \code{weight}: instance weights used to rescale each row's contribution; 30 | \item \code{base_margin}: the base prediction XGBoost will boost from; 31 | \item \code{group}: number of rows in each group (to use with the \code{rank:pairwise} objective). 32 | } 33 | } 34 | \examples{ 35 | data(agaricus.train, package='xgboost') 36 | train <- agaricus.train 37 | dtrain <- xgb.DMatrix(train$data, label=train$label) 38 | 39 | labels <- getinfo(dtrain, 'label') 40 | setinfo(dtrain, 'label', 1-labels) 41 | labels2 <- getinfo(dtrain, 'label') 42 | stopifnot(all.equal(labels2, 1-labels)) 43 | } 44 | -------------------------------------------------------------------------------- /xgboost/R-package/man/slice.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{slice} 4 | \alias{slice} 5 | \alias{slice.xgb.DMatrix} 6 | \alias{[.xgb.DMatrix} 7 | \title{Get a new DMatrix containing the specified rows of 8 | the original xgb.DMatrix object} 9 | \usage{ 10 | slice(object, ...) 11 | 12 | \method{slice}{xgb.DMatrix}(object, idxset, ...)
13 | 14 | \method{[}{xgb.DMatrix}(object, idxset, colset = NULL) 15 | } 16 | \arguments{ 17 | \item{object}{Object of class "xgb.DMatrix"} 18 | 19 | \item{...}{other parameters (currently not used)} 20 | 21 | \item{idxset}{an integer vector of indices of the rows needed} 22 | 23 | \item{colset}{currently not used (column subsetting is not available)} 24 | } 25 | \description{ 26 | Get a new DMatrix containing the specified rows of 27 | the original xgb.DMatrix object 28 | } 29 | \examples{ 30 | data(agaricus.train, package='xgboost') 31 | train <- agaricus.train 32 | dtrain <- xgb.DMatrix(train$data, label=train$label) 33 | 34 | dsub <- slice(dtrain, 1:42) 35 | labels1 <- getinfo(dsub, 'label') 36 | dsub <- dtrain[1:42, ] 37 | labels2 <- getinfo(dsub, 'label') 38 | all.equal(labels1, labels2) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.DMatrix.save.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.save.R 3 | \name{xgb.DMatrix.save} 4 | \alias{xgb.DMatrix.save} 5 | \title{Save xgb.DMatrix object to binary file} 6 | \usage{ 7 | xgb.DMatrix.save(dmatrix, fname) 8 | } 9 | \arguments{ 10 | \item{dmatrix}{the \code{xgb.DMatrix} object} 11 | 12 | \item{fname}{the name of the file to write.} 13 | } 14 | \description{ 15 | Save xgb.DMatrix object to binary file 16 | } 17 | \examples{ 18 | data(agaricus.train, package='xgboost') 19 | train <- agaricus.train 20 | dtrain <- xgb.DMatrix(train$data, label=train$label) 21 | xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') 22 | dtrain <- xgb.DMatrix('xgb.DMatrix.data') 23 | } 24 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.load.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.load.R 3 | \name{xgb.load} 4 | \alias{xgb.load} 5 | \title{Load xgboost model from binary file} 6 | \usage{ 7 | xgb.load(modelfile) 8 | } 9 | \arguments{ 10 | \item{modelfile}{the name of the binary input file.} 11 | } 12 | \value{ 13 | An object of \code{xgb.Booster} class. 14 | } 15 | \description{ 16 | Load xgboost model from the binary model file. 17 | } 18 | \details{ 19 | The input file is expected to contain a model saved in an xgboost-internal binary format 20 | using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some 21 | appropriate methods from other xgboost interfaces. E.g., a model trained in Python and 22 | saved from there in xgboost format could be loaded from R. 23 | 24 | Note: a model saved as an R object has to be loaded using the corresponding R methods, 25 | not \code{xgb.load}. 26 | } 27 | \examples{ 28 | data(agaricus.train, package='xgboost') 29 | data(agaricus.test, package='xgboost') 30 | train <- agaricus.train 31 | test <- agaricus.test 32 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 33 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 34 | xgb.save(bst, 'xgb.model') 35 | bst <- xgb.load('xgb.model') 36 | pred <- predict(bst, test$data) 37 | } 38 | \seealso{ 39 | \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
40 | } 41 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{xgb.parameters<-} 4 | \alias{xgb.parameters<-} 5 | \title{Accessors for model parameters.} 6 | \usage{ 7 | xgb.parameters(object) <- value 8 | } 9 | \arguments{ 10 | \item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} 11 | 12 | \item{value}{a list (or an object coercible to a list) with the names of parameters to set 13 | and the elements corresponding to parameter values.} 14 | } 15 | \description{ 16 | Only the setter for xgboost parameters is currently implemented. 17 | } 18 | \details{ 19 | Note that the setter would usually work more efficiently for \code{xgb.Booster.handle} 20 | than for \code{xgb.Booster}, since only a handle needs to be copied. 21 | } 22 | \examples{ 23 | data(agaricus.train, package='xgboost') 24 | train <- agaricus.train 25 | 26 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 27 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 28 | 29 | xgb.parameters(bst) <- list(eta = 0.1) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.save.raw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.save.raw.R 3 | \name{xgb.save.raw} 4 | \alias{xgb.save.raw} 5 | \title{Save xgboost model to R's raw vector; 6 | the model can be loaded back from the raw vector by calling xgb.load} 7 | \usage{ 8 | xgb.save.raw(model) 9 | } 10 | \arguments{ 11 | \item{model}{the model object.} 12 | } 13 | \description{ 14 | Save an xgboost model produced by \code{xgboost} or \code{xgb.train} 15 | } 16 | \examples{ 17 | data(agaricus.train, package='xgboost') 18 | data(agaricus.test, package='xgboost') 19 | train <- agaricus.train 20 | test <- agaricus.test 21 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 22 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 23 | raw <- xgb.save.raw(bst) 24 | bst <- xgb.load(raw) 25 | pred <- predict(bst, test$data) 26 | 27 | } 28 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgboost-deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{xgboost-deprecated} 4 | \alias{xgboost-deprecated} 5 | \title{Deprecation notices.} 6 | \description{ 7 | At this time, some of the parameter names were changed in order to make the code style more uniform. 8 | The deprecated parameters will be removed in the next release. 9 | } 10 | \details{ 11 | To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table. 12 | 13 | A deprecation warning is shown when any of the deprecated parameters is used in a call. 14 | An additional warning is shown when there was a partial match to a deprecated parameter 15 | (as R is able to partially match parameter names).
16 | } 17 | -------------------------------------------------------------------------------- /xgboost/R-package/src/Makevars.in: -------------------------------------------------------------------------------- 1 | # package root 2 | PKGROOT=../../ 3 | ENABLE_STD_THREAD=1 4 | # _*_ mode: Makefile; _*_ 5 | 6 | CXX_STD = CXX11 7 | 8 | XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ 9 | -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ 10 | -DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\ 11 | -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ 12 | 13 | PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS) 14 | PKG_CXXFLAGS= @OPENMP_CXXFLAGS@ $(SHLIB_PTHREAD_FLAGS) 15 | PKG_LIBS = @OPENMP_CXXFLAGS@ $(SHLIB_PTHREAD_FLAGS) 16 | OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o ./init.o\ 17 | $(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o\ 18 | $(PKGROOT)/rabit/src/engine_empty.o $(PKGROOT)/rabit/src/c_api.o 19 | -------------------------------------------------------------------------------- /xgboost/R-package/src/Makevars.win: -------------------------------------------------------------------------------- 1 | # package root 2 | PKGROOT=./ 3 | ENABLE_STD_THREAD=0 4 | # _*_ mode: Makefile; _*_ 5 | 6 | # This file is only used for Windows compilation from GitHub 7 | # It will be replaced by Makevars in the CRAN version 8 | .PHONY: all xgblib 9 | all: $(SHLIB) 10 | $(SHLIB): xgblib 11 | xgblib: 12 | cp -r ../../src . 13 | cp -r ../../rabit . 14 | cp -r ../../dmlc-core . 15 | cp -r ../../include . 16 | cp -r ../../amalgamation . 17 | 18 | CXX_STD = CXX11 19 | 20 | XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ 21 | -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ 22 | -DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\ 23 | -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ 24 | 25 | PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS) 26 | PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) 27 | PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) 28 | OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o ./init.o\ 29 | $(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o\ 30 | $(PKGROOT)/rabit/src/engine_empty.o $(PKGROOT)/rabit/src/c_api.o 31 | 32 | $(OBJECTS) : xgblib 33 | -------------------------------------------------------------------------------- /xgboost/R-package/src/xgboost_assert.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 by Contributors 2 | #include <stdio.h>  /* vsprintf; header names restored, they were lost in extraction */ 3 | #include <stdarg.h> /* va_list, va_start, va_end */ 4 | #include <R.h>      /* error() */ 5 | 6 | // implements error handling 7 | void XGBoostAssert_R(int exp, const char *fmt, ...) { 8 | char buf[1024]; 9 | if (exp == 0) { 10 | va_list args; 11 | va_start(args, fmt); 12 | vsprintf(buf, fmt, args); 13 | va_end(args); 14 | error("AssertError:%s\n", buf); 15 | } 16 | } 17 | void XGBoostCheck_R(int exp, const char *fmt, ...)
{ 18 | char buf[1024]; 19 | if (exp == 0) { 20 | va_list args; 21 | va_start(args, fmt); 22 | vsprintf(buf, fmt, args); 23 | va_end(args); 24 | error("%s\n", buf); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(xgboost) 3 | 4 | test_check("xgboost") 5 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_gc_safety.R: -------------------------------------------------------------------------------- 1 | require(xgboost) 2 | 3 | context("Garbage Collection Safety Check") 4 | 5 | test_that("train and prediction when gctorture is on", { 6 | data(agaricus.train, package='xgboost') 7 | data(agaricus.test, package='xgboost') 8 | train <- agaricus.train 9 | test <- agaricus.test 10 | gctorture(TRUE) 11 | bst <- xgboost(data = train$data, label = train$label, max.depth = 2, 12 | eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") 13 | pred <- predict(bst, test$data) 14 | gctorture(FALSE) 15 | }) 16 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_glm.R: -------------------------------------------------------------------------------- 1 | context('Test generalized linear models') 2 | 3 | require(xgboost) 4 | 5 | test_that("glm works", { 6 | data(agaricus.train, package='xgboost') 7 | data(agaricus.test, package='xgboost') 8 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) 9 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) 10 | expect_equal(class(dtrain), "xgb.DMatrix") 11 | expect_equal(class(dtest), "xgb.DMatrix") 12 | param <- list(objective = "binary:logistic", booster = "gblinear", 13 | nthread = 2, alpha = 0.0001, lambda = 1) 14 | watchlist <- list(eval = dtest, train = dtrain) 15 | num_round <- 2 16 | bst <- xgb.train(param, dtrain, num_round, watchlist) 17 | ypred <- predict(bst, dtest) 18 | expect_equal(length(getinfo(dtest, 'label')), 1611) 19 | }) 20 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_lint.R: -------------------------------------------------------------------------------- 1 | context("Code is of high quality and lint free") 2 | test_that("Code Lint", { 3 | skip_on_cran() 4 | skip_on_travis() 5 | skip_if_not_installed("lintr") 6 | my_linters <- list( 7 | absolute_paths_linter=lintr::absolute_paths_linter, 8 | assignment_linter=lintr::assignment_linter, 9 | closed_curly_linter=lintr::closed_curly_linter, 10 | commas_linter=lintr::commas_linter, 11 | # commented_code_linter=lintr::commented_code_linter, 12 | infix_spaces_linter=lintr::infix_spaces_linter, 13 | line_length_linter=lintr::line_length_linter, 14 | no_tab_linter=lintr::no_tab_linter, 15 | object_usage_linter=lintr::object_usage_linter, 16 | # snake_case_linter=lintr::snake_case_linter, 17 | # multiple_dots_linter=lintr::multiple_dots_linter, 18 | object_length_linter=lintr::object_length_linter, 19 | open_curly_linter=lintr::open_curly_linter, 20 | # single_quotes_linter=lintr::single_quotes_linter, 21 | spaces_inside_linter=lintr::spaces_inside_linter, 22 | spaces_left_parentheses_linter=lintr::spaces_left_parentheses_linter, 23 | trailing_blank_lines_linter=lintr::trailing_blank_lines_linter, 24 | 
trailing_whitespace_linter=lintr::trailing_whitespace_linter 25 | ) 26 | # lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality 27 | }) 28 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_monotone.R: -------------------------------------------------------------------------------- 1 | require(xgboost) 2 | 3 | context("monotone constraints") 4 | 5 | set.seed(1024) 6 | x = rnorm(1000, 10) 7 | y = -1*x + rnorm(1000, 0.001) + 3*sin(x) 8 | train = matrix(x, ncol = 1) 9 | 10 | 11 | test_that("monotone constraints for regression", { 12 | bst = xgboost(data = train, label = y, max_depth = 2, 13 | eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, 14 | monotone_constraints = -1) 15 | 16 | pred = predict(bst, train) 17 | 18 | ind = order(train[,1]) 19 | pred.ord = pred[ind] 20 | expect_true({ 21 | !any(diff(pred.ord) > 0) 22 | }, "Monotone Constraint Satisfied") 23 | 24 | }) 25 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_parameter_exposure.R: -------------------------------------------------------------------------------- 1 | context('Test model params and call are exposed to R') 2 | 3 | require(xgboost) 4 | 5 | data(agaricus.train, package='xgboost') 6 | data(agaricus.test, package='xgboost') 7 | 8 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) 9 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) 10 | 11 | bst <- xgboost(data = dtrain, 12 | max_depth = 2, 13 | eta = 1, 14 | nrounds = 10, 15 | nthread = 1, 16 | verbose = 0, 17 | objective = "binary:logistic") 18 | 19 | test_that("call is exposed to R", { 20 | expect_false(is.null(bst$call)) 21 | expect_is(bst$call, "call") 22 | }) 23 | 24 | test_that("params is exposed to R", { 25 | model_params <- bst$params 26 | expect_is(model_params, "list") 27 | expect_equal(model_params$eta, 1) 28 | expect_equal(model_params$max_depth, 2) 29 | expect_equal(model_params$objective, "binary:logistic") 30 | }) 31 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_poisson_regression.R: -------------------------------------------------------------------------------- 1 | context('Test poisson regression model') 2 | 3 | require(xgboost) 4 | set.seed(1994) 5 | 6 | test_that("poisson regression works", { 7 | data(mtcars) 8 | bst <- xgboost(data = as.matrix(mtcars[,-11]), label = mtcars[,11], 9 | objective = 'count:poisson', nrounds=10, verbose=0) 10 | expect_equal(class(bst), "xgb.Booster") 11 | pred <- predict(bst, as.matrix(mtcars[, -11])) 12 | expect_equal(length(pred), 32) 13 | expect_lt(sqrt(mean( (pred - mtcars[,11])^2 )), 1.2) 14 | }) 15 | -------------------------------------------------------------------------------- /xgboost/R-package/vignettes/xgboost.bib: -------------------------------------------------------------------------------- 1 | @article{friedman2001greedy, 2 | title={Greedy function approximation: a gradient boosting machine}, 3 | author={Friedman, Jerome H}, 4 | journal={Annals of Statistics}, 5 | pages={1189--1232}, 6 | year={2001}, 7 | publisher={JSTOR} 8 | } 9 | 10 | @article{friedman2000additive, 11 | title={Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors)}, 12 | author={Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert and others}, 13 | journal={The Annals of
Statistics}, 14 | volume={28}, 15 | number={2}, 16 | pages={337--407}, 17 | year={2000}, 18 | publisher={Institute of Mathematical Statistics} 19 | } 20 | 21 | 22 | @misc{ 23 | Bache+Lichman:2013 , 24 | author = "K. Bache and M. Lichman", 25 | year = "2013", 26 | title = "{UCI} Machine Learning Repository", 27 | url = "http://archive.ics.uci.edu/ml", 28 | institution = "University of California, Irvine, School of Information and Computer Sciences" 29 | } 30 | 31 | -------------------------------------------------------------------------------- /xgboost/amalgamation/dmlc-minimum0.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2015 by Contributors. 3 | * \brief Minimum DMLC library amalgamation, used for easy plugin of the dmlc lib. 4 | * Normally this is not needed. 5 | */ 6 | #include "../dmlc-core/src/io/line_split.cc" 7 | #include "../dmlc-core/src/io/recordio_split.cc" 8 | #include "../dmlc-core/src/io/input_split_base.cc" 9 | #include "../dmlc-core/src/io/local_filesys.cc" 10 | #include "../dmlc-core/src/data.cc" 11 | #include "../dmlc-core/src/io.cc" 12 | #include "../dmlc-core/src/recordio.cc" 13 | 14 | 15 | -------------------------------------------------------------------------------- /xgboost/appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - target: native 4 | solution_name: C:/projects/xgboost/build2013/xgboost.sln 5 | - target: native 6 | solution_name: C:/projects/xgboost/build2015/xgboost.sln 7 | - target: jvm 8 | platform: 9 | - x64 10 | 11 | configuration: 12 | - Debug 13 | - Release 14 | 15 | install: 16 | - SET PATH=;%PATH% 17 | - git submodule update --init --recursive 18 | 19 | before_build: 20 | - mkdir build2013 21 | - mkdir build2015 22 | - cd build2013 23 | - cmake .. -G"Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Release;Debug;" 24 | - cd ../build2015 25 | - cmake .. -G"Visual Studio 14 2015 Win64" -DCMAKE_CONFIGURATION_TYPES="Release;Debug;" 26 | 27 | build_script: 28 | - cd %APPVEYOR_BUILD_FOLDER% 29 | - if "%target%" == "native" msbuild %solution_name% 30 | - if "%target%" == "jvm" cd jvm-packages && mvn test -pl :xgboost4j 31 | -------------------------------------------------------------------------------- /xgboost/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is a simple script to build xgboost on macOS and Linux 3 | # Basically, it first tries to build with OpenMP; if that fails, it disables OpenMP and builds again. 4 | # This will automatically build xgboost for macOS users who don't have OpenMP support. 5 | # In most cases, simply typing 'make' will give you what you want. 6 | 7 | # See additional instructions in doc/build.md 8 | set -e 9 | 10 | if make; then 11 | echo "Successfully built multi-thread xgboost" 12 | else 13 | echo "-----------------------------" 14 | echo "Building multi-thread xgboost failed" 15 | echo "Starting to build single-thread xgboost" 16 | make clean_all 17 | make config=make/minimum.mk 18 | if [ $?
-eq 0 ] ;then 19 | echo "Successfully built single-thread xgboost" 20 | echo "If you want the multi-threaded version" 21 | echo "See additional instructions in doc/build.md" 22 | else 23 | echo "Failed to build single-thread xgboost" 24 | fi 25 | fi 26 | -------------------------------------------------------------------------------- /xgboost/cub/examples/block/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /Debug 3 | /Release 4 | /cuda55.sdf 5 | /cuda55.suo 6 | /cuda60.sdf 7 | /cuda60.suo 8 | -------------------------------------------------------------------------------- /xgboost/cub/examples/device/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /Debug 3 | /ipch 4 | /Release 5 | /cuda55.sdf 6 | /cuda55.suo 7 | /cuda60.sdf 8 | /cuda60.suo 9 | -------------------------------------------------------------------------------- /xgboost/cub/experimental/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /xgboost/cub/experimental/spmv_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 4 | do 5 | echo `date`, `$1 --dense=$i $2 $3 $4 $5 $6 $7` 6 | done 7 | 8 | echo 9 | echo 10 | 11 | for i in `ls /home/dumerrill/graphs/spmv/*.mtx` 12 | do 13 | if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] 14 | then 15 | echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` 16 | fi 17 | done 18 | 19 | echo 20 | echo 21 | 22 | for i in `ls /scratch/dumerrill/graphs/mtx/*.mtx` 23 | #for i in `ls /cygdrive/w/Dev/UFget/mtx/*.mtx` 24 | do 25 | if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] 26 | then 27 | echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /xgboost/cub/test/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /link_main.obj 3 | -------------------------------------------------------------------------------- /xgboost/cub/test/link_a.cu: -------------------------------------------------------------------------------- 1 | #include <cub/cub.cuh>  // header name restored; it was lost in extraction 2 | 3 | void a() 4 | { 5 | printf("a() called\n"); 6 | 7 | cub::DoubleBuffer<unsigned int> d_keys;   // element type assumed; template args were lost in extraction 8 | cub::DoubleBuffer<unsigned int> d_values; 9 | size_t temp_storage_bytes = 0; 10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); 11 | } 12 | -------------------------------------------------------------------------------- /xgboost/cub/test/link_b.cu: -------------------------------------------------------------------------------- 1 | #include <cub/cub.cuh>  // header name restored; it was lost in extraction 2 | 3 | void b() 4 | { 5 | printf("b() called\n"); 6 | 7 | cub::DoubleBuffer<unsigned int> d_keys;   // element type assumed; template args were lost in extraction 8 | cub::DoubleBuffer<unsigned int> d_values; 9 | size_t temp_storage_bytes = 0; 10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); 11 | } 12 | -------------------------------------------------------------------------------- /xgboost/cub/test/link_main.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h>  // header name restored; needed for printf 2 | 3 | extern void a(); 4 | extern void b(); 5 | 6 | int main() 7 | { 8 |
printf("hello world\n"); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /xgboost/cub/tune/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /xgboost/demo/.gitignore: -------------------------------------------------------------------------------- 1 | *.libsvm 2 | *.pkl 3 | -------------------------------------------------------------------------------- /xgboost/demo/binary_classification/mapfeat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | def loadfmap( fname ): 4 | fmap = {} 5 | nmap = {} 6 | 7 | for l in open( fname ): 8 | arr = l.split() 9 | if arr[0].find('.') != -1: 10 | idx = int( arr[0].strip('.') ) 11 | assert idx not in fmap 12 | fmap[ idx ] = {} 13 | ftype = arr[1].strip(':') 14 | content = arr[2] 15 | else: 16 | content = arr[0] 17 | for it in content.split(','): 18 | if it.strip() == '': 19 | continue 20 | k , v = it.split('=') 21 | fmap[ idx ][ v ] = len(nmap) + 1 22 | nmap[ len(nmap) ] = ftype+'='+k 23 | return fmap, nmap 24 | 25 | def write_nmap( fo, nmap ): 26 | for i in range( len(nmap) ): 27 | fo.write('%d\t%s\ti\n' % (i, nmap[i]) ) 28 | 29 | # start here 30 | fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' ) 31 | fo = open( 'featmap.txt', 'w' ) 32 | write_nmap( fo, nmap ) 33 | fo.close() 34 | 35 | fo = open( 'agaricus.txt', 'w' ) 36 | for l in open( 'agaricus-lepiota.data' ): 37 | arr = l.split(',') 38 | if arr[0] == 'p': 39 | fo.write('1') 40 | else: 41 | assert arr[0] == 'e' 42 | fo.write('0') 43 | for i in range( 1,len(arr) ): 44 | fo.write( ' %d:1' % fmap[i][arr[i].strip()] ) 45 | fo.write('\n') 46 | 47 | fo.close() 48 | -------------------------------------------------------------------------------- /xgboost/demo/binary_classification/mknfold.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import random 4 | 5 | if len(sys.argv) < 2: 6 | print ('Usage: [nfold = 5]') 7 | exit(0) 8 | 9 | random.seed( 10 ) 10 | 11 | k = int( sys.argv[2] ) 12 | if len(sys.argv) > 3: 13 | nfold = int( sys.argv[3] ) 14 | else: 15 | nfold = 5 16 | 17 | fi = open( sys.argv[1], 'r' ) 18 | ftr = open( sys.argv[1]+'.train', 'w' ) 19 | fte = open( sys.argv[1]+'.test', 'w' ) 20 | for l in fi: 21 | if random.randint( 1 , nfold ) == k: 22 | fte.write( l ) 23 | else: 24 | ftr.write( l ) 25 | 26 | fi.close() 27 | ftr.close() 28 | fte.close() 29 | 30 | -------------------------------------------------------------------------------- /xgboost/demo/binary_classification/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # map feature using indicator encoding, also produce featmap.txt 3 | python mapfeat.py 4 | # split train and test 5 | python mknfold.py agaricus.txt 1 6 | # training and output the models 7 | ../../xgboost mushroom.conf 8 | # output prediction task=pred 9 | ../../xgboost mushroom.conf task=pred model_in=0002.model 10 | # print the boosters of 00002.model in dump.raw.txt 11 | ../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt 12 | # use the feature map in printing for better visualization 13 | ../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 14 | cat dump.nice.txt 15 | 16 | 
-------------------------------------------------------------------------------- /xgboost/demo/data/README.md: -------------------------------------------------------------------------------- 1 | This folder contains processed example datasets used by the demos. 2 | Copyright of the datasets belongs to the original copyright holders. 3 | -------------------------------------------------------------------------------- /xgboost/demo/data/gen_autoclaims.R: -------------------------------------------------------------------------------- 1 | site <- 'http://cran.r-project.org' 2 | if (!require('dummies')) 3 | install.packages('dummies', repos=site) 4 | if (!require('insuranceData')) 5 | install.packages('insuranceData', repos=site) 6 | 7 | library(dummies) 8 | library(insuranceData) 9 | 10 | data(AutoClaims) 11 | data = AutoClaims 12 | 13 | data$STATE = as.factor(data$STATE) 14 | data$CLASS = as.factor(data$CLASS) 15 | data$GENDER = as.factor(data$GENDER) 16 | 17 | data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=T); 18 | write.table(data.dummy, 'autoclaims.csv', sep=',', row.names=F, col.names=F, quote=F) 19 | -------------------------------------------------------------------------------- /xgboost/demo/distributed-training/README.md: -------------------------------------------------------------------------------- 1 | Distributed XGBoost Training 2 | ============================ 3 | This is a tutorial on Distributed XGBoost Training. 4 | Currently xgboost supports distributed training via the CLI program with a configuration file. 5 | There are also plans to add distributed Python and other language bindings; please open an issue 6 | if you are interested in contributing. 7 | 8 | Build XGBoost with Distributed Filesystem Support 9 | ------------------------------------------------- 10 | To use distributed xgboost, you only need to turn on the options to build 11 | with distributed filesystems (HDFS or S3) in ```xgboost/make/config.mk```. 12 | 13 | 14 | Step by Step Tutorial on AWS 15 | ---------------------------- 16 | Check out [this tutorial](https://xgboost.readthedocs.org/en/latest/tutorials/aws_yarn.html) for running distributed xgboost. 17 | 18 | 19 | Model Analysis 20 | -------------- 21 | XGBoost models are exchangeable across all bindings and platforms. 22 | This means you can use Python or R to analyze the learnt model and make predictions. 23 | For example, you can use [plot_model.ipynb](plot_model.ipynb) to visualize the learnt model. 24 | -------------------------------------------------------------------------------- /xgboost/demo/distributed-training/run_aws.sh: -------------------------------------------------------------------------------- 1 | # This is the example script to run distributed xgboost on AWS.
2 | # Change the following two lines for configuration 3 | 4 | export BUCKET=mybucket 5 | 6 | # submit the job to YARN 7 | ../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2\ 8 | ../../xgboost mushroom.aws.conf nthread=2\ 9 | data=s3://${BUCKET}/xgb-demo/train\ 10 | eval[test]=s3://${BUCKET}/xgb-demo/test\ 11 | model_dir=s3://${BUCKET}/xgb-demo/model 12 | -------------------------------------------------------------------------------- /xgboost/demo/gpu_acceleration/bosch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import xgboost as xgb 4 | import time 5 | import random 6 | from sklearn.model_selection import StratifiedKFold 7 | 8 | # For sampling rows from the input file 9 | random_seed = 9 10 | subset = 0.4 11 | 12 | n_rows = 1183747 13 | train_rows = int(n_rows * subset) 14 | random.seed(random_seed) 15 | skip = sorted(random.sample(range(1,n_rows + 1),n_rows-train_rows)) 16 | data = pd.read_csv("../data/train_numeric.csv", index_col=0, dtype=np.float32, skiprows=skip) 17 | y = data['Response'].values 18 | del data['Response'] 19 | X = data.values 20 | 21 | param = {} 22 | param['objective'] = 'binary:logistic' 23 | param['eval_metric'] = 'auc' 24 | param['max_depth'] = 5 25 | param['eta'] = 0.3 26 | param['silent'] = 0 27 | param['tree_method'] = 'gpu_exact' 28 | 29 | num_round = 20 30 | 31 | skf = StratifiedKFold(n_splits=5) 32 | 33 | for i, (train, test) in enumerate(skf.split(X, y)): 34 | dtrain = xgb.DMatrix(X[train], label=y[train]) 35 | tmp = time.time() 36 | bst = xgb.train(param, dtrain, num_round) 37 | boost_time = time.time() - tmp 38 | res = bst.eval(xgb.DMatrix(X[test], label=y[test])) 39 | print("Fold {}: {}, Boost Time {}".format(i, res, str(boost_time))) 40 | del bst 41 | 42 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/README.md: -------------------------------------------------------------------------------- 1 | XGBoost Python Feature Walkthrough 2 | ================================== 3 | * [Basic walkthrough of wrappers](basic_walkthrough.py) 4 | * [Customize loss function and evaluation metric](custom_objective.py) 5 | * [Boosting from existing prediction](boost_from_prediction.py) 6 | * [Predicting using first n trees](predict_first_ntree.py) 7 | * [Generalized Linear Model](generalized_linear_model.py) 8 | * [Cross validation](cross_validation.py) 9 | * [Predicting leaf indices](predict_leaf_indices.py) 10 | * [Sklearn Wrapper](sklearn_examples.py) 11 | * [Sklearn Parallel](sklearn_parallel.py) 12 | * [Sklearn access evals result](sklearn_evals_result.py) 13 | * [Access evals result](evals_result.py) 14 | * [External Memory](external_memory.py) 15 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/boost_from_prediction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import xgboost as xgb 4 | 5 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 6 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 7 | watchlist = [(dtest,'eval'), (dtrain,'train')] 8 | ### 9 | # advanced: start from an initial base prediction 10 | # 11 | print ('start running example to start from an initial prediction') 12 | # specify parameters via map; definitions are the same as in the C++ version 13 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 14 | #
train xgboost for 1 round 15 | bst = xgb.train( param, dtrain, 1, watchlist ) 16 | # Note: we need the margin value instead of the transformed prediction in set_base_margin 17 | # predicting with output_margin=True will always give you margin values before the logistic transformation 18 | ptrain = bst.predict(dtrain, output_margin=True) 19 | ptest = bst.predict(dtest, output_margin=True) 20 | dtrain.set_base_margin(ptrain) 21 | dtest.set_base_margin(ptest) 22 | 23 | print ('this is the result of running from the initial prediction') 24 | bst = xgb.train( param, dtrain, 1, watchlist ) 25 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/evals_result.py: -------------------------------------------------------------------------------- 1 | ## 2 | # This script demonstrates how to access the eval metrics in xgboost 3 | ## 4 | 5 | import xgboost as xgb 6 | dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True) 7 | dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True) 8 | 9 | param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')] 10 | 11 | num_round = 2 12 | watchlist = [(dtest,'eval'), (dtrain,'train')] 13 | 14 | evals_result = {} 15 | bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=evals_result) 16 | 17 | print('Access logloss metric directly from evals_result:') 18 | print(evals_result['eval']['logloss']) 19 | 20 | print('') 21 | print('Access metrics through a loop:') 22 | for e_name, e_mtrs in evals_result.items(): 23 | print('- {}'.format(e_name)) 24 | for e_mtr_name, e_mtr_vals in e_mtrs.items(): 25 | print(' - {}'.format(e_mtr_name)) 26 | print(' - {}'.format(e_mtr_vals)) 27 | 28 | print('') 29 | print('Access complete dictionary:') 30 | print(evals_result) 31 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/external_memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import scipy.sparse 4 | import xgboost as xgb 5 | 6 | ### simple example for using the external memory version 7 | 8 | # this is the only difference: add a # followed by a cache prefix name 9 | # several cache files with the prefix will be generated 10 | # currently only conversion from libsvm files is supported 11 | dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache') 12 | dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache') 13 | 14 | # specify validation sets to watch performance 15 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 16 | 17 | # performance notice: set nthread to the number of your real CPU cores 18 | # some CPUs offer two threads per core; for example, on a 4-core CPU with 8 threads, set nthread=4 19 | #param['nthread']=num_real_cpu 20 | 21 | watchlist = [(dtest,'eval'), (dtrain,'train')] 22 | num_round = 2 23 | bst = xgb.train(param, dtrain, num_round, watchlist) 24 | 25 | 26 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/gamma_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import xgboost as xgb 3 | import numpy as np 4 | 5 | # this script demonstrates how to fit a gamma regression model (with log link function) 6 | # in xgboost; before running the demo you need to generate the autoclaims dataset 7 | # by running gen_autoclaims.R located in
xgboost/demo/data. 8 | 9 | data = np.genfromtxt('../data/autoclaims.csv', delimiter=',') 10 | dtrain = xgb.DMatrix(data[0:4741, 0:34], data[0:4741, 34]) 11 | dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34]) 12 | 13 | # for gamma regression, we need to set the objective to 'reg:gamma'; it is also suggested 14 | # to set base_score to a value between 1 and 5 if the number of iterations is small 15 | param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3} 16 | 17 | # the rest of the settings are the same 18 | watchlist = [(dtest,'eval'), (dtrain,'train')] 19 | num_round = 30 20 | 21 | # training and evaluation 22 | bst = xgb.train(param, dtrain, num_round, watchlist) 23 | preds = bst.predict(dtest) 24 | labels = dtest.get_label() 25 | print ('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds)))) 26 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/generalized_linear_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import xgboost as xgb 3 | ## 4 | # this script demonstrates how to fit a generalized linear model in xgboost 5 | # basically, we are using a linear model instead of trees for our boosters 6 | ## 7 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 8 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 9 | # change booster to gblinear, so that we are fitting a linear model 10 | # alpha is the L1 regularizer 11 | # lambda is the L2 regularizer 12 | # you can also set lambda_bias, which is the L2 regularizer on the bias term 13 | param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 14 | 'alpha': 0.0001, 'lambda': 1 } 15 | 16 | # normally, you do not need to set eta (step_size) 17 | # XGBoost uses a parallel coordinate descent algorithm (shotgun); 18 | # parallelization can affect convergence in certain cases, 19 | # so setting eta to a smaller value, e.g. 0.5, can make the optimization more stable 20 | # param['eta'] = 1 21 | 22 | ## 23 | # the rest of the settings are the same 24 | ## 25 | watchlist = [(dtest,'eval'), (dtrain,'train')] 26 | num_round = 4 27 | bst = xgb.train(param, dtrain, num_round, watchlist) 28 | preds = bst.predict(dtest) 29 | labels = dtest.get_label() 30 | print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) 31 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/predict_first_ntree.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import xgboost as xgb 4 | 5 | ### load data and do training 6 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 7 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 8 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 9 | watchlist = [(dtest,'eval'), (dtrain,'train')] 10 | num_round = 3 11 | bst = xgb.train(param, dtrain, num_round, watchlist) 12 | 13 | print ('start testing prediction from the first n trees') 14 | ### predict using the first tree only 15 | label = dtest.get_label() 16 | ypred1 = bst.predict(dtest, ntree_limit=1) 17 | # by default, we predict using all the trees 18 | ypred2 = bst.predict(dtest) 19 | print ('error of ypred1=%f' % (np.sum((ypred1>0.5)!=label) /float(len(label)))) 20 | print ('error of ypred2=%f' % (np.sum((ypred2>0.5)!=label) /float(len(label)))) 21 |
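A related pattern to predicting with the first n trees: when training with early stopping, the booster records the best round, which can then be passed back through ntree_limit. A minimal sketch under the same setup as predict_first_ntree.py; the early_stopping_rounds value is illustrative:

```python
import xgboost as xgb

dtrain = xgb.DMatrix('../data/agaricus.txt.train')
dtest = xgb.DMatrix('../data/agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}

# stop once the metric on dtest has not improved for 5 rounds
bst = xgb.train(param, dtrain, num_boost_round=50,
                evals=[(dtest, 'eval')], early_stopping_rounds=5)

# predict using only the trees up to the best iteration
ypred = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)
```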
-------------------------------------------------------------------------------- /xgboost/demo/guide-python/predict_leaf_indices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import xgboost as xgb 3 | 4 | ### load data and do training 5 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 6 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 7 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 8 | watchlist = [(dtest,'eval'), (dtrain,'train')] 9 | num_round = 3 10 | bst = xgb.train(param, dtrain, num_round, watchlist) 11 | 12 | print ('start testing prediction of the leaf indices') 13 | ### predict using the first 2 trees 14 | leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True) 15 | print(leafindex.shape) 16 | print(leafindex) 17 | ### predict using all trees 18 | leafindex = bst.predict(dtest, pred_leaf = True) 19 | print(leafindex.shape) 20 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/runall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PYTHONPATH=${PYTHONPATH}:../../python-package 3 | python basic_walkthrough.py 4 | python custom_objective.py 5 | python boost_from_prediction.py 6 | python predict_first_ntree.py 7 | python generalized_linear_model.py 8 | python cross_validation.py 9 | python predict_leaf_indices.py 10 | python sklearn_examples.py 11 | python sklearn_parallel.py 12 | python external_memory.py 13 | rm -rf *~ *.model *.buffer 14 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/sklearn_evals_result.py: -------------------------------------------------------------------------------- 1 | ## 2 | # This script demonstrates how to access the xgboost eval metrics by using sklearn 3 | ## 4 | 5 | import xgboost as xgb 6 | import numpy as np 7 | from sklearn.datasets import make_hastie_10_2 8 | 9 | X, y = make_hastie_10_2(n_samples=2000, random_state=42) 10 | 11 | # Map labels from {-1, 1} to {0, 1} 12 | labels, y = np.unique(y, return_inverse=True) 13 | 14 | X_train, X_test = X[:1600], X[1600:] 15 | y_train, y_test = y[:1600], y[1600:] 16 | 17 | param_dist = {'objective':'binary:logistic', 'n_estimators':2} 18 | 19 | clf = xgb.XGBModel(**param_dist) 20 | # Or you can use: clf = xgb.XGBClassifier(**param_dist) 21 | 22 | clf.fit(X_train, y_train, 23 | eval_set=[(X_train, y_train), (X_test, y_test)], 24 | eval_metric='logloss', 25 | verbose=True) 26 | 27 | # Load evals result by calling the evals_result() function 28 | evals_result = clf.evals_result() 29 | 30 | print('Access logloss metric directly from validation_0:') 31 | print(evals_result['validation_0']['logloss']) 32 | 33 | print('') 34 | print('Access metrics through a loop:') 35 | for e_name, e_mtrs in evals_result.items(): 36 | print('- {}'.format(e_name)) 37 | for e_mtr_name, e_mtr_vals in e_mtrs.items(): 38 | print(' - {}'.format(e_mtr_name)) 39 | print(' - {}'.format(e_mtr_vals)) 40 | 41 | print('') 42 | print('Access complete dict:') 43 | print(evals_result) 44 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/sklearn_parallel.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | if __name__ == "__main__": 4 | # NOTE: on posix systems, this *has* to be here and in the 5 | # `__name__ == "__main__"` clause to run XGBoost in parallel processes 6
| # using fork, if XGBoost was built with OpenMP support. Otherwise, if you 7 | # build XGBoost without OpenMP support, you can use fork, which is the 8 | # default backend for joblib, and omit this. 9 | try: 10 | from multiprocessing import set_start_method 11 | except ImportError: 12 | raise ImportError("Unable to import multiprocessing.set_start_method." 13 | " This example only runs on Python 3.4") 14 | set_start_method("forkserver") 15 | 16 | import numpy as np 17 | from sklearn.model_selection import GridSearchCV 18 | from sklearn.datasets import load_boston 19 | import xgboost as xgb 20 | 21 | rng = np.random.RandomState(31337) 22 | 23 | print("Parallel Parameter optimization") 24 | boston = load_boston() 25 | 26 | os.environ["OMP_NUM_THREADS"] = "2" # or to whatever you want 27 | y = boston['target'] 28 | X = boston['data'] 29 | xgb_model = xgb.XGBRegressor() 30 | clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6], 31 | 'n_estimators': [50, 100, 200]}, verbose=1, 32 | n_jobs=2) 33 | clf.fit(X, y) 34 | print(clf.best_score_) 35 | print(clf.best_params_) 36 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/README.md: -------------------------------------------------------------------------------- 1 | Highlights 2 | ===== 3 | The Higgs challenge ended recently; xgboost was used by many players. This list highlights some of their xgboost solutions: 4 | * Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/) 5 | * The solution by Tianqi Chen and Tong He [Link](https://github.com/hetong007/higgsml) 6 | 7 | Guide for Kaggle Higgs Challenge 8 | ===== 9 | 10 | This folder gives an example of how to use the XGBoost Python module to run the Kaggle Higgs competition 11 | 12 | This script achieves about a 3.600 AMS score on the public leaderboard. To get started, follow these steps: 13 | 14 | 1. Compile the XGBoost python lib 15 | ```bash 16 | cd ../.. 17 | make 18 | ``` 19 | 20 | 2. Put training.csv and test.csv in the folder './data' (you can create a symbolic link) 21 | 22 | 3. Run ./run.sh 23 | 24 | Speed 25 | ===== 26 | speedtest.py compares xgboost's speed on this dataset with sklearn.GBM 27 | 28 | 29 | Using R module 30 | ===== 31 | * Alternatively, you can run the R scripts higgs-train.R and higgs-pred.R.
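For reference, the AMS metric quoted above (and used as the ams@0.15 eval_metric in higgs-train.R below) is the challenge's Approximate Median Significance. A minimal sketch of the published formula with its regularization term b_r = 10; the function name and argument layout here are our own, not from these scripts:

```python
import math

def ams(s, b, b_r=10.0):
    # Approximate Median Significance from the Higgs challenge:
    #   s - sum of weights of selected signal events (true positives)
    #   b - sum of weights of selected background events (false positives)
    return math.sqrt(2.0 * ((s + b + b_r) * math.log(1.0 + s / (b + b_r)) - s))
```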
32 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/higgs-pred.R: -------------------------------------------------------------------------------- 1 | # install xgboost package, see R-package in root folder 2 | require(xgboost) 3 | require(methods) 4 | 5 | modelfile <- "higgs.model" 6 | outfile <- "higgs.pred.csv" 7 | dtest <- read.csv("data/test.csv", header=TRUE) 8 | data <- as.matrix(dtest[2:31]) 9 | idx <- dtest[[1]] 10 | 11 | xgmat <- xgb.DMatrix(data, missing = -999.0) 12 | bst <- xgb.load(modelfile=modelfile) 13 | ypred <- predict(bst, xgmat) 14 | 15 | rorder <- rank(ypred, ties.method="first") 16 | 17 | threshold <- 0.15 18 | # to be completed 19 | ntop <- length(rorder) - as.integer(threshold*length(rorder)) 20 | plabel <- ifelse(rorder > ntop, "s", "b") 21 | outdata <- list("EventId" = idx, 22 | "RankOrder" = rorder, 23 | "Class" = plabel) 24 | write.csv(outdata, file = outfile, quote=FALSE, row.names=FALSE) 25 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/higgs-pred.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # make prediction 3 | import numpy as np 4 | import xgboost as xgb 5 | 6 | # path to where the data lies 7 | dpath = 'data' 8 | 9 | modelfile = 'higgs.model' 10 | outfile = 'higgs.pred.csv' 11 | # make top 15% as positive 12 | threshold_ratio = 0.15 13 | 14 | # load in training data, directly use numpy 15 | dtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 ) 16 | data = dtest[:,1:31] 17 | idx = dtest[:,0] 18 | 19 | print ('finish loading from csv ') 20 | xgmat = xgb.DMatrix( data, missing = -999.0 ) 21 | bst = xgb.Booster({'nthread':16}, model_file = modelfile) 22 | ypred = bst.predict( xgmat ) 23 | 24 | res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ] 25 | 26 | rorder = {} 27 | for k, v in sorted( res, key = lambda x:-x[1] ): 28 | rorder[ k ] = len(rorder) + 1 29 | 30 | # write out predictions 31 | ntop = int( threshold_ratio * len(rorder ) ) 32 | fo = open(outfile, 'w') 33 | nhit = 0 34 | ntot = 0 35 | fo.write('EventId,RankOrder,Class\n') 36 | for k, v in res: 37 | if rorder[k] <= ntop: 38 | lb = 's' 39 | nhit += 1 40 | else: 41 | lb = 'b' 42 | # change output rank order to follow Kaggle convention 43 | fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) ) 44 | ntot += 1 45 | fo.close() 46 | 47 | print ('finished writing into prediction file') 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/higgs-train.R: -------------------------------------------------------------------------------- 1 | # install xgboost package, see R-package in root folder 2 | require(xgboost) 3 | require(methods) 4 | 5 | testsize <- 550000 6 | 7 | dtrain <- read.csv("data/training.csv", header=TRUE) 8 | dtrain[33] <- dtrain[33] == "s" 9 | label <- as.numeric(dtrain[[33]]) 10 | data <- as.matrix(dtrain[2:31]) 11 | weight <- as.numeric(dtrain[[32]]) * testsize / length(label) 12 | 13 | sumwpos <- sum(weight * (label==1.0)) 14 | sumwneg <- sum(weight * (label==0.0)) 15 | print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos)) 16 | 17 | xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0) 18 | param <- list("objective" = "binary:logitraw", 19 | "scale_pos_weight" = sumwneg / sumwpos, 20 | "bst:eta" = 0.1, 21 | "bst:max_depth" = 6, 22 | 
"eval_metric" = "auc", 23 | "eval_metric" = "ams@0.15", 24 | "silent" = 1, 25 | "nthread" = 16) 26 | watchlist <- list("train" = xgmat) 27 | nround = 120 28 | print ("loading data end, start to boost trees") 29 | bst = xgb.train(param, xgmat, nround, watchlist ); 30 | # save out model 31 | xgb.save(bst, "higgs.model") 32 | print ('finish training') 33 | 34 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -u higgs-numpy.py 4 | ret=$? 5 | if [[ $ret != 0 ]]; then 6 | echo "ERROR in higgs-numpy.py" 7 | exit $ret 8 | fi 9 | python -u higgs-pred.py 10 | ret=$? 11 | if [[ $ret != 0 ]]; then 12 | echo "ERROR in higgs-pred.py" 13 | exit $ret 14 | fi 15 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-otto/README.MD: -------------------------------------------------------------------------------- 1 | Benckmark for Otto Group Competition 2 | ========= 3 | 4 | This is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge). 5 | 6 | ## Getting started 7 | 8 | 1. Put `train.csv` and `test.csv` under the `data` folder 9 | 2. Run the script 10 | 3. Submit the `submission.csv` 11 | 12 | The parameter `nthread` controls the number of cores to run on, please set it to suit your machine. 13 | 14 | ## R-package 15 | 16 | To install the R-package of xgboost, please run 17 | 18 | ```r 19 | devtools::install_github('tqchen/xgboost',subdir='R-package') 20 | ``` 21 | 22 | Windows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. 
23 | 24 | 25 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-otto/otto_train_pred.R: -------------------------------------------------------------------------------- 1 | require(xgboost) 2 | require(methods) 3 | 4 | train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = F) 5 | test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = F) 6 | train = train[,-1] 7 | test = test[,-1] 8 | 9 | y = train[,ncol(train)] 10 | y = gsub('Class_','',y) 11 | y = as.integer(y)-1 # xgboost takes class labels in [0, num_class) 12 | 13 | x = rbind(train[,-ncol(train)],test) 14 | x = as.matrix(x) 15 | x = matrix(as.numeric(x),nrow(x),ncol(x)) 16 | trind = 1:length(y) 17 | teind = (nrow(train)+1):nrow(x) 18 | 19 | # Set necessary parameters 20 | param <- list("objective" = "multi:softprob", 21 | "eval_metric" = "mlogloss", 22 | "num_class" = 9, 23 | "nthread" = 8) 24 | 25 | # Run Cross Validation 26 | cv.nround = 50 27 | bst.cv = xgb.cv(param=param, data = x[trind,], label = y, 28 | nfold = 3, nrounds=cv.nround) 29 | 30 | # Train the model 31 | nround = 50 32 | bst = xgboost(param=param, data = x[trind,], label = y, nrounds=nround) 33 | 34 | # Make prediction 35 | pred = predict(bst,x[teind,]) 36 | pred = matrix(pred,9,length(pred)/9) 37 | pred = t(pred) 38 | 39 | # Output submission 40 | pred = format(pred, digits=2,scientific=F) # shrink the size of submission 41 | pred = data.frame(1:nrow(pred),pred) 42 | names(pred) = c('id', paste0('Class_',1:9)) 43 | write.csv(pred,file='submission.csv', quote=FALSE,row.names=FALSE) 44 | -------------------------------------------------------------------------------- /xgboost/demo/multiclass_classification/README.md: -------------------------------------------------------------------------------- 1 | Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology) 2 | 3 | Make sure you have built the xgboost python module in ../../python 4 | 5 | 1. Run runexp.sh 6 | ```bash 7 | ./runexp.sh 8 | ``` 9 | 10 | 11 | -------------------------------------------------------------------------------- /xgboost/demo/multiclass_classification/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f dermatology.data ] 3 | then 4 | echo "use existing data to run multi class classification" 5 | else 6 | echo "getting data from uci, make sure you are connected to the internet" 7 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data 8 | fi 9 | python train.py 10 | -------------------------------------------------------------------------------- /xgboost/demo/rank/README.md: -------------------------------------------------------------------------------- 1 | Learning to rank 2 | ==== 3 | XGBoost supports ranking tasks. In the ranking scenario, data are often grouped, and we need the [group information file](../../doc/input_format.md#group-input-format) to specify the groups. The ranking model used in XGBoost is LambdaRank; this objective is not yet complete, and currently we provide pairwise ranking. 4 | 5 | ### Parameters 6 | The configuration settings are similar to the regression and binary classification settings, except that the user needs to specify the objective: 7 | 8 | ``` 9 | ... 10 | objective="rank:pairwise" 11 | ... 12 | ``` 13 | For more usage details please refer to the [binary classification demo](../binary_classification). 14 | 15 | Instructions 16 | ==== 17 | The dataset for the ranking demo is from LETOR 4.0 MQ2008 Fold 1. 18 | You can use the following commands to run the example: 19 | 20 | Get the data: ./wgetdata.sh 21 | Run the example: ./runexp.sh
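As a concrete illustration of the group information file mentioned above: it is a plain text file with one integer per line, each giving how many consecutive rows of the corresponding feature file belong to one query group. The counts below are made up for illustration; trans_data.py in this folder generates the real ones (e.g. mq2008.train.group):

```
8
8
16
```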
-------------------------------------------------------------------------------- /xgboost/demo/rank/runexp.sh: -------------------------------------------------------------------------------- 1 | python trans_data.py train.txt mq2008.train mq2008.train.group 2 | 3 | python trans_data.py test.txt mq2008.test mq2008.test.group 4 | 5 | python trans_data.py vali.txt mq2008.vali mq2008.vali.group 6 | 7 | ../../xgboost mq2008.conf 8 | 9 | ../../xgboost mq2008.conf task=pred model_in=0004.model 10 | 11 | 12 | -------------------------------------------------------------------------------- /xgboost/demo/rank/trans_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | def save_data(group_data,output_feature,output_group): 4 | if len(group_data) == 0: 5 | return 6 | 7 | output_group.write(str(len(group_data))+"\n") 8 | for data in group_data: 9 | # only include nonzero features 10 | feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ] 11 | output_feature.write(data[0] + " " + " ".join(feats) + "\n") 12 | 13 | if __name__ == "__main__": 14 | if len(sys.argv) != 4: 15 | print ("Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]") 16 | sys.exit(0) 17 | 18 | fi = open(sys.argv[1]) 19 | output_feature = open(sys.argv[2],"w") 20 | output_group = open(sys.argv[3],"w") 21 | 22 | group_data = [] 23 | group = "" 24 | for line in fi: 25 | if not line: 26 | break 27 | if "#" in line: 28 | line = line[:line.index("#")] 29 | splits = line.strip().split(" ") 30 | if splits[1] != group: 31 | save_data(group_data,output_feature,output_group) 32 | group_data = [] 33 | group = splits[1] 34 | group_data.append(splits) 35 | 36 | save_data(group_data,output_feature,output_group) 37 | 38 | fi.close() 39 | output_feature.close() 40 | output_group.close() 41 | 42 | -------------------------------------------------------------------------------- /xgboost/demo/rank/wgetdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar 3 | unrar x MQ2008.rar 4 | mv -f MQ2008/Fold1/*.txt . 5 | -------------------------------------------------------------------------------- /xgboost/demo/regression/README.md: -------------------------------------------------------------------------------- 1 | Regression 2 | ==== 3 | Using XGBoost for regression is very similar to using it for binary classification. We suggest that you refer to the [binary classification demo](../binary_classification) first. In XGBoost, if we use the negative log-likelihood as the loss function for regression, the training procedure is the same as training a binary classifier with XGBoost. 4 | 5 | ### Tutorial 6 | The dataset we use is the [computer hardware dataset from the UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware).
The demo for regression is almost the same as the [binary classification demo](../binary_classification), except for a small difference in the general parameters: 7 | ``` 8 | # General parameter 9 | # this is the only difference with classification, use reg:linear to do linear regression 10 | # when labels are in [0,1] we can also use reg:logistic 11 | objective = reg:linear 12 | ... 13 | 14 | ``` 15 | 16 | The input format is the same as binary classification, except that the label is now the target regression value. We use linear regression here; if we want to use logistic regression (objective = reg:logistic), the labels need to be pre-scaled into [0,1]. 17 | 18 |
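For example, if you want to try reg:logistic on this dataset, a simple min-max rescaling maps the targets into [0,1] first. The sketch below uses made-up target values and is not part of the demo scripts:

```python
import numpy as np

y = np.array([198.0, 269.0, 220.0, 132.0])      # made-up regression targets
y_scaled = (y - y.min()) / (y.max() - y.min())  # now lies in [0, 1]
```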
-------------------------------------------------------------------------------- /xgboost/demo/regression/mapfeat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | fo = open( 'machine.txt', 'w' ) 4 | cnt = 6 # next free feature index; features 0-5 hold the numeric attributes 5 | fmap = {} 6 | for l in open( 'machine.data' ): 7 | arr = l.split(',') 8 | fo.write(arr[8]) # column 8 (PRP) is the regression label 9 | for i in range( 0,6 ): 10 | fo.write( ' %d:%s' %(i,arr[i+2]) ) 11 | 12 | if arr[0] not in fmap: 13 | fmap[arr[0]] = cnt 14 | cnt += 1 15 | 16 | fo.write( ' %d:1' % fmap[arr[0]] ) 17 | fo.write('\n') 18 | 19 | fo.close() 20 | 21 | # create feature map for machine data 22 | fo = open('featmap.txt', 'w') 23 | # list from machine.names 24 | names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ] 25 | 26 | for i in range(0,6): 27 | fo.write( '%d\t%s\tint\n' % (i, names[i+1])) 28 | 29 | for v, k in sorted( fmap.items(), key = lambda x:x[1] ): 30 | fo.write( '%d\tvendor=%s\ti\n' % (k, v)) 31 | fo.close() 32 | -------------------------------------------------------------------------------- /xgboost/demo/regression/mknfold.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import random 4 | 5 | if len(sys.argv) < 3: 6 | print ('Usage: <filename> <k> [nfold = 5]') 7 | exit(0) 8 | 9 | random.seed( 10 ) 10 | 11 | k = int( sys.argv[2] ) 12 | if len(sys.argv) > 3: 13 | nfold = int( sys.argv[3] ) 14 | else: 15 | nfold = 5 16 | 17 | fi = open( sys.argv[1], 'r' ) 18 | ftr = open( sys.argv[1]+'.train', 'w' ) 19 | fte = open( sys.argv[1]+'.test', 'w' ) 20 | for l in fi: 21 | if random.randint( 1 , nfold ) == k: 22 | fte.write( l ) 23 | else: 24 | ftr.write( l ) 25 | 26 | fi.close() 27 | ftr.close() 28 | fte.close() 29 | 30 | -------------------------------------------------------------------------------- /xgboost/demo/regression/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # map the data to features. For convenience, we use only 7 of the original attributes and encode them as features in a trivial way 3 | python mapfeat.py 4 | # split train and test 5 | python mknfold.py machine.txt 1 6 | # training and output the models 7 | ../../xgboost machine.conf 8 | # output predictions of test data 9 | ../../xgboost machine.conf task=pred model_in=0002.model 10 | # print the boosters of 0002.model in dump.raw.txt 11 | ../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt 12 | # print the boosters of 0002.model in dump.nice.txt with feature map 13 | ../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 14 | 15 | # cat the result 16 | cat dump.nice.txt 17 | -------------------------------------------------------------------------------- /xgboost/demo/yearpredMSD/README.md: -------------------------------------------------------------------------------- 1 | Demonstrating how to use XGBoost on the [Year Prediction task of the Million Song Dataset](https://archive.ics.uci.edu/ml/datasets/YearPredictionMSD) 2 | 3 | 1. Run runexp.sh 4 | ```bash 5 | ./runexp.sh 6 | ``` 7 | 8 | You can also use the script to prepare the data in LIBSVM format and run the [Distributed Version](../../multi-node). 9 | Note that normally a single machine is enough for a dataset at this scale; the distributed version is intended for larger datasets. 10 | -------------------------------------------------------------------------------- /xgboost/demo/yearpredMSD/csv2libsvm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | 4 | if len(sys.argv) < 3: 5 | print 'Usage: csv2libsvm.py <input csv> <output libsvm>' 6 | print 'convert an all-numerical csv to libsvm' 7 | sys.exit(0) 8 | fo = open(sys.argv[2], 'w') 9 | for l in open(sys.argv[1]): 10 | arr = l.split(',') 11 | fo.write('%s' % arr[0]) 12 | for i in xrange(len(arr) - 1): 13 | fo.write(' %d:%s' % (i, arr[i+1])) # the last field keeps its trailing newline, ending the row 14 | fo.close() 15 | -------------------------------------------------------------------------------- /xgboost/demo/yearpredMSD/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f YearPredictionMSD.txt ] 4 | then 5 | echo "use existing data to run experiment" 6 | else 7 | echo "getting data from uci, make sure you are connected to the internet" 8 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip 9 | unzip YearPredictionMSD.txt.zip 10 | fi 11 | echo "start making data.."
12 | # convert the csv file into LIBSVM format 13 | python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm 14 | head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train 15 | tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test 16 | echo "finished making the data" 17 | ../../xgboost yearpredMSD.conf 18 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | *~ 30 | config.mk 31 | *.pyc 32 | 33 | # Vim 34 | *.swp 35 | *.swo 36 | *.swn 37 | *.csv 38 | 39 | # Emacs 40 | .clang_complete 41 | deps 42 | recommonmark 43 | build 44 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/.travis.yml: -------------------------------------------------------------------------------- 1 | # disable sudo to use container based build 2 | sudo: false 3 | 4 | # Enabling test on Linux and OS X 5 | os: 6 | - linux 7 | - osx 8 | 9 | # Use Build Matrix to do lint and build separately 10 | env: 11 | matrix: 12 | - TASK=lint 13 | - TASK=unittest_gtest 14 | 15 | matrix: 16 | exclude: 17 | - os: osx 18 | env: TASK=lint 19 | 20 | # dependent apt packages 21 | addons: 22 | apt: 23 | sources: 24 | - ubuntu-toolchain-r-test 25 | packages: 26 | - doxygen 27 | - wget 28 | - git 29 | - libcurl4-openssl-dev 30 | - unzip 31 | - gcc-4.8 32 | - g++-4.8 33 | 34 | before_install: 35 | - export TRAVIS=scripts/travis 36 | - source ${TRAVIS}/travis_setup_env.sh 37 | - ${TRAVIS}/travis_osx_install.sh 38 | 39 | install: 40 | - pip install --user cpplint pylint 41 | 42 | script: ${TRAVIS}/travis_script.sh 43 | 44 | 45 | before_cache: 46 | - ${TRAVIS}/travis_before_cache.sh 47 | 48 | cache: 49 | directories: 50 | - ${HOME}/.cache/usr 51 | 52 | 53 | notifications: 54 | # Emails are sent to the committer's git-configured email address by default, 55 | email: 56 | on_success: change 57 | on_failure: always 58 | slack: dmlc:mKX5kxjqTP6fBb89lXD3G5f3 59 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 by Contributors 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/cmake/lint.cmake: -------------------------------------------------------------------------------- 1 | get_filename_component(CMAKE_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/.."
ABSOLUTE) 2 | if(NOT MSVC) 3 | set(LINT_COMMAND ${CMAKE_SOURCE_DIR}/scripts/lint.py) 4 | else() 5 | if((NOT PYTHON_EXECUTABLE)) 6 | message(FATAL_ERROR "Cannot lint without python") 7 | endif() 8 | # format output so VS can bring us to the offending file/line 9 | set(LINT_COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/scripts/lint.py) 10 | endif() 11 | 12 | set(LINT_DIRS include src scripts) 13 | 14 | cmake_policy(SET CMP0009 NEW) # suppress cmake warning 15 | execute_process( 16 | COMMAND ${LINT_COMMAND} ${PROJECT_NAME} all ${LINT_DIRS} 17 | WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} 18 | ERROR_VARIABLE LINT_OUTPUT 19 | ERROR_STRIP_TRAILING_WHITESPACE 20 | 21 | ) 22 | message(STATUS ${LINT_OUTPUT}) -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | _build 3 | doxygen 4 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/README: -------------------------------------------------------------------------------- 1 | This document is generated by sphinx. 2 | Make sure you have cloned the following repo into the root. 3 | 4 | - https://github.com/tqchen/recommonmark 5 | 6 | Type make html in the doc folder. 7 | 8 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/index.md: -------------------------------------------------------------------------------- 1 | DMLC-Core Documentation 2 | ======================= 3 | DMLC Core contains the common codebase that helps us build machine learning toolkits more easily. 4 | 5 | Contents 6 | -------- 7 | * [Parameter Structure for Machine Learning](parameter.md) 8 | * [Doxygen C++ API Reference](https://dmlc-core.readthedocs.org/en/latest/doxygen) 9 | 10 | Indices and tables 11 | ------------------ 12 | 13 | ```eval_rst 14 | * :ref:`genindex` 15 | * :ref:`modindex` 16 | * :ref:`search` 17 | ``` 18 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/sphinx_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Helper utility function for customization.""" 3 | import sys 4 | import os 5 | import docutils 6 | import subprocess 7 | 8 | if os.environ.get('READTHEDOCS', None) == 'True': 9 | subprocess.call('cd ..; rm -rf recommonmark;' + 10 | 'git clone https://github.com/tqchen/recommonmark', shell=True) 11 | 12 | sys.path.insert(0, os.path.abspath('../recommonmark/')) 13 | from recommonmark import parser, transform 14 | 15 | MarkdownParser = parser.CommonMarkParser 16 | AutoStructify = transform.AutoStructify 17 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/example/dmlc_example.mk: -------------------------------------------------------------------------------- 1 | ALL_EXAMPLE=example/parameter 2 | 3 | 4 | example/parameter: example/parameter.cc libdmlc.a 5 | 6 | $(ALL_EXAMPLE) : 7 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 8 | 9 | 10 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/include/dmlc/common.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file common.h 4 | * \brief defines some common utility functions.
5 | */ 6 | #ifndef DMLC_COMMON_H_ 7 | #define DMLC_COMMON_H_ 8 | 9 | #include <vector> 10 | #include <string> 11 | #include <sstream> 12 | 13 | namespace dmlc { 14 | /*! 15 | * \brief Split a string by delimiter 16 | * \param s String to be split. 17 | * \param delim The delimiter. 18 | * \return the split vector of strings. 19 | */ 20 | inline std::vector<std::string> Split(const std::string& s, char delim) { 21 | std::string item; 22 | std::istringstream is(s); 23 | std::vector<std::string> ret; 24 | while (std::getline(is, item, delim)) { 25 | ret.push_back(item); 26 | } 27 | return ret; 28 | } 29 | } // namespace dmlc 30 | 31 | #endif // DMLC_COMMON_H_ 32 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/include/dmlc/omp.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file omp.h 4 | * \brief header to handle OpenMP compatibility issues 5 | */ 6 | #ifndef DMLC_OMP_H_ 7 | #define DMLC_OMP_H_ 8 | #if defined(_OPENMP) 9 | #include <omp.h> 10 | #else 11 | #ifndef DISABLE_OPENMP 12 | // use pragma message instead of warning 13 | #pragma message("Warning: OpenMP is not available, " \ 14 | "project will be compiled into single-thread code. " \ 15 | "Use OpenMP-enabled compiler to get benefit of multi-threading.") 16 | #endif 17 | //! \cond Doxygen_Suppress 18 | inline int omp_get_thread_num() { return 0; } 19 | inline int omp_get_num_threads() { return 1; } 20 | inline int omp_get_max_threads() { return 1; } 21 | inline int omp_get_num_procs() { return 1; } 22 | inline void omp_set_num_threads(int nthread) {} 23 | #endif 24 | // loop variable used in openmp 25 | namespace dmlc { 26 | #ifdef _MSC_VER 27 | typedef int omp_uint; 28 | typedef long omp_ulong; // NOLINT(*) 29 | #else 30 | typedef unsigned omp_uint; 31 | typedef unsigned long omp_ulong; // NOLINT(*) 32 | #endif 33 | //! \endcond 34 | } // namespace dmlc 35 | #endif // DMLC_OMP_H_ 36 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/include/dmlc/timer.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file timer.h 4 | * \brief cross platform timer for timing 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_TIMER_H_ 8 | #define DMLC_TIMER_H_ 9 | 10 | #include "base.h" 11 | 12 | #if DMLC_USE_CXX11 13 | #include <chrono> 14 | #endif 15 | 16 | #include <time.h> 17 | #ifdef __MACH__ 18 | #include <mach/clock.h> 19 | #include <mach/mach.h> 20 | #endif 21 | #include "./logging.h" 22 | 23 | namespace dmlc { 24 | /*!
25 | * \brief return time in seconds 26 | */ 27 | inline double GetTime(void) { 28 | #if DMLC_USE_CXX11 29 | return std::chrono::duration<double>( 30 | std::chrono::high_resolution_clock::now().time_since_epoch()).count(); 31 | #elif defined __MACH__ 32 | clock_serv_t cclock; 33 | mach_timespec_t mts; 34 | host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); 35 | CHECK(clock_get_time(cclock, &mts) == 0) << "failed to get time"; 36 | mach_port_deallocate(mach_task_self(), cclock); 37 | return static_cast<double>(mts.tv_sec) + static_cast<double>(mts.tv_nsec) * 1e-9; 38 | #else 39 | #if defined(__unix__) || defined(__linux__) 40 | timespec ts; 41 | CHECK(clock_gettime(CLOCK_REALTIME, &ts) == 0) << "failed to get time"; 42 | return static_cast<double>(ts.tv_sec) + static_cast<double>(ts.tv_nsec) * 1e-9; 43 | #else 44 | return static_cast<double>(time(NULL)); 45 | #endif 46 | #endif 47 | } 48 | } // namespace dmlc 49 | #endif // DMLC_TIMER_H_ 50 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/packages.mk: -------------------------------------------------------------------------------- 1 | # Makefile for easily installing dependencies 2 | 3 | # List of packages here 4 | .PHONY: gtest lz4 5 | 6 | # rules for gtest 7 | ${CACHE_PREFIX}/include/gtest: 8 | rm -rf gtest release-1.7.0.zip 9 | wget https://github.com/google/googletest/archive/release-1.7.0.zip 10 | unzip release-1.7.0.zip 11 | mv googletest-release-1.7.0 gtest 12 | cd gtest; g++ -Iinclude -pthread -c src/gtest-all.cc -o gtest-all.o; cd .. 13 | ar -rv libgtest.a gtest/gtest-all.o 14 | mkdir -p ${CACHE_PREFIX}/include ${CACHE_PREFIX}/lib 15 | cp -r gtest/include/gtest ${CACHE_PREFIX}/include 16 | mv libgtest.a ${CACHE_PREFIX}/lib 17 | rm -rf release-1.7.0.zip 18 | 19 | gtest: | ${CACHE_PREFIX}/include/gtest 20 | 21 | lz4: ${CACHE_PREFIX}/include/lz4.h 22 | 23 | ${CACHE_PREFIX}/include/lz4.h: 24 | rm -rf lz4 25 | git clone https://github.com/Cyan4973/lz4 26 | cd lz4; make; make install PREFIX=${CACHE_PREFIX}; cd - 27 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/travis/travis_before_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # do nothing for now 3 | ls -alLR ${CACHE_PREFIX} -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/travis/travis_osx_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ${TRAVIS_OS_NAME} != "osx" ]; then 4 | exit 0 5 | fi 6 | 7 | brew update 8 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/travis/travis_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # main script of travis 4 | if [ ${TASK} == "lint" ]; then 5 | make lint || exit -1 6 | make doxygen 2>log.txt 7 | (cat log.txt| grep -v ENABLE_PREPROCESSING |grep -v "unsupported tag" |grep warning) && exit -1 8 | exit 0 9 | fi 10 | 11 | if [ ${TRAVIS_OS_NAME} == "osx" ]; then 12 | export NO_OPENMP=1 13 | fi 14 | 15 | if [ ${TASK} == "unittest_gtest" ]; then 16 | cp make/config.mk .
17 | make -f scripts/packages.mk gtest 18 | if [ ${TRAVIS_OS_NAME} != "osx" ]; then 19 | echo "USE_S3=1" >> config.mk 20 | echo "export CXX = g++-4.8" >> config.mk 21 | else 22 | echo "USE_S3=0" >> config.mk 23 | echo "USE_OPENMP=0" >> config.mk 24 | fi 25 | echo "GTEST_PATH="${CACHE_PREFIX} >> config.mk 26 | echo "BUILD_TEST=1" >> config.mk 27 | make all || exit -1 28 | test/unittest/dmlc_unittest || exit -1 29 | fi 30 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/src/io/azure_filesys.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file azure_filesys.h 4 | * \brief Azure access module 5 | * \author Mu Li 6 | */ 7 | #ifndef DMLC_IO_AZURE_FILESYS_H_ 8 | #define DMLC_IO_AZURE_FILESYS_H_ 9 | 10 | #include <vector> 11 | #include <string> 12 | #include "./filesys.h" 13 | 14 | namespace dmlc { 15 | namespace io { 16 | 17 | /*! \brief Microsoft Azure Blob filesystem */ 18 | class AzureFileSystem : public FileSystem { 19 | public: 20 | virtual ~AzureFileSystem() {} 21 | 22 | virtual FileInfo GetPathInfo(const URI &path) { return FileInfo(); } 23 | 24 | virtual void ListDirectory(const URI &path, std::vector<FileInfo> *out_list); 25 | 26 | virtual Stream *Open(const URI &path, const char* const flag, bool allow_null) { 27 | return NULL; 28 | } 29 | 30 | virtual SeekStream *OpenForRead(const URI &path, bool allow_null) { 31 | return NULL; 32 | } 33 | 34 | /*! 35 | * \brief get a singleton of AzureFileSystem when needed 36 | * \return a singleton instance 37 | */ 38 | inline static AzureFileSystem *GetInstance(void) { 39 | static AzureFileSystem instance; 40 | return &instance; 41 | } 42 | 43 | private: 44 | /*! \brief constructor */ 45 | AzureFileSystem(); 46 | 47 | /*! \brief Azure storage account name */ 48 | std::string azure_account_; 49 | 50 | /*! \brief Azure storage account key */ 51 | std::string azure_key_; 52 | }; 53 | 54 | } // namespace io 55 | } // namespace dmlc 56 | 57 | #endif // DMLC_IO_AZURE_FILESYS_H_ 58 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/src/io/line_split.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file line_split.h 4 | * \brief base class implementation of input splitter 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_LINE_SPLIT_H_ 8 | #define DMLC_IO_LINE_SPLIT_H_ 9 | 10 | #include <cstdio> 11 | #include <cstring> 12 | #include <vector> 13 | #include <string> 14 | #include <algorithm> 15 | #include "./input_split_base.h" 16 | 17 | namespace dmlc { 18 | namespace io { 19 | /*! \brief class that split the files by line */ 20 | class LineSplitter : public InputSplitBase { 21 | public: 22 | LineSplitter(FileSystem *fs, 23 | const char *uri, 24 | unsigned rank, 25 | unsigned nsplit) { 26 | this->Init(fs, uri, 1); 27 | this->ResetPartition(rank, nsplit); 28 | } 29 | 30 | virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk); 31 | protected: 32 | virtual size_t SeekRecordBegin(Stream *fi); 33 | virtual const char* 34 | FindLastRecordBegin(const char *begin, const char *end); 35 | }; 36 | } // namespace io 37 | } // namespace dmlc 38 | #endif // DMLC_IO_LINE_SPLIT_H_ 39 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/src/io/recordio_split.h: -------------------------------------------------------------------------------- 1 | /*!
2 | * Copyright (c) 2015 by Contributors 3 | * \file recordio_split.h 4 | * \brief input split that splits recordio files 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_RECORDIO_SPLIT_H_ 8 | #define DMLC_IO_RECORDIO_SPLIT_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "./input_split_base.h" 17 | 18 | namespace dmlc { 19 | namespace io { 20 | /*! \brief class that split the files by line */ 21 | class RecordIOSplitter : public InputSplitBase { 22 | public: 23 | RecordIOSplitter(FileSystem *fs, 24 | const char *uri, 25 | unsigned rank, 26 | unsigned nsplit) { 27 | this->Init(fs, uri, 4); 28 | this->ResetPartition(rank, nsplit); 29 | } 30 | 31 | virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk); 32 | 33 | protected: 34 | virtual size_t SeekRecordBegin(Stream *fi); 35 | virtual const char* 36 | FindLastRecordBegin(const char *begin, const char *end); 37 | }; 38 | } // namespace io 39 | } // namespace dmlc 40 | #endif // DMLC_IO_RECORDIO_SPLIT_H_ 41 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/.gitignore: -------------------------------------------------------------------------------- 1 | *_test 2 | *.csv -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/README.md: -------------------------------------------------------------------------------- 1 | This folder contains testcases for the project 2 | 3 | test scripts for s3: 4 | 5 | `test.sh` 6 | 7 | ```bash 8 | for r in {0..10}; do 9 | file=data/${RANDOM} 10 | start=`date +'%s.%N'` 11 | ./filesys_test cat s3://dmlc/ilsvrc12/val.rec >$file 12 | # ./filesys_test cat s3://dmlc/cifar10/train.rec >$file 13 | end=`date +'%s.%N'` 14 | res=$(echo "$end - $start" | bc -l) 15 | md5=`md5sum $file` 16 | rm $file 17 | echo "job $1, rp $r, $md5, time $res" 18 | done 19 | echo "job $1 done" 20 | ``` 21 | 22 | `run.sh` 23 | 24 | ```bash 25 | mkdir -p data 26 | rm -f data/* 27 | for i in {0..9}; do 28 | bash test.sh $i & 29 | sleep 1 30 | done 31 | wait 32 | ``` 33 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/dataiter_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 4) { 6 | printf("Usage: filename partid npart [format]\n"); 7 | return 0; 8 | } 9 | char libsvm[10] = "libsvm"; 10 | char* format; 11 | if (argc > 4) { 12 | format = argv[4]; 13 | } else { 14 | format = libsvm; 15 | } 16 | 17 | using namespace dmlc; 18 | RowBlockIter *iter = 19 | RowBlockIter::Create( 20 | argv[1], atoi(argv[2]), atoi(argv[3]), format); 21 | double tstart = GetTime(); 22 | size_t bytes_read = 0; 23 | while (iter->Next()) { 24 | const RowBlock &batch = iter->Value(); 25 | bytes_read += batch.MemCostBytes(); 26 | double tdiff = GetTime() - tstart; 27 | LOG(INFO) << (bytes_read >> 20UL) << 28 | " MB read " << ((bytes_read >> 20UL) / tdiff)<< " MB/sec"; 29 | } 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/dmlc_test.mk: -------------------------------------------------------------------------------- 1 | TEST=test/filesys_test test/dataiter_test\ 2 | test/iostream_test test/recordio_test test/split_read_test\ 3 | test/stream_read_test test/split_test test/libsvm_parser_test\ 4 | test/libfm_parser_test test/split_repeat_read_test 
test/strtonum_test\ 5 | test/logging_test test/parameter_test test/registry_test\ 6 | test/csv_parser_test 7 | 8 | test/filesys_test: test/filesys_test.cc src/io/*.h libdmlc.a 9 | test/dataiter_test: test/dataiter_test.cc libdmlc.a 10 | test/iostream_test: test/iostream_test.cc libdmlc.a 11 | test/recordio_test: test/recordio_test.cc libdmlc.a 12 | test/split_read_test: test/split_read_test.cc libdmlc.a 13 | test/split_repeat_read_test: test/split_repeat_read_test.cc libdmlc.a 14 | test/stream_read_test: test/stream_read_test.cc libdmlc.a 15 | test/split_test: test/split_test.cc libdmlc.a 16 | test/libsvm_parser_test: test/libsvm_parser_test.cc src/data/libsvm_parser.h libdmlc.a 17 | test/libfm_parser_test: test/libfm_parser_test.cc src/data/libfm_parser.h libdmlc.a 18 | test/csv_parser_test: test/csv_parser_test.cc src/data/csv_parser.h libdmlc.a 19 | test/strtonum_test: test/strtonum_test.cc src/data/strtonum.h 20 | test/logging_test: test/logging_test.cc 21 | test/parameter_test: test/parameter_test.cc 22 | test/registry_test: test/registry_test.cc 23 | 24 | $(TEST) : 25 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 26 | 27 | include test/unittest/dmlc_unittest.mk 28 | 29 | ALL_TEST=$(TEST) $(UNITTEST) 30 | ALL_TEST_OBJ=$(UNITTEST_OBJ) 31 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/iostream_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 2) { 6 | printf("Usage: \n"); 7 | return 0; 8 | } 9 | {// output 10 | dmlc::Stream *fs = dmlc::Stream::Create(argv[1], "w"); 11 | dmlc::ostream os(fs); 12 | os << "hello-world " << 1e-10<< std::endl; 13 | delete fs; 14 | } 15 | {// input 16 | std::string name; 17 | double data; 18 | dmlc::Stream *fs = dmlc::Stream::Create(argv[1], "r"); 19 | dmlc::istream is(fs); 20 | is >> name >> data; 21 | std::cout << name << " " << data << std::endl; 22 | delete fs; 23 | } 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/libfm_parser_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../src/data/libfm_parser.h" 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 5) { 9 | printf("Usage: partid npart nthread\n"); 10 | return 0; 11 | } 12 | using namespace dmlc; 13 | InputSplit *split = InputSplit::Create(argv[1], 14 | atoi(argv[2]), 15 | atoi(argv[3]), 16 | "text"); 17 | int nthread = atoi(argv[4]); 18 | data::LibFMParser parser(split, nthread); 19 | double tstart = GetTime(); 20 | size_t bytes_read = 0; 21 | size_t bytes_expect = 10UL << 20UL; 22 | size_t num_ex = 0; 23 | while (parser.Next()) { 24 | bytes_read = parser.BytesRead(); 25 | num_ex += parser.Value().size; 26 | std::cout << "read bytes:" << bytes_read << " batch size:" << num_ex << std::endl; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu examples, %lu MB read, %g MB/sec\n", 30 | num_ex, bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | return 0; 36 | } -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/libsvm_parser_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a 
InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "../src/data/libsvm_parser.h" 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc < 5) { 10 | printf("Usage: partid npart nthread\n"); 11 | return 0; 12 | } 13 | using namespace dmlc; 14 | InputSplit *split = InputSplit::Create(argv[1], 15 | atoi(argv[2]), 16 | atoi(argv[3]), 17 | "text"); 18 | int nthread = atoi(argv[4]); 19 | data::LibSVMParser parser(split, nthread); 20 | double tstart = GetTime(); 21 | size_t bytes_read = 0; 22 | size_t bytes_expect = 10UL << 20UL; 23 | size_t num_ex = 0; 24 | while (parser.Next()) { 25 | bytes_read = parser.BytesRead(); 26 | num_ex += parser.Value().size; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu examples, %lu MB read, %g MB/sec\n", 30 | num_ex, bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/logging_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(void) { 4 | LOG(INFO) << "hello"; 5 | LOG(ERROR) << "error"; 6 | try { 7 | LOG(FATAL)<<'a'<<11<<33; 8 | } catch (dmlc::Error e) { 9 | LOG(INFO) << "catch " << e.what(); 10 | } 11 | CHECK(2!=3) << "test"; 12 | CHECK(2==3) << "test"; 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/split_read_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 4) { 9 | printf("Usage: partid npart\n"); 10 | return 0; 11 | } 12 | using namespace dmlc; 13 | InputSplit *split = InputSplit::Create(argv[1], 14 | atoi(argv[2]), 15 | atoi(argv[3]), 16 | "text"); 17 | std::vector data; 18 | InputSplit::Blob blb; 19 | double tstart = GetTime(); 20 | size_t bytes_read = 0; 21 | size_t bytes_expect = 10UL << 20UL; 22 | while (split->NextRecord(&blb)) { 23 | std::string dat = std::string((char*)blb.dptr, 24 | blb.size); 25 | data.push_back(dat); 26 | bytes_read += blb.size; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu MB read, %g MB/sec\n", 30 | bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | delete split; 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/split_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc < 5) { 10 | printf("Usage: partid npart\n"); 11 | return 0; 12 | } 13 | using namespace dmlc; 14 | InputSplit *split = InputSplit::Create(argv[1], 15 | atoi(argv[2]), 16 | atoi(argv[3]), 17 | "text"); 18 | InputSplit::Blob blb; 19 | while (split->NextChunk(&blb)) { 20 | std::cout << std::string((char*)blb.dptr, blb.size); 21 | } 22 | delete split; 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/stream_read_test.cc: -------------------------------------------------------------------------------- 1 
| // test reading speed from a Stream 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 3) { 9 | printf("Usage: uri buffersize [skip-proc]\n"); 10 | return 0; 11 | } 12 | int skip_proc = 0; 13 | if (argc > 3) { 14 | skip_proc = atoi(argv[3]); 15 | } 16 | size_t sz = atol(argv[2]); 17 | std::string buffer; buffer.resize(sz); 18 | using namespace dmlc; 19 | Stream *fi = Stream::Create(argv[1], "r", true); 20 | CHECK(fi != NULL) << "cannot open " << argv[1]; 21 | double tstart = GetTime(); 22 | size_t size; 23 | size_t bytes_read = 0; 24 | size_t bytes_expect = 10UL << 20UL; 25 | while ((size = fi->Read(BeginPtr(buffer), sz)) != 0) { 26 | int cnt = 0; 27 | if (skip_proc == 0) { 28 | //#pragma omp parallel for reduction(+:cnt) 29 | for (size_t i = 0; i < size; ++i) { 30 | if (buffer[i] == '\n' || buffer[i] == '\r') { 31 | buffer[i] = '\0'; ++ cnt; 32 | } 33 | } 34 | } 35 | bytes_read += size; 36 | double tdiff = GetTime() - tstart; 37 | if (bytes_read >= bytes_expect) { 38 | printf("%lu MB read, %g MB/sec, cnt=%d\n", 39 | bytes_read >> 20UL, 40 | (bytes_read >> 20UL) / tdiff, cnt); 41 | bytes_expect += 10UL << 20UL; 42 | } 43 | } 44 | delete fi; 45 | return 0; 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/strtonum_test.cc: -------------------------------------------------------------------------------- 1 | #include "../src/data/strtonum.h" 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | using namespace dmlc; 6 | 7 | // float 8 | std::vector f = { 9 | "1234567901234", "+12345.6789", "-0.00123", "+0123.234e-2", 10 | "-234234.123123e20", "3.1029831e+38", "000.123e-28"}; 11 | for (size_t i = 0; i < f.size(); ++i) { 12 | float v1 = data::atof(f[i].c_str()); 13 | float v2 = atof(f[i].c_str()); 14 | CHECK_EQ(v1, v2); 15 | } 16 | 17 | // long 18 | std::vector l = { 19 | "2147483647", "+12345", "-123123", "-2147483648" 20 | }; 21 | for (size_t i = 0; i < l.size(); ++i) { 22 | long v1 = data::atol(l[i].c_str()); 23 | long v2 = atol(l[i].c_str()); 24 | CHECK_EQ(v1, v2); 25 | } 26 | 27 | // uint64 28 | std::vector ull = { 29 | "2147483647", "+12345", "18446744073709551615" 30 | }; 31 | for (size_t i = 0; i < ull.size(); ++i) { 32 | unsigned long long v1 = data::strtoull(ull[i].c_str(), 0, 10); 33 | unsigned long long v2 = strtoull(ull[i].c_str(), 0, 10); 34 | CHECK_EQ(v1, v2); 35 | } 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/.gitignore: -------------------------------------------------------------------------------- 1 | dmlc_unittest 2 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/dmlc_unittest.mk: -------------------------------------------------------------------------------- 1 | UTEST_ROOT=test/unittest 2 | UNITTEST=$(UTEST_ROOT)/dmlc_unittest 3 | UNITTEST_SRC=$(wildcard $(UTEST_ROOT)/*.cc) 4 | UNITTEST_OBJ=$(patsubst %.cc,%.o,$(UNITTEST_SRC)) 5 | 6 | GTEST_LIB=$(GTEST_PATH)/lib/ 7 | GTEST_INC=$(GTEST_PATH)/include/ 8 | 9 | $(UTEST_ROOT)/%.o : $(UTEST_ROOT)/%.cc libdmlc.a 10 | $(CXX) $(CFLAGS) -I$(GTEST_INC) -o $@ -c $< 11 | 12 | $(UNITTEST) : $(UNITTEST_OBJ) 13 | $(CXX) $(CFLAGS) -L$(GTEST_LIB) -o $@ $^ libdmlc.a $(LDFLAGS) -lgtest 14 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/unittest_array_view.cc: 
-------------------------------------------------------------------------------- 1 | #include <dmlc/array_view.h> 2 | #include <dmlc/logging.h> 3 | #include <gtest/gtest.h> 4 | 5 | void ArrayViewTest(dmlc::array_view<int> view, int base) { 6 | int cnt = base; 7 | for (int v : view) { 8 | CHECK_EQ(v, cnt); 9 | ++cnt; 10 | } 11 | } 12 | 13 | TEST(ArrayView, Basic) { 14 | std::vector<int> vec{0, 1, 2}; 15 | ArrayViewTest(vec, 0); 16 | int arr[] = {1, 2, 3}; 17 | ArrayViewTest(dmlc::array_view<int>(arr, arr + 3), 1); 18 | dmlc::array_view<int> a = vec; 19 | CHECK_EQ(a.size(), vec.size()); 20 | } 21 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/unittest_logging.cc: -------------------------------------------------------------------------------- 1 | // Copyright by Contributors 2 | #define DMLC_LOG_FATAL_THROW 0 3 | #include <dmlc/logging.h> 4 | #include <gtest/gtest.h> 5 | 6 | using namespace std; 7 | 8 | TEST(Logging, basics) { 9 | LOG(INFO) << "hello"; 10 | LOG(ERROR) << "error"; 11 | 12 | int x = 1, y = 1; 13 | CHECK_EQ(x, y); 14 | CHECK_GE(x, y); 15 | 16 | int *z = &x; 17 | CHECK_EQ(*CHECK_NOTNULL(z), x); 18 | 19 | ASSERT_DEATH(CHECK_NE(x, y), ".*"); 20 | } 21 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/unittest_main.cc: -------------------------------------------------------------------------------- 1 | // Copyright by Contributors 2 | #include <gtest/gtest.h> 3 | 4 | int main(int argc, char ** argv) { 5 | testing::InitGoogleTest(&argc, argv); 6 | testing::FLAGS_gtest_death_test_style = "threadsafe"; 7 | return RUN_ALL_TESTS(); 8 | } 9 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/dmlc-submit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) 5 | sys.path.insert(0, curr_path) 6 | 7 | from dmlc_tracker import submit 8 | 9 | submit.main() 10 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/dmlc_tracker/__init__.py: -------------------------------------------------------------------------------- 1 | """DMLC Tracker modules for running jobs on different platforms.""" 2 | from __future__ import absolute_import 3 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | .classpath 3 | .project 4 | *.jar 5 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/README.md: -------------------------------------------------------------------------------- 1 | DMLC YARN AppMaster 2 | =================== 3 | * This folder contains the application code that allows rabit to run on YARN. 4 | * See [tracker](../) for job submission. 5 | - run ```./build.sh``` to build the jar before using the script 6 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/build.bat: -------------------------------------------------------------------------------- 1 | mkdir bin 2 | 3 | for /f %%i in ('%HADOOP_HOME%\bin\hadoop classpath') do set CPATH=%%i 4 | %JAVA_HOME%/bin/javac -cp %CPATH% -d bin src/main/java/org/apache/hadoop/yarn/dmlc/*.java 5 | %JAVA_HOME%/bin/jar cf dmlc-yarn.jar -C bin .
6 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -d bin ]; then 3 | mkdir bin 4 | fi 5 | 6 | CPATH=`${HADOOP_HOME}/bin/hadoop classpath` 7 | ${JAVA_HOME}/bin/javac -cp $CPATH -d bin src/main/java/org/apache/hadoop/yarn/dmlc/*.java 8 | ${JAVA_HOME}/bin/jar cf dmlc-yarn.jar -C bin . 9 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/TaskRecord.java: -------------------------------------------------------------------------------- 1 | package org.apache.hadoop.yarn.dmlc; 2 | 3 | import org.apache.hadoop.yarn.api.records.Container; 4 | import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest; 5 | 6 | /** 7 | * data structure to hold the task information 8 | */ 9 | public class TaskRecord { 10 | // task id of the task 11 | public int taskId = 0; 12 | // role of current node 13 | public String taskRole = "worker"; 14 | // number of failed attempts to run the task 15 | public int attemptCounter = 0; 16 | // container request, can be null if task is already running 17 | public ContainerRequest containerRequest = null; 18 | // running container, can be null if the task is not launched 19 | public Container container = null; 20 | // whether we have requested abortion of this task 21 | public boolean abortRequested = false; 22 | 23 | public TaskRecord(int taskId, String role) { 24 | this.taskId = taskId; 25 | this.taskRole = role; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/windows/.gitignore: -------------------------------------------------------------------------------- 1 | Debug 2 | *suo 3 | *.dll 4 | *i386 5 | *x64 6 | ipch 7 | *.filters 8 | *.user 9 | *sdf 10 | Release 11 | Debug 12 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/windows/README.md: -------------------------------------------------------------------------------- 1 | MSVC Project 2 | ==== 3 | The solution has been created with Visual Studio Express 2010. 4 | It is a preliminary project for testing Windows compatibility. 5 | It does not come with a warranty. 6 | -------------------------------------------------------------------------------- /xgboost/doc/.gitignore: -------------------------------------------------------------------------------- 1 | html 2 | latex 3 | *.sh 4 | _* 5 | doxygen 6 | parser.py 7 | *.pyc 8 | web-data 9 | -------------------------------------------------------------------------------- /xgboost/doc/R-package/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /xgboost/doc/R-package/Makefile: -------------------------------------------------------------------------------- 1 | # This is the makefile for compiling Rmarkdown files into md files with results.
2 | PKGROOT=../../R-package 3 | 4 | # ADD The Markdown to be built here, with suffix md 5 | discoverYourData.md: $(PKGROOT)/vignettes/discoverYourData.Rmd 6 | xgboostPresentation.md: $(PKGROOT)/vignettes/xgboostPresentation.Rmd 7 | 8 | # General Rules for build rmarkdowns, need knitr 9 | %.md: 10 | Rscript -e \ 11 | "require(methods);"\ 12 | "require(knitr);"\ 13 | "knitr::opts_knit\$$set(root.dir=\".\");"\ 14 | "knitr::opts_chunk\$$set(fig.path=\"../web-data/xgboost/knitr/$(basename $@)-\");"\ 15 | "knitr::knit(\"$+\")" 16 | -------------------------------------------------------------------------------- /xgboost/doc/R-package/index.md: -------------------------------------------------------------------------------- 1 | XGBoost R Package 2 | ================= 3 | [![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](http://cran.r-project.org/web/packages/xgboost) 4 | [![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](http://cran.rstudio.com/web/packages/xgboost/index.html) 5 | 6 | 7 | You have found the XGBoost R package! 8 | 9 | Get Started 10 | ----------- 11 | * Check out the [Installation Guide](../build.md) for instructions on installing xgboost, and the [Tutorials](#tutorials) for examples of using xgboost for various tasks. 12 | * Please visit the [walk through example](../../R-package/demo). 13 | 14 | Tutorials 15 | --------- 16 | - [Introduction to XGBoost in R](xgboostPresentation.md) 17 | - [Discover your data with XGBoost in R](discoverYourData.md) 18 | -------------------------------------------------------------------------------- /xgboost/doc/README: -------------------------------------------------------------------------------- 1 | The documentation of xgboost is generated with recommonmark and sphinx. 2 | 3 | You can build it locally by typing "make html" in this folder. 4 | - clone https://github.com/tqchen/recommonmark to root 5 | - type make html 6 | 7 | Check out https://recommonmark.readthedocs.org for a guide on how to write markdown with the extensions used in this doc, such as math formulas and tables of content. 8 | -------------------------------------------------------------------------------- /xgboost/doc/cli/index.md: -------------------------------------------------------------------------------- 1 | # XGBoost Command Line version 2 | 3 | See [XGBoost Command Line walkthrough](https://github.com/dmlc/xgboost/blob/master/demo/binary_classification/README.md) 4 | -------------------------------------------------------------------------------- /xgboost/doc/how_to/index.md: -------------------------------------------------------------------------------- 1 | # XGBoost How To 2 | 3 | This page contains guidelines to use and develop XGBoost. 4 | 5 | ## Installation 6 | - [How to Install XGBoost](../build.md) 7 | 8 | ## Use XGBoost in Specific Ways 9 | - [Parameter tuning guide](param_tuning.md) 10 | - [Use out-of-core computation for large datasets](external_memory.md) 11 | 12 | ## Develop and Hack XGBoost 13 | - [Contribute to XGBoost](contribute.md) 14 | 15 | ## Frequently Asked Questions 16 | - [FAQ](../faq.md) 17 | -------------------------------------------------------------------------------- /xgboost/doc/index.md: -------------------------------------------------------------------------------- 1 | XGBoost Documentation 2 | ===================== 3 | This document is hosted at http://xgboost.readthedocs.org/. You can also browse most of the documents on GitHub directly. 4 | 5 | 6 | These are used to generate the index used in search.
7 | 8 | * [Python Package Document](python/index.md) 9 | * [R Package Document](R-package/index.md) 10 | * [Java/Scala Package Document](jvm/index.md) 11 | * [Julia Package Document](julia/index.md) 12 | * [CLI Package Document](cli/index.md) 13 | - [Howto Documents](how_to/index.md) 14 | - [Get Started Documents](get_started/index.md) 15 | - [Tutorials](tutorials/index.md) 16 | -------------------------------------------------------------------------------- /xgboost/doc/julia/index.md: -------------------------------------------------------------------------------- 1 | # XGBoost.jl 2 | 3 | See [XGBoost.jl Project page](https://github.com/dmlc/XGBoost.jl) -------------------------------------------------------------------------------- /xgboost/doc/python/index.md: -------------------------------------------------------------------------------- 1 | XGBoost Python Package 2 | ====================== 3 | This page contains links to all the Python-related documents for the Python package. 4 | To install the package, check out the [Build and Installation Instructions](../build.md). 5 | 6 | Contents 7 | -------- 8 | * [Python Overview Tutorial](python_intro.md) 9 | * [Learning to use XGBoost by Example](../../demo) 10 | * [Python API Reference](python_api.rst) 11 | -------------------------------------------------------------------------------- /xgboost/doc/python/python_api.rst: -------------------------------------------------------------------------------- 1 | Python API Reference 2 | ==================== 3 | This page gives the Python API reference of xgboost; please also refer to the Python Package Introduction for more information about the Python package. 4 | 5 | The document on this page is automatically generated by sphinx. The content does not render on GitHub; you can view it at http://xgboost.readthedocs.org/en/latest/python/python_api.html 6 | 7 | Core Data Structure 8 | ------------------- 9 | .. automodule:: xgboost.core 10 | 11 | .. autoclass:: xgboost.DMatrix 12 | :members: 13 | :show-inheritance: 14 | 15 | .. autoclass:: xgboost.Booster 16 | :members: 17 | :show-inheritance: 18 | 19 | 20 | Learning API 21 | ------------ 22 | .. automodule:: xgboost.training 23 | 24 | .. autofunction:: xgboost.train 25 | 26 | .. autofunction:: xgboost.cv 27 | 28 | 29 | Scikit-Learn API 30 | ---------------- 31 | .. automodule:: xgboost.sklearn 32 | .. autoclass:: xgboost.XGBRegressor 33 | :members: 34 | :show-inheritance: 35 | .. autoclass:: xgboost.XGBClassifier 36 | :members: 37 | :show-inheritance: 38 | 39 | Plotting API 40 | ------------ 41 | .. automodule:: xgboost.plotting 42 | 43 | .. autofunction:: xgboost.plot_importance 44 | 45 | .. autofunction:: xgboost.plot_tree 46 | 47 | ..
48 | 
--------------------------------------------------------------------------------
/xgboost/doc/sphinx_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Helper utility functions for customization."""
3 | import sys
4 | import os
5 | import docutils
6 | import subprocess
7 | 
8 | READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None)
9 | 
10 | if not os.path.exists('../recommonmark'):
11 |     subprocess.call('cd ..; rm -rf recommonmark;' +
12 |                     'git clone https://github.com/tqchen/recommonmark', shell = True)
13 | else:
14 |     subprocess.call('cd ../recommonmark/; git pull', shell=True)
15 | 
16 | if not os.path.exists('web-data'):
17 |     subprocess.call('rm -rf web-data;' +
18 |                     'git clone https://github.com/dmlc/web-data', shell = True)
19 | else:
20 |     subprocess.call('cd web-data; git pull', shell=True)
21 | 
22 | 
23 | sys.path.insert(0, os.path.abspath('../recommonmark/'))
24 | sys.stderr.write('READTHEDOCS=%s\n' % (READTHEDOCS_BUILD))
25 | 
26 | 
27 | from recommonmark import parser, transform
28 | 
29 | MarkdownParser = parser.CommonMarkParser
30 | AutoStructify = transform.AutoStructify
--------------------------------------------------------------------------------
/xgboost/doc/tutorials/index.md:
--------------------------------------------------------------------------------
1 | # XGBoost Tutorials
2 | 
3 | This section contains the official tutorials of the XGBoost package.
4 | See [Awesome XGBoost](https://github.com/dmlc/xgboost/tree/master/demo) for links to more resources.
5 | 
6 | ## Contents
7 | - [Introduction to Boosted Trees](../model.md)
8 | - [Distributed XGBoost YARN on AWS](aws_yarn.md)
9 | - [DART booster](dart.md)
10 | - [Monotonic Constraints](monotonic.md)
11 | 
--------------------------------------------------------------------------------
/xgboost/include/xgboost/logging.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2015 by Contributors
3 |  * \file logging.h
4 |  * \brief defines console logging options for xgboost.
5 |  *  Use to enforce unified print behavior.
6 |  *  For debug loggers, use LOG(INFO) and LOG(ERROR).
7 |  */
8 | #ifndef XGBOOST_LOGGING_H_
9 | #define XGBOOST_LOGGING_H_
10 | 
11 | #include 
12 | #include 
13 | #include "./base.h"
14 | 
15 | namespace xgboost {
16 | 
17 | class BaseLogger {
18 |  public:
19 |   BaseLogger() {
20 | #if XGBOOST_LOG_WITH_TIME
21 |     log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
22 | #endif
23 |   }
24 |   std::ostream& stream() { return log_stream_; }
25 | 
26 |  protected:
27 |   std::ostringstream log_stream_;
28 | };
29 | 
30 | class ConsoleLogger : public BaseLogger {
31 |  public:
32 |   ~ConsoleLogger();
33 | };
34 | 
35 | class TrackerLogger : public BaseLogger {
36 |  public:
37 |   ~TrackerLogger();
38 | };
39 | 
40 | // redefine the logging macros if they do not already exist
41 | #ifndef LOG
42 | #define LOG(severity) LOG_##severity.stream()
43 | #endif
44 | 
45 | // Use LOG(CONSOLE) to print messages to the console.
46 | #define LOG_CONSOLE ::xgboost::ConsoleLogger()
47 | // Use LOG(TRACKER) to print messages to the tracker.
48 | #define LOG_TRACKER ::xgboost::TrackerLogger()
49 | }  // namespace xgboost.
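// Illustrative usage (a sketch, not part of the original header; `num_row`
// and `iter` are hypothetical variables): each LOG(...) statement creates a
// temporary logger whose buffered message is flushed when it is destroyed at
// the end of the statement.
//   LOG(CONSOLE) << "loaded " << num_row << " rows";   // printed to stderr
//   LOG(TRACKER) << "iteration " << iter << " done";   // sent to the tracker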
50 | #endif // XGBOOST_LOGGING_H_ 51 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/.gitignore: -------------------------------------------------------------------------------- 1 | tracker.py 2 | build.sh -------------------------------------------------------------------------------- /xgboost/jvm-packages/checkstyle-suppressions.xml: -------------------------------------------------------------------------------- 1 | 17 | 18 | 21 | 22 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-example/LICENSE: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | ml.dmlc 8 | xgboost-jvm 9 | 0.7 10 | 11 | xgboost4j-spark 12 | 13 | 14 | 15 | org.apache.maven.plugins 16 | maven-assembly-plugin 17 | 18 | false 19 | 20 | 21 | 22 | 23 | 24 | 25 | ml.dmlc 26 | xgboost4j 27 | 0.7 28 | 29 | 30 | org.apache.spark 31 | spark-mllib_${scala.binary.version} 32 | ${spark.version} 33 | provided 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/Utils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package ml.dmlc.xgboost4j.scala.spark.params 18 | 19 | // based on org.apache.spark.util copy /paste 20 | private[spark] object Utils { 21 | 22 | def getSparkClassLoader: ClassLoader = getClass.getClassLoader 23 | 24 | def getContextOrSparkClassLoader: ClassLoader = 25 | Option(Thread.currentThread().getContextClassLoader).getOrElse(getSparkClassLoader) 26 | 27 | // scalastyle:off classforname 28 | /** Preferred alternative to Class.forName(className) */ 29 | def classForName(className: String): Class[_] = { 30 | Class.forName(className, true, getContextOrSparkClassLoader) 31 | // scalastyle:on classforname 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.logger.org.apache.spark=ERROR -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/SharedSparkContext.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package ml.dmlc.xgboost4j.scala.spark 18 | 19 | import org.apache.spark.{SparkConf, SparkContext} 20 | import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} 21 | 22 | trait SharedSparkContext extends FunSuite with BeforeAndAfter with BeforeAndAfterAll 23 | with Serializable { 24 | 25 | @transient protected implicit var sc: SparkContext = _ 26 | 27 | override def beforeAll() { 28 | val sparkConf = new SparkConf() 29 | .setMaster("local[*]") 30 | .setAppName("XGBoostSuite") 31 | .set("spark.driver.memory", "512m") 32 | .set("spark.ui.enabled", "false") 33 | 34 | sc = new SparkContext(sparkConf) 35 | } 36 | 37 | override def afterAll() { 38 | if (sc != null) { 39 | sc.stop() 40 | sc = null 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j/LICENSE: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 |  */
--------------------------------------------------------------------------------
/xgboost/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IEvaluation.java:
--------------------------------------------------------------------------------
1 | /*
2 |   Copyright (c) 2014 by Contributors
3 | 
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 | 
8 |   http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | package ml.dmlc.xgboost4j.java;
17 | 
18 | import java.io.Serializable;
19 | 
20 | /**
21 |  * interface for customized evaluation
22 |  *
23 |  * @author hzx
24 |  */
25 | public interface IEvaluation extends Serializable {
26 |   /**
27 |    * get the evaluation metric name
28 |    *
29 |    * @return evalMetric
30 |    */
31 |   String getMetric();
32 | 
33 |   /**
34 |    * evaluate with predictions and data
35 |    *
36 |    * @param predicts predictions as an array
37 |    * @param dmat data matrix to evaluate
38 |    * @return result of the metric
39 |    */
40 |   float eval(float[][] predicts, DMatrix dmat);
41 | }
42 | 
--------------------------------------------------------------------------------
/xgboost/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IObjective.java:
--------------------------------------------------------------------------------
1 | /*
2 |   Copyright (c) 2014 by Contributors
3 | 
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 | 
8 |   http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | package ml.dmlc.xgboost4j.java;
17 | 
18 | import java.io.Serializable;
19 | import java.util.List;
20 | 
21 | /**
22 |  * interface for customized objective functions
23 |  *
24 |  * @author hzx
25 |  */
26 | public interface IObjective extends Serializable {
27 |   /**
28 |    * user-defined objective function; returns the gradient and second-order gradient
29 |    *
30 |    * @param predicts untransformed margin predictions
31 |    * @param dtrain training data
32 |    * @return List with two float arrays, corresponding to the first-order and second-order gradients
33 |    */
34 |   List<float[]> getGradient(float[][] predicts, DMatrix dtrain);
35 | }
36 | 
--------------------------------------------------------------------------------
/xgboost/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostError.java:
--------------------------------------------------------------------------------
1 | /*
2 |   Copyright (c) 2014 by Contributors
3 | 
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 | 
8 |   http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | package ml.dmlc.xgboost4j.java;
17 | 
18 | /**
19 |  * custom error class for xgboost
20 |  *
21 |  * @author hzx
22 |  */
23 | public class XGBoostError extends Exception {
24 |   public XGBoostError(String message) {
25 |     super(message);
26 |   }
27 | }
28 | 
--------------------------------------------------------------------------------
/xgboost/make/mingw64.mk:
--------------------------------------------------------------------------------
1 | #-----------------------------------------------------------
2 | # xgboost: Configuration for MinGW (Windows 64bit)
3 | # This allows xgboost to be compiled on Windows using MinGW.
4 | # You will need to install a MinGW toolchain.
5 | # g++-4.6 or later is required.
6 | #
7 | # see config.mk for template.
8 | #-----------------------------------------------------------
9 | export CXX=g++ -m64
10 | export CC=gcc -m64
11 | 
12 | # Whether enable openmp support, needed for multi-threading.
13 | USE_OPENMP = 1
14 | 
15 | # whether use HDFS support during compile
16 | USE_HDFS = 0
17 | 
18 | # whether use AWS S3 support during compile
19 | USE_S3 = 0
20 | 
21 | # whether use Azure blob support during compile
22 | USE_AZURE = 0
23 | 
24 | # Rabit library version,
25 | # - librabit.a Normal distributed version.
26 | # - librabit_empty.a Non distributed mock version,
27 | LIB_RABIT = librabit_empty.a
28 | 
29 | DMLC_CFLAGS = -DDMLC_ENABLE_STD_THREAD=0
30 | ADD_CFLAGS = -DDMLC_ENABLE_STD_THREAD=0
--------------------------------------------------------------------------------
/xgboost/make/minimum.mk:
--------------------------------------------------------------------------------
1 | #-----------------------------------------------------
2 | # xgboost: minimum dependency configuration,
3 | # see config.mk for template.
4 | #----------------------------------------------------
5 | 
6 | # Whether enable openmp support, needed for multi-threading.
7 | USE_OPENMP = 0
8 | 
9 | # whether use HDFS support during compile
10 | USE_HDFS = 0
11 | 
12 | # whether use AWS S3 support during compile
13 | USE_S3 = 0
14 | 
15 | # whether use Azure blob support during compile
16 | USE_AZURE = 0
17 | 
18 | # Rabit library version,
19 | # - librabit.a Normal distributed version.
20 | # - librabit_empty.a Non distributed mock version,
21 | LIB_RABIT = librabit_empty.a
22 | 
--------------------------------------------------------------------------------
/xgboost/make/minimum_parallel.mk:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------
2 | # xgboost: minimum dependency configuration with Parallelization.
3 | # This configuration is standard but cannot run distributed computing.
4 | #
5 | # see config.mk for template.
6 | #------------------------------------------------------------------------
7 | 
8 | # Whether enable openmp support, needed for multi-threading.
9 | USE_OPENMP = 1 10 | 11 | # whether use HDFS support during compile 12 | USE_HDFS = 0 13 | 14 | # whether use AWS S3 support during compile 15 | USE_S3 = 0 16 | 17 | # whether use Azure blob support during compile 18 | USE_AZURE = 0 19 | 20 | # Rabit library version, 21 | # - librabit.a Normal distributed version. 22 | # - librabit_empty.a Non distributed mock version, 23 | LIB_RABIT = librabit_empty.a 24 | -------------------------------------------------------------------------------- /xgboost/make/travis.mk: -------------------------------------------------------------------------------- 1 | 2 | # the additional link flags you want to add 3 | ADD_LDFLAGS = 4 | 5 | # the additional compile flags you want to add 6 | ADD_CFLAGS = 7 | 8 | # Whether enable openmp support, needed for multi-threading. 9 | USE_OPENMP = 1 10 | 11 | # whether use HDFS support during compile 12 | USE_HDFS = 0 13 | 14 | # whether use AWS S3 support during compile 15 | USE_S3 = 0 16 | 17 | # whether use Azure blob support during compile 18 | USE_AZURE = 0 19 | 20 | # Rabit library version, 21 | # - librabit.a Normal distributed version. 22 | # - librabit_empty.a Non distributed mock version, 23 | LIB_RABIT = librabit.a 24 | 25 | # path to libjvm.so 26 | LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server 27 | 28 | # path to googletest and whether to measure coverage or not 29 | GTEST_PATH = 30 | WITH_COVER = 1 31 | 32 | # List of additional plugins, checkout plugin folder. 33 | # uncomment the following lines to include these plugins 34 | # you can also add your own plugin like this 35 | # 36 | XGB_PLUGINS += plugin/example/plugin.mk 37 | XGB_PLUGINS += plugin/lz4/plugin.mk 38 | XGB_PLUGINS += plugin/dense_parser/plugin.mk 39 | -------------------------------------------------------------------------------- /xgboost/nccl/.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. 
2 | /build 3 | Win32/ 4 | x64/ 5 | *.sdf 6 | *.user 7 | *.opensdf 8 | *.pdb 9 | *.suo 10 | windows/test/test/ 11 | *.VC.db 12 | -------------------------------------------------------------------------------- /xgboost/nccl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | project(nccl) 3 | find_package(CUDA 7.5 QUIET REQUIRED) 4 | 5 | set(NCCL_MAJOR 1) 6 | set(NCCL_MINOR 3) 7 | set(NCCL_PATCH 4) 8 | 9 | # Call add_subdirectory(nccl) after nvcc flags have been set in the parent project to propagate flags to nccl 10 | set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DNCCL_MAJOR=${NCCL_MAJOR} -DNCCL_MINOR=${NCCL_MINOR} -DNCCL_PATCH=${NCCL_PATCH} -DCUDA_MAJOR=7 -DCUDA_MINOR=5") 11 | file(GLOB SOURCES src/*.cu) 12 | 13 | cuda_add_library(${PROJECT_NAME} STATIC ${SOURCES}) 14 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/.gitignore: -------------------------------------------------------------------------------- 1 | /*.debhelper.log 2 | /*.debhelper 3 | /*.substvars 4 | /tmp/ 5 | /files 6 | /libnccl1/ 7 | /libnccl-dev/ 8 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/changelog.in: -------------------------------------------------------------------------------- 1 | nccl (${nccl:Major}.${nccl:Minor}.${nccl:Patch}-${deb:Revision}+cuda${cuda:Major}.${cuda:Minor}) trusty; urgency=medium 2 | 3 | * Automatic Debian package from build 4 | 5 | -- cudatools ${deb:Timestamp} 6 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/control.in: -------------------------------------------------------------------------------- 1 | Source: nccl 2 | Section: libs 3 | Maintainer: cudatools 4 | Priority: optional 5 | Build-depends: debhelper(>=9) 6 | Standards-Version: 3.9.5 7 | 8 | Package: libnccl${nccl:Major} 9 | Section: libs 10 | Architecture: ${deb:Arch} 11 | Depends: ${misc:Depends}, ${shlibs:Depends} 12 | Description: NVIDIA Collectives Communication Library (NCCL) Runtime 13 | NCCL (pronounced "Nickel") is a stand-alone library of standard collective 14 | communication routines for GPUs, such as all-gather, reduce, broadcast, etc., 15 | that have been optimized to achieve high bandwidth over PCIe. NCCL supports up 16 | to eight GPUs and can be used in either single- or multi-process (e.g., MPI) 17 | applications. 18 | 19 | Package: libnccl-dev 20 | Section: libdevel 21 | Architecture: ${deb:Arch} 22 | Depends: ${misc:Depends}, ${shlibs:Depends}, libnccl${nccl:Major} (= ${binary:Version}) 23 | Description: NVIDIA Collectives Communication Library (NCCL) Development Files 24 | NCCL (pronounced "Nickel") is a stand-alone library of standard collective 25 | communication routines for GPUs, such as all-gather, reduce, broadcast, etc., 26 | that have been optimized to achieve high bandwidth over PCIe. NCCL supports up 27 | to eight GPUs and can be used in either single- or multi-process (e.g., MPI) 28 | applications. 
29 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/copyright:
--------------------------------------------------------------------------------
1 | ../LICENSE.txt
--------------------------------------------------------------------------------
/xgboost/nccl/debian/libnccl-dev.install:
--------------------------------------------------------------------------------
1 | include/nccl.h usr/include
2 | lib/libnccl.so /usr/lib/x86_64-linux-gnu
3 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/libnccl-dev.manpages:
--------------------------------------------------------------------------------
1 | debian/nccl.7
2 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/libnccl1.install.in:
--------------------------------------------------------------------------------
1 | lib/libnccl.so.${nccl:Major} /usr/lib/x86_64-linux-gnu
2 | lib/libnccl.so.${nccl:Major}.${nccl:Minor}.${nccl:Patch} /usr/lib/x86_64-linux-gnu
3 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 | 
3 | %:
4 | 	dh $@ --parallel
5 | 
6 | override_dh_auto_install:
7 | 	PREFIX=debian/tmp dh_auto_install
8 | 
9 | override_dh_auto_test:
10 | 	# Do not make test
11 | 
12 | override_dh_auto_clean:
13 | 	# Do not make clean
14 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/shlibs.local.in:
--------------------------------------------------------------------------------
1 | libcudart ${cuda:Major}.${cuda:Minor} cuda-cudart-${cuda:Major}-${cuda:Minor}
2 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (native)
2 | 
--------------------------------------------------------------------------------
/xgboost/plugin/dense_parser/plugin.mk:
--------------------------------------------------------------------------------
1 | PLUGIN_OBJS += build_plugin/dense_parser/dense_libsvm.o
2 | PLUGIN_LDFLAGS +=
3 | 
--------------------------------------------------------------------------------
/xgboost/plugin/example/README.md:
--------------------------------------------------------------------------------
1 | XGBoost Plugin Example
2 | ======================
3 | This folder provides an example of an xgboost plugin.
4 | 
5 | There are three steps you need to take to add a plugin to xgboost:
6 | - Create your source .cc file and implement a new extension
7 |   - In this example, [custom_obj.cc](custom_obj.cc)
8 | - Register this extension to xgboost via a registration macro
9 |   - In this example, ```XGBOOST_REGISTER_OBJECTIVE``` in [this line](custom_obj.cc#L75)
10 | - Create a [plugin.mk](plugin.mk) in this folder
11 | 
12 | To add this plugin, add the following line to ```config.mk``` (template in make/config.mk).
13 | ```makefile
14 | # Add the plugin by including it in the config
15 | XGB_PLUGINS += plugin/example/plugin.mk
16 | ```
17 | 
18 | Then you can test this plugin by using the ```objective=mylogistic``` parameter.
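
For reference, the registration step amounts to only a few lines of C++. The sketch below is illustrative rather than a copy of the plugin source: it assumes a class ``MyLogistic`` implementing xgboost's ``ObjFunction`` interface, which is what [custom_obj.cc](custom_obj.cc) defines.

```c++
// Hypothetical sketch of the registration step; the class name and the
// description string are assumptions, the macro is the real entry point.
XGBOOST_REGISTER_OBJECTIVE(MyLogistic, "mylogistic")
.describe("Example logistic objective implemented as a plugin.")
.set_body([]() { return new MyLogistic(); });
```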
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /xgboost/plugin/example/plugin.mk: -------------------------------------------------------------------------------- 1 | # Add the object files you like to include in this plugin. 2 | PLUGIN_OBJS += build_plugin/example/custom_obj.o 3 | # Add additional dependent libraries this plugin might have 4 | PLUGIN_LDFLAGS += -------------------------------------------------------------------------------- /xgboost/plugin/lz4/plugin.mk: -------------------------------------------------------------------------------- 1 | PLUGIN_OBJS += build_plugin/lz4/sparse_page_lz4_format.o 2 | PLUGIN_LDFLAGS += -llz4 3 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/gitshallow_submodules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | git submodule init 3 | for i in $(git submodule | awk '{print $2}'); do 4 | spath=$(git config -f .gitmodules --get submodule.$i.path) 5 | surl=$(git config -f .gitmodules --get submodule.$i.url) 6 | if [ $spath == "cub" ] 7 | then 8 | git submodule update --depth 3 $spath 9 | else 10 | git submodule update $spath 11 | fi 12 | done 13 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/plugin.mk: -------------------------------------------------------------------------------- 1 | 2 | PLUGIN_OBJS += build_plugin/updater_gpu/src/register_updater_gpu.o \ 3 | build_plugin/updater_gpu/src/updater_gpu.o \ 4 | build_plugin/updater_gpu/src/gpu_hist_builder.o 5 | PLUGIN_LDFLAGS += -L$(CUDA_ROOT)/lib64 -lcudart 6 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/src/register_updater_gpu.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2017 XGBoost contributors 3 | */ 4 | #include 5 | #include "updater_gpu.cuh" 6 | 7 | namespace xgboost { 8 | namespace tree { 9 | DMLC_REGISTRY_FILE_TAG(updater_gpumaker); 10 | 11 | 12 | XGBOOST_REGISTER_TREE_UPDATER(GPUMaker, "grow_gpu") 13 | .describe("Grow tree with GPU.") 14 | .set_body([]() { return new GPUMaker(); }); 15 | 16 | XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist") 17 | .describe("Grow tree with GPU.") 18 | .set_body([]() { return new GPUHistMaker(); }); 19 | } // namespace tree 20 | } // namespace xgboost 21 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/src/updater_gpu.cuh: -------------------------------------------------------------------------------- 1 | 2 | /*! 
3 |  * Copyright 2017 XGBoost contributors
4 |  */
5 | #pragma once
6 | #include 
7 | #include 
8 | #include "../../../src/tree/param.h"
9 | 
10 | namespace xgboost {
11 | namespace tree {
12 | 
13 | // Forward declare builder classes
14 | class GPUHistBuilder;
15 | namespace exact {
16 | template 
17 | class GPUBuilder;
18 | }
19 | 
20 | class GPUMaker : public TreeUpdater {
21 |  protected:
22 |   TrainParam param;
23 |   std::unique_ptr> builder;
24 | 
25 |  public:
26 |   GPUMaker();
27 |   void Init(
28 |       const std::vector>& args) override;
29 |   void Update(const std::vector& gpair, DMatrix* dmat,
30 |               const std::vector& trees);
31 | };
32 | 
33 | class GPUHistMaker : public TreeUpdater {
34 |  public:
35 |   GPUHistMaker();
36 |   void Init(
37 |       const std::vector>& args) override;
38 |   void Update(const std::vector& gpair, DMatrix* dmat,
39 |               const std::vector& trees) override;
40 |   bool UpdatePredictionCache(const DMatrix* data,
41 |                              std::vector* out_preds) override;
42 | 
43 |  protected:
44 |   TrainParam param;
45 |   std::unique_ptr builder;
46 | };
47 | }  // namespace tree
48 | }  // namespace xgboost
49 | 
--------------------------------------------------------------------------------
/xgboost/python-package/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | *.egg*
--------------------------------------------------------------------------------
/xgboost/python-package/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 | 
3 | ignore=tests
4 | 
5 | disable=unexpected-special-method-signature,too-many-nested-blocks
6 | 
7 | dummy-variables-rgx=(unused|)_.*
8 | 
9 | reports=no
10 | 
--------------------------------------------------------------------------------
/xgboost/python-package/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md *.rst
2 | recursive-include xgboost *
3 | recursive-include xgboost/include *
4 | recursive-include xgboost/src *
5 | recursive-include xgboost/make *
6 | recursive-include xgboost/rabit *
7 | recursive-include xgboost/lib *
8 | recursive-include xgboost/dmlc-core *
9 | # exclude pre-compiled .o and .a files to avoid confusion
10 | # make sure .a files are all removed to force recompiling
11 | # including the pre-compiled .so is needed as a placeholder,
12 | # since it will be copied after compiling on the fly
13 | global-exclude *.o
14 | global-exclude *.a
15 | global-exclude *.pyo
16 | global-exclude *.pyc
--------------------------------------------------------------------------------
/xgboost/python-package/prep_pip.sh:
--------------------------------------------------------------------------------
1 | # this script prepares the PyPI installation package;
2 | # please don't use it for installing xgboost from github
3 | 
4 | # after executing `make pippack`, cd xgboost-python,
5 | # run this script and get the sdist tar.gz in ./dist/
6 | sh ./xgboost/build-python.sh
7 | cp setup_pip.py setup.py
8 | python setup.py sdist
9 | 
10 | # make sure you know what you are going to do, then uncomment the following line
11 | #python setup.py register upload
12 | 
--------------------------------------------------------------------------------
/xgboost/python-package/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst
--------------------------------------------------------------------------------
/xgboost/rabit/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.slo
3 | *.lo
4 | *.o
5 | *.obj
6 | 
7 | # Precompiled Headers
8 | *.gch
9 | *.pch
10 | *.lnk
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | 
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 | 
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 | *~
30 | *.pyc
31 | *.mpi
32 | *.exe
33 | *.txt
34 | *tmp*
35 | *.rabit
36 | *.mock
37 | dmlc-core
38 | recommonmark
39 | recom
40 | 
--------------------------------------------------------------------------------
/xgboost/rabit/.travis.yml:
--------------------------------------------------------------------------------
1 | # disable sudo to use container based build
2 | sudo: false
3 | 
4 | # Use Build Matrix to do lint and build separately
5 | env:
6 |   matrix:
7 |     - TASK=lint LINT_LANG=cpp
8 |     - TASK=lint LINT_LANG=python
9 |     - TASK=doc
10 |     - TASK=build CXX=g++
11 |     - TASK=test CXX=g++
12 | 
13 | # dependent apt packages
14 | addons:
15 |   apt:
16 |     packages:
17 |       - doxygen
18 |       - libopenmpi-dev
19 |       - wget
20 |       - git
21 |       - libcurl4-openssl-dev
22 |       - unzip
23 |       - python-numpy
24 | 
25 | before_install:
26 |   - git clone https://github.com/dmlc/dmlc-core
27 |   - export TRAVIS=dmlc-core/scripts/travis/
28 |   - source ${TRAVIS}/travis_setup_env.sh
29 | 
30 | install:
31 |   - pip install --user cpplint pylint
32 | 
33 | script: scripts/travis_script.sh
34 | 
35 | 
36 | before_cache:
37 |   - ${TRAVIS}/travis_before_cache.sh
38 | 
39 | 
40 | cache:
41 |   directories:
42 |     - ${HOME}/.cache/usr
43 | 
44 | 
45 | notifications:
46 | # Emails are sent to the committer's git-configured email address by default,
47 |   email:
48 |     on_success: change
49 |     on_failure: always
50 | 
51 | 
52 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/.gitignore:
--------------------------------------------------------------------------------
1 | html
2 | latex
3 | *.sh
4 | _*
5 | doxygen
--------------------------------------------------------------------------------
/xgboost/rabit/doc/cpp_api.md:
--------------------------------------------------------------------------------
1 | C++ Library API of Rabit
2 | ========================
3 | This page contains documentation of the C++ library API of rabit.
4 | 
5 | ```eval_rst
6 | .. toctree::
7 | 
8 | .. doxygennamespace:: rabit
9 | ```
10 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/index.md:
--------------------------------------------------------------------------------
1 | Rabit Documentation
2 | =====================
3 | rabit is a lightweight library that provides a fault-tolerant interface for Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. The goal of rabit is to support **portable**, **scalable** and **reliable** distributed machine learning programs.
4 | 
5 | API Documents
6 | -------------
7 | ```eval_rst
8 | 
9 | .. toctree::
10 |     :maxdepth: 2
11 | 
12 |     python_api.md
13 |     cpp_api.md
14 |     parameters.md
15 |     guide.md
16 | ```
17 | Indices and tables
18 | ------------------
19 | 
20 | ```eval_rst
21 | * :ref:`genindex`
22 | * :ref:`modindex`
23 | * :ref:`search`
24 | ```
--------------------------------------------------------------------------------
/xgboost/rabit/doc/parameters.md:
--------------------------------------------------------------------------------
1 | Parameters
2 | ==========
3 | This section lists all the parameters that can be passed to the rabit::Init function as argv.
4 | All the parameters are passed in as strings in the format ``parameter-name=parameter-value``.
5 | In most settings these parameters have default values or will be automatically detected,
6 | and do not need to be manually configured.
7 | 
8 | * rabit_tracker_uri [passed in automatically by tracker]
9 |   - The URI/IP of the rabit tracker
10 | * rabit_tracker_port [passed in automatically by tracker]
11 |   - The port of the rabit tracker
12 | * rabit_task_id [automatically detected]
13 |   - The unique identifier of the computing process
14 |   - When running on Hadoop, this is automatically extracted from an environment variable
15 | * rabit_reduce_buffer [default = 256MB]
16 |   - The memory buffer used to store intermediate results of reduction
17 |   - Format "digits + unit"; can be 128M, 1G
18 | * rabit_global_replica [default = 5]
19 |   - Number of replica copies of the result kept for each Allreduce/Broadcast call
20 | * rabit_local_replica [default = 2]
21 |   - Number of replicas of the local model kept at checkpoints
22 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/python_api.md:
--------------------------------------------------------------------------------
1 | Python API of Rabit
2 | ===================
3 | This page contains documentation of the python API of rabit.
4 | 
5 | ```eval_rst
6 | .. toctree::
7 | 
8 | .. automodule:: rabit
9 |     :members:
10 |     :show-inheritance:
11 | ```
12 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/sphinx_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Helper utility functions for customization."""
3 | import sys
4 | import os
5 | import docutils
6 | import subprocess
7 | 
8 | if os.environ.get('READTHEDOCS', None) == 'True':
9 |     subprocess.call('cd ..; rm -rf recommonmark;' +
10 |                     'git clone https://github.com/tqchen/recommonmark', shell=True)
11 | 
12 | sys.path.insert(0, os.path.abspath('../recommonmark/'))
13 | from recommonmark import parser, transform
14 | 
15 | MarkdownParser = parser.CommonMarkParser
16 | AutoStructify = transform.AutoStructify
17 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/Makefile:
--------------------------------------------------------------------------------
1 | export CC = gcc
2 | export CXX = g++
3 | export MPICXX = mpicxx
4 | export LDFLAGS= -pthread -lm -L../lib
5 | export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -fopenmp -I../include
6 | 
7 | .PHONY: clean all lib libmpi
8 | BIN = basic.rabit broadcast.rabit
9 | MOCKBIN= lazy_allreduce.mock
10 | 
11 | all: $(BIN)
12 | basic.rabit: basic.cc lib ../lib/librabit.a
13 | broadcast.rabit: broadcast.cc lib ../lib/librabit.a
14 | lazy_allreduce.mock: lazy_allreduce.cc lib ../lib/librabit.a
15 | 
16 | $(BIN) :
17 | 	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
18 | 
19 | $(MOCKBIN) :
20 | 	$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock
21 | 
22 | $(OBJ) :
23 | 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
24 | 
25 | clean:
26 | 	$(RM) $(OBJ) $(BIN) $(MOCKBIN) *~ ../src/*~
--------------------------------------------------------------------------------
/xgboost/rabit/guide/README:
--------------------------------------------------------------------------------
1 | See the tutorial at ../doc/guide.md
--------------------------------------------------------------------------------
/xgboost/rabit/guide/basic.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file basic.cc
4 |  * \brief This is an example demonstrating what Allreduce is
5 |  *
6 |  * \author Tianqi Chen
7 |  */
8 | #define _CRT_SECURE_NO_WARNINGS
9 | #define _CRT_SECURE_NO_DEPRECATE
10 | #include 
11 | #include 
12 | using namespace rabit;
13 | int main(int argc, char *argv[]) {
14 |   int N = 3;
15 |   if (argc > 1) {
16 |     N = atoi(argv[1]);
17 |   }
18 |   std::vector<int> a(N);
19 |   rabit::Init(argc, argv);
20 |   for (int i = 0; i < N; ++i) {
21 |     a[i] = rabit::GetRank() + i;
22 |   }
23 |   printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
24 |          rabit::GetRank(), a[0], a[1], a[2]);
25 |   // allreduce takes the max of each element across all processes
26 |   Allreduce<op::Max>(&a[0], N);
27 |   printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
28 |          rabit::GetRank(), a[0], a[1], a[2]);
29 |   // second allreduce that sums everything up
30 |   Allreduce<op::Sum>(&a[0], N);
31 |   printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
32 |          rabit::GetRank(), a[0], a[1], a[2]);
33 |   rabit::Finalize();
34 |   return 0;
35 | }
36 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | demo python script of rabit
4 | """
5 | import os
6 | import sys
7 | import numpy as np
8 | # import rabit, the tracker script will setup the lib path correctly
9 | # for a normal run without the tracker script, add the following line
10 | # sys.path.append(os.path.dirname(__file__) + '/../python')
11 | import rabit
12 | 
13 | rabit.init()
14 | n = 3
15 | rank = rabit.get_rank()
16 | a = np.zeros(n)
17 | for i in xrange(n):
18 |     a[i] = rank + i
19 | 
20 | print '@node[%d] before-allreduce: a=%s' % (rank, str(a))
21 | a = rabit.allreduce(a, rabit.MAX)
22 | print '@node[%d] after-allreduce-max: a=%s' % (rank, str(a))
23 | a = rabit.allreduce(a, rabit.SUM)
24 | print '@node[%d] after-allreduce-sum: a=%s' % (rank, str(a))
25 | rabit.finalize()
--------------------------------------------------------------------------------
/xgboost/rabit/guide/broadcast.cc:
--------------------------------------------------------------------------------
1 | #include 
2 | using namespace rabit;
3 | const int N = 3;
4 | int main(int argc, char *argv[]) {
5 |   rabit::Init(argc, argv);
6 |   std::string s;
7 |   if (rabit::GetRank() == 0) s = "hello world";
8 |   printf("@node[%d] before-broadcast: s=\"%s\"\n",
9 |          rabit::GetRank(), s.c_str());
10 |   // broadcast s from node 0 to all other nodes
11 |   rabit::Broadcast(&s, 0);
12 |   printf("@node[%d] after-broadcast: s=\"%s\"\n",
13 |          rabit::GetRank(), s.c_str());
14 |   rabit::Finalize();
15 |   return 0;
16 | }
17 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/broadcast.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | demo python script of rabit
4 | """
5 | import os
6 | import sys
7 | # add path to wrapper
8 | # for a normal run without the tracker script, add the following line
9 | # sys.path.append(os.path.dirname(__file__) + '/../wrapper')
10 | import rabit
11 | 
12 | rabit.init()
13 | n = 3
14 | rank = rabit.get_rank()
15 | s = None
16 | if rank == 0:
17 |     s = {'hello world':100, 2:3}
18 | print '@node[%d] before-broadcast: s=\"%s\"' % (rank, str(s))
19 | s = rabit.broadcast(s, 0)
20 | 
21 | print '@node[%d] after-broadcast: s=\"%s\"' % (rank, str(s))
22 | rabit.finalize()
23 | 
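# Note (illustrative, derived from the code above): rank 0 starts with the
# dict while every other rank starts with None; after rabit.broadcast(s, 0)
# every rank prints the same dict, received from node 0.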
--------------------------------------------------------------------------------
/xgboost/rabit/guide/lazy_allreduce.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file lazy_allreduce.cc
4 |  * \brief This is an example demonstrating Allreduce with a lazy preparation function
5 |  *
6 |  * \author Tianqi Chen
7 |  */
8 | #include 
9 | 
10 | using namespace rabit;
11 | const int N = 3;
12 | int main(int argc, char *argv[]) {
13 |   int a[N] = {0};
14 |   rabit::Init(argc, argv);
15 |   // lazy preparation function
16 |   auto prepare = [&]() {
17 |     printf("@node[%d] run prepare function\n", rabit::GetRank());
18 |     for (int i = 0; i < N; ++i) {
19 |       a[i] = rabit::GetRank() + i;
20 |     }
21 |   };
22 |   printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
23 |          rabit::GetRank(), a[0], a[1], a[2]);
24 |   // allreduce takes the max of each element across all processes
25 |   Allreduce<op::Max>(&a[0], N, prepare);
26 |   printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
27 |          rabit::GetRank(), a[0], a[1], a[2]);
28 |   // run second allreduce
29 |   Allreduce<op::Sum>(&a[0], N);
30 |   printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
31 |          rabit::GetRank(), a[0], a[1], a[2]);
32 |   rabit::Finalize();
33 |   return 0;
34 | }
35 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/lazy_allreduce.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | demo python script of rabit: Lazy preparation function
4 | """
5 | import os
6 | import sys
7 | import numpy as np
8 | # import rabit, the tracker script will setup the lib path correctly
9 | # for a normal run without the tracker script, add the following line
10 | # sys.path.append(os.path.dirname(__file__) + '/../wrapper')
11 | import rabit
12 | 
13 | 
14 | # use the mock library so that we can run failure tests
15 | rabit.init(lib = 'mock')
16 | n = 3
17 | rank = rabit.get_rank()
18 | a = np.zeros(n)
19 | 
20 | def prepare(a):
21 |     print('@node[%d] run prepare function' % rank)
22 |     # must take in the reference and modify the reference
23 |     for i in xrange(n):
24 |         a[i] = rank + i
25 | 
26 | print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))
27 | a = rabit.allreduce(a, rabit.MAX, prepare_fun = prepare)
28 | print('@node[%d] after-allreduce-max: a=%s' % (rank, str(a)))
29 | a = rabit.allreduce(a, rabit.SUM)
30 | print('@node[%d] after-allreduce-sum: a=%s' % (rank, str(a)))
31 | rabit.finalize()
32 | 
--------------------------------------------------------------------------------
/xgboost/rabit/include/dmlc/README.md:
--------------------------------------------------------------------------------
1 | This folder is part of the dmlc-core library; it allows rabit to use a unified stream interface with other dmlc projects.
2 | 
3 | - Since it is only an interface dependency, dmlc-core is not required to compile rabit
4 | - To compile a project that uses dmlc-core functions, linking to libdmlc.a (provided by dmlc-core) is required.
5 | 
--------------------------------------------------------------------------------
/xgboost/rabit/include/rabit/internal/timer.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright by Contributors
3 |  * \file timer.h
4 |  * \brief This file defines the utilities for timing
5 |  * \author Tianqi Chen, Nacho, Tianyi
6 |  */
7 | #ifndef RABIT_INTERNAL_TIMER_H_
8 | #define RABIT_INTERNAL_TIMER_H_
9 | #include 
10 | #ifdef __MACH__
11 | #include 
12 | #include 
13 | #endif
14 | #include "./utils.h"
15 | 
16 | namespace rabit {
17 | namespace utils {
18 | /*!
19 |  * \brief return time in seconds; not cross-platform, avoid using this in most places
20 |  */
21 | inline double GetTime(void) {
22 | #ifdef __MACH__
23 |   clock_serv_t cclock;
24 |   mach_timespec_t mts;
25 |   host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
26 |   utils::Check(clock_get_time(cclock, &mts) == 0, "failed to get time");
27 |   mach_port_deallocate(mach_task_self(), cclock);
28 |   return static_cast(mts.tv_sec) + static_cast(mts.tv_nsec) * 1e-9;
29 | #else
30 | #if defined(__unix__) || defined(__linux__)
31 |   timespec ts;
32 |   utils::Check(clock_gettime(CLOCK_REALTIME, &ts) == 0, "failed to get time");
33 |   return static_cast(ts.tv_sec) + static_cast(ts.tv_nsec) * 1e-9;
34 | #else
35 |   return static_cast(time(NULL));
36 | #endif
37 | #endif
38 | }
39 | }  // namespace utils
40 | }  // namespace rabit
41 | #endif  // RABIT_INTERNAL_TIMER_H_
42 | 
--------------------------------------------------------------------------------
/xgboost/rabit/include/rabit/serializable.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file serializable.h
4 |  * \brief defines the serializable interface of rabit
5 |  * \author Tianqi Chen
6 |  */
7 | #ifndef RABIT_SERIALIZABLE_H_
8 | #define RABIT_SERIALIZABLE_H_
9 | #include 
10 | #include 
11 | #include "./internal/utils.h"
12 | #include "../dmlc/io.h"
13 | 
14 | namespace rabit {
15 | /*!
16 |  * \brief defines the stream used in rabit
17 |  *  see the definition of Stream in dmlc/io.h
18 |  */
19 | typedef dmlc::Stream Stream;
20 | /*!
21 |  * \brief defines the serializable objects used in rabit
22 |  *  see the definition of Serializable in dmlc/io.h
23 |  */
24 | typedef dmlc::Serializable Serializable;
25 | 
26 | }  // namespace rabit
27 | #endif  // RABIT_SERIALIZABLE_H_
28 | 
--------------------------------------------------------------------------------
/xgboost/rabit/lib/flag: https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/rabit/lib/flag
--------------------------------------------------------------------------------
/xgboost/rabit/lib/readme.md: https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/rabit/lib/readme.md
--------------------------------------------------------------------------------
/xgboost/rabit/scripts/travis_runtest.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | make -f test.mk model_recover_10_10k || exit -1
3 | make -f test.mk model_recover_10_10k_die_same || exit -1
4 | make -f test.mk local_recover_10_10k || exit -1
5 | make -f test.mk lazy_recover_10_10k_die_hard || exit -1
6 | make -f test.mk lazy_recover_10_10k_die_same || exit -1
7 | make -f test.mk ringallreduce_10_10k || exit -1
8 | 
--------------------------------------------------------------------------------
/xgboost/rabit/scripts/travis_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # main script of travis
4 | if [ ${TASK} == "lint" ]; then
5 |     make lint || exit -1
6 | fi
7 | 
8 | if [ ${TASK} == "doc" ]; then
9 |     make doc 2>log.txt
10 |     (cat log.txt| grep -v ENABLE_PREPROCESSING |grep -v "unsupported tag" |grep warning) && exit -1
11 | fi
12 | 
13 | if [ ${TASK} == "build" ]; then
14 |     make all || exit -1
15 | fi
16 | 
17 | if [ ${TASK} == "test" ]; then
18 |     cd test
19 |     make all || exit -1
20 |     ../scripts/travis_runtest.sh || exit -1
21 | fi
22 | 
23 | 
--------------------------------------------------------------------------------
/xgboost/rabit/src/README.md:
--------------------------------------------------------------------------------
1 | Source Files of Rabit
2 | ====
3 | * This folder contains the source files of the rabit library
4 | * The library headers are in the folder [include](../include)
5 | * The .h files in this folder are internal header files that are only used by rabit and will not be seen by users
6 | 
7 | 
--------------------------------------------------------------------------------
/xgboost/rabit/src/engine_base.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file engine_base.cc
4 |  * \brief this is an engine implementation that uses
5 |  *  the base Allreduce manager, without failure injection
6 |  * \author Tianqi Chen
7 |  */
8 | // define use BASE, so we will use the base Manager
9 | #define _CRT_SECURE_NO_WARNINGS
10 | #define _CRT_SECURE_NO_DEPRECATE
11 | #define NOMINMAX
12 | // switch engine to AllreduceBase
13 | #define RABIT_USE_BASE
14 | #include "./engine.cc"
15 | 
16 | 
--------------------------------------------------------------------------------
/xgboost/rabit/src/engine_mock.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file engine_mock.cc
4 |  * \brief this is an engine implementation that will
5 |  *  insert failures at certain call points, to test if the engine is robust to failure
6 |  * \author Tianqi Chen
7 |  */
8 | // define use MOCK, so we will use the mock Manager
9 | #define _CRT_SECURE_NO_WARNINGS
10 | #define _CRT_SECURE_NO_DEPRECATE
11 | #define NOMINMAX
12 | // switch engine to AllreduceMock
13 | #define RABIT_USE_MOCK
14 | #include "./allreduce_mock.h"
15 | #include "./engine.cc"
16 | 
17 | 
--------------------------------------------------------------------------------
/xgboost/rabit/test/.gitignore:
--------------------------------------------------------------------------------
1 | *.mpi
2 | test_*
3 | *_test
4 | *_recover
5 | 
--------------------------------------------------------------------------------
/xgboost/rabit/test/Makefile:
--------------------------------------------------------------------------------
1 | export CC = gcc
2 | export CXX = g++
3 | export MPICXX = mpicxx
4 | export LDFLAGS= -L../lib -pthread -lm -lrt
5 | export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include -std=c++0x
6 | 
7 | # specify tensor path
8 | BIN = speed_test model_recover local_recover lazy_recover
9 | OBJ = $(RABIT_OBJ) speed_test.o model_recover.o local_recover.o lazy_recover.o
10 | MPIBIN = speed_test.mpi
11 | .PHONY: clean all lib mpi
12 | 
13 | all: $(BIN)
14 | lib:
15 | 	cd ..;make;cd -
16 | mpi:
17 | 	cd ..;make mpi;cd -
18 | # programs
19 | speed_test.o: speed_test.cc ../include/rabit/*.h lib mpi
20 | model_recover.o: model_recover.cc ../include/rabit/*.h lib
21 | local_recover.o: local_recover.cc ../include/rabit/*.h lib
22 | lazy_recover.o: lazy_recover.cc ../include/rabit/*.h lib
23 | 
24 | # we can link against the MPI version to use MPI
25 | speed_test: speed_test.o $(RABIT_OBJ)
26 | speed_test.mpi: speed_test.o $(MPIOBJ)
27 | model_recover: model_recover.o $(RABIT_OBJ)
28 | local_recover: local_recover.o $(RABIT_OBJ)
29 | lazy_recover: lazy_recover.o $(RABIT_OBJ)
30 | 
31 | $(BIN) :
32 | 	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) ../lib/librabit_mock.a $(LDFLAGS)
33 | 
34 | $(OBJ) :
35 | 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
36 | 
37 | $(MPIBIN) :
38 | 	$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mpi
39 | 
40 | clean:
41 | 	$(RM) $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) *~ ../src/*~
42 | 
--------------------------------------------------------------------------------
/xgboost/rabit/test/README.md:
--------------------------------------------------------------------------------
1 | Testcases of Rabit
2 | ====
3 | This folder contains internal testcases to test the correctness and efficiency of the rabit API
4 | 
5 | The example scripts for running the testcases are given in test.mk
6 | * type ```make -f test.mk testcasename``` to run a certain testcase
7 | 
8 | 
9 | Helper Scripts
10 | ====
11 | * test.mk contains Makefile documentation of all testcases
12 | * keepalive.sh is a helper bash script to restart a program when it dies abnormally
13 | 
14 | List of Programs
15 | ====
16 | * speed_test: test the running speed of the rabit API
17 | * test_local_recover: test recovery of local state when an error happens
18 | * test_model_recover: test recovery of global state when an error happens
--------------------------------------------------------------------------------
/xgboost/rabit/test/local_recover.py:
--------------------------------------------------------------------------------
1 | 
#!/usr/bin/python 2 | import rabit 3 | import numpy as np 4 | 5 | rabit.init(lib='mock') 6 | rank = rabit.get_rank() 7 | n = 10 8 | nround = 3 9 | data = np.ones(n) * rank 10 | 11 | version, model, local = rabit.load_checkpoint(True) 12 | if version == 0: 13 | model = np.zeros(n) 14 | local = np.ones(n) 15 | else: 16 | print '[%d] restart from version %d' % (rank, version) 17 | 18 | for i in xrange(version, nround): 19 | res = rabit.allreduce(data + model+local, rabit.SUM) 20 | print '[%d] iter=%d: %s' % (rank, i, str(res)) 21 | model = res 22 | local[:] = i 23 | rabit.checkpoint(model, local) 24 | 25 | rabit.finalize() 26 | -------------------------------------------------------------------------------- /xgboost/rabit/test/speed_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import sys 4 | 5 | def main(): 6 | parser = argparse.ArgumentParser(description='TODO') 7 | parser.add_argument('-ho', '--host_dir', required=True) 8 | parser.add_argument('-s', '--submit_script', required=True) 9 | parser.add_argument('-rex', '--rabit_exec', required=True) 10 | parser.add_argument('-mpi', '--mpi_exec', required=True) 11 | args = parser.parse_args() 12 | 13 | ndata = [10**4, 10**5, 10**6, 10**7] 14 | nrepeat = [10**4, 10**3, 10**2, 10] 15 | 16 | machines = [2,4,8,16,31] 17 | 18 | executables = [args.rabit_exec, args.mpi_exec] 19 | 20 | for executable in executables: 21 | sys.stderr.write('Executable %s' % executable) 22 | sys.stderr.flush() 23 | for i, data in enumerate(ndata): 24 | for machine in machines: 25 | host_file = os.path.join(args.host_dir, 'hosts%d' % machine) 26 | cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, executable, data, nrepeat[i]) 27 | sys.stderr.write('data=%d, repeat=%d, machine=%d\n' % (data, nrepeat[i], machine)) 28 | sys.stderr.flush() 29 | os.system(cmd) 30 | sys.stderr.write('\n') 31 | sys.stderr.flush() 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /xgboost/src/c_api/c_api_error.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file c_api_error.cc 4 | * \brief C error handling 5 | */ 6 | #include 7 | #include "./c_api_error.h" 8 | 9 | struct XGBAPIErrorEntry { 10 | std::string last_error; 11 | }; 12 | 13 | typedef dmlc::ThreadLocalStore XGBAPIErrorStore; 14 | 15 | const char *XGBGetLastError() { 16 | return XGBAPIErrorStore::Get()->last_error.c_str(); 17 | } 18 | 19 | void XGBAPISetLastError(const char* msg) { 20 | XGBAPIErrorStore::Get()->last_error = msg; 21 | } 22 | -------------------------------------------------------------------------------- /xgboost/src/c_api/c_api_error.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file c_api_error.h 4 | * \brief Error handling for C API. 5 | */ 6 | #ifndef XGBOOST_C_API_C_API_ERROR_H_ 7 | #define XGBOOST_C_API_C_API_ERROR_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /*! \brief macro to guard beginning and end section of all functions */ 14 | #define API_BEGIN() try { 15 | /*! \brief every function starts with API_BEGIN(); 16 | and finishes with API_END() or API_END_HANDLE_ERROR */ 17 | #define API_END() } catch(dmlc::Error &_except_) { return XGBAPIHandleException(_except_); } return 0; // NOLINT(*) 18 | /*! 
--------------------------------------------------------------------------------
/xgboost/src/common/common.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file common.cc
4 |  * \brief Enable all kinds of global variables in common.
5 |  */
6 | #include <dmlc/thread_local.h>
7 | #include "./random.h"
8 | 
9 | namespace xgboost {
10 | namespace common {
11 | /*! \brief thread local entry for random. */
12 | struct RandomThreadLocalEntry {
13 |   /*! \brief the random engine instance. */
14 |   GlobalRandomEngine engine;
15 | };
16 | 
17 | typedef dmlc::ThreadLocalStore<RandomThreadLocalEntry> RandomThreadLocalStore;
18 | 
19 | GlobalRandomEngine& GlobalRandom() {
20 |   return RandomThreadLocalStore::Get()->engine;
21 | }
22 | }  // namespace common
23 | }  // namespace xgboost
24 | 
--------------------------------------------------------------------------------
/xgboost/src/common/common.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file common.h
4 |  * \brief Common utilities
5 |  */
6 | #ifndef XGBOOST_COMMON_COMMON_H_
7 | #define XGBOOST_COMMON_COMMON_H_
8 | 
9 | #include <vector>
10 | #include <string>
11 | #include <sstream>
12 | 
13 | namespace xgboost {
14 | namespace common {
15 | /*!
16 |  * \brief Split a string by delimiter
17 |  * \param s String to be split.
18 |  * \param delim The delimiter.
19 |  */
20 | inline std::vector<std::string> Split(const std::string& s, char delim) {
21 |   std::string item;
22 |   std::istringstream is(s);
23 |   std::vector<std::string> ret;
24 |   while (std::getline(is, item, delim)) {
25 |     ret.push_back(item);
26 |   }
27 |   return ret;
28 | }
29 | 
30 | // simple routine to convert any data to string
31 | template<typename T>
32 | inline std::string ToString(const T& data) {
33 |   std::ostringstream os;
34 |   os << data;
35 |   return os.str();
36 | }
37 | 
38 | }  // namespace common
39 | }  // namespace xgboost
40 | #endif  // XGBOOST_COMMON_COMMON_H_
41 | 
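A quick, self-contained sketch of the two helpers above; it assumes the header is reachable on the include path as "common.h".

```cpp
#include <iostream>
#include "common.h"  // the header shown above

int main() {
  // Split breaks a delimited string into its fields.
  std::vector<std::string> fields =
      xgboost::common::Split("max_depth,eta,silent", ',');
  for (const std::string& f : fields) {
    std::cout << f << '\n';  // prints max_depth, then eta, then silent
  }
  // ToString works for any type with an operator<< overload.
  std::cout << xgboost::common::ToString(0.3) << '\n';  // prints 0.3
  return 0;
}
```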
--------------------------------------------------------------------------------
/xgboost/src/common/sync.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2014 by Contributors
3 |  * \file sync.h
4 |  * \brief the synchronization module of rabit;
5 |  *   redirects to the rabit header
6 |  * \author Tianqi Chen
7 |  */
8 | #ifndef XGBOOST_COMMON_SYNC_H_
9 | #define XGBOOST_COMMON_SYNC_H_
10 | 
11 | #include <rabit/rabit.h>
12 | 
13 | #endif  // XGBOOST_COMMON_SYNC_H_
14 | 
--------------------------------------------------------------------------------
/xgboost/src/gbm/gbm.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file gbm.cc
4 |  * \brief Registry of gradient boosters.
5 |  */
6 | #include <xgboost/gbm.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
11 | }  // namespace dmlc
12 | 
13 | namespace xgboost {
14 | GradientBooster* GradientBooster::Create(
15 |     const std::string& name,
16 |     const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
17 |     bst_float base_margin) {
18 |   auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
19 |   if (e == nullptr) {
20 |     LOG(FATAL) << "Unknown gbm type " << name;
21 |   }
22 |   return (e->body)(cache_mats, base_margin);
23 | }
24 | }  // namespace xgboost
25 | 
26 | namespace xgboost {
27 | namespace gbm {
28 | // List of files that will be force linked in static links.
29 | DMLC_REGISTRY_LINK_TAG(gblinear);
30 | DMLC_REGISTRY_LINK_TAG(gbtree);
31 | }  // namespace gbm
32 | }  // namespace xgboost
33 | 
--------------------------------------------------------------------------------
/xgboost/src/logging.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file logging.cc
4 |  * \brief Implementation of loggers.
5 |  * \author Tianqi Chen
6 |  */
7 | #include <xgboost/logging.h>
8 | #include <iostream>
9 | #include "./common/sync.h"
10 | 
11 | namespace xgboost {
12 | 
13 | #if XGBOOST_CUSTOMIZE_LOGGER == 0
14 | ConsoleLogger::~ConsoleLogger() {
15 |   std::cerr << log_stream_.str() << std::endl;
16 | }
17 | 
18 | TrackerLogger::~TrackerLogger() {
19 |   log_stream_ << '\n';
20 |   rabit::TrackerPrint(log_stream_.str());
21 | }
22 | #endif
23 | }  // namespace xgboost
24 | 
--------------------------------------------------------------------------------
/xgboost/src/metric/metric.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file metric.cc
4 |  * \brief Registry of evaluation metrics.
5 |  */
6 | #include <xgboost/metric.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
11 | }
12 | 
13 | namespace xgboost {
14 | Metric* Metric::Create(const std::string& name) {
15 |   std::string buf = name;
16 |   auto pos = buf.find('@');
17 |   if (pos == std::string::npos) {
18 |     auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(name);
19 |     if (e == nullptr) {
20 |       LOG(FATAL) << "Unknown metric function " << name;
21 |     }
22 |     return (e->body)(nullptr);
23 |   } else {
24 |     std::string prefix = buf.substr(0, pos);
25 |     auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
26 |     if (e == nullptr) {
27 |       LOG(FATAL) << "Unknown metric function " << name;
28 |     }
29 |     return (e->body)(buf.substr(pos + 1, buf.length()).c_str());
30 |   }
31 | }
32 | }  // namespace xgboost
33 | 
34 | namespace xgboost {
35 | namespace metric {
36 | // List of files that will be force linked in static links.
37 | DMLC_REGISTRY_LINK_TAG(elementwise_metric);
38 | DMLC_REGISTRY_LINK_TAG(multiclass_metric);
39 | DMLC_REGISTRY_LINK_TAG(rank_metric);
40 | }  // namespace metric
41 | }  // namespace xgboost
42 | 
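The '@' convention that Metric::Create implements can be exercised directly. A minimal sketch follows, using the metric names "rmse" and "error@0.5" that the unit tests further down this tree also use; it assumes the program is linked against the xgboost library so the metrics are actually registered.

```cpp
#include <xgboost/metric.h>

int main() {
  // "rmse" has no parameter, so it is looked up in the registry as-is.
  xgboost::Metric* rmse = xgboost::Metric::Create("rmse");
  // "error@0.5" is split at '@': the registry entry "error" is found,
  // and the remainder "0.5" is handed to its factory as an argument.
  xgboost::Metric* err = xgboost::Metric::Create("error@0.5");
  delete rmse;
  delete err;
  return 0;
}
```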
--------------------------------------------------------------------------------
/xgboost/src/objective/objective.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file objective.cc
4 |  * \brief Registry of all objective functions.
5 |  */
6 | #include <xgboost/objective.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
11 | }  // namespace dmlc
12 | 
13 | namespace xgboost {
14 | // implement factory functions
15 | ObjFunction* ObjFunction::Create(const std::string& name) {
16 |   auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
17 |   if (e == nullptr) {
18 |     for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {
19 |       LOG(INFO) << "Objective candidate: " << entry->name;
20 |     }
21 |     LOG(FATAL) << "Unknown objective function " << name;
22 |   }
23 |   return (e->body)();
24 | }
25 | }  // namespace xgboost
26 | 
27 | namespace xgboost {
28 | namespace obj {
29 | // List of files that will be force linked in static links.
30 | DMLC_REGISTRY_LINK_TAG(regression_obj);
31 | DMLC_REGISTRY_LINK_TAG(multiclass_obj);
32 | DMLC_REGISTRY_LINK_TAG(rank_obj);
33 | }  // namespace obj
34 | }  // namespace xgboost
35 | 
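A short sketch of the factory above; "reg:linear" is a registered objective, as the unit tests below confirm. Note the courtesy in the lookup path: an unknown name first has every known candidate printed before the hard failure.

```cpp
#include <xgboost/objective.h>

int main() {
  // Known name: returns a fresh instance from the registry.
  xgboost::ObjFunction* obj = xgboost::ObjFunction::Create("reg:linear");
  delete obj;
  // Unknown name: LOG(INFO) prints each registered candidate, then
  // LOG(FATAL) throws dmlc::Error.
  // xgboost::ObjFunction::Create("no-such-objective");
  return 0;
}
```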
--------------------------------------------------------------------------------
/xgboost/src/tree/tree_updater.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file tree_updater.cc
4 |  * \brief Registry of tree updaters.
5 |  */
6 | #include <xgboost/tree_updater.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
11 | }  // namespace dmlc
12 | 
13 | namespace xgboost {
14 | 
15 | TreeUpdater* TreeUpdater::Create(const std::string& name) {
16 |   auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
17 |   if (e == nullptr) {
18 |     LOG(FATAL) << "Unknown tree updater " << name;
19 |   }
20 |   return (e->body)();
21 | }
22 | 
23 | }  // namespace xgboost
24 | 
25 | namespace xgboost {
26 | namespace tree {
27 | // List of files that will be force linked in static links.
28 | DMLC_REGISTRY_LINK_TAG(updater_colmaker);
29 | DMLC_REGISTRY_LINK_TAG(updater_skmaker);
30 | DMLC_REGISTRY_LINK_TAG(updater_refresh);
31 | DMLC_REGISTRY_LINK_TAG(updater_prune);
32 | DMLC_REGISTRY_LINK_TAG(updater_fast_hist);
33 | DMLC_REGISTRY_LINK_TAG(updater_histmaker);
34 | DMLC_REGISTRY_LINK_TAG(updater_sync);
35 | }  // namespace tree
36 | }  // namespace xgboost
37 | 
--------------------------------------------------------------------------------
/xgboost/tests/README.md:
--------------------------------------------------------------------------------
1 | This folder contains testcases for xgboost.
2 | 
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/Dockerfile.gpu:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:8.0-devel-ubuntu14.04
2 | 
3 | RUN apt-get update && apt-get -y upgrade
4 | # CMAKE
5 | RUN apt-get install -y build-essential
6 | RUN apt-get install -y wget
7 | RUN wget http://www.cmake.org/files/v3.5/cmake-3.5.2.tar.gz
8 | RUN tar -xvzf cmake-3.5.2.tar.gz
9 | RUN cd cmake-3.5.2/ && ./configure && make && make install
10 | 
11 | # BLAS
12 | RUN apt-get install -y libatlas-base-dev
13 | 
14 | # PYTHON2
15 | RUN apt-get install -y python-setuptools python-pip python-dev unzip gfortran
16 | RUN pip install numpy nose scipy scikit-learn
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/build_gpu_cmake.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | mkdir build
4 | cd build
5 | cmake .. -DPLUGIN_UPDATER_GPU=ON
6 | make
7 | 
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/test_gpu.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | mkdir build
4 | cd build
5 | cmake .. -DPLUGIN_UPDATER_GPU=ON
6 | make
7 | cd ..
8 | cd python-package
9 | python setup.py install --user
10 | cd ../plugin/updater_gpu
11 | python -m nose test/python
12 | 
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/with_the_same_user:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # This script is a wrapper that creates the same user inside the container as the
4 | # one running ci_build.sh outside the container. It also sets the home directory
5 | # for the user inside the container to match the same absolute path as the
6 | # workspace outside of the container. Do not run this manually. It does not make
7 | # sense. It is intended to be called by ci_build.sh only.
8 | 
9 | set -e
10 | 
11 | COMMAND=("$@")
12 | 
13 | if ! touch /this_is_writable_file_system; then
14 |     echo "You can't write to your filesystem!"
15 |     echo "If you are in Docker you should check you do not have too many images" \
16 |          "with too many files in them. Docker has some issue with it."
17 |     exit 1
18 | else
19 |     rm /this_is_writable_file_system
20 | fi
21 | 
22 | getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}"
23 | getent passwd "${CI_BUILD_UID}" || adduser --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \
24 |     --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \
25 |     --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}"
26 | usermod -a -G sudo "${CI_BUILD_USER}"
27 | echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
28 | 
29 | sudo -u "#${CI_BUILD_UID}" --preserve-env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \
30 |     "HOME=${CI_BUILD_HOME}" "${COMMAND[@]}"
31 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/data/test_simple_csr_source.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/data.h>
3 | #include "../../../src/data/simple_csr_source.h"
4 | 
5 | #include "../helpers.h"
6 | 
7 | TEST(SimpleCSRSource, SaveLoadBinary) {
8 |   std::string tmp_file = CreateSimpleTestData();
9 |   xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
10 |   std::remove(tmp_file.c_str());
11 | 
12 |   std::string tmp_binfile = TempFileName();
13 |   dmat->SaveToLocalFile(tmp_binfile);
14 |   xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
15 |   std::remove(tmp_binfile.c_str());
16 | 
17 |   EXPECT_EQ(dmat->info().num_col, dmat_read->info().num_col);
18 |   EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row);
19 |   EXPECT_EQ(dmat->info().num_nonzero, dmat_read->info().num_nonzero);
20 | 
21 |   dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator();
22 |   dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator();
23 |   // Test the data read into the first row
24 |   row_iter->BeforeFirst(); row_iter->Next();
25 |   row_iter_read->BeforeFirst(); row_iter_read->Next();
26 |   xgboost::SparseBatch::Inst first_row = row_iter->Value()[0];
27 |   xgboost::SparseBatch::Inst first_row_read = row_iter_read->Value()[0];
28 |   EXPECT_EQ(first_row.length, first_row_read.length);
29 |   EXPECT_EQ(first_row[2].index, first_row_read[2].index);
30 |   EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
31 |   row_iter = nullptr; row_iter_read = nullptr;
32 | }
33 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef XGBOOST_TESTS_CPP_HELPERS_H_
2 | #define XGBOOST_TESTS_CPP_HELPERS_H_
3 | 
4 | #include <iostream>
5 | #include <fstream>
6 | #include <cstdio>
7 | #include <cstdlib>
8 | #include <string>
9 | #include <sstream>
10 | #include <vector>
11 | 
12 | #include <gtest/gtest.h>
13 | 
14 | #include <xgboost/base.h>
15 | #include <xgboost/objective.h>
16 | #include <xgboost/metric.h>
17 | 
18 | std::string TempFileName();
19 | 
20 | bool FileExists(const std::string name);
21 | 
22 | long GetFileSize(const std::string filename);
23 | 
24 | std::string CreateSimpleTestData();
25 | 
26 | void CheckObjFunction(xgboost::ObjFunction * obj,
27 |                       std::vector<xgboost::bst_float> preds,
28 |                       std::vector<xgboost::bst_float> labels,
29 |                       std::vector<xgboost::bst_float> weights,
30 |                       std::vector<xgboost::bst_float> out_grad,
31 |                       std::vector<xgboost::bst_float> out_hess);
32 | 
33 | xgboost::bst_float GetMetricEval(
34 |     xgboost::Metric * metric,
35 |     std::vector<xgboost::bst_float> preds,
36 |     std::vector<xgboost::bst_float> labels,
37 |     std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>());
38 | 
39 | #endif  // XGBOOST_TESTS_CPP_HELPERS_H_
40 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/metric/test_metric.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/metric.h>
3 | 
4 | #include "../helpers.h"
5 | 
6 | TEST(Metric, UnknownMetric) {
7 |   EXPECT_ANY_THROW(xgboost::Metric::Create("unknown_name"));
8 |   EXPECT_NO_THROW(xgboost::Metric::Create("rmse"));
9 |   EXPECT_ANY_THROW(xgboost::Metric::Create("unknown_name@1"));
10 |   EXPECT_NO_THROW(xgboost::Metric::Create("error@0.5f"));
11 | }
12 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/objective/test_multiclass_metric.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/metric.h>
3 | 
4 | #include "../helpers.h"
5 | 
6 | TEST(Metric, MultiClassError) {
7 |   xgboost::Metric * metric = xgboost::Metric::Create("merror");
8 |   ASSERT_STREQ(metric->Name(), "merror");
9 |   EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
10 |   EXPECT_NEAR(GetMetricEval(
11 |     metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
12 |   EXPECT_NEAR(GetMetricEval(metric,
13 |                             {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
14 |                             {0, 1, 2}),
15 |               0.666f, 0.001f);
16 | }
17 | 
18 | TEST(Metric, MultiClassLogLoss) {
19 |   xgboost::Metric * metric = xgboost::Metric::Create("mlogloss");
20 |   ASSERT_STREQ(metric->Name(), "mlogloss");
21 |   EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
22 |   EXPECT_NEAR(GetMetricEval(
23 |     metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
24 |   EXPECT_NEAR(GetMetricEval(metric,
25 |                             {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
26 |                             {0, 1, 2}),
27 |               2.302f, 0.001f);
28 | }
29 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/objective/test_objective.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/objective.h>
3 | 
4 | #include "../helpers.h"
5 | 
6 | TEST(Objective, UnknownFunction) {
7 |   EXPECT_ANY_THROW(xgboost::ObjFunction::Create("unknown_name"));
8 |   EXPECT_NO_THROW(xgboost::ObjFunction::Create("reg:linear"));
9 | }
10 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/test_main.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <gtest/gtest.h>
3 | 
4 | int main(int argc, char ** argv) {
5 |   testing::InitGoogleTest(&argc, argv);
6 |   testing::FLAGS_gtest_death_test_style = "threadsafe";
7 |   return RUN_ALL_TESTS();
8 | }
9 | 
--------------------------------------------------------------------------------
/xgboost/tests/distributed/runtests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PYTHONPATH=../../python-package/ ../../dmlc-core/tracker/dmlc-submit --cluster=local --num-workers=3 \
4 |     python test_basic.py
5 | 
--------------------------------------------------------------------------------
/xgboost/tests/distributed/test_basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import numpy as np
3 | import scipy.sparse
4 | import pickle
5 | import xgboost as xgb
6 | 
7 | # always call this before using the distributed module
8 | xgb.rabit.init()
9 | 
10 | # Load the files; they will be automatically sharded in distributed mode.
11 | dtrain = xgb.DMatrix('../../demo/data/agaricus.txt.train')
12 | dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')
13 | 
14 | # specify parameters via a map; the definitions are the same as in the C++ version
15 | param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
16 | 
17 | # specify a validation set to watch performance
18 | watchlist = [(dtest, 'eval'), (dtrain, 'train')]
19 | num_round = 20
20 | 
21 | # Run training; all the features of the training API are available.
22 | # Currently, this script only supports calling train once, for fault-recovery purposes.
23 | bst = xgb.train(param, dtrain, num_round, watchlist, early_stopping_rounds=2)
24 | 
25 | # save the model; only process 0 is asked to save it
26 | if xgb.rabit.get_rank() == 0:
27 |     bst.save_model("test.model")
28 |     xgb.rabit.tracker_print("Finished training\n")
29 | 
30 | # Notify the tracker that all training has been successful.
31 | # This is only needed in distributed training.
32 | xgb.rabit.finalize()
33 | 
--------------------------------------------------------------------------------
/xgboost/tests/python/test_sparse_dmatrix.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xgboost as xgb
3 | from scipy.sparse import rand
4 | 
5 | rng = np.random.RandomState(1)
6 | 
7 | param = {'max_depth': 3, 'objective': 'binary:logistic', 'silent': 1}
8 | 
9 | 
10 | def test_sparse_dmatrix_csr():
11 |     nrow = 100
12 |     ncol = 1000
13 |     x = rand(nrow, ncol, density=0.0005, format='csr', random_state=rng)
14 |     assert x.indices.max() < ncol - 1
15 |     x.data[:] = 1
16 |     dtrain = xgb.DMatrix(x, label=np.random.binomial(1, 0.3, nrow))
17 |     assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
18 |     watchlist = [(dtrain, 'train')]
19 |     bst = xgb.train(param, dtrain, 5, watchlist)
20 |     bst.predict(dtrain)
21 | 
22 | 
23 | def test_sparse_dmatrix_csc():
24 |     nrow = 1000
25 |     ncol = 100
26 |     x = rand(nrow, ncol, density=0.0005, format='csc', random_state=rng)
27 |     assert x.indices.max() < nrow - 1
28 |     x.data[:] = 1
29 |     dtrain = xgb.DMatrix(x, label=np.random.binomial(1, 0.3, nrow))
30 |     assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
31 |     watchlist = [(dtrain, 'train')]
32 |     bst = xgb.train(param, dtrain, 5, watchlist)
33 |     bst.predict(dtrain)
34 | 
--------------------------------------------------------------------------------
/xgboost/tests/python/testing.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | import nose
4 | 
5 | from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
6 | 
7 | 
8 | def _skip_if_no_sklearn():
9 |     if not SKLEARN_INSTALLED:
10 |         raise nose.SkipTest()
11 | 
12 | 
13 | def _skip_if_no_pandas():
14 |     if not PANDAS_INSTALLED:
15 |         raise nose.SkipTest()
16 | 
17 | 
18 | def _skip_if_no_matplotlib():
19 |     try:
20 |         import matplotlib.pyplot as _  # noqa
21 |     except ImportError:
22 |         raise nose.SkipTest()
23 | 
--------------------------------------------------------------------------------
/xgboost/tests/travis/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if [ ${TRAVIS_OS_NAME} == "osx" ]; then
4 |     brew update
5 |     brew install graphviz
6 | fi
7 | 
8 | if [ ${TASK} == "lint" ]; then
9 |     pip install --user cpplint 'pylint==1.4.4' 'astroid==1.3.6'
10 | fi
11 | 
12 | 
13 | if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ]; then
14 |     # python2
15 |     if [ ${TRAVIS_OS_NAME} == "osx" ]; then
16 |         wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
17 |     else
18 |         wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
19 |     fi
20 |     bash conda.sh -b -p $HOME/miniconda
21 |     export PATH="$HOME/miniconda/bin:$PATH"
22 |     hash -r
23 |     conda config --set always_yes yes --set changeps1 no
24 |     conda update -q conda
25 |     # Useful for debugging any issues with conda
26 |     conda info -a
27 |     conda create -n python3 python=3.5
28 |     conda create -n python2 python=2.7
29 | fi
30 | 
--------------------------------------------------------------------------------
/xgboost/tests/travis/travis_after_failure.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if [ ${TASK} == "r_test" ]; then
4 |     cat xgboost/xgboost.Rcheck/*.log
5 |     echo "--------------------------"
6 |     cat xgboost/xgboost.Rcheck/*.out
7 | fi
8 | 
--------------------------------------------------------------------------------