├── .github ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── binding.gyp ├── build.sh ├── changelog.md ├── doc ├── api.md ├── intro.md └── intro_zh.md ├── index.d.ts ├── index.js ├── index.ts ├── package-lock.json ├── package.json ├── readme.md ├── src ├── base.h ├── index.cc ├── xgmatrix.cc ├── xgmatrix.h ├── xgmodel.cc └── xgmodel.h ├── test ├── base.js └── data │ ├── iris.xg.model │ └── xgmatrix.bin ├── tsconfig.json └── xgboost ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── CONTRIBUTORS.md ├── ISSUE_TEMPLATE.md ├── Jenkinsfile ├── LICENSE ├── Makefile ├── NEWS.md ├── R-package ├── .Rbuildignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R │ ├── callbacks.R │ ├── utils.R │ ├── xgb.Booster.R │ ├── xgb.DMatrix.R │ ├── xgb.DMatrix.save.R │ ├── xgb.create.features.R │ ├── xgb.cv.R │ ├── xgb.dump.R │ ├── xgb.ggplot.R │ ├── xgb.importance.R │ ├── xgb.load.R │ ├── xgb.model.dt.tree.R │ ├── xgb.plot.deepness.R │ ├── xgb.plot.importance.R │ ├── xgb.plot.multi.trees.R │ ├── xgb.plot.tree.R │ ├── xgb.save.R │ ├── xgb.save.raw.R │ ├── xgb.train.R │ └── xgboost.R ├── README.md ├── cleanup ├── configure ├── configure.ac ├── data │ ├── agaricus.test.rda │ └── agaricus.train.rda ├── demo │ ├── 00Index │ ├── README.md │ ├── basic_walkthrough.R │ ├── boost_from_prediction.R │ ├── caret_wrapper.R │ ├── create_sparse_matrix.R │ ├── cross_validation.R │ ├── custom_objective.R │ ├── early_stopping.R │ ├── generalized_linear_model.R │ ├── poisson_regression.R │ ├── predict_first_ntree.R │ ├── predict_leaf_indices.R │ ├── runall.R │ └── tweedie_regression.R ├── man │ ├── agaricus.test.Rd │ ├── agaricus.train.Rd │ ├── callbacks.Rd │ ├── cb.cv.predict.Rd │ ├── cb.early.stop.Rd │ ├── cb.evaluation.log.Rd │ ├── cb.print.evaluation.Rd │ ├── cb.reset.parameters.Rd │ ├── cb.save.model.Rd │ ├── dim.xgb.DMatrix.Rd │ ├── dimnames.xgb.DMatrix.Rd │ ├── getinfo.Rd │ ├── predict.xgb.Booster.Rd │ ├── print.xgb.Booster.Rd │ ├── print.xgb.DMatrix.Rd │ ├── print.xgb.cv.Rd │ ├── setinfo.Rd │ ├── slice.xgb.DMatrix.Rd │ ├── xgb.Booster.complete.Rd │ ├── xgb.DMatrix.Rd │ ├── xgb.DMatrix.save.Rd │ ├── xgb.attr.Rd │ ├── xgb.create.features.Rd │ ├── xgb.cv.Rd │ ├── xgb.dump.Rd │ ├── xgb.importance.Rd │ ├── xgb.load.Rd │ ├── xgb.model.dt.tree.Rd │ ├── xgb.parameters.Rd │ ├── xgb.plot.deepness.Rd │ ├── xgb.plot.importance.Rd │ ├── xgb.plot.multi.trees.Rd │ ├── xgb.plot.tree.Rd │ ├── xgb.save.Rd │ ├── xgb.save.raw.Rd │ ├── xgb.train.Rd │ └── xgboost-deprecated.Rd ├── src │ ├── Makevars.in │ ├── Makevars.win │ ├── init.c │ ├── xgboost_R.cc │ ├── xgboost_R.h │ ├── xgboost_assert.c │ └── xgboost_custom.cc ├── tests │ ├── testthat.R │ └── testthat │ │ ├── test_basic.R │ │ ├── test_callbacks.R │ │ ├── test_custom_objective.R │ │ ├── test_dmatrix.R │ │ ├── test_gc_safety.R │ │ ├── test_glm.R │ │ ├── test_helpers.R │ │ ├── test_lint.R │ │ ├── test_monotone.R │ │ ├── test_parameter_exposure.R │ │ ├── test_poisson_regression.R │ │ └── test_update.R └── vignettes │ ├── discoverYourData.Rmd │ ├── vignette.css │ ├── xgboost.Rnw │ ├── xgboost.bib │ └── xgboostPresentation.Rmd ├── README.md ├── amalgamation ├── dmlc-minimum0.cc └── xgboost-all0.cc ├── appveyor.yml ├── build.sh ├── cmake └── Utils.cmake ├── cub ├── CHANGE_LOG.TXT ├── LICENSE.TXT ├── README.md ├── common.mk ├── cub │ ├── agent │ │ ├── agent_histogram.cuh │ │ ├── agent_radix_sort_downsweep.cuh │ │ ├── agent_radix_sort_upsweep.cuh │ │ ├── agent_reduce.cuh │ │ ├── agent_reduce_by_key.cuh │ │ ├── agent_rle.cuh │ 
│ ├── agent_scan.cuh │ │ ├── agent_segment_fixup.cuh │ │ ├── agent_select_if.cuh │ │ ├── agent_spmv_csrt.cuh │ │ ├── agent_spmv_orig.cuh │ │ ├── agent_spmv_row_based.cuh │ │ └── single_pass_scan_operators.cuh │ ├── block │ │ ├── block_adjacent_difference.cuh │ │ ├── block_discontinuity.cuh │ │ ├── block_exchange.cuh │ │ ├── block_histogram.cuh │ │ ├── block_load.cuh │ │ ├── block_radix_rank.cuh │ │ ├── block_radix_sort.cuh │ │ ├── block_raking_layout.cuh │ │ ├── block_reduce.cuh │ │ ├── block_scan.cuh │ │ ├── block_shuffle.cuh │ │ ├── block_store.cuh │ │ └── specializations │ │ │ ├── block_histogram_atomic.cuh │ │ │ ├── block_histogram_sort.cuh │ │ │ ├── block_reduce_raking.cuh │ │ │ ├── block_reduce_raking_commutative_only.cuh │ │ │ ├── block_reduce_warp_reductions.cuh │ │ │ ├── block_scan_raking.cuh │ │ │ ├── block_scan_warp_scans.cuh │ │ │ ├── block_scan_warp_scans2.cuh │ │ │ └── block_scan_warp_scans3.cuh │ ├── cub.cuh │ ├── device │ │ ├── device_histogram.cuh │ │ ├── device_partition.cuh │ │ ├── device_radix_sort.cuh │ │ ├── device_reduce.cuh │ │ ├── device_run_length_encode.cuh │ │ ├── device_scan.cuh │ │ ├── device_segmented_radix_sort.cuh │ │ ├── device_segmented_reduce.cuh │ │ ├── device_select.cuh │ │ ├── device_spmv.cuh │ │ └── dispatch │ │ │ ├── dispatch_histogram.cuh │ │ │ ├── dispatch_radix_sort.cuh │ │ │ ├── dispatch_reduce.cuh │ │ │ ├── dispatch_reduce_by_key.cuh │ │ │ ├── dispatch_rle.cuh │ │ │ ├── dispatch_scan.cuh │ │ │ ├── dispatch_select_if.cuh │ │ │ ├── dispatch_spmv_csrt.cuh │ │ │ ├── dispatch_spmv_orig.cuh │ │ │ └── dispatch_spmv_row_based.cuh │ ├── grid │ │ ├── grid_barrier.cuh │ │ ├── grid_even_share.cuh │ │ ├── grid_mapping.cuh │ │ └── grid_queue.cuh │ ├── host │ │ └── mutex.cuh │ ├── iterator │ │ ├── arg_index_input_iterator.cuh │ │ ├── cache_modified_input_iterator.cuh │ │ ├── cache_modified_output_iterator.cuh │ │ ├── constant_input_iterator.cuh │ │ ├── counting_input_iterator.cuh │ │ ├── discard_output_iterator.cuh │ │ ├── tex_obj_input_iterator.cuh │ │ ├── tex_ref_input_iterator.cuh │ │ └── transform_input_iterator.cuh │ ├── thread │ │ ├── thread_load.cuh │ │ ├── thread_operators.cuh │ │ ├── thread_reduce.cuh │ │ ├── thread_scan.cuh │ │ ├── thread_search.cuh │ │ └── thread_store.cuh │ ├── util_allocator.cuh │ ├── util_arch.cuh │ ├── util_debug.cuh │ ├── util_device.cuh │ ├── util_macro.cuh │ ├── util_namespace.cuh │ ├── util_ptx.cuh │ ├── util_type.cuh │ └── warp │ │ ├── specializations │ │ ├── warp_reduce_shfl.cuh │ │ ├── warp_reduce_smem.cuh │ │ ├── warp_scan_shfl.cuh │ │ └── warp_scan_smem.cuh │ │ ├── warp_reduce.cuh │ │ └── warp_scan.cuh ├── eclipse code style profile.xml ├── examples │ ├── block │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── example_block_radix_sort.cu │ │ ├── example_block_reduce.cu │ │ ├── example_block_scan.cu │ │ └── reduce_by_key.cu │ └── device │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── example_device_partition_flagged.cu │ │ ├── example_device_partition_if.cu │ │ ├── example_device_radix_sort.cu │ │ ├── example_device_reduce.cu │ │ ├── example_device_scan.cu │ │ ├── example_device_select_flagged.cu │ │ ├── example_device_select_if.cu │ │ ├── example_device_select_unique.cu │ │ └── example_device_sort_find_non_trivial_runs.cu ├── experimental │ ├── .gitignore │ ├── Makefile │ ├── defunct │ │ ├── example_coo_spmv.cu │ │ └── test_device_seg_reduce.cu │ ├── histogram │ │ ├── histogram_cub.h │ │ ├── histogram_gmem_atomics.h │ │ └── histogram_smem_atomics.h │ ├── histogram_compare.cu │ ├── sparse_matrix.h │ ├── spmv_compare.cu │ └── 
spmv_script.sh ├── test │ ├── .gitignore │ ├── Makefile │ ├── link_a.cu │ ├── link_b.cu │ ├── link_main.cpp │ ├── mersenne.h │ ├── test_allocator.cu │ ├── test_block_histogram.cu │ ├── test_block_load_store.cu │ ├── test_block_radix_sort.cu │ ├── test_block_reduce.cu │ ├── test_block_scan.cu │ ├── test_device_histogram.cu │ ├── test_device_radix_sort.cu │ ├── test_device_reduce.cu │ ├── test_device_reduce_by_key.cu │ ├── test_device_run_length_encode.cu │ ├── test_device_scan.cu │ ├── test_device_select_if.cu │ ├── test_device_select_unique.cu │ ├── test_grid_barrier.cu │ ├── test_iterator.cu │ ├── test_util.h │ ├── test_warp_reduce.cu │ └── test_warp_scan.cu └── tune │ ├── .gitignore │ ├── Makefile │ └── tune_device_reduce.cu ├── demo ├── .gitignore ├── README.md ├── binary_classification │ ├── README.md │ ├── agaricus-lepiota.fmap │ ├── agaricus-lepiota.names │ ├── mapfeat.py │ ├── mknfold.py │ └── runexp.sh ├── data │ ├── README.md │ ├── featmap.txt │ └── gen_autoclaims.R ├── distributed-training │ ├── README.md │ ├── plot_model.ipynb │ └── run_aws.sh ├── gpu_acceleration │ ├── README.md │ └── bosch.py ├── guide-python │ ├── README.md │ ├── basic_walkthrough.py │ ├── boost_from_prediction.py │ ├── cross_validation.py │ ├── custom_objective.py │ ├── evals_result.py │ ├── external_memory.py │ ├── gamma_regression.py │ ├── generalized_linear_model.py │ ├── predict_first_ntree.py │ ├── predict_leaf_indices.py │ ├── runall.sh │ ├── sklearn_evals_result.py │ ├── sklearn_examples.py │ └── sklearn_parallel.py ├── kaggle-higgs │ ├── README.md │ ├── higgs-cv.py │ ├── higgs-numpy.py │ ├── higgs-pred.R │ ├── higgs-pred.py │ ├── higgs-train.R │ ├── run.sh │ ├── speedtest.R │ └── speedtest.py ├── kaggle-otto │ ├── README.MD │ ├── otto_train_pred.R │ └── understandingXGBoostModel.Rmd ├── multiclass_classification │ ├── README.md │ ├── runexp.sh │ └── train.py ├── rank │ ├── README.md │ ├── runexp.sh │ ├── trans_data.py │ └── wgetdata.sh ├── regression │ ├── README.md │ ├── machine.names │ ├── mapfeat.py │ ├── mknfold.py │ └── runexp.sh └── yearpredMSD │ ├── README.md │ ├── csv2libsvm.py │ └── runexp.sh ├── dmlc-core ├── .gitignore ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── Makefile ├── README.md ├── cmake │ ├── Modules │ │ ├── FindCrypto.cmake │ │ └── FindHDFS.cmake │ ├── Utils.cmake │ └── lint.cmake ├── doc │ ├── .gitignore │ ├── Doxyfile │ ├── Makefile │ ├── README │ ├── conf.py │ ├── index.md │ ├── parameter.md │ └── sphinx_util.py ├── example │ ├── dmlc_example.mk │ └── parameter.cc ├── include │ └── dmlc │ │ ├── any.h │ │ ├── array_view.h │ │ ├── base.h │ │ ├── common.h │ │ ├── concurrency.h │ │ ├── config.h │ │ ├── data.h │ │ ├── input_split_shuffle.h │ │ ├── io.h │ │ ├── json.h │ │ ├── logging.h │ │ ├── lua.h │ │ ├── memory.h │ │ ├── memory_io.h │ │ ├── omp.h │ │ ├── optional.h │ │ ├── parameter.h │ │ ├── recordio.h │ │ ├── registry.h │ │ ├── serializer.h │ │ ├── thread_local.h │ │ ├── threadediter.h │ │ ├── timer.h │ │ └── type_traits.h ├── make │ └── dmlc.mk ├── scripts │ ├── lint.py │ ├── packages.mk │ ├── setup_nvcc.sh │ └── travis │ │ ├── travis_before_cache.sh │ │ ├── travis_osx_install.sh │ │ ├── travis_script.sh │ │ └── travis_setup_env.sh ├── src │ ├── config.cc │ ├── data.cc │ ├── data │ │ ├── basic_row_iter.h │ │ ├── csv_parser.h │ │ ├── disk_row_iter.h │ │ ├── libfm_parser.h │ │ ├── libsvm_parser.h │ │ ├── parser.h │ │ ├── row_block.h │ │ ├── strtonum.h │ │ └── text_parser.h │ ├── io.cc │ ├── io │ │ ├── azure_filesys.cc │ │ ├── azure_filesys.h │ │ ├── cached_input_split.h │ 
│ ├── filesys.h │ │ ├── hdfs_filesys.cc │ │ ├── hdfs_filesys.h │ │ ├── input_split_base.cc │ │ ├── input_split_base.h │ │ ├── line_split.cc │ │ ├── line_split.h │ │ ├── local_filesys.cc │ │ ├── local_filesys.h │ │ ├── recordio_split.cc │ │ ├── recordio_split.h │ │ ├── s3_filesys.cc │ │ ├── s3_filesys.h │ │ ├── single_file_split.h │ │ ├── threaded_input_split.h │ │ └── uri_spec.h │ └── recordio.cc ├── test │ ├── .gitignore │ ├── README.md │ ├── csv_parser_test.cc │ ├── dataiter_test.cc │ ├── dmlc_test.mk │ ├── filesys_test.cc │ ├── iostream_test.cc │ ├── libfm_parser_test.cc │ ├── libsvm_parser_test.cc │ ├── logging_test.cc │ ├── parameter_test.cc │ ├── recordio_test.cc │ ├── registry_test.cc │ ├── split_read_test.cc │ ├── split_repeat_read_test.cc │ ├── split_test.cc │ ├── stream_read_test.cc │ ├── strtonum_test.cc │ └── unittest │ │ ├── .gitignore │ │ ├── dmlc_unittest.mk │ │ ├── unittest_any.cc │ │ ├── unittest_array_view.cc │ │ ├── unittest_config.cc │ │ ├── unittest_json.cc │ │ ├── unittest_logging.cc │ │ ├── unittest_main.cc │ │ ├── unittest_optional.cc │ │ ├── unittest_serializer.cc │ │ └── unittest_threaditer.cc ├── tracker │ ├── README.md │ ├── dmlc-submit │ ├── dmlc_tracker │ │ ├── __init__.py │ │ ├── launcher.py │ │ ├── local.py │ │ ├── mpi.py │ │ ├── opts.py │ │ ├── sge.py │ │ ├── ssh.py │ │ ├── submit.py │ │ ├── tracker.py │ │ └── yarn.py │ └── yarn │ │ ├── .gitignore │ │ ├── README.md │ │ ├── build.bat │ │ ├── build.sh │ │ ├── pom.xml │ │ └── src │ │ └── main │ │ └── java │ │ └── org │ │ └── apache │ │ └── hadoop │ │ └── yarn │ │ └── dmlc │ │ ├── ApplicationMaster.java │ │ ├── Client.java │ │ └── TaskRecord.java └── windows │ ├── .gitignore │ ├── README.md │ ├── dmlc.sln │ └── dmlc │ └── dmlc.vcxproj ├── doc ├── .gitignore ├── Doxyfile ├── Makefile ├── R-package │ ├── .gitignore │ ├── Makefile │ ├── discoverYourData.md │ ├── index.md │ └── xgboostPresentation.md ├── README ├── build.md ├── cli │ └── index.md ├── conf.py ├── faq.md ├── get_started │ └── index.md ├── how_to │ ├── contribute.md │ ├── external_memory.md │ ├── index.md │ └── param_tuning.md ├── index.md ├── input_format.md ├── julia │ └── index.md ├── jvm │ ├── index.md │ ├── java_intro.md │ ├── xgboost4j-intro.md │ └── xgboost4j_full_integration.md ├── model.md ├── parameter.md ├── python │ ├── index.md │ ├── python_api.rst │ └── python_intro.md ├── sphinx_util.py └── tutorials │ ├── aws_yarn.md │ ├── dart.md │ ├── index.md │ └── monotonic.md ├── include └── xgboost │ ├── base.h │ ├── c_api.h │ ├── data.h │ ├── feature_map.h │ ├── gbm.h │ ├── learner.h │ ├── logging.h │ ├── metric.h │ ├── objective.h │ ├── tree_model.h │ └── tree_updater.h ├── jvm-packages ├── .gitignore ├── README.md ├── checkstyle-suppressions.xml ├── checkstyle.xml ├── create_jni.py ├── pom.xml ├── scalastyle-config.xml ├── xgboost4j-example │ ├── LICENSE │ ├── README.md │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── java │ │ │ └── example │ │ │ ├── BasicWalkThrough.java │ │ │ ├── BoostFromPrediction.java │ │ │ ├── CrossValidation.java │ │ │ ├── CustomObjective.java │ │ │ ├── ExternalMemory.java │ │ │ ├── GeneralizedLinearModel.java │ │ │ ├── PredictFirstNtree.java │ │ │ ├── PredictLeafIndices.java │ │ │ └── util │ │ │ ├── CustomEval.java │ │ │ └── DataLoader.java │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── example │ │ ├── BasicWalkThrough.scala │ │ ├── BoostFromPrediction.scala │ │ ├── CrossValidation.scala │ │ ├── CustomObjective.scala │ │ ├── 
ExternalMemory.scala │ │ ├── GeneralizedLinearModel.scala │ │ ├── PredictFirstNTree.scala │ │ ├── PredictLeafIndices.scala │ │ ├── flink │ │ └── DistTrainWithFlink.scala │ │ ├── spark │ │ ├── SparkModelTuningTool.scala │ │ ├── SparkWithDataFrame.scala │ │ └── SparkWithRDD.scala │ │ └── util │ │ └── CustomEval.scala ├── xgboost4j-flink │ ├── pom.xml │ └── src │ │ └── main │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── flink │ │ ├── XGBoost.scala │ │ └── XGBoostModel.scala ├── xgboost4j-spark │ ├── pom.xml │ └── src │ │ ├── main │ │ └── scala │ │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ └── scala │ │ │ └── spark │ │ │ ├── DataUtils.scala │ │ │ ├── XGBoost.scala │ │ │ ├── XGBoostClassificationModel.scala │ │ │ ├── XGBoostEstimator.scala │ │ │ ├── XGBoostModel.scala │ │ │ ├── XGBoostRegressionModel.scala │ │ │ └── params │ │ │ ├── BoosterParams.scala │ │ │ ├── CustomParams.scala │ │ │ ├── DefaultXGBoostParamsReader.scala │ │ │ ├── DefaultXGBoostParamsWriter.scala │ │ │ ├── GeneralParams.scala │ │ │ ├── LearningTaskParams.scala │ │ │ └── Utils.scala │ │ └── test │ │ ├── resources │ │ └── log4j.properties │ │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ └── spark │ │ ├── EvalError.scala │ │ ├── RabitTrackerRobustnessSuite.scala │ │ ├── SharedSparkContext.scala │ │ ├── Utils.scala │ │ ├── XGBoostConfigureSuite.scala │ │ ├── XGBoostDFSuite.scala │ │ ├── XGBoostGeneralSuite.scala │ │ └── XGBoostSparkPipelinePersistence.scala └── xgboost4j │ ├── LICENSE │ ├── pom.xml │ └── src │ ├── main │ ├── java │ │ └── ml │ │ │ └── dmlc │ │ │ └── xgboost4j │ │ │ ├── LabeledPoint.java │ │ │ └── java │ │ │ ├── Booster.java │ │ │ ├── DMatrix.java │ │ │ ├── DataBatch.java │ │ │ ├── IEvaluation.java │ │ │ ├── IObjective.java │ │ │ ├── IRabitTracker.java │ │ │ ├── NativeLibLoader.java │ │ │ ├── Rabit.java │ │ │ ├── RabitTracker.java │ │ │ ├── XGBoost.java │ │ │ ├── XGBoostError.java │ │ │ └── XGBoostJNI.java │ └── scala │ │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── scala │ │ ├── Booster.scala │ │ ├── DMatrix.scala │ │ ├── EvalTrait.scala │ │ ├── ObjectiveTrait.scala │ │ ├── XGBoost.scala │ │ └── rabit │ │ ├── RabitTracker.scala │ │ ├── handler │ │ ├── RabitTrackerHandler.scala │ │ └── RabitWorkerHandler.scala │ │ └── util │ │ ├── LinkMap.scala │ │ └── RabitTrackerHelpers.scala │ ├── native │ ├── xgboost4j.cpp │ └── xgboost4j.h │ └── test │ ├── java │ └── ml │ │ └── dmlc │ │ └── xgboost4j │ │ └── java │ │ ├── BoosterImplTest.java │ │ └── DMatrixTest.java │ └── scala │ └── ml │ └── dmlc │ └── xgboost4j │ └── scala │ ├── DMatrixSuite.scala │ ├── ScalaBoosterImplSuite.scala │ └── rabit │ └── RabitTrackerConnectionHandlerTest.scala ├── make ├── config.mk ├── mingw64.mk ├── minimum.mk ├── minimum_parallel.mk └── travis.mk ├── nccl ├── .gitignore ├── CMakeLists.txt ├── LICENSE.txt ├── Makefile ├── README.md ├── debian │ ├── .gitignore │ ├── changelog.in │ ├── compat │ ├── control.in │ ├── copyright │ ├── libnccl-dev.install │ ├── libnccl-dev.manpages │ ├── libnccl1.install.in │ ├── nccl.7 │ ├── rules │ ├── shlibs.local.in │ └── source │ │ └── format ├── fortran │ ├── Makefile │ ├── src │ │ ├── cudafor.f90 │ │ └── ncclfor.f90 │ └── test │ │ ├── allgather_arr_out.f90 │ │ ├── allgather_ptr_out.f90 │ │ ├── allreduce_arr_out.f90 │ │ ├── allreduce_ptr_out.f90 │ │ ├── broadcast_arr.f90 │ │ ├── broadcast_ptr.f90 │ │ ├── reduce_arr_out.f90 │ │ ├── reduce_ptr_out.f90 │ │ ├── reducescatter_arr_out.f90 │ │ └── reducescatter_ptr_out.f90 ├── src │ ├── all_gather.cu │ ├── 
all_reduce.cu │ ├── broadcast.cu │ ├── common_coll.h │ ├── common_kernel.h │ ├── copy_kernel.h │ ├── core.cu │ ├── core.h │ ├── enqueue.h │ ├── libwrap.cu │ ├── libwrap.h │ ├── nccl.h │ ├── primitives.h │ ├── reduce.cu │ ├── reduce_kernel.h │ └── reduce_scatter.cu └── test │ ├── include │ └── test_utilities.h │ ├── mpi │ └── mpi_test.cu │ └── single │ ├── all_gather_scan.cu │ ├── all_gather_test.cu │ ├── all_reduce_scan.cu │ ├── all_reduce_test.cu │ ├── broadcast_scan.cu │ ├── broadcast_test.cu │ ├── reduce_scan.cu │ ├── reduce_scatter_scan.cu │ ├── reduce_scatter_test.cu │ └── reduce_test.cu ├── plugin ├── README.md ├── dense_parser │ ├── dense_libsvm.cc │ └── plugin.mk ├── example │ ├── README.md │ ├── custom_obj.cc │ └── plugin.mk ├── lz4 │ ├── plugin.mk │ └── sparse_page_lz4_format.cc └── updater_gpu │ ├── README.md │ ├── benchmark │ └── benchmark.py │ ├── gitshallow_submodules.sh │ ├── plugin.mk │ ├── src │ ├── common.cuh │ ├── device_helpers.cuh │ ├── exact │ │ ├── argmax_by_key.cuh │ │ ├── fused_scan_reduce_by_key.cuh │ │ ├── gpu_builder.cuh │ │ ├── node.cuh │ │ └── split2node.cuh │ ├── gpu_data.cuh │ ├── gpu_hist_builder.cu │ ├── gpu_hist_builder.cuh │ ├── register_updater_gpu.cc │ ├── types.cuh │ ├── updater_gpu.cu │ └── updater_gpu.cuh │ └── test │ ├── cpp │ ├── argmax_by_key.cu │ ├── fused_reduce_scan_by_key.cu │ ├── generate_data.sh │ ├── gpu_builder.cu │ ├── node.cu │ ├── utils.cu │ └── utils.cuh │ └── python │ └── test.py ├── python-package ├── .gitignore ├── .pylintrc ├── MANIFEST.in ├── README.rst ├── build_trouble_shooting.md ├── prep_pip.sh ├── setup.cfg ├── setup.py └── setup_pip.py ├── rabit ├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── doc │ ├── .gitignore │ ├── Doxyfile │ ├── Makefile │ ├── conf.py │ ├── cpp_api.md │ ├── guide.md │ ├── index.md │ ├── parameters.md │ ├── python_api.md │ └── sphinx_util.py ├── guide │ ├── Makefile │ ├── README │ ├── basic.cc │ ├── basic.py │ ├── broadcast.cc │ ├── broadcast.py │ ├── lazy_allreduce.cc │ └── lazy_allreduce.py ├── include │ ├── dmlc │ │ ├── README.md │ │ ├── base.h │ │ ├── io.h │ │ ├── logging.h │ │ ├── serializer.h │ │ └── type_traits.h │ └── rabit │ │ ├── c_api.h │ │ ├── internal │ │ ├── engine.h │ │ ├── io.h │ │ ├── rabit-inl.h │ │ ├── timer.h │ │ └── utils.h │ │ ├── rabit.h │ │ └── serializable.h ├── lib │ ├── flag │ └── readme.md ├── python │ └── rabit.py ├── scripts │ ├── travis_runtest.sh │ └── travis_script.sh ├── src │ ├── README.md │ ├── allreduce_base.cc │ ├── allreduce_base.h │ ├── allreduce_mock.h │ ├── allreduce_robust-inl.h │ ├── allreduce_robust.cc │ ├── allreduce_robust.h │ ├── c_api.cc │ ├── engine.cc │ ├── engine_base.cc │ ├── engine_empty.cc │ ├── engine_mock.cc │ ├── engine_mpi.cc │ ├── socket.h │ └── thread_local.h └── test │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── lazy_recover.cc │ ├── local_recover.cc │ ├── local_recover.py │ ├── model_recover.cc │ ├── speed_runner.py │ ├── speed_test.cc │ └── test.mk ├── src ├── c_api │ ├── c_api.cc │ ├── c_api_error.cc │ └── c_api_error.h ├── cli_main.cc ├── common │ ├── base64.h │ ├── bitmap.h │ ├── column_matrix.h │ ├── common.cc │ ├── common.h │ ├── config.h │ ├── group_data.h │ ├── hist_util.cc │ ├── hist_util.h │ ├── io.h │ ├── math.h │ ├── quantile.h │ ├── random.h │ ├── row_set.h │ └── sync.h ├── data │ ├── data.cc │ ├── simple_csr_source.cc │ ├── simple_csr_source.h │ ├── simple_dmatrix.cc │ ├── simple_dmatrix.h │ ├── sparse_batch_page.h │ ├── sparse_page_dmatrix.cc │ ├── sparse_page_dmatrix.h │ ├── 
sparse_page_raw_format.cc │ ├── sparse_page_source.cc │ ├── sparse_page_source.h │ └── sparse_page_writer.cc ├── gbm │ ├── gblinear.cc │ ├── gbm.cc │ └── gbtree.cc ├── learner.cc ├── logging.cc ├── metric │ ├── elementwise_metric.cc │ ├── metric.cc │ ├── multiclass_metric.cc │ └── rank_metric.cc ├── objective │ ├── multiclass_obj.cc │ ├── objective.cc │ ├── rank_obj.cc │ └── regression_obj.cc └── tree │ ├── fast_hist_param.h │ ├── param.h │ ├── tree_model.cc │ ├── tree_updater.cc │ ├── updater_basemaker-inl.h │ ├── updater_colmaker.cc │ ├── updater_fast_hist.cc │ ├── updater_histmaker.cc │ ├── updater_prune.cc │ ├── updater_refresh.cc │ ├── updater_skmaker.cc │ └── updater_sync.cc └── tests ├── README.md ├── ci_build ├── Dockerfile.gpu ├── build_gpu_cmake.sh ├── ci_build.sh ├── test_gpu.sh └── with_the_same_user ├── cpp ├── data │ ├── test_metainfo.cc │ ├── test_simple_csr_source.cc │ ├── test_simple_dmatrix.cc │ └── test_sparse_page_dmatrix.cc ├── helpers.cc ├── helpers.h ├── metric │ ├── test_elementwise_metric.cc │ ├── test_metric.cc │ └── test_rank_metric.cc ├── objective │ ├── test_multiclass_metric.cc │ ├── test_objective.cc │ └── test_regression_obj.cc ├── test_main.cc ├── tree │ └── test_param.cc └── xgboost_test.mk ├── distributed ├── runtests.sh └── test_basic.py ├── python ├── test_basic.py ├── test_basic_models.py ├── test_early_stopping.py ├── test_eval_metrics.py ├── test_fast_hist.py ├── test_openmp.py ├── test_plotting.py ├── test_sparse_dmatrix.py ├── test_training_continuation.py ├── test_with_pandas.py ├── test_with_sklearn.py └── testing.py └── travis ├── run_test.sh ├── setup.sh └── travis_after_failure.sh /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for your interest to contribute to the XGBoost-Node. 4 | 5 | Please submit an issue to suggest new features before submitting pull requests. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | short introduction 4 | 5 | ## Platform, Node.js version and C++ compiler version 6 | 7 | example: macOS 10.12 with XCode 8 8 | 9 | ## Steps to reproduce this issue 10 | 11 | code or steps to reproduce this issue 12 | 13 | ## Related issues 14 | 15 | other issues related to this one. 
16 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | short introduction 4 | 5 | ## Tested platforms 6 | 7 | list tested platforms 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | build 3 | .DS_Store 4 | lib/libxgboost.a 5 | lib/libxgboost.dylib 6 | .vscode/settings.json 7 | .vscode/tags 8 | .vscode/c_cpp_properties.json 9 | xgboost/xgboost 10 | coverage 11 | readme.html 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | addons: 4 | apt: 5 | sources: 6 | - ubuntu-toolchain-r-test 7 | packages: 8 | - valgrind 9 | - libgtest-dev 10 | - libboost-dev 11 | 12 | node_js: 13 | - "6" 14 | - "8" 15 | 16 | sudo: false 17 | dist: trusty 18 | osx_image: xcode8.3 19 | 20 | before_script: 21 | - npm install -g codecov 22 | - npm install -g istanbul 23 | 24 | after_success: 25 | - istanbul cover ./node_modules/mocha/bin/_mocha 26 | - codecov --disable=gcov 27 | - if [[ "$TRAVIS_OS_NAME" != "osx" ]]; then valgrind -v --tool=memcheck --leak-check=full --show-leak-kinds=all --track-origins=yes node node_modules/mocha/bin/mocha; fi 28 | 29 | os: 30 | - linux 31 | - osx 32 | 33 | notifications: 34 | email: 35 | on_success: change 36 | on_failure: change 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 by Contributors 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | 
--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
1 | cd ./xgboost;
2 | sh build.sh;
3 | touch ./rabit/lib/flag
4 | [ -e ./rabit/lib/librabit.a ] && cp ./rabit/lib/librabit.a ./rabit/lib/librabit_empty.a && echo -fopenmp > ./rabit/lib/flag;
5 | cd ../;
6 | echo done building library;
7 | 
--------------------------------------------------------------------------------
/changelog.md:
--------------------------------------------------------------------------------
1 | # 1.0.0 2017-09-09
2 | 
3 | + Initial release
4 | 
5 | # 1.1.0 2017-10-30
6 | 
7 | + Added async predict function
8 | + Updated documentation
9 | 
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "xgboost",
3 |   "version": "1.1.0",
4 |   "description": "XGBoost in Node.js",
5 |   "main": "index.js",
6 |   "types": "index.d.ts",
7 |   "scripts": {
8 |     "test": "npm run compile; mocha",
9 |     "cover": "istanbul cover _mocha",
10 |     "install": "bash ./build.sh; node-gyp rebuild",
11 |     "compile": "tsc --p tsconfig.json"
12 |   },
13 |   "repository": {
14 |     "type": "git",
15 |     "url": "https://github.com/nuanio/xgboost-node"
16 |   },
17 |   "bugs": {
18 |     "url": "https://github.com/nuanio/xgboost-node/issues"
19 |   },
20 |   "keywords": [
21 |     "xgboost",
22 |     "machine learning",
23 |     "classifier",
24 |     "gbm",
25 |     "algorithm",
26 |     "ensemble",
27 |     "kaggle",
28 |     "kaggle competition",
29 |     "python",
30 |     "dmlc"
31 |   ],
32 |   "author": "nuan.io",
33 |   "license": "Apache-2.0",
34 |   "dependencies": {
35 |     "bindings": "^1.2.1",
36 |     "nan": "^2.6.2",
37 |     "async": "^2.5.0"
38 |   },
39 |   "devDependencies": {
40 |     "@types/node": "^8.0.9",
41 |     "chai": "^4.0.2",
42 |     "istanbul": "^0.4.5",
43 |     "mocha": "^3.4.2",
44 |     "typescript": "^2.4.1"
45 |   }
46 | }
47 | 
--------------------------------------------------------------------------------
/src/base.h:
--------------------------------------------------------------------------------
1 | #ifndef XGBASE_H
2 | #define XGBASE_H
3 | 
4 | #include <node.h>
5 | #include <v8.h>
6 | #include <nan.h>
7 | 
8 | #include <string>
9 | #include <vector>
10 | #include <iostream>
11 | 
12 | #include "c_api.h"
13 | #include "dmlc/logging.h"
14 | 
15 | using namespace std;
16 | using namespace v8;
17 | 
18 | #endif
--------------------------------------------------------------------------------
/src/index.cc:
--------------------------------------------------------------------------------
1 | #include "xgmatrix.h"
2 | #include "xgmodel.h"
3 | 
4 | void InitAll(v8::Local<v8::Object> exports)
5 | {
6 |   XGModel::Init(exports);
7 |   XGMatrix::Init(exports);
8 | }
9 | 
10 | NODE_MODULE(xgboost, InitAll)
11 | 
--------------------------------------------------------------------------------
/src/xgmatrix.h:
--------------------------------------------------------------------------------
1 | #ifndef XGMAT_H
2 | #define XGMAT_H
3 | 
4 | #include "base.h"
5 | 
6 | class XGMatrix : public Nan::ObjectWrap
7 | {
8 | public:
9 |   static void Init(v8::Local<v8::Object> exports);
10 |   DMatrixHandle GetHandle();
11 | 
12 | private:
13 |   explicit XGMatrix(DMatrixHandle result);
14 |   ~XGMatrix();
15 | 
16 |   static NAN_METHOD(NewMatrix);
17 | 
18 |   static int FromDense(const Nan::FunctionCallbackInfo<v8::Value> &info, DMatrixHandle &res);
19 |   static int FromCSCR(const Nan::FunctionCallbackInfo<v8::Value> &info, DMatrixHandle &res, bool C);
20 |   static int FromFile(const Nan::FunctionCallbackInfo<v8::Value> &info, DMatrixHandle &res);
21 |   static NAN_METHOD(GetCol);
22 |   static NAN_METHOD(GetRow);
23 |   static Nan::Persistent<v8::Function> constructor;
24 |   DMatrixHandle handle;
25 | };
26 | 
27 | #endif
--------------------------------------------------------------------------------
/test/data/iris.xg.model:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/test/data/iris.xg.model
--------------------------------------------------------------------------------
/test/data/xgmatrix.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/test/data/xgmatrix.bin
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "es5",
4 |     "module": "commonjs",
5 |     "noImplicitAny": false,
6 |     "removeComments": false,
7 |     "noLib": false,
8 |     "preserveConstEnums": true,
9 |     "declaration": true,
10 |     "suppressImplicitAnyIndexErrors": true,
11 |     "outDir": "./",
12 |     "lib": [
13 |       "es6",
14 |       "es7"
15 |     ]
16 |   },
17 |   "files": [
18 |     "index.ts"
19 |   ],
20 |   "exclude": [
21 |     "node_modules"
22 |   ]
23 | }
--------------------------------------------------------------------------------
/xgboost/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.slo
3 | *.lo
4 | *.o
5 | *.page
6 | # Compiled Dynamic libraries
7 | *.so
8 | *.dylib
9 | *.page
10 | # Compiled Static libraries
11 | *.lai
12 | *.la
13 | *.a
14 | *~
15 | *.Rcheck
16 | *.rds
17 | *.tar.gz
18 | #*txt*
19 | *conf
20 | *buffer
21 | *model
22 | *pyc
23 | *.train
24 | *.test
25 | *.tar
26 | *group
27 | *rar
28 | *vali
29 | *sdf
30 | Release
31 | *exe*
32 | *exp
33 | ipch
34 | *.filters
35 | *.user
36 | *log
37 | Debug
38 | *suo
39 | .Rhistory
40 | *.dll
41 | *i386
42 | *x64
43 | *dump
44 | *save
45 | *csv
46 | .Rproj.user
47 | *.cpage.col
48 | *.cpage
49 | *.Rproj
50 | ./xgboost
51 | ./xgboost.mpi
52 | ./xgboost.mock
53 | #.Rbuildignore
54 | R-package.Rproj
55 | *.cache*
56 | #java
57 | java/xgboost4j/target
58 | java/xgboost4j/tmp
59 | java/xgboost4j-demo/target
60 | java/xgboost4j-demo/data/
61 | java/xgboost4j-demo/tmp/
62 | java/xgboost4j-demo/model/
63 | nb-configuration*
64 | # Eclipse
65 | .project
66 | .cproject
67 | .pydevproject
68 | .settings/
69 | build
70 | *.data
71 | build_plugin
72 | .idea
73 | recommonmark/
74 | tags
75 | *.iml
76 | *.class
77 | target
78 | *.swp
79 | 
80 | # cpp tests and gcov generated files
81 | *.gcov
82 | *.gcda
83 | *.gcno
84 | build_tests
85 | /tests/cpp/xgboost_test
86 | 
87 | .DS_Store
88 | 
89 | # spark
90 | metastore_db
91 | 
92 | plugin/updater_gpu/test/cpp/data
93 | 
--------------------------------------------------------------------------------
/xgboost/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | For bugs or installation issues, please provide the following information.
2 | The more information you provide, the more easily we will be able to offer
3 | help and advice.
4 | 
5 | ## Environment info
6 | Operating System:
7 | 
8 | Compiler:
9 | 
10 | Package used (python/R/jvm/C++):
11 | 
12 | `xgboost` version used:
13 | 
14 | If installing from source, please provide
15 | 
16 | 1. The commit hash (`git rev-parse HEAD`)
17 | 2. Logs, which will be helpful (if logs are large, please upload them as an attachment).
18 | 
19 | If you are using the jvm package, please
20 | 
21 | 1. add [jvm-packages] to the title so the issue can be quickly identified
22 | 2. provide the gcc version and distribution
23 | 
24 | If you are using the python package, please provide
25 | 
26 | 1. The python version and distribution
27 | 2. The command to install `xgboost` if you are not installing from source
28 | 
29 | If you are using the R package, please provide
30 | 
31 | 1. The R `sessionInfo()`
32 | 2. The command to install `xgboost` if you are not installing from source
33 | 
34 | ## Steps to reproduce
35 | 
36 | 1.
37 | 2.
38 | 3.
39 | 
40 | ## What have you tried?
41 | 
42 | 1.
43 | 2.
44 | 3.
45 | 
--------------------------------------------------------------------------------
/xgboost/Jenkinsfile:
--------------------------------------------------------------------------------
1 | // -*- mode: groovy -*-
2 | // Jenkins pipeline
3 | // See documentation at https://jenkins.io/doc/book/pipeline/jenkinsfile/
4 | 
5 | // command to start a docker container
6 | docker_run = 'tests/ci_build/ci_build.sh'
7 | 
8 | // timeout in minutes
9 | max_time = 60
10 | 
11 | // initialize source code
12 | def init_git() {
13 |   retry(5) {
14 |     try {
15 |       timeout(time: 2, unit: 'MINUTES') {
16 |         checkout scm
17 |         sh 'git submodule update --init'
18 |       }
19 |     } catch (exc) {
20 |       deleteDir()
21 |       error "Failed to fetch source codes"
22 |     }
23 |   }
24 | }
25 | 
26 | stage('Build') {
27 |   node('GPU' && 'linux') {
28 |     ws('workspace/xgboost/build-gpu-cmake') {
29 |       init_git()
30 |       timeout(time: max_time, unit: 'MINUTES') {
31 |         sh "${docker_run} gpu tests/ci_build/build_gpu_cmake.sh"
32 |       }
33 |     }
34 |   }
35 |   node('GPU' && 'linux') {
36 |     ws('workspace/xgboost/build-gpu-make') {
37 |       init_git()
38 |       timeout(time: max_time, unit: 'MINUTES') {
39 |         sh "${docker_run} gpu make PLUGIN_UPDATER_GPU=ON"
40 |       }
41 |     }
42 |   }
43 | }
44 | 
45 | 
46 | stage('Unit Test') {
47 |   node('GPU' && 'linux') {
48 |     ws('workspace/xgboost/unit-test') {
49 |       init_git()
50 |       timeout(time: max_time, unit: 'MINUTES') {
51 |         sh "${docker_run} gpu tests/ci_build/test_gpu.sh"
52 |       }
53 |     }
54 |   }
55 | }
56 | 
--------------------------------------------------------------------------------
/xgboost/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016 by Contributors
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 
--------------------------------------------------------------------------------
/xgboost/R-package/.Rbuildignore:
--------------------------------------------------------------------------------
1 | \.o$
2 | \.so$
3 | \.dll$
4 | ^.*\.Rproj$
5 | ^\.Rproj\.user$
6 | README.md
7 | 
--------------------------------------------------------------------------------
/xgboost/R-package/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 by Tianqi Chen and Contributors
2 | 
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | 
7 | http://www.apache.org/licenses/LICENSE-2.0
8 | 
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 
--------------------------------------------------------------------------------
/xgboost/R-package/R/xgb.DMatrix.save.R:
--------------------------------------------------------------------------------
1 | #' Save xgb.DMatrix object to binary file
2 | #'
3 | #' Save xgb.DMatrix object to binary file
4 | #'
5 | #' @param dmatrix the \code{xgb.DMatrix} object
6 | #' @param fname the name of the file to write.
7 | #'
8 | #' @examples
9 | #' data(agaricus.train, package='xgboost')
10 | #' train <- agaricus.train
11 | #' dtrain <- xgb.DMatrix(train$data, label=train$label)
12 | #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
13 | #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
14 | #' @export
15 | xgb.DMatrix.save <- function(dmatrix, fname) {
16 |   if (typeof(fname) != "character")
17 |     stop("fname must be character")
18 |   if (!inherits(dmatrix, "xgb.DMatrix"))
19 |     stop("dmatrix must be xgb.DMatrix")
20 | 
21 |   .Call(XGDMatrixSaveBinary_R, dmatrix, fname[1], 0L)
22 |   return(TRUE)
23 | }
24 | 
--------------------------------------------------------------------------------
/xgboost/R-package/R/xgb.save.raw.R:
--------------------------------------------------------------------------------
1 | #' Save xgboost model to R's raw vector;
2 | #' users can call xgb.load to load the model back from the raw vector
3 | #'
4 | #' Save xgboost model from xgboost or xgb.train
5 | #'
6 | #' @param model the model object.
7 | #'
8 | #' @examples
9 | #' data(agaricus.train, package='xgboost')
10 | #' data(agaricus.test, package='xgboost')
11 | #' train <- agaricus.train
12 | #' test <- agaricus.test
13 | #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
14 | #'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
15 | #' raw <- xgb.save.raw(bst)
16 | #' bst <- xgb.load(raw)
17 | #' pred <- predict(bst, test$data)
18 | #'
19 | #' @export
20 | xgb.save.raw <- function(model) {
21 |   model <- xgb.get.handle(model)
22 |   .Call(XGBoosterModelToRaw_R, model)
23 | }
24 | 
--------------------------------------------------------------------------------
/xgboost/R-package/cleanup:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | rm -f src/Makevars
4 | 
--------------------------------------------------------------------------------
/xgboost/R-package/configure.ac:
--------------------------------------------------------------------------------
1 | ### configure.ac -*- Autoconf -*-
2 | 
3 | AC_PREREQ(2.62)
4 | 
5 | AC_INIT([xgboost],[0.6-3],[],[xgboost],[])
6 | 
7 | OPENMP_CXXFLAGS=""
8 | 
9 | if test `uname -s` = "Linux"
10 | then
11 |   OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
12 | fi
13 | 
14 | if test `uname -s` = "Darwin"
15 | then
16 |   OPENMP_CXXFLAGS="\$(SHLIB_OPENMP_CFLAGS)"
17 |   ac_pkg_openmp=no
18 |   AC_MSG_CHECKING([whether OpenMP will work in a package])
19 |   AC_LANG_CONFTEST(
20 |   [AC_LANG_PROGRAM([[#include <omp.h>]], [[ return omp_get_num_threads (); ]])])
21 |   PKG_CFLAGS="${OPENMP_CFLAGS}" PKG_LIBS="${OPENMP_CFLAGS}" "$RBIN" CMD SHLIB conftest.c 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD && "$RBIN" --vanilla -q -e "dyn.load(paste('conftest',.Platform\$dynlib.ext,sep=''))" 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD && ac_pkg_openmp=yes
22 |   AC_MSG_RESULT([${ac_pkg_openmp}])
23 |   if test "${ac_pkg_openmp}" = no; then
24 |     OPENMP_CXXFLAGS=''
25 |   fi
26 | fi
27 | 
28 | AC_SUBST(OPENMP_CXXFLAGS)
29 | AC_CONFIG_FILES([src/Makevars])
30 | AC_OUTPUT
31 | 
32 | 
--------------------------------------------------------------------------------
/xgboost/R-package/data/agaricus.test.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/R-package/data/agaricus.test.rda
--------------------------------------------------------------------------------
/xgboost/R-package/data/agaricus.train.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/R-package/data/agaricus.train.rda
--------------------------------------------------------------------------------
/xgboost/R-package/demo/00Index:
--------------------------------------------------------------------------------
1 | basic_walkthrough   Basic feature walkthrough
2 | caret_wrapper   Use xgboost to train in caret library
3 | custom_objective   Customize loss function, and evaluation metric
4 | boost_from_prediction   Boosting from existing prediction
5 | predict_first_ntree   Predicting using first n trees
6 | generalized_linear_model   Generalized Linear Model
7 | cross_validation   Cross validation
8 | create_sparse_matrix   Create Sparse Matrix
9 | predict_leaf_indices   Predicting the corresponding leaves
10 | early_stopping   Early Stop in training
11 | poisson_regression   Poisson Regression on count data
12 | tweedie_regression   Tweedie Regression
13 | 
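The 00Index file above is the index that R's `demo()` facility reads: each row maps a demo name to a one-line description. A minimal sketch of how the demos listed there are discovered and run from an R session (assuming the xgboost R package is installed):

```r
# List the demos that 00Index registers for the installed package
demo(package = 'xgboost')

# Run one demo by the name in the left-hand column of 00Index
demo('poisson_regression', package = 'xgboost')
```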
--------------------------------------------------------------------------------
/xgboost/R-package/demo/README.md:
--------------------------------------------------------------------------------
1 | XGBoost R Feature Walkthrough
2 | ====
3 | * [Basic walkthrough of wrappers](basic_walkthrough.R)
4 | * [Train an xgboost model from the caret library](caret_wrapper.R)
5 | * [Customize loss function, and evaluation metric](custom_objective.R)
6 | * [Boosting from existing prediction](boost_from_prediction.R)
7 | * [Predicting using first n trees](predict_first_ntree.R)
8 | * [Generalized Linear Model](generalized_linear_model.R)
9 | * [Cross validation](cross_validation.R)
10 | * [Create a sparse matrix from a dense one](create_sparse_matrix.R)
11 | 
12 | Benchmarks
13 | ====
14 | * [Starter script for Kaggle Higgs Boson](../../demo/kaggle-higgs)
15 | 
16 | Notes
17 | ====
18 | * Contributions of examples and benchmarks are more than welcome!
19 | * If you'd like to share how you use xgboost to solve your problem, send a pull request :)
20 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/boost_from_prediction.R:
--------------------------------------------------------------------------------
1 | require(xgboost)
2 | # load in the agaricus dataset
3 | data(agaricus.train, package='xgboost')
4 | data(agaricus.test, package='xgboost')
5 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
6 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
7 | 
8 | watchlist <- list(eval = dtest, train = dtrain)
9 | ###
10 | # advanced: start from an initial base prediction
11 | #
12 | print('start running example to start from an initial prediction')
13 | # train xgboost for 1 round
14 | param <- list(max_depth=2, eta=1, nthread = 2, silent=1, objective='binary:logistic')
15 | bst <- xgb.train(param, dtrain, 1, watchlist)
16 | # Note: we need the margin value instead of transformed prediction in set_base_margin
17 | # predicting with outputmargin=TRUE will always give you margin values before the logistic transformation
18 | ptrain <- predict(bst, dtrain, outputmargin=TRUE)
19 | ptest <- predict(bst, dtest, outputmargin=TRUE)
20 | # set the base_margin property of dtrain and dtest
21 | # base margin is the base prediction we will boost from
22 | setinfo(dtrain, "base_margin", ptrain)
23 | setinfo(dtest, "base_margin", ptest)
24 | 
25 | print('this is result of boost from initial prediction')
26 | bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
27 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/poisson_regression.R:
--------------------------------------------------------------------------------
1 | data(mtcars)
2 | head(mtcars)
3 | bst = xgboost(data=as.matrix(mtcars[,-11]),label=mtcars[,11],
4 |               objective='count:poisson',nrounds=5)
5 | pred = predict(bst,as.matrix(mtcars[,-11]))
6 | sqrt(mean((pred-mtcars[,11])^2))
7 | 
8 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/predict_first_ntree.R:
--------------------------------------------------------------------------------
1 | require(xgboost)
2 | # load in the agaricus dataset
3 | data(agaricus.train, package='xgboost')
4 | data(agaricus.test, package='xgboost')
5 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
6 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
7 | 
8 | param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic')
9 | watchlist <- list(eval = dtest, train = dtrain)
10 | nround = 2
11 | 
12 | # training the model for two rounds
13 | bst = xgb.train(param, dtrain, nround, nthread = 2, watchlist)
14 | cat('start testing prediction from first n trees\n')
15 | labels <- getinfo(dtest,'label')
16 | 
17 | ### predict using first 1 tree
18 | ypred1 = predict(bst, dtest, ntreelimit=1)
19 | # by default, we predict using all the trees
20 | ypred2 = predict(bst, dtest)
21 | 
22 | cat('error of ypred1=', mean(as.numeric(ypred1>0.5)!=labels),'\n')
23 | cat('error of ypred2=', mean(as.numeric(ypred2>0.5)!=labels),'\n')
24 | 
--------------------------------------------------------------------------------
/xgboost/R-package/demo/runall.R:
--------------------------------------------------------------------------------
1 | # running all scripts in demo folder
2 | demo(basic_walkthrough)
3 | demo(custom_objective)
4 | demo(boost_from_prediction)
5 | demo(predict_first_ntree)
6 | demo(generalized_linear_model)
7 | demo(cross_validation)
8 | demo(create_sparse_matrix)
9 | demo(predict_leaf_indices)
10 | demo(early_stopping)
11 | demo(poisson_regression)
12 | demo(caret_wrapper)
13 | demo(tweedie_regression)
--------------------------------------------------------------------------------
/xgboost/R-package/demo/tweedie_regression.R:
--------------------------------------------------------------------------------
1 | library(xgboost)
2 | library(data.table)
3 | library(cplm)
4 | 
5 | data(AutoClaim)
6 | 
7 | # auto insurance dataset analyzed by Yip and Yau (2005)
8 | dt <- data.table(AutoClaim)
9 | 
10 | # exclude these columns from the model matrix
11 | exclude <- c('POLICYNO', 'PLCYDATE', 'CLM_FREQ5', 'CLM_AMT5', 'CLM_FLAG', 'IN_YY')
12 | 
13 | # retain the missing values
14 | # NOTE: this dataset comes ready out of the box
15 | options(na.action = 'na.pass')
16 | x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = F])
17 | options(na.action = 'na.omit')
18 | 
19 | # response
20 | y <- dt[, CLM_AMT5]
21 | 
22 | d_train <- xgb.DMatrix(data = x, label = y, missing = NA)
23 | 
24 | # the tweedie_variance_power parameter determines the shape of the
25 | # distribution
26 | # - closer to 1 is more poisson like and the mass
27 | #   is more concentrated near zero
28 | # - closer to 2 is more gamma like and the mass spreads to
29 | #   the right with less concentration near zero
30 | 
31 | params <- list(
32 |   objective = 'reg:tweedie',
33 |   eval_metric = 'rmse',
34 |   tweedie_variance_power = 1.4,
35 |   max_depth = 6,
36 |   eta = 1)
37 | 
38 | bst <- xgb.train(
39 |   data = d_train,
40 |   params = params,
41 |   maximize = FALSE,
42 |   watchlist = list(train = d_train),
43 |   nrounds = 20)
44 | 
45 | var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
46 | 
47 | preds <- predict(bst, d_train)
48 | 
49 | rmse <- sqrt(mean((y - preds)^2))
--------------------------------------------------------------------------------
/xgboost/R-package/man/agaricus.test.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xgboost.R
3 | \docType{data}
4 | \name{agaricus.test}
5 | \alias{agaricus.test}
6 | \title{Test part from Mushroom Data Set}
7 | \format{A list containing a label vector, and a dgCMatrix object with 1611
8 | rows and 126 variables}
9 | \usage{
10 | data(agaricus.test)
11 | }
12 | \description{
13 | This data set is originally from the Mushroom data set,
14 | UCI Machine Learning Repository.
15 | }
16 | \details{
17 | This data set includes the following fields:
18 | 
19 | \itemize{
20 |   \item \code{label} the label for each record
21 |   \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
22 | }
23 | }
24 | \references{
25 | https://archive.ics.uci.edu/ml/datasets/Mushroom
26 | 
27 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
28 | [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
29 | School of Information and Computer Science.
30 | }
31 | \keyword{datasets}
32 | 
--------------------------------------------------------------------------------
/xgboost/R-package/man/agaricus.train.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xgboost.R
3 | \docType{data}
4 | \name{agaricus.train}
5 | \alias{agaricus.train}
6 | \title{Training part from Mushroom Data Set}
7 | \format{A list containing a label vector, and a dgCMatrix object with 6513
8 | rows and 127 variables}
9 | \usage{
10 | data(agaricus.train)
11 | }
12 | \description{
13 | This data set is originally from the Mushroom data set,
14 | UCI Machine Learning Repository.
15 | }
16 | \details{
17 | This data set includes the following fields:
18 | 
19 | \itemize{
20 |   \item \code{label} the label for each record
21 |   \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
22 | }
23 | }
24 | \references{
25 | https://archive.ics.uci.edu/ml/datasets/Mushroom
26 | 
27 | Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
28 | [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
29 | School of Information and Computer Science.
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.evaluation.log.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.evaluation.log} 4 | \alias{cb.evaluation.log} 5 | \title{Callback closure for logging the evaluation history} 6 | \usage{ 7 | cb.evaluation.log() 8 | } 9 | \description{ 10 | Callback closure for logging the evaluation history 11 | } 12 | \details{ 13 | This callback function appends the current iteration evaluation results \code{bst_evaluation} 14 | available in the calling parent frame to the \code{evaluation_log} list in a calling frame. 15 | 16 | The finalizer callback (called with \code{finalize = TURE} in the end) converts 17 | the \code{evaluation_log} list into a final data.table. 18 | 19 | The iteration evaluation result \code{bst_evaluation} must be a named numeric vector. 20 | 21 | Note: in the column names of the final data.table, the dash '-' character is replaced with 22 | the underscore '_' in order to make the column names more like regular R identifiers. 23 | 24 | Callback function expects the following values to be set in its calling frame: 25 | \code{evaluation_log}, 26 | \code{bst_evaluation}, 27 | \code{iteration}. 28 | } 29 | \seealso{ 30 | \code{\link{callbacks}} 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.print.evaluation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.print.evaluation} 4 | \alias{cb.print.evaluation} 5 | \title{Callback closure for printing the result of evaluation} 6 | \usage{ 7 | cb.print.evaluation(period = 1, showsd = TRUE) 8 | } 9 | \arguments{ 10 | \item{period}{results would be printed every number of periods} 11 | 12 | \item{showsd}{whether standard deviations should be printed (when available)} 13 | } 14 | \description{ 15 | Callback closure for printing the result of evaluation 16 | } 17 | \details{ 18 | The callback function prints the result of evaluation at every \code{period} iterations. 19 | The initial and the last iteration's evaluations are always printed. 20 | 21 | Callback function expects the following values to be set in its calling frame: 22 | \code{bst_evaluation} (also \code{bst_evaluation_err} when available), 23 | \code{iteration}, 24 | \code{begin_iteration}, 25 | \code{end_iteration}. 26 | } 27 | \seealso{ 28 | \code{\link{callbacks}} 29 | } 30 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.reset.parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.reset.parameters} 4 | \alias{cb.reset.parameters} 5 | \title{Callback closure for restetting the booster's parameters at each iteration.} 6 | \usage{ 7 | cb.reset.parameters(new_params) 8 | } 9 | \arguments{ 10 | \item{new_params}{a list where each element corresponds to a parameter that needs to be reset. 
11 | Each element's value must be either a vector of values of length \code{nrounds} 12 | to be set at each iteration, 13 | or a function of two parameters \code{learning_rates(iteration, nrounds)} 14 | which returns a new parameter value by using the current iteration number 15 | and the total number of boosting rounds.} 16 | } 17 | \description{ 18 | Callback closure for restetting the booster's parameters at each iteration. 19 | } 20 | \details{ 21 | This is a "pre-iteration" callback function used to reset booster's parameters 22 | at the beginning of each iteration. 23 | 24 | Note that when training is resumed from some previous model, and a function is used to 25 | reset a parameter value, the \code{nround} argument in this function would be the 26 | the number of boosting rounds in the current training. 27 | 28 | Callback function expects the following values to be set in its calling frame: 29 | \code{bst} or \code{bst_folds}, 30 | \code{iteration}, 31 | \code{begin_iteration}, 32 | \code{end_iteration}. 33 | } 34 | \seealso{ 35 | \code{\link{callbacks}} 36 | } 37 | -------------------------------------------------------------------------------- /xgboost/R-package/man/cb.save.model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/callbacks.R 3 | \name{cb.save.model} 4 | \alias{cb.save.model} 5 | \title{Callback closure for saving a model file.} 6 | \usage{ 7 | cb.save.model(save_period = 0, save_name = "xgboost.model") 8 | } 9 | \arguments{ 10 | \item{save_period}{save the model to disk after every 11 | \code{save_period} iterations; 0 means save the model at the end.} 12 | 13 | \item{save_name}{the name or path for the saved model file. 14 | It can contain a \code{\link[base]{sprintf}} formatting specifier 15 | to include the integer iteration number in the file name. 16 | E.g., with \code{save_name} = 'xgboost_%04d.model', 17 | the file saved at iteration 50 would be named "xgboost_0050.model".} 18 | } 19 | \description{ 20 | Callback closure for saving a model file. 21 | } 22 | \details{ 23 | This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end. 24 | 25 | Callback function expects the following values to be set in its calling frame: 26 | \code{bst}, 27 | \code{iteration}, 28 | \code{begin_iteration}, 29 | \code{end_iteration}. 30 | } 31 | \seealso{ 32 | \code{\link{callbacks}} 33 | } 34 | -------------------------------------------------------------------------------- /xgboost/R-package/man/dim.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{dim.xgb.DMatrix} 4 | \alias{dim.xgb.DMatrix} 5 | \title{Dimensions of xgb.DMatrix} 6 | \usage{ 7 | \method{dim}{xgb.DMatrix}(x) 8 | } 9 | \arguments{ 10 | \item{x}{Object of class \code{xgb.DMatrix}} 11 | } 12 | \description{ 13 | Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}. 14 | } 15 | \details{ 16 | Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also 17 | be directly used with an \code{xgb.DMatrix} object. 
18 | } 19 | \examples{ 20 | data(agaricus.train, package='xgboost') 21 | train <- agaricus.train 22 | dtrain <- xgb.DMatrix(train$data, label=train$label) 23 | 24 | stopifnot(nrow(dtrain) == nrow(train$data)) 25 | stopifnot(ncol(dtrain) == ncol(train$data)) 26 | stopifnot(all(dim(dtrain) == dim(train$data))) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /xgboost/R-package/man/dimnames.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{dimnames.xgb.DMatrix} 4 | \alias{dimnames.xgb.DMatrix} 5 | \alias{dimnames<-.xgb.DMatrix} 6 | \title{Handling of column names of \code{xgb.DMatrix}} 7 | \usage{ 8 | \method{dimnames}{xgb.DMatrix}(x) 9 | 10 | \method{dimnames}{xgb.DMatrix}(x) <- value 11 | } 12 | \arguments{ 13 | \item{x}{object of class \code{xgb.DMatrix}} 14 | 15 | \item{value}{a list of two elements: the first one is ignored 16 | and the second one is column names} 17 | } 18 | \description{ 19 | Only column names are supported for \code{xgb.DMatrix}, thus setting 20 | row names would have no effect and the returned row names would be NULL. 21 | } 22 | \details{ 23 | Generic \code{dimnames} methods are used by \code{colnames}. 24 | Since row names are irrelevant, it is recommended to use \code{colnames} directly. 25 | } 26 | \examples{ 27 | data(agaricus.train, package='xgboost') 28 | train <- agaricus.train 29 | dtrain <- xgb.DMatrix(train$data, label=train$label) 30 | dimnames(dtrain) 31 | colnames(dtrain) 32 | colnames(dtrain) <- make.names(1:ncol(train$data)) 33 | print(dtrain, verbose=TRUE) 34 | 35 | } 36 | -------------------------------------------------------------------------------- /xgboost/R-package/man/getinfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{getinfo} 4 | \alias{getinfo} 5 | \alias{getinfo.xgb.DMatrix} 6 | \title{Get information of an xgb.DMatrix object} 7 | \usage{ 8 | getinfo(object, ...) 9 | 10 | \method{getinfo}{xgb.DMatrix}(object, name, ...) 11 | } 12 | \arguments{ 13 | \item{object}{Object of class \code{xgb.DMatrix}} 14 | 15 | \item{...}{other parameters} 16 | 17 | \item{name}{the name of the information field to get (see details)} 18 | } 19 | \description{ 20 | Get information of an xgb.DMatrix object 21 | } 22 | \details{ 23 | The \code{name} field can be one of the following: 24 | 25 | \itemize{ 26 | \item \code{label}: the label XGBoost learns from; 27 | \item \code{weight}: instance weights used to rescale each row's contribution; 28 | \item \code{base_margin}: the base prediction XGBoost will boost from; 29 | \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. 30 | 31 | } 32 | 33 | \code{group} can be set by \code{setinfo} but can't be retrieved by \code{getinfo}.
34 | } 35 | \examples{ 36 | data(agaricus.train, package='xgboost') 37 | train <- agaricus.train 38 | dtrain <- xgb.DMatrix(train$data, label=train$label) 39 | 40 | labels <- getinfo(dtrain, 'label') 41 | setinfo(dtrain, 'label', 1-labels) 42 | 43 | labels2 <- getinfo(dtrain, 'label') 44 | stopifnot(all(labels2 == 1-labels)) 45 | } 46 | -------------------------------------------------------------------------------- /xgboost/R-package/man/print.xgb.Booster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{print.xgb.Booster} 4 | \alias{print.xgb.Booster} 5 | \title{Print xgb.Booster} 6 | \usage{ 7 | \method{print}{xgb.Booster}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an xgb.Booster object} 11 | 12 | \item{verbose}{whether to print detailed data (e.g., attribute values)} 13 | 14 | \item{...}{not currently used} 15 | } 16 | \description{ 17 | Print information about xgb.Booster. 18 | } 19 | \examples{ 20 | data(agaricus.train, package='xgboost') 21 | train <- agaricus.train 22 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 23 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 24 | attr(bst, 'myattr') <- 'memo' 25 | 26 | print(bst) 27 | print(bst, verbose=TRUE) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /xgboost/R-package/man/print.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{print.xgb.DMatrix} 4 | \alias{print.xgb.DMatrix} 5 | \title{Print xgb.DMatrix} 6 | \usage{ 7 | \method{print}{xgb.DMatrix}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an xgb.DMatrix object} 11 | 12 | \item{verbose}{whether to print colnames (when present)} 13 | 14 | \item{...}{not currently used} 15 | } 16 | \description{ 17 | Print information about xgb.DMatrix. 18 | Currently it displays dimensions and presence of info-fields and colnames. 19 | } 20 | \examples{ 21 | data(agaricus.train, package='xgboost') 22 | train <- agaricus.train 23 | dtrain <- xgb.DMatrix(train$data, label=train$label) 24 | 25 | dtrain 26 | print(dtrain, verbose=TRUE) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /xgboost/R-package/man/print.xgb.cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.cv.R 3 | \name{print.xgb.cv.synchronous} 4 | \alias{print.xgb.cv.synchronous} 5 | \title{Print xgb.cv result} 6 | \usage{ 7 | \method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{x}{an \code{xgb.cv.synchronous} object} 11 | 12 | \item{verbose}{whether to print detailed data} 13 | 14 | \item{...}{passed to \code{print.data.table}} 15 | } 16 | \description{ 17 | Prints formatted results of \code{xgb.cv}. 18 | } 19 | \details{ 20 | When not verbose, only the evaluation results are printed, 21 | including the best iteration (when available).
22 | } 23 | \examples{ 24 | data(agaricus.train, package='xgboost') 25 | train <- agaricus.train 26 | cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2, 27 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 28 | print(cv) 29 | print(cv, verbose=TRUE) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/setinfo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{setinfo} 4 | \alias{setinfo} 5 | \alias{setinfo.xgb.DMatrix} 6 | \title{Set information of an xgb.DMatrix object} 7 | \usage{ 8 | setinfo(object, ...) 9 | 10 | \method{setinfo}{xgb.DMatrix}(object, name, info, ...) 11 | } 12 | \arguments{ 13 | \item{object}{Object of class "xgb.DMatrix"} 14 | 15 | \item{...}{other parameters} 16 | 17 | \item{name}{the name of the field to set} 18 | 19 | \item{info}{the information to set in the specified field} 20 | } 21 | \description{ 22 | Set information of an xgb.DMatrix object 23 | } 24 | \details{ 25 | The \code{name} field can be one of the following: 26 | 27 | \itemize{ 28 | \item \code{label}: the label XGBoost learns from; 29 | \item \code{weight}: instance weights used to rescale each row's contribution; 30 | \item \code{base_margin}: the base prediction XGBoost will boost from; 31 | \item \code{group}: number of rows in each group (to use with the \code{rank:pairwise} objective). 32 | } 33 | } 34 | \examples{ 35 | data(agaricus.train, package='xgboost') 36 | train <- agaricus.train 37 | dtrain <- xgb.DMatrix(train$data, label=train$label) 38 | 39 | labels <- getinfo(dtrain, 'label') 40 | setinfo(dtrain, 'label', 1-labels) 41 | labels2 <- getinfo(dtrain, 'label') 42 | stopifnot(all.equal(labels2, 1-labels)) 43 | } 44 | -------------------------------------------------------------------------------- /xgboost/R-package/man/slice.xgb.DMatrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.R 3 | \name{slice} 4 | \alias{slice} 5 | \alias{slice.xgb.DMatrix} 6 | \alias{[.xgb.DMatrix} 7 | \title{Get a new DMatrix containing the specified rows of 8 | the original xgb.DMatrix object} 9 | \usage{ 10 | slice(object, ...) 11 | 12 | \method{slice}{xgb.DMatrix}(object, idxset, ...)
13 | 14 | \method{[}{xgb.DMatrix}(object, idxset, colset = NULL) 15 | } 16 | \arguments{ 17 | \item{object}{Object of class "xgb.DMatrix"} 18 | 19 | \item{...}{other parameters (currently not used)} 20 | 21 | \item{idxset}{an integer vector of indices of the rows needed} 22 | 23 | \item{colset}{currently not used (column subsetting is not available)} 24 | } 25 | \description{ 26 | Get a new DMatrix containing the specified rows of 27 | the original xgb.DMatrix object 28 | } 29 | \examples{ 30 | data(agaricus.train, package='xgboost') 31 | train <- agaricus.train 32 | dtrain <- xgb.DMatrix(train$data, label=train$label) 33 | 34 | dsub <- slice(dtrain, 1:42) 35 | labels1 <- getinfo(dsub, 'label') 36 | dsub <- dtrain[1:42, ] 37 | labels2 <- getinfo(dsub, 'label') 38 | all.equal(labels1, labels2) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.DMatrix.save.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.DMatrix.save.R 3 | \name{xgb.DMatrix.save} 4 | \alias{xgb.DMatrix.save} 5 | \title{Save xgb.DMatrix object to binary file} 6 | \usage{ 7 | xgb.DMatrix.save(dmatrix, fname) 8 | } 9 | \arguments{ 10 | \item{dmatrix}{the \code{xgb.DMatrix} object} 11 | 12 | \item{fname}{the name of the file to write.} 13 | } 14 | \description{ 15 | Save xgb.DMatrix object to binary file 16 | } 17 | \examples{ 18 | data(agaricus.train, package='xgboost') 19 | train <- agaricus.train 20 | dtrain <- xgb.DMatrix(train$data, label=train$label) 21 | xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') 22 | dtrain <- xgb.DMatrix('xgb.DMatrix.data') 23 | } 24 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.load.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.load.R 3 | \name{xgb.load} 4 | \alias{xgb.load} 5 | \title{Load xgboost model from binary file} 6 | \usage{ 7 | xgb.load(modelfile) 8 | } 9 | \arguments{ 10 | \item{modelfile}{the name of the binary input file.} 11 | } 12 | \value{ 13 | An object of \code{xgb.Booster} class. 14 | } 15 | \description{ 16 | Load xgboost model from the binary model file. 17 | } 18 | \details{ 19 | The input file is expected to contain a model saved in an xgboost-internal binary format 20 | using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some 21 | appropriate methods from other xgboost interfaces. E.g., a model trained in Python and 22 | saved from there in xgboost format could be loaded from R. 23 | 24 | Note: a model saved as an R object has to be loaded using the corresponding R methods, 25 | not \code{xgb.load}. 26 | } 27 | \examples{ 28 | data(agaricus.train, package='xgboost') 29 | data(agaricus.test, package='xgboost') 30 | train <- agaricus.train 31 | test <- agaricus.test 32 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 33 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 34 | xgb.save(bst, 'xgb.model') 35 | bst <- xgb.load('xgb.model') 36 | pred <- predict(bst, test$data) 37 | } 38 | \seealso{ 39 | \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
40 | } 41 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.Booster.R 3 | \name{xgb.parameters<-} 4 | \alias{xgb.parameters<-} 5 | \title{Accessors for model parameters.} 6 | \usage{ 7 | xgb.parameters(object) <- value 8 | } 9 | \arguments{ 10 | \item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.} 11 | 12 | \item{value}{a list (or an object coercible to a list) with the names of parameters to set 13 | and the elements corresponding to parameter values.} 14 | } 15 | \description{ 16 | Only the setter for xgboost parameters is currently implemented. 17 | } 18 | \details{ 19 | Note that the setter would usually work more efficiently for \code{xgb.Booster.handle} 20 | than for \code{xgb.Booster}, since only a handle needs to be copied. 21 | } 22 | \examples{ 23 | data(agaricus.train, package='xgboost') 24 | train <- agaricus.train 25 | 26 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 27 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 28 | 29 | xgb.parameters(bst) <- list(eta = 0.1) 30 | 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgb.save.raw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/xgb.save.raw.R 3 | \name{xgb.save.raw} 4 | \alias{xgb.save.raw} 5 | \title{Save xgboost model to R's raw vector; 6 | the model can be loaded back from the raw vector by calling xgb.load} 7 | \usage{ 8 | xgb.save.raw(model) 9 | } 10 | \arguments{ 11 | \item{model}{the model object.} 12 | } 13 | \description{ 14 | Save an xgboost model produced by \code{xgboost} or \code{xgb.train} 15 | } 16 | \examples{ 17 | data(agaricus.train, package='xgboost') 18 | data(agaricus.test, package='xgboost') 19 | train <- agaricus.train 20 | test <- agaricus.test 21 | bst <- xgboost(data = train$data, label = train$label, max_depth = 2, 22 | eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") 23 | raw <- xgb.save.raw(bst) 24 | bst <- xgb.load(raw) 25 | pred <- predict(bst, test$data) 26 | 27 | } 28 | -------------------------------------------------------------------------------- /xgboost/R-package/man/xgboost-deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{xgboost-deprecated} 4 | \alias{xgboost-deprecated} 5 | \title{Deprecation notices.} 6 | \description{ 7 | At this time, some of the parameter names were changed in order to make the code style more uniform. 8 | The deprecated parameters will be removed in the next release. 9 | } 10 | \details{ 11 | To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table. 12 | 13 | A deprecation warning is shown when any of the deprecated parameters is used in a call. 14 | An additional warning is shown when there was a partial match to a deprecated parameter 15 | (as R is able to partially match parameter names).
16 | } 17 | -------------------------------------------------------------------------------- /xgboost/R-package/src/Makevars.in: -------------------------------------------------------------------------------- 1 | # package root 2 | PKGROOT=../../ 3 | ENABLE_STD_THREAD=1 4 | # _*_ mode: Makefile; _*_ 5 | 6 | CXX_STD = CXX11 7 | 8 | XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ 9 | -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ 10 | -DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\ 11 | -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ 12 | 13 | PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS) 14 | PKG_CXXFLAGS= @OPENMP_CXXFLAGS@ $(SHLIB_PTHREAD_FLAGS) 15 | PKG_LIBS = @OPENMP_CXXFLAGS@ $(SHLIB_PTHREAD_FLAGS) 16 | OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o ./init.o\ 17 | $(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o\ 18 | $(PKGROOT)/rabit/src/engine_empty.o $(PKGROOT)/rabit/src/c_api.o 19 | -------------------------------------------------------------------------------- /xgboost/R-package/src/Makevars.win: -------------------------------------------------------------------------------- 1 | # package root 2 | PKGROOT=./ 3 | ENABLE_STD_THREAD=0 4 | # _*_ mode: Makefile; _*_ 5 | 6 | # This file is only used for Windows compilation from GitHub 7 | # It will be replaced by Makevars in the CRAN version 8 | .PHONY: all xgblib 9 | all: $(SHLIB) 10 | $(SHLIB): xgblib 11 | xgblib: 12 | cp -r ../../src . 13 | cp -r ../../rabit . 14 | cp -r ../../dmlc-core . 15 | cp -r ../../include . 16 | cp -r ../../amalgamation . 17 | 18 | CXX_STD = CXX11 19 | 20 | XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ 21 | -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ 22 | -DDMLC_LOG_CUSTOMIZE=1 -DXGBOOST_CUSTOMIZE_LOGGER=1\ 23 | -DRABIT_CUSTOMIZE_MSG_ -DRABIT_STRICT_CXX98_ 24 | 25 | PKG_CPPFLAGS= -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include -I$(PKGROOT)/rabit/include -I$(PKGROOT) $(XGB_RFLAGS) 26 | PKG_CXXFLAGS= $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) 27 | PKG_LIBS = $(SHLIB_OPENMP_CFLAGS) $(SHLIB_PTHREAD_FLAGS) 28 | OBJECTS= ./xgboost_R.o ./xgboost_custom.o ./xgboost_assert.o ./init.o\ 29 | $(PKGROOT)/amalgamation/xgboost-all0.o $(PKGROOT)/amalgamation/dmlc-minimum0.o\ 30 | $(PKGROOT)/rabit/src/engine_empty.o $(PKGROOT)/rabit/src/c_api.o 31 | 32 | $(OBJECTS) : xgblib 33 | -------------------------------------------------------------------------------- /xgboost/R-package/src/xgboost_assert.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 by Contributors 2 | #include <stdio.h>  /* vsprintf; header names restored, they were lost in extraction */ 3 | #include <stdarg.h> /* va_list, va_start, va_end */ 4 | #include <R.h>      /* error() */ 5 | 6 | // implements error handling 7 | void XGBoostAssert_R(int exp, const char *fmt, ...) { 8 | char buf[1024]; 9 | if (exp == 0) { 10 | va_list args; 11 | va_start(args, fmt); 12 | vsprintf(buf, fmt, args); 13 | va_end(args); 14 | error("AssertError:%s\n", buf); 15 | } 16 | } 17 | void XGBoostCheck_R(int exp, const char *fmt, ...)
{ 18 | char buf[1024]; 19 | if (exp == 0) { 20 | va_list args; 21 | va_start(args, fmt); 22 | vsprintf(buf, fmt, args); 23 | va_end(args); 24 | error("%s\n", buf); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(xgboost) 3 | 4 | test_check("xgboost") 5 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_gc_safety.R: -------------------------------------------------------------------------------- 1 | require(xgboost) 2 | 3 | context("Garbage Collection Safety Check") 4 | 5 | test_that("train and prediction when gctorture is on", { 6 | data(agaricus.train, package='xgboost') 7 | data(agaricus.test, package='xgboost') 8 | train <- agaricus.train 9 | test <- agaricus.test 10 | gctorture(TRUE) 11 | bst <- xgboost(data = train$data, label = train$label, max.depth = 2, 12 | eta = 1, nthread = 2, nround = 2, objective = "binary:logistic") 13 | pred <- predict(bst, test$data) 14 | gctorture(FALSE) 15 | }) 16 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_glm.R: -------------------------------------------------------------------------------- 1 | context('Test generalized linear models') 2 | 3 | require(xgboost) 4 | 5 | test_that("glm works", { 6 | data(agaricus.train, package='xgboost') 7 | data(agaricus.test, package='xgboost') 8 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) 9 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) 10 | expect_equal(class(dtrain), "xgb.DMatrix") 11 | expect_equal(class(dtest), "xgb.DMatrix") 12 | param <- list(objective = "binary:logistic", booster = "gblinear", 13 | nthread = 2, alpha = 0.0001, lambda = 1) 14 | watchlist <- list(eval = dtest, train = dtrain) 15 | num_round <- 2 16 | bst <- xgb.train(param, dtrain, num_round, watchlist) 17 | ypred <- predict(bst, dtest) 18 | expect_equal(length(getinfo(dtest, 'label')), 1611) 19 | }) 20 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_lint.R: -------------------------------------------------------------------------------- 1 | context("Code is of high quality and lint free") 2 | test_that("Code Lint", { 3 | skip_on_cran() 4 | skip_on_travis() 5 | skip_if_not_installed("lintr") 6 | my_linters <- list( 7 | absolute_paths_linter=lintr::absolute_paths_linter, 8 | assignment_linter=lintr::assignment_linter, 9 | closed_curly_linter=lintr::closed_curly_linter, 10 | commas_linter=lintr::commas_linter, 11 | # commented_code_linter=lintr::commented_code_linter, 12 | infix_spaces_linter=lintr::infix_spaces_linter, 13 | line_length_linter=lintr::line_length_linter, 14 | no_tab_linter=lintr::no_tab_linter, 15 | object_usage_linter=lintr::object_usage_linter, 16 | # snake_case_linter=lintr::snake_case_linter, 17 | # multiple_dots_linter=lintr::multiple_dots_linter, 18 | object_length_linter=lintr::object_length_linter, 19 | open_curly_linter=lintr::open_curly_linter, 20 | # single_quotes_linter=lintr::single_quotes_linter, 21 | spaces_inside_linter=lintr::spaces_inside_linter, 22 | spaces_left_parentheses_linter=lintr::spaces_left_parentheses_linter, 23 | trailing_blank_lines_linter=lintr::trailing_blank_lines_linter, 24 | 
trailing_whitespace_linter=lintr::trailing_whitespace_linter 25 | ) 26 | # lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality 27 | }) 28 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_monotone.R: -------------------------------------------------------------------------------- 1 | require(xgboost) 2 | 3 | context("monotone constraints") 4 | 5 | set.seed(1024) 6 | x = rnorm(1000, 10) 7 | y = -1*x + rnorm(1000, 0.001) + 3*sin(x) 8 | train = matrix(x, ncol = 1) 9 | 10 | 11 | test_that("monotone constraints for regression", { 12 | bst = xgboost(data = train, label = y, max_depth = 2, 13 | eta = 0.1, nthread = 2, nrounds = 100, verbose = 0, 14 | monotone_constraints = -1) 15 | 16 | pred = predict(bst, train) 17 | 18 | ind = order(train[,1]) 19 | pred.ord = pred[ind] 20 | expect_true({ 21 | !any(diff(pred.ord) > 0) 22 | }, "Monotone Constraint Satisfied") 23 | 24 | }) 25 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_parameter_exposure.R: -------------------------------------------------------------------------------- 1 | context('Test model params and call are exposed to R') 2 | 3 | require(xgboost) 4 | 5 | data(agaricus.train, package='xgboost') 6 | data(agaricus.test, package='xgboost') 7 | 8 | dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) 9 | dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label) 10 | 11 | bst <- xgboost(data = dtrain, 12 | max_depth = 2, 13 | eta = 1, 14 | nrounds = 10, 15 | nthread = 1, 16 | verbose = 0, 17 | objective = "binary:logistic") 18 | 19 | test_that("call is exposed to R", { 20 | expect_false(is.null(bst$call)) 21 | expect_is(bst$call, "call") 22 | }) 23 | 24 | test_that("params is exposed to R", { 25 | model_params <- bst$params 26 | expect_is(model_params, "list") 27 | expect_equal(model_params$eta, 1) 28 | expect_equal(model_params$max_depth, 2) 29 | expect_equal(model_params$objective, "binary:logistic") 30 | }) 31 | -------------------------------------------------------------------------------- /xgboost/R-package/tests/testthat/test_poisson_regression.R: -------------------------------------------------------------------------------- 1 | context('Test poisson regression model') 2 | 3 | require(xgboost) 4 | set.seed(1994) 5 | 6 | test_that("poisson regression works", { 7 | data(mtcars) 8 | bst <- xgboost(data = as.matrix(mtcars[,-11]), label = mtcars[,11], 9 | objective = 'count:poisson', nrounds=10, verbose=0) 10 | expect_equal(class(bst), "xgb.Booster") 11 | pred <- predict(bst, as.matrix(mtcars[, -11])) 12 | expect_equal(length(pred), 32) 13 | expect_lt(sqrt(mean( (pred - mtcars[,11])^2 )), 1.2) 14 | }) 15 | -------------------------------------------------------------------------------- /xgboost/R-package/vignettes/xgboost.bib: -------------------------------------------------------------------------------- 1 | @article{friedman2001greedy, 2 | title={Greedy function approximation: a gradient boosting machine}, 3 | author={Friedman, Jerome H}, 4 | journal={Annals of Statistics}, 5 | pages={1189--1232}, 6 | year={2001}, 7 | publisher={JSTOR} 8 | } 9 | 10 | @article{friedman2000additive, 11 | title={Additive logistic regression: a statistical view of boosting (with discussion and a rejoinder by the authors)}, 12 | author={Friedman, Jerome and Hastie, Trevor and Tibshirani, Robert and others}, 13 | journal={The Annals of
Statistics}, 14 | volume={28}, 15 | number={2}, 16 | pages={337--407}, 17 | year={2000}, 18 | publisher={Institute of Mathematical Statistics} 19 | } 20 | 21 | 22 | @misc{ 23 | Bache+Lichman:2013 , 24 | author = "K. Bache and M. Lichman", 25 | year = "2013", 26 | title = "{UCI} Machine Learning Repository", 27 | url = "http://archive.ics.uci.edu/ml", 28 | institution = "University of California, Irvine, School of Information and Computer Sciences" 29 | } 30 | 31 | -------------------------------------------------------------------------------- /xgboost/amalgamation/dmlc-minimum0.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2015 by Contributors. 3 | * \brief Minimum DMLC library amalgamation, used for easy plugin of the dmlc lib. 4 | * Normally this is not needed. 5 | */ 6 | #include "../dmlc-core/src/io/line_split.cc" 7 | #include "../dmlc-core/src/io/recordio_split.cc" 8 | #include "../dmlc-core/src/io/input_split_base.cc" 9 | #include "../dmlc-core/src/io/local_filesys.cc" 10 | #include "../dmlc-core/src/data.cc" 11 | #include "../dmlc-core/src/io.cc" 12 | #include "../dmlc-core/src/recordio.cc" 13 | 14 | 15 | -------------------------------------------------------------------------------- /xgboost/appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - target: native 4 | solution_name: C:/projects/xgboost/build2013/xgboost.sln 5 | - target: native 6 | solution_name: C:/projects/xgboost/build2015/xgboost.sln 7 | - target: jvm 8 | platform: 9 | - x64 10 | 11 | configuration: 12 | - Debug 13 | - Release 14 | 15 | install: 16 | - SET PATH=;%PATH% 17 | - git submodule update --init --recursive 18 | 19 | before_build: 20 | - mkdir build2013 21 | - mkdir build2015 22 | - cd build2013 23 | - cmake .. -G"Visual Studio 12 2013 Win64" -DCMAKE_CONFIGURATION_TYPES="Release;Debug;" 24 | - cd ../build2015 25 | - cmake .. -G"Visual Studio 14 2015 Win64" -DCMAKE_CONFIGURATION_TYPES="Release;Debug;" 26 | 27 | build_script: 28 | - cd %APPVEYOR_BUILD_FOLDER% 29 | - if "%target%" == "native" msbuild %solution_name% 30 | - if "%target%" == "jvm" cd jvm-packages && mvn test -pl :xgboost4j 31 | -------------------------------------------------------------------------------- /xgboost/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is a simple script to build xgboost on macOS and Linux 3 | # Basically, it first tries to build with OpenMP; if that fails, it disables OpenMP and builds again. 4 | # This will automatically build xgboost for macOS users who don't have OpenMP support. 5 | # In most cases, simply typing 'make' will give you what you want. 6 | 7 | # See additional instructions in doc/build.md 8 | set -e 9 | 10 | if make; then 11 | echo "Successfully built multi-thread xgboost" 12 | else 13 | echo "-----------------------------" 14 | echo "Building multi-thread xgboost failed" 15 | echo "Starting to build single-thread xgboost" 16 | make clean_all 17 | make config=make/minimum.mk 18 | if [ $?
-eq 0 ] ;then 19 | echo "Successfully built single-thread xgboost" 20 | echo "If you want the multi-threaded version" 21 | echo "See additional instructions in doc/build.md" 22 | else 23 | echo "Failed to build single-thread xgboost" 24 | fi 25 | fi 26 | -------------------------------------------------------------------------------- /xgboost/cub/examples/block/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /Debug 3 | /Release 4 | /cuda55.sdf 5 | /cuda55.suo 6 | /cuda60.sdf 7 | /cuda60.suo 8 | -------------------------------------------------------------------------------- /xgboost/cub/examples/device/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /Debug 3 | /ipch 4 | /Release 5 | /cuda55.sdf 6 | /cuda55.suo 7 | /cuda60.sdf 8 | /cuda60.suo 9 | -------------------------------------------------------------------------------- /xgboost/cub/experimental/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /xgboost/cub/experimental/spmv_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 4 | do 5 | echo `date`, `$1 --dense=$i $2 $3 $4 $5 $6 $7` 6 | done 7 | 8 | echo 9 | echo 10 | 11 | for i in `ls /home/dumerrill/graphs/spmv/*.mtx` 12 | do 13 | if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] 14 | then 15 | echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` 16 | fi 17 | done 18 | 19 | echo 20 | echo 21 | 22 | for i in `ls /scratch/dumerrill/graphs/mtx/*.mtx` 23 | #for i in `ls /cygdrive/w/Dev/UFget/mtx/*.mtx` 24 | do 25 | if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] 26 | then 27 | echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` 28 | fi 29 | done 30 | 31 | -------------------------------------------------------------------------------- /xgboost/cub/test/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /link_main.obj 3 | -------------------------------------------------------------------------------- /xgboost/cub/test/link_a.cu: -------------------------------------------------------------------------------- 1 | #include <cub/cub.cuh>  // header name restored; it was lost in extraction 2 | 3 | void a() 4 | { 5 | printf("a() called\n"); 6 | 7 | cub::DoubleBuffer<unsigned int> d_keys;   // element type assumed; template args were lost in extraction 8 | cub::DoubleBuffer<unsigned int> d_values; 9 | size_t temp_storage_bytes = 0; 10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); 11 | } 12 | -------------------------------------------------------------------------------- /xgboost/cub/test/link_b.cu: -------------------------------------------------------------------------------- 1 | #include <cub/cub.cuh>  // header name restored; it was lost in extraction 2 | 3 | void b() 4 | { 5 | printf("b() called\n"); 6 | 7 | cub::DoubleBuffer<unsigned int> d_keys;   // element type assumed; template args were lost in extraction 8 | cub::DoubleBuffer<unsigned int> d_values; 9 | size_t temp_storage_bytes = 0; 10 | cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); 11 | } 12 | -------------------------------------------------------------------------------- /xgboost/cub/test/link_main.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h>  // header name restored; needed for printf 2 | 3 | extern void a(); 4 | extern void b(); 5 | 6 | int main() 7 | { 8 |
printf("hello world\n"); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /xgboost/cub/tune/.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /xgboost/demo/.gitignore: -------------------------------------------------------------------------------- 1 | *.libsvm 2 | *.pkl 3 | -------------------------------------------------------------------------------- /xgboost/demo/binary_classification/mapfeat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | def loadfmap( fname ): 4 | fmap = {} 5 | nmap = {} 6 | 7 | for l in open( fname ): 8 | arr = l.split() 9 | if arr[0].find('.') != -1: 10 | idx = int( arr[0].strip('.') ) 11 | assert idx not in fmap 12 | fmap[ idx ] = {} 13 | ftype = arr[1].strip(':') 14 | content = arr[2] 15 | else: 16 | content = arr[0] 17 | for it in content.split(','): 18 | if it.strip() == '': 19 | continue 20 | k , v = it.split('=') 21 | fmap[ idx ][ v ] = len(nmap) + 1 22 | nmap[ len(nmap) ] = ftype+'='+k 23 | return fmap, nmap 24 | 25 | def write_nmap( fo, nmap ): 26 | for i in range( len(nmap) ): 27 | fo.write('%d\t%s\ti\n' % (i, nmap[i]) ) 28 | 29 | # start here 30 | fmap, nmap = loadfmap( 'agaricus-lepiota.fmap' ) 31 | fo = open( 'featmap.txt', 'w' ) 32 | write_nmap( fo, nmap ) 33 | fo.close() 34 | 35 | fo = open( 'agaricus.txt', 'w' ) 36 | for l in open( 'agaricus-lepiota.data' ): 37 | arr = l.split(',') 38 | if arr[0] == 'p': 39 | fo.write('1') 40 | else: 41 | assert arr[0] == 'e' 42 | fo.write('0') 43 | for i in range( 1,len(arr) ): 44 | fo.write( ' %d:1' % fmap[i][arr[i].strip()] ) 45 | fo.write('\n') 46 | 47 | fo.close() 48 | -------------------------------------------------------------------------------- /xgboost/demo/binary_classification/mknfold.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import random 4 | 5 | if len(sys.argv) < 2: 6 | print ('Usage: [nfold = 5]') 7 | exit(0) 8 | 9 | random.seed( 10 ) 10 | 11 | k = int( sys.argv[2] ) 12 | if len(sys.argv) > 3: 13 | nfold = int( sys.argv[3] ) 14 | else: 15 | nfold = 5 16 | 17 | fi = open( sys.argv[1], 'r' ) 18 | ftr = open( sys.argv[1]+'.train', 'w' ) 19 | fte = open( sys.argv[1]+'.test', 'w' ) 20 | for l in fi: 21 | if random.randint( 1 , nfold ) == k: 22 | fte.write( l ) 23 | else: 24 | ftr.write( l ) 25 | 26 | fi.close() 27 | ftr.close() 28 | fte.close() 29 | 30 | -------------------------------------------------------------------------------- /xgboost/demo/binary_classification/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # map feature using indicator encoding, also produce featmap.txt 3 | python mapfeat.py 4 | # split train and test 5 | python mknfold.py agaricus.txt 1 6 | # training and output the models 7 | ../../xgboost mushroom.conf 8 | # output prediction task=pred 9 | ../../xgboost mushroom.conf task=pred model_in=0002.model 10 | # print the boosters of 00002.model in dump.raw.txt 11 | ../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt 12 | # use the feature map in printing for better visualization 13 | ../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 14 | cat dump.nice.txt 15 | 16 | 
-------------------------------------------------------------------------------- /xgboost/demo/data/README.md: -------------------------------------------------------------------------------- 1 | This folder contains processed example datasets used by the demos. 2 | Copyright of the datasets belongs to the original copyright holders. 3 | -------------------------------------------------------------------------------- /xgboost/demo/data/gen_autoclaims.R: -------------------------------------------------------------------------------- 1 | site <- 'http://cran.r-project.org' 2 | if (!require('dummies')) 3 | install.packages('dummies', repos=site) 4 | if (!require('insuranceData')) 5 | install.packages('insuranceData', repos=site) 6 | 7 | library(dummies) 8 | library(insuranceData) 9 | 10 | data(AutoClaims) 11 | data = AutoClaims 12 | 13 | data$STATE = as.factor(data$STATE) 14 | data$CLASS = as.factor(data$CLASS) 15 | data$GENDER = as.factor(data$GENDER) 16 | 17 | data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=T); 18 | write.table(data.dummy, 'autoclaims.csv', sep=',', row.names=F, col.names=F, quote=F) 19 | -------------------------------------------------------------------------------- /xgboost/demo/distributed-training/README.md: -------------------------------------------------------------------------------- 1 | Distributed XGBoost Training 2 | ============================ 3 | This is a tutorial on Distributed XGBoost Training. 4 | Currently xgboost supports distributed training via the CLI program with a configuration file. 5 | There are also plans to add distributed Python and other language bindings; please open an issue 6 | if you are interested in contributing. 7 | 8 | Build XGBoost with Distributed Filesystem Support 9 | ------------------------------------------------- 10 | To use distributed xgboost, you only need to turn on the options to build 11 | with distributed filesystems (HDFS or S3) in ```xgboost/make/config.mk```. 12 | 13 | 14 | Step by Step Tutorial on AWS 15 | ---------------------------- 16 | Check out [this tutorial](https://xgboost.readthedocs.org/en/latest/tutorials/aws_yarn.html) for running distributed xgboost. 17 | 18 | 19 | Model Analysis 20 | -------------- 21 | XGBoost models are exchangeable across all bindings and platforms. 22 | This means you can use Python or R to analyze the learnt model and make predictions. 23 | For example, you can use [plot_model.ipynb](plot_model.ipynb) to visualize the learnt model. 24 | -------------------------------------------------------------------------------- /xgboost/demo/distributed-training/run_aws.sh: -------------------------------------------------------------------------------- 1 | # This is the example script to run distributed xgboost on AWS.
2 | # Change the following two lines for configuration 3 | 4 | export BUCKET=mybucket 5 | 6 | # submit the job to YARN 7 | ../../dmlc-core/tracker/dmlc-submit --cluster=yarn --num-workers=2 --worker-cores=2\ 8 | ../../xgboost mushroom.aws.conf nthread=2\ 9 | data=s3://${BUCKET}/xgb-demo/train\ 10 | eval[test]=s3://${BUCKET}/xgb-demo/test\ 11 | model_dir=s3://${BUCKET}/xgb-demo/model 12 | -------------------------------------------------------------------------------- /xgboost/demo/gpu_acceleration/bosch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import xgboost as xgb 4 | import time 5 | import random 6 | from sklearn.model_selection import StratifiedKFold 7 | 8 | # For sampling rows from the input file 9 | random_seed = 9 10 | subset = 0.4 11 | 12 | n_rows = 1183747 13 | train_rows = int(n_rows * subset) 14 | random.seed(random_seed) 15 | skip = sorted(random.sample(range(1,n_rows + 1),n_rows-train_rows)) 16 | data = pd.read_csv("../data/train_numeric.csv", index_col=0, dtype=np.float32, skiprows=skip) 17 | y = data['Response'].values 18 | del data['Response'] 19 | X = data.values 20 | 21 | param = {} 22 | param['objective'] = 'binary:logistic' 23 | param['eval_metric'] = 'auc' 24 | param['max_depth'] = 5 25 | param['eta'] = 0.3 26 | param['silent'] = 0 27 | param['tree_method'] = 'gpu_exact' 28 | 29 | num_round = 20 30 | 31 | skf = StratifiedKFold(n_splits=5) 32 | 33 | for i, (train, test) in enumerate(skf.split(X, y)): 34 | dtrain = xgb.DMatrix(X[train], label=y[train]) 35 | tmp = time.time() 36 | bst = xgb.train(param, dtrain, num_round) 37 | boost_time = time.time() - tmp 38 | res = bst.eval(xgb.DMatrix(X[test], label=y[test])) 39 | print("Fold {}: {}, Boost Time {}".format(i, res, str(boost_time))) 40 | del bst 41 | 42 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/README.md: -------------------------------------------------------------------------------- 1 | XGBoost Python Feature Walkthrough 2 | ================================== 3 | * [Basic walkthrough of wrappers](basic_walkthrough.py) 4 | * [Customize loss function and evaluation metric](custom_objective.py) 5 | * [Boosting from existing prediction](boost_from_prediction.py) 6 | * [Predicting using first n trees](predict_first_ntree.py) 7 | * [Generalized Linear Model](generalized_linear_model.py) 8 | * [Cross validation](cross_validation.py) 9 | * [Predicting leaf indices](predict_leaf_indices.py) 10 | * [Sklearn Wrapper](sklearn_examples.py) 11 | * [Sklearn Parallel](sklearn_parallel.py) 12 | * [Sklearn access evals result](sklearn_evals_result.py) 13 | * [Access evals result](evals_result.py) 14 | * [External Memory](external_memory.py) 15 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/boost_from_prediction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import xgboost as xgb 4 | 5 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 6 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 7 | watchlist = [(dtest,'eval'), (dtrain,'train')] 8 | ### 9 | # advanced: start from an initial base prediction 10 | # 11 | print ('start running example to start from an initial prediction') 12 | # specify parameters via map; definitions are the same as in the C++ version 13 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 14 | #
train xgboost for 1 round 15 | bst = xgb.train( param, dtrain, 1, watchlist ) 16 | # Note: we need the margin value instead of the transformed prediction in set_base_margin 17 | # predicting with output_margin=True will always give you margin values before the logistic transformation 18 | ptrain = bst.predict(dtrain, output_margin=True) 19 | ptest = bst.predict(dtest, output_margin=True) 20 | dtrain.set_base_margin(ptrain) 21 | dtest.set_base_margin(ptest) 22 | 23 | print ('this is the result of running from the initial prediction') 24 | bst = xgb.train( param, dtrain, 1, watchlist ) 25 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/evals_result.py: -------------------------------------------------------------------------------- 1 | ## 2 | # This script demonstrates how to access the eval metrics in xgboost 3 | ## 4 | 5 | import xgboost as xgb 6 | dtrain = xgb.DMatrix('../data/agaricus.txt.train', silent=True) 7 | dtest = xgb.DMatrix('../data/agaricus.txt.test', silent=True) 8 | 9 | param = [('max_depth', 2), ('objective', 'binary:logistic'), ('eval_metric', 'logloss'), ('eval_metric', 'error')] 10 | 11 | num_round = 2 12 | watchlist = [(dtest,'eval'), (dtrain,'train')] 13 | 14 | evals_result = {} 15 | bst = xgb.train(param, dtrain, num_round, watchlist, evals_result=evals_result) 16 | 17 | print('Access logloss metric directly from evals_result:') 18 | print(evals_result['eval']['logloss']) 19 | 20 | print('') 21 | print('Access metrics through a loop:') 22 | for e_name, e_mtrs in evals_result.items(): 23 | print('- {}'.format(e_name)) 24 | for e_mtr_name, e_mtr_vals in e_mtrs.items(): 25 | print(' - {}'.format(e_mtr_name)) 26 | print(' - {}'.format(e_mtr_vals)) 27 | 28 | print('') 29 | print('Access complete dictionary:') 30 | print(evals_result) 31 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/external_memory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import scipy.sparse 4 | import xgboost as xgb 5 | 6 | ### simple example for using the external memory version 7 | 8 | # this is the only difference: add a # followed by a cache prefix name 9 | # several cache files with the prefix will be generated 10 | # currently only conversion from libsvm files is supported 11 | dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache') 12 | dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache') 13 | 14 | # specify validation sets to watch performance 15 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 16 | 17 | # performance notice: set nthread to the number of your real CPU cores 18 | # some CPUs offer two threads per core; for example, on a 4-core CPU with 8 threads, set nthread=4 19 | #param['nthread']=num_real_cpu 20 | 21 | watchlist = [(dtest,'eval'), (dtrain,'train')] 22 | num_round = 2 23 | bst = xgb.train(param, dtrain, num_round, watchlist) 24 | 25 | 26 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/gamma_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import xgboost as xgb 3 | import numpy as np 4 | 5 | # this script demonstrates how to fit a gamma regression model (with log link function) 6 | # in xgboost; before running the demo you need to generate the autoclaims dataset 7 | # by running gen_autoclaims.R located in
xgboost/demo/data. 8 | 9 | data = np.genfromtxt('../data/autoclaims.csv', delimiter=',') 10 | dtrain = xgb.DMatrix(data[0:4741, 0:34], data[0:4741, 34]) 11 | dtest = xgb.DMatrix(data[4741:6773, 0:34], data[4741:6773, 34]) 12 | 13 | # for gamma regression, we need to set the objective to 'reg:gamma'; it is also suggested 14 | # to set base_score to a value between 1 and 5 if the number of iterations is small 15 | param = {'silent':1, 'objective':'reg:gamma', 'booster':'gbtree', 'base_score':3} 16 | 17 | # the rest of the settings are the same 18 | watchlist = [(dtest,'eval'), (dtrain,'train')] 19 | num_round = 30 20 | 21 | # training and evaluation 22 | bst = xgb.train(param, dtrain, num_round, watchlist) 23 | preds = bst.predict(dtest) 24 | labels = dtest.get_label() 25 | print ('test deviance=%f' % (2 * np.sum((labels - preds) / preds - np.log(labels) + np.log(preds)))) 26 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/generalized_linear_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import xgboost as xgb 3 | ## 4 | # this script demonstrates how to fit a generalized linear model in xgboost 5 | # basically, we are using a linear model instead of trees for our boosters 6 | ## 7 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 8 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 9 | # change booster to gblinear, so that we are fitting a linear model 10 | # alpha is the L1 regularizer 11 | # lambda is the L2 regularizer 12 | # you can also set lambda_bias, which is the L2 regularizer on the bias term 13 | param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear', 14 | 'alpha': 0.0001, 'lambda': 1 } 15 | 16 | # normally, you do not need to set eta (step_size) 17 | # XGBoost uses a parallel coordinate descent algorithm (shotgun); 18 | # parallelization can affect convergence in certain cases, 19 | # so setting eta to a smaller value, e.g. 0.5, can make the optimization more stable 20 | # param['eta'] = 1 21 | 22 | ## 23 | # the rest of the settings are the same 24 | ## 25 | watchlist = [(dtest,'eval'), (dtrain,'train')] 26 | num_round = 4 27 | bst = xgb.train(param, dtrain, num_round, watchlist) 28 | preds = bst.predict(dtest) 29 | labels = dtest.get_label() 30 | print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))) 31 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/predict_first_ntree.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import numpy as np 3 | import xgboost as xgb 4 | 5 | ### load data and do training 6 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 7 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 8 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 9 | watchlist = [(dtest,'eval'), (dtrain,'train')] 10 | num_round = 3 11 | bst = xgb.train(param, dtrain, num_round, watchlist) 12 | 13 | print ('start testing prediction from the first n trees') 14 | ### predict using the first tree only 15 | label = dtest.get_label() 16 | ypred1 = bst.predict(dtest, ntree_limit=1) 17 | # by default, we predict using all the trees 18 | ypred2 = bst.predict(dtest) 19 | print ('error of ypred1=%f' % (np.sum((ypred1>0.5)!=label) /float(len(label)))) 20 | print ('error of ypred2=%f' % (np.sum((ypred2>0.5)!=label) /float(len(label)))) 21 |
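A related pattern to predicting with the first n trees: when training with early stopping, the booster records the best round, which can then be passed back through ntree_limit. A minimal sketch under the same setup as predict_first_ntree.py; the early_stopping_rounds value is illustrative:

```python
import xgboost as xgb

dtrain = xgb.DMatrix('../data/agaricus.txt.train')
dtest = xgb.DMatrix('../data/agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}

# stop once the metric on dtest has not improved for 5 rounds
bst = xgb.train(param, dtrain, num_boost_round=50,
                evals=[(dtest, 'eval')], early_stopping_rounds=5)

# predict using only the trees up to the best iteration
ypred = bst.predict(dtest, ntree_limit=bst.best_ntree_limit)
```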
-------------------------------------------------------------------------------- /xgboost/demo/guide-python/predict_leaf_indices.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import xgboost as xgb 3 | 4 | ### load data and do training 5 | dtrain = xgb.DMatrix('../data/agaricus.txt.train') 6 | dtest = xgb.DMatrix('../data/agaricus.txt.test') 7 | param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' } 8 | watchlist = [(dtest,'eval'), (dtrain,'train')] 9 | num_round = 3 10 | bst = xgb.train(param, dtrain, num_round, watchlist) 11 | 12 | print ('start testing prediction of the leaf indices') 13 | ### predict using the first 2 trees 14 | leafindex = bst.predict(dtest, ntree_limit=2, pred_leaf=True) 15 | print(leafindex.shape) 16 | print(leafindex) 17 | ### predict using all trees 18 | leafindex = bst.predict(dtest, pred_leaf = True) 19 | print(leafindex.shape) 20 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/runall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PYTHONPATH=${PYTHONPATH}:../../python-package 3 | python basic_walkthrough.py 4 | python custom_objective.py 5 | python boost_from_prediction.py 6 | python predict_first_ntree.py 7 | python generalized_linear_model.py 8 | python cross_validation.py 9 | python predict_leaf_indices.py 10 | python sklearn_examples.py 11 | python sklearn_parallel.py 12 | python external_memory.py 13 | rm -rf *~ *.model *.buffer 14 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/sklearn_evals_result.py: -------------------------------------------------------------------------------- 1 | ## 2 | # This script demonstrates how to access the xgboost eval metrics by using sklearn 3 | ## 4 | 5 | import xgboost as xgb 6 | import numpy as np 7 | from sklearn.datasets import make_hastie_10_2 8 | 9 | X, y = make_hastie_10_2(n_samples=2000, random_state=42) 10 | 11 | # Map labels from {-1, 1} to {0, 1} 12 | labels, y = np.unique(y, return_inverse=True) 13 | 14 | X_train, X_test = X[:1600], X[1600:] 15 | y_train, y_test = y[:1600], y[1600:] 16 | 17 | param_dist = {'objective':'binary:logistic', 'n_estimators':2} 18 | 19 | clf = xgb.XGBModel(**param_dist) 20 | # Or you can use: clf = xgb.XGBClassifier(**param_dist) 21 | 22 | clf.fit(X_train, y_train, 23 | eval_set=[(X_train, y_train), (X_test, y_test)], 24 | eval_metric='logloss', 25 | verbose=True) 26 | 27 | # Load evals result by calling the evals_result() function 28 | evals_result = clf.evals_result() 29 | 30 | print('Access logloss metric directly from validation_0:') 31 | print(evals_result['validation_0']['logloss']) 32 | 33 | print('') 34 | print('Access metrics through a loop:') 35 | for e_name, e_mtrs in evals_result.items(): 36 | print('- {}'.format(e_name)) 37 | for e_mtr_name, e_mtr_vals in e_mtrs.items(): 38 | print(' - {}'.format(e_mtr_name)) 39 | print(' - {}'.format(e_mtr_vals)) 40 | 41 | print('') 42 | print('Access complete dict:') 43 | print(evals_result) 44 | -------------------------------------------------------------------------------- /xgboost/demo/guide-python/sklearn_parallel.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | if __name__ == "__main__": 4 | # NOTE: on posix systems, this *has* to be here and in the 5 | # `__name__ == "__main__"` clause to run XGBoost in parallel processes 6
| # using fork, if XGBoost was built with OpenMP support. Otherwise, if you 7 | # build XGBoost without OpenMP support, you can use fork, which is the 8 | # default backend for joblib, and omit this. 9 | try: 10 | from multiprocessing import set_start_method 11 | except ImportError: 12 | raise ImportError("Unable to import multiprocessing.set_start_method." 13 | " This example only runs on Python 3.4") 14 | set_start_method("forkserver") 15 | 16 | import numpy as np 17 | from sklearn.model_selection import GridSearchCV 18 | from sklearn.datasets import load_boston 19 | import xgboost as xgb 20 | 21 | rng = np.random.RandomState(31337) 22 | 23 | print("Parallel Parameter optimization") 24 | boston = load_boston() 25 | 26 | os.environ["OMP_NUM_THREADS"] = "2" # or to whatever you want 27 | y = boston['target'] 28 | X = boston['data'] 29 | xgb_model = xgb.XGBRegressor() 30 | clf = GridSearchCV(xgb_model, {'max_depth': [2, 4, 6], 31 | 'n_estimators': [50, 100, 200]}, verbose=1, 32 | n_jobs=2) 33 | clf.fit(X, y) 34 | print(clf.best_score_) 35 | print(clf.best_params_) 36 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/README.md: -------------------------------------------------------------------------------- 1 | Highlights 2 | ===== 3 | The Higgs challenge ended recently; xgboost was used by many players. This list highlights some of their xgboost solutions: 4 | * Blogpost by phunther: [Winning solution of Kaggle Higgs competition: what a single model can do](http://no2147483647.wordpress.com/2014/09/17/winning-solution-of-kaggle-higgs-competition-what-a-single-model-can-do/) 5 | * The solution by Tianqi Chen and Tong He [Link](https://github.com/hetong007/higgsml) 6 | 7 | Guide for Kaggle Higgs Challenge 8 | ===== 9 | 10 | This folder gives an example of how to use the XGBoost Python module to run the Kaggle Higgs competition 11 | 12 | This script achieves about a 3.600 AMS score on the public leaderboard. To get started, follow these steps: 13 | 14 | 1. Compile the XGBoost python lib 15 | ```bash 16 | cd ../.. 17 | make 18 | ``` 19 | 20 | 2. Put training.csv and test.csv in the folder './data' (you can create a symbolic link) 21 | 22 | 3. Run ./run.sh 23 | 24 | Speed 25 | ===== 26 | speedtest.py compares xgboost's speed on this dataset with sklearn.GBM 27 | 28 | 29 | Using R module 30 | ===== 31 | * Alternatively, you can run the R scripts higgs-train.R and higgs-pred.R.
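For reference, the AMS metric quoted above (and used as the ams@0.15 eval_metric in higgs-train.R below) is the challenge's Approximate Median Significance. A minimal sketch of the published formula with its regularization term b_r = 10; the function name and argument layout here are our own, not from these scripts:

```python
import math

def ams(s, b, b_r=10.0):
    # Approximate Median Significance from the Higgs challenge:
    #   s - sum of weights of selected signal events (true positives)
    #   b - sum of weights of selected background events (false positives)
    return math.sqrt(2.0 * ((s + b + b_r) * math.log(1.0 + s / (b + b_r)) - s))
```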
32 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/higgs-pred.R: -------------------------------------------------------------------------------- 1 | # install xgboost package, see R-package in root folder 2 | require(xgboost) 3 | require(methods) 4 | 5 | modelfile <- "higgs.model" 6 | outfile <- "higgs.pred.csv" 7 | dtest <- read.csv("data/test.csv", header=TRUE) 8 | data <- as.matrix(dtest[2:31]) 9 | idx <- dtest[[1]] 10 | 11 | xgmat <- xgb.DMatrix(data, missing = -999.0) 12 | bst <- xgb.load(modelfile=modelfile) 13 | ypred <- predict(bst, xgmat) 14 | 15 | rorder <- rank(ypred, ties.method="first") 16 | 17 | threshold <- 0.15 18 | # to be completed 19 | ntop <- length(rorder) - as.integer(threshold*length(rorder)) 20 | plabel <- ifelse(rorder > ntop, "s", "b") 21 | outdata <- list("EventId" = idx, 22 | "RankOrder" = rorder, 23 | "Class" = plabel) 24 | write.csv(outdata, file = outfile, quote=FALSE, row.names=FALSE) 25 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/higgs-pred.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # make prediction 3 | import numpy as np 4 | import xgboost as xgb 5 | 6 | # path to where the data lies 7 | dpath = 'data' 8 | 9 | modelfile = 'higgs.model' 10 | outfile = 'higgs.pred.csv' 11 | # make top 15% as positive 12 | threshold_ratio = 0.15 13 | 14 | # load in training data, directly use numpy 15 | dtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 ) 16 | data = dtest[:,1:31] 17 | idx = dtest[:,0] 18 | 19 | print ('finish loading from csv ') 20 | xgmat = xgb.DMatrix( data, missing = -999.0 ) 21 | bst = xgb.Booster({'nthread':16}, model_file = modelfile) 22 | ypred = bst.predict( xgmat ) 23 | 24 | res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ] 25 | 26 | rorder = {} 27 | for k, v in sorted( res, key = lambda x:-x[1] ): 28 | rorder[ k ] = len(rorder) + 1 29 | 30 | # write out predictions 31 | ntop = int( threshold_ratio * len(rorder ) ) 32 | fo = open(outfile, 'w') 33 | nhit = 0 34 | ntot = 0 35 | fo.write('EventId,RankOrder,Class\n') 36 | for k, v in res: 37 | if rorder[k] <= ntop: 38 | lb = 's' 39 | nhit += 1 40 | else: 41 | lb = 'b' 42 | # change output rank order to follow Kaggle convention 43 | fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) ) 44 | ntot += 1 45 | fo.close() 46 | 47 | print ('finished writing into prediction file') 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/higgs-train.R: -------------------------------------------------------------------------------- 1 | # install xgboost package, see R-package in root folder 2 | require(xgboost) 3 | require(methods) 4 | 5 | testsize <- 550000 6 | 7 | dtrain <- read.csv("data/training.csv", header=TRUE) 8 | dtrain[33] <- dtrain[33] == "s" 9 | label <- as.numeric(dtrain[[33]]) 10 | data <- as.matrix(dtrain[2:31]) 11 | weight <- as.numeric(dtrain[[32]]) * testsize / length(label) 12 | 13 | sumwpos <- sum(weight * (label==1.0)) 14 | sumwneg <- sum(weight * (label==0.0)) 15 | print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos)) 16 | 17 | xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0) 18 | param <- list("objective" = "binary:logitraw", 19 | "scale_pos_weight" = sumwneg / sumwpos, 20 | "bst:eta" = 0.1, 21 | "bst:max_depth" = 6, 22 | 
"eval_metric" = "auc", 23 | "eval_metric" = "ams@0.15", 24 | "silent" = 1, 25 | "nthread" = 16) 26 | watchlist <- list("train" = xgmat) 27 | nround = 120 28 | print ("loading data end, start to boost trees") 29 | bst = xgb.train(param, xgmat, nround, watchlist ); 30 | # save out model 31 | xgb.save(bst, "higgs.model") 32 | print ('finish training') 33 | 34 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-higgs/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -u higgs-numpy.py 4 | ret=$? 5 | if [[ $ret != 0 ]]; then 6 | echo "ERROR in higgs-numpy.py" 7 | exit $ret 8 | fi 9 | python -u higgs-pred.py 10 | ret=$? 11 | if [[ $ret != 0 ]]; then 12 | echo "ERROR in higgs-pred.py" 13 | exit $ret 14 | fi 15 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-otto/README.MD: -------------------------------------------------------------------------------- 1 | Benckmark for Otto Group Competition 2 | ========= 3 | 4 | This is a folder containing the benchmark for the [Otto Group Competition on Kaggle](http://www.kaggle.com/c/otto-group-product-classification-challenge). 5 | 6 | ## Getting started 7 | 8 | 1. Put `train.csv` and `test.csv` under the `data` folder 9 | 2. Run the script 10 | 3. Submit the `submission.csv` 11 | 12 | The parameter `nthread` controls the number of cores to run on, please set it to suit your machine. 13 | 14 | ## R-package 15 | 16 | To install the R-package of xgboost, please run 17 | 18 | ```r 19 | devtools::install_github('tqchen/xgboost',subdir='R-package') 20 | ``` 21 | 22 | Windows users may need to install [RTools](http://cran.r-project.org/bin/windows/Rtools/) first. 
23 | 24 | 25 | -------------------------------------------------------------------------------- /xgboost/demo/kaggle-otto/otto_train_pred.R: -------------------------------------------------------------------------------- 1 | require(xgboost) 2 | require(methods) 3 | 4 | train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = F) 5 | test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = F) 6 | train = train[,-1] 7 | test = test[,-1] 8 | 9 | y = train[,ncol(train)] 10 | y = gsub('Class_','',y) 11 | y = as.integer(y)-1 # xgboost takes class labels in [0, num_class) 12 | 13 | x = rbind(train[,-ncol(train)],test) 14 | x = as.matrix(x) 15 | x = matrix(as.numeric(x),nrow(x),ncol(x)) 16 | trind = 1:length(y) 17 | teind = (nrow(train)+1):nrow(x) 18 | 19 | # Set necessary parameters 20 | param <- list("objective" = "multi:softprob", 21 | "eval_metric" = "mlogloss", 22 | "num_class" = 9, 23 | "nthread" = 8) 24 | 25 | # Run Cross Validation 26 | cv.nround = 50 27 | bst.cv = xgb.cv(param=param, data = x[trind,], label = y, 28 | nfold = 3, nrounds=cv.nround) 29 | 30 | # Train the model 31 | nround = 50 32 | bst = xgboost(param=param, data = x[trind,], label = y, nrounds=nround) 33 | 34 | # Make prediction 35 | pred = predict(bst,x[teind,]) 36 | pred = matrix(pred,9,length(pred)/9) 37 | pred = t(pred) 38 | 39 | # Output submission 40 | pred = format(pred, digits=2,scientific=F) # shrink the size of submission 41 | pred = data.frame(1:nrow(pred),pred) 42 | names(pred) = c('id', paste0('Class_',1:9)) 43 | write.csv(pred,file='submission.csv', quote=FALSE,row.names=FALSE) 44 | -------------------------------------------------------------------------------- /xgboost/demo/multiclass_classification/README.md: -------------------------------------------------------------------------------- 1 | Demonstrating how to use XGBoost to accomplish a multi-class classification task on the [UCI Dermatology dataset](https://archive.ics.uci.edu/ml/datasets/Dermatology) 2 | 3 | Make sure you have built the xgboost python module in ../../python 4 | 5 | 1. Run runexp.sh 6 | ```bash 7 | ./runexp.sh 8 | ``` 9 | 10 | 11 | -------------------------------------------------------------------------------- /xgboost/demo/multiclass_classification/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -f dermatology.data ] 3 | then 4 | echo "use existing data to run multi class classification" 5 | else 6 | echo "getting data from uci, make sure you are connected to the internet" 7 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/dermatology/dermatology.data 8 | fi 9 | python train.py 10 | -------------------------------------------------------------------------------- /xgboost/demo/rank/README.md: -------------------------------------------------------------------------------- 1 | Learning to rank 2 | ==== 3 | XGBoost supports ranking tasks. In the ranking scenario, data are often grouped, and we need the [group information file](../../doc/input_format.md#group-input-format) to specify the groups. The ranking model used in XGBoost is LambdaRank; this objective is not yet complete, and currently we provide pairwise ranking. 4 | 5 | ### Parameters 6 | The configuration settings are similar to the regression and binary classification settings, except that the user needs to specify the objective: 7 | 8 | ``` 9 | ... 10 | objective="rank:pairwise" 11 | ... 12 | ``` 13 | For more usage details please refer to the [binary classification demo](../binary_classification). 14 | 15 | Instructions 16 | ==== 17 | The dataset for the ranking demo is from LETOR 4.0 MQ2008 Fold 1. 18 | You can use the following commands to run the example: 19 | 20 | Get the data: ./wgetdata.sh 21 | Run the example: ./runexp.sh
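As a concrete illustration of the group information file mentioned above: it is a plain text file with one integer per line, each giving how many consecutive rows of the corresponding feature file belong to one query group. The counts below are made up for illustration; trans_data.py in this folder generates the real ones (e.g. mq2008.train.group):

```
8
8
16
```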
-------------------------------------------------------------------------------- /xgboost/demo/rank/runexp.sh: -------------------------------------------------------------------------------- 1 | python trans_data.py train.txt mq2008.train mq2008.train.group 2 | 3 | python trans_data.py test.txt mq2008.test mq2008.test.group 4 | 5 | python trans_data.py vali.txt mq2008.vali mq2008.vali.group 6 | 7 | ../../xgboost mq2008.conf 8 | 9 | ../../xgboost mq2008.conf task=pred model_in=0004.model 10 | 11 | 12 | -------------------------------------------------------------------------------- /xgboost/demo/rank/trans_data.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | def save_data(group_data,output_feature,output_group): 4 | if len(group_data) == 0: 5 | return 6 | 7 | output_group.write(str(len(group_data))+"\n") 8 | for data in group_data: 9 | # only include nonzero features 10 | feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ] 11 | output_feature.write(data[0] + " " + " ".join(feats) + "\n") 12 | 13 | if __name__ == "__main__": 14 | if len(sys.argv) != 4: 15 | print ("Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]") 16 | sys.exit(0) 17 | 18 | fi = open(sys.argv[1]) 19 | output_feature = open(sys.argv[2],"w") 20 | output_group = open(sys.argv[3],"w") 21 | 22 | group_data = [] 23 | group = "" 24 | for line in fi: 25 | if not line: 26 | break 27 | if "#" in line: 28 | line = line[:line.index("#")] 29 | splits = line.strip().split(" ") 30 | if splits[1] != group: 31 | save_data(group_data,output_feature,output_group) 32 | group_data = [] 33 | group = splits[1] 34 | group_data.append(splits) 35 | 36 | save_data(group_data,output_feature,output_group) 37 | 38 | fi.close() 39 | output_feature.close() 40 | output_group.close() 41 | 42 | -------------------------------------------------------------------------------- /xgboost/demo/rank/wgetdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | wget http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2008.rar 3 | unrar x MQ2008.rar 4 | mv -f MQ2008/Fold1/*.txt . 5 | -------------------------------------------------------------------------------- /xgboost/demo/regression/README.md: -------------------------------------------------------------------------------- 1 | Regression 2 | ==== 3 | Using XGBoost for regression is very similar to using it for binary classification. We suggest that you refer to the [binary classification demo](../binary_classification) first. In XGBoost, if we use the negative log-likelihood as the loss function for regression, the training procedure is the same as training a binary classifier with XGBoost. 4 | 5 | ### Tutorial 6 | The dataset we use is the [computer hardware dataset from the UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware).
The demo for regression is almost the same as the [binary classification demo](../binary_classification), except for a small difference in the general parameters: 7 | ``` 8 | # General parameter 9 | # this is the only difference with classification, use reg:linear to do linear regression 10 | # when labels are in [0,1] we can also use reg:logistic 11 | objective = reg:linear 12 | ... 13 | 14 | ``` 15 | 16 | The input format is the same as binary classification, except that the label is now the target regression value. We use linear regression here; if we want to use logistic regression (objective = reg:logistic), the labels need to be pre-scaled into [0,1]. 17 | 18 |
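For example, if you want to try reg:logistic on this dataset, a simple min-max rescaling maps the targets into [0,1] first. The sketch below uses made-up target values and is not part of the demo scripts:

```python
import numpy as np

y = np.array([198.0, 269.0, 220.0, 132.0])      # made-up regression targets
y_scaled = (y - y.min()) / (y.max() - y.min())  # now lies in [0, 1]
```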
-------------------------------------------------------------------------------- /xgboost/demo/regression/mapfeat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | fo = open( 'machine.txt', 'w' ) 4 | cnt = 6 # next free feature index; features 0-5 hold the numeric attributes 5 | fmap = {} 6 | for l in open( 'machine.data' ): 7 | arr = l.split(',') 8 | fo.write(arr[8]) # column 8 (PRP) is the regression label 9 | for i in range( 0,6 ): 10 | fo.write( ' %d:%s' %(i,arr[i+2]) ) 11 | 12 | if arr[0] not in fmap: 13 | fmap[arr[0]] = cnt 14 | cnt += 1 15 | 16 | fo.write( ' %d:1' % fmap[arr[0]] ) 17 | fo.write('\n') 18 | 19 | fo.close() 20 | 21 | # create feature map for machine data 22 | fo = open('featmap.txt', 'w') 23 | # list from machine.names 24 | names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ] 25 | 26 | for i in range(0,6): 27 | fo.write( '%d\t%s\tint\n' % (i, names[i+1])) 28 | 29 | for v, k in sorted( fmap.items(), key = lambda x:x[1] ): 30 | fo.write( '%d\tvendor=%s\ti\n' % (k, v)) 31 | fo.close() 32 | -------------------------------------------------------------------------------- /xgboost/demo/regression/mknfold.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import random 4 | 5 | if len(sys.argv) < 3: 6 | print ('Usage: <filename> <k> [nfold = 5]') 7 | exit(0) 8 | 9 | random.seed( 10 ) 10 | 11 | k = int( sys.argv[2] ) 12 | if len(sys.argv) > 3: 13 | nfold = int( sys.argv[3] ) 14 | else: 15 | nfold = 5 16 | 17 | fi = open( sys.argv[1], 'r' ) 18 | ftr = open( sys.argv[1]+'.train', 'w' ) 19 | fte = open( sys.argv[1]+'.test', 'w' ) 20 | for l in fi: 21 | if random.randint( 1 , nfold ) == k: 22 | fte.write( l ) 23 | else: 24 | ftr.write( l ) 25 | 26 | fi.close() 27 | ftr.close() 28 | fte.close() 29 | 30 | -------------------------------------------------------------------------------- /xgboost/demo/regression/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # map the data to features. For convenience, we use only 7 of the original attributes and encode them as features in a trivial way 3 | python mapfeat.py 4 | # split train and test 5 | python mknfold.py machine.txt 1 6 | # training and output the models 7 | ../../xgboost machine.conf 8 | # output predictions of test data 9 | ../../xgboost machine.conf task=pred model_in=0002.model 10 | # print the boosters of 0002.model in dump.raw.txt 11 | ../../xgboost machine.conf task=dump model_in=0002.model name_dump=dump.raw.txt 12 | # print the boosters of 0002.model in dump.nice.txt with feature map 13 | ../../xgboost machine.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt 14 | 15 | # cat the result 16 | cat dump.nice.txt 17 | -------------------------------------------------------------------------------- /xgboost/demo/yearpredMSD/README.md: -------------------------------------------------------------------------------- 1 | Demonstrating how to use XGBoost on the [Year Prediction task of the Million Song Dataset](https://archive.ics.uci.edu/ml/datasets/YearPredictionMSD) 2 | 3 | 1. Run runexp.sh 4 | ```bash 5 | ./runexp.sh 6 | ``` 7 | 8 | You can also use the script to prepare the data in LIBSVM format and run the [Distributed Version](../../multi-node). 9 | Note that normally a single machine is enough for a dataset at this scale; the distributed version is intended for larger datasets. 10 | -------------------------------------------------------------------------------- /xgboost/demo/yearpredMSD/csv2libsvm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | 4 | if len(sys.argv) < 3: 5 | print 'Usage: csv2libsvm.py <input csv> <output libsvm>' 6 | print 'convert an all-numerical csv to libsvm' 7 | sys.exit(0) 8 | fo = open(sys.argv[2], 'w') 9 | for l in open(sys.argv[1]): 10 | arr = l.split(',') 11 | fo.write('%s' % arr[0]) 12 | for i in xrange(len(arr) - 1): 13 | fo.write(' %d:%s' % (i, arr[i+1])) # the last field keeps its trailing newline, ending the row 14 | fo.close() 15 | -------------------------------------------------------------------------------- /xgboost/demo/yearpredMSD/runexp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f YearPredictionMSD.txt ] 4 | then 5 | echo "use existing data to run experiment" 6 | else 7 | echo "getting data from uci, make sure you are connected to the internet" 8 | wget https://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip 9 | unzip YearPredictionMSD.txt.zip 10 | fi 11 | echo "start making data.."
12 | # convert the csv file into LIBSVM format 13 | python csv2libsvm.py YearPredictionMSD.txt yearpredMSD.libsvm 14 | head -n 463715 yearpredMSD.libsvm > yearpredMSD.libsvm.train 15 | tail -n 51630 yearpredMSD.libsvm > yearpredMSD.libsvm.test 16 | echo "finished making the data" 17 | ../../xgboost yearpredMSD.conf 18 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | *~ 30 | config.mk 31 | *.pyc 32 | 33 | # Vim 34 | *.swp 35 | *.swo 36 | *.swn 37 | *.csv 38 | 39 | # Emacs 40 | .clang_complete 41 | deps 42 | recommonmark 43 | build 44 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/.travis.yml: -------------------------------------------------------------------------------- 1 | # disable sudo to use container based build 2 | sudo: false 3 | 4 | # Enabling test on Linux and OS X 5 | os: 6 | - linux 7 | - osx 8 | 9 | # Use Build Matrix to do lint and build separately 10 | env: 11 | matrix: 12 | - TASK=lint 13 | - TASK=unittest_gtest 14 | 15 | matrix: 16 | exclude: 17 | - os: osx 18 | env: TASK=lint 19 | 20 | # dependent apt packages 21 | addons: 22 | apt: 23 | sources: 24 | - ubuntu-toolchain-r-test 25 | packages: 26 | - doxygen 27 | - wget 28 | - git 29 | - libcurl4-openssl-dev 30 | - unzip 31 | - gcc-4.8 32 | - g++-4.8 33 | 34 | before_install: 35 | - export TRAVIS=scripts/travis 36 | - source ${TRAVIS}/travis_setup_env.sh 37 | - ${TRAVIS}/travis_osx_install.sh 38 | 39 | install: 40 | - pip install --user cpplint pylint 41 | 42 | script: ${TRAVIS}/travis_script.sh 43 | 44 | 45 | before_cache: 46 | - ${TRAVIS}/travis_before_cache.sh 47 | 48 | cache: 49 | directories: 50 | - ${HOME}/.cache/usr 51 | 52 | 53 | notifications: 54 | # Emails are sent to the committer's git-configured email address by default, 55 | email: 56 | on_success: change 57 | on_failure: always 58 | slack: dmlc:mKX5kxjqTP6fBb89lXD3G5f3 59 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 by Contributors 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/cmake/lint.cmake: -------------------------------------------------------------------------------- 1 | get_filename_component(CMAKE_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/.."
ABSOLUTE) 2 | if(NOT MSVC) 3 | set(LINT_COMMAND ${CMAKE_SOURCE_DIR}/scripts/lint.py) 4 | else() 5 | if((NOT PYTHON_EXECUTABLE)) 6 | message(FATAL_ERROR "Cannot lint without python") 7 | endif() 8 | # format output so VS can bring us to the offending file/line 9 | set(LINT_COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/scripts/lint.py) 10 | endif() 11 | 12 | set(LINT_DIRS include src scripts) 13 | 14 | cmake_policy(SET CMP0009 NEW) # suppress cmake warning 15 | execute_process( 16 | COMMAND ${LINT_COMMAND} ${PROJECT_NAME} all ${LINT_DIRS} 17 | WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} 18 | ERROR_VARIABLE LINT_OUTPUT 19 | ERROR_STRIP_TRAILING_WHITESPACE 20 | 21 | ) 22 | message(STATUS ${LINT_OUTPUT}) -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | _build 3 | doxygen 4 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/README: -------------------------------------------------------------------------------- 1 | This document is generated by sphinx. 2 | Make sure you have cloned the following repo into the root. 3 | 4 | - https://github.com/tqchen/recommonmark 5 | 6 | Type make html in the doc folder. 7 | 8 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/index.md: -------------------------------------------------------------------------------- 1 | DMLC-Core Documentation 2 | ======================= 3 | DMLC Core contains the common codebase that helps us build machine learning toolkits more easily. 4 | 5 | Contents 6 | -------- 7 | * [Parameter Structure for Machine Learning](parameter.md) 8 | * [Doxygen C++ API Reference](https://dmlc-core.readthedocs.org/en/latest/doxygen) 9 | 10 | Indices and tables 11 | ------------------ 12 | 13 | ```eval_rst 14 | * :ref:`genindex` 15 | * :ref:`modindex` 16 | * :ref:`search` 17 | ``` 18 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/doc/sphinx_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Helper utility function for customization.""" 3 | import sys 4 | import os 5 | import docutils 6 | import subprocess 7 | 8 | if os.environ.get('READTHEDOCS', None) == 'True': 9 | subprocess.call('cd ..; rm -rf recommonmark;' + 10 | 'git clone https://github.com/tqchen/recommonmark', shell=True) 11 | 12 | sys.path.insert(0, os.path.abspath('../recommonmark/')) 13 | from recommonmark import parser, transform 14 | 15 | MarkdownParser = parser.CommonMarkParser 16 | AutoStructify = transform.AutoStructify 17 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/example/dmlc_example.mk: -------------------------------------------------------------------------------- 1 | ALL_EXAMPLE=example/parameter 2 | 3 | 4 | example/parameter: example/parameter.cc libdmlc.a 5 | 6 | $(ALL_EXAMPLE) : 7 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 8 | 9 | 10 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/include/dmlc/common.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file common.h 4 | * \brief defines some common utility functions.
5 | */ 6 | #ifndef DMLC_COMMON_H_ 7 | #define DMLC_COMMON_H_ 8 | 9 | #include <vector> 10 | #include <string> 11 | #include <sstream> 12 | 13 | namespace dmlc { 14 | /*! 15 | * \brief Split a string by delimiter 16 | * \param s String to be split. 17 | * \param delim The delimiter. 18 | * \return the split vector of strings. 19 | */ 20 | inline std::vector<std::string> Split(const std::string& s, char delim) { 21 | std::string item; 22 | std::istringstream is(s); 23 | std::vector<std::string> ret; 24 | while (std::getline(is, item, delim)) { 25 | ret.push_back(item); 26 | } 27 | return ret; 28 | } 29 | } // namespace dmlc 30 | 31 | #endif // DMLC_COMMON_H_ 32 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/include/dmlc/omp.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file omp.h 4 | * \brief header to handle OpenMP compatibility issues 5 | */ 6 | #ifndef DMLC_OMP_H_ 7 | #define DMLC_OMP_H_ 8 | #if defined(_OPENMP) 9 | #include <omp.h> 10 | #else 11 | #ifndef DISABLE_OPENMP 12 | // use pragma message instead of warning 13 | #pragma message("Warning: OpenMP is not available, " \ 14 | "project will be compiled into single-thread code. " \ 15 | "Use OpenMP-enabled compiler to get benefit of multi-threading.") 16 | #endif 17 | //! \cond Doxygen_Suppress 18 | inline int omp_get_thread_num() { return 0; } 19 | inline int omp_get_num_threads() { return 1; } 20 | inline int omp_get_max_threads() { return 1; } 21 | inline int omp_get_num_procs() { return 1; } 22 | inline void omp_set_num_threads(int nthread) {} 23 | #endif 24 | // loop variable used in openmp 25 | namespace dmlc { 26 | #ifdef _MSC_VER 27 | typedef int omp_uint; 28 | typedef long omp_ulong; // NOLINT(*) 29 | #else 30 | typedef unsigned omp_uint; 31 | typedef unsigned long omp_ulong; // NOLINT(*) 32 | #endif 33 | //! \endcond 34 | } // namespace dmlc 35 | #endif // DMLC_OMP_H_ 36 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/include/dmlc/timer.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file timer.h 4 | * \brief cross platform timer for timing 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_TIMER_H_ 8 | #define DMLC_TIMER_H_ 9 | 10 | #include "base.h" 11 | 12 | #if DMLC_USE_CXX11 13 | #include <chrono> 14 | #endif 15 | 16 | #include <time.h> 17 | #ifdef __MACH__ 18 | #include <mach/clock.h> 19 | #include <mach/mach.h> 20 | #endif 21 | #include "./logging.h" 22 | 23 | namespace dmlc { 24 | /*!
25 | * \brief return time in seconds 26 | */ 27 | inline double GetTime(void) { 28 | #if DMLC_USE_CXX11 29 | return std::chrono::duration<double>( 30 | std::chrono::high_resolution_clock::now().time_since_epoch()).count(); 31 | #elif defined __MACH__ 32 | clock_serv_t cclock; 33 | mach_timespec_t mts; 34 | host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); 35 | CHECK(clock_get_time(cclock, &mts) == 0) << "failed to get time"; 36 | mach_port_deallocate(mach_task_self(), cclock); 37 | return static_cast<double>(mts.tv_sec) + static_cast<double>(mts.tv_nsec) * 1e-9; 38 | #else 39 | #if defined(__unix__) || defined(__linux__) 40 | timespec ts; 41 | CHECK(clock_gettime(CLOCK_REALTIME, &ts) == 0) << "failed to get time"; 42 | return static_cast<double>(ts.tv_sec) + static_cast<double>(ts.tv_nsec) * 1e-9; 43 | #else 44 | return static_cast<double>(time(NULL)); 45 | #endif 46 | #endif 47 | } 48 | } // namespace dmlc 49 | #endif // DMLC_TIMER_H_ 50 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/packages.mk: -------------------------------------------------------------------------------- 1 | # Makefile for easily installing dependencies 2 | 3 | # List of packages here 4 | .PHONY: gtest lz4 5 | 6 | # rules for gtest 7 | ${CACHE_PREFIX}/include/gtest: 8 | rm -rf gtest release-1.7.0.zip 9 | wget https://github.com/google/googletest/archive/release-1.7.0.zip 10 | unzip release-1.7.0.zip 11 | mv googletest-release-1.7.0 gtest 12 | cd gtest; g++ -Iinclude -pthread -c src/gtest-all.cc -o gtest-all.o; cd .. 13 | ar -rv libgtest.a gtest/gtest-all.o 14 | mkdir -p ${CACHE_PREFIX}/include ${CACHE_PREFIX}/lib 15 | cp -r gtest/include/gtest ${CACHE_PREFIX}/include 16 | mv libgtest.a ${CACHE_PREFIX}/lib 17 | rm -rf release-1.7.0.zip 18 | 19 | gtest: | ${CACHE_PREFIX}/include/gtest 20 | 21 | lz4: ${CACHE_PREFIX}/include/lz4.h 22 | 23 | ${CACHE_PREFIX}/include/lz4.h: 24 | rm -rf lz4 25 | git clone https://github.com/Cyan4973/lz4 26 | cd lz4; make; make install PREFIX=${CACHE_PREFIX}; cd - 27 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/travis/travis_before_cache.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # do nothing for now 3 | ls -alLR ${CACHE_PREFIX} -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/travis/travis_osx_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ ${TRAVIS_OS_NAME} != "osx" ]; then 4 | exit 0 5 | fi 6 | 7 | brew update 8 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/scripts/travis/travis_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # main script of travis 4 | if [ ${TASK} == "lint" ]; then 5 | make lint || exit -1 6 | make doxygen 2>log.txt 7 | (cat log.txt| grep -v ENABLE_PREPROCESSING |grep -v "unsupported tag" |grep warning) && exit -1 8 | exit 0 9 | fi 10 | 11 | if [ ${TRAVIS_OS_NAME} == "osx" ]; then 12 | export NO_OPENMP=1 13 | fi 14 | 15 | if [ ${TASK} == "unittest_gtest" ]; then 16 | cp make/config.mk .
17 | make -f scripts/packages.mk gtest 18 | if [ ${TRAVIS_OS_NAME} != "osx" ]; then 19 | echo "USE_S3=1" >> config.mk 20 | echo "export CXX = g++-4.8" >> config.mk 21 | else 22 | echo "USE_S3=0" >> config.mk 23 | echo "USE_OPENMP=0" >> config.mk 24 | fi 25 | echo "GTEST_PATH="${CACHE_PREFIX} >> config.mk 26 | echo "BUILD_TEST=1" >> config.mk 27 | make all || exit -1 28 | test/unittest/dmlc_unittest || exit -1 29 | fi 30 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/src/io/azure_filesys.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file azure_filesys.h 4 | * \brief Azure access module 5 | * \author Mu Li 6 | */ 7 | #ifndef DMLC_IO_AZURE_FILESYS_H_ 8 | #define DMLC_IO_AZURE_FILESYS_H_ 9 | 10 | #include <vector> 11 | #include <string> 12 | #include "./filesys.h" 13 | 14 | namespace dmlc { 15 | namespace io { 16 | 17 | /*! \brief Microsoft Azure Blob filesystem */ 18 | class AzureFileSystem : public FileSystem { 19 | public: 20 | virtual ~AzureFileSystem() {} 21 | 22 | virtual FileInfo GetPathInfo(const URI &path) { return FileInfo(); } 23 | 24 | virtual void ListDirectory(const URI &path, std::vector<FileInfo> *out_list); 25 | 26 | virtual Stream *Open(const URI &path, const char* const flag, bool allow_null) { 27 | return NULL; 28 | } 29 | 30 | virtual SeekStream *OpenForRead(const URI &path, bool allow_null) { 31 | return NULL; 32 | } 33 | 34 | /*! 35 | * \brief get a singleton of AzureFileSystem when needed 36 | * \return a singleton instance 37 | */ 38 | inline static AzureFileSystem *GetInstance(void) { 39 | static AzureFileSystem instance; 40 | return &instance; 41 | } 42 | 43 | private: 44 | /*! \brief constructor */ 45 | AzureFileSystem(); 46 | 47 | /*! \brief Azure storage account name */ 48 | std::string azure_account_; 49 | 50 | /*! \brief Azure storage account key */ 51 | std::string azure_key_; 52 | }; 53 | 54 | } // namespace io 55 | } // namespace dmlc 56 | 57 | #endif // DMLC_IO_AZURE_FILESYS_H_ 58 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/src/io/line_split.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file line_split.h 4 | * \brief base class implementation of input splitter 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_LINE_SPLIT_H_ 8 | #define DMLC_IO_LINE_SPLIT_H_ 9 | 10 | #include <cstdio> 11 | #include <cstring> 12 | #include <vector> 13 | #include <string> 14 | #include <algorithm> 15 | #include "./input_split_base.h" 16 | 17 | namespace dmlc { 18 | namespace io { 19 | /*! \brief class that split the files by line */ 20 | class LineSplitter : public InputSplitBase { 21 | public: 22 | LineSplitter(FileSystem *fs, 23 | const char *uri, 24 | unsigned rank, 25 | unsigned nsplit) { 26 | this->Init(fs, uri, 1); 27 | this->ResetPartition(rank, nsplit); 28 | } 29 | 30 | virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk); 31 | protected: 32 | virtual size_t SeekRecordBegin(Stream *fi); 33 | virtual const char* 34 | FindLastRecordBegin(const char *begin, const char *end); 35 | }; 36 | } // namespace io 37 | } // namespace dmlc 38 | #endif // DMLC_IO_LINE_SPLIT_H_ 39 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/src/io/recordio_split.h: -------------------------------------------------------------------------------- 1 | /*!
2 | * Copyright (c) 2015 by Contributors 3 | * \file recordio_split.h 4 | * \brief input split that splits recordio files 5 | * \author Tianqi Chen 6 | */ 7 | #ifndef DMLC_IO_RECORDIO_SPLIT_H_ 8 | #define DMLC_IO_RECORDIO_SPLIT_H_ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "./input_split_base.h" 17 | 18 | namespace dmlc { 19 | namespace io { 20 | /*! \brief class that split the files by line */ 21 | class RecordIOSplitter : public InputSplitBase { 22 | public: 23 | RecordIOSplitter(FileSystem *fs, 24 | const char *uri, 25 | unsigned rank, 26 | unsigned nsplit) { 27 | this->Init(fs, uri, 4); 28 | this->ResetPartition(rank, nsplit); 29 | } 30 | 31 | virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk); 32 | 33 | protected: 34 | virtual size_t SeekRecordBegin(Stream *fi); 35 | virtual const char* 36 | FindLastRecordBegin(const char *begin, const char *end); 37 | }; 38 | } // namespace io 39 | } // namespace dmlc 40 | #endif // DMLC_IO_RECORDIO_SPLIT_H_ 41 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/.gitignore: -------------------------------------------------------------------------------- 1 | *_test 2 | *.csv -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/README.md: -------------------------------------------------------------------------------- 1 | This folder contains testcases for the project 2 | 3 | test scripts for s3: 4 | 5 | `test.sh` 6 | 7 | ```bash 8 | for r in {0..10}; do 9 | file=data/${RANDOM} 10 | start=`date +'%s.%N'` 11 | ./filesys_test cat s3://dmlc/ilsvrc12/val.rec >$file 12 | # ./filesys_test cat s3://dmlc/cifar10/train.rec >$file 13 | end=`date +'%s.%N'` 14 | res=$(echo "$end - $start" | bc -l) 15 | md5=`md5sum $file` 16 | rm $file 17 | echo "job $1, rp $r, $md5, time $res" 18 | done 19 | echo "job $1 done" 20 | ``` 21 | 22 | `run.sh` 23 | 24 | ```bash 25 | mkdir -p data 26 | rm -f data/* 27 | for i in {0..9}; do 28 | bash test.sh $i & 29 | sleep 1 30 | done 31 | wait 32 | ``` 33 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/dataiter_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 4) { 6 | printf("Usage: filename partid npart [format]\n"); 7 | return 0; 8 | } 9 | char libsvm[10] = "libsvm"; 10 | char* format; 11 | if (argc > 4) { 12 | format = argv[4]; 13 | } else { 14 | format = libsvm; 15 | } 16 | 17 | using namespace dmlc; 18 | RowBlockIter *iter = 19 | RowBlockIter::Create( 20 | argv[1], atoi(argv[2]), atoi(argv[3]), format); 21 | double tstart = GetTime(); 22 | size_t bytes_read = 0; 23 | while (iter->Next()) { 24 | const RowBlock &batch = iter->Value(); 25 | bytes_read += batch.MemCostBytes(); 26 | double tdiff = GetTime() - tstart; 27 | LOG(INFO) << (bytes_read >> 20UL) << 28 | " MB read " << ((bytes_read >> 20UL) / tdiff)<< " MB/sec"; 29 | } 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/dmlc_test.mk: -------------------------------------------------------------------------------- 1 | TEST=test/filesys_test test/dataiter_test\ 2 | test/iostream_test test/recordio_test test/split_read_test\ 3 | test/stream_read_test test/split_test test/libsvm_parser_test\ 4 | test/libfm_parser_test test/split_repeat_read_test 
test/strtonum_test\ 5 | test/logging_test test/parameter_test test/registry_test\ 6 | test/csv_parser_test 7 | 8 | test/filesys_test: test/filesys_test.cc src/io/*.h libdmlc.a 9 | test/dataiter_test: test/dataiter_test.cc libdmlc.a 10 | test/iostream_test: test/iostream_test.cc libdmlc.a 11 | test/recordio_test: test/recordio_test.cc libdmlc.a 12 | test/split_read_test: test/split_read_test.cc libdmlc.a 13 | test/split_repeat_read_test: test/split_repeat_read_test.cc libdmlc.a 14 | test/stream_read_test: test/stream_read_test.cc libdmlc.a 15 | test/split_test: test/split_test.cc libdmlc.a 16 | test/libsvm_parser_test: test/libsvm_parser_test.cc src/data/libsvm_parser.h libdmlc.a 17 | test/libfm_parser_test: test/libfm_parser_test.cc src/data/libfm_parser.h libdmlc.a 18 | test/csv_parser_test: test/csv_parser_test.cc src/data/csv_parser.h libdmlc.a 19 | test/strtonum_test: test/strtonum_test.cc src/data/strtonum.h 20 | test/logging_test: test/logging_test.cc 21 | test/parameter_test: test/parameter_test.cc 22 | test/registry_test: test/registry_test.cc 23 | 24 | $(TEST) : 25 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) 26 | 27 | include test/unittest/dmlc_unittest.mk 28 | 29 | ALL_TEST=$(TEST) $(UNITTEST) 30 | ALL_TEST_OBJ=$(UNITTEST_OBJ) 31 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/iostream_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | if (argc < 2) { 6 | printf("Usage: \n"); 7 | return 0; 8 | } 9 | {// output 10 | dmlc::Stream *fs = dmlc::Stream::Create(argv[1], "w"); 11 | dmlc::ostream os(fs); 12 | os << "hello-world " << 1e-10<< std::endl; 13 | delete fs; 14 | } 15 | {// input 16 | std::string name; 17 | double data; 18 | dmlc::Stream *fs = dmlc::Stream::Create(argv[1], "r"); 19 | dmlc::istream is(fs); 20 | is >> name >> data; 21 | std::cout << name << " " << data << std::endl; 22 | delete fs; 23 | } 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/libfm_parser_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../src/data/libfm_parser.h" 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 5) { 9 | printf("Usage: partid npart nthread\n"); 10 | return 0; 11 | } 12 | using namespace dmlc; 13 | InputSplit *split = InputSplit::Create(argv[1], 14 | atoi(argv[2]), 15 | atoi(argv[3]), 16 | "text"); 17 | int nthread = atoi(argv[4]); 18 | data::LibFMParser parser(split, nthread); 19 | double tstart = GetTime(); 20 | size_t bytes_read = 0; 21 | size_t bytes_expect = 10UL << 20UL; 22 | size_t num_ex = 0; 23 | while (parser.Next()) { 24 | bytes_read = parser.BytesRead(); 25 | num_ex += parser.Value().size; 26 | std::cout << "read bytes:" << bytes_read << " batch size:" << num_ex << std::endl; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu examples, %lu MB read, %g MB/sec\n", 30 | num_ex, bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | return 0; 36 | } -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/libsvm_parser_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a 
InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "../src/data/libsvm_parser.h" 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc < 5) { 10 | printf("Usage: partid npart nthread\n"); 11 | return 0; 12 | } 13 | using namespace dmlc; 14 | InputSplit *split = InputSplit::Create(argv[1], 15 | atoi(argv[2]), 16 | atoi(argv[3]), 17 | "text"); 18 | int nthread = atoi(argv[4]); 19 | data::LibSVMParser parser(split, nthread); 20 | double tstart = GetTime(); 21 | size_t bytes_read = 0; 22 | size_t bytes_expect = 10UL << 20UL; 23 | size_t num_ex = 0; 24 | while (parser.Next()) { 25 | bytes_read = parser.BytesRead(); 26 | num_ex += parser.Value().size; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu examples, %lu MB read, %g MB/sec\n", 30 | num_ex, bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/logging_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(void) { 4 | LOG(INFO) << "hello"; 5 | LOG(ERROR) << "error"; 6 | try { 7 | LOG(FATAL)<<'a'<<11<<33; 8 | } catch (dmlc::Error e) { 9 | LOG(INFO) << "catch " << e.what(); 10 | } 11 | CHECK(2!=3) << "test"; 12 | CHECK(2==3) << "test"; 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/split_read_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 4) { 9 | printf("Usage: partid npart\n"); 10 | return 0; 11 | } 12 | using namespace dmlc; 13 | InputSplit *split = InputSplit::Create(argv[1], 14 | atoi(argv[2]), 15 | atoi(argv[3]), 16 | "text"); 17 | std::vector data; 18 | InputSplit::Blob blb; 19 | double tstart = GetTime(); 20 | size_t bytes_read = 0; 21 | size_t bytes_expect = 10UL << 20UL; 22 | while (split->NextRecord(&blb)) { 23 | std::string dat = std::string((char*)blb.dptr, 24 | blb.size); 25 | data.push_back(dat); 26 | bytes_read += blb.size; 27 | double tdiff = GetTime() - tstart; 28 | if (bytes_read >= bytes_expect) { 29 | printf("%lu MB read, %g MB/sec\n", 30 | bytes_read >> 20UL, 31 | (bytes_read >> 20UL) / tdiff); 32 | bytes_expect += 10UL << 20UL; 33 | } 34 | } 35 | delete split; 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/split_test.cc: -------------------------------------------------------------------------------- 1 | // test reading speed from a InputSplit 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | int main(int argc, char *argv[]) { 9 | if (argc < 5) { 10 | printf("Usage: partid npart\n"); 11 | return 0; 12 | } 13 | using namespace dmlc; 14 | InputSplit *split = InputSplit::Create(argv[1], 15 | atoi(argv[2]), 16 | atoi(argv[3]), 17 | "text"); 18 | InputSplit::Blob blb; 19 | while (split->NextChunk(&blb)) { 20 | std::cout << std::string((char*)blb.dptr, blb.size); 21 | } 22 | delete split; 23 | return 0; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/stream_read_test.cc: -------------------------------------------------------------------------------- 1 
| // test reading speed from a Stream 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int main(int argc, char *argv[]) { 8 | if (argc < 3) { 9 | printf("Usage: uri buffersize [skip-proc]\n"); 10 | return 0; 11 | } 12 | int skip_proc = 0; 13 | if (argc > 3) { 14 | skip_proc = atoi(argv[3]); 15 | } 16 | size_t sz = atol(argv[2]); 17 | std::string buffer; buffer.resize(sz); 18 | using namespace dmlc; 19 | Stream *fi = Stream::Create(argv[1], "r", true); 20 | CHECK(fi != NULL) << "cannot open " << argv[1]; 21 | double tstart = GetTime(); 22 | size_t size; 23 | size_t bytes_read = 0; 24 | size_t bytes_expect = 10UL << 20UL; 25 | while ((size = fi->Read(BeginPtr(buffer), sz)) != 0) { 26 | int cnt = 0; 27 | if (skip_proc == 0) { 28 | //#pragma omp parallel for reduction(+:cnt) 29 | for (size_t i = 0; i < size; ++i) { 30 | if (buffer[i] == '\n' || buffer[i] == '\r') { 31 | buffer[i] = '\0'; ++ cnt; 32 | } 33 | } 34 | } 35 | bytes_read += size; 36 | double tdiff = GetTime() - tstart; 37 | if (bytes_read >= bytes_expect) { 38 | printf("%lu MB read, %g MB/sec, cnt=%d\n", 39 | bytes_read >> 20UL, 40 | (bytes_read >> 20UL) / tdiff, cnt); 41 | bytes_expect += 10UL << 20UL; 42 | } 43 | } 44 | delete fi; 45 | return 0; 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/strtonum_test.cc: -------------------------------------------------------------------------------- 1 | #include "../src/data/strtonum.h" 2 | #include 3 | 4 | int main(int argc, char *argv[]) { 5 | using namespace dmlc; 6 | 7 | // float 8 | std::vector f = { 9 | "1234567901234", "+12345.6789", "-0.00123", "+0123.234e-2", 10 | "-234234.123123e20", "3.1029831e+38", "000.123e-28"}; 11 | for (size_t i = 0; i < f.size(); ++i) { 12 | float v1 = data::atof(f[i].c_str()); 13 | float v2 = atof(f[i].c_str()); 14 | CHECK_EQ(v1, v2); 15 | } 16 | 17 | // long 18 | std::vector l = { 19 | "2147483647", "+12345", "-123123", "-2147483648" 20 | }; 21 | for (size_t i = 0; i < l.size(); ++i) { 22 | long v1 = data::atol(l[i].c_str()); 23 | long v2 = atol(l[i].c_str()); 24 | CHECK_EQ(v1, v2); 25 | } 26 | 27 | // uint64 28 | std::vector ull = { 29 | "2147483647", "+12345", "18446744073709551615" 30 | }; 31 | for (size_t i = 0; i < ull.size(); ++i) { 32 | unsigned long long v1 = data::strtoull(ull[i].c_str(), 0, 10); 33 | unsigned long long v2 = strtoull(ull[i].c_str(), 0, 10); 34 | CHECK_EQ(v1, v2); 35 | } 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/.gitignore: -------------------------------------------------------------------------------- 1 | dmlc_unittest 2 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/dmlc_unittest.mk: -------------------------------------------------------------------------------- 1 | UTEST_ROOT=test/unittest 2 | UNITTEST=$(UTEST_ROOT)/dmlc_unittest 3 | UNITTEST_SRC=$(wildcard $(UTEST_ROOT)/*.cc) 4 | UNITTEST_OBJ=$(patsubst %.cc,%.o,$(UNITTEST_SRC)) 5 | 6 | GTEST_LIB=$(GTEST_PATH)/lib/ 7 | GTEST_INC=$(GTEST_PATH)/include/ 8 | 9 | $(UTEST_ROOT)/%.o : $(UTEST_ROOT)/%.cc libdmlc.a 10 | $(CXX) $(CFLAGS) -I$(GTEST_INC) -o $@ -c $< 11 | 12 | $(UNITTEST) : $(UNITTEST_OBJ) 13 | $(CXX) $(CFLAGS) -L$(GTEST_LIB) -o $@ $^ libdmlc.a $(LDFLAGS) -lgtest 14 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/unittest_array_view.cc: 
-------------------------------------------------------------------------------- 1 | #include <dmlc/array_view.h> 2 | #include <dmlc/logging.h> 3 | #include <gtest/gtest.h> 4 | 5 | void ArrayViewTest(dmlc::array_view<int> view, int base) { 6 | int cnt = base; 7 | for (int v : view) { 8 | CHECK_EQ(v, cnt); 9 | ++cnt; 10 | } 11 | } 12 | 13 | TEST(ArrayView, Basic) { 14 | std::vector<int> vec{0, 1, 2}; 15 | ArrayViewTest(vec, 0); 16 | int arr[] = {1, 2, 3}; 17 | ArrayViewTest(dmlc::array_view<int>(arr, arr + 3), 1); 18 | dmlc::array_view<int> a = vec; 19 | CHECK_EQ(a.size(), vec.size()); 20 | } 21 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/unittest_logging.cc: -------------------------------------------------------------------------------- 1 | // Copyright by Contributors 2 | #define DMLC_LOG_FATAL_THROW 0 3 | #include <dmlc/logging.h> 4 | #include <gtest/gtest.h> 5 | 6 | using namespace std; 7 | 8 | TEST(Logging, basics) { 9 | LOG(INFO) << "hello"; 10 | LOG(ERROR) << "error"; 11 | 12 | int x = 1, y = 1; 13 | CHECK_EQ(x, y); 14 | CHECK_GE(x, y); 15 | 16 | int *z = &x; 17 | CHECK_EQ(*CHECK_NOTNULL(z), x); 18 | 19 | ASSERT_DEATH(CHECK_NE(x, y), ".*"); 20 | } 21 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/test/unittest/unittest_main.cc: -------------------------------------------------------------------------------- 1 | // Copyright by Contributors 2 | #include <gtest/gtest.h> 3 | 4 | int main(int argc, char ** argv) { 5 | testing::InitGoogleTest(&argc, argv); 6 | testing::FLAGS_gtest_death_test_style = "threadsafe"; 7 | return RUN_ALL_TESTS(); 8 | } 9 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/dmlc-submit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os 4 | curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) 5 | sys.path.insert(0, curr_path) 6 | 7 | from dmlc_tracker import submit 8 | 9 | submit.main() 10 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/dmlc_tracker/__init__.py: -------------------------------------------------------------------------------- 1 | """DMLC Tracker modules for running jobs on different platforms.""" 2 | from __future__ import absolute_import 3 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | .classpath 3 | .project 4 | *.jar 5 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/README.md: -------------------------------------------------------------------------------- 1 | DMLC YARN AppMaster 2 | =================== 3 | * This folder contains the application code that allows rabit to run on YARN. 4 | * See [tracker](../) for job submission. 5 | - run ```./build.sh``` to build the jar before using the script 6 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/build.bat: -------------------------------------------------------------------------------- 1 | mkdir bin 2 | 3 | for /f %%i in ('%HADOOP_HOME%\bin\hadoop classpath') do set CPATH=%%i 4 | %JAVA_HOME%/bin/javac -cp %CPATH% -d bin src/main/java/org/apache/hadoop/yarn/dmlc/*.java 5 | %JAVA_HOME%/bin/jar cf dmlc-yarn.jar -C bin .
6 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ ! -d bin ]; then 3 | mkdir bin 4 | fi 5 | 6 | CPATH=`${HADOOP_HOME}/bin/hadoop classpath` 7 | ${JAVA_HOME}/bin/javac -cp $CPATH -d bin src/main/java/org/apache/hadoop/yarn/dmlc/*.java 8 | ${JAVA_HOME}/bin/jar cf dmlc-yarn.jar -C bin . 9 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/tracker/yarn/src/main/java/org/apache/hadoop/yarn/dmlc/TaskRecord.java: -------------------------------------------------------------------------------- 1 | package org.apache.hadoop.yarn.dmlc; 2 | 3 | import org.apache.hadoop.yarn.api.records.Container; 4 | import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest; 5 | 6 | /** 7 | * data structure to hold the task information 8 | */ 9 | public class TaskRecord { 10 | // task id of the task 11 | public int taskId = 0; 12 | // role of current node 13 | public String taskRole = "worker"; 14 | // number of failed attempts to run the task 15 | public int attemptCounter = 0; 16 | // container request, can be null if task is already running 17 | public ContainerRequest containerRequest = null; 18 | // running container, can be null if the task is not launched 19 | public Container container = null; 20 | // whether we have requested abortion of this task 21 | public boolean abortRequested = false; 22 | 23 | public TaskRecord(int taskId, String role) { 24 | this.taskId = taskId; 25 | this.taskRole = role; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/windows/.gitignore: -------------------------------------------------------------------------------- 1 | Debug 2 | *suo 3 | *.dll 4 | *i386 5 | *x64 6 | ipch 7 | *.filters 8 | *.user 9 | *sdf 10 | Release 11 | Debug 12 | -------------------------------------------------------------------------------- /xgboost/dmlc-core/windows/README.md: -------------------------------------------------------------------------------- 1 | MSVC Project 2 | ==== 3 | The solution has been created with Visual Studio Express 2010. 4 | It is a preliminary project for testing Windows compatibility. 5 | It does not come with a warranty. 6 | -------------------------------------------------------------------------------- /xgboost/doc/.gitignore: -------------------------------------------------------------------------------- 1 | html 2 | latex 3 | *.sh 4 | _* 5 | doxygen 6 | parser.py 7 | *.pyc 8 | web-data 9 | -------------------------------------------------------------------------------- /xgboost/doc/R-package/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /xgboost/doc/R-package/Makefile: -------------------------------------------------------------------------------- 1 | # This is the makefile for compiling Rmarkdown files into md files with results.
2 | PKGROOT=../../R-package 3 | 4 | # ADD The Markdown to be built here, with suffix md 5 | discoverYourData.md: $(PKGROOT)/vignettes/discoverYourData.Rmd 6 | xgboostPresentation.md: $(PKGROOT)/vignettes/xgboostPresentation.Rmd 7 | 8 | # General Rules for build rmarkdowns, need knitr 9 | %.md: 10 | Rscript -e \ 11 | "require(methods);"\ 12 | "require(knitr);"\ 13 | "knitr::opts_knit\$$set(root.dir=\".\");"\ 14 | "knitr::opts_chunk\$$set(fig.path=\"../web-data/xgboost/knitr/$(basename $@)-\");"\ 15 | "knitr::knit(\"$+\")" 16 | -------------------------------------------------------------------------------- /xgboost/doc/R-package/index.md: -------------------------------------------------------------------------------- 1 | XGBoost R Package 2 | ================= 3 | [![CRAN Status Badge](http://www.r-pkg.org/badges/version/xgboost)](http://cran.r-project.org/web/packages/xgboost) 4 | [![CRAN Downloads](http://cranlogs.r-pkg.org/badges/xgboost)](http://cran.rstudio.com/web/packages/xgboost/index.html) 5 | 6 | 7 | You have found the XGBoost R package! 8 | 9 | Get Started 10 | ----------- 11 | * Check out the [Installation Guide](../build.md) for instructions on installing xgboost, and the [Tutorials](#tutorials) for examples of using xgboost for various tasks. 12 | * Please visit the [walk through example](../../R-package/demo). 13 | 14 | Tutorials 15 | --------- 16 | - [Introduction to XGBoost in R](xgboostPresentation.md) 17 | - [Discover your data with XGBoost in R](discoverYourData.md) 18 | -------------------------------------------------------------------------------- /xgboost/doc/README: -------------------------------------------------------------------------------- 1 | The documentation of xgboost is generated with recommonmark and sphinx. 2 | 3 | You can build it locally by typing "make html" in this folder. 4 | - clone https://github.com/tqchen/recommonmark to root 5 | - type make html 6 | 7 | Check out https://recommonmark.readthedocs.org for a guide on how to write markdown with the extensions used in this doc, such as math formulas and tables of content. 8 | -------------------------------------------------------------------------------- /xgboost/doc/cli/index.md: -------------------------------------------------------------------------------- 1 | # XGBoost Command Line version 2 | 3 | See [XGBoost Command Line walkthrough](https://github.com/dmlc/xgboost/blob/master/demo/binary_classification/README.md) 4 | -------------------------------------------------------------------------------- /xgboost/doc/how_to/index.md: -------------------------------------------------------------------------------- 1 | # XGBoost How To 2 | 3 | This page contains guidelines to use and develop XGBoost. 4 | 5 | ## Installation 6 | - [How to Install XGBoost](../build.md) 7 | 8 | ## Use XGBoost in Specific Ways 9 | - [Parameter tuning guide](param_tuning.md) 10 | - [Use out-of-core computation for large datasets](external_memory.md) 11 | 12 | ## Develop and Hack XGBoost 13 | - [Contribute to XGBoost](contribute.md) 14 | 15 | ## Frequently Asked Questions 16 | - [FAQ](../faq.md) 17 | -------------------------------------------------------------------------------- /xgboost/doc/index.md: -------------------------------------------------------------------------------- 1 | XGBoost Documentation 2 | ===================== 3 | This document is hosted at http://xgboost.readthedocs.org/. You can also browse most of the documents on GitHub directly. 4 | 5 | 6 | These are used to generate the index used in search.
7 | 8 | * [Python Package Document](python/index.md) 9 | * [R Package Document](R-package/index.md) 10 | * [Java/Scala Package Document](jvm/index.md) 11 | * [Julia Package Document](julia/index.md) 12 | * [CLI Package Document](cli/index.md) 13 | - [Howto Documents](how_to/index.md) 14 | - [Get Started Documents](get_started/index.md) 15 | - [Tutorials](tutorials/index.md) 16 | -------------------------------------------------------------------------------- /xgboost/doc/julia/index.md: -------------------------------------------------------------------------------- 1 | # XGBoost.jl 2 | 3 | See [XGBoost.jl Project page](https://github.com/dmlc/XGBoost.jl) -------------------------------------------------------------------------------- /xgboost/doc/python/index.md: -------------------------------------------------------------------------------- 1 | XGBoost Python Package 2 | ====================== 3 | This page contains links to all the Python-related documents for the Python package. 4 | To install the package, check out the [Build and Installation Instructions](../build.md). 5 | 6 | Contents 7 | -------- 8 | * [Python Overview Tutorial](python_intro.md) 9 | * [Learning to use XGBoost by Example](../../demo) 10 | * [Python API Reference](python_api.rst) 11 | -------------------------------------------------------------------------------- /xgboost/doc/python/python_api.rst: -------------------------------------------------------------------------------- 1 | Python API Reference 2 | ==================== 3 | This page gives the Python API reference of xgboost; please also refer to the Python Package Introduction for more information about the Python package. 4 | 5 | The document on this page is automatically generated by sphinx. The content does not render on GitHub; you can view it at http://xgboost.readthedocs.org/en/latest/python/python_api.html 6 | 7 | Core Data Structure 8 | ------------------- 9 | .. automodule:: xgboost.core 10 | 11 | .. autoclass:: xgboost.DMatrix 12 | :members: 13 | :show-inheritance: 14 | 15 | .. autoclass:: xgboost.Booster 16 | :members: 17 | :show-inheritance: 18 | 19 | 20 | Learning API 21 | ------------ 22 | .. automodule:: xgboost.training 23 | 24 | .. autofunction:: xgboost.train 25 | 26 | .. autofunction:: xgboost.cv 27 | 28 | 29 | Scikit-Learn API 30 | ---------------- 31 | .. automodule:: xgboost.sklearn 32 | .. autoclass:: xgboost.XGBRegressor 33 | :members: 34 | :show-inheritance: 35 | .. autoclass:: xgboost.XGBClassifier 36 | :members: 37 | :show-inheritance: 38 | 39 | Plotting API 40 | ------------ 41 | .. automodule:: xgboost.plotting 42 | 43 | .. autofunction:: xgboost.plot_importance 44 | 45 | .. autofunction:: xgboost.plot_tree 46 | 47 | ..
48 | 
--------------------------------------------------------------------------------
/xgboost/doc/sphinx_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Helper utility functions for customization."""
3 | import sys
4 | import os
5 | import docutils
6 | import subprocess
7 | 
8 | READTHEDOCS_BUILD = (os.environ.get('READTHEDOCS', None) is not None)
9 | 
10 | if not os.path.exists('../recommonmark'):
11 |     subprocess.call('cd ..; rm -rf recommonmark;' +
12 |                     'git clone https://github.com/tqchen/recommonmark', shell = True)
13 | else:
14 |     subprocess.call('cd ../recommonmark/; git pull', shell=True)
15 | 
16 | if not os.path.exists('web-data'):
17 |     subprocess.call('rm -rf web-data;' +
18 |                     'git clone https://github.com/dmlc/web-data', shell = True)
19 | else:
20 |     subprocess.call('cd web-data; git pull', shell=True)
21 | 
22 | 
23 | sys.path.insert(0, os.path.abspath('../recommonmark/'))
24 | sys.stderr.write('READTHEDOCS=%s\n' % (READTHEDOCS_BUILD))
25 | 
26 | 
27 | from recommonmark import parser, transform
28 | 
29 | MarkdownParser = parser.CommonMarkParser
30 | AutoStructify = transform.AutoStructify
--------------------------------------------------------------------------------
/xgboost/doc/tutorials/index.md:
--------------------------------------------------------------------------------
1 | # XGBoost Tutorials
2 | 
3 | This section contains the official tutorials of the XGBoost package.
4 | See [Awesome XGBoost](https://github.com/dmlc/xgboost/tree/master/demo) for links to more resources.
5 | 
6 | ## Contents
7 | - [Introduction to Boosted Trees](../model.md)
8 | - [Distributed XGBoost YARN on AWS](aws_yarn.md)
9 | - [DART booster](dart.md)
10 | - [Monotonic Constraints](monotonic.md)
11 | 
--------------------------------------------------------------------------------
/xgboost/include/xgboost/logging.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2015 by Contributors
3 |  * \file logging.h
4 |  * \brief defines console logging options for xgboost.
5 |  *  Use to enforce unified print behavior.
6 |  *  For debug loggers, use LOG(INFO) and LOG(ERROR).
7 |  */
8 | #ifndef XGBOOST_LOGGING_H_
9 | #define XGBOOST_LOGGING_H_
10 | 
11 | #include 
12 | #include 
13 | #include "./base.h"
14 | 
15 | namespace xgboost {
16 | 
17 | class BaseLogger {
18 |  public:
19 |   BaseLogger() {
20 | #if XGBOOST_LOG_WITH_TIME
21 |     log_stream_ << "[" << dmlc::DateLogger().HumanDate() << "] ";
22 | #endif
23 |   }
24 |   std::ostream& stream() { return log_stream_; }
25 | 
26 |  protected:
27 |   std::ostringstream log_stream_;
28 | };
29 | 
30 | class ConsoleLogger : public BaseLogger {
31 |  public:
32 |   ~ConsoleLogger();
33 | };
34 | 
35 | class TrackerLogger : public BaseLogger {
36 |  public:
37 |   ~TrackerLogger();
38 | };
39 | 
40 | // redefine the logging macros if they do not already exist
41 | #ifndef LOG
42 | #define LOG(severity) LOG_##severity.stream()
43 | #endif
44 | 
45 | // Use LOG(CONSOLE) to print messages to the console.
46 | #define LOG_CONSOLE ::xgboost::ConsoleLogger()
47 | // Use LOG(TRACKER) to print messages to the tracker.
48 | #define LOG_TRACKER ::xgboost::TrackerLogger()
49 | }  // namespace xgboost.
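// Illustrative usage (a sketch, not part of the original header; `num_row`
// and `iter` are hypothetical variables): each LOG(...) statement creates a
// temporary logger whose buffered message is flushed when it is destroyed at
// the end of the statement.
//   LOG(CONSOLE) << "loaded " << num_row << " rows";   // printed to stderr
//   LOG(TRACKER) << "iteration " << iter << " done";   // sent to the tracker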
50 | #endif // XGBOOST_LOGGING_H_ 51 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/.gitignore: -------------------------------------------------------------------------------- 1 | tracker.py 2 | build.sh -------------------------------------------------------------------------------- /xgboost/jvm-packages/checkstyle-suppressions.xml: -------------------------------------------------------------------------------- 1 | 17 | 18 | 21 | 22 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-example/LICENSE: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | ml.dmlc 8 | xgboost-jvm 9 | 0.7 10 | 11 | xgboost4j-spark 12 | 13 | 14 | 15 | org.apache.maven.plugins 16 | maven-assembly-plugin 17 | 18 | false 19 | 20 | 21 | 22 | 23 | 24 | 25 | ml.dmlc 26 | xgboost4j 27 | 0.7 28 | 29 | 30 | org.apache.spark 31 | spark-mllib_${scala.binary.version} 32 | ${spark.version} 33 | provided 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/Utils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package ml.dmlc.xgboost4j.scala.spark.params 18 | 19 | // based on org.apache.spark.util copy /paste 20 | private[spark] object Utils { 21 | 22 | def getSparkClassLoader: ClassLoader = getClass.getClassLoader 23 | 24 | def getContextOrSparkClassLoader: ClassLoader = 25 | Option(Thread.currentThread().getContextClassLoader).getOrElse(getSparkClassLoader) 26 | 27 | // scalastyle:off classforname 28 | /** Preferred alternative to Class.forName(className) */ 29 | def classForName(className: String): Class[_] = { 30 | Class.forName(className, true, getContextOrSparkClassLoader) 31 | // scalastyle:on classforname 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.logger.org.apache.spark=ERROR -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/SharedSparkContext.scala: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package ml.dmlc.xgboost4j.scala.spark 18 | 19 | import org.apache.spark.{SparkConf, SparkContext} 20 | import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} 21 | 22 | trait SharedSparkContext extends FunSuite with BeforeAndAfter with BeforeAndAfterAll 23 | with Serializable { 24 | 25 | @transient protected implicit var sc: SparkContext = _ 26 | 27 | override def beforeAll() { 28 | val sparkConf = new SparkConf() 29 | .setMaster("local[*]") 30 | .setAppName("XGBoostSuite") 31 | .set("spark.driver.memory", "512m") 32 | .set("spark.ui.enabled", "false") 33 | 34 | sc = new SparkContext(sparkConf) 35 | } 36 | 37 | override def afterAll() { 38 | if (sc != null) { 39 | sc.stop() 40 | sc = null 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /xgboost/jvm-packages/xgboost4j/LICENSE: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014 by Contributors 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 |  */
--------------------------------------------------------------------------------
/xgboost/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IEvaluation.java:
--------------------------------------------------------------------------------
1 | /*
2 |   Copyright (c) 2014 by Contributors
3 | 
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 | 
8 |   http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | package ml.dmlc.xgboost4j.java;
17 | 
18 | import java.io.Serializable;
19 | 
20 | /**
21 |  * interface for customized evaluation
22 |  *
23 |  * @author hzx
24 |  */
25 | public interface IEvaluation extends Serializable {
26 |   /**
27 |    * get the evaluation metric name
28 |    *
29 |    * @return evalMetric
30 |    */
31 |   String getMetric();
32 | 
33 |   /**
34 |    * evaluate with predictions and data
35 |    *
36 |    * @param predicts predictions as an array
37 |    * @param dmat data matrix to evaluate
38 |    * @return result of the metric
39 |    */
40 |   float eval(float[][] predicts, DMatrix dmat);
41 | }
42 | 
--------------------------------------------------------------------------------
/xgboost/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IObjective.java:
--------------------------------------------------------------------------------
1 | /*
2 |   Copyright (c) 2014 by Contributors
3 | 
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 | 
8 |   http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | package ml.dmlc.xgboost4j.java;
17 | 
18 | import java.io.Serializable;
19 | import java.util.List;
20 | 
21 | /**
22 |  * interface for customized objective functions
23 |  *
24 |  * @author hzx
25 |  */
26 | public interface IObjective extends Serializable {
27 |   /**
28 |    * user-defined objective function; returns the gradient and second-order gradient
29 |    *
30 |    * @param predicts untransformed margin predictions
31 |    * @param dtrain training data
32 |    * @return List with two float arrays, corresponding to the first-order and second-order gradients
33 |    */
34 |   List<float[]> getGradient(float[][] predicts, DMatrix dtrain);
35 | }
36 | 
--------------------------------------------------------------------------------
/xgboost/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostError.java:
--------------------------------------------------------------------------------
1 | /*
2 |   Copyright (c) 2014 by Contributors
3 | 
4 |   Licensed under the Apache License, Version 2.0 (the "License");
5 |   you may not use this file except in compliance with the License.
6 |   You may obtain a copy of the License at
7 | 
8 |   http://www.apache.org/licenses/LICENSE-2.0
9 | 
10 |   Unless required by applicable law or agreed to in writing, software
11 |   distributed under the License is distributed on an "AS IS" BASIS,
12 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |   See the License for the specific language governing permissions and
14 |   limitations under the License.
15 |  */
16 | package ml.dmlc.xgboost4j.java;
17 | 
18 | /**
19 |  * custom error class for xgboost
20 |  *
21 |  * @author hzx
22 |  */
23 | public class XGBoostError extends Exception {
24 |   public XGBoostError(String message) {
25 |     super(message);
26 |   }
27 | }
28 | 
--------------------------------------------------------------------------------
/xgboost/make/mingw64.mk:
--------------------------------------------------------------------------------
1 | #-----------------------------------------------------------
2 | # xgboost: Configuration for MinGW (Windows 64bit)
3 | # This allows xgboost to be compiled on Windows using MinGW.
4 | # You will need to install a MinGW toolchain.
5 | # g++-4.6 or later is required.
6 | #
7 | # see config.mk for template.
8 | #-----------------------------------------------------------
9 | export CXX=g++ -m64
10 | export CC=gcc -m64
11 | 
12 | # Whether enable openmp support, needed for multi-threading.
13 | USE_OPENMP = 1
14 | 
15 | # whether use HDFS support during compile
16 | USE_HDFS = 0
17 | 
18 | # whether use AWS S3 support during compile
19 | USE_S3 = 0
20 | 
21 | # whether use Azure blob support during compile
22 | USE_AZURE = 0
23 | 
24 | # Rabit library version,
25 | # - librabit.a Normal distributed version.
26 | # - librabit_empty.a Non distributed mock version,
27 | LIB_RABIT = librabit_empty.a
28 | 
29 | DMLC_CFLAGS = -DDMLC_ENABLE_STD_THREAD=0
30 | ADD_CFLAGS = -DDMLC_ENABLE_STD_THREAD=0
--------------------------------------------------------------------------------
/xgboost/make/minimum.mk:
--------------------------------------------------------------------------------
1 | #-----------------------------------------------------
2 | # xgboost: minimum dependency configuration,
3 | # see config.mk for template.
4 | #----------------------------------------------------
5 | 
6 | # Whether enable openmp support, needed for multi-threading.
7 | USE_OPENMP = 0
8 | 
9 | # whether use HDFS support during compile
10 | USE_HDFS = 0
11 | 
12 | # whether use AWS S3 support during compile
13 | USE_S3 = 0
14 | 
15 | # whether use Azure blob support during compile
16 | USE_AZURE = 0
17 | 
18 | # Rabit library version,
19 | # - librabit.a Normal distributed version.
20 | # - librabit_empty.a Non distributed mock version,
21 | LIB_RABIT = librabit_empty.a
22 | 
--------------------------------------------------------------------------------
/xgboost/make/minimum_parallel.mk:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------
2 | # xgboost: minimum dependency configuration with Parallelization.
3 | # This configuration is standard but cannot run distributed computing.
4 | #
5 | # see config.mk for template.
6 | #------------------------------------------------------------------------
7 | 
8 | # Whether enable openmp support, needed for multi-threading.
9 | USE_OPENMP = 1 10 | 11 | # whether use HDFS support during compile 12 | USE_HDFS = 0 13 | 14 | # whether use AWS S3 support during compile 15 | USE_S3 = 0 16 | 17 | # whether use Azure blob support during compile 18 | USE_AZURE = 0 19 | 20 | # Rabit library version, 21 | # - librabit.a Normal distributed version. 22 | # - librabit_empty.a Non distributed mock version, 23 | LIB_RABIT = librabit_empty.a 24 | -------------------------------------------------------------------------------- /xgboost/make/travis.mk: -------------------------------------------------------------------------------- 1 | 2 | # the additional link flags you want to add 3 | ADD_LDFLAGS = 4 | 5 | # the additional compile flags you want to add 6 | ADD_CFLAGS = 7 | 8 | # Whether enable openmp support, needed for multi-threading. 9 | USE_OPENMP = 1 10 | 11 | # whether use HDFS support during compile 12 | USE_HDFS = 0 13 | 14 | # whether use AWS S3 support during compile 15 | USE_S3 = 0 16 | 17 | # whether use Azure blob support during compile 18 | USE_AZURE = 0 19 | 20 | # Rabit library version, 21 | # - librabit.a Normal distributed version. 22 | # - librabit_empty.a Non distributed mock version, 23 | LIB_RABIT = librabit.a 24 | 25 | # path to libjvm.so 26 | LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server 27 | 28 | # path to googletest and whether to measure coverage or not 29 | GTEST_PATH = 30 | WITH_COVER = 1 31 | 32 | # List of additional plugins, checkout plugin folder. 33 | # uncomment the following lines to include these plugins 34 | # you can also add your own plugin like this 35 | # 36 | XGB_PLUGINS += plugin/example/plugin.mk 37 | XGB_PLUGINS += plugin/lz4/plugin.mk 38 | XGB_PLUGINS += plugin/dense_parser/plugin.mk 39 | -------------------------------------------------------------------------------- /xgboost/nccl/.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. 
2 | /build 3 | Win32/ 4 | x64/ 5 | *.sdf 6 | *.user 7 | *.opensdf 8 | *.pdb 9 | *.suo 10 | windows/test/test/ 11 | *.VC.db 12 | -------------------------------------------------------------------------------- /xgboost/nccl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | project(nccl) 3 | find_package(CUDA 7.5 QUIET REQUIRED) 4 | 5 | set(NCCL_MAJOR 1) 6 | set(NCCL_MINOR 3) 7 | set(NCCL_PATCH 4) 8 | 9 | # Call add_subdirectory(nccl) after nvcc flags have been set in the parent project to propagate flags to nccl 10 | set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DNCCL_MAJOR=${NCCL_MAJOR} -DNCCL_MINOR=${NCCL_MINOR} -DNCCL_PATCH=${NCCL_PATCH} -DCUDA_MAJOR=7 -DCUDA_MINOR=5") 11 | file(GLOB SOURCES src/*.cu) 12 | 13 | cuda_add_library(${PROJECT_NAME} STATIC ${SOURCES}) 14 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/.gitignore: -------------------------------------------------------------------------------- 1 | /*.debhelper.log 2 | /*.debhelper 3 | /*.substvars 4 | /tmp/ 5 | /files 6 | /libnccl1/ 7 | /libnccl-dev/ 8 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/changelog.in: -------------------------------------------------------------------------------- 1 | nccl (${nccl:Major}.${nccl:Minor}.${nccl:Patch}-${deb:Revision}+cuda${cuda:Major}.${cuda:Minor}) trusty; urgency=medium 2 | 3 | * Automatic Debian package from build 4 | 5 | -- cudatools ${deb:Timestamp} 6 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /xgboost/nccl/debian/control.in: -------------------------------------------------------------------------------- 1 | Source: nccl 2 | Section: libs 3 | Maintainer: cudatools 4 | Priority: optional 5 | Build-depends: debhelper(>=9) 6 | Standards-Version: 3.9.5 7 | 8 | Package: libnccl${nccl:Major} 9 | Section: libs 10 | Architecture: ${deb:Arch} 11 | Depends: ${misc:Depends}, ${shlibs:Depends} 12 | Description: NVIDIA Collectives Communication Library (NCCL) Runtime 13 | NCCL (pronounced "Nickel") is a stand-alone library of standard collective 14 | communication routines for GPUs, such as all-gather, reduce, broadcast, etc., 15 | that have been optimized to achieve high bandwidth over PCIe. NCCL supports up 16 | to eight GPUs and can be used in either single- or multi-process (e.g., MPI) 17 | applications. 18 | 19 | Package: libnccl-dev 20 | Section: libdevel 21 | Architecture: ${deb:Arch} 22 | Depends: ${misc:Depends}, ${shlibs:Depends}, libnccl${nccl:Major} (= ${binary:Version}) 23 | Description: NVIDIA Collectives Communication Library (NCCL) Development Files 24 | NCCL (pronounced "Nickel") is a stand-alone library of standard collective 25 | communication routines for GPUs, such as all-gather, reduce, broadcast, etc., 26 | that have been optimized to achieve high bandwidth over PCIe. NCCL supports up 27 | to eight GPUs and can be used in either single- or multi-process (e.g., MPI) 28 | applications. 
29 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/copyright:
--------------------------------------------------------------------------------
1 | ../LICENSE.txt
--------------------------------------------------------------------------------
/xgboost/nccl/debian/libnccl-dev.install:
--------------------------------------------------------------------------------
1 | include/nccl.h usr/include
2 | lib/libnccl.so /usr/lib/x86_64-linux-gnu
3 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/libnccl-dev.manpages:
--------------------------------------------------------------------------------
1 | debian/nccl.7
2 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/libnccl1.install.in:
--------------------------------------------------------------------------------
1 | lib/libnccl.so.${nccl:Major} /usr/lib/x86_64-linux-gnu
2 | lib/libnccl.so.${nccl:Major}.${nccl:Minor}.${nccl:Patch} /usr/lib/x86_64-linux-gnu
3 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 | 
3 | %:
4 | 	dh $@ --parallel
5 | 
6 | override_dh_auto_install:
7 | 	PREFIX=debian/tmp dh_auto_install
8 | 
9 | override_dh_auto_test:
10 | 	# Do not make test
11 | 
12 | override_dh_auto_clean:
13 | 	# Do not make clean
14 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/shlibs.local.in:
--------------------------------------------------------------------------------
1 | libcudart ${cuda:Major}.${cuda:Minor} cuda-cudart-${cuda:Major}-${cuda:Minor}
2 | 
--------------------------------------------------------------------------------
/xgboost/nccl/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (native)
2 | 
--------------------------------------------------------------------------------
/xgboost/plugin/dense_parser/plugin.mk:
--------------------------------------------------------------------------------
1 | PLUGIN_OBJS += build_plugin/dense_parser/dense_libsvm.o
2 | PLUGIN_LDFLAGS +=
3 | 
--------------------------------------------------------------------------------
/xgboost/plugin/example/README.md:
--------------------------------------------------------------------------------
1 | XGBoost Plugin Example
2 | ======================
3 | This folder provides an example of an xgboost plugin.
4 | 
5 | There are three steps you need to take to add a plugin to xgboost:
6 | - Create your source .cc file and implement a new extension
7 |   - In this example, [custom_obj.cc](custom_obj.cc)
8 | - Register this extension to xgboost via a registration macro
9 |   - In this example, ```XGBOOST_REGISTER_OBJECTIVE``` in [this line](custom_obj.cc#L75)
10 | - Create a [plugin.mk](plugin.mk) in this folder
11 | 
12 | To add this plugin, add the following line to ```config.mk``` (template in make/config.mk).
13 | ```makefile
14 | # Add the plugin by including it in the config
15 | XGB_PLUGINS += plugin/example/plugin.mk
16 | ```
17 | 
18 | Then you can test this plugin by using the ```objective=mylogistic``` parameter.
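
For reference, the registration step amounts to only a few lines of C++. The sketch below is illustrative rather than a copy of the plugin source: it assumes a class ``MyLogistic`` implementing xgboost's ``ObjFunction`` interface, which is what [custom_obj.cc](custom_obj.cc) defines.

```c++
// Hypothetical sketch of the registration step; the class name and the
// description string are assumptions, the macro is the real entry point.
XGBOOST_REGISTER_OBJECTIVE(MyLogistic, "mylogistic")
.describe("Example logistic objective implemented as a plugin.")
.set_body([]() { return new MyLogistic(); });
```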
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /xgboost/plugin/example/plugin.mk: -------------------------------------------------------------------------------- 1 | # Add the object files you like to include in this plugin. 2 | PLUGIN_OBJS += build_plugin/example/custom_obj.o 3 | # Add additional dependent libraries this plugin might have 4 | PLUGIN_LDFLAGS += -------------------------------------------------------------------------------- /xgboost/plugin/lz4/plugin.mk: -------------------------------------------------------------------------------- 1 | PLUGIN_OBJS += build_plugin/lz4/sparse_page_lz4_format.o 2 | PLUGIN_LDFLAGS += -llz4 3 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/gitshallow_submodules.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | git submodule init 3 | for i in $(git submodule | awk '{print $2}'); do 4 | spath=$(git config -f .gitmodules --get submodule.$i.path) 5 | surl=$(git config -f .gitmodules --get submodule.$i.url) 6 | if [ $spath == "cub" ] 7 | then 8 | git submodule update --depth 3 $spath 9 | else 10 | git submodule update $spath 11 | fi 12 | done 13 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/plugin.mk: -------------------------------------------------------------------------------- 1 | 2 | PLUGIN_OBJS += build_plugin/updater_gpu/src/register_updater_gpu.o \ 3 | build_plugin/updater_gpu/src/updater_gpu.o \ 4 | build_plugin/updater_gpu/src/gpu_hist_builder.o 5 | PLUGIN_LDFLAGS += -L$(CUDA_ROOT)/lib64 -lcudart 6 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/src/register_updater_gpu.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright 2017 XGBoost contributors 3 | */ 4 | #include 5 | #include "updater_gpu.cuh" 6 | 7 | namespace xgboost { 8 | namespace tree { 9 | DMLC_REGISTRY_FILE_TAG(updater_gpumaker); 10 | 11 | 12 | XGBOOST_REGISTER_TREE_UPDATER(GPUMaker, "grow_gpu") 13 | .describe("Grow tree with GPU.") 14 | .set_body([]() { return new GPUMaker(); }); 15 | 16 | XGBOOST_REGISTER_TREE_UPDATER(GPUHistMaker, "grow_gpu_hist") 17 | .describe("Grow tree with GPU.") 18 | .set_body([]() { return new GPUHistMaker(); }); 19 | } // namespace tree 20 | } // namespace xgboost 21 | -------------------------------------------------------------------------------- /xgboost/plugin/updater_gpu/src/updater_gpu.cuh: -------------------------------------------------------------------------------- 1 | 2 | /*! 
3 |  * Copyright 2017 XGBoost contributors
4 |  */
5 | #pragma once
6 | #include 
7 | #include 
8 | #include "../../../src/tree/param.h"
9 | 
10 | namespace xgboost {
11 | namespace tree {
12 | 
13 | // Forward declare builder classes
14 | class GPUHistBuilder;
15 | namespace exact {
16 | template 
17 | class GPUBuilder;
18 | }
19 | 
20 | class GPUMaker : public TreeUpdater {
21 |  protected:
22 |   TrainParam param;
23 |   std::unique_ptr> builder;
24 | 
25 |  public:
26 |   GPUMaker();
27 |   void Init(
28 |       const std::vector>& args) override;
29 |   void Update(const std::vector& gpair, DMatrix* dmat,
30 |               const std::vector& trees);
31 | };
32 | 
33 | class GPUHistMaker : public TreeUpdater {
34 |  public:
35 |   GPUHistMaker();
36 |   void Init(
37 |       const std::vector>& args) override;
38 |   void Update(const std::vector& gpair, DMatrix* dmat,
39 |               const std::vector& trees) override;
40 |   bool UpdatePredictionCache(const DMatrix* data,
41 |                              std::vector* out_preds) override;
42 | 
43 |  protected:
44 |   TrainParam param;
45 |   std::unique_ptr builder;
46 | };
47 | }  // namespace tree
48 | }  // namespace xgboost
49 | 
--------------------------------------------------------------------------------
/xgboost/python-package/.gitignore:
--------------------------------------------------------------------------------
1 | build
2 | dist
3 | *.egg*
--------------------------------------------------------------------------------
/xgboost/python-package/.pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 | 
3 | ignore=tests
4 | 
5 | disable=unexpected-special-method-signature,too-many-nested-blocks
6 | 
7 | dummy-variables-rgx=(unused|)_.*
8 | 
9 | reports=no
10 | 
--------------------------------------------------------------------------------
/xgboost/python-package/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md *.rst
2 | recursive-include xgboost *
3 | recursive-include xgboost/include *
4 | recursive-include xgboost/src *
5 | recursive-include xgboost/make *
6 | recursive-include xgboost/rabit *
7 | recursive-include xgboost/lib *
8 | recursive-include xgboost/dmlc-core *
9 | # exclude pre-compiled .o and .a files to avoid confusion
10 | # make sure .a files are all removed to force recompiling
11 | # including the pre-compiled .so is needed as a placeholder,
12 | # since it will be copied after compiling on the fly
13 | global-exclude *.o
14 | global-exclude *.a
15 | global-exclude *.pyo
16 | global-exclude *.pyc
--------------------------------------------------------------------------------
/xgboost/python-package/prep_pip.sh:
--------------------------------------------------------------------------------
1 | # this script prepares the PyPI installation package;
2 | # please don't use it for installing xgboost from github
3 | 
4 | # after executing `make pippack`, cd xgboost-python,
5 | # run this script and get the sdist tar.gz in ./dist/
6 | sh ./xgboost/build-python.sh
7 | cp setup_pip.py setup.py
8 | python setup.py sdist
9 | 
10 | # make sure you know what you are going to do, then uncomment the following line
11 | #python setup.py register upload
12 | 
--------------------------------------------------------------------------------
/xgboost/python-package/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst
--------------------------------------------------------------------------------
/xgboost/rabit/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled Object files
2 | *.slo
3 | *.lo
4 | *.o
5 | *.obj
6 | 
7 | # Precompiled Headers
8 | *.gch
9 | *.pch
10 | *.lnk
11 | # Compiled Dynamic libraries
12 | *.so
13 | *.dylib
14 | *.dll
15 | 
16 | # Fortran module files
17 | *.mod
18 | 
19 | # Compiled Static libraries
20 | *.lai
21 | *.la
22 | *.a
23 | *.lib
24 | 
25 | # Executables
26 | *.exe
27 | *.out
28 | *.app
29 | *~
30 | *.pyc
31 | *.mpi
32 | *.exe
33 | *.txt
34 | *tmp*
35 | *.rabit
36 | *.mock
37 | dmlc-core
38 | recommonmark
39 | recom
40 | 
--------------------------------------------------------------------------------
/xgboost/rabit/.travis.yml:
--------------------------------------------------------------------------------
1 | # disable sudo to use container based build
2 | sudo: false
3 | 
4 | # Use Build Matrix to do lint and build separately
5 | env:
6 |   matrix:
7 |     - TASK=lint LINT_LANG=cpp
8 |     - TASK=lint LINT_LANG=python
9 |     - TASK=doc
10 |     - TASK=build CXX=g++
11 |     - TASK=test CXX=g++
12 | 
13 | # dependent apt packages
14 | addons:
15 |   apt:
16 |     packages:
17 |       - doxygen
18 |       - libopenmpi-dev
19 |       - wget
20 |       - git
21 |       - libcurl4-openssl-dev
22 |       - unzip
23 |       - python-numpy
24 | 
25 | before_install:
26 |   - git clone https://github.com/dmlc/dmlc-core
27 |   - export TRAVIS=dmlc-core/scripts/travis/
28 |   - source ${TRAVIS}/travis_setup_env.sh
29 | 
30 | install:
31 |   - pip install --user cpplint pylint
32 | 
33 | script: scripts/travis_script.sh
34 | 
35 | 
36 | before_cache:
37 |   - ${TRAVIS}/travis_before_cache.sh
38 | 
39 | 
40 | cache:
41 |   directories:
42 |     - ${HOME}/.cache/usr
43 | 
44 | 
45 | notifications:
46 | # Emails are sent to the committer's git-configured email address by default,
47 |   email:
48 |     on_success: change
49 |     on_failure: always
50 | 
51 | 
52 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/.gitignore:
--------------------------------------------------------------------------------
1 | html
2 | latex
3 | *.sh
4 | _*
5 | doxygen
--------------------------------------------------------------------------------
/xgboost/rabit/doc/cpp_api.md:
--------------------------------------------------------------------------------
1 | C++ Library API of Rabit
2 | ========================
3 | This page contains documentation of the C++ library API of rabit.
4 | 
5 | ```eval_rst
6 | .. toctree::
7 | 
8 | .. doxygennamespace:: rabit
9 | ```
10 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/index.md:
--------------------------------------------------------------------------------
1 | Rabit Documentation
2 | =====================
3 | rabit is a lightweight library that provides a fault-tolerant interface for Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. The goal of rabit is to support **portable**, **scalable** and **reliable** distributed machine learning programs.
4 | 
5 | API Documents
6 | -------------
7 | ```eval_rst
8 | 
9 | .. toctree::
10 |     :maxdepth: 2
11 | 
12 |     python_api.md
13 |     cpp_api.md
14 |     parameters.md
15 |     guide.md
16 | ```
17 | Indices and tables
18 | ------------------
19 | 
20 | ```eval_rst
21 | * :ref:`genindex`
22 | * :ref:`modindex`
23 | * :ref:`search`
24 | ```
--------------------------------------------------------------------------------
/xgboost/rabit/doc/parameters.md:
--------------------------------------------------------------------------------
1 | Parameters
2 | ==========
3 | This section lists all the parameters that can be passed to the rabit::Init function as argv.
4 | All the parameters are passed in as strings in the format ``parameter-name=parameter-value``.
5 | In most settings these parameters have default values or will be automatically detected,
6 | and do not need to be manually configured.
7 | 
8 | * rabit_tracker_uri [passed in automatically by tracker]
9 |   - The URI/IP of the rabit tracker
10 | * rabit_tracker_port [passed in automatically by tracker]
11 |   - The port of the rabit tracker
12 | * rabit_task_id [automatically detected]
13 |   - The unique identifier of the computing process
14 |   - When running on Hadoop, this is automatically extracted from an environment variable
15 | * rabit_reduce_buffer [default = 256MB]
16 |   - The memory buffer used to store intermediate results of reduction
17 |   - Format "digits + unit"; can be 128M, 1G
18 | * rabit_global_replica [default = 5]
19 |   - Number of replica copies of the result kept for each Allreduce/Broadcast call
20 | * rabit_local_replica [default = 2]
21 |   - Number of replicas of the local model kept at checkpoints
22 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/python_api.md:
--------------------------------------------------------------------------------
1 | Python API of Rabit
2 | ===================
3 | This page contains documentation of the python API of rabit.
4 | 
5 | ```eval_rst
6 | .. toctree::
7 | 
8 | .. automodule:: rabit
9 |     :members:
10 |     :show-inheritance:
11 | ```
12 | 
--------------------------------------------------------------------------------
/xgboost/rabit/doc/sphinx_util.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Helper utility functions for customization."""
3 | import sys
4 | import os
5 | import docutils
6 | import subprocess
7 | 
8 | if os.environ.get('READTHEDOCS', None) == 'True':
9 |     subprocess.call('cd ..; rm -rf recommonmark;' +
10 |                     'git clone https://github.com/tqchen/recommonmark', shell=True)
11 | 
12 | sys.path.insert(0, os.path.abspath('../recommonmark/'))
13 | from recommonmark import parser, transform
14 | 
15 | MarkdownParser = parser.CommonMarkParser
16 | AutoStructify = transform.AutoStructify
17 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/Makefile:
--------------------------------------------------------------------------------
1 | export CC = gcc
2 | export CXX = g++
3 | export MPICXX = mpicxx
4 | export LDFLAGS= -pthread -lm -L../lib
5 | export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -fopenmp -I../include
6 | 
7 | .PHONY: clean all lib libmpi
8 | BIN = basic.rabit broadcast.rabit
9 | MOCKBIN= lazy_allreduce.mock
10 | 
11 | all: $(BIN)
12 | basic.rabit: basic.cc lib ../lib/librabit.a
13 | broadcast.rabit: broadcast.cc lib ../lib/librabit.a
14 | lazy_allreduce.mock: lazy_allreduce.cc lib ../lib/librabit.a
15 | 
16 | $(BIN) :
17 | 	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)
18 | 
19 | $(MOCKBIN) :
20 | 	$(CXX) $(CFLAGS) -std=c++11 -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock
21 | 
22 | $(OBJ) :
23 | 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
24 | 
25 | clean:
26 | 	$(RM) $(OBJ) $(BIN) $(MOCKBIN) *~ ../src/*~
--------------------------------------------------------------------------------
/xgboost/rabit/guide/README:
--------------------------------------------------------------------------------
1 | See the tutorial at ../doc/guide.md
--------------------------------------------------------------------------------
/xgboost/rabit/guide/basic.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file basic.cc
4 |  * \brief This is an example demonstrating what Allreduce is
5 |  *
6 |  * \author Tianqi Chen
7 |  */
8 | #define _CRT_SECURE_NO_WARNINGS
9 | #define _CRT_SECURE_NO_DEPRECATE
10 | #include 
11 | #include 
12 | using namespace rabit;
13 | int main(int argc, char *argv[]) {
14 |   int N = 3;
15 |   if (argc > 1) {
16 |     N = atoi(argv[1]);
17 |   }
18 |   std::vector<int> a(N);
19 |   rabit::Init(argc, argv);
20 |   for (int i = 0; i < N; ++i) {
21 |     a[i] = rabit::GetRank() + i;
22 |   }
23 |   printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
24 |          rabit::GetRank(), a[0], a[1], a[2]);
25 |   // allreduce takes the max of each element across all processes
26 |   Allreduce<op::Max>(&a[0], N);
27 |   printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
28 |          rabit::GetRank(), a[0], a[1], a[2]);
29 |   // second allreduce that sums everything up
30 |   Allreduce<op::Sum>(&a[0], N);
31 |   printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
32 |          rabit::GetRank(), a[0], a[1], a[2]);
33 |   rabit::Finalize();
34 |   return 0;
35 | }
36 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | demo python script of rabit
4 | """
5 | import os
6 | import sys
7 | import numpy as np
8 | # import rabit, the tracker script will setup the lib path correctly
9 | # for a normal run without the tracker script, add the following line
10 | # sys.path.append(os.path.dirname(__file__) + '/../python')
11 | import rabit
12 | 
13 | rabit.init()
14 | n = 3
15 | rank = rabit.get_rank()
16 | a = np.zeros(n)
17 | for i in xrange(n):
18 |     a[i] = rank + i
19 | 
20 | print '@node[%d] before-allreduce: a=%s' % (rank, str(a))
21 | a = rabit.allreduce(a, rabit.MAX)
22 | print '@node[%d] after-allreduce-max: a=%s' % (rank, str(a))
23 | a = rabit.allreduce(a, rabit.SUM)
24 | print '@node[%d] after-allreduce-sum: a=%s' % (rank, str(a))
25 | rabit.finalize()
--------------------------------------------------------------------------------
/xgboost/rabit/guide/broadcast.cc:
--------------------------------------------------------------------------------
1 | #include 
2 | using namespace rabit;
3 | const int N = 3;
4 | int main(int argc, char *argv[]) {
5 |   rabit::Init(argc, argv);
6 |   std::string s;
7 |   if (rabit::GetRank() == 0) s = "hello world";
8 |   printf("@node[%d] before-broadcast: s=\"%s\"\n",
9 |          rabit::GetRank(), s.c_str());
10 |   // broadcast s from node 0 to all other nodes
11 |   rabit::Broadcast(&s, 0);
12 |   printf("@node[%d] after-broadcast: s=\"%s\"\n",
13 |          rabit::GetRank(), s.c_str());
14 |   rabit::Finalize();
15 |   return 0;
16 | }
17 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/broadcast.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | demo python script of rabit
4 | """
5 | import os
6 | import sys
7 | # add path to wrapper
8 | # for a normal run without the tracker script, add the following line
9 | # sys.path.append(os.path.dirname(__file__) + '/../wrapper')
10 | import rabit
11 | 
12 | rabit.init()
13 | n = 3
14 | rank = rabit.get_rank()
15 | s = None
16 | if rank == 0:
17 |     s = {'hello world':100, 2:3}
18 | print '@node[%d] before-broadcast: s=\"%s\"' % (rank, str(s))
19 | s = rabit.broadcast(s, 0)
20 | 
21 | print '@node[%d] after-broadcast: s=\"%s\"' % (rank, str(s))
22 | rabit.finalize()
23 | 
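# Note (illustrative, derived from the code above): rank 0 starts with the
# dict while every other rank starts with None; after rabit.broadcast(s, 0)
# every rank prints the same dict, received from node 0.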
--------------------------------------------------------------------------------
/xgboost/rabit/guide/lazy_allreduce.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file lazy_allreduce.cc
4 |  * \brief This is an example demonstrating Allreduce with a lazy preparation function
5 |  *
6 |  * \author Tianqi Chen
7 |  */
8 | #include 
9 | 
10 | using namespace rabit;
11 | const int N = 3;
12 | int main(int argc, char *argv[]) {
13 |   int a[N] = {0};
14 |   rabit::Init(argc, argv);
15 |   // lazy preparation function
16 |   auto prepare = [&]() {
17 |     printf("@node[%d] run prepare function\n", rabit::GetRank());
18 |     for (int i = 0; i < N; ++i) {
19 |       a[i] = rabit::GetRank() + i;
20 |     }
21 |   };
22 |   printf("@node[%d] before-allreduce: a={%d, %d, %d}\n",
23 |          rabit::GetRank(), a[0], a[1], a[2]);
24 |   // allreduce takes the max of each element across all processes
25 |   Allreduce<op::Max>(&a[0], N, prepare);
26 |   printf("@node[%d] after-allreduce-max: a={%d, %d, %d}\n",
27 |          rabit::GetRank(), a[0], a[1], a[2]);
28 |   // run second allreduce
29 |   Allreduce<op::Sum>(&a[0], N);
30 |   printf("@node[%d] after-allreduce-sum: a={%d, %d, %d}\n",
31 |          rabit::GetRank(), a[0], a[1], a[2]);
32 |   rabit::Finalize();
33 |   return 0;
34 | }
35 | 
--------------------------------------------------------------------------------
/xgboost/rabit/guide/lazy_allreduce.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | """
3 | demo python script of rabit: Lazy preparation function
4 | """
5 | import os
6 | import sys
7 | import numpy as np
8 | # import rabit, the tracker script will setup the lib path correctly
9 | # for a normal run without the tracker script, add the following line
10 | # sys.path.append(os.path.dirname(__file__) + '/../wrapper')
11 | import rabit
12 | 
13 | 
14 | # use the mock library so that we can run failure tests
15 | rabit.init(lib = 'mock')
16 | n = 3
17 | rank = rabit.get_rank()
18 | a = np.zeros(n)
19 | 
20 | def prepare(a):
21 |     print('@node[%d] run prepare function' % rank)
22 |     # must take in the reference and modify the reference
23 |     for i in xrange(n):
24 |         a[i] = rank + i
25 | 
26 | print('@node[%d] before-allreduce: a=%s' % (rank, str(a)))
27 | a = rabit.allreduce(a, rabit.MAX, prepare_fun = prepare)
28 | print('@node[%d] after-allreduce-max: a=%s' % (rank, str(a)))
29 | a = rabit.allreduce(a, rabit.SUM)
30 | print('@node[%d] after-allreduce-sum: a=%s' % (rank, str(a)))
31 | rabit.finalize()
32 | 
--------------------------------------------------------------------------------
/xgboost/rabit/include/dmlc/README.md:
--------------------------------------------------------------------------------
1 | This folder is part of the dmlc-core library; it allows rabit to use a unified stream interface with other dmlc projects.
2 | 
3 | - Since it is only an interface dependency, dmlc-core is not required to compile rabit
4 | - To compile a project that uses dmlc-core functions, linking to libdmlc.a (provided by dmlc-core) is required.
5 | 
--------------------------------------------------------------------------------
/xgboost/rabit/include/rabit/internal/timer.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright by Contributors
3 |  * \file timer.h
4 |  * \brief This file defines the utilities for timing
5 |  * \author Tianqi Chen, Nacho, Tianyi
6 |  */
7 | #ifndef RABIT_INTERNAL_TIMER_H_
8 | #define RABIT_INTERNAL_TIMER_H_
9 | #include 
10 | #ifdef __MACH__
11 | #include 
12 | #include 
13 | #endif
14 | #include "./utils.h"
15 | 
16 | namespace rabit {
17 | namespace utils {
18 | /*!
19 |  * \brief return time in seconds; not cross-platform, avoid using this in most places
20 |  */
21 | inline double GetTime(void) {
22 | #ifdef __MACH__
23 |   clock_serv_t cclock;
24 |   mach_timespec_t mts;
25 |   host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
26 |   utils::Check(clock_get_time(cclock, &mts) == 0, "failed to get time");
27 |   mach_port_deallocate(mach_task_self(), cclock);
28 |   return static_cast(mts.tv_sec) + static_cast(mts.tv_nsec) * 1e-9;
29 | #else
30 | #if defined(__unix__) || defined(__linux__)
31 |   timespec ts;
32 |   utils::Check(clock_gettime(CLOCK_REALTIME, &ts) == 0, "failed to get time");
33 |   return static_cast(ts.tv_sec) + static_cast(ts.tv_nsec) * 1e-9;
34 | #else
35 |   return static_cast(time(NULL));
36 | #endif
37 | #endif
38 | }
39 | }  // namespace utils
40 | }  // namespace rabit
41 | #endif  // RABIT_INTERNAL_TIMER_H_
42 | 
--------------------------------------------------------------------------------
/xgboost/rabit/include/rabit/serializable.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file serializable.h
4 |  * \brief defines the serializable interface of rabit
5 |  * \author Tianqi Chen
6 |  */
7 | #ifndef RABIT_SERIALIZABLE_H_
8 | #define RABIT_SERIALIZABLE_H_
9 | #include 
10 | #include 
11 | #include "./internal/utils.h"
12 | #include "../dmlc/io.h"
13 | 
14 | namespace rabit {
15 | /*!
16 |  * \brief defines the stream used in rabit
17 |  *  see the definition of Stream in dmlc/io.h
18 |  */
19 | typedef dmlc::Stream Stream;
20 | /*!
21 |  * \brief defines the serializable objects used in rabit
22 |  *  see the definition of Serializable in dmlc/io.h
23 |  */
24 | typedef dmlc::Serializable Serializable;
25 | 
26 | }  // namespace rabit
27 | #endif  // RABIT_SERIALIZABLE_H_
28 | 
--------------------------------------------------------------------------------
/xgboost/rabit/lib/flag: https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/rabit/lib/flag
--------------------------------------------------------------------------------
/xgboost/rabit/lib/readme.md: https://raw.githubusercontent.com/nuanio/xgboost-node/ab214ec69367713995ee04070b2063daf4f4ffab/xgboost/rabit/lib/readme.md
--------------------------------------------------------------------------------
/xgboost/rabit/scripts/travis_runtest.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | make -f test.mk model_recover_10_10k || exit -1
3 | make -f test.mk model_recover_10_10k_die_same || exit -1
4 | make -f test.mk local_recover_10_10k || exit -1
5 | make -f test.mk lazy_recover_10_10k_die_hard || exit -1
6 | make -f test.mk lazy_recover_10_10k_die_same || exit -1
7 | make -f test.mk ringallreduce_10_10k || exit -1
8 | 
--------------------------------------------------------------------------------
/xgboost/rabit/scripts/travis_script.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # main script of travis
4 | if [ ${TASK} == "lint" ]; then
5 |     make lint || exit -1
6 | fi
7 | 
8 | if [ ${TASK} == "doc" ]; then
9 |     make doc 2>log.txt
10 |     (cat log.txt| grep -v ENABLE_PREPROCESSING |grep -v "unsupported tag" |grep warning) && exit -1
11 | fi
12 | 
13 | if [ ${TASK} == "build" ]; then
14 |     make all || exit -1
15 | fi
16 | 
17 | if [ ${TASK} == "test" ]; then
18 |     cd test
19 |     make all || exit -1
20 |     ../scripts/travis_runtest.sh || exit -1
21 | fi
22 | 
23 | 
--------------------------------------------------------------------------------
/xgboost/rabit/src/README.md:
--------------------------------------------------------------------------------
1 | Source Files of Rabit
2 | ====
3 | * This folder contains the source files of the rabit library
4 | * The library headers are in the folder [include](../include)
5 | * The .h files in this folder are internal header files that are only used by rabit and will not be seen by users
6 | 
7 | 
--------------------------------------------------------------------------------
/xgboost/rabit/src/engine_base.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file engine_base.cc
4 |  * \brief this is an engine implementation that uses
5 |  *  the base Allreduce manager, without failure injection
6 |  * \author Tianqi Chen
7 |  */
8 | // define use BASE, so we will use the base Manager
9 | #define _CRT_SECURE_NO_WARNINGS
10 | #define _CRT_SECURE_NO_DEPRECATE
11 | #define NOMINMAX
12 | // switch engine to AllreduceBase
13 | #define RABIT_USE_BASE
14 | #include "./engine.cc"
15 | 
16 | 
--------------------------------------------------------------------------------
/xgboost/rabit/src/engine_mock.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright (c) 2014 by Contributors
3 |  * \file engine_mock.cc
4 |  * \brief this is an engine implementation that will
5 |  *  insert failures at certain call points, to test if the engine is robust to failure
6 |  * \author Tianqi Chen
7 |  */
8 | // define use MOCK, so we will use the mock Manager
9 | #define _CRT_SECURE_NO_WARNINGS
10 | #define _CRT_SECURE_NO_DEPRECATE
11 | #define NOMINMAX
12 | // switch engine to AllreduceMock
13 | #define RABIT_USE_MOCK
14 | #include "./allreduce_mock.h"
15 | #include "./engine.cc"
16 | 
17 | 
--------------------------------------------------------------------------------
/xgboost/rabit/test/.gitignore:
--------------------------------------------------------------------------------
1 | *.mpi
2 | test_*
3 | *_test
4 | *_recover
5 | 
--------------------------------------------------------------------------------
/xgboost/rabit/test/Makefile:
--------------------------------------------------------------------------------
1 | export CC = gcc
2 | export CXX = g++
3 | export MPICXX = mpicxx
4 | export LDFLAGS= -L../lib -pthread -lm -lrt
5 | export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include -std=c++0x
6 | 
7 | # specify tensor path
8 | BIN = speed_test model_recover local_recover lazy_recover
9 | OBJ = $(RABIT_OBJ) speed_test.o model_recover.o local_recover.o lazy_recover.o
10 | MPIBIN = speed_test.mpi
11 | .PHONY: clean all lib mpi
12 | 
13 | all: $(BIN)
14 | lib:
15 | 	cd ..;make;cd -
16 | mpi:
17 | 	cd ..;make mpi;cd -
18 | # programs
19 | speed_test.o: speed_test.cc ../include/rabit/*.h lib mpi
20 | model_recover.o: model_recover.cc ../include/rabit/*.h lib
21 | local_recover.o: local_recover.cc ../include/rabit/*.h lib
22 | lazy_recover.o: lazy_recover.cc ../include/rabit/*.h lib
23 | 
24 | # we can link against the MPI version to use MPI
25 | speed_test: speed_test.o $(RABIT_OBJ)
26 | speed_test.mpi: speed_test.o $(MPIOBJ)
27 | model_recover: model_recover.o $(RABIT_OBJ)
28 | local_recover: local_recover.o $(RABIT_OBJ)
29 | lazy_recover: lazy_recover.o $(RABIT_OBJ)
30 | 
31 | $(BIN) :
32 | 	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) ../lib/librabit_mock.a $(LDFLAGS)
33 | 
34 | $(OBJ) :
35 | 	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
36 | 
37 | $(MPIBIN) :
38 | 	$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mpi
39 | 
40 | clean:
41 | 	$(RM) $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) *~ ../src/*~
42 | 
--------------------------------------------------------------------------------
/xgboost/rabit/test/README.md:
--------------------------------------------------------------------------------
1 | Testcases of Rabit
2 | ====
3 | This folder contains internal testcases to test the correctness and efficiency of the rabit API
4 | 
5 | The example scripts for running the testcases are given in test.mk
6 | * type ```make -f test.mk testcasename``` to run a certain testcase
7 | 
8 | 
9 | Helper Scripts
10 | ====
11 | * test.mk contains Makefile documentation of all testcases
12 | * keepalive.sh is a helper bash script to restart a program when it dies abnormally
13 | 
14 | List of Programs
15 | ====
16 | * speed_test: test the running speed of the rabit API
17 | * test_local_recover: test recovery of local state when an error happens
18 | * test_model_recover: test recovery of global state when an error happens
--------------------------------------------------------------------------------
/xgboost/rabit/test/local_recover.py:
--------------------------------------------------------------------------------
1 | 
#!/usr/bin/python 2 | import rabit 3 | import numpy as np 4 | 5 | rabit.init(lib='mock') 6 | rank = rabit.get_rank() 7 | n = 10 8 | nround = 3 9 | data = np.ones(n) * rank 10 | 11 | version, model, local = rabit.load_checkpoint(True) 12 | if version == 0: 13 | model = np.zeros(n) 14 | local = np.ones(n) 15 | else: 16 | print '[%d] restart from version %d' % (rank, version) 17 | 18 | for i in xrange(version, nround): 19 | res = rabit.allreduce(data + model+local, rabit.SUM) 20 | print '[%d] iter=%d: %s' % (rank, i, str(res)) 21 | model = res 22 | local[:] = i 23 | rabit.checkpoint(model, local) 24 | 25 | rabit.finalize() 26 | -------------------------------------------------------------------------------- /xgboost/rabit/test/speed_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import sys 4 | 5 | def main(): 6 | parser = argparse.ArgumentParser(description='TODO') 7 | parser.add_argument('-ho', '--host_dir', required=True) 8 | parser.add_argument('-s', '--submit_script', required=True) 9 | parser.add_argument('-rex', '--rabit_exec', required=True) 10 | parser.add_argument('-mpi', '--mpi_exec', required=True) 11 | args = parser.parse_args() 12 | 13 | ndata = [10**4, 10**5, 10**6, 10**7] 14 | nrepeat = [10**4, 10**3, 10**2, 10] 15 | 16 | machines = [2,4,8,16,31] 17 | 18 | executables = [args.rabit_exec, args.mpi_exec] 19 | 20 | for executable in executables: 21 | sys.stderr.write('Executable %s' % executable) 22 | sys.stderr.flush() 23 | for i, data in enumerate(ndata): 24 | for machine in machines: 25 | host_file = os.path.join(args.host_dir, 'hosts%d' % machine) 26 | cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, executable, data, nrepeat[i]) 27 | sys.stderr.write('data=%d, repeat=%d, machine=%d\n' % (data, nrepeat[i], machine)) 28 | sys.stderr.flush() 29 | os.system(cmd) 30 | sys.stderr.write('\n') 31 | sys.stderr.flush() 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /xgboost/src/c_api/c_api_error.cc: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file c_api_error.cc 4 | * \brief C error handling 5 | */ 6 | #include 7 | #include "./c_api_error.h" 8 | 9 | struct XGBAPIErrorEntry { 10 | std::string last_error; 11 | }; 12 | 13 | typedef dmlc::ThreadLocalStore XGBAPIErrorStore; 14 | 15 | const char *XGBGetLastError() { 16 | return XGBAPIErrorStore::Get()->last_error.c_str(); 17 | } 18 | 19 | void XGBAPISetLastError(const char* msg) { 20 | XGBAPIErrorStore::Get()->last_error = msg; 21 | } 22 | -------------------------------------------------------------------------------- /xgboost/src/c_api/c_api_error.h: -------------------------------------------------------------------------------- 1 | /*! 2 | * Copyright (c) 2015 by Contributors 3 | * \file c_api_error.h 4 | * \brief Error handling for C API. 5 | */ 6 | #ifndef XGBOOST_C_API_C_API_ERROR_H_ 7 | #define XGBOOST_C_API_C_API_ERROR_H_ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | /*! \brief macro to guard beginning and end section of all functions */ 14 | #define API_BEGIN() try { 15 | /*! \brief every function starts with API_BEGIN(); 16 | and finishes with API_END() or API_END_HANDLE_ERROR */ 17 | #define API_END() } catch(dmlc::Error &_except_) { return XGBAPIHandleException(_except_); } return 0; // NOLINT(*) 18 | /*! 
--------------------------------------------------------------------------------
/xgboost/src/common/common.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file common.cc
4 |  * \brief Enable all kinds of global variables in common.
5 |  */
6 | #include <dmlc/thread_local.h>
7 | #include "./random.h"
8 | 
9 | namespace xgboost {
10 | namespace common {
11 | /*! \brief thread local entry for random. */
12 | struct RandomThreadLocalEntry {
13 |   /*! \brief the random engine instance. */
14 |   GlobalRandomEngine engine;
15 | };
16 | 
17 | typedef dmlc::ThreadLocalStore<RandomThreadLocalEntry> RandomThreadLocalStore;
18 | 
19 | GlobalRandomEngine& GlobalRandom() {
20 |   return RandomThreadLocalStore::Get()->engine;
21 | }
22 | }  // namespace common
23 | }  // namespace xgboost
24 | 
--------------------------------------------------------------------------------
/xgboost/src/common/common.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file common.h
4 |  * \brief Common utilities
5 |  */
6 | #ifndef XGBOOST_COMMON_COMMON_H_
7 | #define XGBOOST_COMMON_COMMON_H_
8 | 
9 | #include <vector>
10 | #include <string>
11 | #include <sstream>
12 | 
13 | namespace xgboost {
14 | namespace common {
15 | /*!
16 |  * \brief Split a string by delimiter
17 |  * \param s String to be split.
18 |  * \param delim The delimiter.
19 |  */
20 | inline std::vector<std::string> Split(const std::string& s, char delim) {
21 |   std::string item;
22 |   std::istringstream is(s);
23 |   std::vector<std::string> ret;
24 |   while (std::getline(is, item, delim)) {
25 |     ret.push_back(item);
26 |   }
27 |   return ret;
28 | }
29 | 
30 | // simple routine to convert any data to string
31 | template<typename T>
32 | inline std::string ToString(const T& data) {
33 |   std::ostringstream os;
34 |   os << data;
35 |   return os.str();
36 | }
37 | 
38 | }  // namespace common
39 | }  // namespace xgboost
40 | #endif  // XGBOOST_COMMON_COMMON_H_
41 | 
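A quick, self-contained sketch of the two helpers above; it assumes the header is reachable on the include path as "common.h".

```cpp
#include <iostream>
#include "common.h"  // the header shown above

int main() {
  // Split breaks a delimited string into its fields.
  std::vector<std::string> fields =
      xgboost::common::Split("max_depth,eta,silent", ',');
  for (const std::string& f : fields) {
    std::cout << f << '\n';  // prints max_depth, then eta, then silent
  }
  // ToString works for any type with an operator<< overload.
  std::cout << xgboost::common::ToString(0.3) << '\n';  // prints 0.3
  return 0;
}
```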
--------------------------------------------------------------------------------
/xgboost/src/common/sync.h:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2014 by Contributors
3 |  * \file sync.h
4 |  * \brief the synchronization module of rabit;
5 |  *   redirects to the rabit header
6 |  * \author Tianqi Chen
7 |  */
8 | #ifndef XGBOOST_COMMON_SYNC_H_
9 | #define XGBOOST_COMMON_SYNC_H_
10 | 
11 | #include <rabit/rabit.h>
12 | 
13 | #endif  // XGBOOST_COMMON_SYNC_H_
14 | 
--------------------------------------------------------------------------------
/xgboost/src/gbm/gbm.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file gbm.cc
4 |  * \brief Registry of gradient boosters.
5 |  */
6 | #include <xgboost/gbm.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::GradientBoosterReg);
11 | }  // namespace dmlc
12 | 
13 | namespace xgboost {
14 | GradientBooster* GradientBooster::Create(
15 |     const std::string& name,
16 |     const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
17 |     bst_float base_margin) {
18 |   auto *e = ::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->Find(name);
19 |   if (e == nullptr) {
20 |     LOG(FATAL) << "Unknown gbm type " << name;
21 |   }
22 |   return (e->body)(cache_mats, base_margin);
23 | }
24 | }  // namespace xgboost
25 | 
26 | namespace xgboost {
27 | namespace gbm {
28 | // List of files that will be force linked in static links.
29 | DMLC_REGISTRY_LINK_TAG(gblinear);
30 | DMLC_REGISTRY_LINK_TAG(gbtree);
31 | }  // namespace gbm
32 | }  // namespace xgboost
33 | 
--------------------------------------------------------------------------------
/xgboost/src/logging.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file logging.cc
4 |  * \brief Implementation of loggers.
5 |  * \author Tianqi Chen
6 |  */
7 | #include <xgboost/logging.h>
8 | #include <iostream>
9 | #include "./common/sync.h"
10 | 
11 | namespace xgboost {
12 | 
13 | #if XGBOOST_CUSTOMIZE_LOGGER == 0
14 | ConsoleLogger::~ConsoleLogger() {
15 |   std::cerr << log_stream_.str() << std::endl;
16 | }
17 | 
18 | TrackerLogger::~TrackerLogger() {
19 |   log_stream_ << '\n';
20 |   rabit::TrackerPrint(log_stream_.str());
21 | }
22 | #endif
23 | }  // namespace xgboost
24 | 
--------------------------------------------------------------------------------
/xgboost/src/metric/metric.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file metric.cc
4 |  * \brief Registry of evaluation metrics.
5 |  */
6 | #include <xgboost/metric.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::MetricReg);
11 | }
12 | 
13 | namespace xgboost {
14 | Metric* Metric::Create(const std::string& name) {
15 |   std::string buf = name;
16 |   auto pos = buf.find('@');
17 |   if (pos == std::string::npos) {
18 |     auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(name);
19 |     if (e == nullptr) {
20 |       LOG(FATAL) << "Unknown metric function " << name;
21 |     }
22 |     return (e->body)(nullptr);
23 |   } else {
24 |     std::string prefix = buf.substr(0, pos);
25 |     auto *e = ::dmlc::Registry< ::xgboost::MetricReg>::Get()->Find(prefix.c_str());
26 |     if (e == nullptr) {
27 |       LOG(FATAL) << "Unknown metric function " << name;
28 |     }
29 |     return (e->body)(buf.substr(pos + 1, buf.length()).c_str());
30 |   }
31 | }
32 | }  // namespace xgboost
33 | 
34 | namespace xgboost {
35 | namespace metric {
36 | // List of files that will be force linked in static links.
37 | DMLC_REGISTRY_LINK_TAG(elementwise_metric);
38 | DMLC_REGISTRY_LINK_TAG(multiclass_metric);
39 | DMLC_REGISTRY_LINK_TAG(rank_metric);
40 | }  // namespace metric
41 | }  // namespace xgboost
42 | 
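The '@' convention that Metric::Create implements can be exercised directly. A minimal sketch follows, using the metric names "rmse" and "error@0.5" that the unit tests further down this tree also use; it assumes the program is linked against the xgboost library so the metrics are actually registered.

```cpp
#include <xgboost/metric.h>

int main() {
  // "rmse" has no parameter, so it is looked up in the registry as-is.
  xgboost::Metric* rmse = xgboost::Metric::Create("rmse");
  // "error@0.5" is split at '@': the registry entry "error" is found,
  // and the remainder "0.5" is handed to its factory as an argument.
  xgboost::Metric* err = xgboost::Metric::Create("error@0.5");
  delete rmse;
  delete err;
  return 0;
}
```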
--------------------------------------------------------------------------------
/xgboost/src/objective/objective.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file objective.cc
4 |  * \brief Registry of all objective functions.
5 |  */
6 | #include <xgboost/objective.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::ObjFunctionReg);
11 | }  // namespace dmlc
12 | 
13 | namespace xgboost {
14 | // implement factory functions
15 | ObjFunction* ObjFunction::Create(const std::string& name) {
16 |   auto *e = ::dmlc::Registry< ::xgboost::ObjFunctionReg>::Get()->Find(name);
17 |   if (e == nullptr) {
18 |     for (const auto& entry : ::dmlc::Registry< ::xgboost::ObjFunctionReg>::List()) {
19 |       LOG(INFO) << "Objective candidate: " << entry->name;
20 |     }
21 |     LOG(FATAL) << "Unknown objective function " << name;
22 |   }
23 |   return (e->body)();
24 | }
25 | }  // namespace xgboost
26 | 
27 | namespace xgboost {
28 | namespace obj {
29 | // List of files that will be force linked in static links.
30 | DMLC_REGISTRY_LINK_TAG(regression_obj);
31 | DMLC_REGISTRY_LINK_TAG(multiclass_obj);
32 | DMLC_REGISTRY_LINK_TAG(rank_obj);
33 | }  // namespace obj
34 | }  // namespace xgboost
35 | 
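A short sketch of the factory above; "reg:linear" is a registered objective, as the unit tests below confirm. Note the courtesy in the lookup path: an unknown name first has every known candidate printed before the hard failure.

```cpp
#include <xgboost/objective.h>

int main() {
  // Known name: returns a fresh instance from the registry.
  xgboost::ObjFunction* obj = xgboost::ObjFunction::Create("reg:linear");
  delete obj;
  // Unknown name: LOG(INFO) prints each registered candidate, then
  // LOG(FATAL) throws dmlc::Error.
  // xgboost::ObjFunction::Create("no-such-objective");
  return 0;
}
```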
--------------------------------------------------------------------------------
/xgboost/src/tree/tree_updater.cc:
--------------------------------------------------------------------------------
1 | /*!
2 |  * Copyright 2015 by Contributors
3 |  * \file tree_updater.cc
4 |  * \brief Registry of tree updaters.
5 |  */
6 | #include <xgboost/tree_updater.h>
7 | #include <dmlc/registry.h>
8 | 
9 | namespace dmlc {
10 | DMLC_REGISTRY_ENABLE(::xgboost::TreeUpdaterReg);
11 | }  // namespace dmlc
12 | 
13 | namespace xgboost {
14 | 
15 | TreeUpdater* TreeUpdater::Create(const std::string& name) {
16 |   auto *e = ::dmlc::Registry< ::xgboost::TreeUpdaterReg>::Get()->Find(name);
17 |   if (e == nullptr) {
18 |     LOG(FATAL) << "Unknown tree updater " << name;
19 |   }
20 |   return (e->body)();
21 | }
22 | 
23 | }  // namespace xgboost
24 | 
25 | namespace xgboost {
26 | namespace tree {
27 | // List of files that will be force linked in static links.
28 | DMLC_REGISTRY_LINK_TAG(updater_colmaker);
29 | DMLC_REGISTRY_LINK_TAG(updater_skmaker);
30 | DMLC_REGISTRY_LINK_TAG(updater_refresh);
31 | DMLC_REGISTRY_LINK_TAG(updater_prune);
32 | DMLC_REGISTRY_LINK_TAG(updater_fast_hist);
33 | DMLC_REGISTRY_LINK_TAG(updater_histmaker);
34 | DMLC_REGISTRY_LINK_TAG(updater_sync);
35 | }  // namespace tree
36 | }  // namespace xgboost
37 | 
--------------------------------------------------------------------------------
/xgboost/tests/README.md:
--------------------------------------------------------------------------------
1 | This folder contains testcases for xgboost.
2 | 
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/Dockerfile.gpu:
--------------------------------------------------------------------------------
1 | FROM nvidia/cuda:8.0-devel-ubuntu14.04
2 | 
3 | RUN apt-get update && apt-get -y upgrade
4 | # CMAKE
5 | RUN apt-get install -y build-essential
6 | RUN apt-get install -y wget
7 | RUN wget http://www.cmake.org/files/v3.5/cmake-3.5.2.tar.gz
8 | RUN tar -xvzf cmake-3.5.2.tar.gz
9 | RUN cd cmake-3.5.2/ && ./configure && make && make install
10 | 
11 | # BLAS
12 | RUN apt-get install -y libatlas-base-dev
13 | 
14 | # PYTHON2
15 | RUN apt-get install -y python-setuptools python-pip python-dev unzip gfortran
16 | RUN pip install numpy nose scipy scikit-learn
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/build_gpu_cmake.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | mkdir build
4 | cd build
5 | cmake .. -DPLUGIN_UPDATER_GPU=ON
6 | make
7 | 
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/test_gpu.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | mkdir build
4 | cd build
5 | cmake .. -DPLUGIN_UPDATER_GPU=ON
6 | make
7 | cd ..
8 | cd python-package
9 | python setup.py install --user
10 | cd ../plugin/updater_gpu
11 | python -m nose test/python
12 | 
--------------------------------------------------------------------------------
/xgboost/tests/ci_build/with_the_same_user:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # This script is a wrapper that creates the same user inside the container as the
4 | # one running ci_build.sh outside the container. It also sets the home directory
5 | # for the user inside the container to match the same absolute path as the
6 | # workspace outside of the container. Do not run this manually. It does not make
7 | # sense. It is intended to be called by ci_build.sh only.
8 | 
9 | set -e
10 | 
11 | COMMAND=("$@")
12 | 
13 | if ! touch /this_is_writable_file_system; then
14 |     echo "You can't write to your filesystem!"
15 |     echo "If you are in Docker you should check you do not have too many images" \
16 |          "with too many files in them. Docker has some issue with it."
17 |     exit 1
18 | else
19 |     rm /this_is_writable_file_system
20 | fi
21 | 
22 | getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}"
23 | getent passwd "${CI_BUILD_UID}" || adduser --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \
24 |     --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \
25 |     --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}"
26 | usermod -a -G sudo "${CI_BUILD_USER}"
27 | echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo
28 | 
29 | sudo -u "#${CI_BUILD_UID}" --preserve-env "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" \
30 |     "HOME=${CI_BUILD_HOME}" "${COMMAND[@]}"
31 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/data/test_simple_csr_source.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/data.h>
3 | #include "../../../src/data/simple_csr_source.h"
4 | 
5 | #include "../helpers.h"
6 | 
7 | TEST(SimpleCSRSource, SaveLoadBinary) {
8 |   std::string tmp_file = CreateSimpleTestData();
9 |   xgboost::DMatrix * dmat = xgboost::DMatrix::Load(tmp_file, true, false);
10 |   std::remove(tmp_file.c_str());
11 | 
12 |   std::string tmp_binfile = TempFileName();
13 |   dmat->SaveToLocalFile(tmp_binfile);
14 |   xgboost::DMatrix * dmat_read = xgboost::DMatrix::Load(tmp_binfile, true, false);
15 |   std::remove(tmp_binfile.c_str());
16 | 
17 |   EXPECT_EQ(dmat->info().num_col, dmat_read->info().num_col);
18 |   EXPECT_EQ(dmat->info().num_row, dmat_read->info().num_row);
19 |   EXPECT_EQ(dmat->info().num_nonzero, dmat_read->info().num_nonzero);
20 | 
21 |   dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator();
22 |   dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator();
23 |   // Test the data read into the first row
24 |   row_iter->BeforeFirst(); row_iter->Next();
25 |   row_iter_read->BeforeFirst(); row_iter_read->Next();
26 |   xgboost::SparseBatch::Inst first_row = row_iter->Value()[0];
27 |   xgboost::SparseBatch::Inst first_row_read = row_iter_read->Value()[0];
28 |   EXPECT_EQ(first_row.length, first_row_read.length);
29 |   EXPECT_EQ(first_row[2].index, first_row_read[2].index);
30 |   EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
31 |   row_iter = nullptr; row_iter_read = nullptr;
32 | }
33 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/helpers.h:
--------------------------------------------------------------------------------
1 | #ifndef XGBOOST_TESTS_CPP_HELPERS_H_
2 | #define XGBOOST_TESTS_CPP_HELPERS_H_
3 | 
4 | #include <iostream>
5 | #include <fstream>
6 | #include <cstdio>
7 | #include <cstdlib>
8 | #include <string>
9 | #include <sstream>
10 | #include <vector>
11 | 
12 | #include <gtest/gtest.h>
13 | 
14 | #include <xgboost/base.h>
15 | #include <xgboost/objective.h>
16 | #include <xgboost/metric.h>
17 | 
18 | std::string TempFileName();
19 | 
20 | bool FileExists(const std::string name);
21 | 
22 | long GetFileSize(const std::string filename);
23 | 
24 | std::string CreateSimpleTestData();
25 | 
26 | void CheckObjFunction(xgboost::ObjFunction * obj,
27 |                       std::vector<xgboost::bst_float> preds,
28 |                       std::vector<xgboost::bst_float> labels,
29 |                       std::vector<xgboost::bst_float> weights,
30 |                       std::vector<xgboost::bst_float> out_grad,
31 |                       std::vector<xgboost::bst_float> out_hess);
32 | 
33 | xgboost::bst_float GetMetricEval(
34 |     xgboost::Metric * metric,
35 |     std::vector<xgboost::bst_float> preds,
36 |     std::vector<xgboost::bst_float> labels,
37 |     std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>());
38 | 
39 | #endif  // XGBOOST_TESTS_CPP_HELPERS_H_
40 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/metric/test_metric.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/metric.h>
3 | 
4 | #include "../helpers.h"
5 | 
6 | TEST(Metric, UnknownMetric) {
7 |   EXPECT_ANY_THROW(xgboost::Metric::Create("unknown_name"));
8 |   EXPECT_NO_THROW(xgboost::Metric::Create("rmse"));
9 |   EXPECT_ANY_THROW(xgboost::Metric::Create("unknown_name@1"));
10 |   EXPECT_NO_THROW(xgboost::Metric::Create("error@0.5f"));
11 | }
12 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/objective/test_multiclass_metric.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/metric.h>
3 | 
4 | #include "../helpers.h"
5 | 
6 | TEST(Metric, MultiClassError) {
7 |   xgboost::Metric * metric = xgboost::Metric::Create("merror");
8 |   ASSERT_STREQ(metric->Name(), "merror");
9 |   EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
10 |   EXPECT_NEAR(GetMetricEval(
11 |     metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
12 |   EXPECT_NEAR(GetMetricEval(metric,
13 |                             {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
14 |                             {0, 1, 2}),
15 |               0.666f, 0.001f);
16 | }
17 | 
18 | TEST(Metric, MultiClassLogLoss) {
19 |   xgboost::Metric * metric = xgboost::Metric::Create("mlogloss");
20 |   ASSERT_STREQ(metric->Name(), "mlogloss");
21 |   EXPECT_ANY_THROW(GetMetricEval(metric, {0}, {0, 0}));
22 |   EXPECT_NEAR(GetMetricEval(
23 |     metric, {1, 0, 0, 0, 1, 0, 0, 0, 1}, {0, 1, 2}), 0, 1e-10);
24 |   EXPECT_NEAR(GetMetricEval(metric,
25 |                             {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f},
26 |                             {0, 1, 2}),
27 |               2.302f, 0.001f);
28 | }
29 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/objective/test_objective.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <xgboost/objective.h>
3 | 
4 | #include "../helpers.h"
5 | 
6 | TEST(Objective, UnknownFunction) {
7 |   EXPECT_ANY_THROW(xgboost::ObjFunction::Create("unknown_name"));
8 |   EXPECT_NO_THROW(xgboost::ObjFunction::Create("reg:linear"));
9 | }
10 | 
--------------------------------------------------------------------------------
/xgboost/tests/cpp/test_main.cc:
--------------------------------------------------------------------------------
1 | // Copyright by Contributors
2 | #include <gtest/gtest.h>
3 | 
4 | int main(int argc, char ** argv) {
5 |   testing::InitGoogleTest(&argc, argv);
6 |   testing::FLAGS_gtest_death_test_style = "threadsafe";
7 |   return RUN_ALL_TESTS();
8 | }
9 | 
--------------------------------------------------------------------------------
/xgboost/tests/distributed/runtests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PYTHONPATH=../../python-package/ ../../dmlc-core/tracker/dmlc-submit --cluster=local --num-workers=3 \
4 |     python test_basic.py
5 | 
--------------------------------------------------------------------------------
/xgboost/tests/distributed/test_basic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import numpy as np
3 | import scipy.sparse
4 | import pickle
5 | import xgboost as xgb
6 | 
7 | # always call this before using the distributed module
8 | xgb.rabit.init()
9 | 
10 | # Load the files; they will be automatically sharded in distributed mode.
11 | dtrain = xgb.DMatrix('../../demo/data/agaricus.txt.train')
12 | dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')
13 | 
14 | # specify parameters via a map; the definitions are the same as in the C++ version
15 | param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
16 | 
17 | # specify a validation set to watch performance
18 | watchlist = [(dtest, 'eval'), (dtrain, 'train')]
19 | num_round = 20
20 | 
21 | # Run training; all the features of the training API are available.
22 | # Currently, this script only supports calling train once, for fault-recovery purposes.
23 | bst = xgb.train(param, dtrain, num_round, watchlist, early_stopping_rounds=2)
24 | 
25 | # save the model; only process 0 is asked to save it
26 | if xgb.rabit.get_rank() == 0:
27 |     bst.save_model("test.model")
28 |     xgb.rabit.tracker_print("Finished training\n")
29 | 
30 | # Notify the tracker that all training has been successful.
31 | # This is only needed in distributed training.
32 | xgb.rabit.finalize()
33 | 
--------------------------------------------------------------------------------
/xgboost/tests/python/test_sparse_dmatrix.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xgboost as xgb
3 | from scipy.sparse import rand
4 | 
5 | rng = np.random.RandomState(1)
6 | 
7 | param = {'max_depth': 3, 'objective': 'binary:logistic', 'silent': 1}
8 | 
9 | 
10 | def test_sparse_dmatrix_csr():
11 |     nrow = 100
12 |     ncol = 1000
13 |     x = rand(nrow, ncol, density=0.0005, format='csr', random_state=rng)
14 |     assert x.indices.max() < ncol - 1
15 |     x.data[:] = 1
16 |     dtrain = xgb.DMatrix(x, label=np.random.binomial(1, 0.3, nrow))
17 |     assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
18 |     watchlist = [(dtrain, 'train')]
19 |     bst = xgb.train(param, dtrain, 5, watchlist)
20 |     bst.predict(dtrain)
21 | 
22 | 
23 | def test_sparse_dmatrix_csc():
24 |     nrow = 1000
25 |     ncol = 100
26 |     x = rand(nrow, ncol, density=0.0005, format='csc', random_state=rng)
27 |     assert x.indices.max() < nrow - 1
28 |     x.data[:] = 1
29 |     dtrain = xgb.DMatrix(x, label=np.random.binomial(1, 0.3, nrow))
30 |     assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol)
31 |     watchlist = [(dtrain, 'train')]
32 |     bst = xgb.train(param, dtrain, 5, watchlist)
33 |     bst.predict(dtrain)
34 | 
--------------------------------------------------------------------------------
/xgboost/tests/python/testing.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | import nose
4 | 
5 | from xgboost.compat import SKLEARN_INSTALLED, PANDAS_INSTALLED
6 | 
7 | 
8 | def _skip_if_no_sklearn():
9 |     if not SKLEARN_INSTALLED:
10 |         raise nose.SkipTest()
11 | 
12 | 
13 | def _skip_if_no_pandas():
14 |     if not PANDAS_INSTALLED:
15 |         raise nose.SkipTest()
16 | 
17 | 
18 | def _skip_if_no_matplotlib():
19 |     try:
20 |         import matplotlib.pyplot as _  # noqa
21 |     except ImportError:
22 |         raise nose.SkipTest()
23 | 
--------------------------------------------------------------------------------
/xgboost/tests/travis/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if [ ${TRAVIS_OS_NAME} == "osx" ]; then
4 |     brew update
5 |     brew install graphviz
6 | fi
7 | 
8 | if [ ${TASK} == "lint" ]; then
9 |     pip install --user cpplint 'pylint==1.4.4' 'astroid==1.3.6'
10 | fi
11 | 
12 | 
13 | if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ]; then
14 |     # python2
15 |     if [ ${TRAVIS_OS_NAME} == "osx" ]; then
16 |         wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
17 |     else
18 |         wget -O conda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
19 |     fi
20 |     bash conda.sh -b -p $HOME/miniconda
21 |     export PATH="$HOME/miniconda/bin:$PATH"
22 |     hash -r
23 |     conda config --set always_yes yes --set changeps1 no
24 |     conda update -q conda
25 |     # Useful for debugging any issues with conda
26 |     conda info -a
27 |     conda create -n python3 python=3.5
28 |     conda create -n python2 python=2.7
29 | fi
30 | 
--------------------------------------------------------------------------------
/xgboost/tests/travis/travis_after_failure.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if [ ${TASK} == "r_test" ]; then
4 |     cat xgboost/xgboost.Rcheck/*.log
5 |     echo "--------------------------"
6 |     cat xgboost/xgboost.Rcheck/*.out
7 | fi
8 | 
--------------------------------------------------------------------------------