├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── algorithms ├── HPdata │ ├── DESCRIPTION │ ├── LICENSE │ ├── NAMESPACE │ ├── R │ │ ├── db2darray.R │ │ ├── db2darrays.R │ │ ├── db2dframe.R │ │ ├── db2dframes.R │ │ ├── db2matrix.R │ │ ├── ddc.R │ │ ├── graphLoader.R │ │ ├── nativeLoader.R │ │ └── splitGraphFile.R │ ├── inst │ │ ├── data │ │ │ ├── copy_data.sql │ │ │ ├── data_10K.dat │ │ │ ├── data_10K_numeric.dat │ │ │ ├── graph1.dat │ │ │ ├── graph2.dat │ │ │ ├── graph3.dat │ │ │ ├── graph4.dat │ │ │ └── schema.sql │ │ └── tests │ │ │ ├── data │ │ │ ├── TestOrcFile.test1.orc │ │ │ ├── ex001.csv │ │ │ └── ex002.csv │ │ │ ├── test_1_setup.R │ │ │ ├── test_ddc.R │ │ │ ├── test_files.R │ │ │ ├── test_tables.R │ │ │ ├── test_views.R │ │ │ └── test_zz_cleanup.R │ ├── man │ │ ├── HPdata-package.Rd │ │ ├── csv2dframe.Rd │ │ ├── db2darray.Rd │ │ ├── db2darrays.Rd │ │ ├── db2dframe.Rd │ │ ├── db2dframes.Rd │ │ ├── db2dgraph.Rd │ │ ├── db2matrix.Rd │ │ ├── file2dgraph.Rd │ │ ├── orc2dframe.Rd │ │ └── splitGraphFile.Rd │ └── src │ │ ├── Makevars │ │ └── hpdsplitter.cpp ├── HPdclassifier │ ├── DESCRIPTION │ ├── LICENSE │ ├── NAMESPACE │ ├── R │ │ ├── deploy.hpdegbm.R │ │ ├── hpdRF_parallelForest.R │ │ ├── hpdRF_parallelTree.R │ │ ├── hpdRFdistributed.R │ │ ├── hpdRFutil.R │ │ ├── hpdegbm.R │ │ ├── hpdrpart.R │ │ ├── predict.hpdegbm.R │ │ ├── predictHPdRF.R │ │ └── varImp.R │ ├── inst │ │ └── tests │ │ │ ├── test_hpRF_parallelForest.R │ │ │ ├── test_hpdRF_parallelTree.R │ │ │ ├── test_hpdegbm.R │ │ │ ├── test_hpdrpart.R │ │ │ └── test_varImp.R │ ├── man │ │ ├── HPdclassifier-package.Rd │ │ ├── deploy.hpdRF_parallelTree.Rd │ │ ├── deploy.hpdegbm.Rd │ │ ├── hpdRF_parallelForest.Rd │ │ ├── hpdRF_parallelTree.Rd │ │ ├── hpdegbm.Rd │ │ ├── hpdrpart.Rd │ │ ├── predict.hpdRF_parallelForest.Rd │ │ ├── predict.hpdRF_parallelTree.Rd │ │ ├── predict.hpdegbm.Rd │ │ ├── predict.hpdrpart.Rd │ │ ├── predictHPdRF.Rd │ │ ├── print.hpdRF_parallelTree.Rd │ │ ├── print.hpdRFtree.Rd │ │ └── 
varImportance.Rd │ └── src │ │ ├── hpdRF.hpp │ │ ├── hpdRFImpute.cpp │ │ ├── hpdRFbuildHistograms.cpp │ │ ├── hpdRFcomputeSplits.cpp │ │ ├── hpdRFforest.cpp │ │ ├── hpdRFformatting.cpp │ │ ├── hpdRFpredict.cpp │ │ ├── hpdRFtrain.cpp │ │ ├── hpdRFtree.cpp │ │ └── hpdRFupdateNodes.cpp ├── HPdcluster │ ├── DESCRIPTION │ ├── LICENSE │ ├── NAMESPACE │ ├── R │ │ └── hpdkmeans.R │ ├── inst │ │ └── tests │ │ │ └── test_hpdkmeans.R │ ├── man │ │ ├── HPdcluster-package.Rd │ │ ├── hpdapply.Rd │ │ └── hpdkmeans.Rd │ └── src │ │ └── hpdkmeans.cpp ├── HPdgraph │ ├── DESCRIPTION │ ├── LICENSE │ ├── NAMESPACE │ ├── R │ │ └── hpdpagerank.R │ ├── inst │ │ └── tests │ │ │ └── test_hpdpagerank.R │ ├── man │ │ ├── HPdgraph-package.Rd │ │ ├── hpdpagerank.Rd │ │ └── hpdwhich.max.Rd │ └── src │ │ └── hpdpagerank.cpp ├── HPdregression │ ├── DESCRIPTION │ ├── LICENSE │ ├── NAMESPACE │ ├── R │ │ ├── hpdglm.R │ │ └── validation.R │ ├── inst │ │ └── tests │ │ │ └── test_hpdglm.R │ └── man │ │ ├── HPdregression-package.Rd │ │ ├── cv.hpdglm.Rd │ │ ├── family.hpdglm.Rd │ │ ├── hpdglm.Rd │ │ ├── hpdglm.control.Rd │ │ ├── predict.hpdglm.Rd │ │ ├── residuals.hpdglm.Rd │ │ ├── summary.hpdglm.Rd │ │ └── v.hpdglm.Rd └── HPdutility │ ├── DESCRIPTION │ ├── NAMESPACE │ ├── R │ ├── errMeasurement.R │ └── hpdsample.R │ ├── inst │ └── tests │ │ ├── test_errMeasurement.R │ │ └── test_hpdsample.R │ └── man │ ├── confusionMatrix.Rd │ ├── errRate.Rd │ ├── hpdsample.Rd │ ├── meanSquared.Rd │ └── rSquared.Rd ├── demo ├── example │ └── hpdglm │ │ ├── cat2num.R │ │ └── end-to-end-hpdglm-example.pdf └── shiny │ └── randomForest │ └── README.md ├── doc ├── Distributed_R_1.2.x_User_Guide.pdf ├── algorithms │ ├── HPdata │ │ └── HPdata-Manual.pdf │ ├── HPdclassifier │ │ └── HPdclassifier-Manual.pdf │ ├── HPdcluster │ │ └── HPdcluster-Manual.pdf │ ├── HPdgraph │ │ └── HPdgraph-Manual.pdf │ └── HPdregression │ │ └── HPdregression-Manual.pdf ├── platform │ ├── Distributed-R-FAQ.pdf │ └── Distributed-R-Manual.pdf └── vRODBC 
│ └── vRODBC-Installation-Guide.pdf ├── platform ├── executor │ ├── DESCRIPTION │ ├── NAMESPACE │ ├── R │ │ └── executor.R │ ├── man │ │ ├── spmv.Rd │ │ └── spvm.Rd │ └── src │ │ ├── Makevars │ │ ├── PrestoMatrixHelper.cpp │ │ ├── Rtools.cpp │ │ ├── executor.cpp │ │ └── trace_def.cpp └── master │ ├── DESCRIPTION │ ├── NAMESPACE │ ├── R │ ├── darray.R │ ├── darray_ops.R │ ├── deploy.R │ ├── dframe.R │ ├── dframe_ops.R │ ├── dlist.R │ ├── dobject.R │ ├── master.R │ ├── reduce.R │ └── splits.R │ ├── inst │ ├── bin │ │ └── start_proto_worker.sh │ ├── conf │ │ ├── cluster_conf.xml │ │ └── cluster_conf_localhost.xml │ ├── extdata │ │ ├── A0 │ │ ├── A1 │ │ ├── B0 │ │ ├── B1 │ │ ├── C0 │ │ ├── C1 │ │ ├── C2 │ │ ├── C3 │ │ ├── G0 │ │ ├── G1 │ │ ├── KmeansCenters │ │ ├── KmeansIp0 │ │ ├── KmeansIp1 │ │ ├── df_data1 │ │ ├── df_data2 │ │ ├── df_data3 │ │ ├── df_data4 │ │ ├── df_data_all │ │ ├── eig20 │ │ ├── eig21 │ │ ├── pr_ex1_0 │ │ ├── pr_ex1_1 │ │ ├── pr_ex1_2 │ │ ├── pr_ex1_3 │ │ ├── pr_ex1_4 │ │ ├── pr_notrp_1 │ │ ├── pr_notrp_2 │ │ ├── pr_notrp_3 │ │ ├── pr_notrp_4 │ │ └── pr_notrp_5 │ └── tests │ │ ├── test_1_darray.R │ │ ├── test_2_darray_ops.R │ │ ├── test_2_flex_dense_darray.R │ │ ├── test_2_flex_sparse_darray.R │ │ ├── test_3_foreach.R │ │ ├── test_4_dframe.R │ │ └── test_5_dlist.R │ ├── man │ ├── as.darray.Rd │ ├── as.dframe.Rd │ ├── as.factor.dframe.Rd │ ├── clone.Rd │ ├── darray.Rd │ ├── ddyn.load.Rd │ ├── ddyn.unload.Rd │ ├── deploy.model.Rd │ ├── dframe.Rd │ ├── dlist.Rd │ ├── factor.dframe.Rd │ ├── foreach.Rd │ ├── getpartition.Rd │ ├── is.darray.Rd │ ├── is.dframe.Rd │ ├── is.dlist.Rd │ ├── levels.dframe.Rd │ ├── load.darray.Rd │ ├── ls.Rd │ ├── master_info.Rd │ ├── npartitions.Rd │ ├── package.Rd │ ├── partitionsize.Rd │ ├── reduce.Rd │ ├── shutdown.Rd │ ├── splits.Rd │ ├── start.Rd │ ├── status.Rd │ ├── unfactor.dframe.Rd │ └── update.Rd │ ├── src │ ├── DataLoaderManager.cpp │ ├── DataLoaderManager.h │ ├── DdcWorkerSelector.cpp │ ├── DdcWorkerSelector.h │ 
├── DeserializeArray.cpp │ ├── DeserializeArray.h │ ├── DistributedObject.cpp │ ├── DistributedObject.h │ ├── DistributedObjectMap.cpp │ ├── DistributedObjectMap.h │ ├── DistributedOperations.cpp │ ├── DistributedOperations.h │ ├── InMemoryScheduler.cpp │ ├── Makevars │ ├── OOCScheduler.cpp │ ├── PrestoMaster.cpp │ ├── PrestoMaster.h │ ├── PrestoMasterHandler.cpp │ ├── PrestoMasterHandler.h │ ├── ResourceManager.cpp │ ├── ResourceManager.h │ ├── Scheduler.cpp │ ├── Scheduler.h │ ├── common │ │ ├── ArrayData.cpp │ │ ├── ArrayData.h │ │ ├── Cgroups.cpp │ │ ├── Cgroups.h │ │ ├── DistDataFrame.cpp │ │ ├── DistDataFrame.h │ │ ├── DistList.cpp │ │ ├── DistList.h │ │ ├── Makefile │ │ ├── MasterClient.cpp │ │ ├── MasterClient.h │ │ ├── Observer.h │ │ ├── PrestoException.h │ │ ├── Rinternals_snippet.h │ │ ├── SharedMemory.h │ │ ├── TransferServer.cpp │ │ ├── TransferServer.h │ │ ├── UpdateUtils.h │ │ ├── WorkerInfo.cpp │ │ ├── WorkerInfo.h │ │ ├── common.cpp │ │ ├── common.h │ │ ├── dLogger.h │ │ ├── error.h │ │ ├── interprocess_sync.h │ │ ├── timer.h │ │ └── trace_def.cpp │ ├── executor │ │ ├── Makefile │ │ ├── executor.cpp │ │ └── executor.h │ ├── messaging │ │ ├── Makefile │ │ ├── master.proto │ │ ├── shared.proto │ │ └── worker.proto │ ├── third_party │ │ ├── Makefile │ │ ├── RInside_0.2.13.tar.gz │ │ ├── RODBC_1.3-11.tar.gz │ │ ├── RUnit_0.4.28.tar.gz │ │ ├── Rcpp_0.12.0.tar.gz │ │ ├── XML_3.98-1.1.tar.gz │ │ ├── atomicio │ │ │ ├── LICENSE │ │ │ ├── Makefile │ │ │ ├── atomicio.cpp │ │ │ └── atomicio.h │ │ ├── blkin.tar.gz │ │ ├── boost_1_50_0.tar.gz │ │ ├── boost_threadpool │ │ │ └── threadpool │ │ │ │ ├── CHANGE_LOG │ │ │ │ ├── COPYING │ │ │ │ ├── Jamfile.v2 │ │ │ │ ├── Jamrules │ │ │ │ ├── LICENSE_1_0.txt │ │ │ │ ├── Makefile │ │ │ │ ├── README │ │ │ │ ├── TODO │ │ │ │ ├── boost │ │ │ │ ├── threadpool.hpp │ │ │ │ └── threadpool │ │ │ │ │ ├── detail │ │ │ │ │ ├── future.hpp │ │ │ │ │ ├── locking_ptr.hpp │ │ │ │ │ ├── pool_core.hpp │ │ │ │ │ ├── scope_guard.hpp │ │ │ │ 
│ └── worker_thread.hpp │ │ │ │ │ ├── future.hpp │ │ │ │ │ ├── pool.hpp │ │ │ │ │ ├── pool_adaptors.hpp │ │ │ │ │ ├── scheduling_policies.hpp │ │ │ │ │ ├── shutdown_policies.hpp │ │ │ │ │ ├── size_policies.hpp │ │ │ │ │ └── task_adaptors.hpp │ │ │ │ ├── libs │ │ │ │ └── threadpool │ │ │ │ │ ├── doc │ │ │ │ │ ├── Makefile │ │ │ │ │ ├── design.dox │ │ │ │ │ ├── design.txt │ │ │ │ │ ├── design_header.htm │ │ │ │ │ ├── examples.dox │ │ │ │ │ ├── examples.txt │ │ │ │ │ ├── examples_header.htm │ │ │ │ │ ├── footer │ │ │ │ │ │ ├── common_footer.htm │ │ │ │ │ │ └── overview_footer.htm │ │ │ │ │ ├── footer_sf │ │ │ │ │ │ ├── common_footer.htm │ │ │ │ │ │ └── overview_footer.htm │ │ │ │ │ ├── index.html │ │ │ │ │ ├── overview.dox │ │ │ │ │ ├── overview.txt │ │ │ │ │ ├── overview_header.htm │ │ │ │ │ ├── reference.dox │ │ │ │ │ ├── reference_header.htm │ │ │ │ │ ├── threadpool.css │ │ │ │ │ ├── tutorial.dox │ │ │ │ │ ├── tutorial.txt │ │ │ │ │ └── tutorial_header.htm │ │ │ │ │ ├── example │ │ │ │ │ └── mergesort │ │ │ │ │ │ ├── Jamfile.v2 │ │ │ │ │ │ └── mergesort.cpp │ │ │ │ │ ├── quickstart │ │ │ │ │ ├── Jamfile.v2 │ │ │ │ │ ├── README │ │ │ │ │ └── quickstart.cpp │ │ │ │ │ ├── test │ │ │ │ │ └── compile_all │ │ │ │ │ │ ├── Jamfile.v2 │ │ │ │ │ │ └── compile_all.cpp │ │ │ │ │ └── tutorial │ │ │ │ │ ├── Jamfile.v2 │ │ │ │ │ └── tutorial.cpp │ │ │ │ └── project-root.jam │ │ ├── chron_2.3-45.tar.gz │ │ ├── data.table_1.9.4.tar.gz │ │ ├── digest_0.6.8.tar.gz │ │ ├── gbm_2.1.1.tar.gz │ │ ├── libcgroup-0.41.tar.gz │ │ ├── libuuid-1.0.3.tar.gz │ │ ├── plyr_1.8.1.tar.gz │ │ ├── protobuf-2.3.0.tar.gz │ │ ├── protobuf-2.6.1.tar.gz │ │ ├── randomForest_4.6-10.tar.gz │ │ ├── reshape2_1.4.1.tar.gz │ │ ├── stringr_0.6.2.tar.gz │ │ ├── testthat_0.9.1.tar.gz │ │ ├── unixODBC-2.3.1.tar.gz │ │ ├── zeromq-2.2.0.tar.gz │ │ └── zmq.hpp │ └── worker │ │ ├── ArrayStore.cpp │ │ ├── ArrayStore.h │ │ ├── DataLoader.cpp │ │ ├── DataLoader.h │ │ ├── ExecutorPool.cpp │ │ ├── ExecutorPool.h │ │ ├── 
Makefile │ │ ├── PrestoWorker.cpp │ │ ├── PrestoWorker.h │ │ ├── RequestLogger.cpp │ │ ├── RequestLogger.h │ │ ├── TaskScheduler.cpp │ │ ├── TaskScheduler.h │ │ ├── align.h │ │ ├── man │ │ ├── onchange.Rd │ │ └── update.Rd │ │ └── scripts │ │ ├── Makefile │ │ ├── generate_diagrams.py │ │ ├── logs │ │ └── R_worker_jorgem_127.0.0.1.50000.log │ │ ├── msc-generator-4.4.tar.gz │ │ └── parse_request_trace.py │ └── vignettes │ ├── FAQ.tex │ ├── Tutorial.Rnw │ └── architecture.pdf └── vRODBC ├── ChangeLog ├── DESCRIPTION ├── GPL-2 ├── NAMESPACE ├── R ├── RODBC.R ├── TypeInfo.R ├── sql.R └── win.R ├── cleanup ├── configure ├── configure.ac ├── configure.win ├── inst ├── LICENCE ├── doc │ └── RODBC.pdf ├── po │ ├── da │ │ └── LC_MESSAGES │ │ │ ├── R-RODBC.mo │ │ │ └── RODBC.mo │ ├── de │ │ └── LC_MESSAGES │ │ │ ├── R-RODBC.mo │ │ │ └── RODBC.mo │ ├── en@quot │ │ └── LC_MESSAGES │ │ │ ├── R-RODBC.mo │ │ │ └── RODBC.mo │ └── pl │ │ └── LC_MESSAGES │ │ ├── R-RODBC.mo │ │ └── RODBC.mo └── tests.R ├── man ├── odbc.Rd ├── odbcClose.Rd ├── odbcConnect.Rd ├── odbcDataSources.Rd ├── odbcGetInfo.Rd ├── odbcSetAutoCommit.Rd ├── setSqlTypeInfo.Rd ├── sqlColumns.Rd ├── sqlCopy.Rd ├── sqlDrop.Rd ├── sqlFetch.Rd ├── sqlQuery.Rd ├── sqlSave.Rd ├── sqlTables.Rd ├── sqlTypeInfo.Rd ├── vRODBC-internal.Rd └── vRODBC-package.Rd ├── po ├── R-RODBC.pot ├── R-da.po ├── R-de.po ├── R-en@quot.po ├── R-pl.po ├── RODBC.pot ├── da.po ├── de.po ├── en@quot.po └── pl.po ├── src ├── Makevars.in ├── Makevars.win ├── RODBC.c ├── config.h.in └── config.h.win ├── tests ├── DB2.RR ├── DB2.Rout.save ├── Master.R ├── Notes ├── Oracle.RR ├── Oracle.Rout.save ├── SQLServer.RR ├── SQLServer.Rout.save ├── access.RR ├── access.Rout.save ├── excel.RR ├── excel.Rout.save ├── hills.xls ├── mimer.RR ├── mimer.Rout.save ├── mysql-win.RR ├── mysql-win.Rout.save ├── mysql.RR ├── mysql.Rout.save ├── postgresql.RR ├── postgresql.Rout.save ├── sqlite3.RR └── sqlite3.Rout.save └── vignettes ├── Access.png ├── Makefile ├── 
MySQL.png ├── Oracle.png ├── RODBC.Rnw ├── SQLserver.png ├── linuxDSN.png ├── linuxDSNsqlite.png ├── macAccess.png ├── macODBC.png ├── winDSN1.png └── winDSN2.png /.gitignore: -------------------------------------------------------------------------------- 1 | ### Generated Binaries and R Packages 2 | bin/ 3 | # Except .sh files 4 | !bin/*.sh 5 | 6 | # Produced R packages 7 | install/ 8 | 9 | ### Temporary swap files 10 | *.swp 11 | *~ 12 | 13 | ### Generated Protobuf Files 14 | platform/messaging/gen-cpp/ 15 | 16 | ### Compiled third_party packages 17 | third_party/boost_1_50_0/ 18 | third_party/install/ 19 | third_party/libuuid-1.0.3/ 20 | third_party/protobuf-2.3.0/ 21 | third_party/zeromq-2.2.0/ 22 | 23 | ### Generated C++ Files 24 | # Compiled Object files 25 | *.slo 26 | *.lo 27 | *.o 28 | *.obj 29 | 30 | # Precompiled Headers 31 | *.gch 32 | *.pch 33 | 34 | # Compiled Dynamic libraries 35 | *.so 36 | *.dylib 37 | *.dll 38 | 39 | # Fortran module files 40 | *.mod 41 | 42 | # Compiled Static libraries 43 | *.lai 44 | *.la 45 | *.a 46 | *.lib 47 | 48 | # Executables 49 | *.exe 50 | *.out 51 | *.app 52 | 53 | ### Generated R Files 54 | # History files 55 | .Rhistory 56 | .Rapp.history 57 | 58 | # Example code in package build process 59 | *-Ex.R 60 | 61 | # RStudio files 62 | .Rproj.user/ 63 | 64 | # produced vignettes 65 | vignettes/*.html 66 | vignettes/*.pdf 67 | 68 | # python pyc files 69 | *.pyc 70 | -------------------------------------------------------------------------------- /algorithms/HPdata/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HPdata 2 | Type: Package 3 | Title: Distributed Data Package 4 | Version: 1.2.0 5 | Date: 2015-04-17 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Depends: R (>= 3.0.0), distributedR, data.table (>= 1.8.10) 9 | LinkingTo: Rcpp 10 | Suggests: vRODBC 11 | Description: Functions to load data into distributed data 
structures. Written using HP Vertica Distributed R package. 12 | License: GPL (>= 2) | file LICENSE 13 | 14 | -------------------------------------------------------------------------------- /algorithms/HPdata/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright [2013] Hewlett-Packard Development Company, L.P. 2 | 3 | This program is free software; you can redistribute it and/or 4 | modify it under the terms of the GNU General Public License 5 | as published by the Free Software Foundation; either version 2 6 | of the License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 | 17 | -------------------------------------------------------------------------------- /algorithms/HPdata/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(HPdata) 2 | exportPattern("^[[:alpha:]]+") 3 | import(distributedR) 4 | -------------------------------------------------------------------------------- /algorithms/HPdata/R/db2matrix.R: -------------------------------------------------------------------------------- 1 | # Copyright [2013] Hewlett-Packard Development Company, L.P. 2 | # 3 | # This program is free software; you can redistribute it and/or 4 | # modify it under the terms of the GNU General Public License 5 | # as published by the Free Software Foundation; either version 2 6 | # of the License, or (at your option) any later version. 
## db2matrix: read a database table (or a subset of its columns) into a matrix
##
## tableName: name of the table, as a single character string.
##            NOTE: it is pasted into the query text verbatim, so it must come
##            from a trusted source.
## dsn:       ODBC data source name handed to odbcConnect()
## features:  list (or character vector) of the column names to read. When it
##            is missing, empty, made only of empty strings, or the single
##            entry "*", every column of the table is selected.
##
## Returns as.matrix() of the data frame produced by sqlQuery(). Anything else
## returned by sqlQuery() is treated as a failed query and raised with stop().
db2matrix <- function(tableName, dsn, features = list(...)) {

    if(!is.character(tableName) || length(tableName) != 1)
        stop("The name of the table should be specified")
    if(missing(dsn) || is.null(dsn))
        stop("The ODBC configuration should be specified")

    # Flatten the requested columns to a plain character vector so that the
    # checks below are scalar conditions. (Comparing a multi-element list with
    # "" inside `||` is an error in recent versions of R.)
    columns <- if(missing(features)) character(0) else as.character(unlist(features))

    selectAll <- length(columns) == 0 ||
                 all(!is.na(columns) & !nzchar(columns)) ||
                 identical(columns, "*")

    if(selectAll) {
        # The wildcard must stay unquoted: "*" in double quotes is a delimited
        # identifier, i.e. a column literally named *.
        selectList <- "*"
    } else {
        if(any(is.na(columns) | !nzchar(columns)))
            stop("The names of the features should be non-empty strings")
        # Quote every column as a delimited identifier; an embedded double
        # quote is escaped by doubling it.
        selectList <- paste("\"", gsub("\"", "\"\"", columns, fixed=TRUE), "\"",
                            sep="", collapse=",")
    }

    # loading vRODBC or RODBC library for master
    if (! require(vRODBC) )
        library(RODBC)

    qryString <- paste("select", selectList, "from", tableName)

    # connecting to the database and reading the requested columns
    connect <- odbcConnect(dsn)
    segment <- sqlQuery(connect, qryString)
    odbcClose(connect)
    # check valid response from the database
    if (! is.data.frame(segment) )
        stop(segment)

    as.matrix(segment)
}
# Example:
# centers <- db2matrix("mortgage", dsn="RDev", list("mltvspline1", "mltvspline2", "agespline1", "agespline2", "hpichgspline", "ficospline"))
-- Test table with a row id plus one integer and six float columns.
-- Rows are ordered by rowid and hash-segmented on rowid across all nodes.
create table if not exists table_10K_numeric(
    rowid int not null,
    col1 int,
    col2 float,
    col3 float,
    col4 float,
    col5 float,
    col6 float,
    col7 float)
    order by rowid
    segmented by modularhash(rowid) all nodes;


-- Same numeric columns as table_10K_numeric, plus three single-character
-- columns declared with RLE encoding.
create table if not exists table_10K(
    rowid int not null,
    col1 int,
    col2 float,
    col3 float,
    col4 float,
    col5 float,
    col6 float,
    col7 float,
    col8 char(1) encoding rle,
    col9 char(1) encoding rle,
    col10 char(1) encoding rle)
    order by rowid
    segmented by modularhash(rowid) all nodes;

-- Edge list: one row per (u, v) pair with an optional weight.
-- Rows are ordered and hash-segmented on v.
-- NOTE(review): presumably u is the source and v the target vertex; confirm against the graph loaders.
create table if not exists table_graph(
    u int not null,
    v int not null,
    weight float)
    order by v
    segmented by modularhash(v) all nodes;
## ---------------------------------------------------------------------------
## test_1_setup.R
## Creates the schema, loads the bundled data files through vsql, defines the
## views and the scratch folder, then checks through ODBC that all of them
## are present.
## ---------------------------------------------------------------------------
library(HPdata)
library(vRODBC)

data_path <- system.file("data", package="HPdata")
connect <- odbcConnect("distr_regression")

# Schema first: the COPY statements below need the tables to exist.
system(paste0("vsql -f", data_path, "/schema.sql"))
print("Created schema")

system(paste0("vsql -c \"COPY table_10K_numeric FROM '", data_path, "/data_10K_numeric.dat' DELIMITER '|' DIRECT\""))
system(paste0("vsql -c \"COPY table_10K FROM '", data_path, "/data_10K.dat' DELIMITER '|' DIRECT\""))
system(paste0("vsql -c \"COPY table_graph FROM '", data_path, "/graph3.dat' DELIMITER ' ' DIRECT\""))
print("Uploaded data")

# One view per table.
system("vsql -c \"CREATE VIEW view_10K_numeric AS (SELECT * FROM table_10K_numeric)\"")
system("vsql -c \"CREATE VIEW view_10K AS (SELECT * FROM table_10K)\"")
system("vsql -c \"CREATE VIEW view_graph AS (SELECT * FROM table_graph)\"")

system("mkdir /tmp/graphSplits")
print("Created /tmp/graphSplits folder for the output files of splitGraphFile function")

context("Setup and validate setup")

test_that("Schema is created and data is loaded correctly", {
    tableCount <- sqlQuery(connect, "select count(*) from tables where table_name in ('table_10K_numeric', 'table_10K', 'table_graph')")
    expect_that(tableCount, is_a("data.frame"))
    expect_equal(tableCount[[1]][[1]], 3)

    viewCount <- sqlQuery(connect, "select count(*) from views where table_name in ('view_10K', 'view_10K_numeric', 'view_graph')")
    expect_that(viewCount, is_a("data.frame"))
    expect_equal(viewCount[[1]][[1]], 3)
})

test_that("DistributedR extension package is installed", {
    functionCount <- sqlQuery(connect, "select count(*) from user_functions where function_name in ('DeployModelToVertica', 'ExportToDistributedR', 'DeleteModel')")
    expect_that(functionCount, is_a("data.frame"))
    expect_equal(functionCount[[1]][[1]], 3)
})

odbcClose(connect)

## ---------------------------------------------------------------------------
## test_ddc.R
## Loads the sample CSV and ORC files from ./data into dframes and checks the
## dimensions of the fetched partition.
## ---------------------------------------------------------------------------
library(HPdata)

context("Distributed Data Connector")

test_that("ex001.csv", {
    csvPath <- paste0(getwd(), '/data/ex001.csv')
    loaded <- csv2dframe(csvPath, schema='a:int64,b:character')
    fetched <- getpartition(loaded)
    expect_true(is.data.frame(fetched))
    expect_equal(nrow(fetched), 128)
    expect_equal(ncol(fetched), 2)
})

test_that("ex002.csv", {
    csvPath <- paste0(getwd(), '/data/ex002.csv')
    loaded <- csv2dframe(csvPath, schema='a:int64,b:character,c:int64,d:character')
    fetched <- getpartition(loaded)
    expect_true(is.data.frame(fetched))
    expect_equal(nrow(fetched), 128)
    expect_equal(ncol(fetched), 4)
})

test_that("TestOrcFile.test1.orc", {
    orcPath <- paste0(getwd(), '/data/TestOrcFile.test1.orc')
    loaded <- orc2dframe(orcPath, selectedStripes='0')
    fetched <- getpartition(loaded)
    expect_true(is.data.frame(fetched))
    expect_equal(nrow(fetched), 2)
    expect_equal(ncol(fetched), 12)
})

## ---------------------------------------------------------------------------
## test_zz_cleanup.R
## Drops the tables and views created by test_1_setup.R and removes the
## scratch folder.
## ---------------------------------------------------------------------------
library(HPdata)
library(vRODBC)

connect <- odbcConnect("distr_regression")

system("vsql -c \"DROP table table_10K_numeric\"")
system("vsql -c \"DROP table table_10K\"")
system("vsql -c \"DROP table table_graph\"")

system("vsql -c \"DROP VIEW view_10K_numeric\"")
system("vsql -c \"DROP VIEW view_10K\"")
system("vsql -c \"DROP VIEW view_graph\"")

system("rm -r /tmp/graphSplits")

odbcClose(connect)
related functions - data loading, data preparation, etc. for the distributed R environment. It is written based on the infrastructure created in HP-Labs for distributed computing in R. 8 | } 9 | 10 | \details{ 11 | \tabular{ll}{ 12 | Package: \tab HPdata\cr 13 | Type: \tab Package\cr 14 | Version: \tab 1.2.0\cr 15 | Date: \tab 2015-01-16\cr 16 | } 17 | Main Functions: 18 | \itemize{ 19 | \item {db2darray:} {It is an example for loading a set of unlabeled samples stored in a table to a darray.} 20 | \item {db2darrays:} {It is an example for loading a set of labeled samples stored in a table to two darrays.} 21 | \item {db2matrix:} {It is an example for loading a set of unlabeled samples stored in a table to a matrix.} 22 | \item {db2dframe:} {It is an example for loading a set of samples stored in a table to a dframe.} 23 | \item {db2dgraph:} {It loads an adjacency matrix to a darray from an edgelist stored in a database.} 24 | \item {file2dgraph:} {It loads an adjacency matrix to a darray from an edgelist stored in a set of files.} 25 | \item {splitGraphFile:} {It is an example for splitting an edge-list file of a graph, and distributing the results among the active nodes of a cluster system.} 26 | } 27 | } 28 | 29 | \author{ 30 | HP Vertica Analytics Team 31 | } 32 | \references{ 33 | \enumerate{ 34 | \item{Using R for Iterative and Incremental Processing. Shivaram Venkataraman, Indrajit Roy, Alvin AuYoung, Rob Schreiber. HotCloud 2012, Boston, USA.} 35 | } 36 | } 37 | \keyword{distributed R} 38 | \keyword{Big Data Analytics} 39 | 40 | -------------------------------------------------------------------------------- /algorithms/HPdata/man/db2matrix.Rd: -------------------------------------------------------------------------------- 1 | \name{db2matrix} 2 | \alias{db2matrix} 3 | \title{A simple loader for loading a matrix from a database} 4 | \description{ 5 | db2matrix function is an example for loading a set of unlabeled samples stored in a table to a matrix. 
It is assumed that samples are stored in a single table. All the rows of the table will be read, and each row will be a sample. 6 | } 7 | \usage{ 8 | db2matrix(tableName, dsn, features = list(...)) 9 | } 10 | 11 | \arguments{ 12 | \item{tableName}{ 13 | it is the name of the table in the database in string format. 14 | } 15 | \item{dsn}{ 16 | the Data Source Name(DSN) as provided in ODBC.INI file to connect to the database. 17 | } 18 | \item{features}{ 19 | the list of the name of columns corresponding to the features of a sample. 20 | } 21 | } 22 | 23 | \value{ 24 | \item{X}{the matrix} 25 | } 26 | 27 | \author{ 28 | HP Vertica Analytics Team 29 | } 30 | 31 | \examples{ 32 | \dontrun{ 33 | # Assuming that centers are stored in a table named "centers", 34 | # and the names of the columns are "col1", "col2", "col3", and "col4". 35 | loadedCenters <- db2matrix ("centers", list("col1", "col2", 36 | "col3", "col4"), dsn="RDev") 37 | } 38 | } 39 | 40 | \keyword{ Database } 41 | \keyword{ K-means } 42 | -------------------------------------------------------------------------------- /algorithms/HPdata/src/Makevars: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | #Copyright (C) [2014] Hewlett-Packard Development Company, L.P. 3 | 4 | #This program is free software; you can redistribute it and/or modify 5 | #it under the terms of the GNU General Public License as published by 6 | #the Free Software Foundation; either version 2 of the License, or (at 7 | #your option) any later version. 8 | 9 | #This program is distributed in the hope that it will be useful, but 10 | #WITHOUT ANY WARRANTY; without even the implied warranty of 11 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 | #General Public License for more details. 
You should have received a 13 | #copy of the GNU General Public License along with this program; if 14 | #not, write to the Free Software Foundation, Inc., 59 Temple Place, 15 | #Suite 330, Boston, MA 02111-1307 USA 16 | ##################################################################### 17 | 18 | PKG_CPPFLAGS=`Rscript -e 'Rcpp:::CxxFlags()'` 19 | PKG_LIBS=`Rscript -e 'Rcpp:::LdFlags()'` 20 | 21 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HPdclassifier 2 | Type: Package 3 | Title: Distributed classifiers for Big Data 4 | Version: 1.2.0 5 | Date: 2015-04-17 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Depends: R (>= 3.0.0), distributedR, HPdutility, randomForest (>= 4.6-10), rpart, gbm (>= 2.1.1) 9 | Description: Distributed algorithms for learning classifiers. Written using HP Vertica Distributed R package. 10 | License: GPL (>= 2) | file LICENSE 11 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright [2013] Hewlett-Packard Development Company, L.P. 2 | 3 | This program is free software; you can redistribute it and/or 4 | modify it under the terms of the GNU General Public License 5 | as published by the Free Software Foundation; either version 2 6 | of the License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 | 17 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(HPdclassifier) 2 | exportPattern("^[[:alpha:]]+") 3 | import(distributedR) 4 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/HPdclassifier-package.Rd: -------------------------------------------------------------------------------- 1 | \name{HPdclassifier-package} 2 | \alias{HPdclassifier-package} 3 | \alias{HPdclassifier} 4 | \docType{package} 5 | \title{Distributed algorithms for classifiers} 6 | \description{ 7 | \pkg{HPdclassifier} provides several distributed algorithms for classifiers. It is written based on the infrastructure created in HP-Labs for distributed computing in R. 8 | } 9 | 10 | \details{ 11 | \tabular{ll}{ 12 | Package: \tab HPdclassifier\cr 13 | Type: \tab Package\cr 14 | Version: \tab 1.2.0\cr 15 | Date: \tab 2015-01-16\cr 16 | } 17 | Main Functions: 18 | \itemize{ 19 | \item {hpdRF_parallelTree:} {It is a distributed function for randomForest that utilizes parallelism in creating each tree of the forest} 20 | \item {hpdRF_parallelForest:} {It is a distributed function for randomForest that utilizes parallelism in creating sub-forests of the forest} 21 | } 22 | } 23 | 24 | \author{ 25 | HP Vertica Analytics Team 26 | } 27 | \references{ 28 | \enumerate{ 29 | \item{Using R for Iterative and Incremental Processing. Shivaram Venkataraman, Indrajit Roy, Alvin AuYoung, Rob Schreiber. 
HotCloud 2012, Boston, USA.} 30 | } 31 | } 32 | \keyword{Distributed R} 33 | \keyword{Scalable Machine Learning algorithms} 34 | \keyword{Big Data Analytics} 35 | 36 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/deploy.hpdRF_parallelTree.Rd: -------------------------------------------------------------------------------- 1 | \name{deploy.hpdRF_parallelTree} 2 | \alias{deploy.hpdRF_parallelTree} 3 | \title{ 4 | Convert hpdRF_parallelTree model to that of randomForest model 5 | } 6 | \description{ 7 | This function converts the formatting of the trees to match that of randomForest model so that predict.randomForest can be used 8 | } 9 | \usage{ 10 | deploy.hpdRF_parallelTree(model) 11 | 12 | } 13 | \arguments{ 14 | \item{model}{ 15 | an object of class hpdRF_parallelTree, as that created by the function hpdRF_parallelTree 16 | } 17 | } 18 | \details{ 19 | The randomForest model can only handle categorical variables with fewer than 32 categories 20 | } 21 | \value{ 22 | An object of class randomForest 23 | } 24 | \author{ 25 | HP Vertica Analytics Team 26 | } 27 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/deploy.hpdegbm.Rd: -------------------------------------------------------------------------------- 1 | \name{deploy.hpdegbm} 2 | \alias{deploy.hpdegbm} 3 | \title{ 4 | Verify the hpdegbm Model 5 | } 6 | \description{ 7 | This function verifies that the formatting of the input model matches that of distributed gbm so that predict.hpdegbm can be used. 8 | } 9 | \usage{ 10 | deploy.hpdegbm(model) 11 | 12 | } 13 | \arguments{ 14 | \item{model}{ 15 | an object of class hpdegbm, such as that created by the function hpdegbm. 16 | } 17 | } 18 | 19 | \value{ 20 | An object of class gbm. 
21 | } 22 | \author{ 23 | HP Vertica Analytics Team 24 | } 25 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/predict.hpdRF_parallelTree.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.hpdRF_parallelTree} 2 | \alias{predict.hpdRF_parallelTree} 3 | \title{ 4 | Predict function for distributed random forest model 5 | } 6 | \description{ 7 | Given a distributed random forest model and new observations of the feature variables, predict the responses of the new observations 8 | } 9 | \usage{ 10 | predict.hpdRF_parallelTree(object, newdata, cutoff, do.trace = FALSE, na.action = na.fail) 11 | } 12 | \arguments{ 13 | \item{object}{ 14 | an object of class hpdRF_parallelTree, as that created by the function hpdRF_parallelTree 15 | 16 | } 17 | \item{newdata}{ 18 | a dframe or data.frame containing new data 19 | } 20 | \item{cutoff}{ 21 | (Classification only) A vector of length equal to number of classes. The ‘winning’ class for an observation is the one with the maximum ratio of proportion of votes to cutoff. Default is taken from the forest$cutoff component of object 22 | } 23 | \item{do.trace}{ 24 | If set to TRUE, give a more verbose output as randomForest is run. 
25 | } 26 | 27 | \item{na.action}{ 28 | A function to specify the action to be taken if NAs are found 29 | } 30 | } 31 | 32 | \value{ 33 | A list that has the following components: 34 | \item{response}{ 35 | predictions of the newdata 36 | } 37 | } 38 | 39 | \author{ 40 | HP Vertica Analytics Team 41 | } 42 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/predict.hpdrpart.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.hpdrpart} 2 | \alias{predict.hpdrpart} 3 | \title{ 4 | Predict function for distributed rpart model 5 | } 6 | \description{ 7 | Given a distributed rpart model and new observations of the feature variables, predict the responses of the new observations 8 | } 9 | \usage{ 10 | predict.hpdrpart(model, newdata, ..., do.trace = FALSE) 11 | } 12 | \arguments{ 13 | \item{model}{ 14 | an object of class hpdrpart, created by the function hpdrpart 15 | 16 | } 17 | \item{newdata}{ 18 | a dframe or data.frame containing new data 19 | } 20 | \item{...}{ 21 | optional parameters to be passed to the predict.rpart function 22 | } 23 | \item{do.trace}{ 24 | predict function will print out trace information when do.trace = TRUE 25 | } 26 | 27 | } 28 | 29 | \value{ 30 | A list that has the following components: 31 | \item{response}{ 32 | predictions of the newdata 33 | } 34 | } 35 | 36 | \author{ 37 | HP Vertica Analytics Team 38 | } 39 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/print.hpdRF_parallelTree.Rd: -------------------------------------------------------------------------------- 1 | \name{print.hpdRF_parallelTree} 2 | \alias{print.hpdRF_parallelTree} 3 | \title{ 4 | Print hpdRF_parallelTree models 5 | } 6 | \description{ 7 | A function that can print summary information for models of class hpdRF_parallelTree 8 | } 9 | \usage{ 10 | print.hpdRF_parallelTree(model, 
max_depth = 2) 11 | } 12 | \arguments{ 13 | \item{model}{ 14 | an object of class hpdRF_parallelTree, as that created by the function hpdRF_parallelTree 15 | } 16 | \item{max_depth}{ 17 | The maximum depth the trees will be printed (trees can be very deep) 18 | } 19 | 20 | 21 | } 22 | \details{ 23 | Does not display the trees in the model. 24 | } 25 | \value{ 26 | A summary of the model is printed 27 | } 28 | \author{ 29 | HP Vertica Analytics Team 30 | } 31 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/print.hpdRFtree.Rd: -------------------------------------------------------------------------------- 1 | \name{print.hpdRFtree} 2 | \alias{print.hpdRFtree} 3 | \title{ 4 | Print Trees returned by hpdRF_parallelTree 5 | } 6 | \description{ 7 | A function that can print the trees returned by hpdRF_parallelTree 8 | } 9 | \usage{ 10 | print.hpdRFtree(tree, max_depth = 2,classes) 11 | } 12 | \arguments{ 13 | \item{tree}{ 14 | an object of class hpdRFtree, as created by the function hpdRF_parallelTree 15 | } 16 | \item{max_depth}{ 17 | The maximum depth the trees will be printed (trees can be very deep) 18 | } 19 | \item{classes}{ 20 | (Classification only) The list of classes. Default value is to check if there are any classes associated with the tree 21 | } 22 | 23 | } 24 | \details{ 25 | Classes should be passed in; otherwise numerical values will be displayed. 
The class of subtrees is not set to hpdRFtree, so to print them, explicitly use print.hpdRFtree(subtree) instead of just print(subtree) 26 | } 27 | \value{ 28 | Tree is printed in an XML format 29 | } 30 | \author{ 31 | HP Vertica Analytics Team 32 | } 33 | -------------------------------------------------------------------------------- /algorithms/HPdclassifier/man/varImportance.Rd: -------------------------------------------------------------------------------- 1 | \name{varImportance} 2 | \alias{varImportance} 3 | \title{Distributed Variable Importance function used in randomForest} 4 | \description{ 5 | varImportance function calculates the importance of each feature given a trained model and a set of testing data. To calculate the importance of each variable, the variable is permuted while the other variables are unchanged and then the permuted data is predicted. These predictions as well as unaltered predictions are compared to actual responses. The metric used to compare is dependent upon the data. 6 | } 7 | 8 | \usage{ 9 | varImportance(model, xtest, ytest, ..., distance_metric, trace = FALSE) 10 | } 11 | 12 | \arguments{ 13 | \item{model}{a model that can be used to predict inputs xtest} 14 | \item{xtest}{a dframe or data.frame with observations of features required by model} 15 | \item{ytest}{a dframe or data.frame with observations of responses corresponding to xtest. 
If this parameter is missing, varImportance will try to infer ytest based upon model$terms and xtest.} 16 | \item{...}{optional parameters to be passed to the predict function.} 17 | \item{distance_metric}{the metric used to compare the predictions with the actual responses in ytest.} 18 | \item{trace}{when this argument is true, intermediate steps of the progress are displayed.} 19 | } 20 | 21 | \value{ 22 | A data.frame with a row for each variable that has the importance of each variable 23 | } 24 | 25 | \note{ 26 | Although only tested for models of type randomForest, the varImportance function can be used on any generic model type assuming the corresponding predict function returns a data.frame or dframe with 1 column as the output. 27 | } 28 | 29 | 30 | 31 | \author{ 32 | HP Vertica Analytics Team 33 | } 34 | 35 | -------------------------------------------------------------------------------- /algorithms/HPdcluster/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HPdcluster 2 | Type: Package 3 | Title: Distributed Clustering for Big Data 4 | Version: 1.2.0 5 | Date: 2015-04-17 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Depends: R (>= 3.0.0), distributedR 9 | LinkingTo: Rcpp 10 | Description: Distributed clustering algorithms. Written using HP Vertica Distributed R package. 11 | License: GPL (>= 2) | file LICENSE 12 | -------------------------------------------------------------------------------- /algorithms/HPdcluster/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright [2013] Hewlett-Packard Development Company, L.P. 2 | 3 | This program is free software; you can redistribute it and/or 4 | modify it under the terms of the GNU General Public License 5 | as published by the Free Software Foundation; either version 2 6 | of the License, or (at your option) any later version. 
7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 | 17 | -------------------------------------------------------------------------------- /algorithms/HPdcluster/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(HPdcluster) 2 | exportPattern("^[[:alpha:]]+") 3 | import(distributedR) 4 | -------------------------------------------------------------------------------- /algorithms/HPdcluster/man/HPdcluster-package.Rd: -------------------------------------------------------------------------------- 1 | \name{HPdcluster-package} 2 | \alias{HPdcluster-package} 3 | \alias{HPdcluster} 4 | \docType{package} 5 | \title{Distributed clustering for Big Data} 6 | \description{ 7 | \pkg{HPdcluster} provides a few distributed clustering functions. It is written based on the infrastructure created in HP-Labs for distributed computing in R. 8 | } 9 | 10 | \details{ 11 | \tabular{ll}{ 12 | Package: \tab HPdcluster\cr 13 | Type: \tab Package\cr 14 | Version: \tab 1.2.0\cr 15 | Date: \tab 2015-01-16\cr 16 | } 17 | Main Functions: 18 | \itemize{ 19 | \item {hpdkmeans:} {It is a distributed version of kmeans.} 20 | \item {hpdapply:} {It finds cluster label of a set of samples according to a given set of centers.} 21 | } 22 | } 23 | 24 | \author{ 25 | HP Vertica Analytics Team 26 | } 27 | \references{ 28 | \enumerate{ 29 | \item{Using R for Iterative and Incremental Processing. Shivaram Venkataraman, Indrajit Roy, Alvin AuYoung, Rob Schreiber. 
HotCloud 2012, Boston, USA.} 30 | \item{http://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html} 31 | } 32 | } 33 | \keyword{distributed R} 34 | \keyword{distributed clustering} 35 | \keyword{Big Data Analytics} 36 | 37 | -------------------------------------------------------------------------------- /algorithms/HPdcluster/man/hpdapply.Rd: -------------------------------------------------------------------------------- 1 | \name{hpdapply} 2 | \alias{hpdapply} 3 | \title{Cluster labeling} 4 | \description{ 5 | hpdapply function finds cluster label of a set of samples according to a given set of centers. 6 | } 7 | \usage{ 8 | hpdapply(newdata, centers, trace=FALSE) 9 | } 10 | 11 | \arguments{ 12 | \item{newdata}{ 13 | a darray (dense or sparse) or a matrix which contains the samples. 14 | } 15 | \item{centers}{ 16 | a matrix of cluster centres. Each row represents a center. Each sample in newdata will be assigned a label which indicates the row number of its corresponding center. 17 | } 18 | \item{trace}{ 19 | when this argument is true, intermediate steps of the progress are displayed. 20 | } 21 | } 22 | \details{ 23 | This function applies the centers found by hpdkmeans on a new set of samples in order to label them. 24 | } 25 | \value{ 26 | hpdapply returns a darray or a matrix based on the type of newdata which contains the corresponding label of each sample. 
27 | } 28 | \author{ 29 | HP Vertica Analytics Team 30 | } 31 | 32 | \examples{ 33 | \dontrun{ 34 | iris2 <- iris 35 | iris2$Species <- NULL 36 | 37 | library(HPdcluster) 38 | distributedR_start() 39 | 40 | X <- as.darray(data.matrix(iris2)) 41 | 42 | mykm <- hpdkmeans(X,centers=3) 43 | 44 | newdata <- matrix(c(5,4,3,5,7,1,0,8),2,4) 45 | labels <- hpdapply(newdata,mykm$centers) 46 | } 47 | } 48 | 49 | \keyword{distributed R} 50 | \keyword{Big Data Analytics} 51 | \keyword{distributed clustering} 52 | \keyword{distributed kmeans} 53 | -------------------------------------------------------------------------------- /algorithms/HPdgraph/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HPdgraph 2 | Type: Package 3 | Title: Distributed algorithms for graph analytics 4 | Version: 1.2.0 5 | Date: 2015-04-17 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Depends: R (>= 3.0.0), distributedR 9 | LinkingTo: Rcpp 10 | Description: Distributed algorithms for graph analysis. Written using HP Vertica Distributed R package. 11 | License: GPL (>= 2) | file LICENSE 12 | -------------------------------------------------------------------------------- /algorithms/HPdgraph/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright [2013] Hewlett-Packard Development Company, L.P. 2 | 3 | This program is free software; you can redistribute it and/or 4 | modify it under the terms of the GNU General Public License 5 | as published by the Free Software Foundation; either version 2 6 | of the License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 | 17 | -------------------------------------------------------------------------------- /algorithms/HPdgraph/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(HPdgraph) 2 | exportPattern("^[[:alpha:]]+") 3 | import(distributedR) 4 | -------------------------------------------------------------------------------- /algorithms/HPdgraph/man/HPdgraph-package.Rd: -------------------------------------------------------------------------------- 1 | \name{HPdgraph-package} 2 | \alias{HPdgraph-package} 3 | \alias{HPdgraph} 4 | \docType{package} 5 | \title{Distributed algorithms for graph analytics} 6 | \description{ 7 | \pkg{HPdgraph} provides distributed algorithms for graph analytics. It is written based on the infrastructure created in HP Labs for distributed computing in R. 8 | } 9 | 10 | \details{ 11 | \tabular{ll}{ 12 | Package: \tab HPdgraph\cr 13 | Type: \tab Package\cr 14 | Version: \tab 1.2.0\cr 15 | Date: \tab 2015-01-16\cr 16 | } 17 | Main Functions: 18 | \itemize{ 19 | \item {hpdpagerank:} {compute pagerank of a graph in a distributed fashion.} 20 | \item {hpdwhich.max:} {returns the index of the maximum value stored in a darray.} 21 | } 22 | } 23 | 24 | \author{ 25 | HP Vertica Analytics Team 26 | } 27 | \references{ 28 | \enumerate{ 29 | \item{Using R for Iterative and Incremental Processing. Shivaram Venkataraman, Indrajit Roy, Alvin AuYoung, Rob Schreiber. 
HotCloud 2012, Boston, USA.} 30 | } 31 | } 32 | \keyword{Distributed R} 33 | \keyword{Distributed Graph Analytics} 34 | \keyword{Big Data Analytics} 35 | 36 | -------------------------------------------------------------------------------- /algorithms/HPdgraph/man/hpdwhich.max.Rd: -------------------------------------------------------------------------------- 1 | \name{hpdwhich.max} 2 | \alias{hpdwhich.max} 3 | \title{Distributed which.max} 4 | \description{ 5 | hpdwhich.max function is a distributed version of which.max function for a 1D-array which has darray as its input argument. 6 | } 7 | \usage{ 8 | hpdwhich.max(PR, trace=FALSE) 9 | } 10 | 11 | \arguments{ 12 | \item{PR}{ 13 | a darray (dense or sparse). It must have only a single row. 14 | } 15 | \item{trace}{ 16 | when this argument is TRUE, intermediate steps of the progress are displayed. 17 | } 18 | } 19 | \details{ 20 | This function finds and returns the index of the maximum value stored in a darray. The darray is assumed to have a single row which is similar to the pagerank vector returned by hpdpagerank. Therefore, it is suitable for finding the index of the page with the highest rank in the pagerank vector produced by hpdpagerank. 21 | } 22 | \value{ 23 | it returns the index of the maximum value stored in a darray. 
24 | } 25 | \author{ 26 | HP Vertica Analytics Team 27 | } 28 | 29 | \examples{ 30 | \dontrun{ 31 | 32 | library(HPdgraph) 33 | distributedR_start() 34 | 35 | graph <- matrix(0, 6,6) 36 | graph[2,1] <- 1L;graph[2,3] <- 1L;graph[3,1] <- 1L;graph[3,2] <- 1L; 37 | graph[3,4] <- 1L;graph[4,5] <- 1L;graph[4,6] <- 1L;graph[5,4] <- 1L; 38 | graph[5,6] <- 1L;graph[6,4] <- 1L 39 | 40 | dgraph <- as.darray(graph, c(6,3)) 41 | pr <- hpdpagerank(dgraph) 42 | hpdwhich.max(pr) 43 | } 44 | } 45 | 46 | \keyword{distributed R} 47 | \keyword{Big Data Analytics} 48 | \keyword{distributed pagerank} 49 | -------------------------------------------------------------------------------- /algorithms/HPdregression/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HPdregression 2 | Type: Package 3 | Title: Distributed Regression for Big Data 4 | Version: 1.2.0 5 | Date: 2015-04-17 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Depends: R (>= 3.0.0), distributedR 9 | Description: Implementation of distributed generalized linear model. Written using HP Vertica Distributed R package. 10 | License: GPL (>= 2) | file LICENSE 11 | 12 | -------------------------------------------------------------------------------- /algorithms/HPdregression/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright [2013] Hewlett-Packard Development Company, L.P. 2 | 3 | This program is free software; you can redistribute it and/or 4 | modify it under the terms of the GNU General Public License 5 | as published by the Free Software Foundation; either version 2 6 | of the License, or (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 16 | 17 | -------------------------------------------------------------------------------- /algorithms/HPdregression/NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[[:alpha:]]+") 2 | import(distributedR) 3 | -------------------------------------------------------------------------------- /algorithms/HPdregression/man/HPdregression-package.Rd: -------------------------------------------------------------------------------- 1 | \name{HPdregression-package} 2 | \alias{HPdregression-package} 3 | \alias{HPdregression} 4 | \docType{package} 5 | \title{ HPdregression - Distributed Regression for Big Data} 6 | \description{ 7 | \pkg{HPdregression} provides distributed algorithms for regression models. It is written based on the infrastructure created in HP-Lab for distributed computing in R. 
8 | } 9 | \details{ 10 | \tabular{ll}{ 11 | Package: \tab HPdregression\cr 12 | Type: \tab Package\cr 13 | Version: \tab 1.2.0\cr 14 | Date: \tab 2015-01-16\cr 15 | } 16 | Main Functions: 17 | \itemize{ 18 | \item {hpdglm:} {It is a distributed version of glm.} 19 | \item {v.hpdglm:} {This function is implemented for evaluating a model built by hpdglm using Split-Sample-Validation method.} 20 | \item {cv.hpdglm:} {This function is implemented for evaluating a model built by hpdglm using Cross-Validation method.} 21 | } 22 | 23 | } 24 | 25 | \author{ 26 | HP Vertica Analytics Team 27 | } 28 | 29 | \keyword{distributed R, distributed Regression, Big Data Analytics} 30 | 31 | 32 | -------------------------------------------------------------------------------- /algorithms/HPdregression/man/family.hpdglm.Rd: -------------------------------------------------------------------------------- 1 | \name{family.hpdglm} 2 | \alias{family.hpdglm} 3 | \title{ 4 | family of an hpdglm model 5 | } 6 | \description{ 7 | Returns the family used for building an hpdglm model. 8 | } 9 | \usage{ 10 | family.hpdglm(object, ...) 11 | } 12 | \arguments{ 13 | \item{object}{an hpdglm model} 14 | } 15 | \value{the family of the model} 16 | 17 | \keyword{ family } 18 | \keyword{ hpdglm } 19 | -------------------------------------------------------------------------------- /algorithms/HPdregression/man/hpdglm.control.Rd: -------------------------------------------------------------------------------- 1 | \name{hpdglm.control} 2 | \alias{hpdglm.control} 3 | \title{ 4 | Auxiliary for Controlling hpdglm Fitting 5 | } 6 | \description{ 7 | Auxiliary function for \code{\link{hpdglm}} fitting. 8 | Typically only used internally by \code{\link{hpdglm.fit}}, but may be 9 | used to construct a \code{control} argument to either function. 
10 | } 11 | \usage{ 12 | hpdglm.control(epsilon = 1e-08, maxit = 25, trace = FALSE, 13 | rigorous = FALSE) 14 | } 15 | \arguments{ 16 | \item{epsilon}{ 17 | It is used to adjust desired accuracy of the result. 18 | } 19 | \item{maxit}{ 20 | It is the maximum number of iterations before achieving the desired accuracy. 21 | } 22 | \item{trace}{ 23 | When this argument is true, intermediate steps of the progress are displayed. 24 | } 25 | \item{rigorous}{ 26 | When this argument is true, some extra checks are performed during fitting procedure. For example, mu and eta may be validated in each iteration to check if the fitted values are outside of the domain. Usually these checks are time consuming; therefore, the default value for this argument is FALSE.} 27 | } 28 | 29 | \value{ 30 | A list with components named as the arguments. 31 | } 32 | 33 | \examples{ 34 | \dontrun{ 35 | library(HPdregression) 36 | distributedR_start() 37 | Y <- as.darray(as.matrix(mtcars$am),c(ceiling(length(mtcars$am)/4),1)) 38 | X <- as.darray(as.matrix(cbind(mtcars$wt,mtcars$hp)), 39 | c(ceiling(length(mtcars$hp)/4),2)) 40 | 41 | myModel <- hpdglm(Y, X, binomial, control=list(epsilon=1e-02, maxit=5, 42 | trace=FALSE, rigorous=TRUE)) 43 | } 44 | } 45 | 46 | \keyword{ hpdglm model } 47 | \keyword{ Distributed R } 48 | -------------------------------------------------------------------------------- /algorithms/HPdregression/man/predict.hpdglm.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.hpdglm} 2 | \alias{predict.hpdglm} 3 | \title{ 4 | Predict Method for hpdglm fits 5 | } 6 | \description{ 7 | It produces predicted values, obtained by evaluating the regression function on provided new data. 8 | } 9 | \usage{ 10 | predict.hpdglm(object, newdata, type = c("link", "response"), 11 | na.action = na.pass, mask = NULL, trace = TRUE, ...) 12 | } 13 | \arguments{ 14 | \item{object}{ 15 | a built model of type hpdglm. 
16 | } 17 | \item{newdata}{ 18 | a matrix or a darray containing predictors of new samples. 19 | } 20 | \item{type}{ 21 | the type of prediction required which can be "link" or "response". 22 | } 23 | \item{na.action}{ 24 | a function to determine what should be done with missing values. At this version it is always na.pass (reserved for future improvement). 25 | } 26 | \item{mask}{ 27 | a darray with a single column, and 0 or 1 as the value of its elements. It indicates which samples (rows) should be considered in the calculation. 28 | } 29 | \item{trace}{ 30 | when this argument is true, intermediate steps of the progress are displayed. 31 | } 32 | \item{\dots}{ 33 | further arguments passed to or from other methods. 34 | } 35 | } 36 | \details{ 37 | This function produces predicted values, obtained by evaluating the regression function on provided new data. New data can be either a darray or a normal matrix. 38 | } 39 | \value{ 40 | The output is a matrix or a darray, depending on the type of newdata, which contains predicted values for response. 
41 | } 42 | \author{ 43 | HP Vertica Analytics Team 44 | } 45 | 46 | \examples{ 47 | \dontrun{ 48 | library(HPdregression) 49 | distributedR_start() 50 | Y <- as.darray(data.matrix(faithful["eruptions"])) 51 | X <- as.darray(data.matrix(faithful["waiting"])) 52 | 53 | myModel <- hpdglm(Y, X) 54 | newSamples <- matrix(c(1:3),,1) 55 | predict(myModel, newSamples, "link") 56 | } 57 | } 58 | 59 | \keyword{models} 60 | \keyword{regression} 61 | \keyword{distributed R} 62 | -------------------------------------------------------------------------------- /algorithms/HPdregression/man/residuals.hpdglm.Rd: -------------------------------------------------------------------------------- 1 | \name{residuals.hpdglm} 2 | \alias{residuals.hpdglm} 3 | \alias{residuals} 4 | \alias{resid} 5 | 6 | \title{ 7 | Extract Residuals of an hpdglm Model 8 | } 9 | \description{ 10 | This function extracts model residuals of an hpdglm model in a darray. The abbreviated function is resid. This function is only available for complete models. 11 | } 12 | \usage{ 13 | residuals.hpdglm(object, type = c("deviance", "pearson", 14 | "working", "response", "partial"), trace=FALSE, ...) 15 | } 16 | \arguments{ 17 | \item{object}{an hpdglm model} 18 | \item{type}{can be "deviance", "pearson", "working", "response", or "partial".} 19 | \item{trace}{ 20 | when TRUE, intermediate steps of the progress are displayed. 
21 | } 22 | } 23 | \value{ 24 | darray of residuals 25 | } 26 | 27 | \examples{ 28 | \dontrun{ 29 | library(HPdregression) 30 | distributedR_start() 31 | Y <- as.darray(as.matrix(mtcars$am), 32 | c(ceiling(length(mtcars$am)/4),1)) 33 | X <- as.darray(as.matrix(cbind(mtcars$wt,mtcars$hp)), 34 | c(ceiling(length(mtcars$hp)/4),2)) 35 | 36 | myModel <- hpdglm(responses=Y, predictors=X, 37 | family=binomial(logit), completeModel=TRUE) 38 | res <- resid(myModel) 39 | } 40 | } 41 | 42 | \keyword{ hpdglm model } 43 | \keyword{ Distributed R } 44 | -------------------------------------------------------------------------------- /algorithms/HPdutility/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: HPdutility 2 | Type: Package 3 | Title: Distributed classifiers for Big Data 4 | Version: 1.2.0 5 | Date: 2015-04-17 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Depends: R (>= 3.0.0), distributedR 9 | Description: General purpose distributed algorithms. Written using HP Vertica Distributed R package. 10 | License: GPL (>= 2) | file LICENSE 11 | -------------------------------------------------------------------------------- /algorithms/HPdutility/NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[[:alpha:]]+") 2 | import(distributedR) 3 | -------------------------------------------------------------------------------- /algorithms/HPdutility/man/confusionMatrix.Rd: -------------------------------------------------------------------------------- 1 | \name{confusionMatrix} 2 | \alias{confusionMatrix} 3 | \title{Confusion Matrix} 4 | \description{ 5 | This function generates confusion matrix for observed and predicted values of a classifier. 
6 | } 7 | \usage{ 8 | confusionMatrix(observed, predicted) 9 | } 10 | \arguments{ 11 | \item{observed}{the response observed in the test data.} 12 | \item{predicted}{the predicted value for response.} 13 | } 14 | 15 | \value{ 16 | the returned value is the generated confusion matrix. 17 | } 18 | \note{ 19 | it is assumed that an appropriate predict function has generated 'provided' input. 20 | } 21 | \author{ 22 | HP Vertica Analytics Team 23 | } 24 | 25 | \examples{ 26 | \dontrun{ 27 | library(HPdclassifier) 28 | distributedR_start() 29 | 30 | rRF <- randomForest(Species ~ ., data=iris, keep.forest=TRUE, 31 | xtest=iris[,-5],ytest=iris[,5]) 32 | 33 | predicted <- predict(rRF,iris[, -5]) 34 | confusionMatrix(iris[,5], predicted) 35 | 36 | } 37 | } 38 | \keyword{classification} 39 | 40 | -------------------------------------------------------------------------------- /algorithms/HPdutility/man/errRate.Rd: -------------------------------------------------------------------------------- 1 | \name{errorRate} 2 | \alias{errorRate} 3 | \title{Error Rates} 4 | \description{ 5 | This function calculates total error rate and error rates of each class for observed and predicted values of a classifier. 6 | } 7 | \usage{ 8 | errorRate(observed, predicted) 9 | } 10 | \arguments{ 11 | \item{observed}{the response observed in the test data.} 12 | \item{predicted}{the predicted value for response.} 13 | } 14 | 15 | \value{ 16 | the returned value is an array. The first element of the array is the error rate, 17 | which equals the total number of incorrect predictions divided by the total number of predictions. 18 | The remaining elements of the array represent error rates per class. An error rate per class 19 | is the error rate for the samples with a particular category in their response. 20 | } 21 | \note{ 22 | it is assumed that an appropriate predict function has generated 'provided' input. 
23 | } 24 | \author{ 25 | HP Vertica Analytics Team 26 | } 27 | 28 | \examples{ 29 | \dontrun{ 30 | library(HPdclassifier) 31 | distributedR_start() 32 | 33 | rRF <- randomForest(Species ~ ., data=iris, keep.forest=TRUE, 34 | xtest=iris[,-5],ytest=iris[,5]) 35 | 36 | predicted <- predict(rRF,iris[, -5]) 37 | errorRate(iris[,5], predicted) 38 | 39 | } 40 | } 41 | \keyword{classification} 42 | 43 | -------------------------------------------------------------------------------- /algorithms/HPdutility/man/meanSquared.Rd: -------------------------------------------------------------------------------- 1 | \name{meanSquared} 2 | \alias{meanSquared} 3 | \title{Mean Squared Residuals} 4 | \description{ 5 | This function calculates mean squared residuals for observed and predicted values. 6 | } 7 | \usage{ 8 | meanSquared(observed, predicted, na.rm=FALSE) 9 | } 10 | \arguments{ 11 | \item{observed}{the response observed in the test data.} 12 | \item{predicted}{the predicted value for response.} 13 | \item{na.rm}{logical. Should missing values (including ‘NaN’) be removed?} 14 | } 15 | 16 | \value{ 17 | the mean of the squared residuals is returned. 18 | } 19 | \note{ 20 | it is assumed that an appropriate predict function has generated 'predicted' input.
21 | } 22 | \author{ 23 | HP Vertica Analytics Team 24 | } 25 | 26 | \examples{ 27 | \dontrun{ 28 | library(HPdclassifier) 29 | distributedR_start() 30 | 31 | testData <- na.omit(airquality) 32 | rRF.ozone <- randomForest(Ozone ~ ., data=airquality, 33 | mtry=3, na.action=na.omit, 34 | xtest=testData[,-1],ytest=testData[,1],keep.forest=TRUE) 35 | 36 | predicted <- predict(rRF.ozone,testData[,-1]) 37 | meanSquared(testData[,1], predicted) 38 | 39 | } 40 | } 41 | \keyword{regression} 42 | 43 | -------------------------------------------------------------------------------- /algorithms/HPdutility/man/rSquared.Rd: -------------------------------------------------------------------------------- 1 | \name{rSquared} 2 | \alias{rSquared} 3 | \title{R-squared} 4 | \description{ 5 | This function calculates R-squared ( 1 - mse / Var(y)) for observed and predicted values. 6 | } 7 | \usage{ 8 | rSquared(observed, predicted, na.rm=FALSE) 9 | } 10 | \arguments{ 11 | \item{observed}{the response observed in the test data.} 12 | \item{predicted}{the predicted value for response.} 13 | \item{na.rm}{logical. Should missing values (including ‘NaN’) be removed?} 14 | } 15 | 16 | \value{ 17 | the value of R-squared is returned. 18 | } 19 | \note{ 20 | it is assumed that an appropriate predict function has generated 'predicted' input.
21 | } 22 | \author{ 23 | HP Vertica Analytics Team 24 | } 25 | 26 | \examples{ 27 | \dontrun{ 28 | library(HPdclassifier) 29 | distributedR_start() 30 | 31 | testData <- na.omit(airquality) 32 | rRF.ozone <- randomForest(Ozone ~ ., data=airquality, 33 | mtry=3, na.action=na.omit, 34 | xtest=testData[,-1],ytest=testData[,1],keep.forest=TRUE) 35 | 36 | predicted <- predict(rRF.ozone,testData[,-1]) 37 | rSquared(testData[,1], predicted) 38 | } 39 | } 40 | \keyword{regression} 41 | 42 | -------------------------------------------------------------------------------- /demo/example/hpdglm/end-to-end-hpdglm-example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/demo/example/hpdglm/end-to-end-hpdglm-example.pdf -------------------------------------------------------------------------------- /demo/shiny/randomForest/README.md: -------------------------------------------------------------------------------- 1 | ## Distributed R randomForest demo using Shiny 2 | -------------------------------------------------------------------------------- /doc/Distributed_R_1.2.x_User_Guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/Distributed_R_1.2.x_User_Guide.pdf -------------------------------------------------------------------------------- /doc/algorithms/HPdata/HPdata-Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/algorithms/HPdata/HPdata-Manual.pdf -------------------------------------------------------------------------------- /doc/algorithms/HPdclassifier/HPdclassifier-Manual.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/algorithms/HPdclassifier/HPdclassifier-Manual.pdf -------------------------------------------------------------------------------- /doc/algorithms/HPdcluster/HPdcluster-Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/algorithms/HPdcluster/HPdcluster-Manual.pdf -------------------------------------------------------------------------------- /doc/algorithms/HPdgraph/HPdgraph-Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/algorithms/HPdgraph/HPdgraph-Manual.pdf -------------------------------------------------------------------------------- /doc/algorithms/HPdregression/HPdregression-Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/algorithms/HPdregression/HPdregression-Manual.pdf -------------------------------------------------------------------------------- /doc/platform/Distributed-R-FAQ.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/platform/Distributed-R-FAQ.pdf -------------------------------------------------------------------------------- /doc/platform/Distributed-R-Manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/platform/Distributed-R-Manual.pdf -------------------------------------------------------------------------------- 
/doc/vRODBC/vRODBC-Installation-Guide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/doc/vRODBC/vRODBC-Installation-Guide.pdf -------------------------------------------------------------------------------- /platform/executor/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: Executor 2 | Type: Package 3 | Title: Distributed R for Big Data 4 | Version: 1.2.0 5 | Date: 2015-01-16 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Description: Distributed R for Big Data 9 | License: GPLv2 10 | LazyLoad: yes 11 | Depends: Rcpp (>= 0.11.0), RInside (>= 0.2.11),R (>= 3.0.0) 12 | LinkingTo: Rcpp, RInside 13 | -------------------------------------------------------------------------------- /platform/executor/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(Executor) 2 | exportPattern("^[[:alpha:]]+") 3 | import(Rcpp) 4 | -------------------------------------------------------------------------------- /platform/executor/man/spmv.Rd: -------------------------------------------------------------------------------- 1 | \name{spmv} 2 | \alias{spmv} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | spmv 6 | } 7 | \description{ 8 | This function performs a multiplication of a sparse matrix and a vector/dense matrix 9 | } 10 | \usage{ 11 | spmv(x, y) 12 | } 13 | \arguments{ 14 | \item{x}{ 15 | input sparse matrix. The number of columns should be the same as the number of rows of y. 16 | } 17 | \item{y}{ 18 | input vector/dense matrix.
19 | } 20 | } 21 | \details{ 22 | } 23 | \value{ 24 | Multiplication of x and y 25 | } 26 | \references{ 27 | %% ~put references to the literature/web site here ~ 28 | } 29 | \author{ 30 | HP Vertica Development Team 31 | } 32 | \note{ 33 | %% ~~further notes~~ 34 | } 35 | 36 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 37 | 38 | \seealso{ 39 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 40 | } 41 | \examples{ 42 | library(Matrix) 43 | sm=sparseMatrix(i=sample(1:10, size=5),j=sample(1:10, size=5),x=sample(1:100, size=5, replace=TRUE), dims=c(10,10)) 44 | in_vector = as.numeric(sample(-500:500, size=10, replace=TRUE)) 45 | in_mat = (matrix(as.numeric(sample(-100:100,size=100,replace=TRUE)),nrow=10,ncol=10)) 46 | spmv(sm, in_vector) 47 | spmv(sm, in_mat) 48 | } 49 | -------------------------------------------------------------------------------- /platform/executor/man/spvm.Rd: -------------------------------------------------------------------------------- 1 | \name{spvm} 2 | \alias{spvm} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | spvm 6 | } 7 | \description{ 8 | This function performs a multiplication of vector/matrix and sparse matrix 9 | } 10 | \usage{ 11 | spvm(x, y) 12 | } 13 | \arguments{ 14 | \item{x}{ 15 | input vector or matrix. The number of column should be the same as the number of row of y. 16 | } 17 | \item{y}{ 18 | input sparse matrix. 
19 | } 20 | } 21 | \details{ 22 | } 23 | \value{ 24 | Multiplication of x and y 25 | } 26 | \references{ 27 | %% ~put references to the literature/web site here ~ 28 | } 29 | \author{ 30 | HP Vertica Development Team 31 | } 32 | \note{ 33 | %% ~~further notes~~ 34 | } 35 | 36 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 37 | 38 | \seealso{ 39 | %% ~~objects to See Also as \code{\link{help}}, ~~~ 40 | } 41 | \examples{ 42 | library(Matrix) 43 | sm=sparseMatrix(i=sample(1:10, size=5),j=sample(1:10, size=5),x=sample(1:100, size=5, replace=TRUE), dims=c(10,10)) 44 | in_vector = as.numeric(sample(-500:500, size=10, replace=TRUE)) 45 | in_mat = (matrix(as.numeric(sample(-100:100,size=100,replace=TRUE)),nrow=10,ncol=10)) 46 | spvm(in_vector, sm) 47 | spvm(in_mat, sm) 48 | } 49 | -------------------------------------------------------------------------------- /platform/executor/src/Makevars: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | #A scalable and high-performance platform for R. 3 | #Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | #This program is free software; you can redistribute it and/or modify 6 | #it under the terms of the GNU General Public License as published by 7 | #the Free Software Foundation; either version 2 of the License, or (at 8 | #your option) any later version. 9 | 10 | #This program is distributed in the hope that it will be useful, but 11 | #WITHOUT ANY WARRANTY; without even the implied warranty of 12 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | #General Public License for more details. 
You should have received a 14 | #copy of the GNU General Public License along with this program; if 15 | #not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | #Suite 330, Boston, MA 02111-1307 USA 17 | ##################################################################### 18 | 19 | 20 | ## Use the R_HOME indirection to support installations of multiple R version 21 | R_INCLUDE_FLAGS = `$(R_HOME)/bin/R CMD config --cppflags` 22 | 23 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` 24 | 25 | PKG_CPPFLAGS = -g -DSTRICT_R_HEADERS -DCSTACK_DEFNS -std=c++0x `$(R_HOME)/bin/Rscript -e "Rcpp:::CxxFlags()"` 26 | -------------------------------------------------------------------------------- /platform/executor/src/trace_def.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. 
You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifdef PERF_TRACE 20 | #include 21 | #include 22 | #include "dLogger.h" 23 | 24 | namespace presto{ 25 | bool trace_master = false; 26 | ZTracer::ZTraceRef master_trace; 27 | 28 | bool is_master = false; 29 | 30 | boost::thread_specific_ptr trace_worker; 31 | boost::thread_specific_ptr worker_trace; 32 | 33 | bool trace_executor = false; 34 | ZTracer::ZTraceRef executor_trace; 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /platform/master/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: distributedR 2 | Type: Package 3 | Title: Distributed computation using R 4 | Version: 1.2.0 5 | Date: 2015-01-16 6 | Author: HP Vertica Analytics Team 7 | Maintainer: HP Vertica Analytics Team 8 | Description: Distribute R for Big Data 9 | License: GPLv2 10 | LazyLoad: yes 11 | Depends: Rcpp (>= 0.11.0), RInside (>= 0.2.11), XML (>= 3.98-1.1), R (>= 3.0.0), Executor (>= 1.2.0) 12 | LinkingTo: Rcpp, RInside 13 | Collate: 'dobject.R' 'darray.R' 'dframe.R' 'dlist.R' 'splits.R' 'darray_ops.R' 'dframe_ops.R' 'master.R' 'reduce.R' 'deploy.R' 14 | RcppModules: master_module, dobject_module 15 | -------------------------------------------------------------------------------- /platform/master/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(distributedR) 2 | exportPattern("^[[:alpha:]]+") 3 | import(Rcpp) 4 | export(max, 5 | min, 6 | sum, 7 | mean, 8 | colSums, 9 | rowSums, 10 | colMeans, 11 | rowMeans, 12 | head, 13 | tail, 14 | norm 15 | ) 16 | 
-------------------------------------------------------------------------------- /platform/master/inst/bin/start_proto_worker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ##################################################################### 4 | #A scalable and high-performance platform for R. 5 | #Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 6 | 7 | #This program is free software; you can redistribute it and/or modify 8 | #it under the terms of the GNU General Public License as published by 9 | #the Free Software Foundation; either version 2 of the License, or (at 10 | #your option) any later version. 11 | 12 | #This program is distributed in the hope that it will be useful, but 13 | #WITHOUT ANY WARRANTY; without even the implied warranty of 14 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | #General Public License for more details. You should have received a 16 | #copy of the GNU General Public License along with this program; if 17 | #not, write to the Free Software Foundation, Inc., 59 Temple Place, 18 | #Suite 330, Boston, MA 02111-1307 USA 19 | ##################################################################### 20 | 21 | R_LD_LIB_PATH=$(echo "cat(Sys.getenv(\"LD_LIBRARY_PATH\"))" | R --slave --vanilla) 22 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$R_LD_LIB_PATH 23 | 24 | ##For those building from source and using DISTRIBUTEDR_HOME to point to binaries. 
25 | if [ -n "$DISTRIBUTEDR_HOME" ]; then 26 | export R_LIBS_USER=$R_LIBS_USER:$DISTRIBUTEDR_HOME/install 27 | fi 28 | 29 | R_WORKER_BINARY=`dirname $0`/R-worker-bin 30 | org_arg=$@ 31 | ## parse the arguments and get a list of environment variables (-v option) 32 | while [[ $# > 1 ]] 33 | do 34 | key="$1" 35 | shift 36 | case $key in 37 | -v) 38 | name=`echo $1 | awk 'BEGIN { FS = ":" };{print $1}'` 39 | value=`echo $1 | awk 'BEGIN { FS = ":" };{print $2}'` 40 | # if the environment variable is not set, set the value 41 | if [ -z ${!name} ]; then 42 | export $name=$value 43 | fi 44 | shift 45 | ;; 46 | esac 47 | done 48 | 49 | # RInside needs to know about R_HOME. Usually this is not a problem as 50 | # RInside gets compiled in the local machine and R_HOME gets the right value. 51 | # However we're compiling RInside in the build machine and linking it statically 52 | # to R-executor-bin. Due to this we have to define the right R_HOME environment 53 | # variable here. Otherwise the executors crash when they start.
54 | R_HOME=$(R --quiet --vanilla -e "R.home(component = 'home')" | sed -e "1d"|cut -c 6- | sed 's/.$//' | sed '/^$/d') 55 | R_HOME=$R_HOME $R_WORKER_BINARY $org_arg 56 | 57 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/A0: -------------------------------------------------------------------------------- 1 | 1 1 1 2 | 1 2 1 3 | 1 3 1 4 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/A1: -------------------------------------------------------------------------------- 1 | 3 1 1 2 | 3 2 1 3 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/B0: -------------------------------------------------------------------------------- 1 | 1 1 1 2 | 3 1 1 3 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/B1: -------------------------------------------------------------------------------- 1 | 1 2 1 2 | 1 3 1 3 | 3 2 1 4 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/C0: -------------------------------------------------------------------------------- 1 | 0 0 1 2 | 0 1 5 3 | 1 0 2 4 | 1 1 6 5 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/C1: -------------------------------------------------------------------------------- 1 | 0 2 9 2 | 0 3 13 3 | 1 2 10 4 | 1 3 14 5 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/C2: -------------------------------------------------------------------------------- 1 | 2 0 3 2 | 2 1 7 3 | 3 0 4 4 | 3 1 8 5 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/C3: -------------------------------------------------------------------------------- 1 | 
2 2 11 2 | 2 3 15 3 | 3 2 12 4 | 3 3 16 5 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/G0: -------------------------------------------------------------------------------- 1 | 0 1 1 2 | 0 2 1 3 | 0 3 1 4 | 1 2 1 5 | 3 2 1 6 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/G1: -------------------------------------------------------------------------------- 1 | 1 4 1 2 | 2 4 1 3 | 3 4 1 4 | 4 5 1 5 | 4 6 1 6 | 5 7 1 7 | 6 7 1 8 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/KmeansCenters: -------------------------------------------------------------------------------- 1 | 8 6 2 | 3 11 3 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/KmeansIp0: -------------------------------------------------------------------------------- 1 | 0 0 19 2 | 0 1 10 3 | 1 0 4 4 | 1 1 19 5 | 2 0 8 6 | 2 1 6 7 | 3 0 20 8 | 3 1 9 9 | 4 0 19 10 | 4 1 3 11 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/KmeansIp1: -------------------------------------------------------------------------------- 1 | 5 0 8 2 | 5 1 11 3 | 6 0 7 4 | 6 1 11 5 | 7 0 3 6 | 7 1 12 7 | 8 0 14 8 | 8 1 16 9 | 9 0 2 10 | 9 1 10 -------------------------------------------------------------------------------- /platform/master/inst/extdata/df_data1: -------------------------------------------------------------------------------- 1 | Bolivia 46 2 | Brazil 74 3 | Chile 89 4 | Colombia 77 5 | CostaRica 84 6 | Cuba 89 7 | DominicanRep 68 8 | Ecuador 70 9 | ElSalvador 60 10 | Guatemala 55 11 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/df_data2: -------------------------------------------------------------------------------- 1 | 0 1 2 | 0 10 
3 | 16 29 4 | 16 25 5 | 21 29 6 | 15 40 7 | 14 21 8 | 6 0 9 | 13 13 10 | 9 4 11 | 12 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/df_data3: -------------------------------------------------------------------------------- 1 | Haiti 35 2 | Honduras 51 3 | Jamaica 87 4 | Mexico 83 5 | Nicaragua 68 6 | Panama 84 7 | Paraguay 74 8 | Peru 73 9 | TrinidadTobago 84 10 | Venezuela 91 11 | 12 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/df_data4: -------------------------------------------------------------------------------- 1 | 3 0 2 | 7 7 3 | 23 21 4 | 4 9 5 | 0 7 6 | 19 22 7 | 3 6 8 | 0 2 9 | 15 29 10 | 7 11 11 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/df_data_all: -------------------------------------------------------------------------------- 1 | Bolivia 46 0 1 2 | Brazil 74 0 10 3 | Chile 89 16 29 4 | Colombia 77 16 25 5 | CostaRica 84 21 29 6 | Cuba 89 15 40 7 | DominicanRep 68 14 21 8 | Ecuador 70 6 0 9 | ElSalvador 60 13 13 10 | Guatemala 55 9 4 11 | Haiti 35 3 0 12 | Honduras 51 7 7 13 | Jamaica 87 23 21 14 | Mexico 83 4 9 15 | Nicaragua 68 0 7 16 | Panama 84 19 22 17 | Paraguay 74 3 6 18 | Peru 73 0 2 19 | TrinidadTobago 84 15 29 20 | Venezuela 91 7 11 21 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_ex1_0: -------------------------------------------------------------------------------- 1 | 0 4 1 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_ex1_1: -------------------------------------------------------------------------------- 1 | 1 0 1 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_ex1_2: 
-------------------------------------------------------------------------------- 1 | 2 0 1 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_ex1_3: -------------------------------------------------------------------------------- 1 | 3 1 1 2 | 3 2 1 3 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_ex1_4: -------------------------------------------------------------------------------- 1 | 4 2 1 2 | 4 3 1 3 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_notrp_1: -------------------------------------------------------------------------------- 1 | 0 0 0 0 1 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_notrp_2: -------------------------------------------------------------------------------- 1 | 1 0 0 0 0 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_notrp_3: -------------------------------------------------------------------------------- 1 | 1 0 0 0 0 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_notrp_4: -------------------------------------------------------------------------------- 1 | 0 1 1 0 0 2 | -------------------------------------------------------------------------------- /platform/master/inst/extdata/pr_notrp_5: -------------------------------------------------------------------------------- 1 | 0 0 1 1 0 2 | -------------------------------------------------------------------------------- /platform/master/man/ddyn.load.Rd: -------------------------------------------------------------------------------- 1 | \name{ddyn.load} 2 | \alias{ddyn.load} 3 | \title{ddyn.load} 4 | 5 | \description{Load \code{so} library on all Executors} 6 | 7 | \usage{ 8 
| ddyn.load(x,trace=FALSE) 9 | } 10 | 11 | \arguments{ 12 | \item{x}{A vector of packages that their \code{so} libraries need to be loaded on all executors} 13 | \item{trace}{when it is FALSE (default) the progress of the foreach will be hidden.} 14 | } 15 | 16 | \details{ 17 | Use this function to load the specified list of libraries on all of the running executors. 18 | } 19 | 20 | \author{HP Vertica Development Team} 21 | 22 | \references{ 23 | \itemize{ 24 | \item Venkataraman, S., Bodzsar, E., Roy, I., 25 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 26 | Learning and Graph Processing with Sparse Matrices. \emph{EuroSys'13}, 27 | 197--210. 28 | \item Homepage: http://www.vertica.com/distributedr 29 | % \item Mailing list: presto-dev@external.groups.hp.com 30 | } 31 | } 32 | 33 | \seealso{ 34 | \code{\link{ddyn.unload}} 35 | } 36 | 37 | \examples{ 38 | \dontrun{ 39 | library(distributedR) 40 | distributedR_start() 41 | ddyn.load(c("HPdcluster")) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /platform/master/man/ddyn.unload.Rd: -------------------------------------------------------------------------------- 1 | \name{ddyn.unload} 2 | \alias{ddyn.unload} 3 | \title{ddyn.unload} 4 | 5 | \description{Unload \code{so} library from all Executors} 6 | 7 | \usage{ 8 | ddyn.unload(x,trace=FALSE) 9 | } 10 | 11 | \arguments{ 12 | \item{x}{A vector of packages that their \code{so} libraries need to be un-loaded on all executors} 13 | \item{trace}{when it is FALSE (default) the progress of the foreach will be hidden.} 14 | } 15 | 16 | \details{ 17 | Use this function to unload the specified list of libraries on all of the running executors. 18 | } 19 | 20 | \author{HP Vertica Development Team} 21 | 22 | \references{ 23 | \itemize{ 24 | \item Venkataraman, S., Bodzsar, E., Roy, I., 25 | AuYoung, A., and Schreiber, R. 
(2013) Presto: Distributed Machine 26 | Learning and Graph Processing with Sparse Matrices. \emph{EuroSys'13}, 27 | 197--210. 28 | \item Homepage: http://www.vertica.com/distributedr 29 | % \item Mailing list: presto-dev@external.groups.hp.com 30 | } 31 | } 32 | 33 | \seealso{ 34 | \code{\link{ddyn.load}} 35 | } 36 | 37 | \examples{ 38 | \dontrun{ 39 | library(distributedR) 40 | distributedR_start() 41 | ddyn.unload(c("HPdcluster")) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /platform/master/man/is.darray.Rd: -------------------------------------------------------------------------------- 1 | \name{is.darray} 2 | \alias{is.darray} 3 | \title{is.darray} 4 | 5 | \description{Check if input object is darray.} 6 | 7 | \usage{is.darray(x)} 8 | 9 | \arguments{ 10 | \item{x}{input object.} 11 | } 12 | 13 | \value{ Returns true if object is distributed array.} 14 | 15 | \references{ 16 | \itemize{ 17 | \item Venkataraman, S., Bodzsar, E., Roy, I., 18 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 19 | Learning and Graph Processing with Sparse 20 | Matrices. \emph{EuroSys'13}, 21 | 197--210. 
22 | \item Homepage: http://www.vertica.com/distributedr 23 | % \item Mailing list: presto-dev@external.groups.hp.com 24 | } 25 | } 26 | 27 | \author{HP Vertica Development Team} 28 | 29 | \note{ 30 | %% ~~further notes~~ 31 | } 32 | 33 | \seealso{ 34 | \code{\link{darray}} 35 | } 36 | 37 | \examples{ 38 | \dontrun{ 39 | library(distributedR) 40 | distributedR_start() 41 | m<-matrix(sample(0:1, 16, replace=T), nrow=4) 42 | is.darray(m) 43 | dm<-darray(dim=c(5,5),blocks=c(1,5)) 44 | is.darray(dm) 45 | distributedR_shutdown() 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /platform/master/man/is.dframe.Rd: -------------------------------------------------------------------------------- 1 | \name{is.dframe} 2 | \alias{is.dframe} 3 | \title{is.dframe} 4 | 5 | \description{Check if input object is dframe.} 6 | 7 | \usage{is.dframe(x)} 8 | 9 | \arguments{ 10 | \item{x}{input object.} 11 | } 12 | 13 | \value{ Returns true if object is distributed data.frame.} 14 | 15 | \references{ 16 | \itemize{ 17 | \item Venkataraman, S., Bodzsar, E., Roy, I., 18 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 19 | Learning and Graph Processing with Sparse 20 | Matrices. \emph{EuroSys'13}, 21 | 197--210. 
22 | \item Homepage: http://www.vertica.com/distributedr 23 | % \item Mailing list: presto-dev@external.groups.hp.com 24 | } 25 | } 26 | 27 | \author{HP Vertica Development Team} 28 | 29 | \note{ 30 | %% ~~further notes~~ 31 | } 32 | 33 | \seealso{ 34 | \code{\link{dframe}} 35 | } 36 | 37 | \examples{ 38 | \dontrun{ 39 | library(distributedR) 40 | distributedR_start() 41 | df<-data.frame(x=rep(sample(0:1),4), y=rep(sample(5:6), 4), z=rep(sample(2:3), 4)) 42 | is.dframe(df) 43 | ddf<-dframe(dim=c(5,5),blocks=c(1,5)) 44 | is.dframe(ddf) 45 | distributedR_shutdown() 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /platform/master/man/is.dlist.Rd: -------------------------------------------------------------------------------- 1 | \name{is.dlist} 2 | \alias{is.dlist} 3 | \title{is.dlist} 4 | 5 | \description{Check if input object is dlist.} 6 | 7 | \usage{is.dlist(x)} 8 | 9 | \arguments{ 10 | \item{x}{input object.} 11 | } 12 | 13 | \value{ Returns true if object is distributed list.} 14 | 15 | \references{ 16 | \itemize{ 17 | \item Venkataraman, S., Bodzsar, E., Roy, I., 18 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 19 | Learning and Graph Processing with Sparse 20 | Matrices. \emph{EuroSys'13}, 21 | 197--210. 
22 | \item Homepage: http://www.vertica.com/distributedr 23 | % \item Mailing list: presto-dev@external.groups.hp.com 24 | } 25 | } 26 | 27 | \author{HP Vertica Development Team} 28 | 29 | \note{ 30 | %% ~~further notes~~ 31 | } 32 | 33 | \seealso{ 34 | \code{\link{dlist}} 35 | } 36 | 37 | \examples{ 38 | \dontrun{ 39 | library(distributedR) 40 | distributedR_start() 41 | l <- list(x=sample(1:10, 5), y=sample(1:50, 5)) 42 | is.dlist(l) 43 | dl<-dlist(npartitions=4) 44 | is.dlist(dl) 45 | distributedR_shutdown() 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /platform/master/man/levels.dframe.Rd: -------------------------------------------------------------------------------- 1 | \name{levels.dframe} 2 | \alias{levels.dframe} 3 | \title{levels.dframe} 4 | 5 | \description{Finds the list of the labels on the categorical columns of a dframe.} 6 | 7 | \usage{levels.dframe(DF, colName, colID, trace=FALSE)} 8 | 9 | \arguments{ 10 | \item{DF}{the input dframe. It must be partitioned row-wise.} 11 | \item{colName}{a vector of the names of the categorical columns of interest.} 12 | \item{colID}{when colName is not available, column positions can be specified using a numerical vector.} 13 | \item{trace}{when it is FALSE (default) the progress of the foreach will be hidden.} 14 | } 15 | 16 | \details{ 17 | The specified columns should be of type character, logical, or factor. When neither colName nor colID is specified, any column of these types will be selected. When both are specified, colID will be ignored. 18 | } 19 | 20 | \value{ 21 | \item{Levels}{the array of the labels on the categorical columns of a dframe.} 22 | \item{columns}{the positions of the columns whose levels are returned, in the same order, in \code{Levels}.} 23 | } 24 | 25 | \references{ 26 | \itemize{ 27 | \item Venkataraman, S., Bodzsar, E., Roy, I., 28 | AuYoung, A., and Schreiber, R.
(2013) Presto: Distributed Machine 29 | Learning and Graph Processing with Sparse 30 | Matrices. \emph{EuroSys'13}, 31 | 197--210. 32 | \item Homepage: http://www.vertica.com/distributedr 33 | % \item Mailing list: presto-dev@external.groups.hp.com 34 | } 35 | } 36 | 37 | \author{HP Vertica Development Team} 38 | 39 | \note{ 40 | %% ~~further notes~~ 41 | } 42 | 43 | \seealso{ 44 | \code{\link{dframe}} 45 | \code{\link{factor.dframe}} 46 | \code{\link{as.factor.dframe}} 47 | \code{\link{unfactor.dframe}} 48 | } 49 | 50 | \examples{ 51 | \dontrun{ 52 | library(distributedR) 53 | distributedR_start() 54 | originalDF <- dframe(c(9,3),c(3,3)) 55 | foreach(i,1:npartitions(originalDF),function(dfi=splits(originalDF,i),idx=i){ 56 | if(idx==1) { 57 | dfi[,1] <- 1:3 58 | dfi[,2] <- c('c1','c2','c3') 59 | dfi[,3] <- c('t1','t2','t3') 60 | } else if(idx==2) { 61 | dfi[,1] <- 2:4 62 | dfi[,2] <- c('c2','c3','c4') 63 | dfi[,3] <- c('t1','t2','t3') 64 | } else { 65 | dfi[,1] <- 11:13 66 | dfi[,2] <- c('c3','c4','c5') 67 | dfi[,3] <- c('t4','t5','t6') 68 | } 69 | update(dfi) 70 | }) 71 | levels.dframe(originalDF, colID=c(2,3)) 72 | distributedR_shutdown() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /platform/master/man/load.darray.Rd: -------------------------------------------------------------------------------- 1 | \name{load.darray} 2 | \alias{load.darray} 3 | \title{ 4 | load.darray 5 | } 6 | \description{ 7 | \code{\link{load.darray}} allows loading data saved in a file or database into the input \code{\link{darray}} 8 | } 9 | \usage{ 10 | load.darray (da, filename, triplet = TRUE, transpose = FALSE) 11 | } 12 | \arguments{ 13 | \item{da}{ 14 | An input \code{\link{darray}} where we want to load the data into. 15 | } 16 | \item{filename}{ 17 | An input data file path. The path should be accessible from all worker nodes. 
The input file should be split into blocks prior to loading according to the dimension and block size of the input \code{\link{darray}}. Files with split blocks should be numbered starting from 0 and incremented by 1, and the id is appended to the input \emph{filename}. Each file block is kept at the worker node that is responsible for the \emph{id+1}th split of the \code{\link{darray}}. For example, if an input file 'sample_data' is divided into 3 blocks, files with name 'sample_data0', 'sample_data1', and 'sample_data2' should exist, and the data of 'sample_data0' is assigned to a node that is responsible for the 1st split of input \code{\link{darray}}. For automatic data splitting, refer to the $PRESTO_HOME/tools/splitter.cpp. 18 | } 19 | \item{triplet}{ 20 | A boolean flag which determines the format of the input data set. If this is set to TRUE, the input data should be expressed using triplet matrix expression, i.e., index_of_row index_of_column value. If this value is FALSE, the input file is expressed in the matrix format. The default value is TRUE. 21 | } 22 | \item{transpose}{ 23 | This boolean flag indicates if the input dataset needs to be transposed. The default value is FALSE. 24 | } 25 | } 26 | \value{ 27 | No data is returned. Each worker loads the \code{\link{darray}} blocks it is responsible for and shares the changes using the \code{\link{update}} command. 
28 | } 29 | \references{ 30 | %% ~put references to the literature/web site here ~ 31 | } 32 | \author{ 33 | } 34 | \note{ 35 | %% ~~further notes~~ 36 | } 37 | 38 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 39 | 40 | \seealso{ 41 | \code{\link{darray}},\code{\link{getpartition}} 42 | } 43 | \examples{ 44 | \dontrun{ 45 | library(distributedR) 46 | extdata.dir = system.file(package="distributedR", "extdata") 47 | distributedR_start() 48 | da1<-darray(dim=c(4,4), blocks=c(2,4)) 49 | getpartition(da1) 50 | load.darray(da1, paste(extdata.dir, "/A", sep="")) 51 | getpartition(da1) 52 | distributedR_shutdown() 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /platform/master/man/ls.Rd: -------------------------------------------------------------------------------- 1 | \name{distributedR_ls} 2 | \alias{distributedR_ls} 3 | \title{ 4 | distributedR_ls 5 | } 6 | \description{ 7 | This function shows a list of darray objects in the current Presto session. The size of a partition of a darray object is shown in KBytes with its name and the location where the split is located. 8 | } 9 | \usage{ 10 | distributedR_ls() 11 | } 12 | \arguments{ 13 | } 14 | \value{ 15 | This function shows the status of workers in the current Presto session. 16 | } 17 | \references{ 18 | %% ~put references to the literature/web site here ~ 19 | } 20 | \author{ 21 | } 22 | \note{ 23 | %% ~~further notes~~ 24 | } 25 | 26 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 27 | 28 | \seealso{ 29 | \code{\link{distributedR_shutdown}} 30 | } 31 | \examples{ 32 | \dontrun{ 33 | library(distributedR) 34 | ##Start worker process here!! 
35 | distributedR_start() 36 | da<-darray(c(4,4),c(2,4)) 37 | distributedR_ls() 38 | distributedR_shutdown() 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /platform/master/man/master_info.Rd: -------------------------------------------------------------------------------- 1 | \name{distributedR_master_info} 2 | \alias{distributedR_master_info} 3 | \title{distributedR_master_info} 4 | 5 | \description{Shows the master information of the current distributedR session. 6 | The master information includes the address, port number and session ID that distinguishes the session from other distributedR sessions. 7 | } 8 | 9 | \usage{distributedR_master_info()} 10 | 11 | \arguments{ 12 | } 13 | 14 | \value{} 15 | 16 | \references{ 17 | \itemize{ 18 | \item Venkataraman, S., Bodzsar, E., Roy, I., 19 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 20 | Learning and Graph Processing with Sparse Matrices. \emph{EuroSys'13}, 21 | 197--210. 
22 | \item Homepage: http://www.vertica.com/distributedr 23 | % \item Mailing list: presto-dev@external.groups.hp.com 24 | } 25 | } 26 | 27 | \author{HP Vertica Development Team} 28 | 29 | \note{ 30 | %% ~~further notes~~ 31 | } 32 | 33 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 34 | 35 | \seealso{ 36 | \code{\link{distributedR_start}}, \code{\link{distributedR_status}} 37 | } 38 | 39 | \examples{ 40 | \dontrun{ 41 | library(distributedR) 42 | ##Start worker process 43 | distributedR_start() 44 | distributedR_master_info() 45 | distributedR_shutdown() 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /platform/master/man/npartitions.Rd: -------------------------------------------------------------------------------- 1 | \name{npartitions} 2 | \alias{npartitions} 3 | \title{npartitions} 4 | \description{Return number of partitions in \code{\link{darray}}, 5 | \code{\link{dframe}} or \code{\link{dlist}}.} 6 | 7 | \usage{ 8 | npartitions (x) 9 | npartitions2D (x) 10 | } 11 | 12 | \arguments{ 13 | \item{x}{input distributed array, distributed data frame or distributed list.} 14 | } 15 | 16 | \details{ \code{npartitions} returns the total number of partitions in 17 | the distributed object. Use \code{npartitions2D} to obtain the 18 | number of partitions along each direction. 19 | 20 | In \code{darray(dim=c(9,10), blocks=c(3,5))}, the distributed array is 21 | partitioned blockwise. \code{npartitions} will return 6 (total 22 | number of partitions) while \code{npartitions2D} will return (3,2), 23 | i.e., 3 partitions along the row and 2 along the column 24 | axis. 25 | 26 | Mathematically, 27 | \code{npartitions(x)=npartitions2D(x)[1]*npartitions2D(x)[2]}} 28 | 29 | \value{ \code{npartitions} returns an integer that denotes the number of partitions. 30 | \code{npartitions2D} returns a vector that denotes the number of partitions in each direction. 
31 | } 32 | 33 | \references{ 34 | \itemize{ 35 | \item Venkataraman, S., Bodzsar, E., Roy, I., 36 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed 37 | Machine 38 | Learning and Graph Processing with Sparse 39 | Matrices. \emph{EuroSys'13}, 40 | 197--210. 41 | \item Homepage: http://www.vertica.com/distributedr 42 | % \item Mailing list: presto-dev@external.groups.hp.com 43 | } 44 | } 45 | 46 | \author{HP Vertica Development Team} 47 | 48 | \note{ 49 | %% ~~further notes~~ 50 | } 51 | 52 | %% ~Make other sections like Warning with 53 | %% \section{Warning }{....} ~ 54 | 55 | \seealso{ 56 | \code{\link{darray}}, \code{\link{dframe}}, \code{\link{getpartition}}, \code{\link{dlist}} 57 | } 58 | 59 | \examples{ 60 | \dontrun{ 61 | library(distributedR) 62 | distributedR_start() 63 | ##Input array of size 5X5 with 4 partitions 64 | da<-darray(dim=c(5,5), blocks=c(3,3), data=7) 65 | npartitions(da) 66 | npartitions2D(da) 67 | distributedR_shutdown() 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /platform/master/man/partitionsize.Rd: -------------------------------------------------------------------------------- 1 | \name{partitionsize} 2 | \alias{partitionsize} 3 | \title{partitionsize} 4 | \description{Return dimension of partitions in \code{\link{darray}}, 5 | \code{\link{dframe}} or \code{\link{dlist}}.} 6 | 7 | \usage{ 8 | partitionsize (x, index) 9 | partitionsize (x) 10 | } 11 | 12 | \arguments{ 13 | \item{x}{input distributed array, distributed data frame or distributed list.} 14 | \item{index}{index of partition. If missing, the sizes of all partitions are returned.} 15 | } 16 | 17 | \value{ A matrix that denotes the number of rows and columns in the partition. Row i of the matrix corresponds to the size of the i'th partition.} 18 | 19 | \references{ 20 | \itemize{ 21 | \item Venkataraman, S., Bodzsar, E., Roy, I., 22 | AuYoung, A., and Schreiber, R. 
(2013) Presto: Distributed 23 | Machine 24 | Learning and Graph Processing with Sparse 25 | Matrices. \emph{EuroSys'13}, 26 | 197--210. 27 | \item Homepage: http://www.vertica.com/distributedr 28 | % \item Mailing list: presto-dev@external.groups.hp.com 29 | } 30 | } 31 | 32 | \author{HP Vertica Development Team} 33 | 34 | \note{ 35 | %% ~~further notes~~ 36 | } 37 | 38 | %% ~Make other sections like Warning with 39 | %% \section{Warning }{....} ~ 40 | 41 | \seealso{ 42 | \code{\link{darray}}, \code{\link{dframe}}, \code{\link{getpartition}}, \code{\link{dlist}} 43 | } 44 | 45 | \examples{ 46 | \dontrun{ 47 | library(distributedR) 48 | distributedR_start() 49 | ##Input array of size 5X5 with 4 partitions 50 | da<-darray(dim=c(5,5), blocks=c(3,3), data=7) 51 | partitionsize(da,1) 52 | partitionsize(da,2) 53 | partitionsize(da) 54 | distributedR_shutdown() 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /platform/master/man/reduce.Rd: -------------------------------------------------------------------------------- 1 | \name{reduce} 2 | \alias{reduce} 3 | \title{ 4 | reduce 5 | } 6 | \description{ 7 | \code{\link{reduce}} does a reduce operation on a darray, treating the 8 | splits as elements of a reduce. 9 | } 10 | \usage{ 11 | reduce(f,d,idx=1:length(splits(d))) 12 | } 13 | \arguments{ 14 | \item{f}{ 15 | The reduce operator. It must produce a valid split (of correct size) 16 | from two other splits, i.e. f(split,split) -> split. 17 | } 18 | \item{d}{ 19 | The input \code{\link{darray}}. 20 | } 21 | \item{idx}{ 22 | The splits we want to include in the reduce (by default all splits are included). 23 | } 24 | } 25 | \value{ 26 | The handle of a \code{\link{darray}} containing a single split that is 27 | the result of the reduction. 
28 | } 29 | \references{ 30 | %% ~put references to the literature/web site here ~ 31 | } 32 | \author{ 33 | } 34 | \note{ 35 | %% ~~further notes~~ 36 | } 37 | 38 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 39 | 40 | \seealso{ 41 | \code{\link{darray}} 42 | } 43 | \examples{ 44 | \dontrun{ 45 | library(distributedR) 46 | distributedR_start() 47 | da1<-darray(dim=c(10,10), blocks=c(5,5), sparse=FALSE, data=1) %%input matrix size is 10X10 and a block size is 5X5 48 | blocksum<-reduce(`+`,da1) %%a 5x5 darray that is the sum of the blocks 49 | %%of da1 50 | distributedR_shutdown() 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /platform/master/man/shutdown.Rd: -------------------------------------------------------------------------------- 1 | \name{distributedR_shutdown} 2 | \alias{distributedR_shutdown} 3 | \title{distributedR_shutdown} 4 | 5 | \description{Shutdown session. Stops all workers, closes connections to 6 | them, and cleans resources. \code{\link{distributedR_shutdown}} is called 7 | automatically in the following cases: 8 | 9 | \itemize{ 10 | \item a worker or an executor is killed 11 | \item user interrupts execution using CTRL-C and decides to shutdown the whole session 12 | } 13 | } 14 | 15 | \usage{distributedR_shutdown()} 16 | 17 | \arguments{ 18 | % \item{pm}{pointer to PrestoMaster object. By default, finds the 19 | % object from session.} 20 | } 21 | 22 | \value{} 23 | 24 | \references{ 25 | \itemize{ 26 | \item Venkataraman, S., Bodzsar, E., Roy, I., 27 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 28 | Learning and Graph Processing with Sparse Matrices. \emph{EuroSys'13}, 29 | 197--210. 
30 | \item Homepage: http://www.vertica.com/distributedr 31 | % \item Mailing list: presto-dev@external.groups.hp.com 32 | } 33 | } 34 | 35 | \author{HP Vertica Development Team} 36 | 37 | \note{ 38 | %% ~~further notes~~ 39 | } 40 | 41 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 42 | 43 | \seealso{ 44 | \code{\link{distributedR_start}}, \code{\link{distributedR_status}} 45 | } 46 | 47 | \examples{ 48 | \dontrun{ 49 | library(distributedR) 50 | ##Start worker process 51 | distributedR_start() 52 | distributedR_status() 53 | distributedR_shutdown() 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /platform/master/man/status.Rd: -------------------------------------------------------------------------------- 1 | \name{distributedR_status} 2 | \alias{distributedR_status} 3 | \title{distributedR_status} 4 | 5 | \description{ Show status of \pkg{distributedR} workers.} 6 | 7 | \usage{ 8 | distributedR_status (help=FALSE) 9 | } 10 | 11 | \arguments{ 12 | \item{help}{If true, describes each column} 13 | } 14 | 15 | \value{ 16 | Worker information is returned as a data.frame with the following 17 | columns: 18 | \item{Workers}{IP and port of each worker.} 19 | \item{Inst}{number of executors at each worker.} 20 | \item{SysMem}{total system memory at each worker.} 21 | \item{MemUsed}{used system memory at each worker.} 22 | \item{DarrayQuota}{total memory assigned for arrays. Not enforced by runtime.} 23 | \item{DarrayUsed}{memory used to store arrays.} 24 | } 25 | 26 | \references{ 27 | \itemize{ 28 | \item Venkataraman, S., Bodzsar, E., Roy, I., 29 | AuYoung, A., and Schreiber, R. (2013) Presto: Distributed Machine 30 | Learning and Graph Processing with Sparse Matrices. \emph{EuroSys'13}, 31 | 197--210. 
32 | \item Homepage: http://www.vertica.com/distributedr 33 | % \item Mailing list: presto-dev@external.groups.hp.com 34 | } 35 | } 36 | 37 | \author{HP Vertica Development Team} 38 | \note{ 39 | %% ~~further notes~~ 40 | } 41 | 42 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 43 | 44 | \seealso{ 45 | \code{\link{distributedR_start}}, \code{\link{distributedR_shutdown}} 46 | } 47 | 48 | \examples{ 49 | \dontrun{ 50 | library(distributedR) 51 | ##Start worker process 52 | distributedR_start() 53 | distributedR_status() 54 | distributedR_shutdown() 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /platform/master/man/unfactor.dframe.Rd: -------------------------------------------------------------------------------- 1 | \name{unfactor.dframe} 2 | \alias{unfactor.dframe} 3 | \title{unfactor.dframe} 4 | 5 | \description{Converts in-place the specified categorical columns of a dframe from factor to their labels of type character.} 6 | 7 | \usage{unfactor.dframe(DF, colName, colID, trace=FALSE)} 8 | 9 | \arguments{ 10 | \item{DF}{the input dframe. It must be partitioned row-wise.} 11 | \item{colName}{a vector of the name of the interested categorical columns.} 12 | \item{colID}{when colName is not available, column positions can be specified using a numerical vector.} 13 | \item{trace}{when it is FALSE (default) the progress of the foreach will be hidden.} 14 | } 15 | 16 | \details{ 17 | The specified columns should be of type character, logical, or factor. When neither colName nor colID is specified, any column of these types will be selected. When both are specified, colID will be ignored. 18 | } 19 | 20 | \value{ the input dframe is modified in-place and the function returns nothing.} 21 | 22 | \references{ 23 | \itemize{ 24 | \item Venkataraman, S., Bodzsar, E., Roy, I., 25 | AuYoung, A., and Schreiber, R. 
(2013) Presto: Distributed Machine 26 | Learning and Graph Processing with Sparse 27 | Matrices. \emph{EuroSys'13}, 28 | 197--210. 29 | \item Homepage: http://www.vertica.com/distributedr 30 | % \item Mailing list: presto-dev@external.groups.hp.com 31 | } 32 | } 33 | 34 | \author{HP Vertica Development Team} 35 | 36 | \note{ 37 | %% ~~further notes~~ 38 | } 39 | 40 | \seealso{ 41 | \code{\link{dframe}} 42 | \code{\link{factor.dframe}} 43 | \code{\link{as.factor.dframe}} 44 | \code{\link{levels.dframe}} 45 | } 46 | 47 | \examples{ 48 | \dontrun{ 49 | library(distributedR) 50 | distributedR_start() 51 | originalDF <- dframe(c(9,3),c(3,3)) 52 | foreach(i,1:npartitions(originalDF),function(dfi=splits(originalDF,i),idx=i){ 53 | if(idx==1) { 54 | dfi[,1] <- 1:3 55 | dfi[,2] <- c('c1','c2','c3') 56 | dfi[,3] <- c('t1','t2','t3') 57 | } else if(idx==2) { 58 | dfi[,1] <- 2:4 59 | dfi[,2] <- c('c2','c3','c4') 60 | dfi[,3] <- c('t1','t2','t3') 61 | } else { 62 | dfi[,1] <- 11:13 63 | dfi[,2] <- c('c3','c4','c5') 64 | dfi[,3] <- c('t4','t5','t6') 65 | } 66 | update(dfi) 67 | }) 68 | factor.dframe(originalDF, colID=c(2,3)) 69 | op <- getpartition(originalDF,3) 70 | op[,3] 71 | unfactor.dframe(originalDF, colID=c(2,3)) 72 | op <- getpartition(originalDF,3) 73 | op[,3] 74 | distributedR_shutdown() 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /platform/master/src/DdcWorkerSelector.cpp: -------------------------------------------------------------------------------- 1 | #include "DdcWorkerSelector.h" 2 | 3 | namespace ddc { 4 | 5 | ChunkWorkerMap WorkerSelector::chunkWorkerMap() const 6 | { 7 | return chunkWorkerMap_; 8 | } 9 | 10 | void WorkerSelector::setChunkWorkerMap(const ChunkWorkerMap &chunkWorkerMap) 11 | { 12 | chunkWorkerMap_ = chunkWorkerMap; 13 | chunkIndex_ = 0; 14 | configured_ = true; 15 | } 16 | 17 | int32_t WorkerSelector::getNextWorker() { 18 | if(!configured_) { 19 | throw std::runtime_error("Need to call 
setChunkWorkerMap() first."); 20 | } 21 | if (chunkWorkerMap_.find(chunkIndex_) == chunkWorkerMap_.end()) { 22 | // not found 23 | throw std::runtime_error("Key not found in chunkWorkerMap_"); 24 | } 25 | int32_t worker = chunkWorkerMap_[chunkIndex_]; 26 | ++chunkIndex_; 27 | return worker; 28 | } 29 | 30 | 31 | 32 | } // namespace ddc 33 | -------------------------------------------------------------------------------- /platform/master/src/DdcWorkerSelector.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef DDC_WORKERSELECTOR_H_ 3 | #define DDC_WORKERSELECTOR_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace ddc { 11 | 12 | // chunkId -> worker. E.g.: 13 | // chunk0 -> worker0, chunk1 -> worker1 ... 14 | typedef std::map ChunkWorkerMap; 15 | 16 | class WorkerInfo { 17 | public: 18 | explicit WorkerInfo(const std::string& hostname, 19 | const uint64_t port, 20 | const uint64_t numExecutors) 21 | : hostname_(hostname), 22 | port_(port), 23 | numExecutors_(numExecutors) { 24 | } 25 | 26 | WorkerInfo() { 27 | 28 | } 29 | 30 | std::string hostname() const { 31 | return hostname_; 32 | } 33 | uint64_t port() const { 34 | return port_; 35 | } 36 | uint64_t numExecutors() const { 37 | return numExecutors_; 38 | } 39 | 40 | private: 41 | std::string hostname_; 42 | uint64_t port_; 43 | uint64_t numExecutors_; 44 | }; 45 | 46 | 47 | /** 48 | * Determines which worker processes which chunk. 
49 | */ 50 | class WorkerSelector { 51 | public: 52 | WorkerSelector() : 53 | chunkIndex_(0), 54 | configured_(false) 55 | { 56 | 57 | } 58 | 59 | int32_t getNextWorker(); 60 | 61 | ChunkWorkerMap chunkWorkerMap() const; 62 | void setChunkWorkerMap(const ChunkWorkerMap &chunkWorkerMap); 63 | 64 | private: 65 | ChunkWorkerMap chunkWorkerMap_; 66 | uint64_t chunkIndex_; 67 | bool configured_; 68 | }; 69 | 70 | } // namespace ddc 71 | 72 | #endif // DDC_WORKERSELECTOR_H_ 73 | -------------------------------------------------------------------------------- /platform/master/src/DeserializeArray.h: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. 
You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef __DESERIALIZE_ARRAY_H__ 20 | #define __DESERIALIZE_ARRAY_H__ 21 | 22 | #include 23 | 24 | namespace presto { 25 | SEXP Deserialize(void *data, size_t size=-1); 26 | } 27 | #endif 28 | -------------------------------------------------------------------------------- /platform/master/src/DistributedObjectMap.h: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | /** 20 | * Class which maintains a map from names of Distributed arrays to their 21 | * pointers and provides thread safe operations on it. 
22 | */ 23 | 24 | #ifndef __DISTRIBUTED_OBJECT_MAP_ 25 | #define __DISTRIBUTED_OBJECT_MAP_ 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | 33 | using namespace std; 34 | //using namespace boost; 35 | 36 | namespace presto { 37 | 38 | class DistributedObject; 39 | 40 | class DistributedObjectMap { 41 | public: 42 | DistributedObjectMap(); 43 | ~DistributedObjectMap(); 44 | 45 | void PutDistributedObject(const string& name, DistributedObject* d); 46 | void DeleteDobject(const string& name); 47 | DistributedObject* GetDistributedObject(const string& name); 48 | 49 | private: 50 | boost::shared_ptr > dobject_map_; 51 | boost::shared_ptr mutex_; 52 | }; 53 | 54 | } // namespace presto 55 | 56 | #endif // __DISTRIBUTED_ARRAY_MAP_ 57 | -------------------------------------------------------------------------------- /platform/master/src/DistributedOperations.h: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. 
You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef __DISTRIBUTED_OPERATIONS_ 20 | #define __DISTRIBUTED_OPERATIONS_ 21 | 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | using namespace Rcpp; 27 | 28 | namespace presto { 29 | RcppExport SEXP DistributedObject_ExecR(SEXP presto_master_exp, 30 | SEXP func_body_exp, 31 | SEXP num_calls_exp, 32 | SEXP arg_names_exp, 33 | SEXP split_names_exp, 34 | SEXP arg_vals_exp, 35 | SEXP list_args_exp, 36 | SEXP raw_names_exp, 37 | SEXP raw_vals_exp, 38 | SEXP wait_exp, 39 | SEXP scheduler_policy_exp, 40 | SEXP inputs_sexp, 41 | SEXP progress_sexp, 42 | SEXP trace_sexp); 43 | 44 | RcppExport SEXP DistributedObject_Get(SEXP presto_master_exp, 45 | SEXP split); 46 | 47 | RcppExport SEXP DistributedObject_PrintStats(SEXP presto_master_exp, 48 | SEXP splits); 49 | } // namespace presto 50 | 51 | #endif // __DISTRIBUTED_OPERATIONS_ 52 | -------------------------------------------------------------------------------- /platform/master/src/PrestoMasterHandler.h: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 
9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | /** 20 | * Class that handles a request from a Worker to Master 21 | * This class is thread-safe as multiple worker threads invoke the Run 22 | * function. 23 | */ 24 | 25 | #ifndef _PRESTO_MASTER_HANDLER_ 26 | #define _PRESTO_MASTER_HANDLER_ 27 | 28 | #include 29 | #include "master.pb.h" 30 | #include "shared.pb.h" 31 | 32 | #include "common.h" 33 | #include "dLogger.h" 34 | 35 | using namespace std; 36 | using namespace zmq; 37 | 38 | namespace presto { 39 | 40 | class DistributedObjectMap; 41 | class Scheduler; 42 | class PrestoMaster; 43 | 44 | class PrestoMasterHandler { 45 | public: 46 | PrestoMasterHandler(DistributedObjectMap* darray_map, Scheduler *scheduler, 47 | PrestoMaster *presto_master); 48 | ~PrestoMasterHandler(); 49 | 50 | void Run(context_t* ctx, int port_start, int port_end); 51 | private: 52 | bool NewUpdate(NewUpdateRequest update); 53 | bool HandleTaskDone(TaskDoneRequest done); 54 | bool ValidateUpdates(TaskDoneRequest* req); 55 | bool UpdateFlexObjectSizes(std::set names); 56 | 57 | DistributedObjectMap* darray_map_; 58 | Scheduler* scheduler_; 59 | PrestoMaster *presto_master_; 60 | }; 61 | 62 | } // namespace 63 | 64 | #endif // _PRESTO_MASTER_HANDLER_ 65 | -------------------------------------------------------------------------------- /platform/master/src/ResourceManager.h: -------------------------------------------------------------------------------- 1 | 
/******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef ___RESOURCE_MANAGER_H__ 20 | #define ___RESOURCE_MANAGER_H__ 21 | 22 | #include 23 | #include "Scheduler.h" 24 | #include "dLogger.h" 25 | 26 | namespace presto { 27 | 28 | class PrestoMaster; 29 | class ResourceManager { 30 | public: 31 | ResourceManager(Scheduler* scheduler, PrestoMaster *pm); 32 | ~ResourceManager(); 33 | void Run(); 34 | void SendHello(WorkerInfo* worker); 35 | 36 | protected: 37 | void SetReplyAttrFlag(int flag); 38 | bool CheckIfContacted(Worker* worker); 39 | bool CheckIfDead(Worker* worker); 40 | void ShutDown(string msg); 41 | 42 | private: 43 | int hello_reply_flag_; 44 | Scheduler* scheduler_; 45 | PrestoMaster *pm_; 46 | volatile bool is_interrupted; 47 | }; 48 | } 49 | #endif 50 | -------------------------------------------------------------------------------- /platform/master/src/common/DistDataFrame.h: -------------------------------------------------------------------------------- 1 | 
/******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef __DIST_DATA_FRAME_H__ 20 | #define __DIST_DATA_FRAME_H__ 21 | 22 | #include "ArrayData.h" 23 | #include "dLogger.h" 24 | 25 | namespace presto { 26 | typedef struct { 27 | int64_t type; 28 | int64_t size; 29 | int64_t dims[2]; 30 | StorageLayer store; 31 | } dframe_header_t; 32 | 33 | class DistDataFrame : public ArrayData { 34 | public: 35 | explicit DistDataFrame(const std::string &name, StorageLayer store=WORKER); 36 | DistDataFrame(const std::string &name, StorageLayer store, size_t r_size, 37 | const SEXP sexp, size_t size); 38 | virtual void LoadInR(RInside &R, const std::string &varname); 39 | virtual std::pair GetDims() const; 40 | virtual ~DistDataFrame(); 41 | 42 | private: 43 | dframe_header_t *header; 44 | }; 45 | } 46 | #endif 47 | -------------------------------------------------------------------------------- /platform/master/src/common/DistList.h: -------------------------------------------------------------------------------- 1 | 
/******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef __DIST_LIST_H__ 20 | #define __DIST_LIST_H__ 21 | 22 | #include "ArrayData.h" 23 | #include "dLogger.h" 24 | 25 | namespace presto { 26 | typedef struct { 27 | int64_t type; 28 | int64_t size; 29 | int64_t dims[2]; 30 | StorageLayer store; 31 | } dlist_header_t; 32 | 33 | class DistList : public ArrayData { 34 | public: 35 | explicit DistList(const std::string &name, StorageLayer store=WORKER); 36 | DistList(const std::string &name, StorageLayer store, size_t r_size, 37 | const SEXP sexp, size_t size, int split_len); 38 | virtual void LoadInR(RInside &R, const std::string &varname); 39 | virtual std::pair GetDims() const; 40 | virtual ~DistList(); 41 | 42 | private: 43 | dlist_header_t *header; 44 | }; 45 | } 46 | #endif 47 | -------------------------------------------------------------------------------- /platform/master/src/common/Makefile: -------------------------------------------------------------------------------- 1 | # --- COMMON --- 2 | 3 | 4 | 
R_CXXFLAGS = $(shell R CMD config --cppflags) $(shell Rscript -e "Rcpp:::CxxFlags()") $(shell Rscript -e "RInside:::CxxFlags()") 5 | 6 | CXXFLAGS = -fPIC -std=c++0x -g -O2 -fopenmp -finline-limit=10000 -DNDEBUG -I../third_party/install/include -I../messaging/gen-cpp/ -I./ -I../third_party/boost_1_50_0/ 7 | 8 | LDFLAGS = -L../third_party/install/lib 9 | 10 | PRESTO_COMMON_DIR = . 11 | PRESTO_COMMON_HEADERS = $(wildcard ${PRESTO_COMMON_DIR}/*.h) 12 | PRESTO_COMMON_SRC = $(wildcard ${PRESTO_COMMON_DIR}/*.cpp) 13 | PRESTO_COMMON_OBJS = $(PRESTO_COMMON_SRC:.cpp=.o) 14 | 15 | all: $(PRESTO_COMMON_OBJS) ../third_party/install/lib/libR-common.a 16 | 17 | # Common objects 18 | %.o: %.cpp $(PRESTO_COMMON_HEADERS) 19 | g++ -c $< $(CXXFLAGS) $(R_CXXFLAGS) -o $@ 20 | 21 | # Common library 22 | ../third_party/install/lib/libR-common.a: ../third_party/install/lib/libR-proto.a $(PRESTO_COMMON_OBJS) 23 | mkdir -p ../third_party/install/lib 24 | ar rcs $@ $(PRESTO_COMMON_OBJS) 25 | 26 | clean: 27 | rm -rf *.o ../third_party/install/lib/libR-common.a 28 | -------------------------------------------------------------------------------- /platform/master/src/common/Observer.h: -------------------------------------------------------------------------------- 1 | #ifndef OBSERVER_H 2 | #define OBSERVER_H 3 | 4 | #include <algorithm> 5 | #include <cassert> 6 | #include <vector> 7 | template <typename T> 8 | class IObserver 9 | { 10 | public: 11 | virtual ~IObserver() {} 12 | virtual void Update(T& data) = 0; // invoked by ISubject<T>::Notify() with the published data 13 | }; 14 | 15 | // ISubject<T> holds raw IObserver<T> pointers (never deleted here) and forwards Notify() to each in subscription order. 16 | template <typename T> 17 | class ISubject 18 | { 19 | public: 20 | virtual ~ISubject(){} 21 | // Appends observer to the list; no duplicate check is made. 22 | virtual void Subscribe(IObserver<T> *observer) { 23 | mObservers.push_back(observer); 24 | } 25 | // Removes every occurrence of observer; no-op if it was never subscribed. Erasing invalidates Notify()'s iterators, so do not call this from inside Update(). 26 | virtual void Unsubscribe(IObserver<T> *observer) { 27 | mObservers.erase(std::remove(mObservers.begin(), mObservers.end(), observer), mObservers.end()); 28 | } 29 | // Calls Update(data) on every subscribed observer. 30 | virtual void Notify(T& data) { 31 | for (auto i = mObservers.begin(); i != mObservers.end(); ++i){ 32 | (*i)->Update(data); 33 | } 34 | } 35 | 36 | typedef std::vector<IObserver<T> *> ObserverList; 37 | ObserverList mObservers; 38 | }; 39 | 40 | 
#endif // OBSERVER_H 41 | -------------------------------------------------------------------------------- /platform/master/src/common/PrestoException.h: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef __PRESTO_EXCEPTION__ 20 | #define __PRESTO_EXCEPTION__ 21 | 22 | #include 23 | 24 | #include "dLogger.h" 25 | 26 | using namespace std; 27 | 28 | namespace presto { 29 | class PrestoWarningException : public std::exception { 30 | public: 31 | /** An expression of PrestoWarningException. 
Usually this exception is used to show an error message; the message is logged with LOG_WARN at construction and returned by what(). 32 | */ 33 | PrestoWarningException(string m = "PrestoWarningException") : msg(m) {LOG_WARN(m.c_str());} // NOLINT 34 | ~PrestoWarningException() throw() {} 35 | const char* what() const throw() { return msg.c_str(); } 36 | 37 | private: 38 | string msg; 39 | }; 40 | 41 | class PrestoShutdownException : public std::exception { 42 | public: 43 | /** PrestoShutdownException is thrown when the entire presto session needs to be shut down. 44 | The message is logged with LOG_ERROR at construction and returned by what(). */ 45 | PrestoShutdownException(string m = "PrestoShutdownException") : msg(m) {LOG_ERROR(m.c_str());} // NOLINT 46 | ~PrestoShutdownException() throw() {} 47 | const char* what() const throw() { return msg.c_str(); } 48 | 49 | private: 50 | string msg; 51 | }; 52 | } 53 | #endif 54 | -------------------------------------------------------------------------------- /platform/master/src/common/error.h: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. 
You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifndef __ERROR_H__ 20 | #define __ERROR_H__ 21 | 22 | #include 23 | 24 | #define ERROR(msg, action) \ 25 | fprintf(stderr, "Error in %s:%d: %s\n", __FILE__, __LINE__, msg); \ 26 | action; 27 | 28 | #define CHECK(cond, msg, action) if (!cond) { ERROR(msg, action) } 29 | 30 | #endif // __ERROR_ 31 | -------------------------------------------------------------------------------- /platform/master/src/common/trace_def.cpp: -------------------------------------------------------------------------------- 1 | /******************************************************************** 2 | *A scalable and high-performance platform for R. 3 | *Copyright (C) [2013] Hewlett-Packard Development Company, L.P. 4 | 5 | *This program is free software; you can redistribute it and/or modify 6 | *it under the terms of the GNU General Public License as published by 7 | *the Free Software Foundation; either version 2 of the License, or (at 8 | *your option) any later version. 9 | 10 | *This program is distributed in the hope that it will be useful, but 11 | *WITHOUT ANY WARRANTY; without even the implied warranty of 12 | *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | *General Public License for more details. 
You should have received a 14 | *copy of the GNU General Public License along with this program; if 15 | *not, write to the Free Software Foundation, Inc., 59 Temple Place, 16 | *Suite 330, Boston, MA 02111-1307 USA 17 | ********************************************************************/ 18 | 19 | #ifdef PERF_TRACE 20 | #include 21 | #include 22 | #include "dLogger.h" 23 | 24 | namespace presto{ 25 | bool trace_master = false; 26 | ZTracer::ZTraceRef master_trace; 27 | 28 | bool is_master = false; 29 | 30 | boost::thread_specific_ptr trace_worker; 31 | boost::thread_specific_ptr worker_trace; 32 | 33 | bool trace_executor = false; 34 | ZTracer::ZTraceRef executor_trace; 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /platform/master/src/executor/Makefile: -------------------------------------------------------------------------------- 1 | # --- COMMON --- 2 | 3 | R_CXXFLAGS = $(shell R CMD config --cppflags) $(shell Rscript -e "Rcpp:::CxxFlags()") $(shell Rscript -e "RInside:::CxxFlags()") 4 | #R_LDFLAGS = $(shell R CMD config --ldflags) $(shell Rscript -e "Rcpp:::LdFlags()") $(shell Rscript -e "RInside:::LdFlags()") 5 | R_LDFLAGS = -L$(shell Rscript -e "cat(R.home('home'), '/lib', sep='')") -lR -L$(shell Rscript -e "cat(system.file(package='RInside'),'/lib',sep='')") -Wl,-static -lRInside -Wl,-Bdynamic -Wl,-rpath,$(shell Rscript -e "cat(system.file(package='RInside'),'/lib',sep='')") 6 | 7 | CXXFLAGS = -fPIC -std=c++0x -g -O2 -fopenmp -finline-limit=10000 -DNDEBUG -I../third_party/install/include -I./ -I../common/ -I../messaging/gen-cpp -DSTRICT_R_HEADERS -I../third_party/boost_1_50_0/ -I.. 
8 | 9 | LDFLAGS = -L../third_party/install/lib -lpthread -lboost_thread -lboost_system -lboost_log -lboost_log_setup -lboost_chrono -lboost_filesystem -lboost_date_time -latomicio -lR-common -lzmq -luuid -lrt 10 | 11 | # To enable zipkin-based tracing add the following to CXXFLAGS and LDFLAGS 12 | # BLKIN_INCLUDE = -I../third_party/install/include/blkin -DPERF_TRACE 13 | # BLKIN_LINKER_FLAGS = -lzipkin-c -lblkin-front -lzipkin-cpp 14 | 15 | PRESTO_COMMON_DIR = . 16 | PRESTO_COMMON_HEADERS = $(wildcard ${PRESTO_COMMON_DIR}/*.h) 17 | #PRESTO_COMMON_SRC = $(wildcard ${PRESTO_COMMON_DIR}/*.cpp) 18 | #PRESTO_COMMON_OBJS = $(PRESTO_COMMON_SRC:.cpp=.o) 19 | PRESTO_COMMON_OBJS = executor.o 20 | 21 | all: $(PRESTO_COMMON_OBJS) ../../inst/bin/R-executor-bin 22 | 23 | # Common objects 24 | %.o: %.cpp $(PRESTO_COMMON_HEADERS) $(wildcard ../common/*.h) 25 | g++ -c $< $(CXXFLAGS) $(R_CXXFLAGS) -o $@ 26 | 27 | # Common library 28 | ../../inst/bin/R-executor-bin: $(PRESTO_COMMON_OBJS) 29 | mkdir -p ../../inst/bin 30 | g++ $(PRESTO_COMMON_OBJS) $(LDFLAGS) $(R_LDFLAGS) -o $@ 31 | 32 | clean: 33 | rm -rf *.o ../../inst/bin/R-executor-bin 34 | -------------------------------------------------------------------------------- /platform/master/src/executor/executor.h: -------------------------------------------------------------------------------- 1 | #ifndef _DR_EXECUTOR_H__ 2 | #define _DR_EXECUTOR_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | //#include 17 | 18 | namespace presto { 19 | 20 | FILE* out; 21 | 22 | class Executor { 23 | public: 24 | Executor(RInside & R) : RR(R) 25 | { 26 | in_memory_partitions.clear(); 27 | in_memory_composites.clear(); 28 | 29 | var_to_Partition.clear(); 30 | var_to_Composite.clear(); 31 | var_to_list_type.clear(); 32 | 33 | memset(err_msg, 0x00, sizeof(err_msg)); 34 | } 35 | 36 | ~Executor(); 37 | 38 | ExecutorEvent GetNextEvent(); 39 | int 
Execute(std::set>>> const & updates); 40 | int Clear(); 41 | int PersistToWorker(); 42 | 43 | int ReadSplitArgs(); 44 | int ReadRawArgs(); 45 | int ReadCompositeArgs(); 46 | 47 | void ClearTaskData(); 48 | //void HandleResult(); 49 | void CreateUpdate(const std::string& varname, const std::string& splitname, int64_t nrow, int64_t ncol, StorageLayer store); 50 | 51 | char err_msg[EXCEPTION_MSG_SIZE]; 52 | 53 | private: 54 | 55 | // reference of the R instance passed through constructor 56 | RInside & RR; 57 | 58 | //Permanent data structures 59 | std::map in_memory_partitions; 60 | std::map in_memory_composites; 61 | 62 | //Temporary data structures / iteration 63 | std::map var_to_Partition; 64 | std::map var_to_Composite; 65 | std::map var_to_list_type; 66 | 67 | std::string prev_func_body; 68 | Rcpp::Language exec_call; 69 | 70 | //boost::mutex R_mutex; 71 | //boost::recursive_mutex metadata_mutex; 72 | 73 | //std::set server_threads; 74 | }; 75 | 76 | } // end namespace presto 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /platform/master/src/messaging/Makefile: -------------------------------------------------------------------------------- 1 | # --- COMMON --- 2 | 3 | CXXFLAGS = -fPIC -std=c++0x -g -O2 -fopenmp -finline-limit=10000 -DNDEBUG -I../third_party/install/include 4 | LDFLAGS = -L../third_party/install/lib 5 | # Generated sources and objects live under GEN_PROTO_DIR, e.g. ./gen-cpp/master.pb.cc (addprefix, not subst, which would rewrite every dot) 6 | PROTO_SRC = $(wildcard *.proto) 7 | GEN_PROTO_SRC_TMP = $(PROTO_SRC:.proto=.pb.cc) 8 | GEN_PROTO_DIR = ./gen-cpp 9 | GEN_PROTO_HEADERS = $(wildcard ${GEN_PROTO_DIR}/*.h) 10 | GEN_PROTO_SRC = $(addprefix ${GEN_PROTO_DIR}/,${GEN_PROTO_SRC_TMP}) 11 | GEN_PROTO_OBJS = $(GEN_PROTO_SRC:.cc=.o) 12 | 13 | INSTALLDIR=$(abspath ../third_party/install/) 14 | PROTOC_BIN = $(INSTALLDIR)/bin/protoc 15 | 16 | # --- PROTOCOL BUFFERS --- 17 | 18 | all: ../third_party/install/lib/libR-proto.a 19 | 20 | # Protocol Buffers C++ src files 21 | ${GEN_PROTO_DIR}/master.pb.cc ${GEN_PROTO_DIR}/master.pb.h: 
./master.proto 22 | mkdir -p ${GEN_PROTO_DIR} 23 | ${PROTOC_BIN} --cpp_out=${GEN_PROTO_DIR} --proto_path=. $^ 24 | 25 | ${GEN_PROTO_DIR}/worker.pb.cc ${GEN_PROTO_DIR}/worker.pb.h: ./worker.proto 26 | mkdir -p ${GEN_PROTO_DIR} 27 | ${PROTOC_BIN} --cpp_out=${GEN_PROTO_DIR} --proto_path=. $^ 28 | 29 | ${GEN_PROTO_DIR}/shared.pb.cc ${GEN_PROTO_DIR}/shared.pb.h: ./shared.proto 30 | mkdir -p ${GEN_PROTO_DIR} 31 | ${PROTOC_BIN} --cpp_out=${GEN_PROTO_DIR} --proto_path=. $^ 32 | 33 | ${GEN_PROTO_DIR}/master.pb.o: ${GEN_PROTO_DIR}/master.pb.cc ${GEN_PROTO_DIR}/master.pb.h ${GEN_PROTO_DIR}/shared.pb.h 34 | g++ -c $< $(CXXFLAGS) -o $@ 35 | 36 | ${GEN_PROTO_DIR}/worker.pb.o: ${GEN_PROTO_DIR}/worker.pb.cc ${GEN_PROTO_DIR}/worker.pb.h ${GEN_PROTO_DIR}/shared.pb.h 37 | g++ -c $< $(CXXFLAGS) -o $@ 38 | 39 | ${GEN_PROTO_DIR}/shared.pb.o: ${GEN_PROTO_DIR}/shared.pb.cc ${GEN_PROTO_DIR}/shared.pb.h 40 | g++ -c $< $(CXXFLAGS) -o $@ 41 | 42 | 43 | # Protocol Buffers library 44 | ../third_party/install/lib/libR-proto.a: ${GEN_PROTO_DIR}/shared.pb.o ${GEN_PROTO_DIR}/worker.pb.o ${GEN_PROTO_DIR}/master.pb.o 45 | mkdir -p ../third_party/install/lib/ 46 | ar rcs $@ ${GEN_PROTO_DIR}/shared.pb.o ${GEN_PROTO_DIR}/worker.pb.o ${GEN_PROTO_DIR}/master.pb.o 47 | 48 | 49 | clean: 50 | rm -rf *.o gen-cpp/ ../third_party/install/lib/libR-proto.a 51 | -------------------------------------------------------------------------------- /platform/master/src/third_party/RInside_0.2.13.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/RInside_0.2.13.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/RODBC_1.3-11.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/RODBC_1.3-11.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/RUnit_0.4.28.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/RUnit_0.4.28.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/Rcpp_0.12.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/Rcpp_0.12.0.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/XML_3.98-1.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/XML_3.98-1.1.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/atomicio/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2006 Damien Miller. All rights reserved. 2 | Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 2. 
Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR 15 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 | IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /platform/master/src/third_party/atomicio/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for building the atomicio static library (libatomicio.a) and installing its header 2 | 3 | .PHONY: all clean 4 | INSTDIR=../../third_party/install/lib/ 5 | 6 | all: $(INSTDIR)/libatomicio.a $(INSTDIR)/../include/atomicio.h 7 | 8 | #libatomicio.so: atomicio.o 9 | # g++ -O3 -fPIC -shared -o $@ $^ 10 | 11 | $(INSTDIR)/libatomicio.a: atomicio.o 12 | mkdir -p $(INSTDIR) 13 | ar rcs $@ $^ 14 | # Depend on the local header so the installed copy is refreshed whenever atomicio.h changes 15 | $(INSTDIR)/../include/atomicio.h: atomicio.h 16 | mkdir -p $(INSTDIR)/../include 17 | cp $< $@ 18 | 19 | %.o: %.cpp 20 | g++ -O3 -fPIC -o $@ -c $^ 21 | 22 | clean: 23 | rm -rf *.o $(INSTDIR)/libatomicio.a $(INSTDIR)/../include/atomicio.h 24 | -------------------------------------------------------------------------------- /platform/master/src/third_party/atomicio/atomicio.h: -------------------------------------------------------------------------------- 1 | /* 
$OpenBSD: atomicio.h,v 1.11 2010/09/22 22:58:51 djm Exp $ */ 2 | 3 | /* 4 | * Copyright (c) 2006 Damien Miller. All rights reserved. 5 | * Copyright (c) 1995,1999 Theo de Raadt. All rights reserved. 6 | * All rights reserved. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 1. Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * 2. Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in the 15 | * documentation and/or other materials provided with the distribution. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR 18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #ifndef _ATOMICIO_H 30 | #define _ATOMICIO_H 31 | 32 | /* 33 | * Ensure all of data on socket comes through. 
f==read || f==vwrite 34 | */ 35 | size_t 36 | atomicio6(ssize_t (*f) (int, void *, size_t), int fd, void *_s, size_t n, 37 | int (*cb)(void *, size_t), void *); 38 | size_t atomicio(ssize_t (*)(int, void *, size_t), int, void *, size_t); 39 | 40 | #define vwrite (ssize_t (*)(int, void *, size_t))write 41 | 42 | /* 43 | * ensure all of data on socket comes through. f==readv || f==writev 44 | */ 45 | size_t 46 | atomiciov6(ssize_t (*f) (int, const struct iovec *, int), int fd, 47 | const struct iovec *_iov, int iovcnt, int (*cb)(void *, size_t), void *); 48 | size_t atomiciov(ssize_t (*)(int, const struct iovec *, int), 49 | int, const struct iovec *, int); 50 | 51 | #endif /* _ATOMICIO_H */ 52 | -------------------------------------------------------------------------------- /platform/master/src/third_party/blkin.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/blkin.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_1_50_0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/boost_1_50_0.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2005-2007 Philipp Henkel 2 | 3 | Use, modification, and distribution are subject to the 4 | Boost Software License, Version 1.0. 
(See accompanying file 5 | LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/Jamfile.v2: -------------------------------------------------------------------------------- 1 | use-project /boost 2 | : $(BOOST_ROOT) 3 | ; 4 | 5 | project threadpool 6 | : requirements .&&$(BOOST_ROOT) 7 | # disable auto-linking for all targets here, 8 | # primarily because it caused troubles with V2 9 | BOOST_ALL_NO_LIB=1 10 | : usage-requirements .&&$(BOOST_ROOT) 11 | : build-dir bin.v2 12 | ; 13 | 14 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/Jamrules: -------------------------------------------------------------------------------- 1 | project boost : $(BOOST_ROOT) ; 2 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/LICENSE_1_0.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the 
form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/Makefile: -------------------------------------------------------------------------------- 1 | export RELEASE_MANAGER=philipphenkel 2 | export RELEASE_VERSION=0_2_5 3 | 4 | doc: clean 5 | make --directory=./libs/threadpool/doc --print-directory doc 6 | 7 | release_doc: 8 | make --directory=./libs/threadpool/doc --print-directory release_doc 9 | 10 | release_src: clean fetch_clean_src 11 | mv clean_src threadpool-$(RELEASE_VERSION)-src 12 | zip -r9 threadpool-$(RELEASE_VERSION)-src.zip threadpool-$(RELEASE_VERSION)-src 13 | rm -rf threadpool-$(RELEASE_VERSION)-src 14 | 15 | deploy_website: 16 | make --directory=./libs/threadpool/doc --print-directory deploy_sf 17 | 18 | clean: 19 | rm -rf clean_src 20 | rm -rf threadpool-$(RELEASE_VERSION)-src.zip 21 | rm -rf threadpool-$(RELEASE_VERSION)-doc.zip 22 | make --directory=./libs/threadpool/doc --print-directory clean 23 | 24 | fetch_clean_src: 25 | rm -rf clean_src 26 | mkdir clean_src 27 | # cvs -d:pserver:anonymous@threadpool.cvs.sourceforge.net:/cvsroot/threadpool login 28 | cd clean_src; cvs -z3 -d:pserver:anonymous@threadpool.cvs.sourceforge.net:/cvsroot/threadpool export -r RELEASE_$(RELEASE_VERSION) threadpool 29 | # cd clean_src; cvs -z3 
-d:ext:$(RELEASE_MANAGER)@cvs.sourceforge.net:/cvsroot/threadpool export -r RELEASE_$(RELEASE_VERSION) threadpool 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/README: -------------------------------------------------------------------------------- 1 | threadpool 2 | 3 | Copyright (c) 2005-2007 Philipp Henkel 4 | 5 | threadpool is a cross-platform C++ thread pool library and released under the Boost Software License. 6 | 7 | See doc/index.html for information on: 8 | - API documentation and a tutorial 9 | - External dependencies 10 | - Using threadpool 11 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/TODO: -------------------------------------------------------------------------------- 1 | threadpool to-do items 2 | ====================== 3 | 4 | Documentation 5 | -------------------------------------------- 6 | - Source code documentation 7 | - Design rationale 8 | - Tutorial 9 | - Finish Quickstart Tutorial/Example 10 | 11 | Functionality 12 | -------------------------------------------- 13 | - Implement a size policy which dynamically 14 | increase/decrease the pool's size: 15 | - init with min/max threads 16 | - auto increase 17 | - auto decrease (using timed cleanup tasks) 18 | - Add some kind of deadline scheduler 19 | - Add futures to pool 20 | 21 | 22 | Examples 23 | -------------------------------------------- 24 | - Buffering Client Requests: Handle 'bursty' client traffic 25 | Some applications need more buffering than is provided by OS I/O subsystem 26 | 27 | Working on 'active' buffer for boost::iostreams 28 | buffer_filter which provides a dynamic amount of buffer objects. 29 | buffer_filter uses a threadpool with one thread which provides the buffers to 30 | consumer. 
(TODO Philipp) 31 | 32 | 2nd implementation step: 33 | Flexible configuration: Buffer capacities can be configured according to 34 | - maximum number of requests 35 | - maximum number of bytes 36 | 37 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/boost/threadpool.hpp: -------------------------------------------------------------------------------- 1 | /*! \file 2 | * \brief Main include. 3 | * 4 | * This is the only file you have to include in order to use the 5 | * complete threadpool library. 6 | * 7 | * Copyright (c) 2005-2007 Philipp Henkel 8 | * 9 | * Use, modification, and distribution are subject to the 10 | * Boost Software License, Version 1.0. (See accompanying file 11 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 12 | * 13 | * http://threadpool.sourceforge.net 14 | * 15 | */ 16 | 17 | #ifndef THREADPOOL_HPP_INCLUDED 18 | #define THREADPOOL_HPP_INCLUDED 19 | 20 | #include "./threadpool/future.hpp" 21 | #include "./threadpool/pool.hpp" 22 | 23 | #include "./threadpool/pool_adaptors.hpp" 24 | #include "./threadpool/task_adaptors.hpp" 25 | 26 | 27 | #endif // THREADPOOL_HPP_INCLUDED 28 | 29 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/boost/threadpool/detail/locking_ptr.hpp: -------------------------------------------------------------------------------- 1 | /*! \file 2 | * \brief The locking_ptr is a smart pointer with a scoped locking mechanism. 3 | * 4 | * The class is a wrapper for a volatile pointer. It enables synchronized access to the 5 | * internal pointer by locking the passed mutex. 6 | * locking_ptr is based on Andrei Alexandrescu's LockingPtr. For more information 7 | * see article "volatile - Multithreaded Programmer's Best Friend" by A. Alexandrescu. 
8 | * 9 | * 10 | * Copyright (c) 2005-2007 Philipp Henkel 11 | * 12 | * Use, modification, and distribution are subject to the 13 | * Boost Software License, Version 1.0. (See accompanying file 14 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 15 | * 16 | * http://threadpool.sourceforge.net 17 | * 18 | */ 19 | 20 | 21 | #ifndef THREADPOOL_DETAIL_LOCKING_PTR_HPP_INCLUDED 22 | #define THREADPOOL_DETAIL_LOCKING_PTR_HPP_INCLUDED 23 | 24 | #include 25 | #include 26 | 27 | 28 | namespace boost { namespace threadpool { namespace detail 29 | { 30 | 31 | /*! \brief Smart pointer with a scoped locking mechanism. 32 | * 33 | * This class is a wrapper for a volatile pointer. It enables synchronized access to the 34 | * internal pointer by locking the passed mutex. 35 | */ 36 | template 37 | class locking_ptr 38 | : private noncopyable 39 | { 40 | T* m_obj; //!< The instance pointer. 41 | Mutex & m_mutex; //!< Mutex is used for scoped locking. 42 | 43 | public: 44 | /// Constructor. 45 | locking_ptr(volatile T& obj, const volatile Mutex& mtx) 46 | : m_obj(const_cast(&obj)) 47 | , m_mutex(*const_cast(&mtx)) 48 | { 49 | // Lock mutex 50 | m_mutex.lock(); 51 | } 52 | 53 | 54 | /// Destructor. 55 | ~locking_ptr() 56 | { 57 | // Unlock mutex 58 | m_mutex.unlock(); 59 | } 60 | 61 | 62 | /*! Returns a reference to the stored instance. 63 | * \return The instance's reference. 64 | */ 65 | T& operator*() const 66 | { 67 | return *m_obj; 68 | } 69 | 70 | 71 | /*! Returns a pointer to the stored instance. 72 | * \return The instance's pointer. 
73 | */ 74 | T* operator->() const 75 | { 76 | return m_obj; 77 | } 78 | }; 79 | 80 | 81 | } } } // namespace boost::threadpool::detail 82 | 83 | 84 | #endif // THREADPOOL_DETAIL_LOCKING_PTR_HPP_INCLUDED 85 | 86 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/boost/threadpool/detail/scope_guard.hpp: -------------------------------------------------------------------------------- 1 | /*! \file 2 | * \brief Scope guard. 3 | * 4 | * Defines scope_guard, which calls a stored function object when it is destroyed unless it has been disabled. 5 | * 6 | * Copyright (c) 2005-2007 Philipp Henkel 7 | * 8 | * Use, modification, and distribution are subject to the 9 | * Boost Software License, Version 1.0. (See accompanying file 10 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 11 | * 12 | * http://threadpool.sourceforge.net 13 | * 14 | */ 15 | 16 | 17 | #ifndef THREADPOOL_DETAIL_SCOPE_GUARD_HPP_INCLUDED 18 | #define THREADPOOL_DETAIL_SCOPE_GUARD_HPP_INCLUDED 19 | 20 | 21 | 22 | #include 23 | 24 | 25 | namespace boost { namespace threadpool { namespace detail 26 | { 27 | 28 | // Copies the function given at construction and invokes it from the destructor, unless disable() was called first or the function is empty. 29 | class scope_guard 30 | : private boost::noncopyable 31 | { 32 | function0 const m_function; 33 | bool m_is_active; 34 | 35 | public: 36 | scope_guard(function0 const & call_on_exit) 37 | : m_function(call_on_exit) 38 | , m_is_active(true) 39 | { 40 | } 41 | 42 | ~scope_guard() 43 | { 44 | if(m_is_active && m_function) 45 | { 46 | m_function(); 47 | } 48 | } 49 | 50 | void disable() 51 | { 52 | m_is_active = false; 53 | } 54 | }; 55 | 56 | 57 | 58 | 59 | 60 | 61 | } } } // namespace boost::threadpool::detail 62 | 63 | #endif // THREADPOOL_DETAIL_SCOPE_GUARD_HPP_INCLUDED 64 | 65 | 66 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/boost/threadpool/pool_adaptors.hpp: -------------------------------------------------------------------------------- 1 | /*! 
\file 2 | * \brief Pool adaptors. 3 | * 4 | * This file contains an easy-to-use adaptor similar to a smart 5 | * pointer for the pool class. 6 | * 7 | * Copyright (c) 2005-2007 Philipp Henkel 8 | * 9 | * Use, modification, and distribution are subject to the 10 | * Boost Software License, Version 1.0. (See accompanying file 11 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 12 | * 13 | * http://threadpool.sourceforge.net 14 | * 15 | */ 16 | 17 | 18 | #ifndef THREADPOOL_POOL_ADAPTORS_HPP_INCLUDED 19 | #define THREADPOOL_POOL_ADAPTORS_HPP_INCLUDED 20 | 21 | #include 22 | 23 | 24 | namespace boost { namespace threadpool 25 | { 26 | 27 | 28 | // TODO convenience scheduling function 29 | /*! Schedules a Runnable for asynchronous execution. A Runnable is an arbitrary class with a run() 30 | * member function. This is a convenience shorthand for pool->schedule(bind(&Runnable::run, task_object)). 31 | * \param pool The pool; schedule() is called on it through operator->. 32 | * \param obj The Runnable object. The member function run() will be executed and should not throw exceptions. 33 | * \return true, if the task could be scheduled and false otherwise. 34 | */ 35 | template 36 | bool schedule(Pool& pool, shared_ptr const & obj) 37 | { 38 | return pool->schedule(bind(&Runnable::run, obj)); 39 | } 40 | 41 | /*! Schedules a task for asynchronous execution. The task will be executed once only. 42 | * \param task The task function object. 
43 | * \return The value returned by pool.schedule(task). */ 44 | template 45 | typename enable_if < 46 | is_void< typename result_of< typename Pool::task_type() >::type >, 47 | bool 48 | >::type 49 | schedule(Pool& pool, typename Pool::task_type const & task) 50 | { 51 | return pool.schedule(task); 52 | } 53 | 54 | /*! Overload for a pool held through a shared_ptr; returns the value of pool->schedule(task). */ 55 | template 56 | typename enable_if < 57 | is_void< typename result_of< typename Pool::task_type() >::type >, 58 | bool 59 | >::type 60 | schedule(shared_ptr const pool, typename Pool::task_type const & task) 61 | { 62 | return pool->schedule(task); 63 | } 64 | 65 | 66 | } } // namespace boost::threadpool 67 | 68 | #endif // THREADPOOL_POOL_ADAPTORS_HPP_INCLUDED 69 | 70 | 71 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/boost/threadpool/shutdown_policies.hpp: -------------------------------------------------------------------------------- 1 | /*! \file 2 | * \brief Shutdown policies. 3 | * 4 | * This file contains shutdown policies for thread_pool. 5 | * A shutdown policy controls the pool's behavior from the time 6 | * when the pool is not referenced any longer. 7 | * 8 | * Copyright (c) 2005-2007 Philipp Henkel 9 | * 10 | * Use, modification, and distribution are subject to the 11 | * Boost Software License, Version 1.0. (See accompanying file 12 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 13 | * 14 | * http://threadpool.sourceforge.net 15 | * 16 | */ 17 | 18 | 19 | #ifndef THREADPOOL_SHUTDOWN_POLICIES_HPP_INCLUDED 20 | #define THREADPOOL_SHUTDOWN_POLICIES_HPP_INCLUDED 21 | 22 | 23 | 24 | /// The namespace threadpool contains a thread pool and related utility classes. 25 | namespace boost { namespace threadpool 26 | { 27 | 28 | 29 | /*! \brief ShutdownPolicy which waits for the completion of all tasks 30 | * and the worker termination afterwards. 31 | * 32 | * \param Pool The pool's core type. 
33 | */ 34 | template 35 | class wait_for_all_tasks 36 | { 37 | public: 38 | static void shutdown(Pool& pool) 39 | { 40 | pool.wait(); // wait before terminating the workers 41 | pool.terminate_all_workers(true); 42 | } 43 | }; 44 | 45 | 46 | /*! \brief ShutdownPolicy which waits for the completion of all active tasks 47 | * and the worker termination afterwards. 48 | * 49 | * \param Pool The pool's core type. 50 | */ 51 | template 52 | class wait_for_active_tasks 53 | { 54 | public: 55 | static void shutdown(Pool& pool) 56 | { 57 | pool.clear(); // NOTE(review): presumably drops tasks that have not started yet - confirm against the pool's clear() 58 | pool.wait(); 59 | pool.terminate_all_workers(true); 60 | } 61 | }; 62 | 63 | 64 | /*! \brief ShutdownPolicy which does not wait for any tasks or worker termination. 65 | * 66 | * This policy does not wait for any tasks. Nevertheless all active tasks will be processed completely. 67 | * 68 | * \param Pool The pool's core type. 69 | */ 70 | template 71 | class immediately 72 | { 73 | public: 74 | static void shutdown(Pool& pool) 75 | { 76 | pool.clear(); 77 | pool.terminate_all_workers(false); // NOTE(review): false here, true in the two waiting policies; presumably selects whether to block until the workers have terminated - confirm 78 | } 79 | }; 80 | 81 | } } // namespace boost::threadpool 82 | 83 | #endif // THREADPOOL_SHUTDOWN_POLICIES_HPP_INCLUDED 84 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/Makefile: -------------------------------------------------------------------------------- 1 | DOXYGEN=doxygen 2 | 3 | doc: clean copy_footer generate_doc 4 | 5 | copy_footer: 6 | cp footer/*.htm . 7 | 8 | copy_footer_sf: 9 | cp footer_sf/*.htm . 
10 | 11 | generate_doc: 12 | $(DOXYGEN) overview.dox 13 | $(DOXYGEN) reference.dox 14 | $(DOXYGEN) tutorial.dox 15 | $(DOXYGEN) examples.dox 16 | $(DOXYGEN) design.dox 17 | # NOTE(review): RELEASE_VERSION is used below but not assigned in this Makefile; presumably passed on the make command line or via the environment - confirm 18 | clean: 19 | rm -rf ../../../doc 20 | rm -rf ../../../threadpool-$(RELEASE_VERSION)-doc 21 | rm -rf ../../../threadpool-$(RELEASE_VERSION)-doc.zip 22 | 23 | release_doc: clean doc 24 | cd ../../..; mv doc threadpool-$(RELEASE_VERSION)-doc 25 | cd ../../..; zip -r9 threadpool-$(RELEASE_VERSION)-doc.zip threadpool-$(RELEASE_VERSION)-doc 26 | cd ../../..; mv threadpool-$(RELEASE_VERSION)-doc doc 27 | 28 | doc_sf: clean copy_footer_sf generate_doc 29 | # NOTE(review): RELEASE_MANAGER is likewise not assigned in this Makefile 30 | deploy_sf: doc_sf 31 | scp -vr ../../../doc/* $(RELEASE_MANAGER)@shell.sourceforge.net:/home/groups/t/th/threadpool/htdocs 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/design.txt: -------------------------------------------------------------------------------- 1 | /*! \page intro TODO5 2 | 3 | 4 | A thread pool manages a homogeneous pool of worker threads. In general worker threads are created once and are used to 5 | process a sequence of tasks. The re-use of threads reduces the thread creation and teardown overhead. The resource 6 | consumption is limited as the number of threads can be restricted. This leads to an improved system stability. 7 | 8 | Thread-per-task 9 | 10 | TODO5 11 | 12 | light-weight, no administration/management thread, synchronized scheduler, e.g. queue 13 | 14 | 15 | */ 16 | 17 | 18 | /*! \page pattern Thread Pool Pattern 19 | 20 | 21 | In the thread pool pattern in programming, a number of N threads are created to perform a number of M tasks, usually organized in a queue. Typically, N << M. As soon as a thread completes its task, it will request the next task from the queue until all tasks have been completed. 
The thread can then terminate, or sleep until there are new tasks available. 22 | 23 | The number of threads used (N) is a parameter that can be tuned to provide the best performance. 24 | 25 | The advantage of using a Thread Pool over creating a new thread for each task, is that thread creation and destruction overhead is negated, which may result in better performance and better system stability. 26 | 27 |
28 | From Wikipedia , the free encyclopedia. 29 | 30 | 31 | 32 | */ 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/design_header.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | threadpool Documentation 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 15 | 22 | 23 | 24 |
13 | $projectname 14 | 16 | Overview | 17 | Reference | 18 | Tutorial | 19 | Examples | 20 | Design 21 |
25 |
26 |
27 | 28 | 29 | 32 | 36 | 37 |
30 | Design 31 | 33 | TODO5 | 34 | Thread Pool Pattern 35 |
38 |
39 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/examples.txt: -------------------------------------------------------------------------------- 1 | 2 | /*! \page intro TODO3 3 | 4 | See src/examples/mergesort/mergesort.cpp 5 |
6 | 7 | */ 8 | 9 | 10 | /*! \page pattern TODO4 11 | TODO4 12 | 13 | */ 14 | 15 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/examples_header.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | threadpool Documentation 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 15 | 22 | 23 | 24 |
13 | $projectname 14 | 16 | Overview | 17 | Reference | 18 | Tutorial | 19 | Examples | 20 | Design 21 |
25 |
26 |
27 | 28 | 29 | 32 | 36 | 37 |
30 | Examples 31 | 33 | TODO3 | 34 | TODO4 35 |
38 |
39 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/footer/common_footer.htm: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 5 | 6 | 13 | 14 |
Copyright © 2005-2008 Philipp Henkel 7 | Overview | 8 | Reference | 9 | Tutorial | 10 | Examples | 11 | Design 12 |
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/footer/overview_footer.htm: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 5 | 6 | 13 | 14 |
Copyright © 2005-2008 Philipp Henkel 7 | Overview | 8 | Reference | 9 | Tutorial | 10 | Examples | 11 | Design 12 |
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/footer_sf/common_footer.htm: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 5 | 6 | 13 | 14 |
Copyright © 2005-2008 Philipp Henkel 7 | Overview | 8 | Reference | 9 | Tutorial | 10 | Examples | 11 | Design 12 |
15 |
16 | 17 |

18 | Hosted by 19 | 20 | SourceForge.net Logo 26 | 27 |

28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/footer_sf/overview_footer.htm: -------------------------------------------------------------------------------- 1 |
2 |
3 | 4 | 5 | 6 | 13 | 14 |
Copyright © 2005-2008 Philipp Henkel 7 | Overview | 8 | Reference | 9 | Tutorial | 10 | Examples | 11 | Design 12 |
15 |
16 | 17 |

18 | Hosted by 19 | 20 | SourceForge.net Logo 26 | 27 |

28 | 29 | 30 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | threadpool Documentation 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/overview_header.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | threadpool Documentation 4 | 5 | 6 | 7 |
8 | 9 | 10 | 13 | 20 | 21 |
11 | $projectname 12 | 14 | Overview | 15 | Reference | 16 | Tutorial | 17 | Examples | 18 | Design 19 |
22 |
23 |
24 | 25 | 26 | 29 | 34 | 35 |
27 | Overview 28 | 30 | Main Page | 31 | Change Log | 32 | Software License 33 |
36 |
37 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/reference_header.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | threadpool Documentation 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 15 | 22 | 23 | 24 |
13 | $projectname 14 | 16 | Overview | 17 | Reference | 18 | Tutorial | 19 | Examples | 20 | Design 21 |
25 |
26 |
27 | 28 | 29 | 32 | 38 | 39 |
30 | Reference 31 | 33 | Class List | 34 | Member Functions | 35 | Typedefs | 36 | File List 37 |
40 |
41 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/doc/tutorial_header.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | threadpool Documentation 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 15 | 22 | 23 | 24 |
13 | $projectname 14 | 16 | Overview | 17 | Reference | 18 | Tutorial | 19 | Examples | 20 | Design 21 |
25 |
26 |
27 | 28 | 29 | 32 | 38 | 39 |
30 | Tutorial 31 | 33 | Quick Start | 34 | Prioritized Tasks | 35 | Arbitrary Task Functions | 36 | Advanced Pool Instantiation 37 |
40 |
41 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/example/mergesort/Jamfile.v2: -------------------------------------------------------------------------------- 1 | 2 | project 3 | : requirements 4 | ../../../.. 5 | /boost/thread//boost_thread 6 | BOOST_ALL_NO_LIB=1 7 | multi 8 | static 9 | ; 10 | 11 | exe mergesort : mergesort.cpp ; 12 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/quickstart/Jamfile.v2: -------------------------------------------------------------------------------- 1 | 2 | project 3 | : requirements 4 | ../../.. 5 | /boost/thread//boost_thread 6 | BOOST_ALL_NO_LIB=1 7 | multi 8 | static 9 | ; 10 | 11 | exe quickstart : quickstart.cpp ; 12 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/quickstart/README: -------------------------------------------------------------------------------- 1 | Supposing the Boost libraries are installed in /usr/local/lib, Linux users also need to 2 | set LD_LIBRARY_PATH as follows: 3 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib 4 | 5 | Please adapt threadpool.makefile to your system. 6 | 7 | Special thanks to Rudy Prabowo who contributed this example to help Linux users. 8 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/quickstart/quickstart.cpp: -------------------------------------------------------------------------------- 1 | /*! \file 2 | * \brief Quick start example. 3 | * 4 | * This is a very simple example which can be used to configure the threadpool environment on your system. 
5 | * 6 | * Copyright (c) 2005-2006 Philipp Henkel 7 | * 8 | * Distributed under the Boost Software License, Version 1.0. (See 9 | * accompanying file LICENSE_1_0.txt or copy at 10 | * http://www.boost.org/LICENSE_1_0.txt) 11 | * 12 | * http://threadpool.sourceforge.net 13 | * 14 | */ 15 | 16 | #include 17 | 18 | #include 19 | 20 | using namespace std; 21 | using namespace boost::threadpool; 22 | 23 | // Example tasks: each one writes a single line to cout. 24 | void first_task() 25 | { 26 | cout << "first task is running\n" ; 27 | } 28 | 29 | void second_task() 30 | { 31 | cout << "second task is running\n" ; 32 | } 33 | // Schedules both example tasks on a pool constructed with 2, waits for them and returns 0; argc and argv are not used. 34 | int main(int argc,char *argv[]) 35 | { 36 | // Create fifo thread pool container with two threads. 37 | pool tp(2); 38 | 39 | // Add some tasks to the pool. 40 | tp.schedule(&first_task); 41 | tp.schedule(&second_task); 42 | 43 | // Wait until all tasks are finished. 44 | tp.wait(); 45 | 46 | // Now all tasks are finished! 47 | return(0); 48 | } 49 | 50 | 51 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/test/compile_all/Jamfile.v2: -------------------------------------------------------------------------------- 1 | 2 | project 3 | : requirements 4 | ../../../.. 5 | /boost/thread//boost_thread 6 | BOOST_ALL_NO_LIB=1 7 | multi 8 | static 9 | ; 10 | 11 | exe compile_all : compile_all.cpp ; 12 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/libs/threadpool/tutorial/Jamfile.v2: -------------------------------------------------------------------------------- 1 | 2 | project 3 | : requirements 4 | ../../.. 
5 | /boost/thread//boost_thread 6 | BOOST_ALL_NO_LIB=1 7 | multi 8 | static 9 | ; 10 | 11 | exe tutorial : tutorial.cpp ; 12 | -------------------------------------------------------------------------------- /platform/master/src/third_party/boost_threadpool/threadpool/project-root.jam: -------------------------------------------------------------------------------- 1 | import os ; 2 | path-constant BOOST_ROOT : [ os.environ BOOST_ROOT ] ; 3 | -------------------------------------------------------------------------------- /platform/master/src/third_party/chron_2.3-45.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/chron_2.3-45.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/data.table_1.9.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/data.table_1.9.4.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/digest_0.6.8.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/digest_0.6.8.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/gbm_2.1.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/gbm_2.1.1.tar.gz -------------------------------------------------------------------------------- 
/platform/master/src/third_party/libcgroup-0.41.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/libcgroup-0.41.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/libuuid-1.0.3.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/libuuid-1.0.3.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/plyr_1.8.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/plyr_1.8.1.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/protobuf-2.3.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/protobuf-2.3.0.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/protobuf-2.6.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/protobuf-2.6.1.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/randomForest_4.6-10.tar.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/randomForest_4.6-10.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/reshape2_1.4.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/reshape2_1.4.1.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/stringr_0.6.2.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/stringr_0.6.2.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/testthat_0.9.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/testthat_0.9.1.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/unixODBC-2.3.1.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/unixODBC-2.3.1.tar.gz -------------------------------------------------------------------------------- /platform/master/src/third_party/zeromq-2.2.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/third_party/zeromq-2.2.0.tar.gz 
-------------------------------------------------------------------------------- /platform/master/src/worker/Makefile: -------------------------------------------------------------------------------- 1 | # --- COMMON --- 2 | 3 | R_CXXFLAGS = $(shell R CMD config --cppflags) $(shell Rscript -e "Rcpp:::CxxFlags()") $(shell Rscript -e "RInside:::CxxFlags()") 4 | 5 | #R_LDFLAGS = $(shell R CMD config --ldflags) $(shell Rscript -e "Rcpp:::LdFlags()") $(shell Rscript -e "RInside:::LdFlags()") 6 | R_LDFLAGS = -L$(shell Rscript -e "cat(R.home('home'), '/lib', sep='')") -lR -L$(shell Rscript -e "cat(system.file(package='RInside'),'/lib',sep='')") -Wl,-static -lRInside -Wl,-Bdynamic -Wl,-rpath,$(shell Rscript -e "cat(system.file(package='RInside'),'/lib',sep='')") 7 | 8 | CXXFLAGS = -fPIC -std=c++0x -g -O2 -fopenmp -finline-limit=10000 -DNDEBUG -I../third_party/install/include -I./ -I../common/ -I../messaging/gen-cpp/ -I../third_party/boost_1_50_0/ -I../third_party/boost_threadpool/threadpool 9 | 10 | LDFLAGS = -L../third_party/install/lib -lpthread -lboost_thread -lboost_system -lboost_log -lboost_log_setup -lboost_chrono -lboost_filesystem -lboost_date_time -latomicio -lR-common -lR-proto -lzmq -luuid -lprotobuf -lrt -lcgroup 11 | 12 | # To enable zipkin-based tracing add the following to CXXFLAGS and LDFLAGS 13 | # BLKIN_INCLUDE = -I../third_party/install/include/blkin -DPERF_TRACE 14 | # BLKIN_LINKER_FLAGS = -lzipkin-c -lblkin-front -lzipkin-cpp 15 | 16 | PRESTO_COMMON_DIR = . 
17 | PRESTO_COMMON_HEADERS = $(wildcard ${PRESTO_COMMON_DIR}/*.h) 18 | PRESTO_COMMON_SRC = $(wildcard ${PRESTO_COMMON_DIR}/*.cpp) 19 | PRESTO_COMMON_OBJS = $(PRESTO_COMMON_SRC:.cpp=.o) 20 | 21 | all: $(PRESTO_COMMON_OBJS) ../../inst/bin/R-worker-bin 22 | 23 | # Common objects 24 | %.o: %.cpp $(PRESTO_COMMON_HEADERS) $(wildcard ../common/*.h) 25 | g++ -c $< $(CXXFLAGS) $(R_CXXFLAGS) -o $@ 26 | 27 | # Worker executable: links all object files into R-worker-bin 28 | ../../inst/bin/R-worker-bin: $(PRESTO_COMMON_OBJS) 29 | mkdir -p ../../inst/lib 30 | mkdir -p ../../inst/bin 31 | g++ $(PRESTO_COMMON_OBJS) $(LDFLAGS) $(R_LDFLAGS) -o $@ 32 | 33 | clean: 34 | rm -rf *.o ../../inst/bin/R-worker-bin 35 | -------------------------------------------------------------------------------- /platform/master/src/worker/RequestLogger.cpp: -------------------------------------------------------------------------------- 1 | #include "RequestLogger.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "dLogger.h" 8 | 9 | namespace presto { 10 | 11 | // Stores the name that Update() prints in its begin/end log markers. 12 | RequestLogger::RequestLogger(const string &name) : mName(name) 13 | { 14 | } 15 | 16 | RequestLogger::~RequestLogger() 17 | { 18 | } 19 | // Renders the message with protobuf TextFormat and writes it to the debug log in pieces of at most MAX_LOG_SIZE characters, bracketed by "### begin <name>" and "### end <name>" entries. 20 | void RequestLogger::Update(google::protobuf::Message& message) 21 | { 22 | string message_text; 23 | google::protobuf::TextFormat::PrintToString(message, &message_text); 24 | 25 | LOG_DEBUG("### begin %s", mName.c_str()); 26 | // max log message size is 1024, platform/common/dLogger.h 27 | // split in chunks 28 | // choose 768 to be on the safe side 29 | unsigned int MAX_LOG_SIZE = 768; 30 | for(unsigned int i = 0; i < message_text.size(); i+= MAX_LOG_SIZE) { 31 | unsigned int chunk_size = min(static_cast(message_text.size()) - i, MAX_LOG_SIZE); 32 | LOG_DEBUG("%s", message_text.substr(i, chunk_size).c_str()); 33 | } 34 | LOG_DEBUG("### end %s", mName.c_str()); 35 | } 36 | 37 | } //namespace presto 38 | -------------------------------------------------------------------------------- /platform/master/src/worker/RequestLogger.h: 
-------------------------------------------------------------------------------- 1 | #ifndef REQUESTLOGGER_H 2 | #define REQUESTLOGGER_H 3 | 4 | #include "Observer.h" 5 | #include 6 | #include 7 | 8 | using namespace std; // NOTE(review): a using-directive in a header applies to every file that includes it 9 | 10 | namespace presto { 11 | // IObserver implementation that is constructed with a name and receives protobuf messages through Update(). 12 | class RequestLogger : public IObserver 13 | { 14 | public: 15 | explicit RequestLogger(const string &name); 16 | ~RequestLogger(); 17 | // Called with each message to be logged; defined in RequestLogger.cpp. 18 | void Update(google::protobuf::Message& message); 19 | private: 20 | string mName; // name passed to the constructor 21 | 22 | }; 23 | 24 | } //namespace presto 25 | 26 | 27 | #endif // REQUESTLOGGER_H 28 | -------------------------------------------------------------------------------- /platform/master/src/worker/man/update.Rd: -------------------------------------------------------------------------------- 1 | \name{update} 2 | \alias{update} 3 | \title{ 4 | update 5 | } 6 | \description{ 7 | \code{\link{update}} allows propagating changes of a given \code{\link{darray}}. The \code{\link{update}} constructor creates a version vector that describes the state of the input \code{\link{darray}}, including the version of partitions that may be distributed across machines. The version vector is sent to all waiting tasks registered by \code{\link{onchange}}. 8 | } 9 | \usage{ 10 | update(inDarray) 11 | } 12 | \arguments{ 13 | \item{inDarray}{ 14 | An input \code{\link{darray}} whose changes are going to be propagated 15 | } 16 | } 17 | \value{ 18 | No return value. The changes of the input \code{\link{darray}} will be propagated to workers that are registered using \code{\link{onchange}}. 
19 | } 20 | \references{ 21 | %% ~put references to the literature/web site here ~ 22 | } 23 | \author{ 24 | } 25 | \note{ 26 | %% ~~further notes~~ 27 | } 28 | 29 | %% ~Make other sections like Warning with \section{Warning }{....} ~ 30 | 31 | \seealso{ 32 | \code{\link{darray}}, \code{\link{splits}}, \code{\link{update}}, \code{\link{onchange}} 33 | } 34 | \examples{ 35 | \dontrun{ 36 | library(PrestoMaster) 37 | presto_start() 38 | da1 <- darray(dim=c(9,9), blocks=c(3,3), sparse=F, data=10) 39 | cat("Number of partitions of da1 is ", length(splits(da1)), " and ", numsplits(da1)) 40 | da2 <- darray(dim=c(9,9), blocks=c(3,3), sparse=F, data=5) 41 | result <- darray(dim=c(9,9), blocks=c(3,3)) 42 | foreach(i, 1:numsplits(da1), 43 | add<-function(a = splits(da1,i), 44 | b = splits(da2,i), 45 | c = splits(result,i)){ 46 | c <- a + 2*b 47 | update(c) 48 | }) 49 | getpartition(result) 50 | presto_shutdown() 51 | } 52 | } 53 | 54 | -------------------------------------------------------------------------------- /platform/master/src/worker/scripts/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | mkdir -p diagrams/ 3 | python generate_diagrams.py logs/R_worker_jorgem_127.0.0.1.50000.log diagrams/a.js diagrams/a.msc 4 | ./msc-generator-4.4/src/msc-gen --nocopyright -T png -o diagrams/a.png -i diagrams/a.msc 5 | -------------------------------------------------------------------------------- /platform/master/src/worker/scripts/msc-generator-4.4.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/src/worker/scripts/msc-generator-4.4.tar.gz -------------------------------------------------------------------------------- /platform/master/src/worker/scripts/parse_request_trace.py: -------------------------------------------------------------------------------- 1 | import re 2 | 
import sys
from datetime import datetime
import pdb  # no longer used by parse(); kept so the module's imports are unchanged


class RequestTraceParser:
    """Extract request records from a debug log.

    A record is the text found between a timestamped ``[DEBUG] ### begin``
    marker and the next timestamped ``[DEBUG] ### end`` marker.  Inside a
    record, whatever precedes the timestamped ``[DEBUG] type: <NAME>`` entry
    is stored as the component (newlines and spaces removed), ``<NAME>`` as
    the request type, and everything after it as the detailed message.
    """

    def __init__(self):
        # Per-instance storage.  This used to be a class attribute, which made
        # every parser share (and keep appending to) one single list.
        self.requests = []
        self.regex = r'(\d+-\w+-\d+\s+\d+:\d+:\d+\.\d+)\s+\[DEBUG\]\s+### begin(.*?)(\d+-\w+-\d+\s+\d+:\d+:\d+\.\d+)\s+\[DEBUG\]\s+### end'
        #self.regex2 = r'type\:\s+(\w+)'
        self.regex3 = r'(.*?)(\d+-\w+-\d+\s+\d+:\d+:\d+\.\d+)\s+\[DEBUG\]\s+type:\s+(\w+)(.*)'
        # Not consulted by parse(); callers pass their own exclusion list.
        self.excludes = ['HELLO','HELLOREPLY']

    def parse(self, filename, excludes=None):
        """Read ``filename`` and append one dict per request to ``self.requests``.

        filename -- path of the log file to read.
        excludes -- container of request type names to drop; ``None`` (the
                    default) drops nothing.

        Each stored dict has the keys ``component``, ``timestamp``, ``type``
        and ``detailed_message``.  A begin/end block that holds no
        ``type:`` entry is reported on stderr and skipped instead of opening
        an interactive debugger, so the remaining blocks are still parsed.
        Returns ``None``.
        """
        if excludes is None:
            excludes = ()
        flags = re.DOTALL | re.MULTILINE

        # Only the read needs the file handle; release it before parsing.
        with open(filename) as handle:
            content = handle.read()

        for begin_ts, block, _end_ts in re.findall(self.regex, content, flags):
            match = re.search(self.regex3, block, flags)
            if match is None:
                sys.stderr.write(
                    'warning: no "type:" entry in block starting at %s; skipped\n'
                    % begin_ts)
                continue
            component, ts, reqtype, detailed_task = match.groups()
            component = component.replace('\n','').replace(' ','')
            if reqtype not in excludes:
                self.requests.append({'component': component,
                                      'timestamp': ts,
                                      'type': reqtype,
                                      'detailed_message': detailed_task})

    def get_by_type(self, reqtype):
        """Return the parsed requests whose ``type`` equals ``reqtype``."""
        return self.get_by_criteria('type', reqtype)

    def get_by_component(self, component):
        """Return the parsed requests whose ``component`` equals ``component``."""
        return self.get_by_criteria('component', component)

    def get_by_criteria(self, key, value):
        """Return a new list of the parsed requests for which ``r[key] == value``."""
        return [r for r in self.requests if r[key] == value]

    def get_all(self):
        """Return the list of every request parsed so far (not a copy)."""
        return self.requests


if __name__ == "__main__":
    r = RequestTraceParser()
    r.parse(sys.argv[1], excludes=['HELLO', 'HELLOREPLY'])

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('###\n### Master\n###\n')
    for request in r.get_by_component('MasterRequestLogger'):
        print(request)
    print('###\n### Worker\n###\n')
    for request in r.get_by_component('WorkerRequestLogger'):
        print(request)
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/platform/master/vignettes/architecture.pdf -------------------------------------------------------------------------------- /vRODBC/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: vRODBC 2 | Version: 1.2.0 3 | Revision: $Rev: 1 $ 4 | Date: 2015-04-17 5 | Author: HP Vertica Analytics Team. RODBC package authored by Brian Ripley, and from 1999 to Oct 2002 Michael Lapsley. 6 | Maintainer: HP Vertica Analytics Team 7 | Title: ODBC Database Access 8 | Description: An ODBC database interface. Includes modifications for improved access to Vertica database 9 | SystemRequirements: An ODBC3 driver manager and drivers. 10 | Depends: R (>= 2.12.0), utils 11 | Imports: stats 12 | LazyLoad: yes 13 | License: GPL-2 | GPL-3 14 | BuildVignettes: no 15 | Packaged: 2013-08-16 17:55:15 UTC; 16 | Repository: 17 | Date/Publication: 18 | -------------------------------------------------------------------------------- /vRODBC/NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(vRODBC, .registration = TRUE, .fixes = "C_") 2 | 3 | importFrom(stats, runif) 4 | 5 | export(getSqlTypeInfo, odbcClearError, odbcClose, odbcCloseAll, 6 | odbcConnect, odbcDataSources, odbcDriverConnect, odbcEndTran, 7 | odbcFetchRows, odbcGetErrMsg, odbcGetInfo, odbcQuery, 8 | odbcReConnect, odbcSetAutoCommit, odbcTables, odbcUpdate, 9 | setSqlTypeInfo, sqlClear, sqlColumns, sqlCopy, sqlCopyTable, 10 | sqlDrop, sqlFetch, sqlFetchMore, sqlGetResults, sqlPrimaryKeys, 11 | sqlQuery, sqlSave, sqlTables, sqlTypeInfo, sqlUpdate) 12 | 13 | S3method(close, vRODBC) 14 | S3method(print, vRODBC) 15 | S3method(format, "ODBC_binary") 16 | S3method(print, "ODBC_binary") 17 | 18 | if(tools:::.OStype() == "windows") { 19 | 
export(odbcConnectAccess, odbcConnectAccess2007, odbcConnectDbase, 20 | odbcConnectExcel, odbcConnectExcel2007) 21 | } 22 | -------------------------------------------------------------------------------- /vRODBC/cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -f config.* src/Makevars src/config.h 4 | -------------------------------------------------------------------------------- /vRODBC/configure.win: -------------------------------------------------------------------------------- 1 | cp src/config.h.win src/config.h 2 | -------------------------------------------------------------------------------- /vRODBC/inst/LICENCE: -------------------------------------------------------------------------------- 1 | This package is free software; you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation; either version 2 or 3 of the License 4 | (at your option). 5 | 6 | This program is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | A copy of the GNU General Public License is available at 12 | http://www.r-project.org/Licenses/ . A copy of version 2 is 13 | included in the sources as file GPL-2. 
14 | -------------------------------------------------------------------------------- /vRODBC/inst/doc/RODBC.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/doc/RODBC.pdf -------------------------------------------------------------------------------- /vRODBC/inst/po/da/LC_MESSAGES/R-RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/da/LC_MESSAGES/R-RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/da/LC_MESSAGES/RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/da/LC_MESSAGES/RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/de/LC_MESSAGES/R-RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/de/LC_MESSAGES/R-RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/de/LC_MESSAGES/RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/de/LC_MESSAGES/RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/en@quot/LC_MESSAGES/R-RODBC.mo: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/en@quot/LC_MESSAGES/R-RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/en@quot/LC_MESSAGES/RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/en@quot/LC_MESSAGES/RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/pl/LC_MESSAGES/R-RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/pl/LC_MESSAGES/R-RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/po/pl/LC_MESSAGES/RODBC.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/po/pl/LC_MESSAGES/RODBC.mo -------------------------------------------------------------------------------- /vRODBC/inst/tests.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/inst/tests.R -------------------------------------------------------------------------------- /vRODBC/man/odbcClose.Rd: -------------------------------------------------------------------------------- 1 | \name{odbcClose} 2 | \alias{odbcClose} 3 | \alias{odbcCloseAll} 4 | \alias{close.vRODBC} 5 | \title{ODBC Close Connections} 6 | \description{ 7 | Close connections to ODBC databases. 
8 | } 9 | \usage{ 10 | odbcClose(channel) 11 | 12 | \method{close}{vRODBC}(con, \dots) 13 | 14 | odbcCloseAll() 15 | } 16 | \arguments{ 17 | \item{channel, con}{vRODBC connection object as returned by 18 | \code{odbcConnect}.} 19 | \item{\dots}{additional arguments passed from the generic.} 20 | } 21 | \details{ 22 | \code{odbcClose} cleans up and frees resources. It is also the method for the 23 | generic function \code{close}. 24 | 25 | \code{odbcCloseAll} closes all open channels (amongst the first 1000 26 | used in the session). 27 | 28 | Channels are closed at the end of an \R session, and may also be 29 | closed by garbage collection if no object refers to them. In both 30 | cases a warning is given (but may not be seen at the end of a console 31 | session). 32 | } 33 | \value{ 34 | Function \code{odbcClose} returns invisibly a logical indicating if it 35 | succeeded. 36 | } 37 | \author{ 38 | Michael Lapsley and Brian Ripley 39 | } 40 | \seealso{ 41 | \code{\link{odbcConnect}} 42 | } 43 | \keyword{IO} 44 | \keyword{database} 45 | -------------------------------------------------------------------------------- /vRODBC/man/odbcDataSources.Rd: -------------------------------------------------------------------------------- 1 | \name{odbcDataSources} 2 | \alias{odbcDataSources} 3 | \title{List ODBC Data Sources} 4 | \description{ 5 | List known ODBC data sources. 6 | } 7 | \usage{ 8 | odbcDataSources(type = c("all", "user", "system")) 9 | } 10 | \arguments{ 11 | \item{type}{User DSNs, system DSNs or all?} 12 | } 13 | \value{ 14 | A named character vector of DSN descriptions, with names the DSNs. 
15 | } 16 | \author{ 17 | Brian Ripley 18 | } 19 | \examples{\dontrun{ 20 | > odbcDataSources() 21 | test sqlite3 testpg 22 | "MySQL" "sqlite3" "PostgreSQL" 23 | or 24 | testdb3 sqlite3 25 | "MySQL ODBC 3.51 Driver" "SQLite3 ODBC Driver" 26 | bdr.xls testacc 27 | "Microsoft Excel Driver (*.xls)" "Microsoft Access Driver (*.mdb)" 28 | testpg SQLServer 29 | "PostgreSQL ANSI" "SQL Native Client" 30 | Oracle DB2 31 | "Oracle in OraDb10g_home1" "IBM DB2 ODBC DRIVER - DB2COPY1" 32 | testpgw testdb5 33 | "PostgreSQL Unicode" "MySQL ODBC 5.1 Driver" 34 | SQLite Datasource SQLite UTF-8 Datasource 35 | "SQLite ODBC Driver" "SQLite ODBC (UTF-8) Driver" 36 | SQLite3 Datasource mimdb 37 | "SQLite3 ODBC Driver" "MIMER" 38 | }} 39 | \keyword{utilities} -------------------------------------------------------------------------------- /vRODBC/man/odbcGetInfo.Rd: -------------------------------------------------------------------------------- 1 | \name{odbcGetInfo} 2 | \alias{odbcGetInfo} 3 | \title{Request Information on an ODBC Connection} 4 | \description{ 5 | Request information on an ODBC connection. 6 | } 7 | \usage{ 8 | odbcGetInfo(channel) 9 | } 10 | \arguments{ 11 | \item{channel}{connection handle as returned by 12 | \code{\link{odbcConnect}} of class \code{"vRODBC"}.} 13 | } 14 | \value{ 15 | A named character string giving information on the database and ODBC driver 16 | in use on the connection \code{channel}. 
17 | } 18 | \author{ 19 | Brian Ripley 20 | } 21 | \examples{\dontrun{ 22 | odbcGetInfo(channel) # under Windows XP 23 | ## MySQL returned 24 | DBMS_Name DBMS_Ver Driver_ODBC_Ver 25 | "MySQL" "5.1.35-community" "03.51" 26 | Data_Source_Name Driver_Name Driver_Ver 27 | "testdb5" "myodbc5.dll" "05.01.0005" 28 | ODBC_Ver Server_Name 29 | "03.52.0000" "localhost via TCP/IP" 30 | ## MS Access returned 31 | DBMS_Name DBMS_Ver Driver_ODBC_Ver Data_Source_Name 32 | "ACCESS" "04.00.0000" "03.51" "testacc" 33 | Driver_Name Driver_Ver ODBC_Ver Server_Name 34 | "odbcjt32.dll" "04.00.6305" "03.52.0000" "ACCESS" 35 | ## SQL Server 2008 Express returned 36 | DBMS_Name DBMS_Ver Driver_ODBC_Ver 37 | "Microsoft SQL Server" "10.00.1600" "03.52" 38 | Data_Source_Name Driver_Name Driver_Ver 39 | "SQLServer" "SQLNCLI.DLL" "09.00.4035" 40 | ODBC_Ver Server_Name 41 | "03.52.0000" "AUK\\\\SQLEXPRESS" 42 | }} 43 | \keyword{IO} 44 | \keyword{database} 45 | -------------------------------------------------------------------------------- /vRODBC/man/odbcSetAutoCommit.Rd: -------------------------------------------------------------------------------- 1 | \name{odbcSetAutoCommit} 2 | \alias{odbcSetAutoCommit} 3 | \alias{odbcEndTran} 4 | \title{ODBC Set Auto-Commit Mode} 5 | \description{ 6 | Set ODBC database connection's auto-commit mode. 7 | } 8 | \usage{ 9 | odbcSetAutoCommit(channel, autoCommit = TRUE) 10 | 11 | odbcEndTran(channel, commit = TRUE) 12 | } 13 | \arguments{ 14 | \item{channel}{RODBC connection object returned by \code{odbcConnect}.} 15 | \item{autoCommit}{logical. Set auto-commit on?} 16 | \item{commit}{logical. Commit or rollback pending transaction?} 17 | } 18 | \details{ 19 | Auto-commit is a concept supported only by ODBC connections to 20 | transactional DBMSs. 21 | 22 | If a connection to a transactional DBMS is in auto-commit mode (the 23 | default), then all its SQL statements will be executed and committed 24 | as individual transactions. 
Otherwise, its SQL statements are grouped 25 | into transactions that are terminated by an execution of \code{commit} 26 | or \code{rollback}. Switching a connection to auto-commit mode 27 | commits the pending transaction. 28 | 29 | By default, new connections are in auto-commit mode. If auto-commit 30 | mode has been disabled, a call to \code{odbcEndTran} or an SQL 31 | \code{commit} statement must be executed in order to commit changes; 32 | otherwise, pending database changes will not be saved. 33 | } 34 | \value{ 35 | \code{odbcSetAutoCommit} stops if \code{channel} is an invalid 36 | connection. The function returns \code{-1} on error, \code{0} on 37 | success and \code{1} on success with a message that would be returned 38 | by \code{odbcGetErrMsg}. 39 | } 40 | \author{ 41 | Norman Yamada, Yasser El-Zein 42 | } 43 | \keyword{IO} 44 | \keyword{database} 45 | -------------------------------------------------------------------------------- /vRODBC/man/setSqlTypeInfo.Rd: -------------------------------------------------------------------------------- 1 | \name{setSqlTypeInfo} 2 | \alias{setSqlTypeInfo} 3 | \alias{getSqlTypeInfo} 4 | \title{Specify or Query a Mapping of R Types to DBMS Types} 5 | \description{ 6 | Specify or retrieve a mapping of \R types to DBMS datatypes. 7 | } 8 | \usage{ 9 | setSqlTypeInfo(driver, value) 10 | 11 | getSqlTypeInfo(driver) 12 | } 13 | \arguments{ 14 | \item{driver}{A character string specifying the \code{DBMS_name} as returned 15 | by \code{\link{odbcGetInfo}}. Optional for \code{getSqlTypeInfo}.} 16 | \item{value}{A named list with character values. This should have 17 | names \code{"double"}, \code{"integer"}, \code{"character"} and 18 | \code{"logical"}, and values SQL types appropriate to the DBMS.} 19 | } 20 | \details{ 21 | This information is used by \code{\link{sqlSave}} if it creates a 22 | table in the DBMS and is not overridden by arguments \code{typeInfo} 23 | or \code{varTypes}. 
Mappings are included for MySQL, PostgreSQL, 24 | SQLite, Oracle, Mimer, DB2 on Windows, and the Microsoft SQL Server, 25 | Access, Excel and Dbase drivers. 26 | 27 | The SQL types chosen should be nullable to allow \code{NA}s to be 28 | represented. (Bit and boolean types often are not.) 29 | } 30 | \value{ 31 | For \code{setSqlTypeInfo} none. 32 | 33 | For \code{getSqlTypeInfo} with an argument, a named list. Without an 34 | argument, a data frame. 35 | } 36 | \author{ 37 | Brian Ripley 38 | } 39 | \seealso{ 40 | \code{\link{sqlTypeInfo}}, \code{\link{sqlSave}}. 41 | } 42 | \examples{\dontrun{ 43 | getSqlTypeInfo() 44 | getSqlTypeInfo("MySQL") 45 | setSqlTypeInfo("Microsoft SQL Server", 46 | list(double="float", integer="int", 47 | character="varchar(255)", logical="varchar(5)")) 48 | }} 49 | \keyword{IO} 50 | \keyword{database} 51 | -------------------------------------------------------------------------------- /vRODBC/man/sqlCopy.Rd: -------------------------------------------------------------------------------- 1 | \name{sqlCopy} 2 | \alias{sqlCopy} 3 | \alias{sqlCopyTable} 4 | 5 | \title{ODBC Copy} 6 | \description{ 7 | Functions to copy tables or result sets from one database to another. 
8 | } 9 | \usage{ 10 | sqlCopy(channel, query, destination, destchannel = channel, 11 | verbose = FALSE, errors = TRUE, \dots) 12 | 13 | sqlCopyTable(channel, srctable, desttable, destchannel = channel, 14 | verbose = FALSE, errors = TRUE) 15 | } 16 | \arguments{ 17 | \item{channel, destchannel}{connection handle as returned by 18 | \code{\link{odbcConnect}}.} 19 | \item{query}{any valid SQL statement} 20 | \item{destination, srctable, desttable}{character: a 21 | database table name accessible from the connected DSN.} 22 | \item{verbose}{Display statements as they are sent to the server?} 23 | \item{errors}{if TRUE halt and display error, else return \code{-1}.} 24 | \item{\dots}{additional arguments to be passed to \code{\link{sqlSave}}.} 25 | } 26 | \details{ 27 | \code{sqlCopy} as is like \code{\link{sqlQuery}}, but saves the output 28 | of \code{query} in table \code{destination} on channel 29 | \code{destchannel}. 30 | 31 | \code{sqlCopyTable} copies the structure of \code{srctable} to 32 | \code{desttable} on DSN \code{destchannel}. This is within the 33 | limitations of the ODBC lowest common denominator. More precise 34 | control is possible \emph{via} \code{sqlQuery}. 35 | } 36 | \value{ 37 | See \code{\link{sqlGetResults}}. 
38 | } 39 | \seealso{ 40 | \code{\link{sqlQuery}}, \code{\link{sqlSave}} 41 | } 42 | \author{ 43 | Michael Lapsley and Brian Ripley 44 | } 45 | \examples{\dontrun{## example for a l/casing DBMS 46 | sqlSave(channel, USArrests, rownames = "state") 47 | query <- paste("select state, murder from usarrests", 48 | "where rape > 30", "order by murder") 49 | sqlCopy(channel, query, "HighRape", rownames = FALSE) 50 | sqlFetch(channel, "HighRape", rownames = "state", max = 5) 51 | sqlDrop(channel, "HighRape") 52 | }} 53 | \keyword{IO} 54 | \keyword{database} 55 | -------------------------------------------------------------------------------- /vRODBC/man/sqlDrop.Rd: -------------------------------------------------------------------------------- 1 | \name{sqlDrop} 2 | \alias{sqlDrop} 3 | \alias{sqlClear} 4 | 5 | \title{Deletion Operations on Tables in ODBC databases} 6 | \description{ 7 | \code{sqlClear} deletes all the rows of the table \code{sqtable}. 8 | 9 | \code{sqlDrop} removes the table \code{sqtable} (if permitted). 10 | } 11 | \usage{ 12 | sqlClear(channel, sqtable, errors = TRUE) 13 | 14 | sqlDrop(channel, sqtable, errors = TRUE) 15 | } 16 | \arguments{ 17 | \item{channel}{connection object as returned by \code{\link{odbcConnect}}.} 18 | \item{sqtable}{character string: a database table name accessible from the 19 | connected DSN. This can be a \sQuote{dotted} name of the form 20 | \code{\var{schema}.\var{table}}.} 21 | \item{errors}{logical: if \code{TRUE} halt and display error, else 22 | return \code{-1}.} 23 | } 24 | \details{ 25 | These submit \samp{TRUNCATE TABLE} and \samp{DROP TABLE} SQL queries 26 | respectively. 27 | 28 | \sQuote{Dotted} table names are allowed on systems that support them 29 | but the existence of the table is not checked and so attempting these 30 | operations on a non-existent table will give a low-level error. (This 31 | can be suppressed by opening the connection with \code{interpretDot = 32 | FALSE}.) 
33 | 34 | The default \sQuote{drop} behaviour in Oracle is to move the table to 35 | the \sQuote{recycle bin}: use 36 | \preformatted{ 37 | sqlQuery(channel, "PURGE recyclebin") 38 | } 39 | to empty the recycle bin. 40 | 41 | The current user might not have privileges to allow these operations, 42 | and Actual Technologies' Mac OS X SQLite driver has a bug causing them 43 | silently to fail. 44 | } 45 | 46 | \value{ 47 | If \code{errors = FALSE}, a numeric value, invisibly. 48 | Otherwise a character string or \code{invisible()}. 49 | } 50 | \seealso{ 51 | \code{\link{odbcConnect}}, \code{\link{sqlQuery}}, \code{\link{sqlFetch}}, 52 | \code{\link{sqlSave}}, \code{\link{sqlTables}}, \code{\link{odbcGetInfo}} 53 | } 54 | \author{ 55 | Michael Lapsley and Brian Ripley 56 | } 57 | \keyword{IO} 58 | \keyword{database} 59 | -------------------------------------------------------------------------------- /vRODBC/man/sqlTypeInfo.Rd: -------------------------------------------------------------------------------- 1 | \name{sqlTypeInfo} 2 | \alias{sqlTypeInfo} 3 | 4 | \title{Request Information about Data Types in an ODBC Database} 5 | \description{ 6 | Request information about data types in an ODBC database 7 | } 8 | \usage{ 9 | sqlTypeInfo(channel, type = "all", errors = TRUE, as.is = TRUE) 10 | } 11 | \arguments{ 12 | \item{channel}{connection handle as returned by \code{\link{odbcConnect}}.} 13 | \item{type}{The types of columns about which information is requested. 14 | Possible values are \code{"all"}, \code{"char"}, \code{"varchar"} , 15 | \code{"wchar"}, \code{"wvarchar"} (Unicode), \code{"real"}, 16 | \code{"float"}, \code{"double"}, \code{"integer"}, 17 | \code{"smallint"}, \code{"date"}, \code{"time"}, 18 | \code{"timestamp"}, \code{"binary"}, \code{"varbinary"}, 19 | \code{"longvarbinary"} and (its alias) \code{"blob"}. 
20 | } 21 | \item{errors}{logical: if true halt and display error, else return \code{-1}.} 22 | \item{as.is}{as in \code{\link{sqlGetResults}}.} 23 | } 24 | \details{ 25 | \code{sqlTypeInfo} attempts to find the types of columns the database 26 | supports: ODBC drivers are not required to support this (but all known 27 | examples do). Where it is supported, it is used by 28 | \code{\link{sqlSave}} to decide what column types to create when 29 | creating a new table in the database. 30 | } 31 | \value{ 32 | A data frame on success, or character/numeric on error depending on 33 | the \code{errors} argument. Use \code{\link{sqlGetResults}} for 34 | further details of errors. 35 | 36 | The columns returned may depend on the ODBC driver manager. For a 37 | fully ODBC 3 manager, see 38 | \url{http://msdn.microsoft.com/en-us/library/ms714632\%28VS.85\%29.aspx}: 39 | the symbolic constants mentioned there will be returned as numbers 40 | (and the values of the numeric constants can be found in the ODBC 41 | headers such as \file{sql.h} and \file{sqlext.h}). 
42 | } 43 | \seealso{ 44 | \code{\link{sqlGetResults}}, \code{\link{odbcGetInfo}} 45 | } 46 | \author{ 47 | Brian Ripley 48 | } 49 | \examples{\dontrun{ 50 | > names(sqlTypeInfo(channel)) 51 | [1] "TYPE_NAME" "DATA_TYPE" "COLUMN_SIZE" 52 | [4] "LITERAL_PREFIX" "LITERAL_SUFFIX" "CREATE_PARAMS" 53 | [7] "NULLABLE" "CASE_SENSITIVE" "SEARCHABLE" 54 | [10] "UNSIGNED_ATTRIBUTE" "FIXED_PREC_SCALE" "AUTO_UNIQUE_VALUE" 55 | [13] "LOCAL_TYPE_NAME" "MINIMUM_SCALE" "MAXIMUM_SCALE" 56 | [16] "SQL_DATATYPE" "SQL_DATETIME_SUB" "NUM_PREC_RADIX" 57 | [19] "INTERVAL_PRECISION" 58 | }} 59 | \keyword{IO} 60 | \keyword{database} 61 | -------------------------------------------------------------------------------- /vRODBC/man/vRODBC-internal.Rd: -------------------------------------------------------------------------------- 1 | \name{vRODBC-internal} 2 | \alias{odbcUpdate} 3 | \alias{odbcCaseFlag} 4 | \alias{odbcColumns} 5 | \alias{odbcPrimaryKeys} 6 | \alias{odbcSpecialColumns} 7 | \alias{sqltablecreate} 8 | \alias{sqlwrite} 9 | \alias{odbcTableExists} 10 | \alias{odbcClearResults} 11 | \alias{odbcValidChannel} 12 | 13 | \title{Internal vRODBC functions} 14 | \description{ 15 | Internal vRODBC functions: not exported from the \pkg{vRODBC} name space. 
16 | } 17 | \usage{ 18 | odbcUpdate(channel, query, data, params, test = FALSE, 19 | verbose = FALSE, nastring = NULL) 20 | odbcCaseFlag(channel) 21 | odbcColumns(channel, table, catalog = NULL, schema = NULL, literal = FALSE) 22 | odbcSpecialColumns(channel, table, catalog = NULL, schema = NULL) 23 | odbcPrimaryKeys(channel, table, catalog = NULL, schema = NULL) 24 | sqlwrite(channel, tablename, mydata, test = FALSE, fast = TRUE, 25 | nastring = NULL, verbose = FALSE) 26 | sqltablecreate(channel, tablename, coldata = NULL, colspecs, keys = -1) 27 | odbcTableExists(channel, tablename, abort = TRUE, forQuery = TRUE, 28 | allowDot = attr(channel, "interpretDot")) 29 | odbcClearResults(channel) 30 | odbcValidChannel(channel) 31 | } 32 | \keyword{internal} 33 | -------------------------------------------------------------------------------- /vRODBC/man/vRODBC-package.Rd: -------------------------------------------------------------------------------- 1 | \name{vRODBC-package} 2 | \alias{vRODBC} 3 | \title{ODBC Database Connectivity} 4 | 5 | \description{ 6 | Package \pkg{vRODBC} implements ODBC database connectivity. vRODBC is a 7 | modification to the Open-source RODBC package to provide improved data 8 | loading time with Vertica database. No functionality native to RODBC, 9 | for any other databases, has been hampered in any way. 10 | } 11 | 12 | \details{ 13 | Two groups of functions are provided. The mainly internal 14 | \code{odbc*} commands implement low-level access to the ODBC functions 15 | of similar name. The \code{sql*} functions operate at a higher level 16 | to read, save, copy and manipulate data between data frames and SQL 17 | tables. Many connections can be open at once to any combination of 18 | DSN/hosts. 19 | 20 | It is recommended not to load both RODBC and vRODBC packages in the same 21 | session as both packages share same user function names and can lead to 22 | unexpected behaviour. 
23 | } 24 | 25 | \author{ 26 | Michael Lapsley and Brian Ripley 27 | Modified by, Shreya Prasad 28 | } 29 | 30 | \keyword{IO} 31 | \keyword{database} 32 | -------------------------------------------------------------------------------- /vRODBC/src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS=@RODBC_CPPFLAGS@ 2 | PKG_LIBS=@LIBS@ 3 | 4 | -------------------------------------------------------------------------------- /vRODBC/src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS=-I. 2 | PKG_LIBS=-lodbc32 3 | -------------------------------------------------------------------------------- /vRODBC/src/config.h.in: -------------------------------------------------------------------------------- 1 | /* src/config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to 1 if you have the <inttypes.h> header file. */ 4 | #undef HAVE_INTTYPES_H 5 | 6 | /* Define to 1 if you have the <memory.h> header file. */ 7 | #undef HAVE_MEMORY_H 8 | 9 | /* Define to 1 if you have the <sqlext.h> header file. */ 10 | #undef HAVE_SQLEXT_H 11 | 12 | /* Define to 1 if the system has the type `SQLLEN'. */ 13 | #undef HAVE_SQLLEN 14 | 15 | /* Define to 1 if the system has the type `SQLULEN'. */ 16 | #undef HAVE_SQLULEN 17 | 18 | /* Define to 1 if you have the <sql.h> header file. */ 19 | #undef HAVE_SQL_H 20 | 21 | /* Define to 1 if you have the <stdint.h> header file. */ 22 | #undef HAVE_STDINT_H 23 | 24 | /* Define to 1 if you have the <stdlib.h> header file. */ 25 | #undef HAVE_STDLIB_H 26 | 27 | /* Define to 1 if you have the <strings.h> header file. */ 28 | #undef HAVE_STRINGS_H 29 | 30 | /* Define to 1 if you have the <string.h> header file. */ 31 | #undef HAVE_STRING_H 32 | 33 | /* Define to 1 if you have the <sys/stat.h> header file. */ 34 | #undef HAVE_SYS_STAT_H 35 | 36 | /* Define to 1 if you have the <sys/types.h> header file. */ 37 | #undef HAVE_SYS_TYPES_H 38 | 39 | /* Define to 1 if you have the <unistd.h> header file. 
*/ 40 | #undef HAVE_UNISTD_H 41 | 42 | /* Define to the address where bug reports for this package should be sent. */ 43 | #undef PACKAGE_BUGREPORT 44 | 45 | /* Define to the full name of this package. */ 46 | #undef PACKAGE_NAME 47 | 48 | /* Define to the full name and version of this package. */ 49 | #undef PACKAGE_STRING 50 | 51 | /* Define to the one symbol short name of this package. */ 52 | #undef PACKAGE_TARNAME 53 | 54 | /* Define to the version of this package. */ 55 | #undef PACKAGE_VERSION 56 | 57 | /* The size of a `long', as computed by sizeof. */ 58 | #undef SIZEOF_LONG 59 | 60 | /* Define to 1 if you have the ANSI C header files. */ 61 | #undef STDC_HEADERS 62 | -------------------------------------------------------------------------------- /vRODBC/src/config.h.win: -------------------------------------------------------------------------------- 1 | /* Define to 1 if you have the <inttypes.h> header file. */ 2 | #undef HAVE_INTTYPES_H 3 | 4 | /* Define to 1 if you have the <memory.h> header file. */ 5 | #undef HAVE_MEMORY_H 6 | 7 | /* Define to 1 if you have the <sqlext.h> header file. */ 8 | #define HAVE_SQLEXT_H 1 9 | 10 | /* Define to 1 if the system has the type `SQLLEN'. */ 11 | #define HAVE_SQLLEN 1 12 | 13 | /* Define to 1 if the system has the type `SQLULEN'. */ 14 | #define HAVE_SQLULEN 1 15 | 16 | /* Define to 1 if you have the <sql.h> header file. */ 17 | #define HAVE_SQL_H 1 18 | 19 | /* Define to 1 if you have the <stdint.h> header file. */ 20 | #undef HAVE_STDINT_H 21 | 22 | /* Define to 1 if you have the <stdlib.h> header file. */ 23 | #undef HAVE_STDLIB_H 24 | 25 | /* Define to 1 if you have the <strings.h> header file. */ 26 | #undef HAVE_STRINGS_H 27 | 28 | /* Define to 1 if you have the <string.h> header file. */ 29 | #undef HAVE_STRING_H 30 | 31 | /* Define to 1 if you have the <sys/stat.h> header file. */ 32 | #undef HAVE_SYS_STAT_H 33 | 34 | /* Define to 1 if you have the <sys/types.h> header file. */ 35 | #undef HAVE_SYS_TYPES_H 36 | 37 | /* Define to 1 if you have the <unistd.h> header file. 
*/ 38 | #undef HAVE_UNISTD_H 39 | 40 | /* Define to the address where bug reports for this package should be sent. */ 41 | #undef PACKAGE_BUGREPORT 42 | 43 | /* Define to the full name of this package. */ 44 | #undef PACKAGE_NAME 45 | 46 | /* Define to the full name and version of this package. */ 47 | #undef PACKAGE_STRING 48 | 49 | /* Define to the one symbol short name of this package. */ 50 | #undef PACKAGE_TARNAME 51 | 52 | /* Define to the version of this package. */ 53 | #undef PACKAGE_VERSION 54 | 55 | /* The size of a `long', as computed by sizeof. */ 56 | #define SIZEOF_LONG 8 57 | 58 | /* Define to 1 if you have the ANSI C header files. */ 59 | #undef STDC_HEADERS 60 | -------------------------------------------------------------------------------- /vRODBC/tests/Master.R: -------------------------------------------------------------------------------- 1 | if(!nzchar(Sys.getenv("RODBC_TESTING"))) q("no") 2 | 3 | if(getRversion() < "2.9.0") 4 | stop("testing is only supported in R >= 2.9.0") 5 | 6 | ## Run the test script f.RR with 'R CMD BATCH --vanilla', writing f.Rout. 7 | ## f: base name of the test script, without the ".RR" extension. 8 | ## Returns 1L if the batch run exits with a non-zero status (the output, 9 | ## when one was written, is echoed and renamed to f.Rout.fail), else 0L. 10 | ## If a saved reference f.Rout.save exists, the new output is compared with 11 | ## it; the result of that comparison does not affect the return value. 12 | runone <- function(f) 13 | { 14 | message(" Running ", sQuote(f)) 15 | infile <- paste(f, "RR", sep = ".") 16 | outfile <- paste(f, "Rout", sep = ".") 17 | cmd <- paste(shQuote(file.path(R.home("bin"), "R")), 18 | "CMD BATCH --vanilla", 19 | shQuote(infile), shQuote(outfile)) 20 | res <- system(cmd) 21 | if (res) { 22 | ## The batch run can fail before any output is written; only echo and 23 | ## rename the output file when it exists, so the failure is still counted. 24 | if (file.exists(outfile)) { 25 | cat(readLines(outfile), sep="\n") 26 | file.rename(outfile, paste(outfile, "fail", sep=".")) 27 | } 28 | return(1L) 29 | } 30 | savefile <- paste(outfile, "save", sep = "." 
) 31 | if (file.exists(savefile)) { 32 | message(" Comparing ", sQuote(outfile), " to ", 33 | sQuote(savefile), " ...", appendLF = FALSE) 34 | res <- tools:::Rdiff(outfile, savefile, TRUE) 35 | if (!res) message(" OK") 36 | } 37 | 0L 38 | } 39 | 40 | 41 | res <- if(.Platform$OS.type == "windows") 42 | runone("mysql-win") else runone("mysql") 43 | res <- res + runone("sqlite3") 44 | res <- res + runone("postgresql") 45 | if(.Platform$OS.type == "windows") { 46 | res <- res + runone("access") 47 | res <- res + runone("excel") 48 | res <- res + runone("SQLServer") 49 | res <- res + runone("mimer") 50 | res <- res + runone("DB2") 51 | res <- res + runone("Oracle") 52 | } 53 | 54 | proc.time() 55 | 56 | if(res) stop(gettextf("%d tests failed", res)) 57 | -------------------------------------------------------------------------------- /vRODBC/tests/Notes: -------------------------------------------------------------------------------- 1 | These tests are intended to be run as-is only on BDR's machines. 2 | Others will need to set up suitable DSNs or change the scripts to 3 | point to existing DSNs. They will only be run by R CMD check if 4 | RODBC_TESTING is set. 5 | 6 | I don't currently run MySQL as a service on Windows, so it is started by 7 | d:/packages/mysql/bin/mysqld --standalone & 8 | 9 | PostgreSQL, SQL Server, Oracle, DB2 and Mimer run as services, but may 10 | need to be started. The shell used to run these tests must have the 11 | Oracle binaries (e:/Oracle/bin) in the path. 12 | 13 | The Oracle and Mimer tests need DB_PWD set. 14 | 15 | sqlite3 worked with sqliteodbc 0.83 but not 0.91. 
16 | -------------------------------------------------------------------------------- /vRODBC/tests/access.RR: -------------------------------------------------------------------------------- 1 | ##-*- R -*- 2 | library(RODBC) 3 | library(MASS) 4 | USArrests[1,2] <- NA 5 | hills <- hills[1:15,] 6 | row.names(hills)[12] <- "Dollar ('$')" 7 | 8 | channel <- odbcConnect("testacc") 9 | if(!inherits(channel, "RODBC")) q("no") 10 | odbcGetInfo(channel) 11 | sqlTables(channel) 12 | sqlDrop(channel, "USArrests", errors = FALSE) 13 | sqlSave(channel, USArrests) 14 | sqlTables(channel) 15 | sqlColumns(channel, "USArrests") 16 | sqlPrimaryKeys(channel, "USArrests") # reports none 17 | sqlFetch(channel, "USArrests") 18 | query <- paste("select rownames, murder from USArrests", 19 | "where Rape > 30", "order by Murder") 20 | sqlQuery(channel, query) 21 | sqlCopy(channel, query, "HighRape", rownames = FALSE) 22 | sqlFetch(channel, "HighRape", max = 5) 23 | sqlTables(channel) 24 | sqlDrop(channel, "HighRape") 25 | foo <- USArrests[1:3, 2, drop = FALSE] 26 | foo[1,1] <- 236 27 | sqlUpdate(channel, foo, "USArrests") 28 | sqlFetch(channel, "USArrests", max = 5) 29 | sqlFetchMore(channel, max = 8) 30 | sqlDrop(channel, "USArrests") 31 | 32 | dates <- as.character(seq(as.Date("2004-01-01"), by="week", length=10)) 33 | Dtest <- data.frame(dates) 34 | sqlDrop(channel, "Dtest", errors = FALSE) 35 | varspec <- "DATETIME"; names(varspec) <- names(Dtest) 36 | ## fast = TRUE crashes 37 | sqlSave(channel, Dtest, varTypes = varspec, verbose=TRUE, fast=FALSE) 38 | sqlColumns(channel, "Dtest") 39 | sqlFetch(channel, "Dtest") 40 | sqlDrop(channel, "Dtest") 41 | 42 | sqlDrop(channel, "hills test", errors = FALSE) 43 | sqlSave(channel, hills, "hills test", verbose=TRUE) 44 | sqlUpdate(channel, hills[11:15,], "hills test", verbose=TRUE, fast=TRUE) 45 | sqlFetch(channel, "hills test") 46 | sqlDrop(channel, "hills test") 47 | sqlSave(channel, hills, "hills test", verbose=TRUE, fast=FALSE) 48 | 
sqlUpdate(channel, hills[11:15,], "hills test", verbose=TRUE, fast=FALSE) 49 | sqlDrop(channel, "hills test") 50 | 51 | close(channel) 52 | -------------------------------------------------------------------------------- /vRODBC/tests/excel.RR: -------------------------------------------------------------------------------- 1 | ##-*- R -*- 2 | library(RODBC) 3 | library(MASS) 4 | USArrests[1,2] <- NA 5 | hills <- hills[1:15,] 6 | row.names(hills)[12] <- "Dollar ('$')" 7 | 8 | 9 | # DBase: maps table/column names to u/case, max length 8 10 | dbf <- system.file("files", "sids.dbf", package="foreign") 11 | unlink("sids.dbf") 12 | file.copy(dbf, "sids.dbf") 13 | channel <- odbcConnectDbase("sids.dbf") 14 | str(sids <- sqlFetch(channel, "sids")) 15 | sqlUpdate(channel, sids[1:2, ], "sids", index="NAME", verbose=TRUE, fast=FALSE) 16 | close(channel) 17 | 18 | channel <- odbcConnectDbase("sids.dbf", case="toupper") 19 | sqlDrop(channel, "HILLS 2", errors=FALSE) 20 | sqlSave(channel, hills, "HILLS 2", verbose=TRUE) 21 | sqlUpdate(channel, hills[11:15,], "HILLS 2", verbose=TRUE) 22 | sqlDrop(channel, "HILLS 2") 23 | close(channel) 24 | 25 | 26 | # Excel 2003 27 | channel <- odbcConnectExcel("hills.xls") 28 | ## list the spreadsheets and marked ranges 29 | sqlTables(channel) 30 | sqlColumns(channel, "hills") 31 | ## two ways to retrieve the contents of hills 32 | sqlFetch(channel, "hills") 33 | sqlQuery(channel, "select * from [hills$]") 34 | 35 | sqlFetch(channel, "women") 36 | close(channel) 37 | 38 | # The Excel driver maps ' ' to '_'. 
39 | channel <- odbcConnectExcel("hills.xls", readOnly=FALSE) 40 | sqlSave(channel, hills, "hills_test", verbose=TRUE, fast=FALSE) 41 | sqlUpdate(channel, hills[11:15,], "hills_test", verbose=TRUE, fast=FALSE) 42 | sqlFetch(channel, "hills_test") 43 | sqlSave(channel, hills, "hills_test2", verbose=TRUE) 44 | sqlUpdate(channel, hills[11:15,], "hills_test2", verbose=TRUE) 45 | sqlFetch(channel, "hills_test2") 46 | close(channel) 47 | -------------------------------------------------------------------------------- /vRODBC/tests/hills.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/tests/hills.xls -------------------------------------------------------------------------------- /vRODBC/vignettes/Access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/Access.png -------------------------------------------------------------------------------- /vRODBC/vignettes/Makefile: -------------------------------------------------------------------------------- 1 | ## twice seems enough for now -- could also use texi2dvi, not portably 2 | ../inst/doc/RODBC.pdf: RODBC.Rnw 3 | pdflatex RODBC.Rnw 4 | pdflatex RODBC.Rnw 5 | mv RODBC.pdf ../inst/doc 6 | @rm -f RODBC.aux RODBC.log RODBC.out 7 | -------------------------------------------------------------------------------- /vRODBC/vignettes/MySQL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/MySQL.png -------------------------------------------------------------------------------- /vRODBC/vignettes/Oracle.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/Oracle.png -------------------------------------------------------------------------------- /vRODBC/vignettes/SQLserver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/SQLserver.png -------------------------------------------------------------------------------- /vRODBC/vignettes/linuxDSN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/linuxDSN.png -------------------------------------------------------------------------------- /vRODBC/vignettes/linuxDSNsqlite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/linuxDSNsqlite.png -------------------------------------------------------------------------------- /vRODBC/vignettes/macAccess.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/macAccess.png -------------------------------------------------------------------------------- /vRODBC/vignettes/macODBC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/macODBC.png -------------------------------------------------------------------------------- /vRODBC/vignettes/winDSN1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/winDSN1.png -------------------------------------------------------------------------------- /vRODBC/vignettes/winDSN2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vertica/DistributedR/a55bc996532d04f3f5122f9e119338ffcdbd428e/vRODBC/vignettes/winDSN2.png --------------------------------------------------------------------------------