├── .classpath ├── .gitignore ├── .project ├── Copyright.txt ├── LICENSE ├── README.md ├── benchmarks.txt ├── bidmach ├── bidmach.cmd ├── bidmach65 ├── bidmach_full ├── build.sbt ├── build_scala_2_10_sbt ├── build_scala_2_11_sbt ├── command ├── scala └── scala.bat ├── data ├── MHTestCorrections │ ├── norm2log2000_20_0.9.txt │ ├── norm2log4000_20_0.9.txt │ └── norm2log4000_20_1.0.txt ├── factorNet │ ├── data.txt │ ├── data.txt~ │ ├── factorSet.txt │ ├── factorSet.txt~ │ ├── statePerNode.txt │ ├── statePerNode.txt~ │ └── test2 │ │ ├── data.txt │ │ ├── factorSet.txt │ │ ├── factorSet.txt~ │ │ ├── generateTestData.ipynb │ │ ├── statePerNode.txt │ │ └── statePerNode.txt~ ├── imagenet_classname.txt ├── rcv1_fmt.txt ├── uci_fmt.txt └── uci_wfmt.txt ├── getcudaversion.sh ├── getdevlibs.sh ├── getlibs.sh ├── getnativepath.class ├── getnativepath.java ├── jni ├── include │ ├── JNIUtils.hpp │ ├── Logger.hpp │ ├── MatKernel.hpp │ ├── MurmurHash.hpp │ └── PointerUtils.hpp ├── pom.xml └── src │ ├── BIDMach_CPUMACH.c │ ├── BIDMach_CUMACH.cpp │ ├── DNN.cu │ ├── DNN127.cu │ ├── DNN63.cu │ ├── Devel.cu │ ├── Dtree.cu │ ├── GLM.cu │ ├── HashMult.cu │ ├── JCUDA_Copyright.txt │ ├── JNIUtils.cpp │ ├── Logger.cpp │ ├── Makefile │ ├── PointerUtils.cpp │ ├── Samplers.cu │ └── configure ├── lib ├── Apache_Commons_Math_LICENSE.txt ├── Apache_License.txt ├── HDF5_Copyright.html ├── IScala_license.txt ├── JCUDA_Copyright.txt ├── Jcommon_JfreeChart_LGPL.html ├── PtPlot_Copyright.txt ├── Scala_License.txt ├── bidmach_init.sc └── bidmach_notebook_init.sc ├── logo-64x64.png ├── logs └── touch.txt ├── notes.txt ├── pom.xml ├── readme_gui.md ├── sbt ├── sbt-pkg ├── bin │ ├── java9-rt-export.jar │ ├── sbt │ ├── sbt-launch-lib.bash │ ├── sbt-launch.jar │ └── sbt.bat └── conf │ ├── sbtconfig.txt │ └── sbtopts ├── scripts ├── README.txt ├── analyze_mhtest_logreg.ssc ├── benchmarks │ ├── basic.c │ ├── basic.jl │ ├── basic.lua │ ├── basic.py │ ├── basic.ssc │ ├── convRC1spark.ssc │ ├── createVWdata.ssc │ ├── graphlab_ALS.ipynb │ ├── juliaRandWalk.jl │ ├── scalaRandWalk.ssc │ ├── scoreSpark.ssc │ ├── scoreSpark2.ssc │ ├── skkmeans.py │ ├── sklogistic.py │ ├── start_spark.sh │ ├── testSparkALS.ssc │ ├── testSparkKMeans.ssc │ ├── testSparkLR.ssc │ ├── testSparkSVM.ssc │ ├── testVWLDA.sh │ ├── testVWLR.sh │ └── testVWkmeans.sh ├── bidmach_ec2.py ├── bn_test.ssc ├── buildcriteo.ssc ├── cluster_destroy.sh ├── cluster_launch.sh ├── cluster_login.sh ├── cluster_mux.py ├── cluster_start.sh ├── cluster_stop.sh ├── collect_files.py ├── criteolr.ssc ├── criteolrslave.ssc ├── criteonet.ssc ├── distribute.sh ├── distribute_data.sh ├── distribute_file.sh ├── distributed │ ├── cmudict-tail-reducer.sh │ ├── master_criteo_lr.ssc │ ├── master_distr_lr_rcv.ssc │ ├── master_mnist_rf.ssc │ ├── master_net_rcv1.ssc │ ├── master_rf_yearprediction.ssc │ ├── master_s2s_cmudict.ssc │ ├── master_s2s_mnt2014.ssc │ ├── master_sts_2015-news-commentary-v10-fr-en.ssc │ ├── news-commentary-tail-reducer.sh │ ├── tail-workers │ ├── testrecv.ssc │ ├── testsend.ssc │ ├── worker_criteo_lr.ssc │ ├── worker_distr_lr_rcv.ssc │ ├── worker_mnist_rf.ssc │ ├── worker_net_rcv1.ssc │ ├── worker_rf_yearprediction.ssc │ ├── worker_s2s_cmudict.ssc │ ├── worker_s2s_mnt2014.ssc │ └── worker_sts_2015-news-commentary-v10-fr-en.ssc ├── factorNet_test.ssc ├── factorNet_test2.ssc ├── futures.ssc ├── get_cmudict.sh ├── get_mnt2014.sh ├── get_mnt2014_pc6.sh ├── getcirfar10.sh ├── getcriteo.sh ├── getdata.sh ├── getdigits.sh ├── getdigits.ssc ├── getmnist8m.sh ├── getmnist8m_finesplit.sh 
├── getmovies.sh ├── getmovies.ssc ├── getpubmed.sh ├── getrcv1.sh ├── getrcv1.ssc ├── getuci.sh ├── getuci.ssc ├── getw2vdata.sh ├── getw2vdata.ssc ├── getyearprediction.sh ├── getyearprediction.ssc ├── higgsdnn.ssc ├── higgsprep.ssc ├── higgsrf.ssc ├── ica_test.ssc ├── make_bayesnet_data.py ├── mnistkmeans.ssc ├── mnistkmeans2.ssc ├── mnistlr.ssc ├── mnistlr2.ssc ├── networks │ ├── evalAlexnet.ssc │ ├── getImageNet.ssc │ ├── getImageNetLabels.ssc │ ├── getImageNetMeans.ssc │ ├── getcifar10.sh │ ├── getcifar100.sh │ ├── getmnist.sh │ ├── loadOnnx.ssc │ ├── modelmat_test │ ├── modelmat_test.fmat.lz4 │ ├── processcifar10.ssc │ ├── processcifar100.ssc │ ├── reduceRate.sc │ ├── resumeAlexnet.ssc │ ├── resumeResnet.ssc │ ├── testAlexnet.ssc │ ├── testAlexnet2.ssc │ ├── testAlexnet2.ssc~ │ ├── testAlexnet4y.ssc │ ├── testAlexnetClassic.ssc │ ├── testCIFAR10.ssc │ ├── testCIFAR100.ssc │ ├── testCIFAR100collide.ssc │ ├── testCIFAR10a.ssc │ ├── testCIFAR10c.ssc │ ├── testConv.ssc │ ├── testLeNet.ssc │ ├── testLeNet2.ssc │ ├── testResnet.ssc │ ├── testResnetCollide.ssc │ ├── testResnetSave.ssc │ ├── testTrans.sc │ └── testVGG16.ssc ├── prepLSTM.ssc ├── preprocess_mnt2014.py ├── preprocess_mnt2014_pc6.py ├── process_cmudict_json.ssc ├── process_mnt2014.ssc ├── process_mnt2014_pc6.ssc ├── processmnist.ssc ├── processmnist8m.ssc ├── processmnist8m_binary.ssc ├── processmnist8m_finesplit.ssc ├── processmnist_binary.ssc ├── processpubmed.ssc ├── pubmedlda.ssc ├── pubmednmf.ssc ├── readcriteo.ssc ├── recompress.ssc ├── runCriteo.ssc ├── runICA.py ├── runall.sh ├── runback.sh ├── runmaster.sh ├── runmaster16.sh ├── runnode.sh ├── runnode16.sh ├── seedActor.ssc ├── sortcriteo.ssc ├── sparseallreduce │ ├── check.sh │ ├── checkall.sh │ ├── checkssh.sh │ ├── checksshall.sh │ ├── compile.sh │ ├── copyData1.sh │ ├── copyData2.sh │ ├── copyData3.sh │ ├── copyData4.sh │ ├── kill.sh │ ├── killall.sh │ ├── logcollect.sh │ ├── mount.sh │ ├── mountall.sh │ ├── ping.sh │ ├── pingall.sh │ ├── runtwitter.sh │ ├── runtwitterall.sh │ ├── runyahoo.sh │ ├── runyahooall.sh │ ├── runyahoor.sh │ ├── runyahoorall.sh │ ├── setup.sh │ ├── unmount.sh │ ├── unmountall.sh │ ├── update.sh │ ├── updatecheck.sh │ ├── volumes │ ├── volumes1 │ ├── volumes2 │ ├── volumes3 │ ├── volumes4 │ └── volumesetup.sh ├── start_workers.sh ├── startup.sh ├── startup16.sh ├── stop_workers.sh ├── testActor.ssc ├── testActor2.ssc ├── testActor3.ssc ├── testActor3.ssc~ ├── testAllReduceGridMaster.ssc ├── testAllReduceNode.ssc ├── testAllReduceNodeDummy.ssc ├── testAllReduceNodeResnet.ssc ├── testLogging.ssc ├── testPowerNet.ssc ├── testPowerNet35.ssc ├── testPredMNT2015.ssc ├── testSeqToSeq.ssc ├── testSeqToSeqPred.ssc ├── test_cmudict_s2s.ssc ├── test_grid.sh ├── test_mh.ssc ├── test_pred_cmudict_s2s.ssc ├── testds.ssc ├── testldagibbs.ssc ├── testlincomb.sc ├── testlr.ssc ├── testlstm.ssc ├── testnet.ssc ├── testpairmult.ssc ├── testrecv_local.ssc ├── testrf.ssc ├── testrforest.ssc ├── testsend_local.ssc ├── testsfa.ssc ├── testsmf.ssc ├── testsvd.ssc ├── testword2vec.ssc ├── testword2vecp.ssc ├── tmp.sc ├── trainLSTM.ssc ├── viz │ ├── alex0.ssc │ ├── mnist.ssc │ ├── save.ssc │ ├── testResnet.ssc │ ├── testVGG16.ssc │ ├── testcifar_norm.ssc │ └── testcifar_vgg.ssc ├── workout.ssc ├── workout2.ssc ├── workout_slave.ssc └── yearprediction.ssc ├── shortpath.bat ├── src ├── main │ ├── C │ │ └── newparse │ │ │ ├── configure │ │ │ ├── gzstream.cpp │ │ │ ├── gzstream.h │ │ │ ├── makefile │ │ │ ├── makefile.gcc │ │ │ ├── makefile.w32 │ │ │ ├── newparse.cpp │ │ 
│ ├── parsevw.cpp │ │ │ ├── tparse.cpp │ │ │ ├── tparse2.cpp │ │ │ ├── trec.flex │ │ │ ├── utils.cpp │ │ │ ├── utils.h │ │ │ ├── xmltweet.flex │ │ │ ├── xmlwiki.flex │ │ │ ├── zconf.h │ │ │ └── zlib.h │ ├── java │ │ ├── caffe │ │ │ ├── Caffe.java │ │ │ └── LICENSE.Caffe │ │ ├── edu │ │ │ └── berkeley │ │ │ │ ├── bid │ │ │ │ ├── CPUMACH.java │ │ │ │ └── CUMACH.java │ │ │ │ └── bvlc │ │ │ │ ├── BLOB.java │ │ │ │ ├── CAFFE.java │ │ │ │ ├── LAYER.java │ │ │ │ ├── LibUtils.java │ │ │ │ ├── NET.java │ │ │ │ └── SGDSOLVER.java │ │ ├── onnx │ │ │ ├── Onnx.java │ │ │ ├── OnnxMl.java │ │ │ ├── OnnxOperators.java │ │ │ └── OnnxOperatorsMl.java │ │ └── org │ │ │ └── tensorflow │ │ │ ├── example │ │ │ ├── BytesList.java │ │ │ ├── BytesListOrBuilder.java │ │ │ ├── Example.java │ │ │ ├── ExampleOrBuilder.java │ │ │ ├── ExampleProtos.java │ │ │ ├── Feature.java │ │ │ ├── FeatureList.java │ │ │ ├── FeatureListOrBuilder.java │ │ │ ├── FeatureLists.java │ │ │ ├── FeatureListsOrBuilder.java │ │ │ ├── FeatureOrBuilder.java │ │ │ ├── FeatureProtos.java │ │ │ ├── Features.java │ │ │ ├── FeaturesOrBuilder.java │ │ │ ├── FloatList.java │ │ │ ├── FloatListOrBuilder.java │ │ │ ├── Int64List.java │ │ │ ├── Int64ListOrBuilder.java │ │ │ ├── RecordWriter.java │ │ │ ├── SequenceExample.java │ │ │ └── SequenceExampleOrBuilder.java │ │ │ └── io │ │ │ ├── CRC32C.java │ │ │ ├── RecordReader.java │ │ │ └── RecordWriter.java │ ├── proto │ │ ├── onnx │ │ │ ├── onnx-ml.proto │ │ │ ├── onnx-operators-ml.proto │ │ │ ├── onnx-operators.in.proto │ │ │ ├── onnx-operators.proto │ │ │ ├── onnx.in.proto │ │ │ └── onnx.proto │ │ └── tensorflow │ │ │ ├── example.proto │ │ │ └── feature.proto │ ├── resources │ │ ├── application.conf │ │ └── lib │ │ │ └── touch.txt │ └── scala │ │ └── BIDMach │ │ ├── Clustering.scala │ │ ├── Copyright.txt │ │ ├── Experiments.scala │ │ ├── Featurizer.scala │ │ ├── Learner.scala │ │ ├── Logging.scala │ │ ├── allreduce │ │ ├── AllreduceConfig.scala │ │ ├── AllreduceDimensionNode.scala │ │ ├── AllreduceDummyLearner.scala │ │ ├── AllreduceDummyModel.scala │ │ ├── AllreduceGridMaster.scala │ │ ├── AllreduceLineMaster.scala │ │ ├── AllreduceMessage.scala │ │ ├── AllreduceNode.scala │ │ ├── AllreduceType.scala │ │ ├── AllreduceWorker.scala │ │ ├── AllreduceWorkerStats.scala │ │ ├── Dynamic2DGridLayout.scala │ │ ├── RandPerm.scala │ │ ├── ReceivePipeline.scala │ │ ├── binder │ │ │ ├── AllreduceBinder.scala │ │ │ ├── AssertCorrectnessBinder.scala │ │ │ ├── ElasticAverageBinder.scala │ │ │ └── NoOpBinder.scala │ │ ├── buffer │ │ │ ├── AllReduceBuffer.scala │ │ │ ├── ReducedDataBuffer.scala │ │ │ └── ScatteredDataBuffer.scala │ │ └── old │ │ │ ├── ClosureCleaner.scala │ │ │ ├── Command.scala │ │ │ ├── Host.scala │ │ │ ├── Master.scala │ │ │ ├── Response.scala │ │ │ └── Worker.scala │ │ ├── caffe │ │ ├── Classifier.scala │ │ ├── Net.scala │ │ └── SGDSolver.scala │ │ ├── causal │ │ └── IPTW.scala │ │ ├── datasinks │ │ ├── DataSink.scala │ │ ├── FileSink.scala │ │ └── MatSink.scala │ │ ├── datasources │ │ ├── ArraySource.scala │ │ ├── BlendedSource.scala │ │ ├── DataSource.scala │ │ ├── FileSource.scala │ │ ├── IteratorSource.scala │ │ ├── MatSource.scala │ │ ├── SFileSource.scala │ │ └── StackedSource.scala │ │ ├── io │ │ └── Onnx.scala │ │ ├── mixins │ │ ├── Clustering.scala │ │ ├── Mixin.scala │ │ └── Regularizer.scala │ │ ├── models │ │ ├── Click.scala │ │ ├── Clustering.scala │ │ ├── FM.scala │ │ ├── FactorModel.scala │ │ ├── GLM.scala │ │ ├── GaussianMixture.scala │ │ ├── ICA.scala │ │ ├── KMeans.scala │ │ ├── 
KMeansw.scala │ │ ├── LDA.scala │ │ ├── LDAgibbs.scala │ │ ├── LDAgibbsv.scala │ │ ├── Model.scala │ │ ├── NMF.scala │ │ ├── RandomForest.scala │ │ ├── Regression.scala │ │ ├── SFA.scala │ │ ├── SMF.scala │ │ └── SVD.scala │ │ ├── networks │ │ ├── Net.scala │ │ ├── NetActor.scala │ │ ├── NextWord.scala │ │ ├── SeqToSeq.scala │ │ ├── TransformerLT.scala │ │ ├── Word2Vec.scala │ │ ├── Word2Vech.scala │ │ └── layers │ │ │ ├── AbsLayer.scala │ │ │ ├── AddLayer.scala │ │ │ ├── AllReduceActor.scala │ │ │ ├── AutoNormLayer.scala │ │ │ ├── BatchNormLayer.scala │ │ │ ├── BatchNormScaleLayer.scala │ │ │ ├── ColpermLayer.scala │ │ │ ├── ColsliceLayer.scala │ │ │ ├── CompoundLayer.scala │ │ │ ├── ConstantLayer.scala │ │ │ ├── ConvLayer.scala │ │ │ ├── CopyLayer.scala │ │ │ ├── CropLayer.scala │ │ │ ├── CropMirrorLayer.scala │ │ │ ├── DivLayer.scala │ │ │ ├── DotLayer.scala │ │ │ ├── DropoutLayer.scala │ │ │ ├── EfnLayer.scala │ │ │ ├── ElasticLayer.scala │ │ │ ├── ExpLayer.scala │ │ │ ├── Fn2Layer.scala │ │ │ ├── FnLayer.scala │ │ │ ├── ForwardLayer.scala │ │ │ ├── GLMLayer.scala │ │ │ ├── HcatLayer.scala │ │ │ ├── InputLayer.scala │ │ │ ├── LRNacrossLayer.scala │ │ │ ├── LRNwithinLayer.scala │ │ │ ├── LSTM.scala │ │ │ ├── LSTMfusedLayer.scala │ │ │ ├── Layer.scala │ │ │ ├── LayerActor.scala │ │ │ ├── LayerMat.scala │ │ │ ├── LayerNormLayer.scala │ │ │ ├── LayerNormScaleLayer.scala │ │ │ ├── LinLayer.scala │ │ │ ├── LnLayer.scala │ │ │ ├── MHAttnLayer.scala │ │ │ ├── MatMulLayer.scala │ │ │ ├── MaxIndexLayer.scala │ │ │ ├── MaxLayer.scala │ │ │ ├── Maxi2Layer.scala │ │ │ ├── MaxiLayer.scala │ │ │ ├── MinLayer.scala │ │ │ ├── Mini2Layer.scala │ │ │ ├── MiniLayer.scala │ │ │ ├── ModelLayer.scala │ │ │ ├── MulLayer.scala │ │ │ ├── NegsampOutputLayer.scala │ │ │ ├── Node.scala │ │ │ ├── NodeMat.scala │ │ │ ├── NodeSet.scala │ │ │ ├── NormLayer.scala │ │ │ ├── OnehotLayer.scala │ │ │ ├── PoolingLayer.scala │ │ │ ├── PowerLayer.scala │ │ │ ├── RandomMirrorLayer.scala │ │ │ ├── RectLayer.scala │ │ │ ├── ReshapeLayer.scala │ │ │ ├── ScaleLayer.scala │ │ │ ├── SelectLayer.scala │ │ │ ├── SigmoidLayer.scala │ │ │ ├── SignLayer.scala │ │ │ ├── SoftmaxLayer.scala │ │ │ ├── SoftmaxOutputLayer.scala │ │ │ ├── SoftmaxxLayer.scala │ │ │ ├── SoftplusLayer.scala │ │ │ ├── SplitHorizLayer.scala │ │ │ ├── SplitVertLayer.scala │ │ │ ├── SqrtLayer.scala │ │ │ ├── StackLayer.scala │ │ │ ├── SubLayer.scala │ │ │ ├── SumLayer.scala │ │ │ ├── TanhLayer.scala │ │ │ ├── TensorFormatLayer.scala │ │ │ ├── TransposeLayer.scala │ │ │ └── VariableLayer.scala │ │ ├── updaters │ │ ├── ADAGrad.scala │ │ ├── Batch.scala │ │ ├── BatchNorm.scala │ │ ├── CG.scala │ │ ├── Grad.scala │ │ ├── GradCollide.scala │ │ ├── IncMult.scala │ │ ├── IncNorm.scala │ │ ├── MHTest.scala │ │ ├── Telescoping.scala │ │ └── Updater.scala │ │ └── viz │ │ ├── FilterViz.scala │ │ ├── ImageArray.scala │ │ ├── InputViz.scala │ │ ├── LogViz.scala │ │ ├── Synthesis.scala │ │ ├── Visualization.scala │ │ ├── WebServer.scala │ │ └── utils.scala └── test │ └── scala │ └── BIDMach │ ├── BIDMachSpec.scala │ ├── allreduce │ ├── AllreduceWorkerSpec.scala │ ├── Dynamic2DGridLayoutSpec.scala │ ├── LeNetSpec.scala │ ├── binder │ │ └── ElasticAverageBinderSpec.scala │ └── buffer │ │ ├── ReducedDataBufferSpec.scala │ │ └── ScatteredDataBufferSpec.scala │ └── networks │ └── layers │ └── LayerTest.scala └── tutorials ├── BIDMach_basic_classification.ipynb ├── BIDMach_parameter_tuning.ipynb ├── BIDMat_Scala_Features.ipynb ├── BIDMat_intro.ipynb ├── ClusteringImages.ipynb ├── 
CreateModels.ipynb ├── CreateNets.ipynb ├── GeneralDNNregression.ipynb ├── MLscalePart1.ipynb ├── MLscalePart2.ipynb ├── NBandLR.ipynb └── testing.ipynb

/.gitignore:
--------------------------------------------------------------------------------
.idea/
cbin
lib/*.jar
*.rej
*.so
target
*.o
*.incl
logs
.classpath

data
!data/rcv1_fmt.txt
!data/uci_fmt.txt
!data/uci_wfmt.txt
!data/factorNet
!data/MHTestCorrections
/bin
*.xml
*.iml

.DS_store
project/
*.txt
log.txt.lck
*.lck

--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>BIDMach</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.scala-ide.sdt.core.scalabuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
		<buildCommand>
			<name>org.eclipse.m2e.core.maven2Builder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.eclipse.m2e.core.maven2Nature</nature>
		<nature>org.scala-ide.sdt.core.scalanature</nature>
		<nature>org.eclipse.jdt.core.javanature</nature>
	</natures>
</projectDescription>

--------------------------------------------------------------------------------
/Copyright.txt:
--------------------------------------------------------------------------------
Copyright (c) 2012, Regents of the University of California
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2012-2014, Regents of the University of California
All rights reserved.

LICENSE

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

BIDMach is a very fast machine learning library. Check the latest benchmarks.

The GitHub distribution contains source code only. You also need a JDK 8, an installation of NVIDIA CUDA 8.0 (if you want to use a GPU), and CUDNN 5 if you plan to use deep networks. To build, you need Maven 3.x.

After cloning, cd to the BIDMach directory and build and install the jars with `mvn install`. You can then run BIDMach with `./bidmach`. More details on installing and running are available here.

The main project page is here.

Documentation is here in the wiki.

New: BIDMach has a discussion group on Google Groups.

BIDMach is a sister project of BIDMat, a matrix library, which is also on GitHub.

BIDData also has a project for deep reinforcement learning. BIDMach_RL contains state-of-the-art implementations of several reinforcement learning algorithms.
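As a minimal sketch, the install steps above amount to the following shell session (assuming a JDK 8 and Maven 3.x are already installed, plus CUDA 8.0 if you want GPU support; the clone URL is the one used in readme_gui.md below):

```
git clone https://github.com/BIDData/BIDMach.git
cd BIDMach
mvn install   # build and install the BIDMach jars
./bidmach     # start the BIDMach shell
```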
19 | -------------------------------------------------------------------------------- /bidmach.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | :: Set JAVA_HOME here if not set in environment 3 | :: SET JAVA_HOME= 4 | :: Set as much memory as possible 5 | (SET JAVA_OPTS=-Xmx12G -Xms128M) 6 | :: Fix these if needed 7 | SET JCUDA_VERSION=0.6.5 8 | SET LIBDIR=%CD%\lib 9 | SET JCUDA_LIBDIR=%LIBDIR% 10 | 11 | SET BIDMACH_LIBS=%LIBDIR%\BIDMat.jar;%CD%\BIDMach.jar;%LIBDIR%\ptplot.jar;%LIBDIR%\ptplotapplication.jar;%LIBDIR%\jhdf5.jar;%LIBDIR%\commons-math3-3.1.1.jar;%LIBDIR%\lz4-1.1.2.jar 12 | 13 | SET JCUDA_LIBS=%JCUDA_LIBDIR%\jcuda-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcublas-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcufft-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcurand-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcusparse-%JCUDA_VERSION%.jar 14 | 15 | SET ALL_LIBS=%LIBDIR%\IScala.jar;%BIDMACH_LIBS%;%JCUDA_LIBS%;%JAVA_HOME%\lib\tools.jar 16 | (SET JAVA_OPTS=-Djava.library.path="%LIBDIR%;%PATH%" %JAVA_OPTS%) 17 | 18 | %CD%\scripts\scala\scala.bat -toolcp "%ALL_LIBS%" -Yrepl-sync -i %LIBDIR%\bidmach_init.scala 19 | -------------------------------------------------------------------------------- /bidmach_full: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # export JAVA_HOME="" # Set here if not set in environment 3 | export JAVA_OPTS="-Xmx12G -Xms128M" # Set as much memory as possible 4 | JCUDA_VERSION="0.5.5" # Fix if needed 5 | BIDMACH_ROOT="${BASH_SOURCE[0]}" 6 | if [ ! `uname` = "Darwin" ]; then 7 | BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"` 8 | else 9 | BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"` 10 | fi 11 | BIDMACH_ROOT=`dirname "$BIDMACH_ROOT"` 12 | BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed s+/cygdrive/c+c:+ )" 13 | JCUDA_LIBDIR="${BIDMACH_ROOT}/lib" 14 | LIBDIR="${BIDMACH_ROOT}/lib" 15 | if [ `uname` = "Darwin" ]; then 16 | export DYLD_LIBRARY_PATH="${LIBDIR}:${LIBDIR}/cuda:${DYLD_LIBRARY_PATH}" 17 | else 18 | export LD_LIBRARY_PATH="${LIBDIR}:${LIBDIR}/cuda:${LD_LIBRARY_PATH}" 19 | fi 20 | 21 | BIDMACH_LIBS="${LIBDIR}/BIDMat.jar;${LIBDIR}/ptplot.jar;${LIBDIR}/ptplotapplication.jar;${LIBDIR}/jhdf5.jar;${LIBDIR}/commons-math3-3.1.1.jar;${LIBDIR}/lz4-1.1.2.jar" 22 | JCUDA_LIBS="${JCUDA_LIBDIR}/jcuda-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcublas-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcufft-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcurand-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcusparse-${JCUDA_VERSION}.jar" 23 | 24 | export ALL_LIBS="${BIDMACH_ROOT}/BIDMach.jar;${BIDMACH_LIBS};${JCUDA_LIBS};${JAVA_HOME}/lib/tools.jar" 25 | 26 | if [ ! 
"$OS" = "Windows_NT" ]; then 27 | export ALL_LIBS=`echo "${ALL_LIBS}" | sed 's/;/:/g'` 28 | else 29 | export JAVA_OPTS="-Djava.library.path=${LIBDIR};${LIBDIR}\\cuda "$JAVA_OPTS 30 | fi 31 | 32 | ${BIDMACH_ROOT}/scala/bin/scala -nobootcp -cp "${ALL_LIBS}" -Yrepl-sync -i ${LIBDIR}/bidmach_init.scala "$@" 33 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "BIDMach" 3 | 4 | version := "2.0.10-cuda8.0beta" 5 | 6 | organization := "edu.berkeley.bid" 7 | 8 | scalaVersion := "2.11.2" 9 | 10 | artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => 11 | "../../BIDMach.jar" 12 | } 13 | 14 | resolvers ++= Seq( 15 | "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", 16 | "Scala Mirror" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 20 | 21 | javacOptions ++= Seq("-source", "1.7", "-target", "1.7") 22 | 23 | scalacOptions ++= Seq("-deprecation","-target:jvm-1.7") 24 | 25 | initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") 26 | 27 | javaOptions += "-Xmx12g" 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /build_scala_2_10_sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "BIDMach" 3 | 4 | version := "1.1.0" 5 | 6 | organization := "edu.berkeley.bid" 7 | 8 | scalaVersion := "2.10.6" 9 | 10 | artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => 11 | "../../BIDMach.jar" 12 | } 13 | 14 | resolvers ++= Seq( 15 | "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", 16 | "Scala Mirror" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => 20 | deps :+ ("org.scala-lang" % "scala-compiler" % sv) 21 | } 22 | 23 | libraryDependencies += "jline" % "jline" % "2.10" 24 | 25 | libraryDependencies += "org.apache.commons" % "commons-math3" % "3.2" 26 | 27 | //libraryDependencies += "org.scalatest" %% "scalatest" % "2.0" % "test" 28 | 29 | //libraryDependencies += "org.scalacheck" %% "scalacheck" % "1.11.2" % "test" 30 | 31 | libraryDependencies += "junit" % "junit" % "4.5" % "test" 32 | 33 | libraryDependencies += "net.jpountz.lz4" % "lz4" % "1.3" 34 | 35 | //libraryDependencies += "org.scala-saddle" % "jhdf5" % "2.9" 36 | 37 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 38 | 39 | javacOptions ++= Seq("-source", "1.7", "-target", "1.7") 40 | 41 | scalacOptions ++= Seq("-deprecation","-target:jvm-1.7") 42 | 43 | initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") 44 | 45 | javaOptions += "-Xmx12g" 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /build_scala_2_11_sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "BIDMach" 3 | 4 | version := "1.1.0" 5 | 6 | organization := "edu.berkeley.bid" 7 | 8 | scalaVersion := "2.11.2" 9 | 10 | artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => 11 | "../../BIDMach.jar" 12 | } 13 | 14 | resolvers ++= Seq( 15 | "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", 16 | "Scala Mirror" at 
"https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => 20 | deps :+ ("org.scala-lang" % "scala-compiler" % sv) 21 | } 22 | 23 | libraryDependencies += "jline" % "jline" % "2.11" 24 | 25 | libraryDependencies += "org.apache.commons" % "commons-math3" % "3.2" 26 | 27 | //libraryDependencies += "org.scalatest" %% "scalatest" % "2.0" % "test" 28 | 29 | //libraryDependencies += "org.scalacheck" %% "scalacheck" % "1.11.2" % "test" 30 | 31 | libraryDependencies += "junit" % "junit" % "4.5" % "test" 32 | 33 | libraryDependencies += "net.jpountz.lz4" % "lz4" % "1.3" 34 | 35 | //libraryDependencies += "org.scala-saddle" % "jhdf5" % "2.9" 36 | 37 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 38 | 39 | javacOptions ++= Seq("-source", "1.7", "-target", "1.7") 40 | 41 | scalacOptions ++= Seq("-deprecation","-target:jvm-1.7") 42 | 43 | initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") 44 | 45 | javaOptions += "-Xmx12g" 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /data/factorNet/data.txt: -------------------------------------------------------------------------------- 1 | 1, 1, 2, 2, 0, 2, 1, 2, 1, 2, 1, 1 2 | 2, 3, 1, 2, 2, 3, 0, 2, 2, 1, 3, 3 3 | 3, 4, 4, 0, 1, 2, 2, 3, 3, 3, 3, 4 4 | -------------------------------------------------------------------------------- /data/factorNet/data.txt~: -------------------------------------------------------------------------------- 1 | 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1 2 | 2, 3, 1, 2, 2, 3, 2, 2, 2, 1, 3, 3 3 | 3, 4, 4, 2, 1, 2, 2, 3, 3, 3, 3, 4 4 | -------------------------------------------------------------------------------- /data/factorNet/factorSet.txt: -------------------------------------------------------------------------------- 1 | 1, 1 2 | 0, 1 3 | 0, 1 4 | -------------------------------------------------------------------------------- /data/factorNet/factorSet.txt~: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/data/factorNet/factorSet.txt~ -------------------------------------------------------------------------------- /data/factorNet/statePerNode.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 3 3 | 4 4 | -------------------------------------------------------------------------------- /data/factorNet/statePerNode.txt~: -------------------------------------------------------------------------------- 1 | 2, 3, 4 2 | -------------------------------------------------------------------------------- /data/factorNet/test2/factorSet.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 1 3 | -------------------------------------------------------------------------------- /data/factorNet/test2/factorSet.txt~: -------------------------------------------------------------------------------- 1 | 1, 1 2 | 0, 1 3 | 0, 1 4 | -------------------------------------------------------------------------------- /data/factorNet/test2/statePerNode.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 3 3 | -------------------------------------------------------------------------------- /data/factorNet/test2/statePerNode.txt~: 
--------------------------------------------------------------------------------
2
3
4
--------------------------------------------------------------------------------
/data/rcv1_fmt.txt:
--------------------------------------------------------------------------------
word catname
int docid
int dmy
--------------------------------------------------------------------------------
/data/uci_fmt.txt:
--------------------------------------------------------------------------------
int cols
int rows
float vals
--------------------------------------------------------------------------------
/data/uci_wfmt.txt:
--------------------------------------------------------------------------------
word term
--------------------------------------------------------------------------------
/getcudaversion.sh:
--------------------------------------------------------------------------------

# try to figure out the CUDA version. See if nvcc is in the path, and
# then call it to get the version. If not, use a default version.
# If $CUDA_VERSION is already set, don't touch it.

if [ "${CUDA_VERSION}" = "" ]; then
    if [[ $(type -P nvcc) ]]; then
        CUDA_VERSION=`nvcc --version | grep release | sed 's/.*release //' | sed 's/\,.*//'`
    else
        CUDA_VERSION="7.5"
    fi
fi
--------------------------------------------------------------------------------
/getlibs.sh:
--------------------------------------------------------------------------------
#!/bin/bash

source=$1

BIDMACH_ROOT="${BASH_SOURCE[0]}"
if [ ! `uname` = "Darwin" ]; then
    BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"`
else
    while [ -L "${BIDMACH_ROOT}" ]; do
        BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"`
    done
fi
BIDMACH_ROOT=`dirname "$BIDMACH_ROOT"`
BIDMACH_ROOT=`pwd`
BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed s+/cygdrive/c+c:+ )"

cp ${source}/lib/*.jar ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.so ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.dll ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.dylib ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.jnilib ${BIDMACH_ROOT}/lib

cp ${source}/BIDMach.jar ${BIDMACH_ROOT}

mkdir -p ${BIDMACH_ROOT}/cbin
cp ${source}/cbin/* ${BIDMACH_ROOT}/cbin

--------------------------------------------------------------------------------
/getnativepath.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/getnativepath.class
--------------------------------------------------------------------------------
/getnativepath.java:
--------------------------------------------------------------------------------
public class getnativepath {
    public static void main(String [] args)
    {
        String v = System.getProperty("java.library.path");
        System.out.print(v);
    }
}
--------------------------------------------------------------------------------
/jni/include/Logger.hpp:
--------------------------------------------------------------------------------
/*
 * JCuda - Java bindings for NVIDIA CUDA driver and runtime API
 *
 * Copyright (c) 2009-2012 Marco Hutter - http://www.jcuda.org
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef LOGGER
#define LOGGER

#include
#include
#include

enum LogLevel {LOG_QUIET, LOG_ERROR, LOG_WARNING, LOG_INFO, LOG_DEBUG, LOG_TRACE, LOG_DEBUGTRACE};

class Logger
{
public:
    static void log(LogLevel level, const char* message, ...);
    static void setLogLevel(LogLevel level);

//private:
    static LogLevel currentLogLevel;

};

#endif
--------------------------------------------------------------------------------
/jni/src/JCUDA_Copyright.txt:
--------------------------------------------------------------------------------
JCuda - Java bindings for NVIDIA CUDA

Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/lib/Jcommon_JfreeChart_LGPL.html:
--------------------------------------------------------------------------------





JFreeChart LGPL




--------------------------------------------------------------------------------
/lib/bidmach_init.sc:
--------------------------------------------------------------------------------
import BIDMat.{BMat,CMat,CSMat,DMat,Dict,FMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,HMat,IDict,Image,IMat,LMat,Mat}
import BIDMat.{Quaternion,SMat,SBMat,SDMat,TMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMat.Solvers._
import BIDMat.Plotting._
import BIDMach.{Learner,ParLearner}
import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD}
import BIDMach.networks.{Net}
import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource}
import BIDMach.datasinks.{DataSink,MatSink}
import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer}
import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping,Updater}
import BIDMach.causal.{IPTW}
import BIDMat.Mat.console_publish

Mat.checkMKL(false)
Mat.checkCUDA(true)

--------------------------------------------------------------------------------
/lib/bidmach_notebook_init.sc:
--------------------------------------------------------------------------------
import BIDMat.{BMat,CMat,CSMat,DMat,Dict,FMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,HMat,IDict,Image,IMat,LMat,Mat,Quaternion,SMat,SBMat,SDMat,TMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMat.Solvers._
import BIDMat.JPlotting._
import BIDMach.{Learner,ParLearner}
import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD}
import BIDMach.networks.{Net}
import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource}
import BIDMach.datasinks.{DataSink,MatSink}
import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer}
import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping,Updater}
import BIDMach.causal.{IPTW}

Mat.checkMKL(false)
Mat.checkCUDA
Mat.setInline

--------------------------------------------------------------------------------
/logo-64x64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/logo-64x64.png
--------------------------------------------------------------------------------
/logs/touch.txt:
--------------------------------------------------------------------------------
touch
--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------

In theory -toolcp should only contain the tool classes, but native code errors result if it doesn't contain all libs.

${BIDMACH_ROOT}/command/scala -Dscala.repl.maxprintstring=8000 ${userargs} -nobootcp -toolcp "${ALL_LIBS}" \

--------------------------------------------------------------------------------
/readme_gui.md:
--------------------------------------------------------------------------------
```
git clone https://github.com/BIDData/BIDMach.git
cd BIDMach
git checkout gui
mvn package
```

First, download models.tar.gz and data.tar.gz from the Google Drive (https://goo.gl/vqc3rJ).

Extract models.tar.gz in the BIDMach folder.
Extract data.tar.gz to wherever you like.

Before running these scripts, set the `traindir` variable in the scripts to the right data location. Change `pretrain_model_dir` and `pretrain_discriminator_dir` if you extracted the models into a different location.

DEMO for MNIST:
```
./bidmach scripts/viz/mnist.ssc
```

DEMO for CIFAR:
```
./bidmach scripts/viz/testcifar_norm.ssc
```

After loading the scripts, type `s.launch` to start the MCMC process. Use `s.stop` to stop.

If you want to use the discriminator (requires pixel values in [0, 256)), run `o.clipping = true` in the shell, and set `base` to 0 in the UI.

By default, mnist.ssc sets `clipping` to false and testcifar_norm.ssc sets it to true.

Change the L2 weight and the discriminator weight to see the effect.
--------------------------------------------------------------------------------
/sbt-pkg/bin/java9-rt-export.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/sbt-pkg/bin/java9-rt-export.jar
--------------------------------------------------------------------------------
/sbt-pkg/bin/sbt-launch.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/sbt-pkg/bin/sbt-launch.jar
--------------------------------------------------------------------------------
/sbt-pkg/conf/sbtconfig.txt:
--------------------------------------------------------------------------------
# Set the java args to high

-Xmx512M

-XX:MaxPermSize=256m

-XX:ReservedCodeCacheSize=128m



# Set the extra SBT options

-Dsbt.log.format=true

--------------------------------------------------------------------------------
/sbt-pkg/conf/sbtopts:
--------------------------------------------------------------------------------
# ------------------------------------------------ #
#  The SBT Configuration file.                     #
# ------------------------------------------------ #


# Disable ANSI color codes
#
#-no-colors

# Starts sbt even if the current directory contains no sbt project.
#
-sbt-create

# Path to global settings/plugins directory (default: ~/.sbt)
#
#-sbt-dir /etc/sbt

# Path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
#
#-sbt-boot ~/.sbt/boot

# Path to local Ivy repository (default: ~/.ivy2)
#
#-ivy ~/.ivy2

# set memory options
#
#-mem

# Use local caches for projects, no sharing.
#
#-no-share

# Put SBT in offline mode.
#
#-offline

# Sets the SBT version to use.
39 | #-sbt-version 0.11.3 40 | 41 | # Scala version (default: latest release) 42 | # 43 | #-scala-home 44 | #-scala-version 45 | 46 | # java version (default: java from PATH, currently $(java -version |& grep version)) 47 | # 48 | #-java-home 49 | 50 | -------------------------------------------------------------------------------- /scripts/README.txt: -------------------------------------------------------------------------------- 1 | Scripts for loading data and testing BIDMach learners. 2 | 3 | Most of these scripts should be run from within this directory. 4 | 5 | .sh scripts are bash scripts that should be launched from a bash shell. 6 | 7 | .ssc scripts are scala scripts which should be run from this directory with 8 | 9 | ../bidmach scriptname.ssc -------------------------------------------------------------------------------- /scripts/benchmarks/basic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char ** argv) { 6 | int n, nreps, irep, i, j, ibase; 7 | struct timeval tp1, tp2; 8 | double t1, t2, tdiff; 9 | double flops; 10 | sscanf(argv[1], "%d", &n); 11 | sscanf(argv[2], "%d", &nreps); 12 | float *a; 13 | float *b; 14 | float *c; 15 | a = (float *)malloc(n*n*sizeof(float)); 16 | b = (float *)malloc(n*n*sizeof(float)); 17 | c = (float *)malloc(n*n*sizeof(float)); 18 | gettimeofday(&tp1, NULL); 19 | for (irep = 0; irep < nreps; irep++) { 20 | for (i = 0; i < n; i++) { 21 | ibase = i * n; 22 | for (j = 0; j < n; j++) { 23 | c[j + ibase] = a[j + ibase] + b[j + ibase]; 24 | } 25 | } 26 | } 27 | gettimeofday(&tp2, NULL); 28 | t1 = tp1.tv_sec + 1.0e-6*tp1.tv_usec; 29 | t2 = tp2.tv_sec + 1.0e-6*tp2.tv_usec; 30 | tdiff = t2 - t1; 31 | flops = 1.0 * n * n * nreps; 32 | printf("time %f, Mflops %f %f\n", tdiff/nreps, flops/tdiff/1.0e6, c[1000000-1]); 33 | } 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.jl: -------------------------------------------------------------------------------- 1 | 2 | n = 10000; 3 | a = rand(Float32,(n,n)) 4 | b = rand(Float32,(n,n)) 5 | c = zeros(Float32,(n,n)); 6 | t1 = time(); 7 | 8 | for i = 1:n 9 | for j = 1:n 10 | c[i,j] = a[i,j] + b[i,j] 11 | end 12 | end 13 | 14 | t2 = time(); 15 | 16 | for i = 1:10 17 | c = a+ b; 18 | end 19 | 20 | t3 = time(); 21 | 22 | dt1 = t2 - t1 23 | dt2 = t3 - t2 24 | n2 = n*n 25 | 26 | mflops1 = n2 / dt1 / 1e6; 27 | mflops2 = n2 / dt2 / 1e5; 28 | 29 | println("times $dt1,$dt2, mflops $mflops1,$mflops2") 30 | 31 | 32 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.lua: -------------------------------------------------------------------------------- 1 | 2 | nreps = 10 3 | n = 10000 4 | a = {} 5 | b = {} 6 | c = {} 7 | 8 | for i = 1, n do 9 | a[i] = {}; 10 | b[i] = {}; 11 | c[i] = {}; 12 | for j = 1, n do 13 | a[i][j] = math.random(); 14 | b[i][j] = math.random(); 15 | c[i][j] = 0; 16 | end 17 | end 18 | 19 | t1=os.time(); 20 | 21 | for irep = 1, nreps do 22 | for i = 1, n do 23 | for j = 1, n do 24 | c[i][j] = a[i][j] + b[i][j]; 25 | end 26 | end 27 | end 28 | 29 | t2=os.time(); 30 | dt = t2 - t1; 31 | n2 = 1.0*n*n*nreps 32 | 33 | print(string.format("time=3.2%f, Mflops=3.2%f",dt/nreps,n2/dt/1e6)) 34 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.py: -------------------------------------------------------------------------------- 1 | 
import time 2 | import numpy as np; 3 | import numpy.random as rand; 4 | t0 = time.time() 5 | n = 10000 6 | a = rand.rand(n,n) 7 | b = rand.rand(n,n) 8 | 9 | t0 = time.time() 10 | c = a + b; 11 | t1 = time.time() 12 | dt1 = t1 - t0 13 | print dt1 14 | 15 | for i in range(0,n): 16 | for j in range(0,n): 17 | c[i][j] = a[i][j] + b[i][j]; 18 | 19 | t2 = time.time() 20 | 21 | dt2 = t2 - t1 22 | print dt1, dt2 23 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.ssc: -------------------------------------------------------------------------------- 1 | :silent 2 | val n = 10000; 3 | val a = rand(n,n); 4 | val b = rand(n,n); 5 | val c = zeros(n,n); 6 | 7 | tic 8 | var i = 0; 9 | while (i < n) { 10 | var j = 0; 11 | while (j < n) { 12 | c(j,i) = a(j,i) + b(j,i); 13 | j +=1; 14 | } 15 | i += 1; 16 | } 17 | val t1 = toc; 18 | 19 | for (i <- 0 until 100) { 20 | val c = a+b; 21 | } 22 | 23 | val t2 = toc - t1; 24 | val n2 = n*n; 25 | :silent 26 | println("time %f,%f Mflops %f,%f" format (t1,t2/100,n2/t1/1e6f,n2/t2/1e4f)); 27 | 28 | -------------------------------------------------------------------------------- /scripts/benchmarks/createVWdata.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir = "../../data/rcv1/" 3 | 4 | val a = loadSMat(dir+"docs.smat.lz4"); 5 | val c = sparse(loadFMat(dir+"cats.fmat.lz4")); 6 | 7 | val ta = loadSMat(dir+"testdocs.smat.lz4"); 8 | val tc = sparse(loadFMat(dir+"testcats.fmat.lz4")); 9 | 10 | saveVW(dir+"train.vw", a, c); 11 | saveVW(dir+"test.vw", ta, tc); -------------------------------------------------------------------------------- /scripts/benchmarks/juliaRandWalk.jl: -------------------------------------------------------------------------------- 1 | 2 | function rw(a) 3 | n = length(a) 4 | a[1] = rand() - 0.5 5 | for i = 2:n 6 | a[i] = a[i-1] + rand() - 0.5 7 | end 8 | a 9 | end 10 | 11 | function fib(n::Int64) 12 | if (n <= 2) 13 | 1 14 | else 15 | fib(n-1) + fib(n-2) 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /scripts/benchmarks/scalaRandWalk.ssc: -------------------------------------------------------------------------------- 1 | 2 | import java.util.Random 3 | 4 | val randgen = new Random 5 | 6 | def rw(n:Int) = { 7 | var i = 0; 8 | var sum = 0f; 9 | while (i < n) { 10 | sum += randgen.nextFloat 11 | i += 1; 12 | } 13 | sum 14 | } 15 | 16 | def fib(n:Long):Long = { 17 | if (n <= 2) 1 18 | else fib(n-2) + fib(n-1) 19 | } 20 | -------------------------------------------------------------------------------- /scripts/benchmarks/scoreSpark.ssc: -------------------------------------------------------------------------------- 1 | 2 | val m = loadFMat("/code/spark/modelx.txt") 3 | val m2 = loadFMat("/code/spark/modelx.fmat.lz4") 4 | 5 | val dd = loadFMat("/big/RCV1/v2/spark_test.fmat.lz4") 6 | 7 | val cc = loadFMat("/big/RCV1/v2/spark_cattest.fmat.lz4") 8 | 9 | val c6 = cc(6,?) 
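// c6 holds row 6 of the test label matrix, i.e. the labels for category 6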

val ddi = dd on ones(1,dd.ncols)
val prod = m * ddi
val prod2 = m2 * ddi

val px = 1 / (1 + exp(- prod))
val px2 = 1 / (1 + exp(- prod2))

val ii = find(c6)
val jj = find(c6 == 0)

val vv = px(jj) on (1 - px(ii))
val score = mean(vv)

val vv2 = px2(jj) on (1 - px2(ii))
val score2 = mean(vv2)

--------------------------------------------------------------------------------
/scripts/benchmarks/scoreSpark2.ssc:
--------------------------------------------------------------------------------

val m = loadFMat("/code/spark/modelx.txt")
val m2 = loadFMat("/code/spark/modelx.fmat.lz4")

val prod = m * ddi
val prod2 = m2 * ddi

val px = 1 / (1 + exp(- prod))
val px2 = 1 / (1 + exp(- prod2))

val vv = px(jj) on (1 - px(ii))
val vv2 = px2(jj) on (1 - px2(ii))

val uu = c6 *@ px + (1.0f - c6) *@ (1.0f - px)
val uu2 = c6 *@ px2 + (1.0f - c6) *@ (1.0f - px2)

val llv = ln(uu)
val ii1 = find(prod > 10f)
llv(ii1) = 0
val jj1 = find(prod < -10f)
llv(jj1) = prod(jj1)

val llv2 = ln(uu2)
val ii2 = find(prod2 > 10f)
llv2(ii2) = 0
val jj2 = find(prod2 < -10f)
llv2(jj2) = prod2(jj2)

val score = mean(vv)
val score2 = mean(vv2)

val ll = mean(llv)
val ll2 = mean(llv2)

--------------------------------------------------------------------------------
/scripts/benchmarks/skkmeans.py:
--------------------------------------------------------------------------------
import time

import numpy as np
import scipy.io as sio
import h5py
from sklearn.datasets import load_svmlight_file
from sklearn.cluster import KMeans

print("reading")

f = h5py.File('/code/BIDMach/data/MNIST8M/all.mat','r')

t0 = time.time()
data = f.get('/all') # Get a certain dataset
X = np.array(data)
t1 = time.time()

t_read = t1 - t0
print("Finished reading in " + repr(t_read) + " secs")

batch_size = 10
kmeans = KMeans(n_clusters=256, init='random', n_init=1, max_iter=10, tol=0.0001, precompute_distances=False, verbose=0, random_state=None, copy_x=False, n_jobs=1)
kmeans.fit(X)
t2 = time.time()
t_batch = t2 - t1
print("compute time " + repr(t_batch) + " secs")
--------------------------------------------------------------------------------
/scripts/benchmarks/sklogistic.py:
--------------------------------------------------------------------------------
import time

from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.datasets import load_svmlight_file

t0 = time.time()
print("Start reading")
X, Y = load_svmlight_file("../../data/rcv1/train.libsvm")

print("Finished reading")
batch_size = 10

sgd = OneVsRestClassifier(SGDClassifier(loss='log', alpha=0.01, fit_intercept=True, n_iter=3))
t1 = time.time()
sgd.fit(X,Y)
t2 = time.time()

print("load time {0:3.2f}, train time {1:3.2f}".format(t1-t0,t2-t1))

--------------------------------------------------------------------------------
/scripts/benchmarks/start_spark.sh:
--------------------------------------------------------------------------------

cd /opt/spark/ec2

# launch a cluster
./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa -s 2 --instance-type=r3.2xlarge
--region=us-west-2 launch sparkcluster 6 | 7 | # ganglia patch 8 | 9 | MASTER=`./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa --region=us-west-2 get-master sparkcluster | tail -n 1` 10 | scp -i ~/.ssh/jfc_rsa ~/httpd.conf ec2-user@${MASTER}:httpd.conf 11 | 12 | # login to the master 13 | ./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa --region=us-west-2 login sparkcluster 14 | 15 | export AWS_ACCESS_KEY_ID= 16 | export AWS_SECRET_ACCESS_KEY= 17 | 18 | # ganglia patch 19 | 20 | rm -r /var/lib/ganglia/rrds 21 | ln -s /mnt/ganglia/rrds /var/lib/ganglia/rrds 22 | 23 | cp /etc/httpd/conf/httpd.conf /etc/httpd/conf/httpd_bkup.conf 24 | cp /home/ec2-user/httpd.conf /etc/httpd/conf/httpd.conf 25 | apachectl -k graceful 26 | 27 | # need more driver memory for several models, e.g. multiclass and word2vec 28 | spark/bin/spark-shell 29 | spark/bin/spark-shell --driver-memory 16g --conf "spark.driver.maxResultSize=8g" 30 | 31 | 32 | exit 33 | 34 | echo "y" | ./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa --region=us-west-2 destroy sparkcluster 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/benchmarks/testSparkALS.ssc: -------------------------------------------------------------------------------- 1 | 2 | import org.apache.spark.mllib.recommendation.ALS 3 | import org.apache.spark.mllib.recommendation.Rating 4 | 5 | import scala.compat.Platform._ 6 | 7 | val nnodes = 32; 8 | val t0 = currentTime 9 | val data = sc.textFile("s3n://bidmach/netflix_mm.train", nnodes * 4) 10 | //val ratings = data.map(_.split("::") match { case Array(user, item, rate, timestamp) => 11 | // Rating(user.toInt, item.toInt, rate.toDouble) 12 | val ratings = data.map(_.split("\t") match { case Array(user, item, rate) => 13 | Rating(user.toInt, item.toInt, rate.toDouble) 14 | }) 15 | 16 | // Do a test-train split 17 | val splits = ratings.randomSplit(Array(0.9, 0.1)) 18 | val training = splits(0) 19 | val test = splits(1) 20 | 21 | val cc = training.cache.count // force the parse to execute, result in memory 22 | 23 | 24 | // Build the recommendation model using ALS 25 | val rank = 500 26 | val t1 = currentTime 27 | val numIterations = 5 28 | val model = ALS.train(training, rank, numIterations, 0.05) 29 | val t2 = currentTime 30 | // Evaluate the model on test data 31 | val usersProducts = test.map { case Rating(user, product, rate) => 32 | (user, product) 33 | } 34 | 35 | val predictions = 36 | model.predict(usersProducts).map { case Rating(user, product, rate) => 37 | ((user, product), math.min(5.0, math.max(1.0, rate))) 38 | } 39 | 40 | val ratesAndPreds = test.map { case Rating(user, product, rate) => 41 | ((user, product), rate) 42 | }.join(predictions) 43 | 44 | val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) => 45 | val err = (r1 - r2) 46 | err * err 47 | }.mean() 48 | 49 | val RMSE = math.sqrt(MSE) 50 | 51 | println("Root Mean Squared Error = " + RMSE) 52 | 53 | println("Load Time = %f secs, Compute Time = %f" format ((t1-t0)/1000f, (t2-t1)/1000f)) 54 | -------------------------------------------------------------------------------- /scripts/benchmarks/testSparkKMeans.ssc: -------------------------------------------------------------------------------- 1 | 2 | import org.apache.spark.mllib.clustering.KMeans 3 | import org.apache.spark.mllib.linalg.Vectors 4 | import scala.compat.Platform._ 5 | 6 | val nnodes = 16 7 | 8 | // Load and parse the data 9 | val t0 = currentTime 10 | val data = 
sc.textFile("s3n://bidmach/allst.txt", nnodes * 4) 11 | val parsedData = data.map(s => Vectors.dense(s.split('\t').map(_.toDouble))) 12 | val cc = parsedData.cache.count // force the parse to execute, result in memory 13 | 14 | // Cluster the data into classes using KMeans 15 | val numClusters = 4 16 | val numIterations = 10 17 | val t1 = currentTime 18 | val clusters = KMeans.train(parsedData, numClusters, numIterations, 1, "random") 19 | val t2 = currentTime 20 | 21 | // Evaluate clustering by computing Within Set Sum of Squared Errors 22 | val WSSSE = clusters.computeCost(parsedData) 23 | val t3 = currentTime 24 | println("Within Set Sum of Squared Errors = " + WSSSE) 25 | println("Load Time = %f secs, Compute Time = %f, Eval Time =%f" format ((t1-t0)/1000f, (t2-t1)/1000f, (t3-t2)/1000f)) 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/benchmarks/testSparkSVM.ssc: -------------------------------------------------------------------------------- 1 | import org.apache.spark.SparkContext 2 | import org.apache.spark.mllib.classification.SVMWithSGD 3 | import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics 4 | import org.apache.spark.mllib.regression.LabeledPoint 5 | import org.apache.spark.mllib.linalg.Vectors 6 | import org.apache.spark.mllib.util.MLUtils 7 | import scala.compat.Platform._ 8 | 9 | val t0=currentTime 10 | // Load training data in LIBSVM format. 11 | val data = MLUtils.loadLibSVMFile(sc, "/big/RCV1/v2/train6.libsvm") 12 | val t1=currentTime 13 | 14 | // Split data into training (90%) and test (10%). 15 | val splits = data.randomSplit(Array(0.9, 0.1), seed = 11L) 16 | val training = splits(0).cache() 17 | val test = splits(1) 18 | val t2=currentTime 19 | 20 | // Run training algorithm to build the model 21 | val numIterations = 100 22 | val model = SVMWithSGD.train(training, numIterations) 23 | 24 | val t3=currentTime 25 | 26 | // Clear the default threshold. 27 | model.clearThreshold() 28 | 29 | // Compute raw scores on the test set. 30 | val scoreAndLabels = test.map { point => 31 | val score = model.predict(point.features) 32 | (score, point.label) 33 | } 34 | 35 | val t4=currentTime 36 | 37 | // Get evaluation metrics. 
38 | val metrics = new BinaryClassificationMetrics(scoreAndLabels) 39 | val auROC = metrics.areaUnderROC() 40 | println("Area under ROC = " + auROC) 41 | 42 | println("load time %f, split %f, train %f, predict %f" format ((t1-t0)/1000f, 43 | (t2-t1)/1000f, (t3-t2)/1000f, (t4-t3)/1000f)) 44 | -------------------------------------------------------------------------------- /scripts/benchmarks/testVWLDA.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | time /code/vowpal_wabbit/vowpalwabbit/vw --lda 256 --lda_D 100000 --passes 3 --readable_model wordTopics.dat --bit_precision 18 --learning_rate 1.0 --lda_rho 0.1 --cache_file vw.cache --data /big/RCV1/v2/vw_sparse_lda_train.dat --lda_alpha 0.1 --random_weights true --power_t 0.5 --minibatch 1024 --initial_t 1.0 4 | 5 | # BIDMach options 6 | # opts.putBack = 1 7 | # opts.uiter = 1 8 | # opts.batchSize = 1024 9 | # opts.npasses = 3 10 | # opts.dim = 256 -------------------------------------------------------------------------------- /scripts/benchmarks/testVWLR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # time /code/vowpal_wabbit/vowpalwabbit/vw --oaa 103 --readable_model rcv1.model.txt --loss_function logistic -b 24 --adaptive --invariant -l 1 --cache_file vw.cache --passes 3 -d /big/RCV1/v2/vw_sparse_train.dat 4 | 5 | time /code/vowpal_wabbit/vowpalwabbit/vw --multilabel_oaa 104 --readable_model rcv1.model.txt --loss_function logistic -b 24 -l 1 --cache_file vw.cache --passes 3 -d ../../data/rcv1/train.vw 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /scripts/benchmarks/testVWkmeans.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | time /code/vowpal_wabbit/vowpalwabbit/vw --oaa 103 -f rcv1.model --loss_function logistic -b 24 --adaptive --invariant -l 1 --cache_file vw.cache --passes 1 -d /big/RCV1/v2/vw_sparse_train.dat 4 | 5 | 6 | -------------------------------------------------------------------------------- /scripts/bn_test.ssc: -------------------------------------------------------------------------------- 1 | // Script to test out BayesNet.scala for the general case. 
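// Assumed input layout for BayesNet.learner(states, dag, isFactorModel, data):
// data is an nnodes x nsamples matrix with one observed state per node per
// sample, dag is a sparse nnodes x nnodes adjacency matrix whose (i,j) entry is
// nonzero when node i is a parent of node j, and states holds each node's
// number of discrete states. The false flag selects the Bayes-net (DAG) code
// path; the factor-graph path is exercised by factorNet_test.ssc instead.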
2 | 3 | val data = loadFMat("gibbs_data/koller_data_1m_050perc.lz4") 4 | val dag = loadSMat("gibbs_data/koller_dag.lz4") 5 | val states = loadIMat("gibbs_data/koller_states.lz4") 6 | 7 | val (nn , opts) = BIDMach.models.BayesNet.learner(states , dag , false , data) 8 | opts.npasses = 10 9 | opts.useGPU = true 10 | opts.batchSize = 10000 11 | opts.what 12 | nn.train 13 | nn.modelmats(0).t 14 | //sys.exit 15 | -------------------------------------------------------------------------------- /scripts/cluster_destroy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 destroy $CLUSTER 8 | 9 | 10 | -------------------------------------------------------------------------------- /scripts/cluster_launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster2" 5 | fi 6 | 7 | # launch a cluster 8 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa -a "ami-b2bf04ca" -s 16 --instance-type=p2.xlarge --region=us-west-2 --zone=us-west-2a --vpc-id="vpc-c93fbdac" --subnet-id="subnet-75177210" --additional-tags='Group:DSS 2' launch $CLUSTER 9 | -------------------------------------------------------------------------------- /scripts/cluster_login.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | if [ ! ${1} == "" ]; then 8 | LOGIN="-n ${1} login" 9 | else 10 | LOGIN="login" 11 | fi 12 | 13 | # login to the master 14 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 ${LOGIN} $CLUSTER 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /scripts/cluster_mux.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import subprocess 3 | import time 4 | 5 | SPARK_SLAVE_PATH = '/opt/spark/conf/slaves' 6 | 7 | 8 | def tmux_cmd(cmd, fail_ok=False): 9 | if type(cmd) is str: 10 | cmd = cmd.split(' ') 11 | try: 12 | return subprocess.check_output(['tmux'] + cmd).strip().split('\n') 13 | except subprocess.CalledProcessError as e: 14 | if not fail_ok: 15 | raise e 16 | 17 | 18 | def send_cmd(pid, cmd): 19 | tmux_cmd(['send-keys', '-t', pid, cmd+'\n']) 20 | 21 | 22 | def main(): 23 | tmux_cmd('kill-window -t tail-workers', fail_ok=True) 24 | tmux_cmd('new-window -d -n tail-workers') 25 | 26 | pane_id = tmux_cmd('list-panes -t tail-workers -F #D')[0] 27 | tmux_cmd('split-window -d -h -t {}'.format(pane_id)) 28 | pane_ids = tmux_cmd('list-panes -t tail-workers -F #D') 29 | for pid in pane_ids: 30 | tmux_cmd('split-window -d -v -t {}'.format(pid)) 31 | pane_ids = tmux_cmd('list-panes -t tail-workers -F #D') 32 | 33 | with open(SPARK_SLAVE_PATH, 'r') as f: 34 | slave_addrs = list(f.readlines()) 35 | 36 | for pid, saddr in zip(pane_ids, slave_addrs): 37 | send_cmd(pid, 'su2') 38 | time.sleep(0.05) 39 | send_cmd(pid, 'ssh {}'.format(saddr)) 40 | time.sleep(0.1) 41 | send_cmd(pid, 'tail -f /tmp/bidmach_worker.log') 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /scripts/cluster_start.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | # start the cluster 8 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 --private-ips start $CLUSTER 9 | 10 | -------------------------------------------------------------------------------- /scripts/cluster_stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | # stop the cluster 8 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 stop $CLUSTER 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/collect_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import subprocess 4 | import sys 5 | import os 6 | import datetime 7 | 8 | def main(): 9 | files = sys.argv[1:] 10 | s = subprocess.check_output("python bidmach_ec2.py -k id_rsa -i ~/.ssh/id_rsa --region=us-west-2 get-slaves " + os.environ['CLUSTER'], shell=True) 11 | slaves = s.splitlines()[2:] 12 | dir = '/code/BIDMach/%s/%s' % (os.environ['CLUSTER'], datetime.datetime.now().strftime("%Y%m%d%H%M")) 13 | os.mkdir(dir) 14 | for s in slaves: 15 | slave_dir = '%s/%s' % (dir, s) 16 | os.mkdir(slave_dir) 17 | todostr = 'rsync -e "ssh -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no" -avz ubuntu@%s:/code/BIDMach/logs/log.0.0.txt %s/' % (s, slave_dir) 18 | print(todostr) 19 | subprocess.check_call(todostr, shell=True) 20 | todostr = 'rsync -e "ssh -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no" -avz ubuntu@%s:/code/BIDMach/scripts/logres* %s/' % (s, slave_dir) 21 | print(todostr) 22 | subprocess.check_call(todostr, shell=True) 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /scripts/criteonet.ssc: -------------------------------------------------------------------------------- 1 | val mdir = "../data/criteo/parts/" 2 | 3 | val (nn,opts) = Net.learner(mdir+"trainsorted%02d.smat.lz4",mdir+"trainlabel%02d.fmat.lz4"); 4 | opts.nend = 80; 5 | opts.lrate = 0.01f; 6 | opts.reg1weight = 0.0001f; 7 | opts.batchSize=100 8 | opts.npasses=5; 9 | opts.hasBias = true; 10 | opts.links = irow(1); 11 | //opts.pstep = 0.001f; 12 | //opts.aopts = opts; 13 | opts.texp = 0.4f; 14 | opts.nweight = 1e-4f 15 | 16 | val net = Net.dnodes3(6,10,1f,1,opts,1); 17 | opts.nodeset = net 18 | val model = nn.model.asInstanceOf[Net]; 19 | 20 | nn.train 21 | 22 | val res = nn.results(0,?) 
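// res is row 0 of the Learner's score log, with one entry per evaluation step,
// so its tail summarizes convergence. Minimal sketch for eyeballing the end of
// the log (the max(0, ...) guard handles runs with fewer than 20 logged steps):
val resTail = res(0, math.max(0, res.ncols - 20) -> res.ncols);
println("mean of last logged scores = %f" format mean(resTail, 2).v);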
23 | 24 | val testdata = loadSMat(mdir+"trainsorted%02d.smat.lz4" format opts.nend); 25 | val testlabels = loadFMat(mdir+"trainlabel%02d.fmat.lz4" format opts.nend); 26 | 27 | val (mm, mopts) = Net.predictor(model, testdata); 28 | 29 | mm.predict 30 | 31 | val preds=FMat(mm.preds(0)) 32 | 33 | val ll = DMat(ln(preds *@ testlabels + (1-preds) *@ (1-testlabels))) 34 | val rc = roc(preds, testlabels, 1-testlabels, 1000); 35 | 36 | (mean(ll), mean(rc)) 37 | 38 | -------------------------------------------------------------------------------- /scripts/distribute.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | while read slave; do 5 | echo "distributing to ${slave}" 6 | rsync -r "${1}/" "${slave}:${1}" 7 | done < /code/BIDMach/conf/slaves 8 | -------------------------------------------------------------------------------- /scripts/distribute_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | path=$1 4 | number=$2 5 | 6 | i=0 7 | while read slave; do 8 | slaves[$i]=$slave 9 | i=$((i+1)) 10 | done < /code/BIDMach/conf/slaves 11 | 12 | alen=$i 13 | echo ${slaves[*]} 14 | 15 | j=0 16 | k=0 17 | for i in `seq 0 $number`; do 18 | fromname=`printf $path $i` 19 | toname=`printf $path $j` 20 | echo scp $fromname ${slaves[$k]}:$toname 21 | scp $fromname ${slaves[$k]}:$toname 22 | k=$((k+1)) 23 | if [ ${k} -ge ${alen} ]; then 24 | k=0 25 | j=$((j+1)) 26 | fi 27 | done 28 | -------------------------------------------------------------------------------- /scripts/distribute_file.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | folder=`dirname ${1}` 3 | while read slave; do 4 | echo "distributing to ${slave}" 5 | rsync "${1}" "${slave}:${folder}" 6 | done < /code/BIDMach/conf/slaves 7 | -------------------------------------------------------------------------------- /scripts/distributed/cmudict-tail-reducer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cmd-mux 7 -n 8 -c 'tail -f /opt/BIDMach/logs/master_s2s_cmudict/matIdx{idx}.log' 3 | -------------------------------------------------------------------------------- /scripts/distributed/news-commentary-tail-reducer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cmd-mux 7 -n 8 -c 'tail -f /opt/BIDMach/logs/master_sts_2015-news-commentary-v10-fr-en/matIdx{idx}.log' 3 | -------------------------------------------------------------------------------- /scripts/distributed/tail-workers: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CMD_STR="cluster-mux -p su2 -c 'tail -f /tmp/bidmach_worker.log'" 3 | if [[ $(whoami) != "aleks" ]]; then 4 | sudo su aleks -c "$CMD_STR" 5 | else 6 | eval $CMD_STR 7 | fi 8 | -------------------------------------------------------------------------------- /scripts/distributed/testrecv.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import
BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | import scala.concurrent.Future 18 | import scala.concurrent.ExecutionContext.Implicits.global 19 | 20 | Mat.checkMKL(false) 21 | Mat.checkCUDA 22 | 23 | val data_dir = "/mnt/BIDMach/data/MNIST8M/parts/" 24 | val (nn, nnopts) = GLM.learner(data_dir+"data%02d.fmat.lz4", data_dir+"cats%02d.fmat.lz4") 25 | 26 | nnopts.useGPU = true; 27 | // nnopts.nstart = 0; 28 | // nnopts.nend = 0; 29 | nnopts.order = 0; 30 | nnopts.lookahead = 2; 31 | nnopts.featType = 1; 32 | nnopts.links = 2*iones(10,1); 33 | nnopts.eltsPerSample = 300; 34 | nnopts.targets = mkdiag(ones(10,1)) \ zeros(10, 784); 35 | nnopts.rmask = zeros(1,10) \ ones(1, 784); 36 | 37 | nnopts.batchSize = 500; 38 | nnopts.npasses = 1; 39 | nnopts.lrate = 0.001; // for logistic 40 | 41 | val w = new Worker(); 42 | val wopts = w.opts; 43 | wopts.trace = 4; 44 | wopts.machineTrace = 1; 45 | 46 | w.start(nn) 47 | 48 | nn.paused = true 49 | -------------------------------------------------------------------------------- /scripts/distributed/testsend.ssc: -------------------------------------------------------------------------------- 1 | import java.net.{InetAddress,InetSocketAddress} 2 | import BIDMach.allreduce.{Master,Worker,Command} 3 | 4 | var addresses = scala.io.Source.fromFile("/opt/spark/conf/slaves").getLines. 5 | map(InetAddress.getByName(_).getHostAddress()). 
6 | map(new InetSocketAddress(_, 50050)).toArray 7 | 8 | // addresses = addresses.slice(0, 1) 9 | 10 | val m = new Master(); 11 | val opts = m.opts; 12 | opts.trace = 3; 13 | opts.intervalMsec = 2000; 14 | //opts.limitFctn = Master.powerLimitFctn 15 | opts.limit = 1000000 16 | opts.timeScaleMsec = 2e-3f 17 | opts.permuteAlways = false 18 | 19 | opts.machine_threshold = 0.75 20 | opts.min_time_to_wait_for_all = 3000 21 | opts.time_threshold = 5000 22 | 23 | 24 | val nmachines = addresses.length; 25 | 26 | val gmods = irow(nmachines); 27 | val gmachines = irow(0->nmachines); 28 | 29 | m.init 30 | m.config(gmods, gmachines, addresses) 31 | m.setMachineNumbers 32 | m.sendConfig 33 | 34 | // m.parCall((w) => { w.learner.opts.npasses = 10; "done" }) 35 | // m.parCall((w) => { w.learner.opts.asInstanceOf[GLM.FGOptions].nstart = w.imach * 20; "done"}) 36 | // m.parCall((w) => { w.learner.opts.asInstanceOf[GLM.FGOptions].nend = (w.imach+1) * 20; "done"}) 37 | // m.parCall((w) => { w.learner.paused = true; "done"}) 38 | // m.parCall((w) => { w.learner.train; "not reached" }) // this will hang, just wait for it to timeout 39 | 40 | // m.startLearners 41 | // m.startUpdates 42 | 43 | //Testing by Max 44 | -------------------------------------------------------------------------------- /scripts/distributed/worker_distr_lr_rcv.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | Mat.checkMKL(false) 18 | Mat.checkCUDA 19 | 20 | val datadir = "/mnt/BIDMach/data/rcv1/" 21 | 22 | val a0 = loadSMat(datadir + "docs.smat.lz4") 23 | val c0 = loadFMat(datadir + "cats.fmat.lz4")(0->100,?) 
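// Columns of a0/c0 are documents and their labels; shuffle them so minibatches
// are unbiased samples. sort2 on a random key vector returns the sorted keys
// together with the permutation ii that achieves the sort, and indexing both
// matrices with the same ii keeps documents aligned with their labels.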
24 | val rr = rand(c0.ncols,1) 25 | val (ss, ii) = sort2(rr) 26 | val a = a0(?,ii) 27 | val c = c0(?,ii) 28 | 29 | setNumThreads(1) 30 | val (nn,opts)=GLM.learnerX(a,c,1) 31 | opts.batchSize=20000 32 | opts.lrate = 0.02f 33 | opts.npasses = 10 34 | opts.reg1weight = 0.0 35 | opts.links = iones(103,1) 36 | opts.addConstFeat = true 37 | opts.aopts = opts 38 | opts.doVariance = true 39 | opts.evalStep = 3 40 | opts.useGPU = true 41 | 42 | val w = new Worker() 43 | val wopts = w.opts 44 | wopts.trace = 4 45 | wopts.machineTrace = 1 46 | 47 | w.start(nn) 48 | 49 | nn.paused = true 50 | -------------------------------------------------------------------------------- /scripts/distributed/worker_mnist_rf.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | Mat.checkMKL(false) 18 | Mat.checkCUDA 19 | 20 | val mdir = "/mnt/BIDMach/data/MNIST8M/parts/" 21 | val (nn, opts) = RandomForest.learner(mdir+"data%02d.fmat.lz4", mdir+"cats%02d.imat.lz4") 22 | 23 | opts.nend = 70 24 | opts.batchSize = 20000 25 | opts.depth = 30 26 | opts.ntrees = (32 / 4) 27 | opts.nsamps = 32 28 | opts.nnodes = 500000 29 | opts.nbits = 16 30 | opts.gain = 0.001f 31 | opts.ncats = 10 32 | 33 | val w = new Worker() 34 | val wopts = w.opts 35 | wopts.trace = 4 36 | wopts.machineTrace = 1 37 | 38 | w.start(nn) 39 | 40 | nn.paused = true 41 | -------------------------------------------------------------------------------- /scripts/factorNet_test.ssc: -------------------------------------------------------------------------------- 1 | // Script to test out BayesNet.scala for the general case. 2 | 3 | val data = loadFMat("data/factorNet/data.txt") 4 | val dagDense = loadIMat("data/factorNet/factorSet.txt") 5 | val states = loadIMat("data/factorNet/statePerNode.txt") 6 | 7 | // convert the dag to sparse 8 | val dag = sparse(dagDense) 9 | 10 | val (nn , opts) = BIDMach.models.BayesNet.learner(states , dag , true , data) 11 | opts.npasses = 2 12 | opts.useGPU = false 13 | opts.batchSize = 2 14 | opts.updateAll = true 15 | opts.what 16 | nn.train 17 | nn.modelmats(0).t 18 | -------------------------------------------------------------------------------- /scripts/factorNet_test2.ssc: -------------------------------------------------------------------------------- 1 | // Script to test out BayesNet.scala for factor graphs. 
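// Assumed input layout, mirroring factorNet_test.ssc: each row of factorSet.txt
// lists the variable indices that belong to one factor (loaded dense, then
// converted to sparse below), statePerNode.txt gives per-variable cardinalities,
// and the true flag tells BayesNet.learner to treat the model as a factor graph.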
2 | 3 | val data = loadFMat("data/factorNet/test2/data.txt") 4 | val dagDense = loadIMat("data/factorNet/test2/factorSet.txt") 5 | val states = loadIMat("data/factorNet/test2/statePerNode.txt") 6 | val dag = sparse(dagDense) 7 | 8 | val (nn , opts) = BIDMach.models.BayesNet.learner(states , dag , true , data) 9 | opts.npasses = 50 10 | opts.useGPU = false 11 | opts.batchSize = 10 12 | opts.what 13 | nn.train 14 | nn.modelmats(0).t 15 | -------------------------------------------------------------------------------- /scripts/futures.ssc: -------------------------------------------------------------------------------- 1 | import scala.concurrent.future 2 | import scala.concurrent.ExecutionContextExecutor 3 | import java.io._ 4 | import scala.concurrent.ExecutionContext.Implicits.global 5 | 6 | def test(n:Int) = { 7 | for (i <- 0 until n) { 8 | future { 9 | println("started %d" format i); 10 | Thread.`yield` 11 | println("done %d" format i); 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /scripts/getcirfar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd "${BIDMACH_SCRIPTS}" 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | echo "Loading CIFAR10 data" 18 | 19 | CIFAR10="${BIDMACH_SCRIPTS}/../data/CIFAR10" 20 | mkdir -p ${CIFAR10}/parts 21 | cd ${CIFAR10} 22 | 23 | if [ ! -e t10k-labels-idx1-ubyte ]; then 24 | wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 25 | tar -xf cifar-10-binary.tar.gz 26 | rm -f cifar-10-binary.tar.gz 27 | mv cifar-10-batches-bin/* . 28 | rm -rf cifar-10-batches-bin 29 | fi 30 | 31 | echo "Processing CIFAR10 data" 32 | cd "${BIDMACH_SCRIPTS}" 33 | ../bidmach processcifar10.ssc -------------------------------------------------------------------------------- /scripts/getcriteo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../data/criteo 4 | mkdir -p parts 5 | 6 | split -a 2 -d -l 500000 train.txt parts/train 7 | split -a 2 -d -l 500000 test.txt parts/test 8 | 9 | cd ../../scripts 10 | 11 | bidmach readcriteo.ssc 12 | 13 | bidmach buildcriteo.ssc 14 | 15 | bidmach sortcriteo.ssc 16 | -------------------------------------------------------------------------------- /scripts/getdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | BIDMACH_ROOT="${BASH_SOURCE[0]}" 5 | if [ ! 
`uname` = "Darwin" ]; then 6 | BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"` 7 | else 8 | while [ -L "${BIDMACH_ROOT}" ]; do 9 | BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"` 10 | done 11 | fi 12 | BIDMACH_ROOT=`dirname "${BIDMACH_ROOT}"` 13 | BIDMACH_ROOT=`cd ${BIDMACH_ROOT}/..;pwd -P` 14 | BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed 's+/cygdrive/\(.\)+\1:+' )" 15 | 16 | cd "${BIDMACH_ROOT}/scripts" 17 | 18 | ./getrcv1.sh 19 | 20 | ./getuci.sh nips 21 | 22 | ./getuci.sh nytimes 23 | 24 | ./getdigits.sh 25 | 26 | ./getmovies.sh 27 | 28 | ./getmnist.sh 29 | 30 | ./getcifar10.sh 31 | 32 | ./getmnist8m.sh 33 | 34 | # this one is huge, make sure you really want it 35 | # ./getuci.sh pubmed 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /scripts/getdigits.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading arabic digits data" 20 | 21 | UCI=${BIDMACH_SCRIPTS}/../data/uci 22 | cd $UCI 23 | 24 | if [ ! -e Train_Arabic_Digit.txt ]; then 25 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/00195/Train_Arabic_Digit.txt 26 | fi 27 | 28 | sed -e 's/^[[:space:]]*$/0 0 0 0 0 0 0 0 0 0 0 0 0/g' Train_Arabic_Digit.txt > arabic.txt 29 | cd ${UCI} 30 | #${BIDMACH_SCRIPTS}/../bidmach "-e" "BIDMach.Experiments.DIGITS.preprocess(\"${UCI}/\",\"arabic\")" 31 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/getdigits.ssc 32 | 33 | if [ -e "arabic.txt" ]; then 34 | rm arabic.txt 35 | fi 36 | -------------------------------------------------------------------------------- /scripts/getdigits.ssc: -------------------------------------------------------------------------------- 1 | object DIGITS { 2 | def preprocess(dict:String, fname:String) { 3 | println("Processing digits"); 4 | val mat = loadFMat(dict+fname+".txt") 5 | val srow = sum(abs(mat),2) 6 | val inds = IMat((cumsum(srow==0)-1)/660) 7 | val ii = find(srow > 0) 8 | val mm = mat(ii,?) 9 | val inn = inds(ii,?) 10 | saveFMat(dict+fname+".fmat.lz4", mm.t) 11 | val cats = zeros(mm.nrows, maxi(inn).v + 1) 12 | cats(icol(0->(inn.nrows)) + inn*mm.nrows) = 1f 13 | saveFMat(dict+fname+"_cats.fmat.lz4", cats.t) 14 | } 15 | } 16 | 17 | DIGITS.preprocess("","arabic") 18 | println("done"); 19 | sys.exit -------------------------------------------------------------------------------- /scripts/getmnist8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run this to load the MNIST8M data 3 | 4 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 5 | if [ ! 
`uname` = "Darwin" ]; then 6 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 7 | export WGET='wget -c --no-check-certificate' 8 | else 9 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 10 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 11 | done 12 | export WGET='curl -C - --retry 20 -O' 13 | fi 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | 20 | echo "Loading MNIST8M data" 21 | 22 | MNIST8M="${BIDMACH_SCRIPTS}/../data/MNIST8M" 23 | mkdir -p ${MNIST8M}/parts 24 | cd ${MNIST8M} 25 | 26 | ${WGET} http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist8m.bz2 27 | 28 | echo "Uncompressing MNIST8M data" 29 | 30 | bunzip2 -c mnist8m.bz2 > mnist8m.libsvm 31 | 32 | echo "Splitting MNIST8M data" 33 | 34 | if [ ! `uname` = "Darwin" ]; then 35 | split -l 100000 -d mnist8m.libsvm parts/part 36 | else 37 | split -l 100000 mnist8m.libsvm parts/part 38 | j=0 39 | for i in {a..z}{a..z}; do 40 | jj=`printf "%02d" $j` 41 | mv parts/part$i parts/part$jj 42 | j=$((j+1)) 43 | if [ $j -gt 80 ]; then break; fi 44 | done 45 | fi 46 | 47 | cd ${MNIST8M}/parts 48 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/processmnist8m.ssc 49 | -------------------------------------------------------------------------------- /scripts/getmnist8m_finesplit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run this to load the MNIST8M data 3 | 4 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 5 | if [ ! `uname` = "Darwin" ]; then 6 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 7 | export WGET='wget -c --no-check-certificate' 8 | else 9 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 10 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 11 | done 12 | export WGET='curl -C - --retry 20 -O' 13 | fi 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | if [[ -z "$BIDMACH_DATA_HOME" ]]; then 20 | echo '$BIDMACH_DATA_HOME environment variable not set, aborting!' 1>&2 21 | exit 1 22 | fi 23 | 24 | echo "Loading MNIST8M data" 25 | MNIST8M="${BIDMACH_DATA_HOME}/MNIST8M" 26 | mkdir -p ${MNIST8M}/parts_fine 27 | cd ${MNIST8M} 28 | 29 | ${WGET} http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist8m.bz2 30 | 31 | echo "Uncompressing MNIST8M data" 32 | 33 | bunzip2 -c mnist8m.bz2 > mnist8m.libsvm 34 | 35 | echo "Splitting MNIST8M data" 36 | 37 | if [ ! `uname` = "Darwin" ]; then 38 | split -l 10000 -a 3 -d mnist8m.libsvm parts_fine/part 39 | else 40 | split -l 10000 -a 3 mnist8m.libsvm parts_fine/part 41 | j=0 42 | for i in {a..z}{a..z}{a..z}; do 43 | jj=`printf "%03d" $j` 44 | mv parts_fine/part$i parts_fine/part$jj 45 | j=$((j+1)) 46 | if [ $j -gt 800 ]; then break; fi 47 | done 48 | fi 49 | 50 | cd ${MNIST8M}/parts_fine 51 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/processmnist8m_finesplit.ssc 52 | -------------------------------------------------------------------------------- /scripts/getmovies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! 
`uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading movielens 10M data" 20 | 21 | ML=${BIDMACH_SCRIPTS}/../data/movielens 22 | mkdir -p ${ML} 23 | cd ${ML} 24 | 25 | if [ ! -e ml-10m.zip ]; then 26 | ${WGET} http://files.grouplens.org/datasets/movielens/ml-10m.zip 27 | fi 28 | 29 | unzip -o ml-10m.zip 30 | cd ml-10M100K 31 | ./split_ratings.sh 32 | for i in 1 2 3 4 5 a b; do 33 | mv r${i}.train r${i}.train.txt 34 | mv r${i}.test r${i}.test.txt 35 | done 36 | cd ${BIDMACH_SCRIPTS} 37 | 38 | ../bidmach getmovies.ssc 39 | -------------------------------------------------------------------------------- /scripts/getmovies.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/movielens/ml-10M100K/" 2 | val nusers = 71567 3 | val nmovies = 65134 4 | 5 | println("Converting movies") 6 | for (i <- List("1","2","3","4","5","a","b")) { 7 | val a = loadDMat(dir + "r" + i + ".train.txt"); 8 | val sa = sparse(IMat(a(?,1))-1, IMat(a(?,0))-1, FMat(a(?,2)), nmovies, nusers); 9 | sa.check 10 | saveSMat(dir+"../train%s.smat.lz4" format i, sa); 11 | 12 | val b = loadDMat(dir + "r" + i + ".test.txt"); 13 | val sb = sparse(IMat(b(?,1))-1, IMat(b(?,0))-1, FMat(b(?,2)), nmovies, nusers); 14 | sb.check 15 | saveSMat(dir+"../test%s.smat.lz4" format i, sb); 16 | print("."); 17 | } 18 | println("") 19 | val a=loadSMat(dir+"../train1.smat.lz4"); 20 | val ta=loadSMat(dir+"../test1.smat.lz4"); 21 | val aa = a + ta; 22 | val (ii, jj, vv) = find3(aa); 23 | val rs = rand(ii.length, 1) < 0.1; 24 | val itrain = find(rs == 0); 25 | val itest = find(rs); 26 | val train = sparse(ii(itrain), jj(itrain), vv(itrain), nmovies, nusers); 27 | val test = sparse(ii(itest), jj(itest), vv(itest), nmovies, nusers); 28 | saveSMat(dir+"../train.smat.lz4", train); 29 | saveSMat(dir+"../test.smat.lz4", test); 30 | println("done"); 31 | sys.exit -------------------------------------------------------------------------------- /scripts/getpubmed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run this to load and partition pubmed data 3 | 4 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 5 | if [ ! 
`uname` = "Darwin" ]; then 6 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 7 | export WGET='wget --no-check-certificate' 8 | else 9 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 10 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 11 | done 12 | export WGET='curl --retry 2 -O' 13 | fi 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | 20 | echo "Loading pubmed data" 21 | 22 | ${BIDMACH_SCRIPTS}/getuci.sh pubmed 23 | 24 | cd "${BIDMACH_SCRIPTS}/../data/uci" 25 | mkdir -p pubmed_parts 26 | 27 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/processpubmed.ssc 28 | -------------------------------------------------------------------------------- /scripts/getuci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl -C - --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading $1 data" 20 | 21 | UCI="${BIDMACH_SCRIPTS}/../data/uci/${1}" 22 | mkdir -p ${UCI} 23 | cd ${UCI} 24 | 25 | if [ ! -e docword.txt.gz ]; then 26 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/docword.${1}.txt.gz 27 | mv docword.${1}.txt.gz docword.txt.gz 28 | fi 29 | if [ ! -e vocab.txt ]; then 30 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.${1}.txt 31 | mv vocab.${1}.txt vocab.txt 32 | fi 33 | 34 | echo "Uncompressing docword.${1}.txt.gz" 35 | gunzip -c "docword.txt.gz" | tail -n +4 > "docword.txt" 36 | ${BIDMACH_SCRIPTS}/../cbin/tparse.exe -i "docword.txt" -f "${UCI}/../../uci_fmt.txt" -o "" -m "" -d " " -c 37 | ${BIDMACH_SCRIPTS}/../cbin/tparse.exe -i "vocab.txt" -f "${UCI}/../../uci_wfmt.txt" -o "" -m "" -c 38 | cd ${BIDMACH_SCRIPTS}/.. 
39 | cd ${UCI} 40 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/getuci.ssc 41 | mv "smat.lz4" "../${1}.smat.lz4" 42 | mv "term.sbmat.gz" "../${1}.term.sbmat.gz" 43 | mv "term.imat.gz" "../${1}.term.imat.gz" 44 | if [ -e "docword.txt" ]; then 45 | echo "clearing up" 46 | rm docword.txt 47 | fi 48 | -------------------------------------------------------------------------------- /scripts/getuci.ssc: -------------------------------------------------------------------------------- 1 | 2 | 3 | object UCI { 4 | def preprocess(dict:String, fname:String) { 5 | println("Processing "+fname); 6 | tic; 7 | val cols = loadIMat(dict+fname+"cols.imat.gz") 8 | val rows = loadIMat(dict+fname+"rows.imat.gz") 9 | val values = loadFMat(dict+fname+"vals.fmat.gz") 10 | val m = cols2sparse(rows, cols, values, true, 1) 11 | saveSMat(dict+fname+"smat.lz4", m) 12 | } 13 | } 14 | 15 | UCI.preprocess("","") 16 | println("done"); 17 | sys.exit 18 | 19 | -------------------------------------------------------------------------------- /scripts/getw2vdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPTDIR=`pwd` 4 | 5 | mkdir -p ../data/word2vec/raw 6 | cd ../data/word2vec/raw 7 | 8 | if [ ! -e 1-billion-word-language-modeling-benchmark-r13output.tar.gz ]; then 9 | echo "Downloading" 10 | wget http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz 11 | fi 12 | 13 | if [ ! -d 1-billion-word-language-modeling-benchmark-r13output ]; then 14 | echo "Uncompressing" 15 | tar xvzf 1-billion-word-language-modeling-benchmark-r13output.tar.gz 16 | # fix the misplaced first news item 17 | mv 1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100 \ 18 | 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled 19 | fi 20 | 21 | cd 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled 22 | 23 | FILES=`echo news.en*00100 | sed 's/ /,/g'` 24 | 25 | mkdir -p ${SCRIPTDIR}/../data/word2vec/tokenized 26 | mkdir -p ${SCRIPTDIR}/../data/word2vec/tokenized2 27 | mkdir -p ${SCRIPTDIR}/../data/word2vec/data 28 | 29 | ${SCRIPTDIR}/../cbin/tparse2.exe -i "${FILES}" -f ../../fmt.txt -o ${SCRIPTDIR}/../data/word2vec/tokenized/ -c 30 | 31 | cd ${SCRIPTDIR}/../data/word2vec/raw/1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/ 32 | 33 | FILES=`echo news.en*00050 | sed 's/ /,/g'` 34 | 35 | ${SCRIPTDIR}/../cbin/tparse2.exe -i "${FILES}" -f ../../fmt.txt -o ${SCRIPTDIR}/../data/word2vec/tokenized2/ -c 36 | 37 | cd ${SCRIPTDIR} 38 | 39 | bidmach getw2vdata.ssc 40 | 41 | 42 | -------------------------------------------------------------------------------- /scripts/getyearprediction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! 
`uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget -c --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl -C - --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading $1 data" 20 | 21 | YP="${BIDMACH_SCRIPTS}/../data/YearPrediction" 22 | mkdir -p ${YP} 23 | cd ${YP} 24 | 25 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip 26 | 27 | unzip YearPredictionMSD.txt.zip 28 | 29 | cd ${BIDMACH_SCRIPTS} 30 | 31 | bidmach getyearprediction.ssc 32 | 33 | -------------------------------------------------------------------------------- /scripts/getyearprediction.ssc: -------------------------------------------------------------------------------- 1 | 2 | val wdir = "../data/YearPrediction/" 3 | val a = loadFMat(wdir+"YearPredictionMSD.txt"); 4 | 5 | val labels = a(?,0); 6 | val data = a(?,1->a.ncols); 7 | 8 | saveFMat(wdir+"train.fmat.lz4", data.t); 9 | saveFMat(wdir+"cats.fmat.lz4", labels.t); 10 | saveIMat(wdir+"cats.imat.lz4", IMat(labels.t)); 11 | 12 | sys.exit; -------------------------------------------------------------------------------- /scripts/higgsdnn.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/Higgs/parts/" 2 | 3 | val (nn, opts) = Net.learner(dir+"data%03d.fmat.lz4", dir+"label%03d.fmat.lz4") 4 | 5 | opts.nend = 10 6 | opts.npasses = 5 7 | opts.batchSize = 200 8 | opts.reg1weight = 0.0001; 9 | opts.hasBias = true; 10 | opts.links = iones(1,1); 11 | opts.lrate = 0.01f; 12 | opts.texp = 0.4f; 13 | opts.evalStep = 311; 14 | opts.nweight = 1e-4f 15 | val net = Net.dnodes3(4, 500, 0.6f, 1, opts, 2); 16 | opts.nodeset = net 17 | opts.lookahead = 0; 18 | 19 | val model = nn.model.asInstanceOf[Net] 20 | 21 | nn.train 22 | 23 | val ta = loadFMat(dir + "data%03d.fmat.lz4" format 10); 24 | val tc = loadFMat(dir + "label%03d.fmat.lz4" format 10); 25 | 26 | val (mm,mopts) = Net.predictor(model, ta); 27 | 28 | mopts.batchSize=1000 29 | 30 | mm.predict 31 | 32 | val pc = FMat(mm.preds(0)) 33 | 34 | val rc = roc(pc, tc, 1-tc, 1000); 35 | 36 | mean(rc) 37 | 38 | -------------------------------------------------------------------------------- /scripts/higgsprep.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir = "../data/uci/Higgs/parts/" 3 | 4 | for (i <- 0 until 110) { 5 | val a = HMat.loadFMatTxt(dir+"data%03d" format i, null, 0); 6 | val targ = a(?,0); 7 | val data = a(?,1->a.ncols); 8 | saveFMat(dir + "data%03d.fmat.lz4" format i, data.t); 9 | saveIMat(dir + "label%03d.imat.lz4" format i, int(targ.t)); 10 | saveFMat(dir + "label%03d.fmat.lz4" format i, targ.t); 11 | print("."); 12 | } -------------------------------------------------------------------------------- /scripts/higgsrf.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/Higgs/parts/" 2 | 3 | val (nn, opts) = RandomForest.learner(dir+"data%03d.fmat.lz4", dir+"label%03d.imat.lz4") 4 | 5 | opts.nend = 10 6 | opts.batchSize = 20000 7 | opts.depth = 30 8 | opts.ntrees = 128 9 | opts.nsamps = 8 10 | opts.nnodes = 400000 11 | opts.nbits = 16 12 | opts.ncats = 2; 13 | 
opts.regression = true; 14 | opts.gain = 0.001f 15 | 16 | val rf = nn.model.asInstanceOf[RandomForest] 17 | 18 | nn.train 19 | 20 | val ta = loadFMat(dir + "data%03d.fmat.lz4" format 10); 21 | val tc = loadIMat(dir + "label%03d.imat.lz4" format 10); 22 | 23 | val (mm,mopts) = RandomForest.predictor(rf, ta); 24 | 25 | mopts.batchSize=1000 26 | 27 | mm.predict 28 | 29 | val pc = FMat(mm.preds(0)) 30 | 31 | val rc = roc(pc, tc, 1-tc, 1000); 32 | 33 | mean(rc) 34 | 35 | -------------------------------------------------------------------------------- /scripts/ica_test.ssc: -------------------------------------------------------------------------------- 1 | 2 | val output = loadFMat("ica_output.txt") 3 | val (nn, opts) = BIDMach.models.ICA.learner(output) 4 | opts.npasses = 20 5 | nn.train 6 | 7 | val predW = FMat(nn.modelmats(0)) 8 | val predMean = FMat(nn.modelmats(1)) 9 | predMean.t 10 | 11 | val result = FMat(predW * (output - predMean)) 12 | HMat.saveFMatTxt("ica_pred_source.txt", result) 13 | sys.exit 14 | -------------------------------------------------------------------------------- /scripts/make_bayesnet_data.py: -------------------------------------------------------------------------------- 1 | # I'll be using this code to generate some data for a simple Bayesian network. 2 | # (c) 2015 by Daniel Seita 3 | 4 | import numpy as np 5 | 6 | ncols = 1000000 # Change as needed 7 | nrows = 5 8 | data = np.zeros([nrows,ncols]) 9 | # First, handle variables X_0 (intelligence) and X_1 (difficulty) 10 | data[0,:] = np.random.choice(2, ncols, p = [0.7, 0.3]) 11 | data[1,:] = np.random.choice(2, ncols, p = [0.6, 0.4]) 12 | third = [] 13 | fourth = [] 14 | fifth = [] 15 | for i in range(ncols): 16 | # Variable X_2 (SAT) 17 | if data[0,i] == 0: 18 | third.append( np.random.choice(2, 1, p = [0.95, 0.05])[0] ) 19 | else: 20 | third.append( np.random.choice(2, 1, p = [0.2, 0.8])[0] ) 21 | # Variable X_3 (grade) 22 | if (data[0,i] == 0 and data[1,i] == 0): 23 | fourth.append( np.random.choice(3, 1, p = [0.3, 0.4, 0.3])[0] ) 24 | elif (data[0,i] == 0 and data[1,i] == 1): 25 | fourth.append( np.random.choice(3, 1, p = [0.05, 0.25, 0.7])[0] ) 26 | elif (data[0,i] == 1 and data[1,i] == 0): 27 | fourth.append( np.random.choice(3, 1, p = [0.9, 0.08, 0.02])[0] ) 28 | else: 29 | fourth.append( np.random.choice(3, 1, p = [0.5, 0.3, 0.2])[0] ) 30 | # Variable X_4 (letter) 31 | if fourth[i] == 0: 32 | fifth.append( np.random.choice(2, 1, p = [0.1, 0.9])[0] ) 33 | elif fourth[i] == 1: 34 | fifth.append( np.random.choice(2, 1, p = [0.4, 0.6])[0] ) 35 | else: 36 | fifth.append( np.random.choice(2, 1, p = [0.99, 0.01])[0] ) 37 | data[2,:] = third 38 | data[3,:] = fourth 39 | data[4,:] = fifth 40 | np.savetxt('dataStudent_' + str(ncols) + '.txt', data, fmt='%i') 41 | -------------------------------------------------------------------------------- /scripts/mnistkmeans.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/MNIST8M/parts/" 2 | val (nn, opts) = KMeans.learner(dir+"alls%02d.fmat.lz4"); 3 | 4 | val test = loadFMat(dir+"alls80.fmat.lz4"); 5 | val testdata = test.copy; 6 | testdata(0->10, ?) 
= 0; 7 | val (vbest, ibest) = maxi2(test); 8 | 9 | opts.dim = 256; 10 | opts.nend = 80; 11 | opts.batchSize = 5000; 12 | opts.npasses = 10; 13 | 14 | nn.train 15 | val centroids = FMat(nn.modelmats(0)); 16 | 17 | val (mm, opts) = KMeans.predictor(nn.model, testdata); 18 | 19 | mm.predict 20 | val preds = IMat(mm.preds(0)) 21 | 22 | val (dmy, cmap) = maxi2(centroids, 2); 23 | 24 | mean(float(ibest == cmap(preds))) 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /scripts/mnistkmeans2.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/MNIST8M/parts/" 2 | val (nn, opts) = KMeans.learner(dir+"alls%02d.fmat.lz4"); 3 | 4 | val test = loadFMat(dir+"alls70.fmat.lz4"); 5 | val testdata = test.copy; 6 | testdata(0->10, ?) = 0; 7 | val (vbest, ibest) = maxi2(test); 8 | 9 | opts.dim = 1000; 10 | opts.nend = 40; 11 | opts.batchSize = 5000; 12 | opts.npasses = 10; 13 | opts.useGPU=false 14 | 15 | nn.train 16 | val centroids = FMat(nn.modelmats(0)); 17 | 18 | val (mm, opts) = KMeans.predictor(nn.model, testdata); 19 | 20 | mm.predict 21 | val preds = IMat(mm.preds(0)) 22 | 23 | val (dmy, cmap) = maxi2(centroids, 2); 24 | 25 | mean(float(ibest == cmap(preds))) 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /scripts/mnistlr.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/MNIST8M/parts/" 2 | val (nn, opts) = GLM.learner(List(dir+"part%02d.fmat.lz4",dir+"cats%02d.fmat.lz4")) 3 | 4 | opts.useGPU = true; 5 | opts.nend = 70; 6 | opts.order = 0; 7 | opts.lookahead = 2; 8 | opts.featType = 1; 9 | opts.links = 2*iones(10,1); 10 | opts.eltsPerSample = 300; 11 | opts.targets = mkdiag(ones(10,1)) \ zeros(10, 784); 12 | opts.rmask = zeros(1,10) \ ones(1, 784); 13 | 14 | opts.batchSize = 500; 15 | opts.npasses = 1; 16 | opts.lrate = 0.001; // for logistic 17 | 18 | nn.train 19 | 20 | println("Done training. 
Checking training set accuracy...") 21 | 22 | val randPartNum = scala.util.Random.nextInt(80) 23 | 24 | var test = loadFMat(dir+"data%02d.fmat.lz4" format randPartNum) 25 | val tcats = loadFMat(dir+"cats%02d.fmat.lz4" format randPartNum) 26 | val tcat = maxi2(tcats, 1)._2 27 | 28 | val pmodel = new GLM(new GLM.PredOptions()); 29 | pmodel.copyFrom(nn.model); 30 | val popts = pmodel.opts.asInstanceOf[GLM.Opts] 31 | popts.targmap = opts.targmap; 32 | popts.links = opts.links; 33 | popts.targets = null 34 | popts.iweight = opts.iweight; 35 | popts.lim = opts.lim; 36 | popts.hashFeatures = opts.hashFeatures; 37 | popts.hashBound1 = opts.hashBound1; 38 | popts.hashBound2 = opts.hashBound2; 39 | 40 | val (pp, ppopts) = GLM.predictor(pmodel, test) 41 | pp.predict 42 | 43 | val preds = FMat(pp.preds(0)) 44 | 45 | val rocs = roc2(preds, tcats, 1-tcats, 100) 46 | 47 | println("Training AUCs:\n%s" format ((0 to 9) on mean(rocs))) 48 | -------------------------------------------------------------------------------- /scripts/networks/evalAlexnet.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.layers._ 2 | 3 | val traindir = "../../data/ImageNet/train/"; 4 | //val traindir = "/home/jfc/data/ImageNet/2012/BIDMach/train/"; 5 | val testdir = "../../data/ImageNet/val/"; 6 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 7 | val trainlabels = traindir+"label%04d.imat.lz4"; 8 | val testdata = testdir+"partNCHW%04d.bmat.lz4"; 9 | val testlabels = testdir+"label%04d.imat.lz4"; 10 | val testpreds = testdir+"pred%04d.fmat.lz4"; 11 | 12 | val (nn, opts) = Net.gradLearner(traindata, trainlabels); 13 | val net = nn.model.asInstanceOf[Net] 14 | 15 | // Load the most recent checkpoint matching the checkpoint filename template 16 | opts.checkPointFile = "../../models/AlexnetFullyTrained/alexnet%03d/" 17 | nn.loadCheckPoint(); 18 | opts.checkPointFile = null; 19 | 20 | // Enter the number of epochs completed already 21 | val doneEpochs = 79; 22 | val lrinit = 1e-2f; 23 | 24 | def lr_update(ipass0:Float, istep:Float, frac:Float):Float = { 25 | val ipass = ipass0 + doneEpochs; 26 | val lr = if (ipass < 20) { 27 | lrinit 28 | } else if (ipass < 40) { 29 | lrinit/10 30 | } else lrinit/100 31 | lr 32 | } 33 | 34 | opts.logfile = "logAlexnet_%fc.txt" format (lrinit); 35 | opts.lr_policy = lr_update _; 36 | opts.npasses = 1; 37 | 38 | nn.launchTrain; 39 | 40 | println("Examine the 'nn' variable to track learning state.\n"); 41 | 42 | 43 | def validate = { 44 | val (mm, mopts) = Net.predictor(net, testdata, testlabels, testpreds); 45 | mopts.batchSize = opts.batchSize 46 | mopts.nodeset(mopts.nodeset.length-1).asInstanceOf[SoftmaxOutputNode].lossType=SoftmaxOutputLayer.TargetProbs 47 | mm.predict; 48 | println("Accuracy = %f" format mean(mm.results(0,?),2).v); 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /scripts/networks/getImageNet.ssc: -------------------------------------------------------------------------------- 1 | val doNCHW = true; 2 | val tt = "val"; 3 | 4 | val imagenetroot = "/data/ImageNet/2012resized/"+tt+"/"; 5 | val dataroot = "../../data/ImageNet/"; 6 | val savefname = if (doNCHW) tt+"/partNCHW%04d.bmat.lz4" else tt+"/part%04d.bmat.lz4"; 7 | val labelfname = tt+"/label%04d.imat.lz4"; 8 | val namesfname = tt+"/names%04d.csmat.txt"; 9 | val loadtable = loadCSMat(dataroot+tt+".txt"); 10 | 11 | val bsize = 1024; 12 | 13 | val nimgs = loadtable.nrows; 14 | 15 | val fnames =
loadtable(?,0); 16 | val alllabels = loadtable(?,1).toIMat; 17 | 18 | val perm = randperm(nimgs); 19 | val mat = zeros(3 \ 256 \ 256 \ bsize); 20 | val labels = izeros(1, bsize); 21 | val names = CSMat(bsize,1); 22 | var i = 0; 23 | var jin = 0; 24 | while (jin < nimgs) { 25 | val todo = math.min(bsize, nimgs - jin); 26 | var j = 0; 27 | while (j < todo && jin < nimgs) { 28 | val indx = perm(jin); 29 | try { 30 | val im = loadImage(imagenetroot+fnames(indx)); 31 | val mm = im.toFMat(0->3,?,?).reshapeView(3,256,256,1); 32 | mat(?,?,?,j) = mm; 33 | labels(0, j) = alllabels(indx); 34 | names(j) = fnames(indx); 35 | j += 1; 36 | } catch { 37 | case e:Exception => println("\nProblem reading %s, continuing" format fnames(indx)); 38 | } 39 | jin += 1; 40 | } 41 | if (j == bsize) { 42 | saveBMat(dataroot+savefname format i, BMat(if (doNCHW) mat.fromNHWCtoNCHW else mat)); 43 | saveIMat(dataroot+labelfname format i, labels); 44 | saveCSMat(dataroot+namesfname format i, names); 45 | } else { 46 | val mc = mat.colslice(0,j); 47 | saveBMat(dataroot+savefname format i, BMat(if (doNCHW) mc.fromNHWCtoNCHW else mc)); 48 | saveIMat(dataroot+labelfname format i, labels.colslice(0,j)); 49 | saveCSMat(dataroot+namesfname format i, names(0->j,0)); 50 | } 51 | i += 1; 52 | print("."); 53 | } 54 | Mat.useCache=false; 55 | println(""); 56 | 57 | -------------------------------------------------------------------------------- /scripts/networks/getImageNetLabels.ssc: -------------------------------------------------------------------------------- 1 | val tt = "train"; 2 | 3 | val dataroot = "../../data/ImageNet/"; 4 | val labelfname = dataroot+tt+"/label%04d.imat.lz4"; 5 | val labelsout = dataroot+tt+"/labels%04d.fmat.lz4"; 6 | 7 | val bsize = 1024; 8 | val nparts = 1252; 9 | 10 | print("\nComputing one-hot labels"); 11 | val omat = zeros(1000,bsize); 12 | val coln = irow(0->bsize) *@ 1000; 13 | for (i <- 0 until nparts) { 14 | val mat = loadIMat(labelfname format i); 15 | omat.clear; 16 | val inds = mat + coln(0,0->mat.ncols); 17 | omat(inds) = 1f; 18 | if (mat.ncols == bsize) { 19 | saveFMat(labelsout format i, omat); 20 | } else { 21 | saveFMat(labelsout format i, omat.colslice(0,mat.ncols)); 22 | } 23 | print("."); 24 | } 25 | println(""); 26 | 27 | -------------------------------------------------------------------------------- /scripts/networks/getImageNetMeans.ssc: -------------------------------------------------------------------------------- 1 | val tt = "train"; 2 | 3 | val traindir = "../../data/ImageNet/train/"; 4 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 5 | 6 | val bsize = 1024; 7 | //val nparts = 1252; 8 | val nparts = 125; 9 | 10 | var nimgs = 0L; 11 | val msum = dzeros(3\256\256\1); 12 | 13 | print("\nComputing mean"); 14 | val times = zeros(1,4) 15 | for (i <- 0 until nparts) { 16 | tic; 17 | val mat = loadBMat(traindata format i); 18 | val t1 = toc; 19 | val fmat = unsignedFloat(mat); 20 | val t2 = toc; 21 | val tmpsum = fmat.sum(irow(3)); 22 | val t3 = toc; 23 | msum ~ msum + DMat(tmpsum); 24 | val t4 = toc; 25 | times ~ times + row(t1,t2-t1,t3-t2,t4-t3); 26 | nimgs = nimgs + fmat.ncols; 27 | print("."); 28 | } 29 | println(""); 30 | 31 | msum ~ msum / nimgs.toDouble; 32 | val means = FMat(msum); 33 | saveFMat(traindir+"/means.fmat.lz4", means); 34 | -------------------------------------------------------------------------------- /scripts/networks/getcifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd "${BIDMACH_SCRIPTS}" 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | echo "Loading CIFAR10 data" 18 | 19 | CIFAR10="${BIDMACH_SCRIPTS}/../../data/CIFAR10" 20 | mkdir -p ${CIFAR10}/parts 21 | cd ${CIFAR10} 22 | 23 | if [ ! -e t10k-labels-idx1-ubyte ]; then 24 | wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 25 | tar -xf cifar-10-binary.tar.gz 26 | rm -f cifar-10-binary.tar.gz 27 | mv cifar-10-batches-bin/* . 28 | rm -rf cifar-10-batches-bin 29 | fi 30 | 31 | echo "Processing CIFAR10 data" 32 | cd "${BIDMACH_SCRIPTS}" 33 | ../../bidmach processcifar10.ssc 34 | -------------------------------------------------------------------------------- /scripts/networks/getcifar100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd "${BIDMACH_SCRIPTS}" 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | echo "Loading CIFAR100 data" 18 | 19 | CIFAR100="${BIDMACH_SCRIPTS}/../../data/CIFAR100" 20 | mkdir -p ${CIFAR100}/parts 21 | cd ${CIFAR100} 22 | 23 | if [ ! -e t10k-labels-idx1-ubyte ]; then 24 | wget http://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz 25 | tar -xf cifar-100-binary.tar.gz 26 | rm -f cifar-100-binary.tar.gz 27 | mv cifar-100-binary/* . 28 | rm -rf cifar-100-binary 29 | fi 30 | 31 | echo "Processing CIFAR100 data" 32 | cd "${BIDMACH_SCRIPTS}" 33 | ../../bidmach processcifar100.ssc 34 | -------------------------------------------------------------------------------- /scripts/networks/getmnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd ${BIDMACH_SCRIPTS} 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | 18 | echo "Loading MNIST data" 19 | 20 | MNIST="${BIDMACH_SCRIPTS}/../../data/MNIST" 21 | mkdir -p ${MNIST} 22 | cd ${MNIST} 23 | 24 | if [ ! -e train-images-idx3-ubyte ]; then 25 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 26 | gunzip train-images-idx3-ubyte.gz 27 | fi 28 | if [ ! -e train-labels-idx1-ubyte ]; then 29 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 30 | gunzip train-labels-idx1-ubyte.gz 31 | fi 32 | 33 | if [ ! 
-e t10k-images-idx3-ubyte ]; then 34 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 35 | gunzip t10k-images-idx3-ubyte.gz 36 | fi 37 | if [ ! -e t10k-labels-idx1-ubyte ]; then 38 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 39 | gunzip t10k-labels-idx1-ubyte.gz 40 | fi 41 | -------------------------------------------------------------------------------- /scripts/networks/modelmat_test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/scripts/networks/modelmat_test -------------------------------------------------------------------------------- /scripts/networks/modelmat_test.fmat.lz4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/scripts/networks/modelmat_test.fmat.lz4 -------------------------------------------------------------------------------- /scripts/networks/processcifar10.ssc: -------------------------------------------------------------------------------- 1 | val inputdir = "../../data/CIFAR10/"; 2 | val outputdir = "../../data/CIFAR10/parts/"; 3 | val trainfname = "data_batch_%d.bin"; 4 | val testfname = "test_batch.bin"; 5 | val traindname = "train%d.fmat.lz4"; 6 | val trainNCHWdname = "trainNCHW%d.fmat.lz4"; 7 | val trainlabels = "labels%d.imat.lz4"; 8 | val testdname = "test0.fmat.lz4"; 9 | val testNCHWdname = "testNCHW0.fmat.lz4"; 10 | val testlabels = "testlabels0.imat.lz4"; 11 | 12 | val nparts = 5; 13 | val nimages = 10000; 14 | val nimgbytes = 3 * 32 * 32; 15 | 16 | import java.io._ 17 | 18 | val buffer = new Array[Byte](nimgbytes); 19 | val datamat = zeros(3\32\32\nimages); 20 | val labelmat = izeros(1, nimages); 21 | 22 | def getFile(fname:String, datamat:FMat, labelmat:IMat) { 23 | val ds = new DataInputStream(new FileInputStream(fname)); 24 | for (j <- 0 until nimages) { 25 | val label = ds.readByte(); 26 | labelmat(j) = label; 27 | ds.readFully(buffer, 0, nimgbytes); 28 | var k = 0; 29 | while (k < nimgbytes) { 30 | datamat.data(k + j * nimgbytes) = buffer(k) & 0xFF; 31 | k += 1; 32 | } 33 | } 34 | ds.close(); 35 | } 36 | 37 | 38 | print("\nConverting CIFAR10"); 39 | for (i <- 1 to nparts) { 40 | getFile(inputdir + trainfname format i, datamat, labelmat); 41 | saveFMat(outputdir+traindname format (i-1), datamat.fromNCHWtoNHWC); 42 | saveFMat(outputdir+trainNCHWdname format (i-1), datamat); 43 | saveIMat(outputdir+trainlabels format (i-1), labelmat); 44 | print("."); 45 | } 46 | getFile(inputdir + testfname, datamat, labelmat); 47 | saveFMat(outputdir + testdname, datamat.fromNCHWtoNHWC); 48 | saveFMat(outputdir + testNCHWdname, datamat); 49 | saveIMat(outputdir + testlabels, labelmat); 50 | print("."); 51 | 52 | println(); 53 | System.exit(0) -------------------------------------------------------------------------------- /scripts/networks/reduceRate.sc: -------------------------------------------------------------------------------- 1 | 2 | import scala.concurrent.Future 3 | import scala.concurrent.ExecutionContext.Implicits.global 4 | 5 | Future { 6 | while (opts.resScale > 0.01f) { 7 | Thread.sleep(100*1000) 8 | opts.resScale = opts.resScale * 0.99f 9 | } 10 | opts.resScale = 0f 11 | } 12 | -------------------------------------------------------------------------------- /scripts/networks/resumeAlexnet.ssc: 
-------------------------------------------------------------------------------- 1 | import BIDMach.networks.layers._ 2 | 3 | val traindir = "../../data/ImageNet/train/"; 4 | //val traindir = "/home/jfc/data/ImageNet/2012/BIDMach/train/"; 5 | val testdir = "../../data/ImageNet/val/"; 6 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 7 | val trainlabels = traindir+"label%04d.imat.lz4"; 8 | val testdata = testdir+"partNCHW%04d.bmat.lz4"; 9 | val testlabels = testdir+"label%04d.imat.lz4"; 10 | val testpreds = testdir+"pred%04d.fmat.lz4"; 11 | 12 | val (nn, opts) = Net.gradLearner(traindata, trainlabels); 13 | val net = nn.model.asInstanceOf[Net] 14 | 15 | // Load the most recent checkpoint matching the checkpoint filename template 16 | opts.checkPointFile = "../../models/alexnet%03d/" 17 | nn.loadCheckPoint(); 18 | 19 | // Enter the number of epochs completed already 20 | val doneEpochs = 0; 21 | val lrinit = 1e-2f; 22 | 23 | def lr_update(ipass0:Float, istep:Float, frac:Float):Float = { 24 | val ipass = ipass0 + doneEpochs; 25 | val lr = if (ipass < 20) { 26 | lrinit 27 | } else if (ipass < 40) { 28 | lrinit/10 29 | } else lrinit/100 30 | lr 31 | } 32 | 33 | opts.logfile = "logAlexnet_%fb.txt" format (lrinit); 34 | opts.lr_policy = lr_update _; 35 | opts.npasses = opts.npasses - doneEpochs; 36 | 37 | nn.launchTrain; 38 | 39 | println("Examine the 'nn' variable to track learning state.\n"); 40 | 41 | 42 | def validate = { 43 | val (mm, mopts) = Net.predLabels(net, testdata, testlabels); 44 | mopts.batchSize= opts.batchSize; 45 | mm.predict; 46 | println("Accuracy = %f" format mean(mm.results(0,?),2).v); 47 | } 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /scripts/networks/resumeResnet.ssc: -------------------------------------------------------------------------------- 1 | // This script must be run from BIDMach/scripts/networks 2 | 3 | import BIDMach.networks.layers._ 4 | 5 | val traindir = "../../data/ImageNet/train/"; 6 | val testdir = "../../data/ImageNet/val/"; 7 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 8 | val trainlabels = traindir+"label%04d.imat.lz4"; 9 | val testdata = testdir+"partNCHW%04d.bmat.lz4"; 10 | val testlabels = testdir+"label%04d.imat.lz4"; 11 | val testpreds = testdir+"pred%04d.fmat.lz4"; 12 | 13 | val (nn, opts) = Net.gradLearner(traindata, trainlabels); 14 | val net = nn.model.asInstanceOf[Net] 15 | 16 | // Load the most recent checkpoint matching the checkpoint filename template 17 | opts.checkPointFile = "../../models/resnet%03d/" 18 | nn.loadCheckPoint(); 19 | 20 | // Enter the number of epochs completed already 21 | val doneEpochs = 0; 22 | val lrinit = 1e-1f; 23 | 24 | def lr_update(ipass0:Float, istep:Float, frac:Float):Float = { 25 | val ipass = ipass0 + doneEpochs; 26 | val lr = if (ipass < 15) { 27 | lrinit 28 | } else if (ipass < 20) { 29 | lrinit/10 30 | } else lrinit/100 31 | lr 32 | } 33 | 34 | opts.lr_policy = lr_update _; 35 | opts.logfile = "logresv1b.txt"; 36 | opts.npasses = opts.npasses - doneEpochs; 37 | 38 | nn.launchTrain 39 | 40 | def validate = { 41 | val (mm, mopts) = Net.predLabels(net, testdata, testlabels); 42 | mopts.batchSize= opts.batchSize; 43 | mm.predict; 44 | println("Accuracy = %f" format mean(mm.results(0,?),2).v); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /scripts/networks/testTrans.sc: -------------------------------------------------------------------------------- 1 | // Sum before Layernorm 2 
| 3 | import BIDMach.networks.TransformerLT 4 | import BIDMach.networks.layers._ 5 | 6 | val ddir = "/code/BIDMach/data/wikitext/" 7 | val fname = ddir + "train/part%04d.imat.lz4" 8 | 9 | val dict = loadCSMat(ddir + "wikitext_spm_vocab.txt")(?,0) on "막" 10 | 11 | val (nn, opts) = TransformerLT.learner(fname); 12 | 13 | opts.lrate = 1e-4f 14 | opts.seqlength = 2048 15 | opts.batchSize = 2048 16 | opts.npasses = 40 17 | opts.degree = 128 18 | opts.decay = 0.999f 19 | opts.depth = 16 20 | opts.nheads = 8 21 | opts.dim = 2048 22 | opts.dim = 1024 23 | opts.indim = opts.dim 24 | opts.outdim = opts.dim 25 | opts.dropout= 0.8f; 26 | opts.normInit = 2f 27 | opts.decay = 0.999f 28 | opts.texp = 0f 29 | opts.vel_decay = 0.8f; 30 | opts.lrate = opts.lrate*(1-opts.vel_decay) 31 | opts.gsq_decay = 0.999f; 32 | opts.clip_grad_norm = 10f 33 | opts.scoreType = SoftmaxOutputLayer.CrossEntropyScore 34 | opts.pstep = 0.01f 35 | opts.useCache = false 36 | opts.useGPUcache = true 37 | //opts.resScale = 0.9f 38 | //opts.resLinks = 2 \ 4 on 5 \ 7 on 9 \ 11 on 12 \ 14 39 | //opts.resLinks = 4 \ 8 40 | 41 | val lrfinal = opts.lrate.v 42 | val lrinit = lrfinal / 2 43 | val lastepoch = 10f 44 | 45 | def lr_update(ipass:Float, istep:Float, frac:Float):Float = { 46 | val lr = if (ipass < 1) { 47 | lrinit + frac * (lrfinal - lrinit) 48 | } else { 49 | lrfinal * math.max(0f, lastepoch - frac) / (lastepoch - 1) 50 | } 51 | opts.lrate = lr; 52 | lr 53 | } 54 | 55 | opts.lr_policy = lr_update _; 56 | 57 | opts.logfile = "logTrans_d%d_n%d_m%d_lr%7.6f.txt" format (opts.degree, opts.depth, opts.dim, opts.lrate.v) 58 | 59 | val tt = nn.model.asInstanceOf[TransformerLT] 60 | 61 | //nn.train 62 | nn.launchTrain 63 | Thread.sleep(6000) 64 | 65 | 66 | val net = tt.txNets(0) 67 | val fe = tt.frontEnd 68 | val be = tt.backEnd 69 | 70 | -------------------------------------------------------------------------------- /scripts/processmnist.ssc: -------------------------------------------------------------------------------- 1 | 2 | val (d,c,w) = loadLibSVM("mnist.lsvm", 784); 3 | saveFMat("train.fmat.lz4", full(d)); 4 | saveIMat("ctrain.imat.lz4", c); 5 | val (d2,c2,w2) = loadLibSVM("mnist.t.lsvm", 784); 6 | saveFMat("test.fmat.lz4", full(d2)); 7 | saveIMat("ctest.imat.lz4", c2); 8 | 9 | -------------------------------------------------------------------------------- /scripts/processmnist8m.ssc: -------------------------------------------------------------------------------- 1 | // This script needs to be run after getmnist8m.sh 2 | // from the BIDMach/data/MNIST8M/parts directory 3 | 4 | for (i <- 0 to 80) { 5 | val (d,c,w) = loadLibSVM("part%02d" format i, 784); 6 | val fd = full(d); 7 | val fc = accum(c.t \ icol(0->c.length), 1f, 10, c.length); 8 | saveIMat("cat%02d.imat.lz4" format i, c); 9 | saveFMat("data%02d.fmat.lz4" format i, fd); 10 | saveFMat("cats%02d.fmat.lz4" format i, fc); 11 | val alls = (fc * 10000f) on fd; 12 | saveFMat("alls%02d.fmat.lz4" format i, alls); 13 | // saveFMat("allst%02d.fmat.txt" format i, alls.t); 14 | print("."); 15 | } 16 | 17 | System.exit(0) -------------------------------------------------------------------------------- /scripts/processmnist8m_finesplit.ssc: -------------------------------------------------------------------------------- 1 | // This script needs to be run after getmnist8m_finesplit.sh 2 | // from the $BIDMACH_DATA_HOME/MNIST8M/parts_fine directory 3 | 4 | for (i <- 0 until 800) { 5 | val (d,c,w) = loadLibSVM("part%03d" format i, 784); 6 | val fd = full(d); 7 | val fc = accum(c.t \ 
icol(0->c.length), 1f, 10, c.length); 8 | saveIMat("cat%03d.imat.lz4" format i, c); 9 | saveFMat("data%03d.fmat.lz4" format i, fd); 10 | saveFMat("cat_onehot%03d.fmat.lz4" format i, fc); 11 | val alls = (fc * 10000f) on fd; 12 | saveFMat("alls%03d.fmat.lz4" format i, alls); 13 | 14 | println("%d / 800 processed" format i); 15 | } 16 | -------------------------------------------------------------------------------- /scripts/processpubmed.ssc: -------------------------------------------------------------------------------- 1 | 2 | val a=loadSMat("pubmed.smat.lz4"); 3 | val d=loadSBMat("pubmed.term.sbmat.gz"); 4 | val dc=CSMat(d); 5 | val sa = sum(a,2); 6 | val (vx,ix) = sortdown2(sa); 7 | val ip = invperm(ix); 8 | val nc = a.ncols; 9 | for (i <- 0 until 10) { 10 | val icmin = ((i * 1L * nc)/10).toInt; 11 | val icmax = (((i+1) * 1L * nc)/10).toInt; 12 | val a0 = a(?, icmin -> icmax); 13 | val (ii, jj, vv) = find3(a0); 14 | val aa = sparse(ip(ii), jj, vv, a.nrows, a0.ncols); 15 | aa.check 16 | saveSMat("pubmed_parts/part%02d.smat.lz4" format i, aa); 17 | print("."); 18 | } 19 | saveSBMat("pubmed.term.sbmat.lz4",SBMat(dc(ix))); 20 | sys.exit() 21 | -------------------------------------------------------------------------------- /scripts/pubmedlda.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/"; 2 | 3 | val (nn,opts)=LDA.learner(dir + "pubmed_parts/part%02d.smat.lz4", 256); 4 | 5 | opts.batchSize = 50000; 6 | opts.nend = 9; 7 | opts.eltsPerSample = 400; 8 | opts.npasses = 3; 9 | 10 | 11 | val dict = Dict(loadSBMat(dir+"pubmed.term.sbmat.gz")) 12 | 13 | nn.train 14 | 15 | -------------------------------------------------------------------------------- /scripts/pubmednmf.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/" 2 | 3 | val (nn, opts) = NMF.learner(dir + "pubmed_parts/part%02d.smat.lz4", 256); 4 | 5 | opts.nend = 9; 6 | opts.eltsPerSample = 400 7 | 8 | opts.batchSize = 20000; 9 | opts.npasses = 3; 10 | 11 | val dict = Dict(loadSBMat(dir+"pubmed.term.sbmat.gz")) 12 | 13 | nn.train 14 | 15 | -------------------------------------------------------------------------------- /scripts/recompress.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir = "/data/MNIST8M/parts"; 3 | 4 | for (i<- 0 until 80) { 5 | val a=loadSMat(dir+"/part%02d.smat.lz4" format i); 6 | saveSMat(dir+"/part%02d.smat.gz" format i, a); 7 | val c=loadSMat(dir+"/cats%02d.smat.lz4" format i); 8 | saveSMat(dir+"/cats%02d.smat.gz" format i, c); 9 | print("."); 10 | } 11 | 12 | -------------------------------------------------------------------------------- /scripts/runall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while read slave; do 4 | echo ssh "${slave}" "${1}" 5 | ssh -n -o StrictHostKeyChecking=no "${slave}" "${1}" 6 | done < /code/BIDMach/conf/slaves 7 | -------------------------------------------------------------------------------- /scripts/runback.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while read slave; do 4 | echo ssh "${slave}" "nohup sh -c \"${1}\" > ${HOME}/logs/bklog.txt 2>&1 &" 5 | ssh -n -o StrictHostKeyChecking=no "${slave}" "nohup sh -c \"${1}\" > ${HOME}/logs/bklog.txt 2>&1 &" 6 | done < /code/BIDMach/conf/slaves 7 | -------------------------------------------------------------------------------- 
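runall.sh and runback.sh fan a single shell command out to every host listed in /code/BIDMach/conf/slaves: runall.sh runs it synchronously, one host at a time, while runback.sh detaches it under nohup and leaves a log in ${HOME}/logs/bklog.txt on each slave. A minimal usage sketch (the commands shown are illustrative, not taken from the repo):

    ./runall.sh 'hostname'                                         # sanity-check that every slave answers
    ./runback.sh 'cd /code/BIDMach/scripts && bidmach testlr.ssc'  # long-running job, detached on each host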
/scripts/runmaster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd /code/BIDMach/scripts 3 | bidmach testAllReduceGridMaster.ssc 4 | -------------------------------------------------------------------------------- /scripts/runmaster16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd /code/BIDMach/scripts 3 | bidmach testAllReduceGridMaster16.ssc 4 | -------------------------------------------------------------------------------- /scripts/runnode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /code/BIDMach/scripts 4 | bidmach testAllReduceNodeResnet.ssc 5 | -------------------------------------------------------------------------------- /scripts/runnode16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /code/BIDMach/scripts 4 | bidmach testAllReduceNodeResnet.ssc 5 | -------------------------------------------------------------------------------- /scripts/seedActor.ssc: -------------------------------------------------------------------------------- 1 | 2 | import BIDMach.allreduce.SeedActor 3 | 4 | val seeds = SeedActor.startup(Seq("2551","2552")); 5 | 6 | val gn = new SeedActor.GetNodes(); 7 | 8 | def queryHosts() = { 9 | gn.query(seeds(0)); 10 | } -------------------------------------------------------------------------------- /scripts/sortcriteo.ssc: -------------------------------------------------------------------------------- 1 | val ntrain = 92 2 | val ntest = 13 3 | val ndense = 2*ntest+1; 4 | val dir = "../data/criteo/parts2/"; 5 | 6 | println("\nCounting features"); 7 | val x= loadSMat(dir+"train00.smat.lz4"); 8 | val nfeats = x.nrows; 9 | val counts = dzeros(nfeats,1); 10 | val counts2 = dzeros(nfeats,1); 11 | 12 | //for (i <- 0 until (ntrain+ntest)) { 13 | for (i <- 0 until (ntrain)) { 14 | val a = if (i < ntrain) { 15 | loadSMat(dir+("train%02d.smat.lz4" format i)); 16 | } else { 17 | loadSMat(dir+("test%02d.smat.lz4" format i-ntrain)); 18 | } 19 | counts ~ counts + DMat(sum(a,2)); 20 | print(".") 21 | } 22 | 23 | saveDMat(dir+"featurecounts.dmat.lz4", counts) 24 | 25 | val cmeans = counts / counts(0); 26 | val cscale = FMat(cmeans); 27 | cscale(ndense->nfeats) = 1f; 28 | 29 | println("\nSorting"); 30 | val (vv, ii0) = sortdown2(counts(ndense->counts.length,0)); 31 | println("\nMapping"); 32 | 33 | val iperm = icol(0->ndense) on (ii0 + ndense); 34 | saveIMat(dir+"permutation.imat.lz4", iperm); 35 | val uperm = invperm(iperm); 36 | 37 | for (i <- 0 until (ntrain+ntest)) { 38 | val a = if (i < ntrain) { 39 | loadSMat(dir+("train%02d.smat.lz4" format i)); 40 | } else { 41 | loadSMat(dir+("test%02d.smat.lz4" format i-ntrain)); 42 | } 43 | val (ii, jj, vv) = find3(a); 44 | val newii = uperm(ii); 45 | val b = sparse(newii, jj, vv / cscale(newii), a.nrows, a.ncols); 46 | b.check; 47 | if (i < ntrain) { 48 | saveSMat(dir+("trainsorted%02d.smat.lz4" format i), b/cscale); 49 | } else { 50 | saveSMat(dir+("testsorted%02d.smat.lz4" format i-ntrain), b/cscale); 51 | } 52 | counts2 ~ counts2 + DMat(sum(b,2)); 53 | print(".") 54 | } 55 | saveDMat(dir+"sortedfeaturecounts.dmat.lz4", counts2) 56 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/check.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | if [ ! 
-s /home/ubuntu/sparseallreduce/PageRank/done.mount ]; then 4 | echo "mount fail" 5 | fi 6 | if [ ! -s /home/ubuntu/sparseallreduce/PageRank/machines ]; then 7 | echo "missing machines" 8 | fi 9 | if [ ! -s /home/ubuntu/sparseallreduce/PageRank/rmachines ]; then 10 | echo "missing rmachines" 11 | fi 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/checkall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | hosts=`cat $1` 3 | counter=0; 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank/;./check.sh" 8 | counter=`expr $counter + 1` 9 | echo $counter 10 | done 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/checkssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | status=$(ssh $1 echo ok 2>&1) 4 | if [[ $status == ok ]] ; then 5 | mkdir ips/$2/$1 6 | fi 7 | 8 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/checksshall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # $1 machine ips from one placement group $2 number of placement group 3 | hosts=`cat $1` 4 | #remove all existing ips 5 | rm -r ips/$2/* 6 | for i in `echo $hosts`; do 7 | host=`echo $i` 8 | ./checkssh.sh $host $2 & 9 | done 10 | 11 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | scalac -cp $ALL_LIBS Twitter.scala 4 | scalac -cp $ALL_LIBS Yahoo.scala 5 | scalac relabelmachines.scala 6 | scalac splitmachines.scala 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/kill.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ps aux | grep mount | awk '{print $2}' | xargs sudo kill 9 3 | ps aux | grep scala | awk '{print $2}' | xargs sudo kill 15 4 | sleep 3s 5 | ps aux | grep scala | awk '{print $2}' | xargs sudo kill 2 6 | sleep 3s 7 | ps aux | grep scala | awk '{print $2}' | xargs sudo kill 9 8 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/killall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./kill.sh;" & 8 | done 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/logcollect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | hosts=`cat $1` 3 | counter=0; 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cp /disk4/log* /disk4/copylog/" & 8 | sleep 1s 9 | scp -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host:/disk4/copylog/log* /logs & 10 | counter=`expr $counter + 1` 11 | echo $counter 12 | done 13 | 14 | -------------------------------------------------------------------------------- 
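The sparseallreduce helpers follow one pattern: a per-host script (check.sh, ping.sh, kill.sh, logcollect.sh, ...) paired with an *all.sh driver that reads a hosts file and invokes the per-host script over ssh with the hard-coded supermario.pem key. A hedged example session, assuming a hosts file named machines as these scripts expect:

    ./checksshall.sh machines 1   # probe ssh reachability; live hosts get a marker dir under ips/1/
    ./checkall.sh machines        # verify each node's volume mount and its machines/rmachines lists
    ./logcollect.sh machines      # copy each node's /disk4 logs back to the local /logs directory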
/scripts/sparseallreduce/mount.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export JAVA_HOME="/usr/" 3 | export EC2_HOME="/home/ubuntu/lib/ec2-api-tools-1.6.7.2" 4 | export PATH=$PATH:$EC2_HOME/bin 5 | export AWS_ACCESS_KEY=AAAA 6 | export AWS_SECRET_KEY=BBBB 7 | 8 | if [ ! -d /disk4 ]; then 9 | sudo mkdir /disk4 10 | fi 11 | #dir for copy logs 12 | if [ ! -d /disk4/copylog ]; then 13 | sudo mkdir /disk4/copylog 14 | fi 15 | sudo chown -R ubuntu /disk4 16 | sudo chgrp -R ubuntu /disk4 17 | sudo chmod -R 755 /disk4 18 | 19 | ec2-attach-volume $1 -i $(ec2metadata --instance-id) -d /dev/xvdk 20 | sleep 15s 21 | while [ $(sudo file -s /dev/xvdk | grep ERROR | wc -l) -eq 1 ]; do 22 | sleep 1s 23 | done 24 | sudo mount /dev/xvdk /disk4 25 | sudo chown -R ubuntu /disk4 26 | sudo chgrp -R ubuntu /disk4 27 | sudo chmod -R 755 /disk4 28 | 29 | echo "x" > done.mount 30 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/mountall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | while read host<&4 && read volume<&5 3 | do 4 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./mount.sh $volume;" & 5 | done 4<$1 5<$2 6 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/ping.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | hosts=`cat $1` 3 | counter=0; 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ping -c 1 $host 8 | done 9 | 10 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/pingall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #set -x 3 | hosts=`cat $1` 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./ping.sh machines >& /disk3/log-ping-$host;" & 8 | done 9 | 10 | 11 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/runtwitter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | export JAVA_OPTS=-Xmx28G 4 | export LD_LIBRARY_PATH=/home/ubuntu/lib/BIDMat/lib:/usr/local/lib 5 | export PATH=$1 6 | export ALL_LIBS=$2 7 | 8 | scala -cp $ALL_LIBS Twitter 41652230 $3 $4 $5 $6 machines 9 | 10 | 11 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/runtwitterall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | config="8,8" 6 | 7 | for i in `echo $hosts`; do 8 | host=`echo $i` 9 | echo $imachine 10 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;nohup ./runtwitter.sh $PATH $ALL_LIBS $config $imachine 10000000 1 >& /disk3/log-twitter-$config-$imachine &" & 11 | imachine=`expr $imachine + 1` 12 | done 13 | 14 | 15 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahoo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | export JAVA_OPTS=-Xmx60G 4 | export LD_LIBRARY_PATH=/home/ubuntu/lib/BIDMat/lib:/usr/local/lib 5 
export PATH=$1 6 | export ALL_LIBS=$2 7 | 8 | scala -cp $ALL_LIBS Yahoo 1413511394 $3 $4 $5 $6 machines 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahooall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | config="4,4,2,2" 6 | 7 | for i in `echo $hosts`; do 8 | host=`echo $i` 9 | echo $imachine 10 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;nohup ./runyahoo.sh $PATH $ALL_LIBS $config $imachine 30000000 1 >& /disk4/log-yahoo-$config-$imachine &" & 11 | imachine=`expr $imachine + 1` 12 | done 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahoor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | export JAVA_OPTS=-Xmx60G 4 | export LD_LIBRARY_PATH=/home/ubuntu/lib/BIDMat/lib:/usr/local/lib 5 | export PATH=$1 6 | export ALL_LIBS=$2 7 | 8 | scala -cp $ALL_LIBS Yahoo 1413511394 $3 $4 $5 $6 rmachines 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahoorall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | config="128" 6 | 7 | for i in `echo $hosts`; do 8 | host=`echo $i` 9 | echo $imachine 10 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;nohup ./runyahoor.sh $PATH $ALL_LIBS $config $imachine 30000000 2 >& /disk4/log-yahoor-$config-$imachine &" & 11 | imachine=`expr $imachine + 1` 12 | done 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/setup.sh: -------------------------------------------------------------------------------- 1 | 2 | N=69 3 | TYPE="cc2.8xlarge" 4 | PLACEMENT="sparseallreduce" 5 | 6 | ec2-run-instances ami-53c5c03a -n $N -g template-all-access -k supermario -t $TYPE --placement-group $PLACEMENT --availability-zone us-east-1a 7 | 8 | sleep 10s 9 | 10 | ec2-describe-instances --filter "instance-type=$TYPE" --filter "placement-group-name=$PLACEMENT" | grep -o 'ip[0-9-]\+' > /home/ubuntu/sparseallreduce/PageRank/rawmachines 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/unmount.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | export JAVA_HOME="/usr/" 3 | export EC2_HOME="/home/ubuntu/lib/ec2-api-tools-1.6.7.2" 4 | export PATH=$PATH:$EC2_HOME/bin 5 | export AWS_ACCESS_KEY=AAAA 6 | export AWS_SECRET_KEY=BBBB 7 | 8 | sudo umount -d /dev/xvdk 9 | ec2-detach-volume $1 10 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/unmountall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | while read host<&4 && read volume<&5 3 | do 4 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./unmount.sh $volume;" & 5 | done 4<$1 5<$2 6 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 
#set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | 6 | for i in `echo $hosts`; do 7 | host=`echo $i` 8 | scp -i /home/ubuntu/.ssh/supermario.pem *.sh ubuntu@$host:sparseallreduce/PageRank/ & 9 | scp -i /home/ubuntu/.ssh/supermario.pem machines ubuntu@$host:sparseallreduce/PageRank/ & 10 | scp -i /home/ubuntu/.ssh/supermario.pem rmachines ubuntu@$host:sparseallreduce/PageRank/machines & 11 | #scp -i /home/ubuntu/.ssh/supermario.pem Twitter* ubuntu@$host:sparseallreduce/PageRank/ & 12 | #scp -i /home/ubuntu/.ssh/supermario.pem Yahoo* ubuntu@$host:sparseallreduce/PageRank/ & 13 | #scp -i /home/ubuntu/.ssh/supermario.pem ~/lib/BIDMat/BIDMat.jar ubuntu@$host:lib/BIDMat/ & 14 | done 15 | 16 | 17 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/updatecheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | hosts=`cat $1` 3 | 4 | for i in `echo $hosts`; do 5 | host=`echo $i` 6 | result=$(ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "ls machines | wc -l;" 2>&1) 7 | if [ "$result" -ne 1 ]; 8 | then 9 | echo "$host: false" 10 | fi 11 | done 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes1: -------------------------------------------------------------------------------- 1 | vol-42b66e34 2 | vol-4bb66e3d 3 | vol-32b66e44 4 | vol-31b66e47 5 | vol-15b66e63 6 | vol-10b66e66 7 | vol-11b66e67 8 | vol-1eb66e68 9 | vol-1fb66e69 10 | vol-1ab66e6c 11 | vol-19b66e6f 12 | vol-06b66e70 13 | vol-0cb66e7a 14 | vol-0ab66e7c 15 | vol-08b66e7e 16 | vol-f6b66e80 17 | vol-f7b66e81 18 | vol-f4b66e82 19 | vol-f5b66e83 20 | vol-f2b66e84 21 | vol-fab66e8c 22 | vol-f9b66e8f 23 | vol-e2b66e94 24 | vol-e0b66e96 25 | vol-d6b66ea0 26 | vol-d7b66ea1 27 | vol-d3b66ea5 28 | vol-d1b66ea7 29 | vol-deb66ea8 30 | vol-c6b66eb0 31 | vol-c2b66eb4 32 | vol-c0b66eb6 33 | vol-ceb66eb8 34 | vol-cbb66ebd 35 | vol-b7b66ec1 36 | vol-b5b66ec3 37 | vol-b1b66ec7 38 | vol-beb66ec8 39 | vol-bdb66ecb 40 | vol-bab66ecc 41 | vol-b8b66ece 42 | vol-a4b66ed2 43 | vol-a5b66ed3 44 | vol-acb66eda 45 | vol-aab66edc 46 | vol-96b66ee0 47 | vol-94b66ee2 48 | vol-93b66ee5 49 | vol-9cb66eea 50 | vol-98b66eee 51 | vol-86b66ef0 52 | vol-85b66ef3 53 | vol-8eb66ef8 54 | vol-8fb66ef9 55 | vol-8db66efb 56 | vol-8ab66efc 57 | vol-89b66eff 58 | vol-75b76f03 59 | vol-72b76f04 60 | vol-7cb76f0a 61 | vol-7ab76f0c 62 | vol-79b76f0f 63 | vol-67b76f11 64 | vol-62b76f14 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes2: -------------------------------------------------------------------------------- 1 | vol-6eb76f18 2 | vol-6db76f1b 3 | vol-68b76f1e 4 | vol-56b76f20 5 | vol-55b76f23 6 | vol-53b76f25 7 | vol-5eb76f28 8 | vol-5db76f2b 9 | vol-5ab76f2c 10 | vol-58b76f2e 11 | vol-42b76f34 12 | vol-40b76f36 13 | vol-41b76f37 14 | vol-4eb76f38 15 | vol-4db76f3b 16 | vol-4bb76f3d 17 | vol-49b76f3f 18 | vol-36b76f40 19 | vol-37b76f41 20 | vol-33b76f45 21 | vol-30b76f46 22 | vol-31b76f47 23 | vol-38b76f4e 24 | vol-24b76f52 25 | vol-21b76f57 26 | vol-2eb76f58 27 | vol-2fb76f59 28 | vol-2cb76f5a 29 | vol-2db76f5b 30 | vol-2ab76f5c 31 | vol-2bb76f5d 32 | vol-28b76f5e 33 | vol-14b76f62 34 | vol-11b76f67 35 | vol-1eb76f68 36 | vol-1fb76f69 37 | vol-04b76f72 38 | vol-05b76f73 39 | vol-00b76f76 40 | vol-f1b76f87 41 | vol-fab76f8c 42 | vol-f8b76f8e 43 | vol-e5b76f93 44 | vol-e1b76f97 45 | vol-efb76f99 46 | vol-ecb76f9a 47 | vol-ebb76f9d 48 | 
vol-e9b76f9f 49 | vol-d6b76fa0 50 | vol-d7b76fa1 51 | vol-d4b76fa2 52 | vol-d3b76fa5 53 | vol-d1b76fa7 54 | vol-c7b76fb1 55 | vol-c4b76fb2 56 | vol-c5b76fb3 57 | vol-c9b76fbf 58 | vol-b6b76fc0 59 | vol-b7b76fc1 60 | vol-b4b76fc2 61 | vol-b5b76fc3 62 | vol-b2b76fc4 63 | vol-b0b76fc6 64 | vol-b1b76fc7 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes3: -------------------------------------------------------------------------------- 1 | vol-bfb76fc9 2 | vol-a1b76fd7 3 | vol-a9b76fdf 4 | vol-94b76fe2 5 | vol-95b76fe3 6 | vol-91b76fe7 7 | vol-9eb76fe8 8 | vol-87b76ff1 9 | vol-85b76ff3 10 | vol-8cb76ffa 11 | vol-77a87001 12 | vol-75a87003 13 | vol-73a87005 14 | vol-70a87006 15 | vol-71a87007 16 | vol-7ea87008 17 | vol-7aa8700c 18 | vol-79a8700f 19 | vol-65a87013 20 | vol-63a87015 21 | vol-60a87016 22 | vol-56a87020 23 | vol-52a87024 24 | vol-53a87025 25 | vol-5aa8702c 26 | vol-5ba8702d 27 | vol-58a8702e 28 | vol-41a87037 29 | vol-4fa87039 30 | vol-36a87040 31 | vol-30a87046 32 | vol-31a87047 33 | vol-3da8704b 34 | vol-3ba8704d 35 | vol-38a8704e 36 | vol-39a8704f 37 | vol-27a87051 38 | vol-24a87052 39 | vol-22a87054 40 | vol-21a87057 41 | vol-2da8705b 42 | vol-29a8705f 43 | vol-17a87061 44 | vol-15a87063 45 | vol-10a87066 46 | vol-11a87067 47 | vol-1da8706b 48 | vol-1ba8706d 49 | vol-04a87072 50 | vol-0ea87078 51 | vol-0fa87079 52 | vol-0ca8707a 53 | vol-09a8707f 54 | vol-f4a87082 55 | vol-f0a87086 56 | vol-f1a87087 57 | vol-ffa87089 58 | vol-fba8708d 59 | vol-f8a8708e 60 | vol-e7a87091 61 | vol-e4a87092 62 | vol-e0a87096 63 | vol-eea87098 64 | vol-efa87099 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes4: -------------------------------------------------------------------------------- 1 | vol-eca8709a 2 | vol-eda8709b 3 | vol-eba8709d 4 | vol-d4a870a2 5 | vol-d5a870a3 6 | vol-dfa870a9 7 | vol-daa870ac 8 | vol-dba870ad 9 | vol-d8a870ae 10 | vol-c4a870b2 11 | vol-c2a870b4 12 | vol-c3a870b5 13 | vol-c0a870b6 14 | vol-c1a870b7 15 | vol-cfa870b9 16 | vol-cca870ba 17 | vol-cba870bd 18 | vol-c8a870be 19 | vol-b5a870c3 20 | vol-b0a870c6 21 | vol-b1a870c7 22 | vol-bea870c8 23 | vol-baa870cc 24 | vol-bba870cd 25 | vol-b9a870cf 26 | vol-a6a870d0 27 | vol-a4a870d2 28 | vol-a2a870d4 29 | vol-aea870d8 30 | vol-96a870e0 31 | vol-97a870e1 32 | vol-94a870e2 33 | vol-91a870e7 34 | vol-9ca870ea 35 | vol-87a870f1 36 | vol-82a870f4 37 | vol-83a870f5 38 | vol-81a870f7 39 | vol-8ea870f8 40 | vol-8ca870fa 41 | vol-8da870fb 42 | vol-8ba870fd 43 | vol-76a97100 44 | vol-7da9710b 45 | vol-79a9710f 46 | vol-65a97113 47 | vol-62a97114 48 | vol-63a97115 49 | vol-60a97116 50 | vol-6ca9711a 51 | vol-6da9711b 52 | vol-6ba9711d 53 | vol-69a9711f 54 | vol-56a97120 55 | vol-54a97122 56 | vol-55a97123 57 | vol-51a97127 58 | vol-5ba9712d 59 | vol-44a97132 60 | vol-45a97133 61 | vol-20a97156 62 | vol-21a97157 63 | vol-2ca9715a 64 | vol-2aa9715c 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumesetup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in {0..191} 4 | do 5 | ec2-create-volume --size 1 --availability-zone us-east-1a 6 | done 7 | 8 | ec2-describe-volumes --filter "size=1" | grep -o 'vol[a-zA-Z0-9-]\+' > volumesbackup 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/start_workers.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [[ -z $1 ]]; then 5 | echo 'Must supply script argument for workers to start!' 1>&2 6 | echo 'Example: ./start_workers.sh distributed/worker_criteo_lr.ssc' 1>&2 7 | exit 1 8 | fi 9 | 10 | WORKER_SCRIPT="${1}" 11 | 12 | SSH_OPTS='-T -o ConnectTimeout=3' 13 | while read worker_ip; do 14 | echo "Starting BIDMach worker on ${worker_ip}" 15 | ssh $SSH_OPTS "ubuntu@${worker_ip}" << EOS 16 | 17 | JAVA_OPTS=$JAVA_OPTS nohup bidmach $WORKER_SCRIPT > /tmp/bidmach_worker.log 2>&1 & disown 18 | 19 | EOS 20 | done < /code/BIDMach/conf/slaves 21 | echo 'Done!' 22 |
-------------------------------------------------------------------------------- /scripts/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Starting the Grid Master script here" 4 | screen -d -m bash -i -x /code/BIDMach/scripts/runmaster.sh 5 | 6 | echo "Waiting 20 seconds for Master startup" 7 | sleep 20 8 | 9 | echo "Starting Nodes" 10 | runall.sh 'screen -d -m bash -i -x /code/BIDMach/scripts/runnode.sh' 11 |
-------------------------------------------------------------------------------- /scripts/startup16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Starting the Grid Master script here" 4 | screen -d -m bash -i -x /code/BIDMach/scripts/runmaster16.sh 5 | 6 | echo "Waiting 20 seconds for Master startup" 7 | sleep 20 8 | 9 | echo "Starting Nodes" 10 | runall.sh 'screen -d -m bash -i -x /code/BIDMach/scripts/runnode16.sh' 11 |
-------------------------------------------------------------------------------- /scripts/stop_workers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | SSH_OPTS='-T -o ConnectTimeout=3 -o BatchMode=yes -o StrictHostKeyChecking=no' 4 | while read worker_ip; do 5 | echo "Killing BIDMach worker on ${worker_ip}" 6 | ssh $SSH_OPTS "ubuntu@${worker_ip}" << EOS 7 | 8 | jps | grep 'MainGenericRunner' | awk '{print \$1}' | xargs -I% kill % 9 | 10 | EOS 11 | done < /code/BIDMach/conf/slaves 12 | echo 'Done!' 13 |
-------------------------------------------------------------------------------- /scripts/testActor.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import com.typesafe.config.ConfigFactory 3 | 4 | val nodes = TestActor.startup(Seq("2553","2554")); 5 | 6 | val conf = ConfigFactory.load() 7 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 8 | val seedhostPort = seeds.get(0).toString 9 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 10 | 11 | // Akka's default maximum remote message size is about 100 kB, so stay under 25k floats (4 bytes each). 12 | val msize = 20000 13 | val sv = new TestActor.SendData(seedhost + ":2553/user/testActor",rand(1,msize),1000); 14 | //val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",10); 15 | 16 | 17 | def sendIt() = { 18 | nodes(0) ! 
sv; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /scripts/testActor2.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import scala.io.Source 3 | import com.typesafe.config.ConfigFactory 4 | 5 | val nodes = TestActor.startup(Seq("2555","2556")); 6 | 7 | 8 | val conf = ConfigFactory.load() 9 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 10 | val seedhostPort = seeds.get(0).toString 11 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 12 | 13 | 14 | val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",101) 15 | 16 | def sendIt() = { 17 | nodes(0) ! sv; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /scripts/testActor3.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import com.typesafe.config.ConfigFactory 3 | 4 | val nodes = TestActor.startup(Seq("2553","2554")); 5 | 6 | val conf = ConfigFactory.load() 7 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 8 | val seedhostPort = seeds.get(0).toString 9 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 10 | 11 | val msize = 20000; 12 | val sv = new TestActor.SendData(seedhost + ":2553/user/testActor",rand(1,msize),1000); 13 | //val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",10); 14 | 15 | 16 | def sendIt() = { 17 | nodes(0) ! sv; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /scripts/testActor3.ssc~: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import com.typesafe.config.ConfigFactory 3 | 4 | val nodes = TestActor.startup(Seq("2553","2554")); 5 | 6 | val conf = ConfigFactory.load() 7 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 8 | val seedhostPort = seeds.get(0).toString 9 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 10 | 11 | val msize = 20000; 12 | val sv = new TestActor.SendData(seedhost + ":2553/user/testActor",zeros(1,msize),100); 13 | //val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",10); 14 | 15 | 16 | def sendIt() = { 17 | nodes(0) ! 
sv; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /scripts/testAllReduceGridMaster.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce._ 2 | 3 | import scala.concurrent.duration._ 4 | 5 | // Override the configuration of the port when specified as program argument 6 | val port = "2551" 7 | val nodeNum = 4 8 | val masterConfig = GridMasterConfig(nodeNum = nodeNum, nodeResolutionTimeout = 10.seconds) 9 | 10 | AllreduceGridMaster.startUp(port, masterConfig) 11 | -------------------------------------------------------------------------------- /scripts/testAllReduceNodeDummy.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.AllreduceNode.getBasicConfigs 2 | import BIDMach.allreduce.binder.{AssertCorrectnessBinder, NoOpBinder} 3 | import BIDMach.allreduce.{AllreduceDummyLearner, AllreduceNode} 4 | 5 | val learner = new AllreduceDummyLearner() 6 | learner.ipass = 20 7 | 8 | val dataSize = 60000000 9 | val maxChunkSize = 20000 10 | 11 | val basicConfig = getBasicConfigs() 12 | val modifiedConfig = basicConfig.copy(workerConfig = 13 | basicConfig.workerConfig.copy( 14 | metaData = basicConfig.workerConfig.metaData.copy(dataSize = dataSize, maxChunkSize = maxChunkSize), 15 | threshold = basicConfig.workerConfig.threshold.copy(thComplete = 1.0f) 16 | ) 17 | ) 18 | 19 | 20 | val binder = new NoOpBinder(dataSize, 10) 21 | AllreduceNode.startNodeAfterIter(learner = learner, iter = 0, nodeConfig = modifiedConfig, binder = binder) -------------------------------------------------------------------------------- /scripts/testLogging.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks._ 2 | import scala.io.Source 3 | import BIDMach.datasources._ 4 | import scala.util.Random 5 | import BIDMach._ 6 | import BIDMach.updaters._ 7 | import BIDMach.mixins._ 8 | import BIDMat.TMat 9 | 10 | val prefix = "/data/word2vec/simple-examples/data/" 11 | val filename=prefix+"ptb.train.imat.lz4" 12 | val vob = 10000 13 | val data = loadIMat(filename)(0,0 until 920000) 14 | val test = loadIMat(prefix+"ptb.valid.imat.lz4")(0,0 until 70000); 15 | val (l,o)=NextWord.learner(data) 16 | val lr = 1f 17 | o.nvocab=vob; 18 | o.npasses=1; 19 | o.lrate=lr; 20 | o.height=2; 21 | o.width = 20 22 | o.batchSize=10000;//For fast testing only... 
set to 200 if want reasonable results 23 | o.dim=200 24 | o.kind=2 25 | o.pstep = 0.09f 26 | o.hasBias = true 27 | o.max_grad_norm = 5 28 | o.logDataSink = new MatSink() //nmats will be computed automatically during the logging 29 | o.logFuncs = Array(Logging.logGradientL2Norm,Logging.logGradientL1Norm) 30 | l.train 31 | val log = Logging.getResults(l) // or Logging.getResults(l.model) 32 | -------------------------------------------------------------------------------- /scripts/testPowerNet.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.TMat 2 | 3 | val mdir = "../data/criteo/parts/" 4 | 5 | val (nn,opts) = Net.learnerX(mdir+"trainsortedx%02d.smat.lz4",mdir+"trainlabel%02d.fmat.lz4"); 6 | 7 | opts.nend = 90 8 | opts.batchSize= 100 9 | opts.npasses = 1 10 | opts.lrate = 0.01f 11 | opts.texp = 0.3f 12 | opts.pstep = 0.001f 13 | 14 | opts.aopts = opts 15 | //opts.reg1weight = 0.0001 16 | //opts.hasBias = true 17 | opts.links = iones(1,1); 18 | opts.nweight = 1e-4f 19 | opts.lookahead = 0 20 | opts.autoReset = false 21 | 22 | val tshape = 0.25f 23 | val shape = irow(200,120,80,50,1) 24 | opts.tmatShape = Net.powerShape(tshape)_; 25 | opts.nodeset = Net.powerNet(shape,opts,0,2); 26 | opts.what 27 | println(tshape.toString) 28 | println(shape.toString) 29 | 30 | val model = nn.model.asInstanceOf[Net] 31 | nn.train 32 | 33 | val res = nn.results(0,?) 34 | 35 | val testdata = loadSMat(mdir+"trainsortedx%02d.smat.lz4" format opts.nend); 36 | val testlabels = loadFMat(mdir+"trainlabel%02d.fmat.lz4" format opts.nend); 37 | 38 | val (mm, mopts) = Net.predictor(model, testdata); 39 | mm.predict 40 | 41 | val preds=FMat(mm.preds(0)) 42 | 43 | val ll = DMat(ln(preds *@ testlabels + (1-preds) *@ (1-testlabels))) 44 | val rc = roc(preds, testlabels, 1-testlabels, 1000); 45 | 46 | (mean(ll), mean(rc)) -------------------------------------------------------------------------------- /scripts/testPredMNT2015.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.SeqToSeq 2 | 3 | val mdir = "/mnt/BIDMach/data/MNT2015/models/local_10passes/model256_te0.3_02/" 4 | // val mdir = "/mnt/BIDMach/data/MNT2015/models/local/" 5 | val datadir = "/mnt/BIDMach/data/MNT2015/data/" 6 | 7 | val PADsym = 1 8 | 9 | val batchSize = 128 10 | var src = loadSMat(datadir+"news-commentary-v10.fr-en.fr.smat.lz4") 11 | var dst = loadSMat(datadir+"news-commentary-v10.fr-en.en.smat.lz4") 12 | 13 | val ncols = src.ncols 14 | var traincols = Math.floor(0.9*ncols).toInt 15 | traincols = traincols - (traincols % batchSize) 16 | var srcTest = src(?, traincols -> (ncols-1)) 17 | var dstTest = dst(?, traincols -> (ncols-1)) 18 | srcTest = srcTest(?, 0 -> (srcTest.ncols - (srcTest.ncols % batchSize))) 19 | dstTest = dstTest(?, 0 -> (dstTest.ncols - (dstTest.ncols % batchSize))) 20 | 21 | // var srcTestFull = full(srcTest) 22 | // var dstTestFull = full(dstTest) 23 | var srcTestFull = full(src) 24 | var dstTestFull = full(dst) 25 | srcTestFull ~ srcTestFull + PADsym * (srcTestFull == 0) 26 | dstTestFull ~ dstTestFull + PADsym * (dstTestFull == 0) 27 | 28 | val model = new SeqToSeq 29 | model.setmodelmats(new Array[Mat](7)) 30 | for (i <- 0 until 7) { 31 | model.modelmats(i) = loadMat(mdir+"modelmat%02d.lz4" format i) 32 | } 33 | 34 | val srcSlice = srcTestFull(?, 0 -> batchSize) 35 | val dstSlice = dstTestFull(?, 0 -> batchSize) 36 | val (nn, opts) = SeqToSeq.predict(model, srcSlice) 37 | 38 | opts.nvocab = 20000 39 | opts.height = 
2 40 | opts.dim = 256 41 | opts.batchSize = batchSize 42 | 43 | opts.kind = 1 44 | opts.netType = 0 45 | opts.scoreType = 1 46 | opts.inwidth = 30 47 | opts.outwidth = 30 48 | opts.hasBias = true 49 | opts.pstep = 0.005f 50 | opts.cumScore = 3 51 | opts.PADsym = PADsym 52 | opts.OOVsym = 2 53 | opts.STARTsym = 0 54 | 55 | nn.predict 56 | 57 | val pred = nn.datasink.asInstanceOf[MatSink].omats(0).asInstanceOf[IMat] 58 | -------------------------------------------------------------------------------- /scripts/testSeqToSeqPred.ssc: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * LSTM training script. 4 | * 5 | */ 6 | import BIDMach.networks.SeqToSeq 7 | 8 | val dir = "/data/livejournal/srcdst/"; // Directory for input data 9 | val mdir = "/data/livejournal/models/" 10 | val odir = "/data/livejournal/preds2/"; // Directory for input data 11 | 12 | val model = SeqToSeq.load(mdir+"model256_te0.3_12/") 13 | val (nn,opts) = SeqToSeq.embed(model, dir+ "src%04d.smat.lz4", odir + "pred%04d.fmat.lz4") 14 | 15 | opts.nend = 1132 16 | opts.batchSize = 128 17 | opts.ofcols = 128000 18 | 19 | opts.nvocab = 100000; // Vocabulary limit 20 | opts.npasses = 1; // Number of passes over the dataset 21 | opts.height = 2; // Height of the network 22 | opts.dim = 256; // Dimension of LSTM units 23 | opts.kind = 1; // LSTM structure 24 | opts.netType = 0; // Net type (softmax=0, or negsampling=1) 25 | opts.scoreType = 1; // Score type (logloss=0, accuracy=1) 26 | opts.inwidth = 30; // Max input sentence length (truncates) 27 | opts.outwidth = 30; // Max output sentence length (truncates) 28 | opts.hasBias = true; // Use bias terms in linear layers 29 | opts.pstep = 0.0001f; // How often to print 30 | opts.cumScore = 3; // Accumulate scores for less-noisy printing 31 | opts.PADsym = 1; // The padding symbol 32 | opts.OOVsym = 2; // The OOV symbol 33 | opts.STARTsym = 0; 34 | opts.lookahead = 0; 35 | 36 | nn.predict 37 | -------------------------------------------------------------------------------- /scripts/test_cmudict_s2s.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.SeqToSeq 2 | 3 | val datadir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/smat_data/" 4 | val modeldir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/bidmach_model_10pass/" 5 | 6 | val src = loadMat(datadir+"train.src_grapheme.shuf.smat.lz4") 7 | val dst = loadMat(datadir+"train.dst_phoneme.shuf.smat.lz4") 8 | 9 | val (nn, opts) = SeqToSeq.learner(src, dst) 10 | val net = nn.model.asInstanceOf[BIDMach.networks.SeqToSeq] 11 | 12 | opts.lrate = 0.05f 13 | opts.nvocabIn = 31 14 | opts.nvocabOut = 43 15 | opts.npasses = 10 16 | opts.height = 2 17 | opts.dim = 512 18 | opts.batchSize = 64 19 | 20 | opts.checkPointInterval = 1f 21 | opts.checkPointFile = modeldir+"model256_te0.3_%02d/" 22 | opts.kind = 1 23 | opts.netType = 0 24 | opts.scoreType = 0 25 | opts.inwidth = 22 26 | opts.outwidth = 20 27 | opts.hasBias = true 28 | opts.pstep = 0.0001f 29 | opts.cumScore = 3 30 | opts.PADsym = 1 31 | opts.OOVsym = 2 32 | opts.STARTsym = 0 33 | opts.texp = 0.3f 34 | 35 | nn.train 36 | -------------------------------------------------------------------------------- /scripts/test_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #read testing parameters from user 4 | read -p 'nodeNum: ' nn 5 | read -p 'dataSize: ' ds 6 | read -p 'threshold: ' th 7 | read -p 'maxRound: ' mr 8 | 9 | 
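# Note: the sed edits below inject the values read above by rewriting 'val'
# declarations in place; e.g. with nn=8, a line reading 'val nodeNum = 4' in
# testAllReduceGridMaster.scala becomes 'val nodeNum = 8'. Each tunable must
# therefore stay on its own 'val ...' line for these patterns to match.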
#set parameters on master 10 | sed -i "s/^val nodeNum .*$/val nodeNum = ${nn}/" testAllReduceGridMaster.scala 11 | 12 | #run testAllReduceGridMaster 13 | nohup bidmach ./testAllReduceGridMaster.scala & 14 | 15 | #set parameters on slaves 16 | runall.sh "cd /code/BIDMach/scripts;sed -i \"s/^val maxRound .*$/val maxRound = ${mr}/\" testAllReduceNode.scala;sed -i \"s/^val dataSize.*$/val dataSize = ${ds}/\" testAllReduceNode.scala;sed -i \"s/^val threshold = ThresholdConfig(thAllreduce = .*$/val threshold = ThresholdConfig(thAllreduce = ${th}f, thReduce = ${th}f, thComplete = ${th}f)/\" testAllReduceNode.scala" 17 | 18 | #run testAllReduceNode on each slave 19 | ./start_workers.sh /code/BIDMach/scripts/testAllReduceNode.scala 20 | -------------------------------------------------------------------------------- /scripts/test_pred_cmudict_s2s.ssc: -------------------------------------------------------------------------------- 1 | import util.control.Breaks._ 2 | import BIDMach.networks.SeqToSeq 3 | 4 | val datadir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/smat_data/" 5 | val modeldir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/bidmach_model_10pass/" 6 | val preddir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/pred/" 7 | 8 | var src = loadMat(datadir+"valid.src_grapheme.shuf.smat.lz4") 9 | var dst = loadMat(datadir+"valid.dst_phoneme.shuf.smat.lz4") 10 | 11 | // val model = SeqToSeq.load(modeldir+"model256_te0.3_00/") 12 | val model = new SeqToSeq 13 | model.setmodelmats(new Array[Mat](7)) 14 | for (i <- 0 until 7) { 15 | model.modelmats(i) = loadMat(modeldir+"model256_te0.3_00/modelmat%02d.lz4" format i) 16 | } 17 | 18 | val (nn, opts) = SeqToSeq.predict(model, src) 19 | val net = nn.model.asInstanceOf[BIDMach.networks.SeqToSeq] 20 | 21 | opts.nvocabIn = 31 22 | opts.nvocabOut = 43 23 | opts.height = 2 24 | opts.dim = 512 25 | opts.batchSize = 64 26 | 27 | opts.kind = 1 28 | opts.netType = 0 29 | opts.scoreType = 0 30 | opts.inwidth = 22 31 | opts.outwidth = 20 32 | opts.hasBias = true 33 | opts.pstep = 0.0001f 34 | opts.cumScore = 3 35 | opts.PADsym = 1 36 | opts.OOVsym = 2 37 | opts.STARTsym = 0 38 | 39 | opts.autoReset = false 40 | 41 | nn.predict 42 | 43 | val preds = IMat(nn.preds(0)) 44 | 45 | def calcWER(preds:IMat, dst:IMat):(Int, Float) = { 46 | var error = 0 47 | for (j <- 0 until preds.ncols) { 48 | breakable { 49 | for (i <- 0 until preds.nrows) { 50 | if (preds(i, j) == 1) { 51 | if (dst(i, j) > 1) error += 1 // early prediction termination 52 | break 53 | } else if (preds(i, j) != dst(i, j)) { 54 | error += 1 55 | break 56 | } 57 | } 58 | } 59 | } 60 | (error, error.toFloat/preds.ncols) 61 | } 62 | 63 | val WER = calcWER(preds, IMat(full(dst))) 64 | -------------------------------------------------------------------------------- /scripts/testds.ssc: -------------------------------------------------------------------------------- 1 | 2 | val opts = new SFileSource.Options; 3 | 4 | def getDS() = { 5 | implicit val threads = threadPool(4) 6 | new SFileSource(opts); 7 | } 8 | 9 | val ds=getDS; 10 | 11 | opts.nend=10; 12 | opts.fnames=List(FileSource.simpleEnum("../data/uci/pubmed_parts/part%02d.smat.lz4", 1, 0)); 13 | opts.batchSize = 100000; 14 | opts.fcounts = 141043 15 | opts.eltsPerSample = 400; 16 | ds.init; 17 | 18 | var i = 0; 19 | var total = 0L; 20 | tic; 21 | while (ds.hasNext) { 22 | val mats = ds.next; 23 | total += mats(0).asInstanceOf[SMat].nnz * 8L; 24 | val t=toc; 25 | println("Speed %4.3f MB/s, %4.3f GB in %4.3f secs" format (total/t/1e6, total/1e9, 
t)); 26 | } 27 | 28 | 29 | -------------------------------------------------------------------------------- /scripts/testldagibbs.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dd= loadSMat("../data/uci/nytimes.smat.lz4"); 3 | 4 | val (nn,opts)=LDAgibbs.learner(dd) 5 | 6 | opts.dim=128; 7 | opts.uiter=5; 8 | opts.batchSize=1024; 9 | opts.npasses=1; 10 | opts.useBino=true; 11 | opts.doDirichlet=true; 12 | opts.alpha=0.2f; 13 | opts.doAlpha=true; 14 | opts.nsamps=100; 15 | opts.power=0.5f; 16 | 17 | nn.train 18 | -------------------------------------------------------------------------------- /scripts/testlincomb.sc: -------------------------------------------------------------------------------- 1 | 2 | :silent 3 | 4 | val a=grand(2,2) 5 | val b=grand(2,2) 6 | val c=gzeros(2,2) 7 | 8 | Grad.linComb(a,1f,b,1f,c) 9 | 10 | c 11 | 12 | System.exit(0) -------------------------------------------------------------------------------- /scripts/testlr.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir="../data/rcv1/" 3 | 4 | val a0 = loadSMat(dir + "docs.smat.lz4") 5 | val c0 = loadFMat(dir + "cats.fmat.lz4")(0->100,?) 6 | val rr = rand(c0.ncols,1); 7 | val (ss, ii) = sort2(rr); 8 | val a = a0(?,ii); 9 | val c = c0(?,ii); 10 | 11 | val ta = loadSMat(dir + "testdocs.smat.lz4") 12 | val tc = loadFMat(dir + "testcats.fmat.lz4")(0->100,?) 13 | 14 | setNumThreads(1) 15 | val (nn,opts)=GLM.learnerX(a,c,1) 16 | 17 | opts.batchSize=20000 18 | opts.lrate = 0.02f 19 | opts.npasses = 4 20 | opts.reg1weight = 0.0 21 | opts.links = iones(103,1) 22 | opts.addConstFeat=true; 23 | opts.aopts = opts; 24 | //opts.doVariance = true; 25 | opts.evalStep = 3; 26 | //opts.debugMem = true 27 | //opts.useGPU = false 28 | //Mat.useMKL = false 29 | 30 | val model = nn.model.asInstanceOf[GLM] 31 | nn.train 32 | 33 | val (mm, mopts) = GLM.predictor(nn.model, ta) 34 | mopts.addConstFeat=opts.addConstFeat; 35 | mopts.batchSize=20000 36 | mopts.links = opts.links 37 | mm.predict 38 | 39 | val pc = FMat(mm.preds(0)) 40 | 41 | //val tc2= tmap * tc 42 | val rc = roc2(pc, tc, 1-tc, 1000) 43 | val nc = sum(tc,2); 44 | val wmean = mean(rc)*nc/sum(nc) 45 | 46 | println("roc6 = %5.4f, roc weighted mean = %5.4f" format (mean(rc)(6), wmean.v)) 47 | -------------------------------------------------------------------------------- /scripts/testlstm.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.LSTMnextWord 2 | 3 | val dir="d:/data/twitter/featurized3/" 4 | val wlim = 10000 5 | 6 | val a0 = loadIMat(dir + "sentfeats000000.imat.lz4")(1,?); 7 | val igood = find((a0 < wlim) *@ (a0 >= 0)); 8 | val a = a0(0,igood); 9 | //val a = a1(0,0->400000); 10 | 11 | val ta0 = loadIMat(dir + "sentfeats000001.imat.lz4")(1,?) 
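// Apply the same vocabulary filter to the held-out file: *@ is BIDMat's
// elementwise multiply, so multiplying the two 0/1 masks ANDs them, and
// find() returns the indices of tokens with ids in [0, wlim).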
12 | val tigood = find((ta0 < wlim) *@ (ta0 >= 0)); 13 | val ta = ta0(0,tigood); 14 | 15 | val (nn,opts) = LSTMnextWord.learner(a) 16 | 17 | //opts.aopts = opts 18 | 19 | //opts.useGPU = false 20 | //Mat.useCache = false 21 | 22 | opts.npasses = 3 23 | opts.lrate = 0.3f 24 | opts.batchSize=10000 25 | opts.width=5; 26 | opts.height=1; 27 | opts.dim=128; 28 | opts.kind = 3; 29 | opts.nvocab = 10000; 30 | opts.autoReset=false 31 | opts.bylevel = false; 32 | //opts.debug =1; 33 | 34 | opts.reg1weight = 0.00001 35 | 36 | val dnn = nn.model.asInstanceOf[LSTMnextWord] 37 | nn.train 38 | 39 | val ll = dnn.layers 40 | val d1 = ll(3).asInstanceOf[BIDMach.networks.LSTMLayer] 41 | val d2 = ll(4).asInstanceOf[BIDMach.networks.LSTMLayer] 42 | val dl1 = d1.internal_layers 43 | val dl2 = d2.internal_layers 44 | 45 | val in = ll(0).output.asInstanceOf[GSMat]; 46 | val rin = IMat(new GIMat(1, in.nc, in.ir, in.nc)); 47 | val dict = Dict(loadSBMat(dir+"../alldict.gz")); 48 | 49 | val lres = nn.results.ncols 50 | mean(nn.results(?,(lres-11)->(lres-1)),2) 51 | 52 | 53 | -------------------------------------------------------------------------------- /scripts/testnet.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.Net 2 | 3 | val dir="/mnt/BIDMach/data/rcv1/" 4 | 5 | val a0 = loadSMat(dir + "docs.smat.lz4")(0->100000,?) 6 | val c0 = loadFMat(dir + "cats.fmat.lz4")(0->100,?) 7 | val rr = rand(c0.ncols,1); 8 | val (ss, ii) = sort2(rr); 9 | val a = a0(?,ii); 10 | val c = c0(?,ii); 11 | 12 | val ta = loadSMat(dir + "testdocs.smat.lz4")(0->100000,0->23000) 13 | val tc = loadFMat(dir + "testcats.fmat.lz4")(0->100,0->23000) 14 | 15 | val (nn,opts) = Net.learnerX(a,c); 16 | 17 | opts.aopts = opts 18 | opts.batchSize=200 19 | opts.reg1weight = 0.0001 20 | opts.npasses = 2 21 | opts.hasBias = true 22 | opts.links = iones(100,1); 23 | opts.lrate = 0.4f // best for 6-layer 24 | opts.lrate = 0.01f 25 | opts.texp = 0.3f 26 | opts.nweight = 1e-4f 27 | //opts.useGPU = false 28 | 29 | val net = Net.dnodes4(2,500,0.5f,100,opts,2); 30 | opts.nodeset = net 31 | 32 | val dnn = nn.model.asInstanceOf[Net] 33 | 34 | nn.train 35 | 36 | 37 | val (mm,mopts) = Net.predictor(dnn, ta); 38 | val dmm = mm.model.asInstanceOf[Net] 39 | mopts.batchSize=1000 40 | 41 | mm.predict 42 | 43 | val pc = FMat(mm.preds(0)) 44 | 45 | val rc = roc2(pc, tc, 1-tc, 1000) 46 | val counts = sum(tc,2); 47 | println("auc6 = %5.4f, auc weighted mean = %5.4f" format (mean(rc)(6), (mean(rc) * counts / sum(counts)).dv)); 48 | 49 | -------------------------------------------------------------------------------- /scripts/testrecv_local.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import 
BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | import scala.concurrent.Future 18 | import scala.concurrent.ExecutionContext.Implicits.global 19 | 20 | Mat.checkMKL(false) 21 | Mat.checkCUDA 22 | 23 | val data_dir = "/mnt/BIDMach/data/MNIST8M/parts/" 24 | val (nn, nnopts) = GLM.learner(data_dir+"data%02d.fmat.lz4", data_dir+"cats%02d.fmat.lz4") 25 | 26 | nnopts.useGPU = true; 27 | nnopts.nstart = 0; 28 | nnopts.nend = 0; 29 | nnopts.order = 0; 30 | nnopts.lookahead = 2; 31 | nnopts.featType = 1; 32 | nnopts.links = 2*iones(10,1); 33 | nnopts.eltsPerSample = 300; 34 | nnopts.targets = mkdiag(ones(10,1)) \ zeros(10, 784); 35 | nnopts.rmask = zeros(1,10) \ ones(1, 784); 36 | 37 | nnopts.batchSize = 500; 38 | nnopts.npasses = 1; 39 | nnopts.lrate = 0.001; // for logistic 40 | 41 | val w = new Worker(); 42 | val wopts = w.opts; 43 | wopts.trace = 4; 44 | wopts.machineTrace = 1; 45 | wopts.commandSocketNum = 12345 46 | wopts.responseSocketNum = 12346 47 | wopts.peerSocketNum = 12347 48 | 49 | w.start(nn) 50 | 51 | nn.paused = true 52 | 53 | // Future { 54 | // nn.train 55 | // } 56 | -------------------------------------------------------------------------------- /scripts/testrf.ssc: -------------------------------------------------------------------------------- 1 | val (mm,opts) = RandomForest.learner( 2 | "/opt/BIDMach/data/MNIST8M/parts/data%02d.fmat.lz4", 3 | "/opt/BIDMach/data/MNIST8M/parts/cats%02d.imat.lz4" 4 | ) 5 | opts.useGPU = true 6 | opts.batchSize = 20000 7 | opts.depth = 10 8 | // opts.nend = 8 9 | opts.ntrees = 100 10 | opts.ncats = 10 11 | opts.impurity = 0 12 | 13 | opts.nsamps = 12 14 | opts.nnodes = 50000 15 | opts.nbits = 16 16 | mm.train 17 | -------------------------------------------------------------------------------- /scripts/testrforest.ssc: -------------------------------------------------------------------------------- 1 | val mdir = "../data/MNIST8M/parts/" 2 | 3 | val (nn, opts) = RandomForest.learner(mdir+"data%02d.fmat.lz4", mdir+"cats%02d.imat.lz4") 4 | 5 | opts.nend = 70 6 | opts.batchSize = 20000 7 | opts.depth = 30 8 | opts.ntrees = 32 9 | opts.nsamps = 32 10 | opts.nnodes = 300000 11 | opts.nbits = 16 12 | opts.gain = 0.001f 13 | opts.ncats = 10 14 | 15 | val rf = nn.model.asInstanceOf[RandomForest] 16 | 17 | nn.train 18 | 19 | -------------------------------------------------------------------------------- /scripts/testsend_local.ssc: -------------------------------------------------------------------------------- 1 | import java.net.{InetAddress,InetSocketAddress} 2 | import BIDMach.allreduce.{Master,Worker,Command} 3 | 4 | val addresses = new Array[InetSocketAddress](1) 5 | addresses(0) = new InetSocketAddress("0.0.0.0", 12345) 6 | 7 | val m = new Master(); 8 | val opts = m.opts; 9 | opts.trace = 3; 10 | opts.intervalMsec = 2000; 11 | //opts.limitFctn = Master.powerLimitFctn 12 | opts.limit = 1000000 13 | opts.timeScaleMsec = 2e-3f 14 | opts.permuteAlways = false 15 | 16 | opts.machine_threshold = 0.75 17 | opts.min_time_to_wait_for_all = 3000 18 | opts.time_threshold = 5000 19 | 20 | 21 | val nmachines = addresses.length; 22 | 23 | val gmods = irow(nmachines); 24 | val gmachines = irow(0->nmachines); 25 | 26 | m.init 27 | m.config(gmods, gmachines, addresses) 28 | m.sendConfig 29 | m.setMachineNumbers 30 | 31 | //m.startLearners 32 | //m.startUpdates 33 | //m.permuteAllreduce(0,1000000) 34 | 35 | -------------------------------------------------------------------------------- /scripts/testsvd.ssc: 
-------------------------------------------------------------------------------- 1 | // Run approx. SVD on the MNIST8M dataset 2 | 3 | val dir="/code/BIDMach/data/MNIST8M/parts/" 4 | val (nn, opts) = SVD.learner(dir+"data%02d.fmat.lz4"); 5 | 6 | opts.nend = 10; 7 | opts.dim = 20; 8 | opts.npasses = 10; 9 | opts.batchSize = 10000; 10 | opts.useDouble = true; 11 | opts.pstep = 0.01f 12 | opts.miniBatchPasses = 1; 13 | opts.batchesPerUpdate = 200; 14 | opts.updateAll = true 15 | opts.lookahead = 2; 16 | opts.evalType = 0; 17 | opts.order = 1; 18 | opts.doRayleighRitz = false 19 | opts.autoReset = false 20 | opts.subMean = false 21 | //opts.traceFileSource = 1 22 | //opts.useGPU = false 23 | 24 | val model = nn.model.asInstanceOf[SVD] 25 | 26 | nn.train 27 | 28 | // Singular values and vectors 29 | 30 | val svals = FMat(nn.modelmats(1)); 31 | val svecs = FMat(nn.modelmats(0)); 32 | 33 | // Compute M * M^t directly to compute a reference SVD (can only do this 34 | // for small feature spaces). 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/testword2vec.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.Word2Vec 2 | 3 | val mdir = "../data/word2vec/data/" 4 | 5 | val (nn, opts) = Word2Vec.learner(mdir+"train%05d.imat.lz4"); 6 | 7 | opts.nstart = 0; 8 | opts.nend = 7; 9 | opts.npasses = 4; 10 | opts.batchSize = 1000000; 11 | opts.lrate = 1e-3f 12 | opts.vexp = 0.5f 13 | opts.nreuse = 5 14 | opts.dim = 300 15 | opts.vocabSize = 100000 16 | 17 | opts.useGPU = true; 18 | //opts.autoReset = false; 19 | //Mat.useMKL = false; 20 | 21 | nn.train 22 | 23 | val mod = nn.model.asInstanceOf[Word2Vec] 24 | 25 | //saveFMat(mdir+"model0.fmat.lz4", FMat(mod.modelmats(0))) 26 | 27 | //saveFMat(mdir+"model1.fmat.lz4", FMat(mod.modelmats(1))) 28 | 29 | val test = loadIMat(mdir+"test00000.imat.lz4"); 30 | 31 | val (mm,mopts) = Word2Vec.predictor(mod, test); 32 | 33 | mopts.useGPU = opts.useGPU 34 | mm.predict 35 | 36 | val score = mean(mm.results(0,0->(mm.results.ncols-2))); 37 | 38 | val dict = loadCSMat(mdir+"dict.csmat.lz4"); 39 | 40 | Word2Vec.saveGoogleW2V(dict, FMat(mod.modelmats(0)), mdir+"googmodel.bin", true); -------------------------------------------------------------------------------- /scripts/testword2vecp.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.Word2Vec 2 | 3 | val mdir = "/code/word2vec/data/"; 4 | 5 | val (nn, opts) = Word2Vec.learnPar(mdir+"data%03d.imat.lz4"); 6 | 7 | opts.nstart = 0; 8 | opts.nend = 7; 9 | opts.npasses = 1; 10 | opts.batchSize = 1000000; 11 | opts.lrate = 1e-3f; 12 | opts.vexp = 0.5f 13 | opts.nreuse = 5 14 | opts.dim = 300 15 | opts.vocabSize = 400000 16 | 17 | opts.syncStep = 256 18 | 19 | //opts.useGPU = false; 20 | //opts.autoReset = false; 21 | //Mat.useMKL = false; 22 | 23 | nn.train 24 | 25 | val mod0 = nn.models(0).asInstanceOf[Word2Vec] 26 | 27 | //saveFMat(mdir+"model0.fmat.lz4", FMat(mod0.modelmats(0))) 28 | 29 | //saveFMat(mdir+"model1.fmat.lz4", FMat(mod0.modelmats(1))) 30 | -------------------------------------------------------------------------------- /scripts/tmp.sc: -------------------------------------------------------------------------------- 1 | println("ran script") 2 | System.exit(0); 3 | -------------------------------------------------------------------------------- /scripts/trainLSTM.ssc: 
-------------------------------------------------------------------------------- 1 | 2 | /** 3 | * LSTM training script. 4 | * 5 | */ 6 | import BIDMach.networks.SeqToSeq 7 | 8 | val dir = "/data01/livejournal/srcdst/"; // Directory for input data 9 | 10 | val (nn, opts) = SeqToSeq.learner(dir + "src%04d.smat.lz4", dir + "dst%04d.smat.lz4"); 11 | 12 | opts.lrate = 0.1f; // Learning rate 13 | opts.nvocab = 100000; // Vocabulary limit 14 | opts.npasses = 2; // Number of passes over the dataset 15 | opts.height = 2; // Height of the network 16 | opts.dim = 256; // Dimension of LSTM units 17 | opts.batchSize = 128; // Batch size 18 | opts.nstart = 0; // File start number 19 | opts.nend = 1132; // File end number 20 | opts.checkPointFile = dir + "../models/livejournal_256d_1lr_%02d/"; // Where to save models 21 | opts.checkPointInterval = 24f; // How often to save in hours 22 | opts.netType = 0; // Net type (softmax=0, or negsampling=1) 23 | opts.scoreType = 1; // Score type (logloss=0, accuracy=1) 24 | opts.inwidth = 30; // Max input sentence length (truncates) 25 | opts.outwidth = 30; // Max output sentence length (truncates) 26 | opts.hasBias = true; // Use bias terms in linear layers 27 | opts.pstep = 0.0001f; // How often to print 28 | opts.cumScore = 3; // Accumulate scores for less-noisy printing 29 | opts.PADsym = 1; // The padding symbol 30 | opts.OOVsym = 2; // The OOV symbol 31 | opts.STARTsym = 0; // The start symbol 32 | opts.reg1weight = 1e-9f // L1 regularization weight 33 | 34 | println(opts.what) 35 | 36 | nn.train 37 | -------------------------------------------------------------------------------- /scripts/workout.ssc: -------------------------------------------------------------------------------- 1 | 2 | // This script needs to be run from /scripts 3 | 4 | var useGPU=true 5 | var doTwitter=false 6 | 7 | println("\n<<<<<<< Testing with GPU >>>>>>>") 8 | :load workout_slave.ssc 9 | 10 | println("\n<<<<<<< Testing without GPU >>>>>>>") 11 | useGPU = false 12 | :load workout_slave.ssc 13 | 14 | println("\n<<<<<<< Testing without MKL >>>>>>>") 15 | Mat.useMKL = false 16 | :load workout_slave.ssc 17 | -------------------------------------------------------------------------------- /scripts/workout2.ssc: -------------------------------------------------------------------------------- 1 | :silent 2 | 3 | // This script needs to be run from /scripts 4 | 5 | var useGPU=false 6 | var doTwitter=false 7 | 8 | println("\n<<<<<<< Testing with GPU >>>>>>>") 9 | :load workout_slave.ssc 10 | 11 | println("\n<<<<<<< Testing without GPU >>>>>>>") 12 | useGPU = false 13 | :load workout_slave.ssc 14 | 15 | println("\n<<<<<<< Testing without MKL >>>>>>>") 16 | Mat.useMKL = false 17 | :load workout_slave.ssc 18 | :silent -------------------------------------------------------------------------------- /shortpath.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo %~s1% 3 | -------------------------------------------------------------------------------- /src/main/C/newparse/configure: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If on Windows, fix ZLIBPATH below to point to zlib.lib. 4 | 5 | if [[ "$1" == win* ]]; then 6 | ZLIBPATH="/LIBPATH:/code/zlib/lib/ zlib.lib" 7 | MYDEFS="/D WITHGZS" 8 | RELOPTS="/O2 /I . ${MYDEFS} /EHsc /D NDEBUG /Gd /GF /MT" 9 | DBGOPTS="/EHsc /I . 
${MYDEFS} /D _DEBUG /Gd /GF /MTd /W4 /wd4996" 10 | RELLINK="${ZLIBPATH} /MACHINE:AMD64" 11 | DBGLINK="${ZLIBPATH} /NODEFAULTLIB:LIBCMT /MACHINE:AMD64" 12 | CC="cl" 13 | CPP="cl" 14 | LD="link" 15 | else 16 | GCCV=`gcc -dumpversion` 17 | GV=`echo $GCCV | sed -e 's/\.\([0-9][0-9]\)/\1/g' -e 's/\.\([0-9]\)/0\1/g' -e 's/^[0-9]\{3,4\}$/&00/'` 18 | 19 | if [ $GV -ge "40700" ] ; then 20 | CPPFLAGS="-std=c++11" 21 | else 22 | if [ $GV -ge "40400" ] ; then 23 | CPPFLAGS="-std=c++0x" 24 | fi 25 | fi 26 | RELOPTS="${CFLAGS} -O2 -DNDEBUG -DWITHGZS -Wno-deprecated -I." 27 | DBGOPTS="${CFLAGS} -O2 -DDEBUG -DWITHGZS -g -I." 28 | RELLINK="-lz" 29 | DBGLINK="-lz" 30 | CC="gcc" 31 | CPP="g++" 32 | LD="g++" 33 | fi 34 | 35 | if [[ "$1" == *debug ]]; then 36 | CC_OPTS="${DBGOPTS}" 37 | LINK_OPTS="${DBGLINK}" 38 | else 39 | CC_OPTS="${RELOPTS}" 40 | LINK_OPTS="${RELLINK}" 41 | fi 42 | 43 | echo "CC=$CC" > makefile.incl 44 | echo "CPP=$CPP" >> makefile.incl 45 | echo "LD=$LD" >> makefile.incl 46 | echo "CC_OPTS=$CC_OPTS" >> makefile.incl 47 | echo "CPPFLAGS=$CPPFLAGS" >> makefile.incl 48 | echo "LINK_OPTS=$LINK_OPTS" >> makefile.incl 49 | 50 | -------------------------------------------------------------------------------- /src/main/C/newparse/makefile: -------------------------------------------------------------------------------- 1 | 2 | include makefile.incl 3 | 4 | ifeq ($(CC),gcc) 5 | include makefile.gcc 6 | else 7 | include makefile.w32 8 | endif 9 | 10 | install: all 11 | mkdir -p ../../../../cbin 12 | cp *.exe ../../../../cbin 13 | mkdir -p ../../../../src/main/resources/cbin 14 | cp *.exe ../../../../src/main/resources/cbin -------------------------------------------------------------------------------- /src/main/C/newparse/makefile.gcc: -------------------------------------------------------------------------------- 1 | 2 | .SUFFIXES: 3 | .SUFFIXES: .c .cpp .o .exe .lxc .flex 4 | 5 | EXES=xmltweet.exe xmlwiki.exe trec.exe tparse.exe parsevw.exe tparse2.exe 6 | OBJS=newparse.o utils.o gzstream.o 7 | 8 | .SECONDARY: xmltweet.lxc xmlwiki.lxc trec.lxc 9 | 10 | all: $(EXES) 11 | 12 | .flex.lxc: 13 | flex -o $@ $< 14 | 15 | tparse.exe: gzstream.o utils.o tparse.o utils.h 16 | $(LD) -o tparse.exe tparse.o utils.o gzstream.o $(LINK_OPTS) 17 | 18 | tparse2.exe: gzstream.o utils.o tparse2.o utils.h 19 | $(LD) -o tparse2.exe tparse2.o utils.o gzstream.o $(LINK_OPTS) 20 | 21 | parsevw.exe: gzstream.o utils.o parsevw.o utils.h 22 | $(LD) -o parsevw.exe parsevw.o utils.o gzstream.o $(LINK_OPTS) 23 | 24 | .o.exe: $(OBJS) 25 | $(LD) -o $@ $(OBJS) $< $(LINK_OPTS) 26 | 27 | .cpp.o: utils.h gzstream.h 28 | $(CPP) $(CPPFLAGS) $(CC_OPTS) -o $@ -c $< 29 | 30 | .lxc.o: 31 | $(CC) $(CC_OPTS) -DYY_NO_UNISTD_H -o $@ -c -x c $< 32 | 33 | $(EXES): $(OBJS) 34 | 35 | gzstream.o: gzstream.h 36 | 37 | clean: 38 | rm -f $(EXES) *.o *.lxc 39 | -------------------------------------------------------------------------------- /src/main/C/newparse/makefile.w32: -------------------------------------------------------------------------------- 1 | 2 | .SUFFIXES: 3 | .SUFFIXES: .c .cpp .obj .exe .lxc .flex 4 | 5 | EXES=xmltweet.exe xmlwiki.exe trec.exe tparse.exe parsevw.exe tparse2.exe 6 | OBJS=newparse.obj utils.obj gzstream.obj 7 | 8 | .SECONDARY: xmltweet.lxc xmlwiki.lxc trec.lxc 9 | 10 | all: $(EXES) 11 | 12 | .flex.lxc: 13 | flex -o $@ $< 14 | 15 | tparse.exe: gzstream.obj utils.obj tparse.obj utils.h 16 | $(LD) tparse.obj utils.obj gzstream.obj $(LINK_OPTS) /OUT:tparse.exe 17 | 18 | tparse2.exe: gzstream.obj utils.obj tparse2.obj 
utils.h 19 | $(LD) tparse2.obj utils.obj gzstream.obj $(LINK_OPTS) /OUT:tparse2.exe 20 | 21 | parsevw.exe: gzstream.obj utils.obj parsevw.obj utils.h 22 | $(LD) parsevw.obj utils.obj gzstream.obj $(LINK_OPTS) /OUT:parsevw.exe 23 | 24 | .obj.exe: $(OBJS) 25 | $(LD) $(OBJS) $< $(LINK_OPTS) /OUT:"$@" 26 | 27 | .cpp.obj: utils.h gzstream.h 28 | $(CPP) $(CC_OPTS) /Fo"$@" /c $< 29 | 30 | .lxc.obj: 31 | $(CPP) $(CC_OPTS) /DYY_NO_UNISTD_H /Fo"$@" /c /Tc"$<" 32 | 33 | $(EXES): $(OBJS) 34 | 35 | gzstream.obj: gzstream.h 36 | 37 | clean: 38 | rm -f $(EXES) *.obj *.lxc 39 | -------------------------------------------------------------------------------- /src/main/C/newparse/trec.flex: -------------------------------------------------------------------------------- 1 | /* Scanner for TREC format */ 2 | 3 | %{ 4 | extern int checkword(char *); 5 | extern void addtok(int tok); 6 | extern int parsedate(char * str); 7 | extern int numlines; 8 | 9 | %} 10 | 11 | %option never-interactive 12 | %option noyywrap 13 | 14 | LETTER [a-zA-Z_] 15 | DIGF [0-9][0-9][0-9][0-9] 16 | DIGT [0-9][0-9] 17 | DIGIT [0-9] 18 | PUNCT [;:,.?!] 19 | 20 | %% 21 | 22 | -?{DIGIT}+ { 23 | #if __STDC_VERSION__ >= 199901L 24 | long long iv = strtoll(yytext, NULL, 10); 25 | #else 26 | long iv = strtol(yytext, NULL, 10); 27 | #endif 28 | addtok(iv); 29 | iv = iv >> 31; 30 | if (iv > 0 || iv < -1) { 31 | addtok(iv); 32 | } 33 | } 34 | 35 | -?{DIGIT}+"."{DIGIT}* { 36 | float f = (float)strtod(yytext, NULL); 37 | int iv = *((int *)(&f)); 38 | addtok(iv >> 1); 39 | } 40 | 41 | {DIGF}"-"{DIGT}"-"{DIGT}"T"{DIGT}":"{DIGT}":"{DIGT}("-"|"+"){DIGT}":"{DIGT} { 42 | int tt = parsedate(yytext); 43 | addtok(tt); 44 | } 45 | 46 | {LETTER}+ { 47 | int iv = checkword(yytext); 48 | } 49 | 50 | "<"{LETTER}+">" { 51 | int iv = checkword(yytext); 52 | } 53 | 54 | "</"{LETTER}+">" { 55 | int iv = checkword(yytext); 56 | } 57 | 58 | ".I" { 59 | int iv = checkword(yytext); 60 | } 61 | 62 | ".W" { 63 | int iv = checkword(yytext); 64 | } 65 | 66 | {PUNCT} { 67 | int iv = checkword(yytext); 68 | } 69 | 70 | "..""."* { 71 | char ell[] = "..."; 72 | int iv = checkword(ell); 73 | } 74 | 75 | [\n] { 76 | numlines++; 77 | if (numlines % 1000000 == 0) { 78 | fprintf(stderr, "\r%05d lines", numlines); 79 | fflush(stderr); 80 | } 81 | } 82 | 83 | . 
{} 84 | 85 | %% 86 | 87 | -------------------------------------------------------------------------------- /src/main/java/edu/berkeley/bvlc/CAFFE.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.bvlc; 2 | 3 | public final class CAFFE { 4 | 5 | private CAFFE() {} 6 | 7 | static { 8 | LibUtils.loadLibrary("caffe"); 9 | } 10 | 11 | public static native void set_mode(int mode); 12 | 13 | public static native void set_phase(int phase); 14 | 15 | public static native int get_mode(); 16 | 17 | public static native int get_phase(); 18 | 19 | public static native void set_device(int n); 20 | 21 | public static native void DeviceQuery(); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/edu/berkeley/bvlc/LAYER.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.bvlc; 2 | 3 | public final class LAYER { 4 | 5 | static { 6 | LibUtils.loadLibrary("caffe"); 7 | } 8 | 9 | private LAYER() {} 10 | 11 | protected LAYER(long shptr) { 12 | _shptr = shptr; 13 | } 14 | 15 | public int num_blobs() {if (_shptr != 0) return num_blobs(_shptr); else throw new RuntimeException("Layer uninitialized");} 16 | 17 | public BLOB blob(int i) { 18 | if (_shptr == 0) { 19 | throw new RuntimeException("Layer uninitialized"); 20 | } else { 21 | int n = num_blobs(); 22 | if (i < 0 || i >= n) { 23 | throw new RuntimeException("Layer blob index "+i+" out of range (0, "+(n-1)+")"); 24 | } 25 | return new BLOB(blob(_shptr, i)); 26 | } 27 | } 28 | 29 | @Override 30 | protected void finalize() { 31 | if (_shptr != 0) clearLayer(_shptr); 32 | _shptr = 0; 33 | } 34 | 35 | private long _shptr = 0; 36 | 37 | private static native int num_blobs(long ref); 38 | 39 | private static native long blob(long ref, int i); 40 | 41 | private static native int clearLayer(long ref); 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/edu/berkeley/bvlc/SGDSOLVER.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.bvlc; 2 | 3 | public final class SGDSOLVER { 4 | 5 | static { 6 | LibUtils.loadLibrary("caffe"); 7 | } 8 | 9 | public SGDSOLVER(String pfile) { 10 | _sptr = fromParams(pfile); 11 | _net = new NET(net(_sptr)); 12 | } 13 | 14 | public NET net() {return _net;} 15 | 16 | public void Solve() {if (_sptr != 0) Solve(_sptr);} 17 | 18 | public void SolveResume(String s) {if (_sptr != 0) SolveResume(_sptr, s);} 19 | 20 | @Override 21 | protected void finalize() { 22 | if (_sptr != 0) clearSGDSolver(_sptr); 23 | } 24 | 25 | private final long _sptr; 26 | 27 | private final NET _net; 28 | 29 | private static native long fromParams(String name); 30 | 31 | private static native long net(long n); 32 | 33 | private static native void Solve(long n); 34 | 35 | private static native void SolveResume(long n, String s); 36 | 37 | private static native void clearSGDSolver(long ref); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/BytesListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface BytesListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.BytesList) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated bytes value = 1; 12 | */ 13 | java.util.List<com.google.protobuf.ByteString> getValueList(); 14 | /** 15 | * repeated bytes value = 1; 16 | */ 17 | int getValueCount(); 18 | /** 19 | * repeated bytes value = 1; 20 | */ 21 | com.google.protobuf.ByteString getValue(int index); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/ExampleOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: example.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface ExampleOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Example) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * .tensorflow.Features features = 1; 12 | */ 13 | boolean hasFeatures(); 14 | /** 15 | * .tensorflow.Features features = 1; 16 | */ 17 | org.tensorflow.example.Features getFeatures(); 18 | /** 19 | * .tensorflow.Features features = 1; 20 | */ 21 | org.tensorflow.example.FeaturesOrBuilder getFeaturesOrBuilder(); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FeatureListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FeatureListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.FeatureList) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated .tensorflow.Feature feature = 1; 12 | */ 13 | java.util.List<org.tensorflow.example.Feature> 14 | getFeatureList(); 15 | /** 16 | * repeated .tensorflow.Feature feature = 1; 17 | */ 18 | org.tensorflow.example.Feature getFeature(int index); 19 | /** 20 | * repeated .tensorflow.Feature feature = 1; 21 | */ 22 | int getFeatureCount(); 23 | /** 24 | * repeated .tensorflow.Feature feature = 1; 25 | */ 26 | java.util.List<? extends org.tensorflow.example.FeatureOrBuilder> 27 | getFeatureOrBuilderList(); 28 | /** 29 | * repeated .tensorflow.Feature feature = 1; 30 | */ 31 | org.tensorflow.example.FeatureOrBuilder getFeatureOrBuilder( 32 | int index); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FeatureOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FeatureOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Feature) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * .tensorflow.BytesList bytes_list = 1; 12 | */ 13 | boolean hasBytesList(); 14 | /** 15 | * .tensorflow.BytesList bytes_list = 1; 16 | */ 17 | org.tensorflow.example.BytesList getBytesList(); 18 | /** 19 | * .tensorflow.BytesList bytes_list = 1; 20 | */ 21 | org.tensorflow.example.BytesListOrBuilder getBytesListOrBuilder(); 22 | 23 | /** 24 | * .tensorflow.FloatList float_list = 2; 25 | */ 26 | boolean hasFloatList(); 27 | /** 28 | * .tensorflow.FloatList float_list = 2; 29 | */ 30 | org.tensorflow.example.FloatList getFloatList(); 31 | /** 32 | * .tensorflow.FloatList float_list = 2; 33 | */ 34 | org.tensorflow.example.FloatListOrBuilder getFloatListOrBuilder(); 35 | 36 | /** 37 | * .tensorflow.Int64List int64_list = 3; 38 | */ 39 | boolean hasInt64List(); 40 | /** 41 | * .tensorflow.Int64List int64_list = 3; 42 | */ 43 | org.tensorflow.example.Int64List getInt64List(); 44 | /** 45 | * .tensorflow.Int64List int64_list = 3; 46 | */ 47 | org.tensorflow.example.Int64ListOrBuilder getInt64ListOrBuilder(); 48 | 49 | public org.tensorflow.example.Feature.KindCase getKindCase(); 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FeaturesOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FeaturesOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Features) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | *
12 |    * Map from feature name to feature.
13 |    * 
14 | * 15 | * map<string, .tensorflow.Feature> feature = 1; 16 | */ 17 | int getFeatureCount(); 18 | /** 19 | *
20 |    * Map from feature name to feature.
21 |    * 
22 | * 23 | * map<string, .tensorflow.Feature> feature = 1; 24 | */ 25 | boolean containsFeature( 26 | java.lang.String key); 27 | /** 28 | * Use {@link #getFeatureMap()} instead. 29 | */ 30 | @java.lang.Deprecated 31 | java.util.Map<java.lang.String, org.tensorflow.example.Feature> 32 | getFeature(); 33 | /** 34 | *
35 |    * Map from feature name to feature.
36 |    * 
37 | * 38 | * map<string, .tensorflow.Feature> feature = 1; 39 | */ 40 | java.util.Map<java.lang.String, org.tensorflow.example.Feature> 41 | getFeatureMap(); 42 | /** 43 | *
44 |    * Map from feature name to feature.
45 |    * 
46 | * 47 | * map<string, .tensorflow.Feature> feature = 1; 48 | */ 49 | 50 | org.tensorflow.example.Feature getFeatureOrDefault( 51 | java.lang.String key, 52 | org.tensorflow.example.Feature defaultValue); 53 | /** 54 | *
55 |    * Map from feature name to feature.
56 |    * 
57 | * 58 | * map<string, .tensorflow.Feature> feature = 1; 59 | */ 60 | 61 | org.tensorflow.example.Feature getFeatureOrThrow( 62 | java.lang.String key); 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FloatListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FloatListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.FloatList) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated float value = 1 [packed = true]; 12 | */ 13 | java.util.List<java.lang.Float> getValueList(); 14 | /** 15 | * repeated float value = 1 [packed = true]; 16 | */ 17 | int getValueCount(); 18 | /** 19 | * repeated float value = 1 [packed = true]; 20 | */ 21 | float getValue(int index); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/Int64ListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface Int64ListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Int64List) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated int64 value = 1 [packed = true]; 12 | */ 13 | java.util.List<java.lang.Long> getValueList(); 14 | /** 15 | * repeated int64 value = 1 [packed = true]; 16 | */ 17 | int getValueCount(); 18 | /** 19 | * repeated int64 value = 1 [packed = true]; 20 | */ 21 | long getValue(int index); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/RecordWriter.java: -------------------------------------------------------------------------------- 1 | package org.tensorflow.example; 2 | import java.io.*; 3 | import java.util.zip.*; 4 | 5 | public class RecordWriter { 6 | private static final long serialVersionUID = 0L; 7 | 8 | public RecordWriter(DataInputStream ds) { 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/SequenceExampleOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | // source: example.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface SequenceExampleOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.SequenceExample) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * .tensorflow.Features context = 1; 12 | */ 13 | boolean hasContext(); 14 | /** 15 | * .tensorflow.Features context = 1; 16 | */ 17 | org.tensorflow.example.Features getContext(); 18 | /** 19 | * .tensorflow.Features context = 1; 20 | */ 21 | org.tensorflow.example.FeaturesOrBuilder getContextOrBuilder(); 22 | 23 | /** 24 | * .tensorflow.FeatureLists feature_lists = 2; 25 | */ 26 | boolean hasFeatureLists(); 27 | /** 28 | * .tensorflow.FeatureLists feature_lists = 2; 29 | */ 30 | org.tensorflow.example.FeatureLists getFeatureLists(); 31 | /** 32 | * .tensorflow.FeatureLists feature_lists = 2; 33 | */ 34 | org.tensorflow.example.FeatureListsOrBuilder getFeatureListsOrBuilder(); 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/io/RecordWriter.java: -------------------------------------------------------------------------------- 1 | package org.tensorflow.io; 2 | import java.io.*; 3 | import java.util.zip.*; 4 | 5 | public class RecordWriter { 6 | private static final long serialVersionUID = 0L; 7 | private static final int DEFAULT_BUFSIZE = 64*1024; 8 | 9 | private BufferedOutputStream ds_; 10 | 11 | public RecordWriter(OutputStream ds) { 12 | ds_ = new BufferedOutputStream(ds, DEFAULT_BUFSIZE); 13 | } 14 | 15 | public RecordWriter(String fname) throws IOException { 16 | FileOutputStream fout = new FileOutputStream(fname); 17 | ds_ = new BufferedOutputStream(fout, DEFAULT_BUFSIZE); 18 | } 19 | 20 | public int maskedCRC(byte [] bytes, int count) { 21 | return CRC32C.mask(CRC32C.getValue(bytes, 0, count)); 22 | } 23 | 24 | public int writeRecord(byte [] data) throws IOException { 25 | byte [] header = new byte[12]; 26 | byte [] footer = new byte[4]; 27 | CRC32C.encodeFixed64(header, 0, data.length); 28 | CRC32C.encodeFixed32(header, 8, maskedCRC(header, 8)); 29 | 30 | CRC32C.encodeFixed32(footer, 0, maskedCRC(data, data.length)); 31 | 32 | ds_.write(header, 0, 12); 33 | ds_.write(data, 0, data.length); 34 | ds_.write(footer, 0, 4); 35 | 36 | return 0; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | actor { 3 | provider = cluster 4 | } 5 | remote { 6 | log-remote-lifecycle-events = off 7 | netty.tcp { 8 | hostname = "127.0.0.1" 9 | port = 0 10 | } 11 | } 12 | 13 | cluster { 14 | seed-nodes = [ 15 | "akka.tcp://ClusterSystem@127.0.0.1:2551", 16 | "akka.tcp://ClusterSystem@127.0.0.1:2552"] 17 | 18 | # auto downing is NOT safe for production deployments. 19 | # you may want to use it during development, read more about it in the docs. 20 | auto-down-unreachable-after = 10s 21 | } 22 | log-dead-letters = 0 23 | log-dead-letters-during-shutdown = off 24 | } 25 | 26 | # Disable legacy metrics in akka-cluster. 27 | akka.cluster.metrics.enabled=off 28 | 29 | # Enable metrics extension in akka-cluster-metrics. 30 | //akka.extensions=["akka.cluster.metrics.ClusterMetricsExtension"] 31 | 32 | # Sigar native library extract location during tests. 33 | # Note: use per-jvm-instance folder when running multiple jvm on one host. 
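# NOTE (added comment, not in the original config): ${user.dir} is resolved by the
# config loader from the JVM system property of that name (the working directory),
# so each run extracts the Sigar natives under ./target/native.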
34 | akka.cluster.metrics.native-library-extract-folder=${user.dir}/target/native 35 | -------------------------------------------------------------------------------- /src/main/resources/lib/touch.txt: -------------------------------------------------------------------------------- 1 | touch 2 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/Copyright.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Regents of the University of California 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the <organization> nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | 26 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/Logging.scala: -------------------------------------------------------------------------------- 1 | package BIDMach 2 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GDMat,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat,TMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.models._ 7 | import BIDMach.datasinks._ 8 | 9 | 10 | object Logging{ 11 | def logGradientL2Norm(model:Model,data:Array[Mat]):Array[Mat] = { 12 | val m = model.modelmats 13 | val res = new Array[Float](m.length) 14 | for(i<-0 until m.length){ 15 | res(i) = sum(snorm(m(i))).dv.toFloat 16 | } 17 | Array(new FMat(m.length,1,res)) 18 | } 19 | 20 | def logGradientL1Norm(model:Model,data:Array[Mat]):Array[Mat] = { 21 | val m = model.modelmats 22 | val res = new Array[Float](m.length) 23 | for(i<-0 until m.length){ 24 | res(i) = sum(sum(abs(m(i)))).dv.toFloat 25 | } 26 | Array(new FMat(m.length,1,res)) 27 | } 28 | 29 | def getResults(model:Model): Array[Mat] = { 30 | model.opts.logDataSink match { 31 | case f:FileSink=>{println("Found results at "+f.opts.ofnames.head(0));null} 32 | case m:MatSink=>m.mats 33 | case null=>{println("No logDataSink found");null} 34 | } 35 | } 36 | 37 | def getResults(l:Learner): Array[Mat] = getResults(l.model) 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/AllreduceDummyLearner.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce 2 | 3 | import BIDMach.Learner 4 | import BIDMach.networks.Net 5 | 6 | /** 7 | * A dummy learner for ease of test. 
Can be opted out or refactored if necessary. 8 | * @param learner 9 | * @param dummy_model 10 | */ 11 | class AllreduceDummyLearner(learner:Learner, dummy_model:AllreduceDummyModel) 12 | extends Learner(learner.datasource,dummy_model,learner.mixins, learner.updater, learner.datasink ,learner.opts) { 13 | 14 | def this(){ 15 | this(Net.learner("dummy learner")._1, new AllreduceDummyModel()) 16 | } 17 | 18 | 19 | override def train: Unit = { 20 | println("dummy model is training!") 21 | while(true){ 22 | this.ipass+=1 23 | myLogger.info("pass=%2d" format ipass) 24 | this.dummy_model.showSomeWork() 25 | } 26 | 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/AllreduceDummyModel.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce 2 | 3 | import BIDMach.models.Model 4 | import BIDMat.{FMat, Mat} 5 | 6 | class AllreduceDummyModel(val _modelmat: Array[Mat]) extends Model { 7 | def this(){ 8 | this(Array[Mat](FMat.ones(30,100),FMat.ones(100,30))) 9 | } 10 | 11 | 12 | override def modelmats:Array[Mat] = { 13 | _modelmat 14 | } 15 | override def init()={} 16 | override def dobatch(mats:Array[Mat], ipass:Int, here:Long)={} 17 | override def evalbatch(mats: Array[Mat], ipass: Int, here:Long):FMat = { 18 | FMat.zeros(0,0) 19 | } 20 | def showSomeWork(){ 21 | println("I'm learning something") 22 | Thread.sleep(1000) 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/AllreduceMessage.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce 2 | 3 | import akka.actor.ActorRef 4 | import scala.collection.mutable.ArrayBuffer 5 | 6 | 7 | // worker messages 8 | final case class StartAllreduce(config : RoundConfig) 9 | final case class CompleteAllreduce(srcId : Int, config : RoundConfig) 10 | 11 | final case class ScatterBlock(value : Array[Float], srcId : Int, destId : Int, chunkId : Int, config : RoundConfig) 12 | final case class ReduceBlock(value: Array[Float], srcId : Int, destId : Int, chunkId : Int, config : RoundConfig, count: Int) 13 | 14 | final case class AllreduceStats(outgoingFloats: Long, incomingFloats: Long) 15 | 16 | /** 17 | * Comparison overrides providing a (line master version, round) ordering, for a smooth transition when nodes are added or removed. 18 | */ 19 | final case class RoundConfig(lineMasterVersion : Int, round: Int, lineMaster : ActorRef, peerWorkers: Map[Int, ActorRef], workerId: Int) { 20 | def < (other : RoundConfig): Boolean = { 21 | return if (lineMasterVersion < other.lineMasterVersion || 22 | (lineMasterVersion == other.lineMasterVersion && round < other.round)) {true} 23 | else {false} 24 | } 25 | 26 | def == (other : RoundConfig): Boolean = { 27 | return if (lineMasterVersion == other.lineMasterVersion && round == other.round) {true} else {false} 28 | } 29 | 30 | def > (other : RoundConfig): Boolean = { 31 | return !(this < other || this == other) 32 | } 33 | } 34 | 35 | /* 36 | * Messages used by the Line Master 37 | */ 38 | final case class StartAllreduceTask(peerNodes: ArrayBuffer[ActorRef], lineMasterVersion : Int) 39 | final case class StopAllreduceTask(lineMasterVersion : Int) 40 | 41 | /* 42 | * For the grid master, in case we want to kill the node 43 | */ 44 | final case class StopAllreduceNode() 
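// NOTE (added illustrative sketch, not part of the original source): RoundConfig
// orders configurations lexicographically on (lineMasterVersion, round), so any round
// under a newer line-master version supersedes every round of an older one:
//
//   val v1r9 = RoundConfig(1, 9, lineMaster = null, peerWorkers = Map.empty, workerId = 0)
//   val v2r0 = RoundConfig(2, 0, lineMaster = null, peerWorkers = Map.empty, workerId = 0)
//   assert(v1r9 < v2r0)   // version takes precedence over round number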
-------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/binder/AllreduceBinder.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.binder 2 | 3 | import BIDMach.allreduce.binder.AllreduceBinder.{DataSink, DataSource} 4 | 5 | /** 6 | * Trait to specify source and sink, allowing binding data input/output to the all-reduce process. 7 | */ 8 | trait AllreduceBinder { 9 | 10 | def totalDataSize: Int 11 | 12 | def dataSource: DataSource 13 | 14 | def dataSink: DataSink 15 | 16 | } 17 | 18 | object AllreduceBinder { 19 | 20 | type DataSink = AllReduceOutput => Unit 21 | type DataSource = AllReduceInputRequest => AllReduceInput 22 | var updateCounts = 100 23 | 24 | } 25 | 26 | case class AllReduceInputRequest(iteration: Int) 27 | 28 | case class AllReduceInput(data: Array[Float]) 29 | 30 | case class AllReduceOutput(data: Array[Float], iteration: Int) 31 | 32 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/binder/AssertCorrectnessBinder.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.binder 2 | 3 | import BIDMach.allreduce.binder.AllreduceBinder.{DataSink, DataSource} 4 | 5 | 6 | class AssertCorrectnessBinder(dataSize: Int, checkpoint: Int) extends AllreduceBinder { 7 | 8 | val random = new scala.util.Random(100) 9 | val totalInputSample = 8 10 | 11 | lazy val randomFloats = { 12 | val nestedArray = new Array[Array[Float]](totalInputSample) 13 | for (i <- 0 until totalInputSample) { 14 | nestedArray(i) = Array.range(0, dataSize).toList.map(_ => random.nextFloat()).toArray 15 | } 16 | nestedArray 17 | } 18 | 19 | private def ~=(x: Double, y: Double, precision: Double = 1e-5) = { 20 | if ((x - y).abs < precision) true else false 21 | } 22 | 23 | override def dataSource: DataSource = r => { 24 | AllReduceInput(randomFloats(r.iteration % totalInputSample)) 25 | } 26 | 27 | override def dataSink: DataSink = r => { 28 | 29 | if (r.iteration % checkpoint == 0) { 30 | val inputUsed = randomFloats(r.iteration % totalInputSample) 31 | println(s"\n----Asserting #${r.iteration} output...") 32 | for (i <- 0 until dataSize) { 33 | val meanActual = r.data(i) 34 | val expected = inputUsed(i) 35 | assert(~=(expected, meanActual), s"Expected [$expected], but actual [$meanActual] at pos $i for iteration #${r.iteration}") 36 | } 37 | println("OK: Means match the expected value!") 38 | } 39 | 40 | } 41 | 42 | override def totalDataSize: Int = dataSize 43 | } 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/binder/NoOpBinder.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.binder 2 | import BIDMach.allreduce.binder.AllreduceBinder.{DataSink, DataSource} 3 | 4 | /** 5 | * For experimentation only; can be opted out or refactored. 
6 | */ 7 | class NoOpBinder(dataSize: Int, printFrequency: Int = 10) extends AllreduceBinder { 8 | 9 | 10 | val random = new scala.util.Random(100) 11 | val totalInputSample = 4 12 | 13 | lazy val randomFloats = { 14 | val nestedArray: Array[Array[Float]] = Array.ofDim(totalInputSample, dataSize) 15 | for (i <- 0 until totalInputSample) { 16 | for (j <- 0 until dataSize) 17 | nestedArray(i)(j) = random.nextFloat() 18 | } 19 | nestedArray 20 | } 21 | 22 | 23 | override def dataSource: DataSource = { inputRequest => 24 | if (inputRequest.iteration % printFrequency == 0) { 25 | println(s"--NoOpBinder: dumping model data at iteration ${inputRequest.iteration}--") 26 | } 27 | 28 | AllReduceInput(randomFloats(inputRequest.iteration % totalInputSample)) 29 | } 30 | 31 | override def dataSink: DataSink = { output => 32 | if (output.iteration % printFrequency == 0) { 33 | println(s"--NoOpBinder: reduce completed at iteration ${output.iteration}--") 34 | } 35 | 36 | } 37 | 38 | override def totalDataSize: Int = dataSize 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/buffer/AllReduceBuffer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.buffer 2 | 3 | 4 | abstract class AllReduceBuffer(dataSize: Int, 5 | peerSize: Int, 6 | maxChunkSize: Int) { 7 | 8 | type Buffer = Array[Array[Float]] 9 | 10 | val peerBuffer: Buffer = Array.ofDim(peerSize, dataSize) 11 | 12 | val numChunks = getNumChunk(dataSize) 13 | 14 | protected def store(data: Array[Float], srcId: Int, chunkId: Int) = { 15 | 16 | val array = peerBuffer(srcId) 17 | System.arraycopy( 18 | data, 0, 19 | array, chunkId * maxChunkSize, 20 | data.size) 21 | } 22 | 23 | protected def getNumChunk(size: Int) = { 24 | math.ceil(1f * size / maxChunkSize).toInt 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/caffe/Classifier.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.caffe 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,GMat,GIMat,GSMat,HMat,Image,IMat,ND,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.datasources._ 6 | import edu.berkeley.bvlc.SGDSOLVER 7 | import edu.berkeley.bvlc.NET 8 | import edu.berkeley.bvlc.CAFFE 9 | 10 | class Classifier { 11 | 12 | val net = new Net 13 | 14 | def init(model_file:String, pretrained_file:String, image_dims:Array[Int] = Array(256, 256), 15 | gpu:Boolean = false, mean_file:String = null, input_scale:Float = 1f, channel_swap:IMat = 2\1\0) = { 16 | 17 | net.init(model_file, pretrained_file); 18 | 19 | CAFFE.set_phase(1); 20 | 21 | CAFFE.set_mode(if (gpu) 1 else 0) 22 | 23 | if (image_dims != null) { 24 | net.set_image_dims(image_dims) 25 | } else { 26 | net.set_image_dims(Array(net.inwidth, net.inheight)) 27 | } 28 | 29 | if (mean_file != null) net.set_mean(mean_file) 30 | 31 | if (input_scale != 1f) net.set_input_scale(input_scale) 32 | 33 | if (channel_swap.asInstanceOf[AnyRef] != null) net.set_channel_swap(channel_swap) 34 | 35 | } 36 | 37 | def classify(im:Image):FMat = { 38 | val fnd = net.preprocess(im) 39 | net.clear_inputs 40 | net.add_input(fnd, 0, 0) 41 | net.forward 42 | net.output_data(0)(?,?,?,0) 43 | } 44 | 45 | 46 | } 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/caffe/SGDSolver.scala: 
-------------------------------------------------------------------------------- 1 | package BIDMach.caffe 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,GMat,GIMat,GSMat,HMat,Image,IMat,ND,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.datasources._ 6 | import edu.berkeley.bvlc.SGDSOLVER 7 | import edu.berkeley.bvlc.NET 8 | import edu.berkeley.bvlc.CAFFE 9 | 10 | class SGDSolver (val sgd:SGDSOLVER) { 11 | val net = sgd.net 12 | 13 | def Solve = sgd.Solve 14 | 15 | def SolveResume(fname:String) = sgd.SolveResume(fname) 16 | 17 | } 18 | 19 | object SGDSolver { 20 | def apply(paramFile:String):SGDSolver = new SGDSolver(new SGDSOLVER(paramFile)) 21 | } 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasinks/DataSink.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasinks 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import java.io._ 6 | 7 | @SerialVersionUID(100L) 8 | abstract class DataSink(val opts:DataSink.Opts = new DataSink.Options) extends Serializable { 9 | private var _GUID = Mat.myrand.nextLong 10 | def setGUID(v:Long):Unit = {_GUID = v} 11 | def GUID:Long = _GUID 12 | def put; 13 | def init:Unit = {} 14 | def close = {} 15 | private var _nmats = 0; 16 | def nmats = _nmats; 17 | def setnmats(k:Int) = {_nmats = k;} 18 | var omats:Array[Mat] = null 19 | } 20 | 21 | @SerialVersionUID(100L) 22 | object DataSink { 23 | trait Opts extends BIDMat.Opts { 24 | } 25 | 26 | class Options extends Opts {} 27 | } 28 | 29 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasinks/FileSink.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasinks 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,LMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.datasources._ 6 | import scala.collection.mutable.ListBuffer 7 | 8 | @SerialVersionUID(100L) 9 | class FileSink(override val opts:FileSink.Opts = new FileSink.Options) extends MatSink(opts) { 10 | var ifile = 0; 11 | var colsdone = 0; 12 | 13 | override def init = { 14 | blocks = new ListBuffer[Array[Mat]](); 15 | setnmats(opts.ofnames.length); 16 | omats = new Array[Mat](nmats); 17 | ifile = 0; 18 | opts match { 19 | case fopts:FileSource.Opts => { 20 | ifile = fopts.nstart; 21 | } 22 | } 23 | colsdone = 0; 24 | } 25 | 26 | override def put = { 27 | blocks += omats.map(MatSink.copyCPUmat); 28 | colsdone += omats(0).ncols; 29 | if (colsdone >= opts.ofcols) { 30 | mergeSaveBlocks; 31 | colsdone = 0; 32 | ifile += 1; 33 | blocks = new ListBuffer[Array[Mat]](); 34 | } 35 | } 36 | 37 | override def close () = { 38 | mergeSaveBlocks; 39 | } 40 | 41 | def mergeSaveBlocks = { 42 | mergeBlocks 43 | if (blocks.size > 0) { 44 | for (i <- 0 until opts.ofnames.length) { 45 | saveMat(opts.ofnames(i)(ifile), mats(i)); 46 | } 47 | } 48 | } 49 | } 50 | 51 | @SerialVersionUID(100L) 52 | object FileSink { 53 | trait Opts extends MatSink.Opts { 54 | var ofnames:List[(Int)=>String] = null; 55 | var ofcols = 100000; 56 | } 57 | 58 | class Options extends Opts { 59 | 60 | } 61 | } 62 | 63 | 
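// NOTE (added usage sketch, not part of the original source; the output path below is
// made up): FileSink accumulates output blocks and rolls over to a new numbered file
// every opts.ofcols columns, using one filename-generating function per output matrix:
//
//   val sopts = new FileSink.Options
//   sopts.ofnames = List((i:Int) => "/tmp/preds%04d.fmat.lz4" format i)
//   sopts.ofcols  = 100000   // columns (samples) written before starting the next file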
-------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasources/ArraySource.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasources 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMat.MatIOtrait 6 | import scala.concurrent.Future 7 | import scala.concurrent.ExecutionContextExecutor 8 | import java.io._ 9 | 10 | @SerialVersionUID(100L) 11 | class ArraySource(override val opts:ArraySource.Opts = new ArraySource.Options) extends IteratorSource(opts) { 12 | @transient var dataArray:Array[_ <: AnyRef] = null 13 | 14 | override def init = { 15 | dataArray = opts.dataArray 16 | super.init 17 | } 18 | 19 | override def iterHasNext:Boolean = { 20 | iblock += 1 21 | iblock < dataArray.length 22 | } 23 | 24 | override def hasNext:Boolean = { 25 | val matq = inMats(0) 26 | val matqnr = if (opts.dorows) matq.nrows else matq.ncols 27 | val ihn = iblock < dataArray.length 28 | if (! ihn && iblock > 0) { 29 | nblocks = iblock 30 | } 31 | (ihn || (matqnr - samplesDone) == 0); 32 | } 33 | 34 | override def iterNext() = { 35 | val marr = dataArray(iblock) 36 | marr match { 37 | case (key:AnyRef,v:MatIOtrait) => {inMats = v.get} 38 | case m:Mat => { 39 | if (inMats == null) inMats = Array[Mat](1); 40 | inMats(0) = m; 41 | } 42 | case ma:Array[Mat] => inMats = ma; 43 | } 44 | } 45 | 46 | override def close = { 47 | iblock = 0 48 | } 49 | } 50 | 51 | @SerialVersionUID(100L) 52 | object ArraySource { 53 | def apply(opts:ArraySource.Opts):ArraySource = { 54 | new ArraySource(opts); 55 | } 56 | 57 | trait Opts extends IteratorSource.Opts { 58 | @transient var dataArray:Array[_ <: AnyRef] = null 59 | } 60 | 61 | @SerialVersionUID(100L) 62 | class Options extends Opts {} 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasources/DataSource.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasources 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import java.io._ 6 | 7 | @SerialVersionUID(100L) 8 | abstract class DataSource(val opts:DataSource.Opts = new DataSource.Options) extends Serializable { 9 | private var _GUID = Mat.myrand.nextLong 10 | def setGUID(v:Long):Unit = {_GUID = v} 11 | def GUID:Long = _GUID 12 | def next:Array[Mat] 13 | def hasNext:Boolean 14 | def reset:Unit 15 | def putBack(mats:Array[Mat],i:Int):Unit = {throw new RuntimeException("putBack not implemented")} 16 | def setupPutBack(n:Int,dim:Int):Unit = {throw new RuntimeException("putBack not implemented")} 17 | def nmats:Int 18 | def init:Unit 19 | def progress:Float 20 | def close = {} 21 | var omats:Array[Mat] = null 22 | var endmats:Array[Mat] = null 23 | var fullmats:Array[Mat] = null 24 | } 25 | 26 | @SerialVersionUID(100L) 27 | object DataSource { 28 | trait Opts extends BIDMat.Opts { 29 | var batchSize = 10000 30 | var sizeMargin = 3f 31 | var sample = 1f 32 | var addConstFeat:Boolean = false 33 | var featType:Int = 1 // 0 = binary features, 1 = linear features, 2 = threshold features 34 | var featThreshold:Mat = null 35 | var putBack = -1 36 | } 37 | 38 | class Options extends Opts {} 39 | } 40 | 41 | 
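// NOTE (added illustrative sketch, not part of the original source): concrete sources
// (e.g. MatSource, FileSource) all follow the pull contract defined above -- init once,
// drain with hasNext/next, then reset to rewind for another pass:
//
//   val ds: DataSource = ...    // any concrete source
//   ds.init
//   while (ds.hasNext) {
//     val mats = ds.next        // one mini-batch of up to opts.batchSize columns
//   }
//   ds.reset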
-------------------------------------------------------------------------------- /src/main/scala/BIDMach/mixins/Mixin.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.mixins 2 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.models._ 6 | 7 | @SerialVersionUID(100L) 8 | abstract class Mixin(val opts:Mixin.Opts = new Mixin.Options) extends Serializable { 9 | val options = opts 10 | var modelmats:Array[Mat] = null 11 | var updatemats:Array[Mat] = null 12 | var counter = 0 13 | 14 | def compute(mats:Array[Mat], step:Float) 15 | 16 | def score(mats:Array[Mat], step:Float):FMat 17 | 18 | def init(model:Model) = { 19 | modelmats = model.modelmats 20 | updatemats = model.updatemats 21 | } 22 | } 23 | 24 | object Mixin { 25 | trait Opts extends BIDMat.Opts { 26 | var mixinInterval = 1 27 | } 28 | 29 | class Options extends Opts {} 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/ForwardLayer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat} 4 | import BIDMat.MatFunctions._ 5 | import BIDMat.SciFunctions._ 6 | import BIDMach.datasources._ 7 | import BIDMach.updaters._ 8 | import BIDMach.mixins._ 9 | import BIDMach.models._ 10 | import BIDMach._ 11 | import edu.berkeley.bid.CPUMACH 12 | import edu.berkeley.bid.CUMACH 13 | import scala.util.hashing.MurmurHash3; 14 | import java.util.HashMap; 15 | import BIDMach.networks._ 16 | 17 | 18 | @SerialVersionUID(100L) 19 | class ForwardLayer(override val net:Net, override val opts:ForwardNodeOpts = new ForwardNode) extends Layer(net, opts) { 20 | 21 | override def forward = { 22 | val start = toc; 23 | inplaceNoConnectGetOutput(); 24 | 25 | output <-- inputData; 26 | // clearDeriv; 27 | forwardtime += toc - start; 28 | } 29 | 30 | override def backward = { 31 | } 32 | 33 | override def toString = { 34 | "forward@"+Integer.toHexString(hashCode % 0x10000).toString 35 | } 36 | } 37 | 38 | trait ForwardNodeOpts extends NodeOpts { 39 | } 40 | 41 | @SerialVersionUID(100L) 42 | class ForwardNode extends Node with ForwardNodeOpts { 43 | 44 | override def clone:ForwardNode = {copyTo(new ForwardNode).asInstanceOf[ForwardNode];} 45 | 46 | override def create(net:Net):ForwardLayer = {ForwardLayer(net, this);} 47 | 48 | override def toString = { 49 | "forward@"+Integer.toHexString(hashCode % 0x10000).toString 50 | } 51 | } 52 | 53 | @SerialVersionUID(100L) 54 | object ForwardLayer { 55 | 56 | def apply(net:Net) = new ForwardLayer(net, new ForwardNode); 57 | 58 | def apply(net:Net, opts:ForwardNode) = new ForwardLayer(net, opts); 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/MaxIndexLayer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat} 4 | import BIDMat.MatFunctions._ 5 | import BIDMat.SciFunctions._ 6 | import BIDMach.datasources._ 7 | import BIDMach.updaters._ 8 | import BIDMach.mixins._ 9 | import BIDMach.models._ 10 | import BIDMach._ 11 | import 
edu.berkeley.bid.CPUMACH 12 | import edu.berkeley.bid.CUMACH 13 | import scala.util.hashing.MurmurHash3; 14 | import java.util.HashMap; 15 | import BIDMach.networks._ 16 | 17 | @SerialVersionUID(100L) 18 | class MaxIndexLayer(override val net:Net, override val opts:MaxIndexNodeOpts = new MaxIndexNode) extends Layer(net, opts) { 19 | 20 | override def forward = { 21 | val start = toc; 22 | output = maxi2(inputData, 1)._2; 23 | forwardtime += toc - start; 24 | } 25 | 26 | override def backward = { 27 | val start = toc; 28 | backwardtime += toc - start; 29 | } 30 | 31 | override def toString = { 32 | "maxidx@"+Integer.toHexString(hashCode % 0x10000).toString 33 | } 34 | } 35 | 36 | trait MaxIndexNodeOpts extends NodeOpts { 37 | } 38 | 39 | @SerialVersionUID(100L) 40 | class MaxIndexNode extends Node with MaxIndexNodeOpts { 41 | 42 | override def clone:MaxIndexNode = {copyTo(new MaxIndexNode).asInstanceOf[MaxIndexNode];} 43 | 44 | override def create(net:Net):MaxIndexLayer = {MaxIndexLayer(net, this);} 45 | 46 | override def toString = { 47 | "maxidx@"+Integer.toHexString(hashCode % 0x10000).toString 48 | } 49 | } 50 | 51 | @SerialVersionUID(100L) 52 | object MaxIndexLayer { 53 | 54 | def apply(net:Net) = new MaxIndexLayer(net, new MaxIndexNode); 55 | 56 | def apply(net:Net, opts:MaxIndexNode) = new MaxIndexLayer(net, opts); 57 | } 58 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/NodeSet.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | @SerialVersionUID(100L) 4 | class NodeSet(val nnodes:Int, val nodes:Array[Node]) extends Serializable { 5 | 6 | def this(nnodes:Int) = this(nnodes, new Array[Node](nnodes)); 7 | 8 | def this(nodes:Array[Node]) = this(nodes.length, nodes); 9 | 10 | def apply(i:Int):Node = nodes(i); 11 | 12 | def update(i:Int, lopts:Node) = {nodes(i) = lopts; this} 13 | 14 | def size = nnodes; 15 | 16 | def length = nnodes; 17 | 18 | override def clone = copyTo(new NodeSet(nnodes)); 19 | 20 | def copyTo(lopts:NodeSet):NodeSet = { 21 | for (i <- 0 until nnodes) { 22 | lopts.nodes(i) = nodes(i).clone; 23 | nodes(i).myGhost = lopts.nodes(i); 24 | } 25 | for (i <- 0 until nnodes) { 26 | for (j <- 0 until nodes(i).inputs.length) { 27 | if (nodes(i).inputs(j) != null) lopts.nodes(i).inputs(j) = nodes(i).inputs(j).node.myGhost; 28 | } 29 | } 30 | lopts; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/SignLayer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat} 4 | import BIDMat.MatFunctions._ 5 | import BIDMat.SciFunctions._ 6 | import BIDMach.datasources._ 7 | import BIDMach.updaters._ 8 | import BIDMach.mixins._ 9 | import BIDMach.models._ 10 | import BIDMach._ 11 | import edu.berkeley.bid.CPUMACH 12 | import edu.berkeley.bid.CUMACH 13 | import scala.util.hashing.MurmurHash3; 14 | import java.util.HashMap; 15 | import BIDMach.networks._ 16 | 17 | 18 | /** 19 | * Sign layer. 
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/networks/layers/SignLayer.scala:
--------------------------------------------------------------------------------
package BIDMach.networks.layers

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.datasources._
import BIDMach.updaters._
import BIDMach.mixins._
import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
import scala.util.hashing.MurmurHash3;
import java.util.HashMap;
import BIDMach.networks._

/**
 * Sign layer. Applies the elementwise sign function to its input.
 */

@SerialVersionUID(100L)
class SignLayer(override val net:Net, override val opts:SignNodeOpts = new SignNode) extends Layer(net, opts) {

  override def forward = {
    val start = toc;
    inplaceNoConnectGetOutput();

    sign(inputData, output);

    forwardtime += toc - start;
  }

  override def backward = {
    val start = toc;

    backwardtime += toc - start;
  }

  override def toString = {
    "sign@"+Integer.toHexString(hashCode % 0x10000)
  }
}


trait SignNodeOpts extends NodeOpts {
}

@SerialVersionUID(100L)
class SignNode extends Node with SignNodeOpts {

  override def clone:SignNode = {copyTo(new SignNode).asInstanceOf[SignNode];}

  override def create(net:Net):SignLayer = {SignLayer(net, this);}

  override def toString = {
    "sign@"+Integer.toHexString(hashCode % 0x10000)
  }
}

@SerialVersionUID(100L)
object SignLayer {

  def apply(net:Net) = new SignLayer(net, new SignNode);

  def apply(net:Net, opts:SignNode) = new SignLayer(net, opts);
}
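The forward pass maps each element to -1, 0, or +1. A small illustrative snippet (assuming BIDMat's one-argument sign form; exact call forms may vary by version):

import BIDMat.MatFunctions._
import BIDMat.SciFunctions._

val x = row(-2.5f, 0f, 3.1f)
val s = sign(x)   // expected: (-1, 0, 1)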
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/Batch.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class Batch(override val opts:Batch.Opts = new Batch.Options) extends Updater {

  override def init(model0:Model) = {
    super.init(model0)
  }

  override def update(ipass:Int, step:Long) = {}
}

@SerialVersionUID(100L)
object Batch {
  trait Opts extends Updater.Opts {
    var beps = 1e-5f
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/BatchNorm.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class BatchNorm(override val opts:BatchNorm.Opts = new BatchNorm.Options) extends Updater {
  var accumulators:Array[Mat] = null

  override def init(model0:Model) = {
    super.init(model0)
    val modelmats = model.modelmats
    val updatemats = model.updatemats
    accumulators = new Array[Mat](updatemats.length)
    for (i <- 0 until accumulators.length) {
      accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
    }
  }

  // Accumulate the update matrices over the pass.
  override def update(ipass:Int, step:Long) = {
    val updatemats = model.updatemats
    for (i <- 0 until accumulators.length) {
      accumulators(i) ~ accumulators(i) + updatemats(i)
    }
  }

  override def clear() = {
    for (i <- 0 until accumulators.length) {
      accumulators(i).clear
    }
  }

  // At the end of a pass, set the model to the ratio of the two accumulators
  // and normalize each row to sum to 1.
  override def updateM(ipass:Int):Unit = {
    val mm = model.modelmats(0)
    mm ~ accumulators(0) / accumulators(1)
    mm ~ mm / sum(mm,2)
    clear
  }
}

@SerialVersionUID(100L)
object BatchNorm {
  trait Opts extends Updater.Opts {
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/IncMult.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class IncMult(override val opts:IncMult.Opts = new IncMult.Options) extends Updater {

  var firstStep = 0f
  var rm:Mat = null

  override def init(model0:Model) = {
    super.init(model0)
    rm = model0.modelmats(0).zeros(1,1)
  }

  override def update(ipass:Int, step:Long) = {
    val modelmats = model.modelmats
    val updatemats = model.updatemats
    val mm = modelmats(0)
    val ms = modelmats(1)
    val um = updatemats(0)
    val ums = updatemats(1)
    // Decaying rate rr = (firstStep / step)^power, with rr = 1 on the first step.
    val rr = if (step == 0) 1f else {
      if (firstStep == 0f) {
        firstStep = step
        1f
      } else {
        (math.pow(firstStep / step, opts.power)).toFloat
      }
    }

    // Multiplicative update in log space: mm <- exp((1-rr)*ln(mm) + rr*um).
    um ~ um *@ rm.set(rr)
    ln(mm, mm)
    mm ~ mm *@ rm.set(1-rr)
    mm ~ mm + um
    exp(mm, mm)
    if (opts.isprob) mm ~ mm / sum(mm,2)
  }

  override def clear() = {
    firstStep = 0f
  }
}

@SerialVersionUID(100L)
object IncMult {
  trait Opts extends Updater.Opts {
    var warmup = 0L
    var power = 0.3f
    var isprob = true
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/Telescoping.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class Telescoping(override val opts:Telescoping.Opts = new Telescoping.Options) extends Updater {
  var accumulators:Array[Mat] = null
  var firstStep = 0L
  var nextStep = 10L
  var nextCount = 0L
  var rm:Mat = null

  override def init(model0:Model) = {
    super.init(model0)
    val modelmats = model0.modelmats
    val updatemats = model0.updatemats
    rm = model0.modelmats(0).zeros(1,1)
    accumulators = new Array[Mat](updatemats.length)
    for (i <- 0 until updatemats.length) {
      accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
    }
    firstStep = 0L
    nextStep = 10L
    nextCount = 0L
  }

  override def update(ipass:Int, step:Long) = {
    if (firstStep == 0 && step > 0) {
      firstStep = step
    }
    val updatemats = model.updatemats
    for (i <- 0 until updatemats.length) {
      accumulators(i) ~ accumulators(i) + updatemats(i)
    }
    // When step passes the next threshold, write the ratio of accumulators
    // into the model and grow the interval geometrically by opts.factor.
    if (step >= nextCount) {
      model.modelmats(0) ~ accumulators(0) / accumulators(1)
      nextStep = (nextStep * opts.factor).toLong
      nextCount = step + nextStep
    }
  }

  override def clear() = {
    for (i <- 0 until accumulators.length) {
      accumulators(i).clear
    }
  }
}

@SerialVersionUID(100L)
object Telescoping {
  trait Opts extends Updater.Opts {
    var factor = 1.5f
  }

  class Options extends Opts {}
}
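The two schedules above are easy to misread inside the accumulator code, so here is a small standalone sketch of just the scheduling arithmetic (plain Scala; function names and starting values are hypothetical, and it assumes updates arrive exactly at each threshold, which the real updater does not require):

// IncMult's decaying rate: rr = (firstStep / step)^power.
def rate(firstStep:Float, step:Long, power:Float):Float =
  if (step == 0) 1f else math.pow(firstStep / step, power).toFloat

// Telescoping-style snapshot points: each interval grows by `factor`.
def snapshots(factor:Float, n:Int):Seq[Long] = {
  var next = 10L; var count = 0L
  (0 until n).map { _ =>
    count += next
    next = (next * factor).toLong
    count
  }
}

// e.g. snapshots(1.5f, 5) == Seq(10, 25, 47, 80, 129)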
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/Updater.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._


abstract class Updater(val opts:Updater.Opts = new Updater.Options) extends Serializable {
  var model:Model = null;
  var runningtime = 0.0;

  def init(model0:Model) = {
    model = model0
  }

  def clear():Unit = {}

  def update(ipass:Int, step:Long):Unit = {}

  def update(ipass:Int, step:Long, gprogress:Float):Unit = update(ipass, step)

  def updateM(ipass:Int):Unit = {
    model.updatePass(ipass)
  }

  def preupdate(ipass:Int, step:Long, gprogress:Float):Unit = {}
}

@SerialVersionUID(100L)
object Updater {
  trait Opts extends BIDMat.Opts {
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/viz/LogViz.scala:
--------------------------------------------------------------------------------
package BIDMach.viz;
import BIDMat.{BMat,Mat,SBMat,CMat,DMat,FMat,FFilter,IMat,HMat,GDMat,GFilter,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat,TMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models.Model;
import BIDMach.networks.Net;
import BIDMach.networks.layers._;
import BIDMach.Learner;
import scala.collection.mutable.ListBuffer;

/**
 * Collect and visualize some logged values.
 */

class LogViz(val name: String = "varName") extends Visualization {
  val data:ListBuffer[FMat] = new ListBuffer[FMat];
  interval = 1;

  // Override one of these to collect some log data.
  def collect(model:Model, mats:Array[Mat], ipass:Int, pos:Long):FMat = {
    collect(model);
  }

  def collect(model:Model):FMat = {
    collect();
  }

  def collect():FMat = {
    row(0);
  }

  override def doUpdate(model:Model, mats:Array[Mat], ipass:Int, pos:Long) = {
    data.synchronized {
      data += FMat(collect(model, mats, ipass, pos));
    }
  }

  def snapshot = {
    Learner.scores2FMat(data);
  }

  // Return columns n0 until n1 of the collected data as a single matrix.
  def fromto(n0:Int, n1:Int) = {
    data.synchronized {
      val len = data.length;
      val na = math.min(n0, len);
      val nb = math.min(n1, len);
      val out = zeros(data(0).nrows, nb - na);
      var i = 0;
      data.foreach(f => {
        if (i >= na && i < nb) out(?, i - na) = f;
        i += 1;
      })
      out
    }
  }

  def lastn(n0:Int) = {
    val len = data.synchronized {data.length};
    fromto(math.max(0, len - n0), len);
  }
}
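A concrete LogViz only needs to override one of the collect methods. A hypothetical subclass (illustrative only; it assumes BIDMat's mean reduction) that records the mean of the first model matrix on every update:

import BIDMat.{Mat,FMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models.Model
import BIDMach.viz.LogViz

// Hypothetical: log mean(modelmats(0)) once per interval.
class MeanLogger extends LogViz("meanWeight") {
  override def collect(model:Model):FMat = {
    FMat(mean(mean(model.modelmats(0), 2)))
  }
}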
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/viz/Visualization.scala:
--------------------------------------------------------------------------------
package BIDMach.viz
import BIDMach.models.Model;
import BIDMat.Mat


/**
 * Abstract class for visualizations. Extend this class to get correct behavior.
 */

abstract class Visualization {
  var interval = 10;
  var cnt = 0
  var checkStatus = -1

  def doUpdate(model:Model, mats:Array[Mat], ipass:Int, pos:Long)

  // Perform an initial check to make sure the data type is correct.
  def check(model:Model, mats:Array[Mat]):Int = 0

  // Initialize variables and state during the first update.
  def init(model:Model, mats:Array[Mat]) {}

  // Update the visualization once every `interval` batches.
  def update(model:Model, mats:Array[Mat], ipass:Int, pos:Long) {
    if (checkStatus == -1) {
      checkStatus = check(model, mats)
      if (checkStatus == 0) init(model, mats)
    }
    if (checkStatus == 0) {
      if (cnt == 0) {
        try {
          doUpdate(model, mats, ipass, pos)
        }
        catch {
          case e:Exception => {
            // Disable this visualization after a failure.
            checkStatus = 2
            println(e.toString)
            println(e.getStackTrace.mkString("\n"))
          }
        }
      }
      cnt = (cnt + 1) % interval
    }
  }
}
--------------------------------------------------------------------------------
/src/test/scala/BIDMach/BIDMachSpec.scala:
--------------------------------------------------------------------------------
package BIDMach

import org.scalatest._

abstract class BIDMachSpec extends FlatSpec
  with Matchers
  with BeforeAndAfterAll {

  override def beforeAll {
    BIDMat.Mat.checkMKL(false);
  }

  // Compare two float arrays elementwise, scaling the tolerance by magnitude.
  def assert_approx_eq(a: Array[Float], b: Array[Float], eps: Float = 1e-4f) = {
    (a, b).zipped foreach {
      case (x, y) => {
        val scale = (math.abs(x) + math.abs(y) + eps).toFloat;
        x / scale should equal ((y / scale) +- eps)
      }
    }
  }

  def assert_approx_eq_double(a: Array[Double], b: Array[Double], eps: Double = 1e-6) = {
    (a, b).zipped foreach {
      case (x, y) => {
        val scale = (math.abs(x) + math.abs(y) + eps);
        x / scale should equal ((y / scale) +- eps)
      }
    }
  }

}
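A test built on this base class might look like the following sketch (class name and values hypothetical; FlatSpec style as used above):

package BIDMach

class ApproxEqSpec extends BIDMachSpec {
  "assert_approx_eq" should "accept arrays that differ by less than eps" in {
    val a = Array(1.0f, 2.0f, 3.0f)
    val b = Array(1.00001f, 2.00001f, 3.00001f)
    assert_approx_eq(a, b, 1e-4f)
  }
}
--------------------------------------------------------------------------------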