├── .classpath ├── .gitignore ├── .project ├── Copyright.txt ├── LICENSE ├── README.md ├── benchmarks.txt ├── bidmach ├── bidmach.cmd ├── bidmach65 ├── bidmach_full ├── build.sbt ├── build_scala_2_10_sbt ├── build_scala_2_11_sbt ├── command ├── scala └── scala.bat ├── data ├── MHTestCorrections │ ├── norm2log2000_20_0.9.txt │ ├── norm2log4000_20_0.9.txt │ └── norm2log4000_20_1.0.txt ├── factorNet │ ├── data.txt │ ├── data.txt~ │ ├── factorSet.txt │ ├── factorSet.txt~ │ ├── statePerNode.txt │ ├── statePerNode.txt~ │ └── test2 │ │ ├── data.txt │ │ ├── factorSet.txt │ │ ├── factorSet.txt~ │ │ ├── generateTestData.ipynb │ │ ├── statePerNode.txt │ │ └── statePerNode.txt~ ├── imagenet_classname.txt ├── rcv1_fmt.txt ├── uci_fmt.txt └── uci_wfmt.txt ├── getcudaversion.sh ├── getdevlibs.sh ├── getlibs.sh ├── getnativepath.class ├── getnativepath.java ├── jni ├── include │ ├── JNIUtils.hpp │ ├── Logger.hpp │ ├── MatKernel.hpp │ ├── MurmurHash.hpp │ └── PointerUtils.hpp ├── pom.xml └── src │ ├── BIDMach_CPUMACH.c │ ├── BIDMach_CUMACH.cpp │ ├── DNN.cu │ ├── DNN127.cu │ ├── DNN63.cu │ ├── Devel.cu │ ├── Dtree.cu │ ├── GLM.cu │ ├── HashMult.cu │ ├── JCUDA_Copyright.txt │ ├── JNIUtils.cpp │ ├── Logger.cpp │ ├── Makefile │ ├── PointerUtils.cpp │ ├── Samplers.cu │ └── configure ├── lib ├── Apache_Commons_Math_LICENSE.txt ├── Apache_License.txt ├── HDF5_Copyright.html ├── IScala_license.txt ├── JCUDA_Copyright.txt ├── Jcommon_JfreeChart_LGPL.html ├── PtPlot_Copyright.txt ├── Scala_License.txt ├── bidmach_init.sc └── bidmach_notebook_init.sc ├── logo-64x64.png ├── logs └── touch.txt ├── notes.txt ├── pom.xml ├── readme_gui.md ├── sbt ├── sbt-pkg ├── bin │ ├── java9-rt-export.jar │ ├── sbt │ ├── sbt-launch-lib.bash │ ├── sbt-launch.jar │ └── sbt.bat └── conf │ ├── sbtconfig.txt │ └── sbtopts ├── scripts ├── README.txt ├── analyze_mhtest_logreg.ssc ├── benchmarks │ ├── basic.c │ ├── basic.jl │ ├── basic.lua │ ├── basic.py │ ├── basic.ssc │ ├── convRC1spark.ssc │ ├── createVWdata.ssc │ ├── graphlab_ALS.ipynb │ ├── juliaRandWalk.jl │ ├── scalaRandWalk.ssc │ ├── scoreSpark.ssc │ ├── scoreSpark2.ssc │ ├── skkmeans.py │ ├── sklogistic.py │ ├── start_spark.sh │ ├── testSparkALS.ssc │ ├── testSparkKMeans.ssc │ ├── testSparkLR.ssc │ ├── testSparkSVM.ssc │ ├── testVWLDA.sh │ ├── testVWLR.sh │ └── testVWkmeans.sh ├── bidmach_ec2.py ├── bn_test.ssc ├── buildcriteo.ssc ├── cluster_destroy.sh ├── cluster_launch.sh ├── cluster_login.sh ├── cluster_mux.py ├── cluster_start.sh ├── cluster_stop.sh ├── collect_files.py ├── criteolr.ssc ├── criteolrslave.ssc ├── criteonet.ssc ├── distribute.sh ├── distribute_data.sh ├── distribute_file.sh ├── distributed │ ├── cmudict-tail-reducer.sh │ ├── master_criteo_lr.ssc │ ├── master_distr_lr_rcv.ssc │ ├── master_mnist_rf.ssc │ ├── master_net_rcv1.ssc │ ├── master_rf_yearprediction.ssc │ ├── master_s2s_cmudict.ssc │ ├── master_s2s_mnt2014.ssc │ ├── master_sts_2015-news-commentary-v10-fr-en.ssc │ ├── news-commentary-tail-reducer.sh │ ├── tail-workers │ ├── testrecv.ssc │ ├── testsend.ssc │ ├── worker_criteo_lr.ssc │ ├── worker_distr_lr_rcv.ssc │ ├── worker_mnist_rf.ssc │ ├── worker_net_rcv1.ssc │ ├── worker_rf_yearprediction.ssc │ ├── worker_s2s_cmudict.ssc │ ├── worker_s2s_mnt2014.ssc │ └── worker_sts_2015-news-commentary-v10-fr-en.ssc ├── factorNet_test.ssc ├── factorNet_test2.ssc ├── futures.ssc ├── get_cmudict.sh ├── get_mnt2014.sh ├── get_mnt2014_pc6.sh ├── getcirfar10.sh ├── getcriteo.sh ├── getdata.sh ├── getdigits.sh ├── getdigits.ssc ├── getmnist8m.sh ├── getmnist8m_finesplit.sh 
├── getmovies.sh ├── getmovies.ssc ├── getpubmed.sh ├── getrcv1.sh ├── getrcv1.ssc ├── getuci.sh ├── getuci.ssc ├── getw2vdata.sh ├── getw2vdata.ssc ├── getyearprediction.sh ├── getyearprediction.ssc ├── higgsdnn.ssc ├── higgsprep.ssc ├── higgsrf.ssc ├── ica_test.ssc ├── make_bayesnet_data.py ├── mnistkmeans.ssc ├── mnistkmeans2.ssc ├── mnistlr.ssc ├── mnistlr2.ssc ├── networks │ ├── evalAlexnet.ssc │ ├── getImageNet.ssc │ ├── getImageNetLabels.ssc │ ├── getImageNetMeans.ssc │ ├── getcifar10.sh │ ├── getcifar100.sh │ ├── getmnist.sh │ ├── loadOnnx.ssc │ ├── modelmat_test │ ├── modelmat_test.fmat.lz4 │ ├── processcifar10.ssc │ ├── processcifar100.ssc │ ├── reduceRate.sc │ ├── resumeAlexnet.ssc │ ├── resumeResnet.ssc │ ├── testAlexnet.ssc │ ├── testAlexnet2.ssc │ ├── testAlexnet2.ssc~ │ ├── testAlexnet4y.ssc │ ├── testAlexnetClassic.ssc │ ├── testCIFAR10.ssc │ ├── testCIFAR100.ssc │ ├── testCIFAR100collide.ssc │ ├── testCIFAR10a.ssc │ ├── testCIFAR10c.ssc │ ├── testConv.ssc │ ├── testLeNet.ssc │ ├── testLeNet2.ssc │ ├── testResnet.ssc │ ├── testResnetCollide.ssc │ ├── testResnetSave.ssc │ ├── testTrans.sc │ └── testVGG16.ssc ├── prepLSTM.ssc ├── preprocess_mnt2014.py ├── preprocess_mnt2014_pc6.py ├── process_cmudict_json.ssc ├── process_mnt2014.ssc ├── process_mnt2014_pc6.ssc ├── processmnist.ssc ├── processmnist8m.ssc ├── processmnist8m_binary.ssc ├── processmnist8m_finesplit.ssc ├── processmnist_binary.ssc ├── processpubmed.ssc ├── pubmedlda.ssc ├── pubmednmf.ssc ├── readcriteo.ssc ├── recompress.ssc ├── runCriteo.ssc ├── runICA.py ├── runall.sh ├── runback.sh ├── runmaster.sh ├── runmaster16.sh ├── runnode.sh ├── runnode16.sh ├── seedActor.ssc ├── sortcriteo.ssc ├── sparseallreduce │ ├── check.sh │ ├── checkall.sh │ ├── checkssh.sh │ ├── checksshall.sh │ ├── compile.sh │ ├── copyData1.sh │ ├── copyData2.sh │ ├── copyData3.sh │ ├── copyData4.sh │ ├── kill.sh │ ├── killall.sh │ ├── logcollect.sh │ ├── mount.sh │ ├── mountall.sh │ ├── ping.sh │ ├── pingall.sh │ ├── runtwitter.sh │ ├── runtwitterall.sh │ ├── runyahoo.sh │ ├── runyahooall.sh │ ├── runyahoor.sh │ ├── runyahoorall.sh │ ├── setup.sh │ ├── unmount.sh │ ├── unmountall.sh │ ├── update.sh │ ├── updatecheck.sh │ ├── volumes │ ├── volumes1 │ ├── volumes2 │ ├── volumes3 │ ├── volumes4 │ └── volumesetup.sh ├── start_workers.sh ├── startup.sh ├── startup16.sh ├── stop_workers.sh ├── testActor.ssc ├── testActor2.ssc ├── testActor3.ssc ├── testActor3.ssc~ ├── testAllReduceGridMaster.ssc ├── testAllReduceNode.ssc ├── testAllReduceNodeDummy.ssc ├── testAllReduceNodeResnet.ssc ├── testLogging.ssc ├── testPowerNet.ssc ├── testPowerNet35.ssc ├── testPredMNT2015.ssc ├── testSeqToSeq.ssc ├── testSeqToSeqPred.ssc ├── test_cmudict_s2s.ssc ├── test_grid.sh ├── test_mh.ssc ├── test_pred_cmudict_s2s.ssc ├── testds.ssc ├── testldagibbs.ssc ├── testlincomb.sc ├── testlr.ssc ├── testlstm.ssc ├── testnet.ssc ├── testpairmult.ssc ├── testrecv_local.ssc ├── testrf.ssc ├── testrforest.ssc ├── testsend_local.ssc ├── testsfa.ssc ├── testsmf.ssc ├── testsvd.ssc ├── testword2vec.ssc ├── testword2vecp.ssc ├── tmp.sc ├── trainLSTM.ssc ├── viz │ ├── alex0.ssc │ ├── mnist.ssc │ ├── save.ssc │ ├── testResnet.ssc │ ├── testVGG16.ssc │ ├── testcifar_norm.ssc │ └── testcifar_vgg.ssc ├── workout.ssc ├── workout2.ssc ├── workout_slave.ssc └── yearprediction.ssc ├── shortpath.bat ├── src ├── main │ ├── C │ │ └── newparse │ │ │ ├── configure │ │ │ ├── gzstream.cpp │ │ │ ├── gzstream.h │ │ │ ├── makefile │ │ │ ├── makefile.gcc │ │ │ ├── makefile.w32 │ │ │ ├── newparse.cpp │ │ 
│ ├── parsevw.cpp │ │ │ ├── tparse.cpp │ │ │ ├── tparse2.cpp │ │ │ ├── trec.flex │ │ │ ├── utils.cpp │ │ │ ├── utils.h │ │ │ ├── xmltweet.flex │ │ │ ├── xmlwiki.flex │ │ │ ├── zconf.h │ │ │ └── zlib.h │ ├── java │ │ ├── caffe │ │ │ ├── Caffe.java │ │ │ └── LICENSE.Caffe │ │ ├── edu │ │ │ └── berkeley │ │ │ │ ├── bid │ │ │ │ ├── CPUMACH.java │ │ │ │ └── CUMACH.java │ │ │ │ └── bvlc │ │ │ │ ├── BLOB.java │ │ │ │ ├── CAFFE.java │ │ │ │ ├── LAYER.java │ │ │ │ ├── LibUtils.java │ │ │ │ ├── NET.java │ │ │ │ └── SGDSOLVER.java │ │ ├── onnx │ │ │ ├── Onnx.java │ │ │ ├── OnnxMl.java │ │ │ ├── OnnxOperators.java │ │ │ └── OnnxOperatorsMl.java │ │ └── org │ │ │ └── tensorflow │ │ │ ├── example │ │ │ ├── BytesList.java │ │ │ ├── BytesListOrBuilder.java │ │ │ ├── Example.java │ │ │ ├── ExampleOrBuilder.java │ │ │ ├── ExampleProtos.java │ │ │ ├── Feature.java │ │ │ ├── FeatureList.java │ │ │ ├── FeatureListOrBuilder.java │ │ │ ├── FeatureLists.java │ │ │ ├── FeatureListsOrBuilder.java │ │ │ ├── FeatureOrBuilder.java │ │ │ ├── FeatureProtos.java │ │ │ ├── Features.java │ │ │ ├── FeaturesOrBuilder.java │ │ │ ├── FloatList.java │ │ │ ├── FloatListOrBuilder.java │ │ │ ├── Int64List.java │ │ │ ├── Int64ListOrBuilder.java │ │ │ ├── RecordWriter.java │ │ │ ├── SequenceExample.java │ │ │ └── SequenceExampleOrBuilder.java │ │ │ └── io │ │ │ ├── CRC32C.java │ │ │ ├── RecordReader.java │ │ │ └── RecordWriter.java │ ├── proto │ │ ├── onnx │ │ │ ├── onnx-ml.proto │ │ │ ├── onnx-operators-ml.proto │ │ │ ├── onnx-operators.in.proto │ │ │ ├── onnx-operators.proto │ │ │ ├── onnx.in.proto │ │ │ └── onnx.proto │ │ └── tensorflow │ │ │ ├── example.proto │ │ │ └── feature.proto │ ├── resources │ │ ├── application.conf │ │ └── lib │ │ │ └── touch.txt │ └── scala │ │ └── BIDMach │ │ ├── Clustering.scala │ │ ├── Copyright.txt │ │ ├── Experiments.scala │ │ ├── Featurizer.scala │ │ ├── Learner.scala │ │ ├── Logging.scala │ │ ├── allreduce │ │ ├── AllreduceConfig.scala │ │ ├── AllreduceDimensionNode.scala │ │ ├── AllreduceDummyLearner.scala │ │ ├── AllreduceDummyModel.scala │ │ ├── AllreduceGridMaster.scala │ │ ├── AllreduceLineMaster.scala │ │ ├── AllreduceMessage.scala │ │ ├── AllreduceNode.scala │ │ ├── AllreduceType.scala │ │ ├── AllreduceWorker.scala │ │ ├── AllreduceWorkerStats.scala │ │ ├── Dynamic2DGridLayout.scala │ │ ├── RandPerm.scala │ │ ├── ReceivePipeline.scala │ │ ├── binder │ │ │ ├── AllreduceBinder.scala │ │ │ ├── AssertCorrectnessBinder.scala │ │ │ ├── ElasticAverageBinder.scala │ │ │ └── NoOpBinder.scala │ │ ├── buffer │ │ │ ├── AllReduceBuffer.scala │ │ │ ├── ReducedDataBuffer.scala │ │ │ └── ScatteredDataBuffer.scala │ │ └── old │ │ │ ├── ClosureCleaner.scala │ │ │ ├── Command.scala │ │ │ ├── Host.scala │ │ │ ├── Master.scala │ │ │ ├── Response.scala │ │ │ └── Worker.scala │ │ ├── caffe │ │ ├── Classifier.scala │ │ ├── Net.scala │ │ └── SGDSolver.scala │ │ ├── causal │ │ └── IPTW.scala │ │ ├── datasinks │ │ ├── DataSink.scala │ │ ├── FileSink.scala │ │ └── MatSink.scala │ │ ├── datasources │ │ ├── ArraySource.scala │ │ ├── BlendedSource.scala │ │ ├── DataSource.scala │ │ ├── FileSource.scala │ │ ├── IteratorSource.scala │ │ ├── MatSource.scala │ │ ├── SFileSource.scala │ │ └── StackedSource.scala │ │ ├── io │ │ └── Onnx.scala │ │ ├── mixins │ │ ├── Clustering.scala │ │ ├── Mixin.scala │ │ └── Regularizer.scala │ │ ├── models │ │ ├── Click.scala │ │ ├── Clustering.scala │ │ ├── FM.scala │ │ ├── FactorModel.scala │ │ ├── GLM.scala │ │ ├── GaussianMixture.scala │ │ ├── ICA.scala │ │ ├── KMeans.scala │ │ ├── 
KMeansw.scala │ │ ├── LDA.scala │ │ ├── LDAgibbs.scala │ │ ├── LDAgibbsv.scala │ │ ├── Model.scala │ │ ├── NMF.scala │ │ ├── RandomForest.scala │ │ ├── Regression.scala │ │ ├── SFA.scala │ │ ├── SMF.scala │ │ └── SVD.scala │ │ ├── networks │ │ ├── Net.scala │ │ ├── NetActor.scala │ │ ├── NextWord.scala │ │ ├── SeqToSeq.scala │ │ ├── TransformerLT.scala │ │ ├── Word2Vec.scala │ │ ├── Word2Vech.scala │ │ └── layers │ │ │ ├── AbsLayer.scala │ │ │ ├── AddLayer.scala │ │ │ ├── AllReduceActor.scala │ │ │ ├── AutoNormLayer.scala │ │ │ ├── BatchNormLayer.scala │ │ │ ├── BatchNormScaleLayer.scala │ │ │ ├── ColpermLayer.scala │ │ │ ├── ColsliceLayer.scala │ │ │ ├── CompoundLayer.scala │ │ │ ├── ConstantLayer.scala │ │ │ ├── ConvLayer.scala │ │ │ ├── CopyLayer.scala │ │ │ ├── CropLayer.scala │ │ │ ├── CropMirrorLayer.scala │ │ │ ├── DivLayer.scala │ │ │ ├── DotLayer.scala │ │ │ ├── DropoutLayer.scala │ │ │ ├── EfnLayer.scala │ │ │ ├── ElasticLayer.scala │ │ │ ├── ExpLayer.scala │ │ │ ├── Fn2Layer.scala │ │ │ ├── FnLayer.scala │ │ │ ├── ForwardLayer.scala │ │ │ ├── GLMLayer.scala │ │ │ ├── HcatLayer.scala │ │ │ ├── InputLayer.scala │ │ │ ├── LRNacrossLayer.scala │ │ │ ├── LRNwithinLayer.scala │ │ │ ├── LSTM.scala │ │ │ ├── LSTMfusedLayer.scala │ │ │ ├── Layer.scala │ │ │ ├── LayerActor.scala │ │ │ ├── LayerMat.scala │ │ │ ├── LayerNormLayer.scala │ │ │ ├── LayerNormScaleLayer.scala │ │ │ ├── LinLayer.scala │ │ │ ├── LnLayer.scala │ │ │ ├── MHAttnLayer.scala │ │ │ ├── MatMulLayer.scala │ │ │ ├── MaxIndexLayer.scala │ │ │ ├── MaxLayer.scala │ │ │ ├── Maxi2Layer.scala │ │ │ ├── MaxiLayer.scala │ │ │ ├── MinLayer.scala │ │ │ ├── Mini2Layer.scala │ │ │ ├── MiniLayer.scala │ │ │ ├── ModelLayer.scala │ │ │ ├── MulLayer.scala │ │ │ ├── NegsampOutputLayer.scala │ │ │ ├── Node.scala │ │ │ ├── NodeMat.scala │ │ │ ├── NodeSet.scala │ │ │ ├── NormLayer.scala │ │ │ ├── OnehotLayer.scala │ │ │ ├── PoolingLayer.scala │ │ │ ├── PowerLayer.scala │ │ │ ├── RandomMirrorLayer.scala │ │ │ ├── RectLayer.scala │ │ │ ├── ReshapeLayer.scala │ │ │ ├── ScaleLayer.scala │ │ │ ├── SelectLayer.scala │ │ │ ├── SigmoidLayer.scala │ │ │ ├── SignLayer.scala │ │ │ ├── SoftmaxLayer.scala │ │ │ ├── SoftmaxOutputLayer.scala │ │ │ ├── SoftmaxxLayer.scala │ │ │ ├── SoftplusLayer.scala │ │ │ ├── SplitHorizLayer.scala │ │ │ ├── SplitVertLayer.scala │ │ │ ├── SqrtLayer.scala │ │ │ ├── StackLayer.scala │ │ │ ├── SubLayer.scala │ │ │ ├── SumLayer.scala │ │ │ ├── TanhLayer.scala │ │ │ ├── TensorFormatLayer.scala │ │ │ ├── TransposeLayer.scala │ │ │ └── VariableLayer.scala │ │ ├── updaters │ │ ├── ADAGrad.scala │ │ ├── Batch.scala │ │ ├── BatchNorm.scala │ │ ├── CG.scala │ │ ├── Grad.scala │ │ ├── GradCollide.scala │ │ ├── IncMult.scala │ │ ├── IncNorm.scala │ │ ├── MHTest.scala │ │ ├── Telescoping.scala │ │ └── Updater.scala │ │ └── viz │ │ ├── FilterViz.scala │ │ ├── ImageArray.scala │ │ ├── InputViz.scala │ │ ├── LogViz.scala │ │ ├── Synthesis.scala │ │ ├── Visualization.scala │ │ ├── WebServer.scala │ │ └── utils.scala └── test │ └── scala │ └── BIDMach │ ├── BIDMachSpec.scala │ ├── allreduce │ ├── AllreduceWorkerSpec.scala │ ├── Dynamic2DGridLayoutSpec.scala │ ├── LeNetSpec.scala │ ├── binder │ │ └── ElasticAverageBinderSpec.scala │ └── buffer │ │ ├── ReducedDataBufferSpec.scala │ │ └── ScatteredDataBufferSpec.scala │ └── networks │ └── layers │ └── LayerTest.scala └── tutorials ├── BIDMach_basic_classification.ipynb ├── BIDMach_parameter_tuning.ipynb ├── BIDMat_Scala_Features.ipynb ├── BIDMat_intro.ipynb ├── ClusteringImages.ipynb ├── 
CreateModels.ipynb ├── CreateNets.ipynb ├── GeneralDNNregression.ipynb ├── MLscalePart1.ipynb ├── MLscalePart2.ipynb ├── NBandLR.ipynb └── testing.ipynb

/.gitignore:
--------------------------------------------------------------------------------
.idea/
cbin
lib/*.jar
*.rej
*.so
target
*.o
*.incl
logs
.classpath

data
!data/rcv1_fmt.txt
!data/uci_fmt.txt
!data/uci_wfmt.txt
!data/factorNet
!data/MHTestCorrections
/bin
*.xml
*.iml

.DS_store
project/
*.txt
log.txt.lck
*.lck

--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>BIDMach</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.scala-ide.sdt.core.scalabuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
		<buildCommand>
			<name>org.eclipse.m2e.core.maven2Builder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.eclipse.m2e.core.maven2Nature</nature>
		<nature>org.scala-ide.sdt.core.scalanature</nature>
		<nature>org.eclipse.jdt.core.javanature</nature>
	</natures>
</projectDescription>

--------------------------------------------------------------------------------
/Copyright.txt:
--------------------------------------------------------------------------------
Copyright (c) 2012, Regents of the University of California
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2012-2014, Regents of the University of California
All rights reserved.

LICENSE

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

BIDMach is a very fast machine learning library. Check the latest benchmarks.

The GitHub distribution contains source code only. You also need a JDK 8, an installation of NVIDIA CUDA 8.0 (if you want to use a GPU), and CUDNN 5 if you plan to use deep networks. To build, you need Maven 3.x.

After cloning, cd to the BIDMach directory and build and install the jars with `mvn install`. You can then run BIDMach with `./bidmach`. More details on installing and running are available here.

The main project page is here.

Documentation is here in the wiki.

New: BIDMach has a discussion group on Google Groups.

BIDMach is a sister project of BIDMat, a matrix library, which is also on GitHub.

BIDData also has a project for deep reinforcement learning. BIDMach_RL contains state-of-the-art implementations of several reinforcement learning algorithms.
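As a minimal sketch, the install steps above amount to the following shell session (assuming a JDK 8 and Maven 3.x are already installed, plus CUDA 8.0 if you want GPU support; the clone URL is the one used in readme_gui.md below):

```
git clone https://github.com/BIDData/BIDMach.git
cd BIDMach
mvn install   # build and install the BIDMach jars
./bidmach     # start the BIDMach shell
```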
19 | -------------------------------------------------------------------------------- /bidmach.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | :: Set JAVA_HOME here if not set in environment 3 | :: SET JAVA_HOME= 4 | :: Set as much memory as possible 5 | (SET JAVA_OPTS=-Xmx12G -Xms128M) 6 | :: Fix these if needed 7 | SET JCUDA_VERSION=0.6.5 8 | SET LIBDIR=%CD%\lib 9 | SET JCUDA_LIBDIR=%LIBDIR% 10 | 11 | SET BIDMACH_LIBS=%LIBDIR%\BIDMat.jar;%CD%\BIDMach.jar;%LIBDIR%\ptplot.jar;%LIBDIR%\ptplotapplication.jar;%LIBDIR%\jhdf5.jar;%LIBDIR%\commons-math3-3.1.1.jar;%LIBDIR%\lz4-1.1.2.jar 12 | 13 | SET JCUDA_LIBS=%JCUDA_LIBDIR%\jcuda-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcublas-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcufft-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcurand-%JCUDA_VERSION%.jar;%JCUDA_LIBDIR%\jcusparse-%JCUDA_VERSION%.jar 14 | 15 | SET ALL_LIBS=%LIBDIR%\IScala.jar;%BIDMACH_LIBS%;%JCUDA_LIBS%;%JAVA_HOME%\lib\tools.jar 16 | (SET JAVA_OPTS=-Djava.library.path="%LIBDIR%;%PATH%" %JAVA_OPTS%) 17 | 18 | %CD%\scripts\scala\scala.bat -toolcp "%ALL_LIBS%" -Yrepl-sync -i %LIBDIR%\bidmach_init.scala 19 | -------------------------------------------------------------------------------- /bidmach_full: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # export JAVA_HOME="" # Set here if not set in environment 3 | export JAVA_OPTS="-Xmx12G -Xms128M" # Set as much memory as possible 4 | JCUDA_VERSION="0.5.5" # Fix if needed 5 | BIDMACH_ROOT="${BASH_SOURCE[0]}" 6 | if [ ! `uname` = "Darwin" ]; then 7 | BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"` 8 | else 9 | BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"` 10 | fi 11 | BIDMACH_ROOT=`dirname "$BIDMACH_ROOT"` 12 | BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed s+/cygdrive/c+c:+ )" 13 | JCUDA_LIBDIR="${BIDMACH_ROOT}/lib" 14 | LIBDIR="${BIDMACH_ROOT}/lib" 15 | if [ `uname` = "Darwin" ]; then 16 | export DYLD_LIBRARY_PATH="${LIBDIR}:${LIBDIR}/cuda:${DYLD_LIBRARY_PATH}" 17 | else 18 | export LD_LIBRARY_PATH="${LIBDIR}:${LIBDIR}/cuda:${LD_LIBRARY_PATH}" 19 | fi 20 | 21 | BIDMACH_LIBS="${LIBDIR}/BIDMat.jar;${LIBDIR}/ptplot.jar;${LIBDIR}/ptplotapplication.jar;${LIBDIR}/jhdf5.jar;${LIBDIR}/commons-math3-3.1.1.jar;${LIBDIR}/lz4-1.1.2.jar" 22 | JCUDA_LIBS="${JCUDA_LIBDIR}/jcuda-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcublas-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcufft-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcurand-${JCUDA_VERSION}.jar;${JCUDA_LIBDIR}/jcusparse-${JCUDA_VERSION}.jar" 23 | 24 | export ALL_LIBS="${BIDMACH_ROOT}/BIDMach.jar;${BIDMACH_LIBS};${JCUDA_LIBS};${JAVA_HOME}/lib/tools.jar" 25 | 26 | if [ ! 
"$OS" = "Windows_NT" ]; then 27 | export ALL_LIBS=`echo "${ALL_LIBS}" | sed 's/;/:/g'` 28 | else 29 | export JAVA_OPTS="-Djava.library.path=${LIBDIR};${LIBDIR}\\cuda "$JAVA_OPTS 30 | fi 31 | 32 | ${BIDMACH_ROOT}/scala/bin/scala -nobootcp -cp "${ALL_LIBS}" -Yrepl-sync -i ${LIBDIR}/bidmach_init.scala "$@" 33 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "BIDMach" 3 | 4 | version := "2.0.10-cuda8.0beta" 5 | 6 | organization := "edu.berkeley.bid" 7 | 8 | scalaVersion := "2.11.2" 9 | 10 | artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => 11 | "../../BIDMach.jar" 12 | } 13 | 14 | resolvers ++= Seq( 15 | "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", 16 | "Scala Mirror" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 20 | 21 | javacOptions ++= Seq("-source", "1.7", "-target", "1.7") 22 | 23 | scalacOptions ++= Seq("-deprecation","-target:jvm-1.7") 24 | 25 | initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") 26 | 27 | javaOptions += "-Xmx12g" 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /build_scala_2_10_sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "BIDMach" 3 | 4 | version := "1.1.0" 5 | 6 | organization := "edu.berkeley.bid" 7 | 8 | scalaVersion := "2.10.6" 9 | 10 | artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => 11 | "../../BIDMach.jar" 12 | } 13 | 14 | resolvers ++= Seq( 15 | "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", 16 | "Scala Mirror" at "https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => 20 | deps :+ ("org.scala-lang" % "scala-compiler" % sv) 21 | } 22 | 23 | libraryDependencies += "jline" % "jline" % "2.10" 24 | 25 | libraryDependencies += "org.apache.commons" % "commons-math3" % "3.2" 26 | 27 | //libraryDependencies += "org.scalatest" %% "scalatest" % "2.0" % "test" 28 | 29 | //libraryDependencies += "org.scalacheck" %% "scalacheck" % "1.11.2" % "test" 30 | 31 | libraryDependencies += "junit" % "junit" % "4.5" % "test" 32 | 33 | libraryDependencies += "net.jpountz.lz4" % "lz4" % "1.3" 34 | 35 | //libraryDependencies += "org.scala-saddle" % "jhdf5" % "2.9" 36 | 37 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 38 | 39 | javacOptions ++= Seq("-source", "1.7", "-target", "1.7") 40 | 41 | scalacOptions ++= Seq("-deprecation","-target:jvm-1.7") 42 | 43 | initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") 44 | 45 | javaOptions += "-Xmx12g" 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /build_scala_2_11_sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "BIDMach" 3 | 4 | version := "1.1.0" 5 | 6 | organization := "edu.berkeley.bid" 7 | 8 | scalaVersion := "2.11.2" 9 | 10 | artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => 11 | "../../BIDMach.jar" 12 | } 13 | 14 | resolvers ++= Seq( 15 | "Scala Tools Snapshots" at "http://scala-tools.org/repo-snapshots/", 16 | "Scala Mirror" at 
"https://oss.sonatype.org/content/repositories/releases/" 17 | ) 18 | 19 | libraryDependencies <<= (scalaVersion, libraryDependencies) { (sv, deps) => 20 | deps :+ ("org.scala-lang" % "scala-compiler" % sv) 21 | } 22 | 23 | libraryDependencies += "jline" % "jline" % "2.11" 24 | 25 | libraryDependencies += "org.apache.commons" % "commons-math3" % "3.2" 26 | 27 | //libraryDependencies += "org.scalatest" %% "scalatest" % "2.0" % "test" 28 | 29 | //libraryDependencies += "org.scalacheck" %% "scalacheck" % "1.11.2" % "test" 30 | 31 | libraryDependencies += "junit" % "junit" % "4.5" % "test" 32 | 33 | libraryDependencies += "net.jpountz.lz4" % "lz4" % "1.3" 34 | 35 | //libraryDependencies += "org.scala-saddle" % "jhdf5" % "2.9" 36 | 37 | credentials += Credentials(Path.userHome / ".ivy2" / ".credentials") 38 | 39 | javacOptions ++= Seq("-source", "1.7", "-target", "1.7") 40 | 41 | scalacOptions ++= Seq("-deprecation","-target:jvm-1.7") 42 | 43 | initialCommands := scala.io.Source.fromFile("lib/bidmach_init.scala").getLines.mkString("\n") 44 | 45 | javaOptions += "-Xmx12g" 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /data/factorNet/data.txt: -------------------------------------------------------------------------------- 1 | 1, 1, 2, 2, 0, 2, 1, 2, 1, 2, 1, 1 2 | 2, 3, 1, 2, 2, 3, 0, 2, 2, 1, 3, 3 3 | 3, 4, 4, 0, 1, 2, 2, 3, 3, 3, 3, 4 4 | -------------------------------------------------------------------------------- /data/factorNet/data.txt~: -------------------------------------------------------------------------------- 1 | 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1 2 | 2, 3, 1, 2, 2, 3, 2, 2, 2, 1, 3, 3 3 | 3, 4, 4, 2, 1, 2, 2, 3, 3, 3, 3, 4 4 | -------------------------------------------------------------------------------- /data/factorNet/factorSet.txt: -------------------------------------------------------------------------------- 1 | 1, 1 2 | 0, 1 3 | 0, 1 4 | -------------------------------------------------------------------------------- /data/factorNet/factorSet.txt~: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/data/factorNet/factorSet.txt~ -------------------------------------------------------------------------------- /data/factorNet/statePerNode.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 3 3 | 4 4 | -------------------------------------------------------------------------------- /data/factorNet/statePerNode.txt~: -------------------------------------------------------------------------------- 1 | 2, 3, 4 2 | -------------------------------------------------------------------------------- /data/factorNet/test2/factorSet.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 1 3 | -------------------------------------------------------------------------------- /data/factorNet/test2/factorSet.txt~: -------------------------------------------------------------------------------- 1 | 1, 1 2 | 0, 1 3 | 0, 1 4 | -------------------------------------------------------------------------------- /data/factorNet/test2/statePerNode.txt: -------------------------------------------------------------------------------- 1 | 2 2 | 3 3 | -------------------------------------------------------------------------------- /data/factorNet/test2/statePerNode.txt~: 
--------------------------------------------------------------------------------
2
3
4
--------------------------------------------------------------------------------
/data/rcv1_fmt.txt:
--------------------------------------------------------------------------------
word catname
int docid
int dmy
--------------------------------------------------------------------------------
/data/uci_fmt.txt:
--------------------------------------------------------------------------------
int cols
int rows
float vals
--------------------------------------------------------------------------------
/data/uci_wfmt.txt:
--------------------------------------------------------------------------------
word term
--------------------------------------------------------------------------------
/getcudaversion.sh:
--------------------------------------------------------------------------------

# try to figure out the CUDA version. See if nvcc is in the path, and
# then call it to get the version. If not, use a default version.
# If $CUDA_VERSION is already set, don't touch it.

if [ "${CUDA_VERSION}" = "" ]; then
    if [[ $(type -P nvcc) ]]; then
        CUDA_VERSION=`nvcc --version | grep release | sed 's/.*release //' | sed 's/\,.*//'`
    else
        CUDA_VERSION="7.5"
    fi
fi
--------------------------------------------------------------------------------
/getlibs.sh:
--------------------------------------------------------------------------------
#!/bin/bash

source=$1

BIDMACH_ROOT="${BASH_SOURCE[0]}"
if [ ! `uname` = "Darwin" ]; then
    BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"`
else
    while [ -L "${BIDMACH_ROOT}" ]; do
        BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"`
    done
fi
BIDMACH_ROOT=`dirname "$BIDMACH_ROOT"`
BIDMACH_ROOT=`pwd`
BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed s+/cygdrive/c+c:+ )"

cp ${source}/lib/*.jar ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.so ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.dll ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.dylib ${BIDMACH_ROOT}/lib
cp ${source}/lib/*.jnilib ${BIDMACH_ROOT}/lib

cp ${source}/BIDMach.jar ${BIDMACH_ROOT}

mkdir -p ${BIDMACH_ROOT}/cbin
cp ${source}/cbin/* ${BIDMACH_ROOT}/cbin

--------------------------------------------------------------------------------
/getnativepath.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/getnativepath.class
--------------------------------------------------------------------------------
/getnativepath.java:
--------------------------------------------------------------------------------
public class getnativepath {
    public static void main(String [] args)
    {
        String v = System.getProperty("java.library.path");
        System.out.print(v);
    }
}
--------------------------------------------------------------------------------
/jni/include/Logger.hpp:
--------------------------------------------------------------------------------
/*
 * JCuda - Java bindings for NVIDIA CUDA driver and runtime API
 *
 * Copyright (c) 2009-2012 Marco Hutter - http://www.jcuda.org
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef LOGGER
#define LOGGER

#include
#include
#include

enum LogLevel {LOG_QUIET, LOG_ERROR, LOG_WARNING, LOG_INFO, LOG_DEBUG, LOG_TRACE, LOG_DEBUGTRACE};

class Logger
{
public:
    static void log(LogLevel level, const char* message, ...);
    static void setLogLevel(LogLevel level);

//private:
    static LogLevel currentLogLevel;

};

#endif
--------------------------------------------------------------------------------
/jni/src/JCUDA_Copyright.txt:
--------------------------------------------------------------------------------
JCuda - Java bindings for NVIDIA CUDA

Copyright (c) 2008-2012 Marco Hutter - http://www.jcuda.org

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/lib/Jcommon_JfreeChart_LGPL.html:
--------------------------------------------------------------------------------





JFreeChart LGPL




--------------------------------------------------------------------------------
/lib/bidmach_init.sc:
--------------------------------------------------------------------------------
import BIDMat.{BMat,CMat,CSMat,DMat,Dict,FMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,HMat,IDict,Image,IMat,LMat,Mat}
import BIDMat.{Quaternion,SMat,SBMat,SDMat,TMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMat.Solvers._
import BIDMat.Plotting._
import BIDMach.{Learner,ParLearner}
import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD}
import BIDMach.networks.{Net}
import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource}
import BIDMach.datasinks.{DataSink,MatSink}
import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer}
import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping,Updater}
import BIDMach.causal.{IPTW}
import BIDMat.Mat.console_publish

Mat.checkMKL(false)
Mat.checkCUDA(true)

--------------------------------------------------------------------------------
/lib/bidmach_notebook_init.sc:
--------------------------------------------------------------------------------
import BIDMat.{BMat,CMat,CSMat,DMat,Dict,FMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,HMat,IDict,Image,IMat,LMat,Mat,Quaternion,SMat,SBMat,SDMat,TMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMat.Solvers._
import BIDMat.JPlotting._
import BIDMach.{Learner,ParLearner}
import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD}
import BIDMach.networks.{Net}
import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource}
import BIDMach.datasinks.{DataSink,MatSink}
import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer}
import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping,Updater}
import BIDMach.causal.{IPTW}

Mat.checkMKL(false)
Mat.checkCUDA
Mat.setInline

--------------------------------------------------------------------------------
/logo-64x64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/logo-64x64.png
--------------------------------------------------------------------------------
/logs/touch.txt:
--------------------------------------------------------------------------------
touch
--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------

In theory -toolcp should only contain the tool classes, but native code errors result if it doesn't contain all libs.

${BIDMACH_ROOT}/command/scala -Dscala.repl.maxprintstring=8000 ${userargs} -nobootcp -toolcp "${ALL_LIBS}" \

--------------------------------------------------------------------------------
/readme_gui.md:
--------------------------------------------------------------------------------
```
git clone https://github.com/BIDData/BIDMach.git
cd BIDMach
git checkout gui
mvn package
```

First, download models.tar.gz and data.tar.gz from the Google Drive (https://goo.gl/vqc3rJ).

Extract models.tar.gz in the BIDMach folder.
Extract data.tar.gz to wherever you like.

Before running these scripts, set the `traindir` variable in the scripts to the right data location. Change `pretrain_model_dir` and `pretrain_discriminator_dir` if you extracted the models into a different location.

DEMO for MNIST:
```
./bidmach scripts/viz/mnist.ssc
```

DEMO for CIFAR:
```
./bidmach scripts/viz/testcifar_norm.ssc
```

After loading the scripts, type `s.launch` to start the MCMC process. Use `s.stop` to stop.

If you want to use the discriminator (requires pixel values in [0, 256)), run `o.clipping = true` in the shell, and set `base` to 0 in the UI.

By default, mnist.ssc sets `clipping` to false and testcifar_norm.ssc sets it to true.

Change the L2 weight and the discriminator weight to see the effect.
--------------------------------------------------------------------------------
/sbt-pkg/bin/java9-rt-export.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/sbt-pkg/bin/java9-rt-export.jar
--------------------------------------------------------------------------------
/sbt-pkg/bin/sbt-launch.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/sbt-pkg/bin/sbt-launch.jar
--------------------------------------------------------------------------------
/sbt-pkg/conf/sbtconfig.txt:
--------------------------------------------------------------------------------
# Set the java args to high

-Xmx512M

-XX:MaxPermSize=256m

-XX:ReservedCodeCacheSize=128m



# Set the extra SBT options

-Dsbt.log.format=true

--------------------------------------------------------------------------------
/sbt-pkg/conf/sbtopts:
--------------------------------------------------------------------------------
# ------------------------------------------------ #
#  The SBT Configuration file.                     #
# ------------------------------------------------ #


# Disable ANSI color codes
#
#-no-colors

# Starts sbt even if the current directory contains no sbt project.
#
-sbt-create

# Path to global settings/plugins directory (default: ~/.sbt)
#
#-sbt-dir /etc/sbt

# Path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
#
#-sbt-boot ~/.sbt/boot

# Path to local Ivy repository (default: ~/.ivy2)
#
#-ivy ~/.ivy2

# set memory options
#
#-mem

# Use local caches for projects, no sharing.
#
#-no-share

# Put SBT in offline mode.
#
#-offline

# Sets the SBT version to use.
39 | #-sbt-version 0.11.3 40 | 41 | # Scala version (default: latest release) 42 | # 43 | #-scala-home 44 | #-scala-version 45 | 46 | # java version (default: java from PATH, currently $(java -version |& grep version)) 47 | # 48 | #-java-home 49 | 50 | -------------------------------------------------------------------------------- /scripts/README.txt: -------------------------------------------------------------------------------- 1 | Scripts for loading data and testing BIDMach learners. 2 | 3 | Most of these scripts should be run from within this directory. 4 | 5 | .sh scripts are bash scripts that should be launched from a bash shell. 6 | 7 | .ssc scripts are scala scripts which should be run from this directory with 8 | 9 | ../bidmach scriptname.ssc -------------------------------------------------------------------------------- /scripts/benchmarks/basic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char ** argv) { 6 | int n, nreps, irep, i, j, ibase; 7 | struct timeval tp1, tp2; 8 | double t1, t2, tdiff; 9 | double flops; 10 | sscanf(argv[1], "%d", &n); 11 | sscanf(argv[2], "%d", &nreps); 12 | float *a; 13 | float *b; 14 | float *c; 15 | a = (float *)malloc(n*n*sizeof(float)); 16 | b = (float *)malloc(n*n*sizeof(float)); 17 | c = (float *)malloc(n*n*sizeof(float)); 18 | gettimeofday(&tp1, NULL); 19 | for (irep = 0; irep < nreps; irep++) { 20 | for (i = 0; i < n; i++) { 21 | ibase = i * n; 22 | for (j = 0; j < n; j++) { 23 | c[j + ibase] = a[j + ibase] + b[j + ibase]; 24 | } 25 | } 26 | } 27 | gettimeofday(&tp2, NULL); 28 | t1 = tp1.tv_sec + 1.0e-6*tp1.tv_usec; 29 | t2 = tp2.tv_sec + 1.0e-6*tp2.tv_usec; 30 | tdiff = t2 - t1; 31 | flops = 1.0 * n * n * nreps; 32 | printf("time %f, Mflops %f %f\n", tdiff/nreps, flops/tdiff/1.0e6, c[1000000-1]); 33 | } 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.jl: -------------------------------------------------------------------------------- 1 | 2 | n = 10000; 3 | a = rand(Float32,(n,n)) 4 | b = rand(Float32,(n,n)) 5 | c = zeros(Float32,(n,n)); 6 | t1 = time(); 7 | 8 | for i = 1:n 9 | for j = 1:n 10 | c[i,j] = a[i,j] + b[i,j] 11 | end 12 | end 13 | 14 | t2 = time(); 15 | 16 | for i = 1:10 17 | c = a+ b; 18 | end 19 | 20 | t3 = time(); 21 | 22 | dt1 = t2 - t1 23 | dt2 = t3 - t2 24 | n2 = n*n 25 | 26 | mflops1 = n2 / dt1 / 1e6; 27 | mflops2 = n2 / dt2 / 1e5; 28 | 29 | println("times $dt1,$dt2, mflops $mflops1,$mflops2") 30 | 31 | 32 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.lua: -------------------------------------------------------------------------------- 1 | 2 | nreps = 10 3 | n = 10000 4 | a = {} 5 | b = {} 6 | c = {} 7 | 8 | for i = 1, n do 9 | a[i] = {}; 10 | b[i] = {}; 11 | c[i] = {}; 12 | for j = 1, n do 13 | a[i][j] = math.random(); 14 | b[i][j] = math.random(); 15 | c[i][j] = 0; 16 | end 17 | end 18 | 19 | t1=os.time(); 20 | 21 | for irep = 1, nreps do 22 | for i = 1, n do 23 | for j = 1, n do 24 | c[i][j] = a[i][j] + b[i][j]; 25 | end 26 | end 27 | end 28 | 29 | t2=os.time(); 30 | dt = t2 - t1; 31 | n2 = 1.0*n*n*nreps 32 | 33 | print(string.format("time=3.2%f, Mflops=3.2%f",dt/nreps,n2/dt/1e6)) 34 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.py: -------------------------------------------------------------------------------- 1 | 
import time 2 | import numpy as np; 3 | import numpy.random as rand; 4 | t0 = time.time() 5 | n = 10000 6 | a = rand.rand(n,n) 7 | b = rand.rand(n,n) 8 | 9 | t0 = time.time() 10 | c = a + b; 11 | t1 = time.time() 12 | dt1 = t1 - t0 13 | print dt1 14 | 15 | for i in range(0,n): 16 | for j in range(0,n): 17 | c[i][j] = a[i][j] + b[i][j]; 18 | 19 | t2 = time.time() 20 | 21 | dt2 = t2 - t1 22 | print dt1, dt2 23 | -------------------------------------------------------------------------------- /scripts/benchmarks/basic.ssc: -------------------------------------------------------------------------------- 1 | :silent 2 | val n = 10000; 3 | val a = rand(n,n); 4 | val b = rand(n,n); 5 | val c = zeros(n,n); 6 | 7 | tic 8 | var i = 0; 9 | while (i < n) { 10 | var j = 0; 11 | while (j < n) { 12 | c(j,i) = a(j,i) + b(j,i); 13 | j +=1; 14 | } 15 | i += 1; 16 | } 17 | val t1 = toc; 18 | 19 | for (i <- 0 until 100) { 20 | val c = a+b; 21 | } 22 | 23 | val t2 = toc - t1; 24 | val n2 = n*n; 25 | :silent 26 | println("time %f,%f Mflops %f,%f" format (t1,t2/100,n2/t1/1e6f,n2/t2/1e4f)); 27 | 28 | -------------------------------------------------------------------------------- /scripts/benchmarks/createVWdata.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir = "../../data/rcv1/" 3 | 4 | val a = loadSMat(dir+"docs.smat.lz4"); 5 | val c = sparse(loadFMat(dir+"cats.fmat.lz4")); 6 | 7 | val ta = loadSMat(dir+"testdocs.smat.lz4"); 8 | val tc = sparse(loadFMat(dir+"testcats.fmat.lz4")); 9 | 10 | saveVW(dir+"train.vw", a, c); 11 | saveVW(dir+"test.vw", ta, tc); -------------------------------------------------------------------------------- /scripts/benchmarks/juliaRandWalk.jl: -------------------------------------------------------------------------------- 1 | 2 | function rw(a) 3 | n = length(a) 4 | a[1] = rand() - 0.5 5 | for i = 2:n 6 | a[i] = a[i-1] + rand() - 0.5 7 | end 8 | a 9 | end 10 | 11 | function fib(n::Int64) 12 | if (n <= 2) 13 | 1 14 | else 15 | fib(n-1) + fib(n-2) 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /scripts/benchmarks/scalaRandWalk.ssc: -------------------------------------------------------------------------------- 1 | 2 | import java.util.Random 3 | 4 | val randgen = new Random 5 | 6 | def rw(n:Int) = { 7 | var i = 0; 8 | var sum = 0f; 9 | while (i < n) { 10 | sum += randgen.nextFloat 11 | i += 1; 12 | } 13 | sum 14 | } 15 | 16 | def fib(n:Long):Long = { 17 | if (n <= 2) 1 18 | else fib(n-2) + fib(n-1) 19 | } 20 | -------------------------------------------------------------------------------- /scripts/benchmarks/scoreSpark.ssc: -------------------------------------------------------------------------------- 1 | 2 | val m = loadFMat("/code/spark/modelx.txt") 3 | val m2 = loadFMat("/code/spark/modelx.fmat.lz4") 4 | 5 | val dd = loadFMat("/big/RCV1/v2/spark_test.fmat.lz4") 6 | 7 | val cc = loadFMat("/big/RCV1/v2/spark_cattest.fmat.lz4") 8 | 9 | val c6 = cc(6,?) 
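// c6 holds row 6 of the test label matrix, i.e. the labels for category 6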

val ddi = dd on ones(1,dd.ncols)
val prod = m * ddi
val prod2 = m2 * ddi

val px = 1 / (1 + exp(- prod))
val px2 = 1 / (1 + exp(- prod2))

val ii = find(c6)
val jj = find(c6 == 0)

val vv = px(jj) on (1 - px(ii))
val score = mean(vv)

val vv2 = px2(jj) on (1 - px2(ii))
val score2 = mean(vv2)

--------------------------------------------------------------------------------
/scripts/benchmarks/scoreSpark2.ssc:
--------------------------------------------------------------------------------

val m = loadFMat("/code/spark/modelx.txt")
val m2 = loadFMat("/code/spark/modelx.fmat.lz4")

val prod = m * ddi
val prod2 = m2 * ddi

val px = 1 / (1 + exp(- prod))
val px2 = 1 / (1 + exp(- prod2))

val vv = px(jj) on (1 - px(ii))
val vv2 = px2(jj) on (1 - px2(ii))

val uu = c6 *@ px + (1.0f - c6) *@ (1.0f - px)
val uu2 = c6 *@ px2 + (1.0f - c6) *@ (1.0f - px2)

val llv = ln(uu)
val ii1 = find(prod > 10f)
llv(ii1) = 0
val jj1 = find(prod < -10f)
llv(jj1) = prod(jj1)

val llv2 = ln(uu2)
val ii2 = find(prod2 > 10f)
llv2(ii2) = 0
val jj2 = find(prod2 < -10f)
llv2(jj2) = prod2(jj2)

val score = mean(vv)
val score2 = mean(vv2)

val ll = mean(llv)
val ll2 = mean(llv2)

--------------------------------------------------------------------------------
/scripts/benchmarks/skkmeans.py:
--------------------------------------------------------------------------------
import time

import numpy as np
import scipy.io as sio
import h5py
from sklearn.datasets import load_svmlight_file
from sklearn.cluster import KMeans

print("reading")

f = h5py.File('/code/BIDMach/data/MNIST8M/all.mat','r')

t0 = time.time()
data = f.get('/all') # Get a certain dataset
X = np.array(data)
t1 = time.time()

t_read = t1 - t0
print("Finished reading in " + repr(t_read) + " secs")

batch_size = 10
kmeans = KMeans(n_clusters=256, init='random', n_init=1, max_iter=10, tol=0.0001, precompute_distances=False, verbose=0, random_state=None, copy_x=False, n_jobs=1)
kmeans.fit(X)
t2 = time.time()
t_batch = t2 - t1
print("compute time " + repr(t_batch) + " secs")
--------------------------------------------------------------------------------
/scripts/benchmarks/sklogistic.py:
--------------------------------------------------------------------------------
import time

from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.datasets import load_svmlight_file

t0 = time.time()
print("Start reading")
X, Y = load_svmlight_file("../../data/rcv1/train.libsvm")

print("Finished reading")
batch_size = 10

sgd = OneVsRestClassifier(SGDClassifier(loss='log', alpha=0.01, fit_intercept=True, n_iter=3))
t1 = time.time()
sgd.fit(X,Y)
t2 = time.time()

print("load time {0:3.2f}, train time {1:3.2f}".format(t1-t0,t2-t1))

--------------------------------------------------------------------------------
/scripts/benchmarks/start_spark.sh:
--------------------------------------------------------------------------------

cd /opt/spark/ec2

# launch a cluster
./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa -s 2 --instance-type=r3.2xlarge
--region=us-west-2 launch sparkcluster 6 | 7 | # ganglia patch 8 | 9 | MASTER=`./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa --region=us-west-2 get-master sparkcluster | tail -n 1` 10 | scp -i ~/.ssh/jfc_rsa ~/httpd.conf ec2-user@${MASTER}:httpd.conf 11 | 12 | # login to the master 13 | ./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa --region=us-west-2 login sparkcluster 14 | 15 | export AWS_ACCESS_KEY_ID= 16 | export AWS_SECRET_ACCESS_KEY= 17 | 18 | # ganglia patch 19 | 20 | rm -r /var/lib/ganglia/rrds 21 | ln -s /mnt/ganglia/rrds /var/lib/ganglia/rrds 22 | 23 | cp /etc/httpd/conf/httpd.conf /etc/httpd/conf/httpd_bkup.conf 24 | cp /home/ec2-user/httpd.conf /etc/httpd/conf/httpd.conf 25 | apachectl -k graceful 26 | 27 | # need more driver memory for several models, e.g. multiclass and word2vec 28 | spark/bin/spark-shell 29 | spark/bin/spark-shell --driver-memory 16g --conf "spark.driver.maxResultSize=8g" 30 | 31 | 32 | exit 33 | 34 | echo "y" | ./spark-ec2 -k "pils_rsa" -i /home/ec2-user/.ssh/jfc_rsa --region=us-west-2 destroy sparkcluster 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/benchmarks/testSparkALS.ssc: -------------------------------------------------------------------------------- 1 | 2 | import org.apache.spark.mllib.recommendation.ALS 3 | import org.apache.spark.mllib.recommendation.Rating 4 | 5 | import scala.compat.Platform._ 6 | 7 | val nnodes = 32; 8 | val t0 = currentTime 9 | val data = sc.textFile("s3n://bidmach/netflix_mm.train", nnodes * 4) 10 | //val ratings = data.map(_.split("::") match { case Array(user, item, rate, timestamp) => 11 | // Rating(user.toInt, item.toInt, rate.toDouble) 12 | val ratings = data.map(_.split("\t") match { case Array(user, item, rate) => 13 | Rating(user.toInt, item.toInt, rate.toDouble) 14 | }) 15 | 16 | // Do a test-train split 17 | val splits = ratings.randomSplit(Array(0.9, 0.1)) 18 | val training = splits(0) 19 | val test = splits(1) 20 | 21 | val cc = training.cache.count // force the parse to execute, result in memory 22 | 23 | 24 | // Build the recommendation model using ALS 25 | val rank = 500 26 | val t1 = currentTime 27 | val numIterations = 5 28 | val model = ALS.train(training, rank, numIterations, 0.05) 29 | val t2 = currentTime 30 | // Evaluate the model on test data 31 | val usersProducts = test.map { case Rating(user, product, rate) => 32 | (user, product) 33 | } 34 | 35 | val predictions = 36 | model.predict(usersProducts).map { case Rating(user, product, rate) => 37 | ((user, product), math.min(5.0, math.max(1.0, rate))) 38 | } 39 | 40 | val ratesAndPreds = test.map { case Rating(user, product, rate) => 41 | ((user, product), rate) 42 | }.join(predictions) 43 | 44 | val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) => 45 | val err = (r1 - r2) 46 | err * err 47 | }.mean() 48 | 49 | val RMSE = math.sqrt(MSE) 50 | 51 | println("Root Mean Squared Error = " + RMSE) 52 | 53 | println("Load Time = %f secs, Compute Time = %f" format ((t1-t0)/1000f, (t2-t1)/1000f)) 54 | -------------------------------------------------------------------------------- /scripts/benchmarks/testSparkKMeans.ssc: -------------------------------------------------------------------------------- 1 | 2 | import org.apache.spark.mllib.clustering.KMeans 3 | import org.apache.spark.mllib.linalg.Vectors 4 | import scala.compat.Platform._ 5 | 6 | val nnodes = 16 7 | 8 | // Load and parse the data 9 | val t0 = currentTime 10 | val data = 
sc.textFile("s3n://bidmach/allst.txt", nnodes * 4) 11 | val parsedData = data.map(s => Vectors.dense(s.split('\t').map(_.toDouble))) 12 | val cc = parsedData.cache.count // force the parse to execute, result in memory 13 | 14 | // Cluster the data into classes using KMeans 15 | val numClusters = 4 16 | val numIterations = 10 17 | val t1 = currentTime 18 | val clusters = KMeans.train(parsedData, numClusters, numIterations, 1, "random") 19 | val t2 = currentTime 20 | 21 | // Evaluate clustering by computing Within Set Sum of Squared Errors 22 | val WSSSE = clusters.computeCost(parsedData) 23 | val t3 = currentTime 24 | println("Within Set Sum of Squared Errors = " + WSSSE) 25 | println("Load Time = %f secs, Compute Time = %f, Eval Time =%f" format ((t1-t0)/1000f, (t2-t1)/1000f, (t3-t2)/1000f)) 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/benchmarks/testSparkSVM.ssc: -------------------------------------------------------------------------------- 1 | import org.apache.spark.SparkContext 2 | import org.apache.spark.mllib.classification.SVMWithSGD 3 | import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics 4 | import org.apache.spark.mllib.regression.LabeledPoint 5 | import org.apache.spark.mllib.linalg.Vectors 6 | import org.apache.spark.mllib.util.MLUtils 7 | import scala.compat.Platform._ 8 | 9 | val t0=currentTime 10 | // Load training data in LIBSVM format. 11 | val data = MLUtils.loadLibSVMFile(sc, "/big/RCV1/v2/train6.libsvm") 12 | val t1=currentTime 13 | 14 | // Split data into training (90%) and test (10%). 15 | val splits = data.randomSplit(Array(0.9, 0.1), seed = 11L) 16 | val training = splits(0).cache() 17 | val test = splits(1) 18 | val t2=currentTime 19 | 20 | // Run training algorithm to build the model 21 | val numIterations = 100 22 | val model = SVMWithSGD.train(training, numIterations) 23 | 24 | val t3=currentTime 25 | 26 | // Clear the default threshold. 27 | model.clearThreshold() 28 | 29 | // Compute raw scores on the test set. 30 | val scoreAndLabels = test.map { point => 31 | val score = model.predict(point.features) 32 | (score, point.label) 33 | } 34 | 35 | val t4=currentTime 36 | 37 | // Get evaluation metrics. 
38 | val metrics = new BinaryClassificationMetrics(scoreAndLabels) 39 | val auROC = metrics.areaUnderROC() 40 | println("Area under ROC = " + auROC) 41 | 42 | println("load time %f, split %f, train %f, predict %f" format ((t1-t0)/1000f, 43 | (t2-t1)/1000f, (t3-t2)/1000f, (t4-t3)/1000f)) 44 | -------------------------------------------------------------------------------- /scripts/benchmarks/testVWLDA.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | time /code/vowpal_wabbit/vowpalwabbit/vw --lda 256 --lda_D 100000 --passes 3 --readable_model wordTopics.dat --bit_precision 18 --learning_rate 1.0 --lda_rho 0.1 --cache_file vw.cache --data /big/RCV1/v2/vw_sparse_lda_train.dat --lda_alpha 0.1 --random_weights true --power_t 0.5 --minibatch 1024 --initial_t 1.0 4 | 5 | # BIDMach options 6 | # opts.putBack = 1 7 | # opts.uiter = 1 8 | # opts.batchSize = 1024 9 | # opts.npasses = 3 10 | # opts.dim = 256 -------------------------------------------------------------------------------- /scripts/benchmarks/testVWLR.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # time /code/vowpal_wabbit/vowpalwabbit/vw --oaa 103 --readable_model rcv1.model.txt --loss_function logistic -b 24 --adaptive --invariant -l 1 --cache_file vw.cache --passes 3 -d /big/RCV1/v2/vw_sparse_train.dat 4 | 5 | time /code/vowpal_wabbit/vowpalwabbit/vw --multilabel_oaa 104 --readable_model rcv1.model.txt --loss_function logistic -b 24 -l 1 --cache_file vw.cache --passes 3 -d ../../data/rcv1/train.vw 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /scripts/benchmarks/testVWkmeans.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | time /code/vowpal_wabbit/vowpalwabbit/vw --oaa 103 -f rcv1.model --loss_function logistic -b 24 --adaptive --invariant -l 1 --cache_file vw.cache --passes 1 -d /big/RCV1/v2/vw_sparse_train.dat 4 | 5 | 6 | -------------------------------------------------------------------------------- /scripts/bn_test.ssc: -------------------------------------------------------------------------------- 1 | // Script to test out BayesNet.scala for the general case. 
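// Assumed input layout for BayesNet.learner(states, dag, isFactorModel, data):
// data is an nnodes x nsamples matrix with one observed state per node per
// sample, dag is a sparse nnodes x nnodes adjacency matrix whose (i,j) entry is
// nonzero when node i is a parent of node j, and states holds each node's
// number of discrete states. The false flag selects the Bayes-net (DAG) code
// path; the factor-graph path is exercised by factorNet_test.ssc instead.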
2 | 3 | val data = loadFMat("gibbs_data/koller_data_1m_050perc.lz4") 4 | val dag = loadSMat("gibbs_data/koller_dag.lz4") 5 | val states = loadIMat("gibbs_data/koller_states.lz4") 6 | 7 | val (nn , opts) = BIDMach.models.BayesNet.learner(states , dag , false , data) 8 | opts.npasses = 10 9 | opts.useGPU = true 10 | opts.batchSize = 10000 11 | opts.what 12 | nn.train 13 | nn.modelmats(0).t 14 | //sys.exit 15 | -------------------------------------------------------------------------------- /scripts/cluster_destroy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 destroy $CLUSTER 8 | 9 | 10 | -------------------------------------------------------------------------------- /scripts/cluster_launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster2" 5 | fi 6 | 7 | # launch a cluster 8 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa -a "ami-b2bf04ca" -s 16 --instance-type=p2.xlarge --region=us-west-2 --zone=us-west-2a --vpc-id="vpc-c93fbdac" --subnet-id="subnet-75177210" --additional-tags='Group:DSS 2' launch $CLUSTER 9 | -------------------------------------------------------------------------------- /scripts/cluster_login.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | if [ ! ${1} == "" ]; then 8 | LOGIN="-n ${1} login" 9 | else 10 | LOGIN="login" 11 | fi 12 | 13 | # login to the master 14 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 ${LOGIN} $CLUSTER 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /scripts/cluster_mux.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import subprocess 3 | import time 4 | 5 | SPARK_SLAVE_PATH = '/opt/spark/conf/slaves' 6 | 7 | 8 | def tmux_cmd(cmd, fail_ok=False): 9 | if type(cmd) is str: 10 | cmd = cmd.split(' ') 11 | try: 12 | return subprocess.check_output(['tmux'] + cmd).strip().split('\n') 13 | except subprocess.CalledProcessError as e: 14 | if not fail_ok: 15 | raise e 16 | 17 | 18 | def send_cmd(pid, cmd): 19 | tmux_cmd(['send-keys', '-t', pid, cmd+'\n']) 20 | 21 | 22 | def main(): 23 | tmux_cmd('kill-window -t tail-workers', fail_ok=True) 24 | tmux_cmd('new-window -d -n tail-workers') 25 | 26 | pane_id = tmux_cmd('list-panes -t tail-workers -F #D')[0] 27 | tmux_cmd('split-window -d -h -t {}'.format(pane_id)) 28 | pane_ids = tmux_cmd('list-panes -t tail-workers -F #D') 29 | for pid in pane_ids: 30 | tmux_cmd('split-window -d -v -t {}'.format(pid)) 31 | pane_ids = tmux_cmd('list-panes -t tail-workers -F #D') 32 | 33 | with open(SPARK_SLAVE_PATH, 'r') as f: 34 | slave_addrs = list(f.readlines()) 35 | 36 | for pid, saddr in zip(pane_ids, slave_addrs): 37 | send_cmd(pid, 'su2') 38 | time.sleep(0.05) 39 | send_cmd(pid, 'ssh {}'.format(saddr)) 40 | time.sleep(0.1) 41 | send_cmd(pid, 'tail -f /tmp/bidmach_worker.log') 42 | 43 | 44 | if __name__ == '__main__': 45 | main() 46 | -------------------------------------------------------------------------------- /scripts/cluster_start.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | # start the cluster 8 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 --private-ips start $CLUSTER 9 | 10 | -------------------------------------------------------------------------------- /scripts/cluster_stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$CLUSTER" == "" ]]; then 4 | CLUSTER="bidcluster1" 5 | fi 6 | 7 | # stop the cluster 8 | python bidmach_ec2.py -k "dss2_rsa" -i ~/.ssh/dss2_rsa --region=us-west-2 stop $CLUSTER 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/collect_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import subprocess 4 | import sys 5 | import os 6 | import datetime 7 | 8 | def main(): 9 | files = sys.argv[1:] 10 | s = subprocess.check_output("python bidmach_ec2.py -k id_rsa -i ~/.ssh/id_rsa --region=us-west-2 get-slaves " + os.environ['CLUSTER'], shell=True) 11 | slaves = s.splitlines()[2:] 12 | dir = '/code/BIDMach/%s/%s' % (os.environ['CLUSTER'], datetime.datetime.now().strftime("%Y%m%d%H%M")) 13 | os.mkdir(dir) 14 | for s in slaves: 15 | slave_dir = '%s/%s' % (dir, s) 16 | os.mkdir(slave_dir) 17 | todostr = 'rsync -e "ssh -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no" -avz ubuntu@%s:/code/BIDMach/logs/log.0.0.txt %s/' % (s, slave_dir) 18 | print(todostr) 19 | subprocess.check_call(todostr, shell=True) 20 | todostr = 'rsync -e "ssh -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no" -avz ubuntu@%s:/code/BIDMach/scripts/logres* %s/' % (s, slave_dir) 21 | print(todostr) 22 | subprocess.check_call(todostr, shell=True) 23 | 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /scripts/criteonet.ssc: -------------------------------------------------------------------------------- 1 | val mdir = "../data/criteo/parts/" 2 | 3 | val (nn,opts) = Net.learner(mdir+"trainsorted%02d.smat.lz4",mdir+"trainlabel%02d.fmat.lz4"); 4 | opts.nend = 80; 5 | opts.lrate = 0.01f; 6 | opts.reg1weight = 0.0001f; 7 | opts.batchSize=100 8 | opts.npasses=5; 9 | opts.hasBias = true; 10 | opts.links = irow(1); 11 | //opts.pstep = 0.001f; 12 | //opts.aopts = opts; 13 | opts.texp = 0.4f; 14 | opts.nweight = 1e-4f 15 | 16 | val net = Net.dnodes3(6,10,1f,1,opts,1); 17 | opts.nodeset = net 18 | val model = nn.model.asInstanceOf[Net]; 19 | 20 | nn.train 21 | 22 | val res = nn.results(0,?) 
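// res is row 0 of the Learner's score log, with one entry per evaluation step,
// so its tail summarizes convergence. Minimal sketch for eyeballing the end of
// the log (the max(0, ...) guard handles runs with fewer than 20 logged steps):
val resTail = res(0, math.max(0, res.ncols - 20) -> res.ncols);
println("mean of last logged scores = %f" format mean(resTail, 2).v);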
23 | 24 | val testdata = loadSMat(mdir+"trainsorted%02d.smat.lz4" format opts.nend); 25 | val testlabels = loadFMat(mdir+"trainlabel%02d.fmat.lz4" format opts.nend); 26 | 27 | val (mm, mopts) = Net.predictor(model, testdata); 28 | 29 | mm.predict 30 | 31 | val preds=FMat(mm.preds(0)) 32 | 33 | val ll = DMat(ln(preds *@ testlabels + (1-preds) *@ (1-testlabels))) 34 | val rc = roc(preds, testlabels, 1-testlabels, 1000); 35 | 36 | (mean(ll), mean(rc)) 37 | 38 | -------------------------------------------------------------------------------- /scripts/distribute.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | while read slave; do 5 | echo "distributing to ${slave}" 6 | rsync -r "${1}/" "${slave}:${1}" 7 | done < /code/BIDMach/conf/slaves 8 | -------------------------------------------------------------------------------- /scripts/distribute_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | path=$1 4 | number=$2 5 | 6 | i=0 7 | while read slave; do 8 | slaves[$i]=$slave 9 | i=$((i+1)) 10 | done < /code/BIDMach/conf/slaves 11 | 12 | alen=$i 13 | echo ${slaves[*]} 14 | 15 | j=0 16 | k=0 17 | for i in `seq 0 $number`; do 18 | fromname=`printf $path $i` 19 | toname=`printf $path $j` 20 | echo scp $fromname ${slaves[$k]}:$toname 21 | scp $fromname ${slaves[$k]}:$toname 22 | k=$((k+1)) 23 | if [ ${k} -ge ${alen} ]; then 24 | k=0 25 | j=$((j+1)) 26 | fi 27 | done 28 | -------------------------------------------------------------------------------- /scripts/distribute_file.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | folder=`dirname ${1}` 3 | while read slave; do 4 | echo "distributing to ${slave}" 5 | rsync "${1}" "${slave}:${folder}" 6 | done < /code/BIDMach/conf/slaves 7 | -------------------------------------------------------------------------------- /scripts/distributed/cmudict-tail-reducer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cmd-mux 7 -n 8 -c 'tail -f /opt/BIDMach/logs/master_s2s_cmudict/matIdx{idx}.log' 3 | -------------------------------------------------------------------------------- /scripts/distributed/news-commentary-tail-reducer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cmd-mux 7 -n 8 -c 'tail -f /opt/BIDMach/logs/master_sts_2015-news-commentary-v10-fr-en/matIdx{idx}.log' 3 | -------------------------------------------------------------------------------- /scripts/distributed/tail-workers: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | CMD_STR="cluster-mux -p su2 -c 'tail -f /tmp/bidmach_worker.log'" 3 | if [[ $(whoami) != "aleks" ]]; then 4 | sudo su aleks -c "$CMD_STR" 5 | else 6 | eval $CMD_STR 7 | fi 8 | -------------------------------------------------------------------------------- /scripts/distributed/testrecv.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import
BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | import scala.concurrent.Future 18 | import scala.concurrent.ExecutionContext.Implicits.global 19 | 20 | Mat.checkMKL(false) 21 | Mat.checkCUDA 22 | 23 | val data_dir = "/mnt/BIDMach/data/MNIST8M/parts/" 24 | val (nn, nnopts) = GLM.learner(data_dir+"data%02d.fmat.lz4", data_dir+"cats%02d.fmat.lz4") 25 | 26 | nnopts.useGPU = true; 27 | // nnopts.nstart = 0; 28 | // nnopts.nend = 0; 29 | nnopts.order = 0; 30 | nnopts.lookahead = 2; 31 | nnopts.featType = 1; 32 | nnopts.links = 2*iones(10,1); 33 | nnopts.eltsPerSample = 300; 34 | nnopts.targets = mkdiag(ones(10,1)) \ zeros(10, 784); 35 | nnopts.rmask = zeros(1,10) \ ones(1, 784); 36 | 37 | nnopts.batchSize = 500; 38 | nnopts.npasses = 1; 39 | nnopts.lrate = 0.001; // for logistic 40 | 41 | val w = new Worker(); 42 | val wopts = w.opts; 43 | wopts.trace = 4; 44 | wopts.machineTrace = 1; 45 | 46 | w.start(nn) 47 | 48 | nn.paused = true 49 | -------------------------------------------------------------------------------- /scripts/distributed/testsend.ssc: -------------------------------------------------------------------------------- 1 | import java.net.{InetAddress,InetSocketAddress} 2 | import BIDMach.allreduce.{Master,Worker,Command} 3 | 4 | var addresses = scala.io.Source.fromFile("/opt/spark/conf/slaves").getLines. 5 | map(InetAddress.getByName(_).getHostAddress()). 
6 | map(new InetSocketAddress(_, 50050)).toArray 7 | 8 | // addresses = addresses.slice(0, 1) 9 | 10 | val m = new Master(); 11 | val opts = m.opts; 12 | opts.trace = 3; 13 | opts.intervalMsec = 2000; 14 | //opts.limitFctn = Master.powerLimitFctn 15 | opts.limit = 1000000 16 | opts.timeScaleMsec = 2e-3f 17 | opts.permuteAlways = false 18 | 19 | opts.machine_threshold = 0.75 20 | opts.min_time_to_wait_for_all = 3000 21 | opts.time_threshold = 5000 22 | 23 | 24 | val nmachines = addresses.length; 25 | 26 | val gmods = irow(nmachines); 27 | val gmachines = irow(0->nmachines); 28 | 29 | m.init 30 | m.config(gmods, gmachines, addresses) 31 | m.setMachineNumbers 32 | m.sendConfig 33 | 34 | // m.parCall((w) => { w.learner.opts.npasses = 10; "done" }) 35 | // m.parCall((w) => { w.learner.opts.asInstanceOf[GLM.FGOptions].nstart = w.imach * 20; "done"}) 36 | // m.parCall((w) => { w.learner.opts.asInstanceOf[GLM.FGOptions].nend = (w.imach+1) * 20; "done"}) 37 | // m.parCall((w) => { w.learner.paused = true; "done"}) 38 | // m.parCall((w) => { w.learner.train; "not reached" }) // this will hang, just wait for it to timeout 39 | 40 | // m.startLearners 41 | // m.startUpdates 42 | 43 | //Testing by Max 44 | -------------------------------------------------------------------------------- /scripts/distributed/worker_distr_lr_rcv.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | Mat.checkMKL(false) 18 | Mat.checkCUDA 19 | 20 | val datadir = "/mnt/BIDMach/data/rcv1/" 21 | 22 | val a0 = loadSMat(datadir + "docs.smat.lz4") 23 | val c0 = loadFMat(datadir + "cats.fmat.lz4")(0->100,?) 
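// Columns of a0/c0 are documents and their labels; shuffle them so minibatches
// are unbiased samples. sort2 on a random key vector returns the sorted keys
// together with the permutation ii that achieves the sort, and indexing both
// matrices with the same ii keeps documents aligned with their labels.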
24 | val rr = rand(c0.ncols,1) 25 | val (ss, ii) = sort2(rr) 26 | val a = a0(?,ii) 27 | val c = c0(?,ii) 28 | 29 | setNumThreads(1) 30 | val (nn,opts)=GLM.learnerX(a,c,1) 31 | opts.batchSize=20000 32 | opts.lrate = 0.02f 33 | opts.npasses = 10 34 | opts.reg1weight = 0.0 35 | opts.links = iones(103,1) 36 | opts.addConstFeat = true 37 | opts.aopts = opts 38 | opts.doVariance = true 39 | opts.evalStep = 3 40 | opts.useGPU = true 41 | 42 | val w = new Worker() 43 | val wopts = w.opts 44 | wopts.trace = 4 45 | wopts.machineTrace = 1 46 | 47 | w.start(nn) 48 | 49 | nn.paused = true 50 | -------------------------------------------------------------------------------- /scripts/distributed/worker_mnist_rf.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | Mat.checkMKL(false) 18 | Mat.checkCUDA 19 | 20 | val mdir = "/mnt/BIDMach/data/MNIST8M/parts/" 21 | val (nn, opts) = RandomForest.learner(mdir+"data%02d.fmat.lz4", mdir+"cats%02d.imat.lz4") 22 | 23 | opts.nend = 70 24 | opts.batchSize = 20000 25 | opts.depth = 30 26 | opts.ntrees = (32 / 4) 27 | opts.nsamps = 32 28 | opts.nnodes = 500000 29 | opts.nbits = 16 30 | opts.gain = 0.001f 31 | opts.ncats = 10 32 | 33 | val w = new Worker() 34 | val wopts = w.opts 35 | wopts.trace = 4 36 | wopts.machineTrace = 1 37 | 38 | w.start(nn) 39 | 40 | nn.paused = true 41 | -------------------------------------------------------------------------------- /scripts/factorNet_test.ssc: -------------------------------------------------------------------------------- 1 | // Script to test out BayesNet.scala for the general case. 2 | 3 | val data = loadFMat("data/factorNet/data.txt") 4 | val dagDense = loadIMat("data/factorNet/factorSet.txt") 5 | val states = loadIMat("data/factorNet/statePerNode.txt") 6 | 7 | // convert the dag to sparse 8 | val dag = sparse(dagDense) 9 | 10 | val (nn , opts) = BIDMach.models.BayesNet.learner(states , dag , true , data) 11 | opts.npasses = 2 12 | opts.useGPU = false 13 | opts.batchSize = 2 14 | opts.updateAll = true 15 | opts.what 16 | nn.train 17 | nn.modelmats(0).t 18 | -------------------------------------------------------------------------------- /scripts/factorNet_test2.ssc: -------------------------------------------------------------------------------- 1 | // Script to test out BayesNet.scala for factor graphs. 
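// Assumed input layout, mirroring factorNet_test.ssc: each row of factorSet.txt
// lists the variable indices that belong to one factor (loaded dense, then
// converted to sparse below), statePerNode.txt gives per-variable cardinalities,
// and the true flag tells BayesNet.learner to treat the model as a factor graph.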
2 | 3 | val data = loadFMat("data/factorNet/test2/data.txt") 4 | val dagDense = loadIMat("data/factorNet/test2/factorSet.txt") 5 | val states = loadIMat("data/factorNet/test2/statePerNode.txt") 6 | val dag = sparse(dagDense) 7 | 8 | val (nn , opts) = BIDMach.models.BayesNet.learner(states , dag , true , data) 9 | opts.npasses = 50 10 | opts.useGPU = false 11 | opts.batchSize = 10 12 | opts.what 13 | nn.train 14 | nn.modelmats(0).t 15 | -------------------------------------------------------------------------------- /scripts/futures.ssc: -------------------------------------------------------------------------------- 1 | import scala.concurrent.future 2 | import scala.concurrent.ExecutionContextExecutor 3 | import java.io._ 4 | import scala.concurrent.ExecutionContext.Implicits.global 5 | 6 | def test(n:Int) = { 7 | for (i <- 0 until n) { 8 | future { 9 | println("started %d" format i); 10 | Thread.`yield` 11 | println("done %d" format i); 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /scripts/getcirfar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd "${BIDMACH_SCRIPTS}" 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | echo "Loading CIFAR10 data" 18 | 19 | CIFAR10="${BIDMACH_SCRIPTS}/../data/CIFAR10" 20 | mkdir -p ${CIFAR10}/parts 21 | cd ${CIFAR10} 22 | 23 | if [ ! -e t10k-labels-idx1-ubyte ]; then 24 | wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 25 | tar -xf cifar-10-binary.tar.gz 26 | rm -f cifar-10-binary.tar.gz 27 | mv cifar-10-batches-bin/* . 28 | rm -rf cifar-10-batches-bin 29 | fi 30 | 31 | echo "Processing CIFAR10 data" 32 | cd "${BIDMACH_SCRIPTS}" 33 | ../bidmach processcifar10.ssc -------------------------------------------------------------------------------- /scripts/getcriteo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../data/criteo 4 | mkdir -p parts 5 | 6 | split -a 2 -d -l 500000 train.txt parts/train 7 | split -a 2 -d -l 500000 test.txt parts/test 8 | 9 | cd ../../scripts 10 | 11 | bidmach readcriteo.ssc 12 | 13 | bidmach buildcriteo.ssc 14 | 15 | bidmach sortcriteo.ssc 16 | -------------------------------------------------------------------------------- /scripts/getdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | BIDMACH_ROOT="${BASH_SOURCE[0]}" 5 | if [ ! 
`uname` = "Darwin" ]; then 6 | BIDMACH_ROOT=`readlink -f "${BIDMACH_ROOT}"` 7 | else 8 | while [ -L "${BIDMACH_ROOT}" ]; do 9 | BIDMACH_ROOT=`readlink "${BIDMACH_ROOT}"` 10 | done 11 | fi 12 | BIDMACH_ROOT=`dirname "${BIDMACH_ROOT}"` 13 | BIDMACH_ROOT=`cd ${BIDMACH_ROOT}/..;pwd -P` 14 | BIDMACH_ROOT="$( echo ${BIDMACH_ROOT} | sed 's+/cygdrive/\(.\)+\1:+' )" 15 | 16 | cd "${BIDMACH_ROOT}/scripts" 17 | 18 | ./getrcv1.sh 19 | 20 | ./getuci.sh nips 21 | 22 | ./getuci.sh nytimes 23 | 24 | ./getdigits.sh 25 | 26 | ./getmovies.sh 27 | 28 | ./getmnist.sh 29 | 30 | ./getcifar10.sh 31 | 32 | ./getmnist8m.sh 33 | 34 | # this one is huge, make sure you really want it 35 | # ./getuci.sh pubmed 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /scripts/getdigits.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading arabic digits data" 20 | 21 | UCI=${BIDMACH_SCRIPTS}/../data/uci 22 | cd $UCI 23 | 24 | if [ ! -e Train_Arabic_Digit.txt ]; then 25 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/00195/Train_Arabic_Digit.txt 26 | fi 27 | 28 | sed -e 's/^[[:space:]]*$/0 0 0 0 0 0 0 0 0 0 0 0 0/g' Train_Arabic_Digit.txt > arabic.txt 29 | cd ${UCI} 30 | #${BIDMACH_SCRIPTS}/../bidmach "-e" "BIDMach.Experiments.DIGITS.preprocess(\"${UCI}/\",\"arabic\")" 31 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/getdigits.ssc 32 | 33 | if [ -e "arabic.txt" ]; then 34 | rm arabic.txt 35 | fi 36 | -------------------------------------------------------------------------------- /scripts/getdigits.ssc: -------------------------------------------------------------------------------- 1 | object DIGITS { 2 | def preprocess(dict:String, fname:String) { 3 | println("Processing digits"); 4 | val mat = loadFMat(dict+fname+".txt") 5 | val srow = sum(abs(mat),2) 6 | val inds = IMat((cumsum(srow==0)-1)/660) 7 | val ii = find(srow > 0) 8 | val mm = mat(ii,?) 9 | val inn = inds(ii,?) 10 | saveFMat(dict+fname+".fmat.lz4", mm.t) 11 | val cats = zeros(mm.nrows, maxi(inn).v + 1) 12 | cats(icol(0->(inn.nrows)) + inn*mm.nrows) = 1f 13 | saveFMat(dict+fname+"_cats.fmat.lz4", cats.t) 14 | } 15 | } 16 | 17 | DIGITS.preprocess("","arabic") 18 | println("done"); 19 | sys.exit -------------------------------------------------------------------------------- /scripts/getmnist8m.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run this to load the MNIST8M data 3 | 4 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 5 | if [ ! 
`uname` = "Darwin" ]; then 6 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 7 | export WGET='wget -c --no-check-certificate' 8 | else 9 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 10 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 11 | done 12 | export WGET='curl -C - --retry 20 -O' 13 | fi 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | 20 | echo "Loading MNIST8M data" 21 | 22 | MNIST8M="${BIDMACH_SCRIPTS}/../data/MNIST8M" 23 | mkdir -p ${MNIST8M}/parts 24 | cd ${MNIST8M} 25 | 26 | ${WGET} http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist8m.bz2 27 | 28 | echo "Uncompressing MNIST8M data" 29 | 30 | bunzip2 -c mnist8m.bz2 > mnist8m.libsvm 31 | 32 | echo "Splitting MNIST8M data" 33 | 34 | if [ ! `uname` = "Darwin" ]; then 35 | split -l 100000 -d mnist8m.libsvm parts/part 36 | else 37 | split -l 100000 mnist8m.libsvm parts/part 38 | j=0 39 | for i in {a..z}{a..z}; do 40 | jj=`printf "%02d" $j` 41 | mv parts/part$i parts/part$jj 42 | j=$((j+1)) 43 | if [ $j -gt 80 ]; then break; fi 44 | done 45 | fi 46 | 47 | cd ${MNIST8M}/parts 48 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/processmnist8m.ssc 49 | -------------------------------------------------------------------------------- /scripts/getmnist8m_finesplit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # run this to load the MNIST8M data 3 | 4 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 5 | if [ ! `uname` = "Darwin" ]; then 6 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 7 | export WGET='wget -c --no-check-certificate' 8 | else 9 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 10 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 11 | done 12 | export WGET='curl -C - --retry 20 -O' 13 | fi 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | if [[ -z "$BIDMACH_DATA_HOME" ]]; then 20 | echo '$BIDMACH_DATA_HOME environment variable not set, aborting!' 1>&2 21 | exit 1 22 | fi 23 | 24 | echo "Loading MNIST8M data" 25 | MNIST8M="${BIDMACH_DATA_HOME}/MNIST8M" 26 | mkdir -p ${MNIST8M}/parts_fine 27 | cd ${MNIST8M} 28 | 29 | ${WGET} http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/mnist8m.bz2 30 | 31 | echo "Uncompressing MNIST8M data" 32 | 33 | bunzip2 -c mnist8m.bz2 > mnist8m.libsvm 34 | 35 | echo "Splitting MNIST8M data" 36 | 37 | if [ ! `uname` = "Darwin" ]; then 38 | split -l 10000 -a 3 -d mnist8m.libsvm parts_fine/part 39 | else 40 | split -l 10000 -a 3 mnist8m.libsvm parts_fine/part 41 | j=0 42 | for i in {a..z}{a..z}{a..z}; do 43 | jj=`printf "%03d" $j` 44 | mv parts_fine/part$i parts_fine/part$jj 45 | j=$((j+1)) 46 | if [ $j -gt 800 ]; then break; fi 47 | done 48 | fi 49 | 50 | cd ${MNIST8M}/parts_fine 51 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/processmnist8m_finesplit.ssc 52 | -------------------------------------------------------------------------------- /scripts/getmovies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! 
`uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading movielens 10M data" 20 | 21 | ML=${BIDMACH_SCRIPTS}/../data/movielens 22 | mkdir -p ${ML} 23 | cd ${ML} 24 | 25 | if [ ! -e ml-10m.zip ]; then 26 | ${WGET} http://files.grouplens.org/datasets/movielens/ml-10m.zip 27 | fi 28 | 29 | unzip -o ml-10m.zip 30 | cd ml-10M100K 31 | ./split_ratings.sh 32 | for i in 1 2 3 4 5 a b; do 33 | mv r${i}.train r${i}.train.txt 34 | mv r${i}.test r${i}.test.txt 35 | done 36 | cd ${BIDMACH_SCRIPTS} 37 | 38 | ../bidmach getmovies.ssc 39 | -------------------------------------------------------------------------------- /scripts/getmovies.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/movielens/ml-10M100K/" 2 | val nusers = 71567 3 | val nmovies = 65134 4 | 5 | println("Converting movies") 6 | for (i <- List("1","2","3","4","5","a","b")) { 7 | val a = loadDMat(dir + "r" + i + ".train.txt"); 8 | val sa = sparse(IMat(a(?,1))-1, IMat(a(?,0))-1, FMat(a(?,2)), nmovies, nusers); 9 | sa.check 10 | saveSMat(dir+"../train%s.smat.lz4" format i, sa); 11 | 12 | val b = loadDMat(dir + "r" + i + ".test.txt"); 13 | val sb = sparse(IMat(b(?,1))-1, IMat(b(?,0))-1, FMat(b(?,2)), nmovies, nusers); 14 | sb.check 15 | saveSMat(dir+"../test%s.smat.lz4" format i, sb); 16 | print("."); 17 | } 18 | println("") 19 | val a=loadSMat(dir+"../train1.smat.lz4"); 20 | val ta=loadSMat(dir+"../test1.smat.lz4"); 21 | val aa = a + ta; 22 | val (ii, jj, vv) = find3(aa); 23 | val rs = rand(ii.length, 1) < 0.1; 24 | val itrain = find(rs == 0); 25 | val itest = find(rs); 26 | val train = sparse(ii(itrain), jj(itrain), vv(itrain), nmovies, nusers); 27 | val test = sparse(ii(itest), jj(itest), vv(itest), nmovies, nusers); 28 | saveSMat(dir+"../train.smat.lz4", train); 29 | saveSMat(dir+"../test.smat.lz4", test); 30 | println("done"); 31 | sys.exit -------------------------------------------------------------------------------- /scripts/getpubmed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Run this to load and partition pubmed data 3 | 4 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 5 | if [ ! 
`uname` = "Darwin" ]; then 6 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 7 | export WGET='wget --no-check-certificate' 8 | else 9 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 10 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 11 | done 12 | export WGET='curl --retry 2 -O' 13 | fi 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | 20 | echo "Loading pubmed data" 21 | 22 | ${BIDMACH_SCRIPTS}/getuci.sh pubmed 23 | 24 | cd "${BIDMACH_SCRIPTS}/../data/uci" 25 | mkdir -p pubmed_parts 26 | 27 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/processpubmed.ssc 28 | -------------------------------------------------------------------------------- /scripts/getuci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl -C - --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading $1 data" 20 | 21 | UCI="${BIDMACH_SCRIPTS}/../data/uci/${1}" 22 | mkdir -p ${UCI} 23 | cd ${UCI} 24 | 25 | if [ ! -e docword.txt.gz ]; then 26 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/docword.${1}.txt.gz 27 | mv docword.${1}.txt.gz docword.txt.gz 28 | fi 29 | if [ ! -e vocab.txt ]; then 30 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.${1}.txt 31 | mv vocab.${1}.txt vocab.txt 32 | fi 33 | 34 | echo "Uncompressing docword.${1}.txt.gz" 35 | gunzip -c "docword.txt.gz" | tail -n +4 > "docword.txt" 36 | ${BIDMACH_SCRIPTS}/../cbin/tparse.exe -i "docword.txt" -f "${UCI}/../../uci_fmt.txt" -o "" -m "" -d " " -c 37 | ${BIDMACH_SCRIPTS}/../cbin/tparse.exe -i "vocab.txt" -f "${UCI}/../../uci_wfmt.txt" -o "" -m "" -c 38 | cd ${BIDMACH_SCRIPTS}/.. 
39 | cd ${UCI} 40 | ${BIDMACH_SCRIPTS}/../bidmach ${BIDMACH_SCRIPTS}/getuci.ssc 41 | mv "smat.lz4" "../${1}.smat.lz4" 42 | mv "term.sbmat.gz" "../${1}.term.sbmat.gz" 43 | mv "term.imat.gz" "../${1}.term.imat.gz" 44 | if [ -e "docword.txt" ]; then 45 | echo "clearing up" 46 | rm docword.txt 47 | fi 48 | -------------------------------------------------------------------------------- /scripts/getuci.ssc: -------------------------------------------------------------------------------- 1 | 2 | 3 | object UCI { 4 | def preprocess(dict:String, fname:String) { 5 | println("Processing "+fname); 6 | tic; 7 | val cols = loadIMat(dict+fname+"cols.imat.gz") 8 | val rows = loadIMat(dict+fname+"rows.imat.gz") 9 | val values = loadFMat(dict+fname+"vals.fmat.gz") 10 | val m = cols2sparse(rows, cols, values, true, 1) 11 | saveSMat(dict+fname+"smat.lz4", m) 12 | } 13 | } 14 | 15 | UCI.preprocess("","") 16 | println("done"); 17 | sys.exit 18 | 19 | -------------------------------------------------------------------------------- /scripts/getw2vdata.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | SCRIPTDIR=`pwd` 4 | 5 | mkdir -p ../data/word2vec/raw 6 | cd ../data/word2vec/raw 7 | 8 | if [ ! -e 1-billion-word-language-modeling-benchmark-r13output.tar.gz ]; then 9 | echo "Downloading" 10 | wget http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz 11 | fi 12 | 13 | if [ ! -d 1-billion-word-language-modeling-benchmark-r13output ]; then 14 | echo "Uncompressing" 15 | tar xvzf 1-billion-word-language-modeling-benchmark-r13output.tar.gz 16 | # fix the misplaced first news item 17 | mv 1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/news.en-00000-of-00100 \ 18 | 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled 19 | fi 20 | 21 | cd 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled 22 | 23 | FILES=`echo news.en*00100 | sed 's/ /,/g'` 24 | 25 | mkdir -p ${SCRIPTDIR}/../data/word2vec/tokenized 26 | mkdir -p ${SCRIPTDIR}/../data/word2vec/tokenized2 27 | mkdir -p ${SCRIPTDIR}/../data/word2vec/data 28 | 29 | ${SCRIPTDIR}/../cbin/tparse2.exe -i "${FILES}" -f ../../fmt.txt -o ${SCRIPTDIR}/../data/word2vec/tokenized/ -c 30 | 31 | cd ${SCRIPTDIR}/../data/word2vec/raw/1-billion-word-language-modeling-benchmark-r13output/heldout-monolingual.tokenized.shuffled/ 32 | 33 | FILES=`echo news.en*00050 | sed 's/ /,/g'` 34 | 35 | ${SCRIPTDIR}/../cbin/tparse2.exe -i "${FILES}" -f ../../fmt.txt -o ${SCRIPTDIR}/../data/word2vec/tokenized2/ -c 36 | 37 | cd ${SCRIPTDIR} 38 | 39 | bidmach getw2vdata.ssc 40 | 41 | 42 | -------------------------------------------------------------------------------- /scripts/getyearprediction.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! 
`uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | export WGET='wget -c --no-check-certificate' 7 | else 8 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 9 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 10 | done 11 | export WGET='curl -C - --retry 2 -O' 12 | fi 13 | 14 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 15 | cd ${BIDMACH_SCRIPTS} 16 | BIDMACH_SCRIPTS=`pwd` 17 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 18 | 19 | echo "Loading $1 data" 20 | 21 | YP="${BIDMACH_SCRIPTS}/../data/YearPrediction" 22 | mkdir -p ${YP} 23 | cd ${YP} 24 | 25 | ${WGET} http://archive.ics.uci.edu/ml/machine-learning-databases/00203/YearPredictionMSD.txt.zip 26 | 27 | unzip YearPredictionMSD.txt.zip 28 | 29 | cd ${BIDMACH_SCRIPTS} 30 | 31 | bidmach getyearprediction.ssc 32 | 33 | -------------------------------------------------------------------------------- /scripts/getyearprediction.ssc: -------------------------------------------------------------------------------- 1 | 2 | val wdir = "../data/YearPrediction/" 3 | val a = loadFMat(wdir+"YearPredictionMSD.txt"); 4 | 5 | val labels = a(?,0); 6 | val data = a(?,1->a.ncols); 7 | 8 | saveFMat(wdir+"train.fmat.lz4", data.t); 9 | saveFMat(wdir+"cats.fmat.lz4", labels.t); 10 | saveIMat(wdir+"cats.imat.lz4", IMat(labels.t)); 11 | 12 | sys.exit; -------------------------------------------------------------------------------- /scripts/higgsdnn.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/Higgs/parts/" 2 | 3 | val (nn, opts) = Net.learner(dir+"data%03d.fmat.lz4", dir+"label%03d.fmat.lz4") 4 | 5 | opts.nend = 10 6 | opts.npasses = 5 7 | opts.batchSize = 200 8 | opts.reg1weight = 0.0001; 9 | opts.hasBias = true; 10 | opts.links = iones(1,1); 11 | opts.lrate = 0.01f; 12 | opts.texp = 0.4f; 13 | opts.evalStep = 311; 14 | opts.nweight = 1e-4f 15 | val net = Net.dnodes3(4, 500, 0.6f, 1, opts, 2); 16 | opts.nodeset = net 17 | opts.lookahead = 0; 18 | 19 | val model = nn.model.asInstanceOf[Net] 20 | 21 | nn.train 22 | 23 | val ta = loadFMat(dir + "data%03d.fmat.lz4" format 10); 24 | val tc = loadFMat(dir + "label%03d.fmat.lz4" format 10); 25 | 26 | val (mm,mopts) = Net.predictor(model, ta); 27 | 28 | mopts.batchSize=1000 29 | 30 | mm.predict 31 | 32 | val pc = FMat(mm.preds(0)) 33 | 34 | val rc = roc(pc, tc, 1-tc, 1000); 35 | 36 | mean(rc) 37 | 38 | -------------------------------------------------------------------------------- /scripts/higgsprep.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir = "../data/uci/Higgs/parts/" 3 | 4 | for (i <- 0 until 110) { 5 | val a = HMat.loadFMatTxt(dir+"data%03d" format i, null, 0); 6 | val targ = a(?,0); 7 | val data = a(?,1->a.ncols); 8 | saveFMat(dir + "data%03d.fmat.lz4" format i, data.t); 9 | saveIMat(dir + "label%03d.imat.lz4" format i, int(targ.t)); 10 | saveFMat(dir + "label%03d.fmat.lz4" format i, targ.t); 11 | print("."); 12 | } -------------------------------------------------------------------------------- /scripts/higgsrf.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/Higgs/parts/" 2 | 3 | val (nn, opts) = RandomForest.learner(dir+"data%03d.fmat.lz4", dir+"label%03d.imat.lz4") 4 | 5 | opts.nend = 10 6 | opts.batchSize = 20000 7 | opts.depth = 30 8 | opts.ntrees = 128 9 | opts.nsamps = 8 10 | opts.nnodes = 400000 11 | opts.nbits = 16 12 | opts.ncats = 2; 13 | 
opts.regression = true; 14 | opts.gain = 0.001f 15 | 16 | val rf = nn.model.asInstanceOf[RandomForest] 17 | 18 | nn.train 19 | 20 | val ta = loadFMat(dir + "data%03d.fmat.lz4" format 10); 21 | val tc = loadIMat(dir + "label%03d.imat.lz4" format 10); 22 | 23 | val (mm,mopts) = RandomForest.predictor(rf, ta); 24 | 25 | mopts.batchSize=1000 26 | 27 | mm.predict 28 | 29 | val pc = FMat(mm.preds(0)) 30 | 31 | val rc = roc(pc, tc, 1-tc, 1000); 32 | 33 | mean(rc) 34 | 35 | -------------------------------------------------------------------------------- /scripts/ica_test.ssc: -------------------------------------------------------------------------------- 1 | 2 | val output = loadFMat("ica_output.txt") 3 | val (nn, opts) = BIDMach.models.ICA.learner(output) 4 | opts.npasses = 20 5 | nn.train 6 | 7 | val predW = FMat(nn.modelmats(0)) 8 | val predMean = FMat(nn.modelmats(1)) 9 | predMean.t 10 | 11 | val result = FMat(predW * (output - predMean)) 12 | HMat.saveFMatTxt("ica_pred_source.txt", result) 13 | sys.exit 14 | -------------------------------------------------------------------------------- /scripts/make_bayesnet_data.py: -------------------------------------------------------------------------------- 1 | # I'll be using this code to generate some data for a simple Bayesian network. 2 | # (c) 2015 by Daniel Seita 3 | 4 | import numpy as np 5 | 6 | ncols = 1000000 # Change as needed 7 | nrows = 5 8 | data = np.zeros([nrows,ncols]) 9 | # First, handle variables X_0 (intelligence) and X_1 (difficulty) 10 | data[0,:] = np.random.choice(2, ncols, p = [0.7, 0.3]) 11 | data[1,:] = np.random.choice(2, ncols, p = [0.6, 0.4]) 12 | third = [] 13 | fourth = [] 14 | fifth = [] 15 | for i in range(ncols): 16 | # Variable X_2 (SAT) 17 | if data[0,i] == 0: 18 | third.append( np.random.choice(2, 1, p = [0.95, 0.05])[0] ) 19 | else: 20 | third.append( np.random.choice(2, 1, p = [0.2, 0.8])[0] ) 21 | # Variable X_3 (grade) 22 | if (data[0,i] == 0 and data[1,i] == 0): 23 | fourth.append( np.random.choice(3, 1, p = [0.3, 0.4, 0.3])[0] ) 24 | elif (data[0,i] == 0 and data[1,i] == 1): 25 | fourth.append( np.random.choice(3, 1, p = [0.05, 0.25, 0.7])[0] ) 26 | elif (data[0,i] == 1 and data[1,i] == 0): 27 | fourth.append( np.random.choice(3, 1, p = [0.9, 0.08, 0.02])[0] ) 28 | else: 29 | fourth.append( np.random.choice(3, 1, p = [0.5, 0.3, 0.2])[0] ) 30 | # Variable X_4 (letter) 31 | if fourth[i] == 0: 32 | fifth.append( np.random.choice(2, 1, p = [0.1, 0.9])[0] ) 33 | elif fourth[i] == 1: 34 | fifth.append( np.random.choice(2, 1, p = [0.4, 0.6])[0] ) 35 | else: 36 | fifth.append( np.random.choice(2, 1, p = [0.99, 0.01])[0] ) 37 | data[2,:] = third 38 | data[3,:] = fourth 39 | data[4,:] = fifth 40 | np.savetxt('dataStudent_' + str(ncols) + '.txt', data, fmt='%i') 41 | -------------------------------------------------------------------------------- /scripts/mnistkmeans.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/MNIST8M/parts/" 2 | val (nn, opts) = KMeans.learner(dir+"alls%02d.fmat.lz4"); 3 | 4 | val test = loadFMat(dir+"alls80.fmat.lz4"); 5 | val testdata = test.copy; 6 | testdata(0->10, ?) 
= 0; 7 | val (vbest, ibest) = maxi2(test); 8 | 9 | opts.dim = 256; 10 | opts.nend = 80; 11 | opts.batchSize = 5000; 12 | opts.npasses = 10; 13 | 14 | nn.train 15 | val centroids = FMat(nn.modelmats(0)); 16 | 17 | val (mm, opts) = KMeans.predictor(nn.model, testdata); 18 | 19 | mm.predict 20 | val preds = IMat(mm.preds(0)) 21 | 22 | val (dmy, cmap) = maxi2(centroids, 2); 23 | 24 | mean(float(ibest == cmap(preds))) 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /scripts/mnistkmeans2.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/MNIST8M/parts/" 2 | val (nn, opts) = KMeans.learner(dir+"alls%02d.fmat.lz4"); 3 | 4 | val test = loadFMat(dir+"alls70.fmat.lz4"); 5 | val testdata = test.copy; 6 | testdata(0->10, ?) = 0; 7 | val (vbest, ibest) = maxi2(test); 8 | 9 | opts.dim = 1000; 10 | opts.nend = 40; 11 | opts.batchSize = 5000; 12 | opts.npasses = 10; 13 | opts.useGPU=false 14 | 15 | nn.train 16 | val centroids = FMat(nn.modelmats(0)); 17 | 18 | val (mm, opts) = KMeans.predictor(nn.model, testdata); 19 | 20 | mm.predict 21 | val preds = IMat(mm.preds(0)) 22 | 23 | val (dmy, cmap) = maxi2(centroids, 2); 24 | 25 | mean(float(ibest == cmap(preds))) 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /scripts/mnistlr.ssc: -------------------------------------------------------------------------------- 1 | val dir="../data/MNIST8M/parts/" 2 | val (nn, opts) = GLM.learner(List(dir+"part%02d.fmat.lz4",dir+"cats%02d.fmat.lz4")) 3 | 4 | opts.useGPU = true; 5 | opts.nend = 70; 6 | opts.order = 0; 7 | opts.lookahead = 2; 8 | opts.featType = 1; 9 | opts.links = 2*iones(10,1); 10 | opts.eltsPerSample = 300; 11 | opts.targets = mkdiag(ones(10,1)) \ zeros(10, 784); 12 | opts.rmask = zeros(1,10) \ ones(1, 784); 13 | 14 | opts.batchSize = 500; 15 | opts.npasses = 1; 16 | opts.lrate = 0.001; // for logistic 17 | 18 | nn.train 19 | 20 | println("Done training. 
Checking training set accuracy...") 21 | 22 | val randPartNum = scala.util.Random.nextInt(80) 23 | 24 | var test = loadFMat(dir+"data%02d.fmat.lz4" format randPartNum) 25 | val tcats = loadFMat(dir+"cats%02d.fmat.lz4" format randPartNum) 26 | val tcat = maxi2(tcats, 1)._2 27 | 28 | val pmodel = new GLM(new GLM.PredOptions()); 29 | pmodel.copyFrom(nn.model); 30 | val popts = pmodel.opts.asInstanceOf[GLM.Opts] 31 | popts.targmap = opts.targmap; 32 | popts.links = opts.links; 33 | popts.targets = null 34 | popts.iweight = opts.iweight; 35 | popts.lim = opts.lim; 36 | popts.hashFeatures = opts.hashFeatures; 37 | popts.hashBound1 = opts.hashBound1; 38 | popts.hashBound2 = opts.hashBound2; 39 | 40 | val (pp, ppopts) = GLM.predictor(pmodel, test) 41 | pp.predict 42 | 43 | val preds = FMat(pp.preds(0)) 44 | 45 | val rocs = roc2(preds, tcats, 1-tcats, 100) 46 | 47 | println("Training AUCs:\n%s" format ((0 to 9) on mean(rocs))) 48 | -------------------------------------------------------------------------------- /scripts/networks/evalAlexnet.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.layers._ 2 | 3 | val traindir = "../../data/ImageNet/train/"; 4 | //val traindir = "/home/jfc/data/ImageNet/2012/BIDMach/train/"; 5 | val testdir = "../../data/ImageNet/val/"; 6 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 7 | val trainlabels = traindir+"label%04d.imat.lz4"; 8 | val testdata = testdir+"partNCHW%04d.bmat.lz4"; 9 | val testlabels = testdir+"label%04d.imat.lz4"; 10 | val testpreds = testdir+"pred%04d.fmat.lz4"; 11 | 12 | val (nn, opts) = Net.gradLearner(traindata, trainlabels); 13 | val net = nn.model.asInstanceOf[Net] 14 | 15 | // Load the most recent checkpoint matching the checkpoint filename template 16 | opts.checkPointFile = "../../models/AlexnetFullyTrained/alexnet%03d/" 17 | nn.loadCheckPoint(); 18 | opts.checkPointFile = null; 19 | 20 | // Enter the number of epochs completed already 21 | val doneEpochs = 79; 22 | val lrinit = 1e-2f; 23 | 24 | def lr_update(ipass0:Float, istep:Float, frac:Float):Float = { 25 | val ipass = ipass0 + doneEpochs; 26 | val lr = if (ipass < 20) { 27 | lrinit 28 | } else if (ipass < 40) { 29 | lrinit/10 30 | } else lrinit/100 31 | lr 32 | } 33 | 34 | opts.logfile = "logAlexnet_%fc.txt" format (lrinit); 35 | opts.lr_policy = lr_update _; 36 | opts.npasses = 1; 37 | 38 | nn.launchTrain; 39 | 40 | println("Examine the 'nn' variable to track learning state.\n"); 41 | 42 | 43 | def validate = { 44 | val (mm, mopts) = Net.predictor(net, testdata, testlabels, testpreds); 45 | mopts.batchSize = opts.batchSize 46 | mopts.nodeset(mopts.nodeset.length-1).asInstanceOf[SoftmaxOutputNode].lossType=SoftmaxOutputLayer.TargetProbs 47 | mm.predict; 48 | println("Accuracy = %f" format mean(mm.results(0,?),2).v); 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /scripts/networks/getImageNet.ssc: -------------------------------------------------------------------------------- 1 | val doNCHW = true; 2 | val tt = "val"; 3 | 4 | val imagenetroot = "/data/ImageNet/2012resized/"+tt+"/"; 5 | val dataroot = "../../data/ImageNet/"; 6 | val savefname = if (doNCHW) tt+"/partNCHW%04d.bmat.lz4" else tt+"/part%04d.bmat.lz4"; 7 | val labelfname = tt+"/label%04d.imat.lz4"; 8 | val namesfname = tt+"/names%04d.csmat.txt"; 9 | val loadtable = loadCSMat(dataroot+tt+".txt"); 10 | 11 | val bsize = 1024; 12 | 13 | val nimgs = loadtable.nrows; 14 | 15 | val fnames =
loadtable(?,0); 16 | val alllabels = loadtable(?,1).toIMat; 17 | 18 | val perm = randperm(nimgs); 19 | val mat = zeros(3 \ 256 \ 256 \ bsize); 20 | val labels = izeros(1, bsize); 21 | val names = CSMat(bsize,1); 22 | var i = 0; 23 | var jin = 0; 24 | while (jin < nimgs) { 25 | val todo = math.min(bsize, nimgs - jin); 26 | var j = 0; 27 | while (j < todo && jin < nimgs) { 28 | val indx = perm(jin); 29 | try { 30 | val im = loadImage(imagenetroot+fnames(indx)); 31 | val mm = im.toFMat(0->3,?,?).reshapeView(3,256,256,1); 32 | mat(?,?,?,j) = mm; 33 | labels(0, j) = alllabels(indx); 34 | names(j) = fnames(indx); 35 | j += 1; 36 | } catch { 37 | case e:Exception => println("\nProblem reading %s, continuing" format fnames(indx)); 38 | } 39 | jin += 1; 40 | } 41 | if (j == bsize) { 42 | saveBMat(dataroot+savefname format i, BMat(if (doNCHW) mat.fromNHWCtoNCHW else mat)); 43 | saveIMat(dataroot+labelfname format i, labels); 44 | saveCSMat(dataroot+namesfname format i, names); 45 | } else { 46 | val mc = mat.colslice(0,j); 47 | saveBMat(dataroot+savefname format i, BMat(if (doNCHW) mc.fromNHWCtoNCHW else mc)); 48 | saveIMat(dataroot+labelfname format i, labels.colslice(0,j)); 49 | saveCSMat(dataroot+namesfname format i, names(0->j,0)); 50 | } 51 | i += 1; 52 | print("."); 53 | } 54 | Mat.useCache=false; 55 | println(""); 56 | 57 | -------------------------------------------------------------------------------- /scripts/networks/getImageNetLabels.ssc: -------------------------------------------------------------------------------- 1 | val tt = "train"; 2 | 3 | val dataroot = "../../data/ImageNet/"; 4 | val labelfname = dataroot+tt+"/label%04d.imat.lz4"; 5 | val labelsout = dataroot+tt+"/labels%04d.fmat.lz4"; 6 | 7 | val bsize = 1024; 8 | val nparts = 1252; 9 | 10 | print("\nComputing one-hot labels"); 11 | val omat = zeros(1000,bsize); 12 | val coln = irow(0->bsize) *@ 1000; 13 | for (i <- 0 until nparts) { 14 | val mat = loadIMat(labelfname format i); 15 | omat.clear; 16 | val inds = mat + coln(0,0->mat.ncols); 17 | omat(inds) = 1f; 18 | if (mat.ncols == bsize) { 19 | saveFMat(labelsout format i, omat); 20 | } else { 21 | saveFMat(labelsout format i, omat.colslice(0,mat.ncols)); 22 | } 23 | print("."); 24 | } 25 | println(""); 26 | 27 | -------------------------------------------------------------------------------- /scripts/networks/getImageNetMeans.ssc: -------------------------------------------------------------------------------- 1 | val tt = "train"; 2 | 3 | val traindir = "../../data/ImageNet/train/"; 4 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 5 | 6 | val bsize = 1024; 7 | //val nparts = 1252; 8 | val nparts = 125; 9 | 10 | var nimgs = 0L; 11 | val msum = dzeros(3\256\256\1); 12 | 13 | print("\nComputing mean"); 14 | val times = zeros(1,4) 15 | for (i <- 0 until nparts) { 16 | tic; 17 | val mat = loadBMat(traindata format i); 18 | val t1 = toc; 19 | val fmat = unsignedFloat(mat); 20 | val t2 = toc; 21 | val tmpsum = fmat.sum(irow(3)); 22 | val t3 = toc; 23 | msum ~ msum + DMat(tmpsum); 24 | val t4 = toc; 25 | times ~ times + row(t1,t2-t1,t3-t2,t4-t3); 26 | nimgs = nimgs + fmat.ncols; 27 | print("."); 28 | } 29 | println(""); 30 | 31 | msum ~ msum / nimgs.toDouble; 32 | val means = FMat(msum); 33 | saveFMat(traindir+"/means.fmat.lz4", means); 34 | -------------------------------------------------------------------------------- /scripts/networks/getcifar10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 
BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd "${BIDMACH_SCRIPTS}" 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | echo "Loading CIFAR10 data" 18 | 19 | CIFAR10="${BIDMACH_SCRIPTS}/../../data/CIFAR10" 20 | mkdir -p ${CIFAR10}/parts 21 | cd ${CIFAR10} 22 | 23 | if [ ! -e t10k-labels-idx1-ubyte ]; then 24 | wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz 25 | tar -xf cifar-10-binary.tar.gz 26 | rm -f cifar-10-binary.tar.gz 27 | mv cifar-10-batches-bin/* . 28 | rm -rf cifar-10-batches-bin 29 | fi 30 | 31 | echo "Processing CIFAR10 data" 32 | cd "${BIDMACH_SCRIPTS}" 33 | ../../bidmach processcifar10.ssc 34 | -------------------------------------------------------------------------------- /scripts/networks/getcifar100.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd "${BIDMACH_SCRIPTS}" 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | echo "Loading CIFAR100 data" 18 | 19 | CIFAR100="${BIDMACH_SCRIPTS}/../../data/CIFAR100" 20 | mkdir -p ${CIFAR100}/parts 21 | cd ${CIFAR100} 22 | 23 | if [ ! -e t10k-labels-idx1-ubyte ]; then 24 | wget http://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz 25 | tar -xf cifar-100-binary.tar.gz 26 | rm -f cifar-100-binary.tar.gz 27 | mv cifar-100-binary/* . 28 | rm -rf cifar-100-binary 29 | fi 30 | 31 | echo "Processing CIFAR100 data" 32 | cd "${BIDMACH_SCRIPTS}" 33 | ../../bidmach processcifar100.ssc 34 | -------------------------------------------------------------------------------- /scripts/networks/getmnist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | BIDMACH_SCRIPTS="${BASH_SOURCE[0]}" 4 | if [ ! `uname` = "Darwin" ]; then 5 | BIDMACH_SCRIPTS=`readlink -f "${BIDMACH_SCRIPTS}"` 6 | else 7 | while [ -L "${BIDMACH_SCRIPTS}" ]; do 8 | BIDMACH_SCRIPTS=`readlink "${BIDMACH_SCRIPTS}"` 9 | done 10 | alias wget='curl --retry 2 -O' 11 | fi 12 | export BIDMACH_SCRIPTS=`dirname "$BIDMACH_SCRIPTS"` 13 | cd ${BIDMACH_SCRIPTS} 14 | BIDMACH_SCRIPTS=`pwd` 15 | BIDMACH_SCRIPTS="$( echo ${BIDMACH_SCRIPTS} | sed 's+/cygdrive/\([a-z]\)+\1:+' )" 16 | 17 | 18 | echo "Loading MNIST data" 19 | 20 | MNIST="${BIDMACH_SCRIPTS}/../../data/MNIST" 21 | mkdir -p ${MNIST} 22 | cd ${MNIST} 23 | 24 | if [ ! -e train-images-idx3-ubyte ]; then 25 | wget http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz 26 | gunzip train-images-idx3-ubyte.gz 27 | fi 28 | if [ ! -e train-labels-idx1-ubyte ]; then 29 | wget http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz 30 | gunzip train-labels-idx1-ubyte.gz 31 | fi 32 | 33 | if [ ! 
-e t10k-images-idx3-ubyte ]; then 34 | wget http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz 35 | gunzip t10k-images-idx3-ubyte.gz 36 | fi 37 | if [ ! -e t10k-labels-idx1-ubyte ]; then 38 | wget http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz 39 | gunzip t10k-labels-idx1-ubyte.gz 40 | fi 41 | -------------------------------------------------------------------------------- /scripts/networks/modelmat_test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/scripts/networks/modelmat_test -------------------------------------------------------------------------------- /scripts/networks/modelmat_test.fmat.lz4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BIDData/BIDMach/b194bd21852877e7490d782f6129cda458d9dba7/scripts/networks/modelmat_test.fmat.lz4 -------------------------------------------------------------------------------- /scripts/networks/processcifar10.ssc: -------------------------------------------------------------------------------- 1 | val inputdir = "../../data/CIFAR10/"; 2 | val outputdir = "../../data/CIFAR10/parts/"; 3 | val trainfname = "data_batch_%d.bin"; 4 | val testfname = "test_batch.bin"; 5 | val traindname = "train%d.fmat.lz4"; 6 | val trainNCHWdname = "trainNCHW%d.fmat.lz4"; 7 | val trainlabels = "labels%d.imat.lz4"; 8 | val testdname = "test0.fmat.lz4"; 9 | val testNCHWdname = "testNCHW0.fmat.lz4"; 10 | val testlabels = "testlabels0.imat.lz4"; 11 | 12 | val nparts = 5; 13 | val nimages = 10000; 14 | val nimgbytes = 3 * 32 * 32; 15 | 16 | import java.io._ 17 | 18 | val buffer = new Array[Byte](nimgbytes); 19 | val datamat = zeros(3\32\32\nimages); 20 | val labelmat = izeros(1, nimages); 21 | 22 | def getFile(fname:String, datamat:FMat, labelmat:IMat) { 23 | val ds = new DataInputStream(new FileInputStream(fname)); 24 | for (j <- 0 until nimages) { 25 | val label = ds.readByte(); 26 | labelmat(j) = label; 27 | ds.readFully(buffer, 0, nimgbytes); 28 | var k = 0; 29 | while (k < nimgbytes) { 30 | datamat.data(k + j * nimgbytes) = buffer(k) & 0xFF; 31 | k += 1; 32 | } 33 | } 34 | ds.close(); 35 | } 36 | 37 | 38 | print("\nConverting CIFAR10"); 39 | for (i <- 1 to nparts) { 40 | getFile(inputdir + trainfname format i, datamat, labelmat); 41 | saveFMat(outputdir+traindname format (i-1), datamat.fromNCHWtoNHWC); 42 | saveFMat(outputdir+trainNCHWdname format (i-1), datamat); 43 | saveIMat(outputdir+trainlabels format (i-1), labelmat); 44 | print("."); 45 | } 46 | getFile(inputdir + testfname, datamat, labelmat); 47 | saveFMat(outputdir + testdname, datamat.fromNCHWtoNHWC); 48 | saveFMat(outputdir + testNCHWdname, datamat); 49 | saveIMat(outputdir + testlabels, labelmat); 50 | print("."); 51 | 52 | println(); 53 | System.exit(0) -------------------------------------------------------------------------------- /scripts/networks/reduceRate.sc: -------------------------------------------------------------------------------- 1 | 2 | import scala.concurrent.Future 3 | import scala.concurrent.ExecutionContext.Implicits.global 4 | 5 | Future { 6 | while (opts.resScale > 0.01f) { 7 | Thread.sleep(100*1000) 8 | opts.resScale = opts.resScale * 0.99f 9 | } 10 | opts.resScale = 0f 11 | } 12 | -------------------------------------------------------------------------------- /scripts/networks/resumeAlexnet.ssc: 
-------------------------------------------------------------------------------- 1 | import BIDMach.networks.layers._ 2 | 3 | val traindir = "../../data/ImageNet/train/"; 4 | //val traindir = "/home/jfc/data/ImageNet/2012/BIDMach/train/"; 5 | val testdir = "../../data/ImageNet/val/"; 6 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 7 | val trainlabels = traindir+"label%04d.imat.lz4"; 8 | val testdata = testdir+"partNCHW%04d.bmat.lz4"; 9 | val testlabels = testdir+"label%04d.imat.lz4"; 10 | val testpreds = testdir+"pred%04d.fmat.lz4"; 11 | 12 | val (nn, opts) = Net.gradLearner(traindata, trainlabels); 13 | val net = nn.model.asInstanceOf[Net] 14 | 15 | // Load the most recent checkpoint matching the checkpoint filename template 16 | opts.checkPointFile = "../../models/alexnet%03d/" 17 | nn.loadCheckPoint(); 18 | 19 | // Enter the number of epochs completed already 20 | val doneEpochs = 0; 21 | val lrinit = 1e-2f; 22 | 23 | def lr_update(ipass0:Float, istep:Float, frac:Float):Float = { 24 | val ipass = ipass0 + doneEpochs; 25 | val lr = if (ipass < 20) { 26 | lrinit 27 | } else if (ipass < 40) { 28 | lrinit/10 29 | } else lrinit/100 30 | lr 31 | } 32 | 33 | opts.logfile = "logAlexnet_%fb.txt" format (lrinit); 34 | opts.lr_policy = lr_update _; 35 | opts.npasses = opts.npasses - doneEpochs; 36 | 37 | nn.launchTrain; 38 | 39 | println("Examine the 'nn' variable to track learning state.\n"); 40 | 41 | 42 | def validate = { 43 | val (mm, mopts) = Net.predLabels(net, testdata, testlabels); 44 | mopts.batchSize= opts.batchSize; 45 | mm.predict; 46 | println("Accuracy = %f" format mean(mm.results(0,?),2).v); 47 | } 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /scripts/networks/resumeResnet.ssc: -------------------------------------------------------------------------------- 1 | // This script must be run from BIDMach/scripts/networks 2 | 3 | import BIDMach.networks.layers._ 4 | 5 | val traindir = "../../data/ImageNet/train/"; 6 | val testdir = "../../data/ImageNet/val/"; 7 | val traindata = traindir+"partNCHW%04d.bmat.lz4"; 8 | val trainlabels = traindir+"label%04d.imat.lz4"; 9 | val testdata = testdir+"partNCHW%04d.bmat.lz4"; 10 | val testlabels = testdir+"label%04d.imat.lz4"; 11 | val testpreds = testdir+"pred%04d.fmat.lz4"; 12 | 13 | val (nn, opts) = Net.gradLearner(traindata, trainlabels); 14 | val net = nn.model.asInstanceOf[Net] 15 | 16 | // Load the most recent checkpoint matching the checkpoint filename template 17 | opts.checkPointFile = "../../models/resnet%03d/" 18 | nn.loadCheckPoint(); 19 | 20 | // Enter the number of epochs completed already 21 | val doneEpochs = 0; 22 | val lrinit = 1e-1f; 23 | 24 | def lr_update(ipass0:Float, istep:Float, frac:Float):Float = { 25 | val ipass = ipass0 + doneEpochs; 26 | val lr = if (ipass < 15) { 27 | lrinit 28 | } else if (ipass < 20) { 29 | lrinit/10 30 | } else lrinit/100 31 | lr 32 | } 33 | 34 | opts.lr_policy = lr_update _; 35 | opts.logfile = "logresv1b.txt"; 36 | opts.npasses = opts.npasses - doneEpochs; 37 | 38 | nn.launchTrain 39 | 40 | def validate = { 41 | val (mm, mopts) = Net.predLabels(net, testdata, testlabels); 42 | mopts.batchSize= opts.batchSize; 43 | mm.predict; 44 | println("Accuracy = %f" format mean(mm.results(0,?),2).v); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /scripts/networks/testTrans.sc: -------------------------------------------------------------------------------- 1 | // Sum before Layernorm 2 
| 3 | import BIDMach.networks.TransformerLT 4 | import BIDMach.networks.layers._ 5 | 6 | val ddir = "/code/BIDMach/data/wikitext/" 7 | val fname = ddir + "train/part%04d.imat.lz4" 8 | 9 | val dict = loadCSMat(ddir + "wikitext_spm_vocab.txt")(?,0) on "막" 10 | 11 | val (nn, opts) = TransformerLT.learner(fname); 12 | 13 | opts.lrate = 1e-4f 14 | opts.seqlength = 2048 15 | opts.batchSize = 2048 16 | opts.npasses = 40 17 | opts.degree = 128 18 | opts.decay = 0.999f 19 | opts.depth = 16 20 | opts.nheads = 8 21 | opts.dim = 2048 22 | opts.dim = 1024 23 | opts.indim = opts.dim 24 | opts.outdim = opts.dim 25 | opts.dropout= 0.8f; 26 | opts.normInit = 2f 27 | opts.decay = 0.999f 28 | opts.texp = 0f 29 | opts.vel_decay = 0.8f; 30 | opts.lrate = opts.lrate*(1-opts.vel_decay) 31 | opts.gsq_decay = 0.999f; 32 | opts.clip_grad_norm = 10f 33 | opts.scoreType = SoftmaxOutputLayer.CrossEntropyScore 34 | opts.pstep = 0.01f 35 | opts.useCache = false 36 | opts.useGPUcache = true 37 | //opts.resScale = 0.9f 38 | //opts.resLinks = 2 \ 4 on 5 \ 7 on 9 \ 11 on 12 \ 14 39 | //opts.resLinks = 4 \ 8 40 | 41 | val lrfinal = opts.lrate.v 42 | val lrinit = lrfinal / 2 43 | val lastepoch = 10f 44 | 45 | def lr_update(ipass:Float, istep:Float, frac:Float):Float = { 46 | val lr = if (ipass < 1) { 47 | lrinit + frac * (lrfinal - lrinit) 48 | } else { 49 | lrfinal * math.max(0f, lastepoch - frac) / (lastepoch - 1) 50 | } 51 | opts.lrate = lr; 52 | lr 53 | } 54 | 55 | opts.lr_policy = lr_update _; 56 | 57 | opts.logfile = "logTrans_d%d_n%d_m%d_lr%7.6f.txt" format (opts.degree, opts.depth, opts.dim, opts.lrate.v) 58 | 59 | val tt = nn.model.asInstanceOf[TransformerLT] 60 | 61 | //nn.train 62 | nn.launchTrain 63 | Thread.sleep(6000) 64 | 65 | 66 | val net = tt.txNets(0) 67 | val fe = tt.frontEnd 68 | val be = tt.backEnd 69 | 70 | -------------------------------------------------------------------------------- /scripts/processmnist.ssc: -------------------------------------------------------------------------------- 1 | 2 | val (d,c,w) = loadLibSVM("mnist.lsvm", 784); 3 | saveFMat("train.fmat.lz4", full(d)); 4 | saveIMat("ctrain.imat.lz4", c); 5 | val (d2,c2,w2) = loadLibSVM("mnist.t.lsvm", 784); 6 | saveFMat("test.fmat.lz4", full(d2)); 7 | saveIMat("ctest.imat.lz4", c2); 8 | 9 | -------------------------------------------------------------------------------- /scripts/processmnist8m.ssc: -------------------------------------------------------------------------------- 1 | // This script needs to be run after getmnist8m.sh 2 | // from the BIDMach/data/MNIST8M/parts directory 3 | 4 | for (i <- 0 to 80) { 5 | val (d,c,w) = loadLibSVM("part%02d" format i, 784); 6 | val fd = full(d); 7 | val fc = accum(c.t \ icol(0->c.length), 1f, 10, c.length); 8 | saveIMat("cat%02d.imat.lz4" format i, c); 9 | saveFMat("data%02d.fmat.lz4" format i, fd); 10 | saveFMat("cats%02d.fmat.lz4" format i, fc); 11 | val alls = (fc * 10000f) on fd; 12 | saveFMat("alls%02d.fmat.lz4" format i, alls); 13 | // saveFMat("allst%02d.fmat.txt" format i, alls.t); 14 | print("."); 15 | } 16 | 17 | System.exit(0) -------------------------------------------------------------------------------- /scripts/processmnist8m_finesplit.ssc: -------------------------------------------------------------------------------- 1 | // This script needs to be run after getmnist8m_finesplit.sh 2 | // from the $BIDMACH_DATA_HOME/MNIST8M/parts_fine directory 3 | 4 | for (i <- 0 until 800) { 5 | val (d,c,w) = loadLibSVM("part%03d" format i, 784); 6 | val fd = full(d); 7 | val fc = accum(c.t \ 
icol(0->c.length), 1f, 10, c.length); 8 | saveIMat("cat%03d.imat.lz4" format i, c); 9 | saveFMat("data%03d.fmat.lz4" format i, fd); 10 | saveFMat("cat_onehot%03d.fmat.lz4" format i, fc); 11 | val alls = (fc * 10000f) on fd; 12 | saveFMat("alls%03d.fmat.lz4" format i, alls); 13 | 14 | println("%d / 800 processed" format i); 15 | } 16 | -------------------------------------------------------------------------------- /scripts/processpubmed.ssc: -------------------------------------------------------------------------------- 1 | 2 | val a=loadSMat("pubmed.smat.lz4"); 3 | val d=loadSBMat("pubmed.term.sbmat.gz"); 4 | val dc=CSMat(d); 5 | val sa = sum(a,2); 6 | val (vx,ix) = sortdown2(sa); 7 | val ip = invperm(ix); 8 | val nc = a.ncols; 9 | for (i <- 0 until 10) { 10 | val icmin = ((i * 1L * nc)/10).toInt; 11 | val icmax = (((i+1) * 1L * nc)/10).toInt; 12 | val a0 = a(?, icmin -> icmax); 13 | val (ii, jj, vv) = find3(a0); 14 | val aa = sparse(ip(ii), jj, vv, a.nrows, a0.ncols); 15 | aa.check 16 | saveSMat("pubmed_parts/part%02d.smat.lz4" format i, aa); 17 | print("."); 18 | } 19 | saveSBMat("pubmed.term.sbmat.lz4",SBMat(dc(ix))); 20 | sys.exit() 21 | -------------------------------------------------------------------------------- /scripts/pubmedlda.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/"; 2 | 3 | val (nn,opts)=LDA.learner(dir + "pubmed_parts/part%02d.smat.lz4", 256); 4 | 5 | opts.batchSize = 50000; 6 | opts.nend = 9; 7 | opts.eltsPerSample = 400; 8 | opts.npasses = 3; 9 | 10 | 11 | val dict = Dict(loadSBMat(dir+"pubmed.term.sbmat.gz")) 12 | 13 | nn.train 14 | 15 | -------------------------------------------------------------------------------- /scripts/pubmednmf.ssc: -------------------------------------------------------------------------------- 1 | val dir = "../data/uci/" 2 | 3 | val (nn, opts) = NMF.learner(dir + "pubmed_parts/part%02d.smat.lz4", 256); 4 | 5 | opts.nend = 9; 6 | opts.eltsPerSample = 400 7 | 8 | opts.batchSize = 20000; 9 | opts.npasses = 3; 10 | 11 | val dict = Dict(loadSBMat(dir+"pubmed.term.sbmat.gz")) 12 | 13 | nn.train 14 | 15 | -------------------------------------------------------------------------------- /scripts/recompress.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir = "/data/MNIST8M/parts"; 3 | 4 | for (i<- 0 until 80) { 5 | val a=loadSMat(dir+"/part%02d.smat.lz4" format i); 6 | saveSMat(dir+"/part%02d.smat.gz" format i, a); 7 | val c=loadSMat(dir+"/cats%02d.smat.lz4" format i); 8 | saveSMat(dir+"/cats%02d.smat.gz" format i, c); 9 | print("."); 10 | } 11 | 12 | -------------------------------------------------------------------------------- /scripts/runall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while read slave; do 4 | echo ssh "${slave}" "${1}" 5 | ssh -n -o StrictHostKeyChecking=no "${slave}" "${1}" 6 | done < /code/BIDMach/conf/slaves 7 | -------------------------------------------------------------------------------- /scripts/runback.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | while read slave; do 4 | echo ssh "${slave}" "nohup sh -c \"${1}\" > ${HOME}/logs/bklog.txt 2>&1 &" 5 | ssh -n -o StrictHostKeyChecking=no "${slave}" "nohup sh -c \"${1}\" > ${HOME}/logs/bklog.txt 2>&1 &" 6 | done < /code/BIDMach/conf/slaves 7 | -------------------------------------------------------------------------------- 
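runall.sh and runback.sh fan a single shell command out to every host listed in /code/BIDMach/conf/slaves: runall.sh runs it synchronously, one host at a time, while runback.sh detaches it under nohup and leaves a log in ${HOME}/logs/bklog.txt on each slave. A minimal usage sketch (the commands shown are illustrative, not taken from the repo):

    ./runall.sh 'hostname'                                         # sanity-check that every slave answers
    ./runback.sh 'cd /code/BIDMach/scripts && bidmach testlr.ssc'  # long-running job, detached on each host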
/scripts/runmaster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd /code/BIDMach/scripts 3 | bidmach testAllReduceGridMaster.ssc 4 | -------------------------------------------------------------------------------- /scripts/runmaster16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cd /code/BIDMach/scripts 3 | bidmach testAllReduceGridMaster16.ssc 4 | -------------------------------------------------------------------------------- /scripts/runnode.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /code/BIDMach/scripts 4 | bidmach testAllReduceNodeResnet.ssc 5 | -------------------------------------------------------------------------------- /scripts/runnode16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd /code/BIDMach/scripts 4 | bidmach testAllReduceNodeResnet.ssc 5 | -------------------------------------------------------------------------------- /scripts/seedActor.ssc: -------------------------------------------------------------------------------- 1 | 2 | import BIDMach.allreduce.SeedActor 3 | 4 | val seeds = SeedActor.startup(Seq("2551","2552")); 5 | 6 | val gn = new SeedActor.GetNodes(); 7 | 8 | def queryHosts() = { 9 | gn.query(seeds(0)); 10 | } -------------------------------------------------------------------------------- /scripts/sortcriteo.ssc: -------------------------------------------------------------------------------- 1 | val ntrain = 92 2 | val ntest = 13 3 | val ndense = 2*ntest+1; 4 | val dir = "../data/criteo/parts2/"; 5 | 6 | println("\nCounting features"); 7 | val x= loadSMat(dir+"train00.smat.lz4"); 8 | val nfeats = x.nrows; 9 | val counts = dzeros(nfeats,1); 10 | val counts2 = dzeros(nfeats,1); 11 | 12 | //for (i <- 0 until (ntrain+ntest)) { 13 | for (i <- 0 until (ntrain)) { 14 | val a = if (i < ntrain) { 15 | loadSMat(dir+("train%02d.smat.lz4" format i)); 16 | } else { 17 | loadSMat(dir+("test%02d.smat.lz4" format i-ntrain)); 18 | } 19 | counts ~ counts + DMat(sum(a,2)); 20 | print(".") 21 | } 22 | 23 | saveDMat(dir+"featurecounts.dmat.lz4", counts) 24 | 25 | val cmeans = counts / counts(0); 26 | val cscale = FMat(cmeans); 27 | cscale(ndense->nfeats) = 1f; 28 | 29 | println("\nSorting"); 30 | val (vv, ii0) = sortdown2(counts(ndense->counts.length,0)); 31 | println("\nMapping"); 32 | 33 | val iperm = icol(0->ndense) on (ii0 + ndense); 34 | saveIMat(dir+"permutation.imat.lz4", iperm); 35 | val uperm = invperm(iperm); 36 | 37 | for (i <- 0 until (ntrain+ntest)) { 38 | val a = if (i < ntrain) { 39 | loadSMat(dir+("train%02d.smat.lz4" format i)); 40 | } else { 41 | loadSMat(dir+("test%02d.smat.lz4" format i-ntrain)); 42 | } 43 | val (ii, jj, vv) = find3(a); 44 | val newii = uperm(ii); 45 | val b = sparse(newii, jj, vv / cscale(newii), a.nrows, a.ncols); 46 | b.check; 47 | if (i < ntrain) { 48 | saveSMat(dir+("trainsorted%02d.smat.lz4" format i), b/cscale); 49 | } else { 50 | saveSMat(dir+("testsorted%02d.smat.lz4" format i-ntrain), b/cscale); 51 | } 52 | counts2 ~ counts2 + DMat(sum(b,2)); 53 | print(".") 54 | } 55 | saveDMat(dir+"sortedfeaturecounts.dmat.lz4", counts2) 56 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/check.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | if [ ! 
-s /home/ubuntu/sparseallreduce/PageRank/done.mount ]; then 4 | echo "mount fail" 5 | fi 6 | if [ ! -s /home/ubuntu/sparseallreduce/PageRank/machines ]; then 7 | echo "missing machines" 8 | fi 9 | if [ ! -s /home/ubuntu/sparseallreduce/PageRank/rmachines ]; then 10 | echo "missing rmachines" 11 | fi 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/checkall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | hosts=`cat $1` 3 | counter=0; 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank/;./check.sh" 8 | counter=`expr $counter + 1` 9 | echo $counter 10 | done 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/checkssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | status=$(ssh $1 echo ok 2>&1) 4 | if [[ $status == ok ]] ; then 5 | mkdir ips/$2/$1 6 | fi 7 | 8 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/checksshall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # $1 machine ips from one placement group $2 number of placement group 3 | hosts=`cat $1` 4 | #remove all existing ips 5 | rm -r ips/$2/* 6 | for i in `echo $hosts`; do 7 | host=`echo $i` 8 | ./checkssh.sh $host $2 & 9 | done 10 | 11 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | scalac -cp $ALL_LIBS Twitter.scala 4 | scalac -cp $ALL_LIBS Yahoo.scala 5 | scalac relabelmachines.scala 6 | scalac splitmachines.scala 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/kill.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ps aux | grep mount | awk '{print $2}' | xargs sudo kill 9 3 | ps aux | grep scala | awk '{print $2}' | xargs sudo kill 15 4 | sleep 3s 5 | ps aux | grep scala | awk '{print $2}' | xargs sudo kill 2 6 | sleep 3s 7 | ps aux | grep scala | awk '{print $2}' | xargs sudo kill 9 8 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/killall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./kill.sh;" & 8 | done 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/logcollect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | hosts=`cat $1` 3 | counter=0; 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cp /disk4/log* /disk4/copylog/" & 8 | sleep 1s 9 | scp -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host:/disk4/copylog/log* /logs & 10 | counter=`expr $counter + 1` 11 | echo $counter 12 | done 13 | 14 | -------------------------------------------------------------------------------- 
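The sparseallreduce helpers follow one pattern: a per-host script (check.sh, ping.sh, kill.sh, logcollect.sh, ...) paired with an *all.sh driver that reads a hosts file and invokes the per-host script over ssh with the hard-coded supermario.pem key. A hedged example session, assuming a hosts file named machines as these scripts expect:

    ./checksshall.sh machines 1   # probe ssh reachability; live hosts get a marker dir under ips/1/
    ./checkall.sh machines        # verify each node's volume mount and its machines/rmachines lists
    ./logcollect.sh machines      # copy each node's /disk4 logs back to the local /logs directory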
/scripts/sparseallreduce/mount.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export JAVA_HOME="/usr/" 3 | export EC2_HOME="/home/ubuntu/lib/ec2-api-tools-1.6.7.2" 4 | export PATH=$PATH:$EC2_HOME/bin 5 | export AWS_ACCESS_KEY=AAAA 6 | export AWS_SECRET_KEY=BBBB 7 | 8 | if [ ! -d /disk4 ]; then 9 | sudo mkdir /disk4 10 | fi 11 | #dir for copy logs 12 | if [ ! -d /disk4/copylog ]; then 13 | sudo mkdir /disk4/copylog 14 | fi 15 | sudo chown -R ubuntu /disk4 16 | sudo chgrp -R ubuntu /disk4 17 | sudo chmod -R 755 /disk4 18 | 19 | ec2-attach-volume $1 -i $(ec2metadata --instance-id) -d /dev/xvdk 20 | sleep 15s 21 | while [ $(sudo file -s /dev/xvdk | grep ERROR | wc -l) -eq 1 ]; do 22 | sleep 1s 23 | done 24 | sudo mount /dev/xvdk /disk4 25 | sudo chown -R ubuntu /disk4 26 | sudo chgrp -R ubuntu /disk4 27 | sudo chmod -R 755 /disk4 28 | 29 | echo "x" > done.mount 30 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/mountall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | while read host<&4 && read volume<&5 3 | do 4 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./mount.sh $volume;" & 5 | done 4<$1 5<$2 6 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/ping.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | hosts=`cat $1` 3 | counter=0; 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ping -c 1 $host 8 | done 9 | 10 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/pingall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #set -x 3 | hosts=`cat $1` 4 | 5 | for i in `echo $hosts`; do 6 | host=`echo $i` 7 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./ping.sh machines >& /disk3/log-ping-$host;" & 8 | done 9 | 10 | 11 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/runtwitter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | export JAVA_OPTS=-Xmx28G 4 | export LD_LIBRARY_PATH=/home/ubuntu/lib/BIDMat/lib:/usr/local/lib 5 | export PATH=$1 6 | export ALL_LIBS=$2 7 | 8 | scala -cp $ALL_LIBS Twitter 41652230 $3 $4 $5 $6 machines 9 | 10 | 11 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/runtwitterall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | config="8,8" 6 | 7 | for i in `echo $hosts`; do 8 | host=`echo $i` 9 | echo $imachine 10 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;nohup ./runtwitter.sh $PATH $ALL_LIBS $config $imachine 10000000 1 >& /disk3/log-twitter-$config-$imachine &" & 11 | imachine=`expr $imachine + 1` 12 | done 13 | 14 | 15 |
-------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahoo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | export JAVA_OPTS=-Xmx60G 4 | export LD_LIBRARY_PATH=/home/ubuntu/lib/BIDMat/lib:/usr/local/lib 5 
export PATH=$1 6 | export ALL_LIBS=$2 7 | 8 | scala -cp $ALL_LIBS Yahoo 1413511394 $3 $4 $5 $6 machines 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahooall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | config="4,4,2,2" 6 | 7 | for i in `echo $hosts`; do 8 | host=`echo $i` 9 | echo $imachine 10 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;nohup ./runyahoo.sh $PATH $ALL_LIBS $config $imachine 30000000 1 >& /disk4/log-yahoo-$config-$imachine &" & 11 | imachine=`expr $imachine + 1` 12 | done 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahoor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | export JAVA_OPTS=-Xmx60G 4 | export LD_LIBRARY_PATH=/home/ubuntu/lib/BIDMat/lib:/usr/local/lib 5 | export PATH=$1 6 | export ALL_LIBS=$2 7 | 8 | scala -cp $ALL_LIBS Yahoo 1413511394 $3 $4 $5 $6 rmachines 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/runyahoorall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | config="128" 6 | 7 | for i in `echo $hosts`; do 8 | host=`echo $i` 9 | echo $imachine 10 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;nohup ./runyahoor.sh $PATH $ALL_LIBS $config $imachine 30000000 2 >& /disk4/log-yahoor-$config-$imachine &" & 11 | imachine=`expr $imachine + 1` 12 | done 13 | 14 | 15 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/setup.sh: -------------------------------------------------------------------------------- 1 | 2 | N=69 3 | TYPE="cc2.8xlarge" 4 | PLACEMENT="sparseallreduce" 5 | 6 | ec2-run-instances ami-53c5c03a -n $N -g template-all-access -k supermario -t $TYPE --placement-group $PLACEMENT --availability-zone us-east-1a 7 | 8 | sleep 10s 9 | 10 | ec2-describe-instances --filter "instance-type=$TYPE" --filter "placement-group-name=$PLACEMENT" | grep -o 'ip[0-9-]\+' > /home/ubuntu/sparseallreduce/PageRank/rawmachines 11 | 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/unmount.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | export JAVA_HOME="/usr/" 3 | export EC2_HOME="/home/ubuntu/lib/ec2-api-tools-1.6.7.2" 4 | export PATH=$PATH:$EC2_HOME/bin 5 | export AWS_ACCESS_KEY=AAAA 6 | export AWS_SECRET_KEY=BBBB 7 | 8 | sudo umount -d /dev/xvdk 9 | ec2-detach-volume $1 10 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/unmountall.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | while read host<&4 && read volume<&5 3 | do 4 | ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "cd /home/ubuntu/sparseallreduce/PageRank;./unmount.sh $volume;" & 5 | done 4<$1 5<$2 6 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 
#set -x 3 | hosts=`cat $1` 4 | imachine=0 5 | 6 | for i in `echo $hosts`; do 7 | host=`echo $i` 8 | scp -i /home/ubuntu/.ssh/supermario.pem *.sh ubuntu@$host:sparseallreduce/PageRank/ & 9 | scp -i /home/ubuntu/.ssh/supermario.pem machines ubuntu@$host:sparseallreduce/PageRank/ & 10 | scp -i /home/ubuntu/.ssh/supermario.pem rmachines ubuntu@$host:sparseallreduce/PageRank/machines & 11 | #scp -i /home/ubuntu/.ssh/supermario.pem Twitter* ubuntu@$host:sparseallreduce/PageRank/ & 12 | #scp -i /home/ubuntu/.ssh/supermario.pem Yahoo* ubuntu@$host:sparseallreduce/PageRank/ & 13 | #scp -i /home/ubuntu/.ssh/supermario.pem ~/lib/BIDMat/BIDMat.jar ubuntu@$host:lib/BIDMat/ & 14 | done 15 | 16 | 17 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/updatecheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | hosts=`cat $1` 3 | 4 | for i in `echo $hosts`; do 5 | host=`echo $i` 6 | result=$(ssh -i /home/ubuntu/.ssh/supermario.pem ubuntu@$host "ls machines | wc -l;" 2>&1) 7 | if [ "$result" -ne 1 ]; 8 | then 9 | echo "$host: false" 10 | fi 11 | done 12 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes1: -------------------------------------------------------------------------------- 1 | vol-42b66e34 2 | vol-4bb66e3d 3 | vol-32b66e44 4 | vol-31b66e47 5 | vol-15b66e63 6 | vol-10b66e66 7 | vol-11b66e67 8 | vol-1eb66e68 9 | vol-1fb66e69 10 | vol-1ab66e6c 11 | vol-19b66e6f 12 | vol-06b66e70 13 | vol-0cb66e7a 14 | vol-0ab66e7c 15 | vol-08b66e7e 16 | vol-f6b66e80 17 | vol-f7b66e81 18 | vol-f4b66e82 19 | vol-f5b66e83 20 | vol-f2b66e84 21 | vol-fab66e8c 22 | vol-f9b66e8f 23 | vol-e2b66e94 24 | vol-e0b66e96 25 | vol-d6b66ea0 26 | vol-d7b66ea1 27 | vol-d3b66ea5 28 | vol-d1b66ea7 29 | vol-deb66ea8 30 | vol-c6b66eb0 31 | vol-c2b66eb4 32 | vol-c0b66eb6 33 | vol-ceb66eb8 34 | vol-cbb66ebd 35 | vol-b7b66ec1 36 | vol-b5b66ec3 37 | vol-b1b66ec7 38 | vol-beb66ec8 39 | vol-bdb66ecb 40 | vol-bab66ecc 41 | vol-b8b66ece 42 | vol-a4b66ed2 43 | vol-a5b66ed3 44 | vol-acb66eda 45 | vol-aab66edc 46 | vol-96b66ee0 47 | vol-94b66ee2 48 | vol-93b66ee5 49 | vol-9cb66eea 50 | vol-98b66eee 51 | vol-86b66ef0 52 | vol-85b66ef3 53 | vol-8eb66ef8 54 | vol-8fb66ef9 55 | vol-8db66efb 56 | vol-8ab66efc 57 | vol-89b66eff 58 | vol-75b76f03 59 | vol-72b76f04 60 | vol-7cb76f0a 61 | vol-7ab76f0c 62 | vol-79b76f0f 63 | vol-67b76f11 64 | vol-62b76f14 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes2: -------------------------------------------------------------------------------- 1 | vol-6eb76f18 2 | vol-6db76f1b 3 | vol-68b76f1e 4 | vol-56b76f20 5 | vol-55b76f23 6 | vol-53b76f25 7 | vol-5eb76f28 8 | vol-5db76f2b 9 | vol-5ab76f2c 10 | vol-58b76f2e 11 | vol-42b76f34 12 | vol-40b76f36 13 | vol-41b76f37 14 | vol-4eb76f38 15 | vol-4db76f3b 16 | vol-4bb76f3d 17 | vol-49b76f3f 18 | vol-36b76f40 19 | vol-37b76f41 20 | vol-33b76f45 21 | vol-30b76f46 22 | vol-31b76f47 23 | vol-38b76f4e 24 | vol-24b76f52 25 | vol-21b76f57 26 | vol-2eb76f58 27 | vol-2fb76f59 28 | vol-2cb76f5a 29 | vol-2db76f5b 30 | vol-2ab76f5c 31 | vol-2bb76f5d 32 | vol-28b76f5e 33 | vol-14b76f62 34 | vol-11b76f67 35 | vol-1eb76f68 36 | vol-1fb76f69 37 | vol-04b76f72 38 | vol-05b76f73 39 | vol-00b76f76 40 | vol-f1b76f87 41 | vol-fab76f8c 42 | vol-f8b76f8e 43 | vol-e5b76f93 44 | vol-e1b76f97 45 | vol-efb76f99 46 | vol-ecb76f9a 47 | vol-ebb76f9d 48 | 
vol-e9b76f9f 49 | vol-d6b76fa0 50 | vol-d7b76fa1 51 | vol-d4b76fa2 52 | vol-d3b76fa5 53 | vol-d1b76fa7 54 | vol-c7b76fb1 55 | vol-c4b76fb2 56 | vol-c5b76fb3 57 | vol-c9b76fbf 58 | vol-b6b76fc0 59 | vol-b7b76fc1 60 | vol-b4b76fc2 61 | vol-b5b76fc3 62 | vol-b2b76fc4 63 | vol-b0b76fc6 64 | vol-b1b76fc7 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes3: -------------------------------------------------------------------------------- 1 | vol-bfb76fc9 2 | vol-a1b76fd7 3 | vol-a9b76fdf 4 | vol-94b76fe2 5 | vol-95b76fe3 6 | vol-91b76fe7 7 | vol-9eb76fe8 8 | vol-87b76ff1 9 | vol-85b76ff3 10 | vol-8cb76ffa 11 | vol-77a87001 12 | vol-75a87003 13 | vol-73a87005 14 | vol-70a87006 15 | vol-71a87007 16 | vol-7ea87008 17 | vol-7aa8700c 18 | vol-79a8700f 19 | vol-65a87013 20 | vol-63a87015 21 | vol-60a87016 22 | vol-56a87020 23 | vol-52a87024 24 | vol-53a87025 25 | vol-5aa8702c 26 | vol-5ba8702d 27 | vol-58a8702e 28 | vol-41a87037 29 | vol-4fa87039 30 | vol-36a87040 31 | vol-30a87046 32 | vol-31a87047 33 | vol-3da8704b 34 | vol-3ba8704d 35 | vol-38a8704e 36 | vol-39a8704f 37 | vol-27a87051 38 | vol-24a87052 39 | vol-22a87054 40 | vol-21a87057 41 | vol-2da8705b 42 | vol-29a8705f 43 | vol-17a87061 44 | vol-15a87063 45 | vol-10a87066 46 | vol-11a87067 47 | vol-1da8706b 48 | vol-1ba8706d 49 | vol-04a87072 50 | vol-0ea87078 51 | vol-0fa87079 52 | vol-0ca8707a 53 | vol-09a8707f 54 | vol-f4a87082 55 | vol-f0a87086 56 | vol-f1a87087 57 | vol-ffa87089 58 | vol-fba8708d 59 | vol-f8a8708e 60 | vol-e7a87091 61 | vol-e4a87092 62 | vol-e0a87096 63 | vol-eea87098 64 | vol-efa87099 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumes4: -------------------------------------------------------------------------------- 1 | vol-eca8709a 2 | vol-eda8709b 3 | vol-eba8709d 4 | vol-d4a870a2 5 | vol-d5a870a3 6 | vol-dfa870a9 7 | vol-daa870ac 8 | vol-dba870ad 9 | vol-d8a870ae 10 | vol-c4a870b2 11 | vol-c2a870b4 12 | vol-c3a870b5 13 | vol-c0a870b6 14 | vol-c1a870b7 15 | vol-cfa870b9 16 | vol-cca870ba 17 | vol-cba870bd 18 | vol-c8a870be 19 | vol-b5a870c3 20 | vol-b0a870c6 21 | vol-b1a870c7 22 | vol-bea870c8 23 | vol-baa870cc 24 | vol-bba870cd 25 | vol-b9a870cf 26 | vol-a6a870d0 27 | vol-a4a870d2 28 | vol-a2a870d4 29 | vol-aea870d8 30 | vol-96a870e0 31 | vol-97a870e1 32 | vol-94a870e2 33 | vol-91a870e7 34 | vol-9ca870ea 35 | vol-87a870f1 36 | vol-82a870f4 37 | vol-83a870f5 38 | vol-81a870f7 39 | vol-8ea870f8 40 | vol-8ca870fa 41 | vol-8da870fb 42 | vol-8ba870fd 43 | vol-76a97100 44 | vol-7da9710b 45 | vol-79a9710f 46 | vol-65a97113 47 | vol-62a97114 48 | vol-63a97115 49 | vol-60a97116 50 | vol-6ca9711a 51 | vol-6da9711b 52 | vol-6ba9711d 53 | vol-69a9711f 54 | vol-56a97120 55 | vol-54a97122 56 | vol-55a97123 57 | vol-51a97127 58 | vol-5ba9712d 59 | vol-44a97132 60 | vol-45a97133 61 | vol-20a97156 62 | vol-21a97157 63 | vol-2ca9715a 64 | vol-2aa9715c 65 | -------------------------------------------------------------------------------- /scripts/sparseallreduce/volumesetup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in {0..191} 4 | do 5 | ec2-create-volume --size 1 --availability-zone us-east-1a 6 | done 7 | 8 | ec2-describe-volumes --filter "size=1" | grep -o 'vol[a-zA-Z0-9-]\+' > volumesbackup 9 | 10 | 11 | -------------------------------------------------------------------------------- /scripts/start_workers.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | if [[ -z $1 ]]; then 5 | echo 'Must supply script argument for workers to start!' 1>&2 6 | echo 'Example: ./start_workers.sh distributed/worker_criteo_lr.ssc' 1>&2 7 | exit 1 8 | fi 9 | 10 | WORKER_SCRIPT="${1}" 11 | 12 | SSH_OPTS='-T -o ConnectTimeout=3' 13 | while read worker_ip; do 14 | echo "Starting BIDMach worker on ${worker_ip}" 15 | ssh $SSH_OPTS "ubuntu@${worker_ip}" << EOS 16 | 17 | JAVA_OPTS=$JAVA_OPTS nohup bidmach $WORKER_SCRIPT > /tmp/bidmach_worker.log 2>&1 & disown 18 | 19 | EOS 20 | done < /code/BIDMach/conf/slaves 21 | echo 'Done!' 22 |
-------------------------------------------------------------------------------- /scripts/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Starting the Grid Master script here" 4 | screen -d -m bash -i -x /code/BIDMach/scripts/runmaster.sh 5 | 6 | echo "Waiting 20 seconds for Master startup" 7 | sleep 20 8 | 9 | echo "Starting Nodes" 10 | runall.sh 'screen -d -m bash -i -x /code/BIDMach/scripts/runnode.sh' 11 |
-------------------------------------------------------------------------------- /scripts/startup16.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Starting the Grid Master script here" 4 | screen -d -m bash -i -x /code/BIDMach/scripts/runmaster16.sh 5 | 6 | echo "Waiting 20 seconds for Master startup" 7 | sleep 20 8 | 9 | echo "Starting Nodes" 10 | runall.sh 'screen -d -m bash -i -x /code/BIDMach/scripts/runnode16.sh' 11 |
-------------------------------------------------------------------------------- /scripts/stop_workers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | SSH_OPTS='-T -o ConnectTimeout=3 -o BatchMode=yes -o StrictHostKeyChecking=no' 4 | while read worker_ip; do 5 | echo "Killing BIDMach worker on ${worker_ip}" 6 | ssh $SSH_OPTS "ubuntu@${worker_ip}" << EOS 7 | 8 | jps | grep 'MainGenericRunner' | awk '{print \$1}' | xargs -I% kill % 9 | 10 | EOS 11 | done < /code/BIDMach/conf/slaves 12 | echo 'Done!' 13 |
-------------------------------------------------------------------------------- /scripts/testActor.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import com.typesafe.config.ConfigFactory 3 | 4 | val nodes = TestActor.startup(Seq("2553","2554")); 5 | 6 | val conf = ConfigFactory.load() 7 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 8 | val seedhostPort = seeds.get(0).toString 9 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 10 | 11 | // Akka's default maximum remote message size is about 100 kB, so stay under 25k floats (4 bytes each). 12 | val msize = 20000 13 | val sv = new TestActor.SendData(seedhost + ":2553/user/testActor",rand(1,msize),1000); 14 | //val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",10); 15 | 16 | 17 | def sendIt() = { 18 | nodes(0) ! 
sv; 19 | } 20 | 21 | -------------------------------------------------------------------------------- /scripts/testActor2.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import scala.io.Source 3 | import com.typesafe.config.ConfigFactory 4 | 5 | val nodes = TestActor.startup(Seq("2555","2556")); 6 | 7 | 8 | val conf = ConfigFactory.load() 9 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 10 | val seedhostPort = seeds.get(0).toString 11 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 12 | 13 | 14 | val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",101) 15 | 16 | def sendIt() = { 17 | nodes(0) ! sv; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /scripts/testActor3.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import com.typesafe.config.ConfigFactory 3 | 4 | val nodes = TestActor.startup(Seq("2553","2554")); 5 | 6 | val conf = ConfigFactory.load() 7 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 8 | val seedhostPort = seeds.get(0).toString 9 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 10 | 11 | val msize = 20000; 12 | val sv = new TestActor.SendData(seedhost + ":2553/user/testActor",rand(1,msize),1000); 13 | //val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",10); 14 | 15 | 16 | def sendIt() = { 17 | nodes(0) ! sv; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /scripts/testActor3.ssc~: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.TestActor 2 | import com.typesafe.config.ConfigFactory 3 | 4 | val nodes = TestActor.startup(Seq("2553","2554")); 5 | 6 | val conf = ConfigFactory.load() 7 | val seeds = conf.getList("akka.cluster.seed-nodes").unwrapped 8 | val seedhostPort = seeds.get(0).toString 9 | val seedhost = seedhostPort.substring(0, seedhostPort.lastIndexOf(":")); 10 | 11 | val msize = 20000; 12 | val sv = new TestActor.SendData(seedhost + ":2553/user/testActor",zeros(1,msize),100); 13 | //val sv = new TestActor.SendTo(seedhost + ":2553/user/testActor",10); 14 | 15 | 16 | def sendIt() = { 17 | nodes(0) ! 
sv; 18 | } 19 | 20 | -------------------------------------------------------------------------------- /scripts/testAllReduceGridMaster.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce._ 2 | 3 | import scala.concurrent.duration._ 4 | 5 | // Override the configuration of the port when specified as program argument 6 | val port = "2551" 7 | val nodeNum = 4 8 | val masterConfig = GridMasterConfig(nodeNum = nodeNum, nodeResolutionTimeout = 10.seconds) 9 | 10 | AllreduceGridMaster.startUp(port, masterConfig) 11 | -------------------------------------------------------------------------------- /scripts/testAllReduceNodeDummy.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.allreduce.AllreduceNode.getBasicConfigs 2 | import BIDMach.allreduce.binder.{AssertCorrectnessBinder, NoOpBinder} 3 | import BIDMach.allreduce.{AllreduceDummyLearner, AllreduceNode} 4 | 5 | val learner = new AllreduceDummyLearner() 6 | learner.ipass = 20 7 | 8 | val dataSize = 60000000 9 | val maxChunkSize = 20000 10 | 11 | val basicConfig = getBasicConfigs() 12 | val modifiedConfig = basicConfig.copy(workerConfig = 13 | basicConfig.workerConfig.copy( 14 | metaData = basicConfig.workerConfig.metaData.copy(dataSize = dataSize, maxChunkSize = maxChunkSize), 15 | threshold = basicConfig.workerConfig.threshold.copy(thComplete = 1.0f) 16 | ) 17 | ) 18 | 19 | 20 | val binder = new NoOpBinder(dataSize, 10) 21 | AllreduceNode.startNodeAfterIter(learner = learner, iter = 0, nodeConfig = modifiedConfig, binder = binder) -------------------------------------------------------------------------------- /scripts/testLogging.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks._ 2 | import scala.io.Source 3 | import BIDMach.datasources._ 4 | import scala.util.Random 5 | import BIDMach._ 6 | import BIDMach.updaters._ 7 | import BIDMach.mixins._ 8 | import BIDMat.TMat 9 | 10 | val prefix = "/data/word2vec/simple-examples/data/" 11 | val filename=prefix+"ptb.train.imat.lz4" 12 | val vob = 10000 13 | val data = loadIMat(filename)(0,0 until 920000) 14 | val test = loadIMat(prefix+"ptb.valid.imat.lz4")(0,0 until 70000); 15 | val (l,o)=NextWord.learner(data) 16 | val lr = 1f 17 | o.nvocab=vob; 18 | o.npasses=1; 19 | o.lrate=lr; 20 | o.height=2; 21 | o.width = 20 22 | o.batchSize=10000;//For fast testing only... 
set to 200 if want reasonable results 23 | o.dim=200 24 | o.kind=2 25 | o.pstep = 0.09f 26 | o.hasBias = true 27 | o.max_grad_norm = 5 28 | o.logDataSink = new MatSink() //nmats will be computed automatically during the logging 29 | o.logFuncs = Array(Logging.logGradientL2Norm,Logging.logGradientL1Norm) 30 | l.train 31 | val log = Logging.getResults(l) // or Logging.getResults(l.model) 32 | -------------------------------------------------------------------------------- /scripts/testPowerNet.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.TMat 2 | 3 | val mdir = "../data/criteo/parts/" 4 | 5 | val (nn,opts) = Net.learnerX(mdir+"trainsortedx%02d.smat.lz4",mdir+"trainlabel%02d.fmat.lz4"); 6 | 7 | opts.nend = 90 8 | opts.batchSize= 100 9 | opts.npasses = 1 10 | opts.lrate = 0.01f 11 | opts.texp = 0.3f 12 | opts.pstep = 0.001f 13 | 14 | opts.aopts = opts 15 | //opts.reg1weight = 0.0001 16 | //opts.hasBias = true 17 | opts.links = iones(1,1); 18 | opts.nweight = 1e-4f 19 | opts.lookahead = 0 20 | opts.autoReset = false 21 | 22 | val tshape = 0.25f 23 | val shape = irow(200,120,80,50,1) 24 | opts.tmatShape = Net.powerShape(tshape)_; 25 | opts.nodeset = Net.powerNet(shape,opts,0,2); 26 | opts.what 27 | println(tshape.toString) 28 | println(shape.toString) 29 | 30 | val model = nn.model.asInstanceOf[Net] 31 | nn.train 32 | 33 | val res = nn.results(0,?) 34 | 35 | val testdata = loadSMat(mdir+"trainsortedx%02d.smat.lz4" format opts.nend); 36 | val testlabels = loadFMat(mdir+"trainlabel%02d.fmat.lz4" format opts.nend); 37 | 38 | val (mm, mopts) = Net.predictor(model, testdata); 39 | mm.predict 40 | 41 | val preds=FMat(mm.preds(0)) 42 | 43 | val ll = DMat(ln(preds *@ testlabels + (1-preds) *@ (1-testlabels))) 44 | val rc = roc(preds, testlabels, 1-testlabels, 1000); 45 | 46 | (mean(ll), mean(rc)) -------------------------------------------------------------------------------- /scripts/testPredMNT2015.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.SeqToSeq 2 | 3 | val mdir = "/mnt/BIDMach/data/MNT2015/models/local_10passes/model256_te0.3_02/" 4 | // val mdir = "/mnt/BIDMach/data/MNT2015/models/local/" 5 | val datadir = "/mnt/BIDMach/data/MNT2015/data/" 6 | 7 | val PADsym = 1 8 | 9 | val batchSize = 128 10 | var src = loadSMat(datadir+"news-commentary-v10.fr-en.fr.smat.lz4") 11 | var dst = loadSMat(datadir+"news-commentary-v10.fr-en.en.smat.lz4") 12 | 13 | val ncols = src.ncols 14 | var traincols = Math.floor(0.9*ncols).toInt 15 | traincols = traincols - (traincols % batchSize) 16 | var srcTest = src(?, traincols -> (ncols-1)) 17 | var dstTest = dst(?, traincols -> (ncols-1)) 18 | srcTest = srcTest(?, 0 -> (srcTest.ncols - (srcTest.ncols % batchSize))) 19 | dstTest = dstTest(?, 0 -> (dstTest.ncols - (dstTest.ncols % batchSize))) 20 | 21 | // var srcTestFull = full(srcTest) 22 | // var dstTestFull = full(dstTest) 23 | var srcTestFull = full(src) 24 | var dstTestFull = full(dst) 25 | srcTestFull ~ srcTestFull + PADsym * (srcTestFull == 0) 26 | dstTestFull ~ dstTestFull + PADsym * (dstTestFull == 0) 27 | 28 | val model = new SeqToSeq 29 | model.setmodelmats(new Array[Mat](7)) 30 | for (i <- 0 until 7) { 31 | model.modelmats(i) = loadMat(mdir+"modelmat%02d.lz4" format i) 32 | } 33 | 34 | val srcSlice = srcTestFull(?, 0 -> batchSize) 35 | val dstSlice = dstTestFull(?, 0 -> batchSize) 36 | val (nn, opts) = SeqToSeq.predict(model, srcSlice) 37 | 38 | opts.nvocab = 20000 39 | opts.height = 
2 40 | opts.dim = 256 41 | opts.batchSize = batchSize 42 | 43 | opts.kind = 1 44 | opts.netType = 0 45 | opts.scoreType = 1 46 | opts.inwidth = 30 47 | opts.outwidth = 30 48 | opts.hasBias = true 49 | opts.pstep = 0.005f 50 | opts.cumScore = 3 51 | opts.PADsym = PADsym 52 | opts.OOVsym = 2 53 | opts.STARTsym = 0 54 | 55 | nn.predict 56 | 57 | val pred = nn.datasink.asInstanceOf[MatSink].omats(0).asInstanceOf[IMat] 58 | -------------------------------------------------------------------------------- /scripts/testSeqToSeqPred.ssc: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * LSTM training script. 4 | * 5 | */ 6 | import BIDMach.networks.SeqToSeq 7 | 8 | val dir = "/data/livejournal/srcdst/"; // Directory for input data 9 | val mdir = "/data/livejournal/models/" 10 | val odir = "/data/livejournal/preds2/"; // Directory for input data 11 | 12 | val model = SeqToSeq.load(mdir+"model256_te0.3_12/") 13 | val (nn,opts) = SeqToSeq.embed(model, dir+ "src%04d.smat.lz4", odir + "pred%04d.fmat.lz4") 14 | 15 | opts.nend = 1132 16 | opts.batchSize = 128 17 | opts.ofcols = 128000 18 | 19 | opts.nvocab = 100000; // Vocabulary limit 20 | opts.npasses = 1; // Number of passes over the dataset 21 | opts.height = 2; // Height of the network 22 | opts.dim = 256; // Dimension of LSTM units 23 | opts.kind = 1; // LSTM structure 24 | opts.netType = 0; // Net type (softmax=0, or negsampling=1) 25 | opts.scoreType = 1; // Score type (logloss=0, accuracy=1) 26 | opts.inwidth = 30; // Max input sentence length (truncates) 27 | opts.outwidth = 30; // Max output sentence length (truncates) 28 | opts.hasBias = true; // Use bias terms in linear layers 29 | opts.pstep = 0.0001f; // How often to print 30 | opts.cumScore = 3; // Accumulate scores for less-noisy printing 31 | opts.PADsym = 1; // The padding symbol 32 | opts.OOVsym = 2; // The OOV symbol 33 | opts.STARTsym = 0; 34 | opts.lookahead = 0; 35 | 36 | nn.predict 37 | -------------------------------------------------------------------------------- /scripts/test_cmudict_s2s.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.SeqToSeq 2 | 3 | val datadir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/smat_data/" 4 | val modeldir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/bidmach_model_10pass/" 5 | 6 | val src = loadMat(datadir+"train.src_grapheme.shuf.smat.lz4") 7 | val dst = loadMat(datadir+"train.dst_phoneme.shuf.smat.lz4") 8 | 9 | val (nn, opts) = SeqToSeq.learner(src, dst) 10 | val net = nn.model.asInstanceOf[BIDMach.networks.SeqToSeq] 11 | 12 | opts.lrate = 0.05f 13 | opts.nvocabIn = 31 14 | opts.nvocabOut = 43 15 | opts.npasses = 10 16 | opts.height = 2 17 | opts.dim = 512 18 | opts.batchSize = 64 19 | 20 | opts.checkPointInterval = 1f 21 | opts.checkPointFile = modeldir+"model256_te0.3_%02d/" 22 | opts.kind = 1 23 | opts.netType = 0 24 | opts.scoreType = 0 25 | opts.inwidth = 22 26 | opts.outwidth = 20 27 | opts.hasBias = true 28 | opts.pstep = 0.0001f 29 | opts.cumScore = 3 30 | opts.PADsym = 1 31 | opts.OOVsym = 2 32 | opts.STARTsym = 0 33 | opts.texp = 0.3f 34 | 35 | nn.train 36 | -------------------------------------------------------------------------------- /scripts/test_grid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #read testing parameters from user 4 | read -p 'nodeNum: ' nn 5 | read -p 'dataSize: ' ds 6 | read -p 'threshold: ' th 7 | read -p 'maxRound: ' mr 8 | 9 | 
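# Note: the sed edits below inject the values read above by rewriting 'val'
# declarations in place; e.g. with nn=8, a line reading 'val nodeNum = 4' in
# testAllReduceGridMaster.scala becomes 'val nodeNum = 8'. Each tunable must
# therefore stay on its own 'val ...' line for these patterns to match.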
#set parameters on master 10 | sed -i "s/^val nodeNum .*$/val nodeNum = ${nn}/" testAllReduceGridMaster.scala 11 | 12 | #run testAllReduceGridMaster 13 | nohup bidmach ./testAllReduceGridMaster.scala & 14 | 15 | #set parameters on slaves 16 | runall.sh "cd /code/BIDMach/scripts;sed -i \"s/^val maxRound .*$/val maxRound = ${mr}/\" testAllReduceNode.scala;sed -i \"s/^val dataSize.*$/val dataSize = ${ds}/\" testAllReduceNode.scala;sed -i \"s/^val threshold = ThresholdConfig(thAllreduce = .*$/val threshold = ThresholdConfig(thAllreduce = ${th}f, thReduce = ${th}f, thComplete = ${th}f)/\" testAllReduceNode.scala" 17 | 18 | #run testAllReduceNode on each slave 19 | ./start_workers.sh /code/BIDMach/scripts/testAllReduceNode.scala 20 | -------------------------------------------------------------------------------- /scripts/test_pred_cmudict_s2s.ssc: -------------------------------------------------------------------------------- 1 | import util.control.Breaks._ 2 | import BIDMach.networks.SeqToSeq 3 | 4 | val datadir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/smat_data/" 5 | val modeldir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/bidmach_model_10pass/" 6 | val preddir = "/mnt/BIDMach/data/phonetisaurus-cmudict-split/pred/" 7 | 8 | var src = loadMat(datadir+"valid.src_grapheme.shuf.smat.lz4") 9 | var dst = loadMat(datadir+"valid.dst_phoneme.shuf.smat.lz4") 10 | 11 | // val model = SeqToSeq.load(modeldir+"model256_te0.3_00/") 12 | val model = new SeqToSeq 13 | model.setmodelmats(new Array[Mat](7)) 14 | for (i <- 0 until 7) { 15 | model.modelmats(i) = loadMat(modeldir+"model256_te0.3_00/modelmat%02d.lz4" format i) 16 | } 17 | 18 | val (nn, opts) = SeqToSeq.predict(model, src) 19 | val net = nn.model.asInstanceOf[BIDMach.networks.SeqToSeq] 20 | 21 | opts.nvocabIn = 31 22 | opts.nvocabOut = 43 23 | opts.height = 2 24 | opts.dim = 512 25 | opts.batchSize = 64 26 | 27 | opts.kind = 1 28 | opts.netType = 0 29 | opts.scoreType = 0 30 | opts.inwidth = 22 31 | opts.outwidth = 20 32 | opts.hasBias = true 33 | opts.pstep = 0.0001f 34 | opts.cumScore = 3 35 | opts.PADsym = 1 36 | opts.OOVsym = 2 37 | opts.STARTsym = 0 38 | 39 | opts.autoReset = false 40 | 41 | nn.predict 42 | 43 | val preds = IMat(nn.preds(0)) 44 | 45 | def calcWER(preds:IMat, dst:IMat):(Int, Float) = { 46 | var error = 0 47 | for (j <- 0 until preds.ncols) { 48 | breakable { 49 | for (i <- 0 until preds.nrows) { 50 | if (preds(i, j) == 1) { 51 | if (dst(i, j) > 1) error += 1 // early prediction termination 52 | break 53 | } else if (preds(i, j) != dst(i, j)) { 54 | error += 1 55 | break 56 | } 57 | } 58 | } 59 | } 60 | (error, error.toFloat/preds.ncols) 61 | } 62 | 63 | val WER = calcWER(preds, IMat(full(dst))) 64 | -------------------------------------------------------------------------------- /scripts/testds.ssc: -------------------------------------------------------------------------------- 1 | 2 | val opts = new SFileSource.Options; 3 | 4 | def getDS() = { 5 | implicit val threads = threadPool(4) 6 | new SFileSource(opts); 7 | } 8 | 9 | val ds=getDS; 10 | 11 | opts.nend=10; 12 | opts.fnames=List(FileSource.simpleEnum("../data/uci/pubmed_parts/part%02d.smat.lz4", 1, 0)); 13 | opts.batchSize = 100000; 14 | opts.fcounts = 141043 15 | opts.eltsPerSample = 400; 16 | ds.init; 17 | 18 | var i = 0; 19 | var total = 0L; 20 | tic; 21 | while (ds.hasNext) { 22 | val mats = ds.next; 23 | total += mats(0).asInstanceOf[SMat].nnz * 8L; 24 | val t=toc; 25 | println("Speed %4.3f MB/s, %4.3f GB in %4.3f secs" format (total/t/1e6, total/1e9, 
t)); 26 | } 27 | 28 | 29 | -------------------------------------------------------------------------------- /scripts/testldagibbs.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dd= loadSMat("../data/uci/nytimes.smat.lz4"); 3 | 4 | val (nn,opts)=LDAgibbs.learner(dd) 5 | 6 | opts.dim=128; 7 | opts.uiter=5; 8 | opts.batchSize=1024; 9 | opts.npasses=1; 10 | opts.useBino=true; 11 | opts.doDirichlet=true; 12 | opts.alpha=0.2f; 13 | opts.doAlpha=true; 14 | opts.nsamps=100; 15 | opts.power=0.5f; 16 | 17 | nn.train 18 | -------------------------------------------------------------------------------- /scripts/testlincomb.sc: -------------------------------------------------------------------------------- 1 | 2 | :silent 3 | 4 | val a=grand(2,2) 5 | val b=grand(2,2) 6 | val c=gzeros(2,2) 7 | 8 | Grad.linComb(a,1f,b,1f,c) 9 | 10 | c 11 | 12 | System.exit(0) -------------------------------------------------------------------------------- /scripts/testlr.ssc: -------------------------------------------------------------------------------- 1 | 2 | val dir="../data/rcv1/" 3 | 4 | val a0 = loadSMat(dir + "docs.smat.lz4") 5 | val c0 = loadFMat(dir + "cats.fmat.lz4")(0->100,?) 6 | val rr = rand(c0.ncols,1); 7 | val (ss, ii) = sort2(rr); 8 | val a = a0(?,ii); 9 | val c = c0(?,ii); 10 | 11 | val ta = loadSMat(dir + "testdocs.smat.lz4") 12 | val tc = loadFMat(dir + "testcats.fmat.lz4")(0->100,?) 13 | 14 | setNumThreads(1) 15 | val (nn,opts)=GLM.learnerX(a,c,1) 16 | 17 | opts.batchSize=20000 18 | opts.lrate = 0.02f 19 | opts.npasses = 4 20 | opts.reg1weight = 0.0 21 | opts.links = iones(103,1) 22 | opts.addConstFeat=true; 23 | opts.aopts = opts; 24 | //opts.doVariance = true; 25 | opts.evalStep = 3; 26 | //opts.debugMem = true 27 | //opts.useGPU = false 28 | //Mat.useMKL = false 29 | 30 | val model = nn.model.asInstanceOf[GLM] 31 | nn.train 32 | 33 | val (mm, mopts) = GLM.predictor(nn.model, ta) 34 | mopts.addConstFeat=opts.addConstFeat; 35 | mopts.batchSize=20000 36 | mopts.links = opts.links 37 | mm.predict 38 | 39 | val pc = FMat(mm.preds(0)) 40 | 41 | //val tc2= tmap * tc 42 | val rc = roc2(pc, tc, 1-tc, 1000) 43 | val nc = sum(tc,2); 44 | val wmean = mean(rc)*nc/sum(nc) 45 | 46 | println("roc6 = %5.4f, roc weighted mean = %5.4f" format (mean(rc)(6), wmean.v)) 47 | -------------------------------------------------------------------------------- /scripts/testlstm.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.LSTMnextWord 2 | 3 | val dir="d:/data/twitter/featurized3/" 4 | val wlim = 10000 5 | 6 | val a0 = loadIMat(dir + "sentfeats000000.imat.lz4")(1,?); 7 | val igood = find((a0 < wlim) *@ (a0 >= 0)); 8 | val a = a0(0,igood); 9 | //val a = a1(0,0->400000); 10 | 11 | val ta0 = loadIMat(dir + "sentfeats000001.imat.lz4")(1,?) 
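// Apply the same vocabulary filter to the held-out file: *@ is BIDMat's
// elementwise multiply, so multiplying the two 0/1 masks ANDs them, and
// find() returns the indices of tokens with ids in [0, wlim).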
12 | val tigood = find((ta0 < wlim) *@ (ta0 >= 0)); 13 | val ta = ta0(0,tigood); 14 | 15 | val (nn,opts) = LSTMnextWord.learner(a) 16 | 17 | //opts.aopts = opts 18 | 19 | //opts.useGPU = false 20 | //Mat.useCache = false 21 | 22 | opts.npasses = 3 23 | opts.lrate = 0.3f 24 | opts.batchSize=10000 25 | opts.width=5; 26 | opts.height=1; 27 | opts.dim=128; 28 | opts.kind = 3; 29 | opts.nvocab = 10000; 30 | opts.autoReset=false 31 | opts.bylevel = false; 32 | //opts.debug =1; 33 | 34 | opts.reg1weight = 0.00001 35 | 36 | val dnn = nn.model.asInstanceOf[LSTMnextWord] 37 | nn.train 38 | 39 | val ll = dnn.layers 40 | val d1 = ll(3).asInstanceOf[BIDMach.networks.LSTMLayer] 41 | val d2 = ll(4).asInstanceOf[BIDMach.networks.LSTMLayer] 42 | val dl1 = d1.internal_layers 43 | val dl2 = d2.internal_layers 44 | 45 | val in = ll(0).output.asInstanceOf[GSMat]; 46 | val rin = IMat(new GIMat(1, in.nc, in.ir, in.nc)); 47 | val dict = Dict(loadSBMat(dir+"../alldict.gz")); 48 | 49 | val lres = nn.results.ncols 50 | mean(nn.results(?,(lres-11)->(lres-1)),2) 51 | 52 | 53 | -------------------------------------------------------------------------------- /scripts/testnet.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.Net 2 | 3 | val dir="/mnt/BIDMach/data/rcv1/" 4 | 5 | val a0 = loadSMat(dir + "docs.smat.lz4")(0->100000,?) 6 | val c0 = loadFMat(dir + "cats.fmat.lz4")(0->100,?) 7 | val rr = rand(c0.ncols,1); 8 | val (ss, ii) = sort2(rr); 9 | val a = a0(?,ii); 10 | val c = c0(?,ii); 11 | 12 | val ta = loadSMat(dir + "testdocs.smat.lz4")(0->100000,0->23000) 13 | val tc = loadFMat(dir + "testcats.fmat.lz4")(0->100,0->23000) 14 | 15 | val (nn,opts) = Net.learnerX(a,c); 16 | 17 | opts.aopts = opts 18 | opts.batchSize=200 19 | opts.reg1weight = 0.0001 20 | opts.npasses = 2 21 | opts.hasBias = true 22 | opts.links = iones(100,1); 23 | opts.lrate = 0.4f // best for 6-layer 24 | opts.lrate = 0.01f 25 | opts.texp = 0.3f 26 | opts.nweight = 1e-4f 27 | //opts.useGPU = false 28 | 29 | val net = Net.dnodes4(2,500,0.5f,100,opts,2); 30 | opts.nodeset = net 31 | 32 | val dnn = nn.model.asInstanceOf[Net] 33 | 34 | nn.train 35 | 36 | 37 | val (mm,mopts) = Net.predictor(dnn, ta); 38 | val dmm = mm.model.asInstanceOf[Net] 39 | mopts.batchSize=1000 40 | 41 | mm.predict 42 | 43 | val pc = FMat(mm.preds(0)) 44 | 45 | val rc = roc2(pc, tc, 1-tc, 1000) 46 | val counts = sum(tc,2); 47 | println("auc6 = %5.4f, auc weighted mean = %5.4f" format (mean(rc)(6), (mean(rc) * counts / sum(counts)).dv)); 48 | 49 | -------------------------------------------------------------------------------- /scripts/testrecv_local.ssc: -------------------------------------------------------------------------------- 1 | import BIDMat.{CMat,CSMat,DMat,Dict,FMat,FND,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,GND,HMat,IDict,Image,IMat,LMat,Mat,SMat,SBMat,SDMat,TMat} 2 | import BIDMat.MatFunctions._ 3 | import BIDMat.SciFunctions._ 4 | import BIDMat.Solvers._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.Learner 7 | import BIDMach.models.{Click,FM,GLM,KMeans,KMeansw,LDA,LDAgibbs,Model,NMF,SFA,RandomForest,SVD} 8 | import BIDMach.networks.{Net} 9 | import BIDMach.datasources.{DataSource,MatSource,FileSource,SFileSource} 10 | import BIDMach.datasinks.{DataSink,MatSink} 11 | import BIDMach.mixins.{CosineSim,Perplexity,Top,L1Regularizer,L2Regularizer} 12 | import BIDMach.updaters.{ADAGrad,Batch,BatchNorm,Grad,IncMult,IncNorm,Telescoping} 13 | import BIDMach.causal.{IPTW} 14 | import 
BIDMach.allreduce.{Master,Worker,Command} 15 | import BIDMach.models.GLM 16 | 17 | import scala.concurrent.Future 18 | import scala.concurrent.ExecutionContext.Implicits.global 19 | 20 | Mat.checkMKL(false) 21 | Mat.checkCUDA 22 | 23 | val data_dir = "/mnt/BIDMach/data/MNIST8M/parts/" 24 | val (nn, nnopts) = GLM.learner(data_dir+"data%02d.fmat.lz4", data_dir+"cats%02d.fmat.lz4") 25 | 26 | nnopts.useGPU = true; 27 | nnopts.nstart = 0; 28 | nnopts.nend = 0; 29 | nnopts.order = 0; 30 | nnopts.lookahead = 2; 31 | nnopts.featType = 1; 32 | nnopts.links = 2*iones(10,1); 33 | nnopts.eltsPerSample = 300; 34 | nnopts.targets = mkdiag(ones(10,1)) \ zeros(10, 784); 35 | nnopts.rmask = zeros(1,10) \ ones(1, 784); 36 | 37 | nnopts.batchSize = 500; 38 | nnopts.npasses = 1; 39 | nnopts.lrate = 0.001; // for logistic 40 | 41 | val w = new Worker(); 42 | val wopts = w.opts; 43 | wopts.trace = 4; 44 | wopts.machineTrace = 1; 45 | wopts.commandSocketNum = 12345 46 | wopts.responseSocketNum = 12346 47 | wopts.peerSocketNum = 12347 48 | 49 | w.start(nn) 50 | 51 | nn.paused = true 52 | 53 | // Future { 54 | // nn.train 55 | // } 56 | -------------------------------------------------------------------------------- /scripts/testrf.ssc: -------------------------------------------------------------------------------- 1 | val (mm,opts) = RandomForest.learner( 2 | "/opt/BIDMach/data/MNIST8M/parts/data%02d.fmat.lz4", 3 | "/opt/BIDMach/data/MNIST8M/parts/cats%02d.imat.lz4" 4 | ) 5 | opts.useGPU = true 6 | opts.batchSize = 20000 7 | opts.depth = 10 8 | // opts.nend = 8 9 | opts.ntrees = 100 10 | opts.ncats = 10 11 | opts.impurity = 0 12 | 13 | opts.nsamps = 12 14 | opts.nnodes = 50000 15 | opts.nbits = 16 16 | mm.train 17 | -------------------------------------------------------------------------------- /scripts/testrforest.ssc: -------------------------------------------------------------------------------- 1 | val mdir = "../data/MNIST8M/parts/" 2 | 3 | val (nn, opts) = RandomForest.learner(mdir+"data%02d.fmat.lz4", mdir+"cats%02d.imat.lz4") 4 | 5 | opts.nend = 70 6 | opts.batchSize = 20000 7 | opts.depth = 30 8 | opts.ntrees = 32 9 | opts.nsamps = 32 10 | opts.nnodes = 300000 11 | opts.nbits = 16 12 | opts.gain = 0.001f 13 | opts.ncats = 10 14 | 15 | val rf = nn.model.asInstanceOf[RandomForest] 16 | 17 | nn.train 18 | 19 | -------------------------------------------------------------------------------- /scripts/testsend_local.ssc: -------------------------------------------------------------------------------- 1 | import java.net.{InetAddress,InetSocketAddress} 2 | import BIDMach.allreduce.{Master,Worker,Command} 3 | 4 | val addresses = new Array[InetSocketAddress](1) 5 | addresses(0) = new InetSocketAddress("0.0.0.0", 12345) 6 | 7 | val m = new Master(); 8 | val opts = m.opts; 9 | opts.trace = 3; 10 | opts.intervalMsec = 2000; 11 | //opts.limitFctn = Master.powerLimitFctn 12 | opts.limit = 1000000 13 | opts.timeScaleMsec = 2e-3f 14 | opts.permuteAlways = false 15 | 16 | opts.machine_threshold = 0.75 17 | opts.min_time_to_wait_for_all = 3000 18 | opts.time_threshold = 5000 19 | 20 | 21 | val nmachines = addresses.length; 22 | 23 | val gmods = irow(nmachines); 24 | val gmachines = irow(0->nmachines); 25 | 26 | m.init 27 | m.config(gmods, gmachines, addresses) 28 | m.sendConfig 29 | m.setMachineNumbers 30 | 31 | //m.startLearners 32 | //m.startUpdates 33 | //m.permuteAllreduce(0,1000000) 34 | 35 | -------------------------------------------------------------------------------- /scripts/testsvd.ssc: 
-------------------------------------------------------------------------------- 1 | // Run approx. SVD on the MNIST8M dataset 2 | 3 | val dir="/code/BIDMach/data/MNIST8M/parts/" 4 | val (nn, opts) = SVD.learner(dir+"data%02d.fmat.lz4"); 5 | 6 | opts.nend = 10; 7 | opts.dim = 20; 8 | opts.npasses = 10; 9 | opts.batchSize = 10000; 10 | opts.useDouble = true; 11 | opts.pstep = 0.01f 12 | opts.miniBatchPasses = 1; 13 | opts.batchesPerUpdate = 200; 14 | opts.updateAll = true 15 | opts.lookahead = 2; 16 | opts.evalType = 0; 17 | opts.order = 1; 18 | opts.doRayleighRitz = false 19 | opts.autoReset = false 20 | opts.subMean = false 21 | //opts.traceFileSource = 1 22 | //opts.useGPU = false 23 | 24 | val model = nn.model.asInstanceOf[SVD] 25 | 26 | nn.train 27 | 28 | // Singular values and vectors 29 | 30 | val svals = FMat(nn.modelmats(1)); 31 | val svecs = FMat(nn.modelmats(0)); 32 | 33 | // Compute M * M^t directly to compute a reference SVD (can only do this 34 | // for small feature spaces). 35 | 36 | 37 | -------------------------------------------------------------------------------- /scripts/testword2vec.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.Word2Vec 2 | 3 | val mdir = "../data/word2vec/data/" 4 | 5 | val (nn, opts) = Word2Vec.learner(mdir+"train%05d.imat.lz4"); 6 | 7 | opts.nstart = 0; 8 | opts.nend = 7; 9 | opts.npasses = 4; 10 | opts.batchSize = 1000000; 11 | opts.lrate = 1e-3f 12 | opts.vexp = 0.5f 13 | opts.nreuse = 5 14 | opts.dim = 300 15 | opts.vocabSize = 100000 16 | 17 | opts.useGPU = true; 18 | //opts.autoReset = false; 19 | //Mat.useMKL = false; 20 | 21 | nn.train 22 | 23 | val mod = nn.model.asInstanceOf[Word2Vec] 24 | 25 | //saveFMat(mdir+"model0.fmat.lz4", FMat(mod.modelmats(0))) 26 | 27 | //saveFMat(mdir+"model1.fmat.lz4", FMat(mod.modelmats(1))) 28 | 29 | val test = loadIMat(mdir+"test00000.imat.lz4"); 30 | 31 | val (mm,mopts) = Word2Vec.predictor(mod, test); 32 | 33 | mopts.useGPU = opts.useGPU 34 | mm.predict 35 | 36 | val score = mean(mm.results(0,0->(mm.results.ncols-2))); 37 | 38 | val dict = loadCSMat(mdir+"dict.csmat.lz4"); 39 | 40 | Word2Vec.saveGoogleW2V(dict, FMat(mod.modelmats(0)), mdir+"googmodel.bin", true); -------------------------------------------------------------------------------- /scripts/testword2vecp.ssc: -------------------------------------------------------------------------------- 1 | import BIDMach.networks.Word2Vec 2 | 3 | val mdir = "/code/word2vec/data/"; 4 | 5 | val (nn, opts) = Word2Vec.learnPar(mdir+"data%03d.imat.lz4"); 6 | 7 | opts.nstart = 0; 8 | opts.nend = 7; 9 | opts.npasses = 1; 10 | opts.batchSize = 1000000; 11 | opts.lrate = 1e-3f; 12 | opts.vexp = 0.5f 13 | opts.nreuse = 5 14 | opts.dim = 300 15 | opts.vocabSize = 400000 16 | 17 | opts.syncStep = 256 18 | 19 | //opts.useGPU = false; 20 | //opts.autoReset = false; 21 | //Mat.useMKL = false; 22 | 23 | nn.train 24 | 25 | val mod0 = nn.models(0).asInstanceOf[Word2Vec] 26 | 27 | //saveFMat(mdir+"model0.fmat.lz4", FMat(mod0.modelmats(0))) 28 | 29 | //saveFMat(mdir+"model1.fmat.lz4", FMat(mod0.modelmats(1))) 30 | -------------------------------------------------------------------------------- /scripts/tmp.sc: -------------------------------------------------------------------------------- 1 | println("ran script") 2 | System.exit(0); 3 | -------------------------------------------------------------------------------- /scripts/trainLSTM.ssc: 
-------------------------------------------------------------------------------- 1 | 2 | /** 3 | * LSTM training script. 4 | * 5 | */ 6 | import BIDMach.networks.SeqToSeq 7 | 8 | val dir = "/data01/livejournal/srcdst/"; // Directory for input data 9 | 10 | val (nn, opts) = SeqToSeq.learner(dir + "src%04d.smat.lz4", dir + "dst%04d.smat.lz4"); 11 | 12 | opts.lrate = 0.1f; // Learning rate 13 | opts.nvocab = 100000; // Vocabulary limit 14 | opts.npasses = 2; // Number of passes over the dataset 15 | opts.height = 2; // Height of the network 16 | opts.dim = 256; // Dimension of LSTM units 17 | opts.batchSize = 128; // Batch size 18 | opts.nstart = 0; // File start number 19 | opts.nend = 1132; // File end number 20 | opts.checkPointFile = dir + "../models/livejournal_256d_1lr_%02d/"; // Where to save models 21 | opts.checkPointInterval = 24f; // How often to save in hours 22 | opts.netType = 0; // Net type (softmax=0, or negsampling=1) 23 | opts.scoreType = 1; // Score type (logloss=0, accuracy=1) 24 | opts.inwidth = 30; // Max input sentence length (truncates) 25 | opts.outwidth = 30; // Max output sentence length (truncates) 26 | opts.hasBias = true; // Use bias terms in linear layers 27 | opts.pstep = 0.0001f; // How often to print 28 | opts.cumScore = 3; // Accumulate scores for less-noisy printing 29 | opts.PADsym = 1; // The padding symbol 30 | opts.OOVsym = 2; // The OOV symbol 31 | opts.STARTsym = 0; // The start symbol 32 | opts.reg1weight = 1e-9f // L1 regularization weight 33 | 34 | println(opts.what) 35 | 36 | nn.train 37 | -------------------------------------------------------------------------------- /scripts/workout.ssc: -------------------------------------------------------------------------------- 1 | 2 | // This script needs to be run from /scripts 3 | 4 | var useGPU=true 5 | var doTwitter=false 6 | 7 | println("\n<<<<<<< Testing with GPU >>>>>>>") 8 | :load workout_slave.ssc 9 | 10 | println("\n<<<<<<< Testing without GPU >>>>>>>") 11 | useGPU = false 12 | :load workout_slave.ssc 13 | 14 | println("\n<<<<<<< Testing without MKL >>>>>>>") 15 | Mat.useMKL = false 16 | :load workout_slave.ssc 17 | -------------------------------------------------------------------------------- /scripts/workout2.ssc: -------------------------------------------------------------------------------- 1 | :silent 2 | 3 | // This script needs to be run from /scripts 4 | 5 | var useGPU=false 6 | var doTwitter=false 7 | 8 | println("\n<<<<<<< Testing with GPU >>>>>>>") 9 | :load workout_slave.ssc 10 | 11 | println("\n<<<<<<< Testing without GPU >>>>>>>") 12 | useGPU = false 13 | :load workout_slave.ssc 14 | 15 | println("\n<<<<<<< Testing without MKL >>>>>>>") 16 | Mat.useMKL = false 17 | :load workout_slave.ssc 18 | :silent -------------------------------------------------------------------------------- /shortpath.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo %~s1% 3 | -------------------------------------------------------------------------------- /src/main/C/newparse/configure: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # If on Windows, fix ZLIBPATH below to point to zlib.lib. 4 | 5 | if [[ "$1" == win* ]]; then 6 | ZLIBPATH="/LIBPATH:/code/zlib/lib/ zlib.lib" 7 | MYDEFS="/D WITHGZS" 8 | RELOPTS="/O2 /I . ${MYDEFS} /EHsc /D NDEBUG /Gd /GF /MT" 9 | DBGOPTS="/EHsc /I . 
${MYDEFS} /D _DEBUG /Gd /GF /MTd /W4 /wd4996" 10 | RELLINK="${ZLIBPATH} /MACHINE:AMD64" 11 | DBGLINK="${ZLIBPATH} /NODEFAULTLIB:LIBCMT /MACHINE:AMD64" 12 | CC="cl" 13 | CPP="cl" 14 | LD="link" 15 | else 16 | GCCV=`gcc -dumpversion` 17 | GV=`echo $GCCV | sed -e 's/\.\([0-9][0-9]\)/\1/g' -e 's/\.\([0-9]\)/0\1/g' -e 's/^[0-9]\{3,4\}$/&00/'` 18 | 19 | if [ $GV -ge "40700" ] ; then 20 | CPPFLAGS="-std=c++11" 21 | else 22 | if [ $GV -ge "40400" ] ; then 23 | CPPFLAGS="-std=c++0x" 24 | fi 25 | fi 26 | RELOPTS="${CFLAGS} -O2 -DNDEBUG -DWITHGZS -Wno-deprecated -I." 27 | DBGOPTS="${CFLAGS} -O2 -DDEBUG -DWITHGZS -g -I." 28 | RELLINK="-lz" 29 | DBGLINK="-lz" 30 | CC="gcc" 31 | CPP="g++" 32 | LD="g++" 33 | fi 34 | 35 | if [[ "$1" == *debug ]]; then 36 | CC_OPTS="${DBGOPTS}" 37 | LINK_OPTS="${DBGLINK}" 38 | else 39 | CC_OPTS="${RELOPTS}" 40 | LINK_OPTS="${RELLINK}" 41 | fi 42 | 43 | echo "CC=$CC" > makefile.incl 44 | echo "CPP=$CPP" >> makefile.incl 45 | echo "LD=$LD" >> makefile.incl 46 | echo "CC_OPTS=$CC_OPTS" >> makefile.incl 47 | echo "CPPFLAGS=$CPPFLAGS" >> makefile.incl 48 | echo "LINK_OPTS=$LINK_OPTS" >> makefile.incl 49 | 50 | -------------------------------------------------------------------------------- /src/main/C/newparse/makefile: -------------------------------------------------------------------------------- 1 | 2 | include makefile.incl 3 | 4 | ifeq ($(CC),gcc) 5 | include makefile.gcc 6 | else 7 | include makefile.w32 8 | endif 9 | 10 | install: all 11 | mkdir -p ../../../../cbin 12 | cp *.exe ../../../../cbin 13 | mkdir -p ../../../../src/main/resources/cbin 14 | cp *.exe ../../../../src/main/resources/cbin -------------------------------------------------------------------------------- /src/main/C/newparse/makefile.gcc: -------------------------------------------------------------------------------- 1 | 2 | .SUFFIXES: 3 | .SUFFIXES: .c .cpp .o .exe .lxc .flex 4 | 5 | EXES=xmltweet.exe xmlwiki.exe trec.exe tparse.exe parsevw.exe tparse2.exe 6 | OBJS=newparse.o utils.o gzstream.o 7 | 8 | .SECONDARY: xmltweet.lxc xmlwiki.lxc trec.lxc 9 | 10 | all: $(EXES) 11 | 12 | .flex.lxc: 13 | flex -o $@ $< 14 | 15 | tparse.exe: gzstream.o utils.o tparse.o utils.h 16 | $(LD) -o tparse.exe tparse.o utils.o gzstream.o $(LINK_OPTS) 17 | 18 | tparse2.exe: gzstream.o utils.o tparse2.o utils.h 19 | $(LD) -o tparse2.exe tparse2.o utils.o gzstream.o $(LINK_OPTS) 20 | 21 | parsevw.exe: gzstream.o utils.o parsevw.o utils.h 22 | $(LD) -o parsevw.exe parsevw.o utils.o gzstream.o $(LINK_OPTS) 23 | 24 | .o.exe: $(OBJS) 25 | $(LD) -o $@ $(OBJS) $< $(LINK_OPTS) 26 | 27 | .cpp.o: utils.h gzstream.h 28 | $(CPP) $(CPPFLAGS) $(CC_OPTS) -o $@ -c $< 29 | 30 | .lxc.o: 31 | $(CC) $(CC_OPTS) -DYY_NO_UNISTD_H -o $@ -c -x c $< 32 | 33 | $(EXES): $(OBJS) 34 | 35 | gzstream.o: gzstream.h 36 | 37 | clean: 38 | rm -f $(EXES) *.o *.lxc 39 | -------------------------------------------------------------------------------- /src/main/C/newparse/makefile.w32: -------------------------------------------------------------------------------- 1 | 2 | .SUFFIXES: 3 | .SUFFIXES: .c .cpp .obj .exe .lxc .flex 4 | 5 | EXES=xmltweet.exe xmlwiki.exe trec.exe tparse.exe parsevw.exe tparse2.exe 6 | OBJS=newparse.obj utils.obj gzstream.obj 7 | 8 | .SECONDARY: xmltweet.lxc xmlwiki.lxc trec.lxc 9 | 10 | all: $(EXES) 11 | 12 | .flex.lxc: 13 | flex -o $@ $< 14 | 15 | tparse.exe: gzstream.obj utils.obj tparse.obj utils.h 16 | $(LD) tparse.obj utils.obj gzstream.obj $(LINK_OPTS) /OUT:tparse.exe 17 | 18 | tparse2.exe: gzstream.obj utils.obj tparse2.obj 
utils.h 19 | $(LD) tparse2.obj utils.obj gzstream.obj $(LINK_OPTS) /OUT:tparse2.exe 20 | 21 | parsevw.exe: gzstream.obj utils.obj parsevw.obj utils.h 22 | $(LD) parsevw.obj utils.obj gzstream.obj $(LINK_OPTS) /OUT:parsevw.exe 23 | 24 | .obj.exe: $(OBJS) 25 | $(LD) $(OBJS) $< $(LINK_OPTS) /OUT:"$@" 26 | 27 | .cpp.obj: utils.h gzstream.h 28 | $(CPP) $(CC_OPTS) /Fo"$@" /c $< 29 | 30 | .lxc.obj: 31 | $(CPP) $(CC_OPTS) /DYY_NO_UNISTD_H /Fo"$@" /c /Tc"$<" 32 | 33 | $(EXES): $(OBJS) 34 | 35 | gzstream.obj: gzstream.h 36 | 37 | clean: 38 | rm -f $(EXES) *.obj *.lxc 39 | -------------------------------------------------------------------------------- /src/main/C/newparse/trec.flex: -------------------------------------------------------------------------------- 1 | /* Scanner for TREC format */ 2 | 3 | %{ 4 | extern int checkword(char *); 5 | extern void addtok(int tok); 6 | extern int parsedate(char * str); 7 | extern int numlines; 8 | 9 | %} 10 | 11 | %option never-interactive 12 | %option noyywrap 13 | 14 | LETTER [a-zA-Z_] 15 | DIGF [0-9][0-9][0-9][0-9] 16 | DIGT [0-9][0-9] 17 | DIGIT [0-9] 18 | PUNCT [;:,.?!] 19 | 20 | %% 21 | 22 | -?{DIGIT}+ { 23 | #if __STDC_VERSION__ >= 199901L 24 | long long iv = strtoll(yytext, NULL, 10); 25 | #else 26 | long iv = strtol(yytext, NULL, 10); 27 | #endif 28 | addtok(iv); 29 | iv = iv >> 31; 30 | if (iv > 0 || iv < -1) { 31 | addtok(iv); 32 | } 33 | } 34 | 35 | -?{DIGIT}+"."{DIGIT}* { 36 | float f = (float)strtod(yytext, NULL); 37 | int iv = *((int *)(&f)); 38 | addtok(iv >> 1); 39 | } 40 | 41 | {DIGF}"-"{DIGT}"-"{DIGT}"T"{DIGT}":"{DIGT}":"{DIGT}("-"|"+"){DIGT}":"{DIGT} { 42 | int tt = parsedate(yytext); 43 | addtok(tt); 44 | } 45 | 46 | {LETTER}+ { 47 | int iv = checkword(yytext); 48 | } 49 | 50 | "<"{LETTER}+">" { 51 | int iv = checkword(yytext); 52 | } 53 | 54 | "</"{LETTER}+">" { 55 | int iv = checkword(yytext); 56 | } 57 | 58 | ".I" { 59 | int iv = checkword(yytext); 60 | } 61 | 62 | ".W" { 63 | int iv = checkword(yytext); 64 | } 65 | 66 | {PUNCT} { 67 | int iv = checkword(yytext); 68 | } 69 | 70 | "..""."* { 71 | char ell[] = "..."; 72 | int iv = checkword(ell); 73 | } 74 | 75 | [\n] { 76 | numlines++; 77 | if (numlines % 1000000 == 0) { 78 | fprintf(stderr, "\r%05d lines", numlines); 79 | fflush(stderr); 80 | } 81 | } 82 | 83 | . 
{} 84 | 85 | %% 86 | 87 | -------------------------------------------------------------------------------- /src/main/java/edu/berkeley/bvlc/CAFFE.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.bvlc; 2 | 3 | public final class CAFFE { 4 | 5 | private CAFFE() {} 6 | 7 | static { 8 | LibUtils.loadLibrary("caffe"); 9 | } 10 | 11 | public static native void set_mode(int mode); 12 | 13 | public static native void set_phase(int phase); 14 | 15 | public static native int get_mode(); 16 | 17 | public static native int get_phase(); 18 | 19 | public static native void set_device(int n); 20 | 21 | public static native void DeviceQuery(); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/edu/berkeley/bvlc/LAYER.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.bvlc; 2 | 3 | public final class LAYER { 4 | 5 | static { 6 | LibUtils.loadLibrary("caffe"); 7 | } 8 | 9 | private LAYER() {} 10 | 11 | protected LAYER(long shptr) { 12 | _shptr = shptr; 13 | } 14 | 15 | public int num_blobs() {if (_shptr != 0) return num_blobs(_shptr); else throw new RuntimeException("Layer uninitialized");} 16 | 17 | public BLOB blob(int i) { 18 | if (_shptr == 0) { 19 | throw new RuntimeException("Layer uninitialized"); 20 | } else { 21 | int n = num_blobs(); 22 | if (i < 0 || i >= n) { 23 | throw new RuntimeException("Layer blob index "+i+" out of range (0, "+(n-1)+")"); 24 | } 25 | return new BLOB(blob(_shptr, i)); 26 | } 27 | } 28 | 29 | @Override 30 | protected void finalize() { 31 | if (_shptr != 0) clearLayer(_shptr); 32 | _shptr = 0; 33 | } 34 | 35 | private long _shptr = 0; 36 | 37 | private static native int num_blobs(long ref); 38 | 39 | private static native long blob(long ref, int i); 40 | 41 | private static native int clearLayer(long ref); 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/edu/berkeley/bvlc/SGDSOLVER.java: -------------------------------------------------------------------------------- 1 | package edu.berkeley.bvlc; 2 | 3 | public final class SGDSOLVER { 4 | 5 | static { 6 | LibUtils.loadLibrary("caffe"); 7 | } 8 | 9 | public SGDSOLVER(String pfile) { 10 | _sptr = fromParams(pfile); 11 | _net = new NET(net(_sptr)); 12 | } 13 | 14 | public NET net() {return _net;} 15 | 16 | public void Solve() {if (_sptr != 0) Solve(_sptr);} 17 | 18 | public void SolveResume(String s) {if (_sptr != 0) SolveResume(_sptr, s);} 19 | 20 | @Override 21 | protected void finalize() { 22 | if (_sptr != 0) clearSGDSolver(_sptr); 23 | } 24 | 25 | private final long _sptr; 26 | 27 | private final NET _net; 28 | 29 | private static native long fromParams(String name); 30 | 31 | private static native long net(long n); 32 | 33 | private static native void Solve(long n); 34 | 35 | private static native void SolveResume(long n, String s); 36 | 37 | private static native void clearSGDSolver(long ref); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/BytesListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface BytesListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.BytesList) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated bytes value = 1; 12 | */ 13 | java.util.List<com.google.protobuf.ByteString> getValueList(); 14 | /** 15 | * repeated bytes value = 1; 16 | */ 17 | int getValueCount(); 18 | /** 19 | * repeated bytes value = 1; 20 | */ 21 | com.google.protobuf.ByteString getValue(int index); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/ExampleOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: example.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface ExampleOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Example) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * .tensorflow.Features features = 1; 12 | */ 13 | boolean hasFeatures(); 14 | /** 15 | * .tensorflow.Features features = 1; 16 | */ 17 | org.tensorflow.example.Features getFeatures(); 18 | /** 19 | * .tensorflow.Features features = 1; 20 | */ 21 | org.tensorflow.example.FeaturesOrBuilder getFeaturesOrBuilder(); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FeatureListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FeatureListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.FeatureList) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated .tensorflow.Feature feature = 1; 12 | */ 13 | java.util.List<org.tensorflow.example.Feature> 14 | getFeatureList(); 15 | /** 16 | * repeated .tensorflow.Feature feature = 1; 17 | */ 18 | org.tensorflow.example.Feature getFeature(int index); 19 | /** 20 | * repeated .tensorflow.Feature feature = 1; 21 | */ 22 | int getFeatureCount(); 23 | /** 24 | * repeated .tensorflow.Feature feature = 1; 25 | */ 26 | java.util.List<? extends org.tensorflow.example.FeatureOrBuilder> 27 | getFeatureOrBuilderList(); 28 | /** 29 | * repeated .tensorflow.Feature feature = 1; 30 | */ 31 | org.tensorflow.example.FeatureOrBuilder getFeatureOrBuilder( 32 | int index); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FeatureOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FeatureOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Feature) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * .tensorflow.BytesList bytes_list = 1; 12 | */ 13 | boolean hasBytesList(); 14 | /** 15 | * .tensorflow.BytesList bytes_list = 1; 16 | */ 17 | org.tensorflow.example.BytesList getBytesList(); 18 | /** 19 | * .tensorflow.BytesList bytes_list = 1; 20 | */ 21 | org.tensorflow.example.BytesListOrBuilder getBytesListOrBuilder(); 22 | 23 | /** 24 | * .tensorflow.FloatList float_list = 2; 25 | */ 26 | boolean hasFloatList(); 27 | /** 28 | * .tensorflow.FloatList float_list = 2; 29 | */ 30 | org.tensorflow.example.FloatList getFloatList(); 31 | /** 32 | * .tensorflow.FloatList float_list = 2; 33 | */ 34 | org.tensorflow.example.FloatListOrBuilder getFloatListOrBuilder(); 35 | 36 | /** 37 | * .tensorflow.Int64List int64_list = 3; 38 | */ 39 | boolean hasInt64List(); 40 | /** 41 | * .tensorflow.Int64List int64_list = 3; 42 | */ 43 | org.tensorflow.example.Int64List getInt64List(); 44 | /** 45 | * .tensorflow.Int64List int64_list = 3; 46 | */ 47 | org.tensorflow.example.Int64ListOrBuilder getInt64ListOrBuilder(); 48 | 49 | public org.tensorflow.example.Feature.KindCase getKindCase(); 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FeaturesOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FeaturesOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Features) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | *
12 |    * Map from feature name to feature.
13 |    * 
14 | * 15 | * map<string, .tensorflow.Feature> feature = 1; 16 | */ 17 | int getFeatureCount(); 18 | /** 19 | *
20 |    * Map from feature name to feature.
21 |    * 
22 | * 23 | * map<string, .tensorflow.Feature> feature = 1; 24 | */ 25 | boolean containsFeature( 26 | java.lang.String key); 27 | /** 28 | * Use {@link #getFeatureMap()} instead. 29 | */ 30 | @java.lang.Deprecated 31 | java.util.Map<java.lang.String, org.tensorflow.example.Feature> 32 | getFeature(); 33 | /** 34 | *
35 |    * Map from feature name to feature.
36 |    * 
37 | * 38 | * map<string, .tensorflow.Feature> feature = 1; 39 | */ 40 | java.util.Map<java.lang.String, org.tensorflow.example.Feature> 41 | getFeatureMap(); 42 | /** 43 | *
44 |    * Map from feature name to feature.
45 |    * 
46 | * 47 | * map<string, .tensorflow.Feature> feature = 1; 48 | */ 49 | 50 | org.tensorflow.example.Feature getFeatureOrDefault( 51 | java.lang.String key, 52 | org.tensorflow.example.Feature defaultValue); 53 | /** 54 | *
55 |    * Map from feature name to feature.
56 |    * 
57 | * 58 | * map<string, .tensorflow.Feature> feature = 1; 59 | */ 60 | 61 | org.tensorflow.example.Feature getFeatureOrThrow( 62 | java.lang.String key); 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/FloatListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface FloatListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.FloatList) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated float value = 1 [packed = true]; 12 | */ 13 | java.util.List<java.lang.Float> getValueList(); 14 | /** 15 | * repeated float value = 1 [packed = true]; 16 | */ 17 | int getValueCount(); 18 | /** 19 | * repeated float value = 1 [packed = true]; 20 | */ 21 | float getValue(int index); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/Int64ListOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 2 | // source: feature.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface Int64ListOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.Int64List) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * repeated int64 value = 1 [packed = true]; 12 | */ 13 | java.util.List<java.lang.Long> getValueList(); 14 | /** 15 | * repeated int64 value = 1 [packed = true]; 16 | */ 17 | int getValueCount(); 18 | /** 19 | * repeated int64 value = 1 [packed = true]; 20 | */ 21 | long getValue(int index); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/RecordWriter.java: -------------------------------------------------------------------------------- 1 | package org.tensorflow.example; 2 | import java.io.*; 3 | import java.util.zip.*; 4 | 5 | public class RecordWriter { 6 | private static final long serialVersionUID = 0L; 7 | 8 | public RecordWriter(DataInputStream ds) { 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/example/SequenceExampleOrBuilder.java: -------------------------------------------------------------------------------- 1 | // Generated by the protocol buffer compiler. DO NOT EDIT! 
2 | // source: example.proto 3 | 4 | package org.tensorflow.example; 5 | 6 | public interface SequenceExampleOrBuilder extends 7 | // @@protoc_insertion_point(interface_extends:tensorflow.SequenceExample) 8 | com.google.protobuf.MessageOrBuilder { 9 | 10 | /** 11 | * .tensorflow.Features context = 1; 12 | */ 13 | boolean hasContext(); 14 | /** 15 | * .tensorflow.Features context = 1; 16 | */ 17 | org.tensorflow.example.Features getContext(); 18 | /** 19 | * .tensorflow.Features context = 1; 20 | */ 21 | org.tensorflow.example.FeaturesOrBuilder getContextOrBuilder(); 22 | 23 | /** 24 | * .tensorflow.FeatureLists feature_lists = 2; 25 | */ 26 | boolean hasFeatureLists(); 27 | /** 28 | * .tensorflow.FeatureLists feature_lists = 2; 29 | */ 30 | org.tensorflow.example.FeatureLists getFeatureLists(); 31 | /** 32 | * .tensorflow.FeatureLists feature_lists = 2; 33 | */ 34 | org.tensorflow.example.FeatureListsOrBuilder getFeatureListsOrBuilder(); 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/org/tensorflow/io/RecordWriter.java: -------------------------------------------------------------------------------- 1 | package org.tensorflow.io; 2 | import java.io.*; 3 | import java.util.zip.*; 4 | 5 | public class RecordWriter { 6 | private static final long serialVersionUID = 0L; 7 | private static final int DEFAULT_BUFSIZE = 64*1024; 8 | 9 | private BufferedOutputStream ds_; 10 | 11 | public RecordWriter(OutputStream ds) { 12 | ds_ = new BufferedOutputStream(ds, DEFAULT_BUFSIZE); 13 | } 14 | 15 | public RecordWriter(String fname) throws IOException { 16 | FileOutputStream fout = new FileOutputStream(fname); 17 | ds_ = new BufferedOutputStream(fout, DEFAULT_BUFSIZE); 18 | } 19 | 20 | public int maskedCRC(byte [] bytes, int count) { 21 | return CRC32C.mask(CRC32C.getValue(bytes, 0, count)); 22 | } 23 | 24 | public int writeRecord(byte [] data) throws IOException { 25 | byte [] header = new byte[12]; 26 | byte [] footer = new byte[4]; 27 | CRC32C.encodeFixed64(header, 0, data.length); 28 | CRC32C.encodeFixed32(header, 8, maskedCRC(header, 8)); 29 | 30 | CRC32C.encodeFixed32(footer, 0, maskedCRC(data, data.length)); 31 | 32 | ds_.write(header, 0, 12); 33 | ds_.write(data, 0, data.length); 34 | ds_.write(footer, 0, 4); 35 | 36 | return 0; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | actor { 3 | provider = cluster 4 | } 5 | remote { 6 | log-remote-lifecycle-events = off 7 | netty.tcp { 8 | hostname = "127.0.0.1" 9 | port = 0 10 | } 11 | } 12 | 13 | cluster { 14 | seed-nodes = [ 15 | "akka.tcp://ClusterSystem@127.0.0.1:2551", 16 | "akka.tcp://ClusterSystem@127.0.0.1:2552"] 17 | 18 | # auto downing is NOT safe for production deployments. 19 | # you may want to use it during development, read more about it in the docs. 20 | auto-down-unreachable-after = 10s 21 | } 22 | log-dead-letters = 0 23 | log-dead-letters-during-shutdown = off 24 | } 25 | 26 | # Disable legacy metrics in akka-cluster. 27 | akka.cluster.metrics.enabled=off 28 | 29 | # Enable metrics extension in akka-cluster-metrics. 30 | //akka.extensions=["akka.cluster.metrics.ClusterMetricsExtension"] 31 | 32 | # Sigar native library extract location during tests. 33 | # Note: use per-jvm-instance folder when running multiple jvm on one host. 
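# NOTE (added comment, not in the original config): ${user.dir} is resolved by the
# config loader from the JVM system property of that name (the working directory),
# so each run extracts the Sigar natives under ./target/native.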
34 | akka.cluster.metrics.native-library-extract-folder=${user.dir}/target/native 35 | -------------------------------------------------------------------------------- /src/main/resources/lib/touch.txt: -------------------------------------------------------------------------------- 1 | touch 2 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/Copyright.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Regents of the University of California 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the <organization> nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | 26 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/Logging.scala: -------------------------------------------------------------------------------- 1 | package BIDMach 2 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GDMat,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat,TMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMat.Plotting._ 6 | import BIDMach.models._ 7 | import BIDMach.datasinks._ 8 | 9 | 10 | object Logging{ 11 | def logGradientL2Norm(model:Model,data:Array[Mat]):Array[Mat] = { 12 | val m = model.modelmats 13 | val res = new Array[Float](m.length) 14 | for(i<-0 until m.length){ 15 | res(i) = sum(snorm(m(i))).dv.toFloat 16 | } 17 | Array(new FMat(m.length,1,res)) 18 | } 19 | 20 | def logGradientL1Norm(model:Model,data:Array[Mat]):Array[Mat] = { 21 | val m = model.modelmats 22 | val res = new Array[Float](m.length) 23 | for(i<-0 until m.length){ 24 | res(i) = sum(sum(abs(m(i)))).dv.toFloat 25 | } 26 | Array(new FMat(m.length,1,res)) 27 | } 28 | 29 | def getResults(model:Model): Array[Mat] = { 30 | model.opts.logDataSink match { 31 | case f:FileSink=>{println("Found results at "+f.opts.ofnames.head(0));null} 32 | case m:MatSink=>m.mats 33 | case null=>{println("No logDataSink found");null} 34 | } 35 | } 36 | 37 | def getResults(l:Learner): Array[Mat] = getResults(l.model) 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/AllreduceDummyLearner.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce 2 | 3 | import BIDMach.Learner 4 | import BIDMach.networks.Net 5 | 6 | /** 7 | * A dummy learner for ease of test. 
Can be opted out or refactored if necessary. 8 | * @param learner 9 | * @param dummy_model 10 | */ 11 | class AllreduceDummyLearner(learner:Learner, dummy_model:AllreduceDummyModel) 12 | extends Learner(learner.datasource,dummy_model,learner.mixins, learner.updater, learner.datasink ,learner.opts) { 13 | 14 | def this(){ 15 | this(Net.learner("dummy learner")._1, new AllreduceDummyModel()) 16 | } 17 | 18 | 19 | override def train: Unit = { 20 | println("dummy model is training!") 21 | while(true){ 22 | this.ipass+=1 23 | myLogger.info("pass=%2d" format ipass) 24 | this.dummy_model.showSomeWork() 25 | } 26 | 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/AllreduceDummyModel.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce 2 | 3 | import BIDMach.models.Model 4 | import BIDMat.{FMat, Mat} 5 | 6 | class AllreduceDummyModel(val _modelmat: Array[Mat]) extends Model { 7 | def this(){ 8 | this(Array[Mat](FMat.ones(30,100),FMat.ones(100,30))) 9 | } 10 | 11 | 12 | override def modelmats:Array[Mat] = { 13 | _modelmat 14 | } 15 | override def init()={} 16 | override def dobatch(mats:Array[Mat], ipass:Int, here:Long)={} 17 | override def evalbatch(mats: Array[Mat], ipass: Int, here:Long):FMat = { 18 | FMat.zeros(0,0) 19 | } 20 | def showSomeWork(){ 21 | println("I'm learning something") 22 | Thread.sleep(1000) 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/AllreduceMessage.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce 2 | 3 | import akka.actor.ActorRef 4 | import scala.collection.mutable.ArrayBuffer 5 | 6 | 7 | // worker messages 8 | final case class StartAllreduce(config : RoundConfig) 9 | final case class CompleteAllreduce(srcId : Int, config : RoundConfig) 10 | 11 | final case class ScatterBlock(value : Array[Float], srcId : Int, destId : Int, chunkId : Int, config : RoundConfig) 12 | final case class ReduceBlock(value: Array[Float], srcId : Int, destId : Int, chunkId : Int, config : RoundConfig, count: Int) 13 | 14 | final case class AllreduceStats(outgoingFloats: Long, incomingFloats: Long) 15 | 16 | /** 17 | * Comparison overrides providing a (line master version, round) ordering, for a smooth transition when nodes are added or removed. 18 | */ 19 | final case class RoundConfig(lineMasterVersion : Int, round: Int, lineMaster : ActorRef, peerWorkers: Map[Int, ActorRef], workerId: Int) { 20 | def < (other : RoundConfig): Boolean = { 21 | return if (lineMasterVersion < other.lineMasterVersion || 22 | (lineMasterVersion == other.lineMasterVersion && round < other.round)) {true} 23 | else {false} 24 | } 25 | 26 | def == (other : RoundConfig): Boolean = { 27 | return if (lineMasterVersion == other.lineMasterVersion && round == other.round) {true} else {false} 28 | } 29 | 30 | def > (other : RoundConfig): Boolean = { 31 | return !(this < other || this == other) 32 | } 33 | } 34 | 35 | /* 36 | * Messages used by the Line Master 37 | */ 38 | final case class StartAllreduceTask(peerNodes: ArrayBuffer[ActorRef], lineMasterVersion : Int) 39 | final case class StopAllreduceTask(lineMasterVersion : Int) 40 | 41 | /* 42 | * For the grid master, in case we want to kill the node 43 | */ 44 | final case class StopAllreduceNode() 
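// NOTE (added illustrative sketch, not part of the original source): RoundConfig
// orders configurations lexicographically on (lineMasterVersion, round), so any round
// under a newer line-master version supersedes every round of an older one:
//
//   val v1r9 = RoundConfig(1, 9, lineMaster = null, peerWorkers = Map.empty, workerId = 0)
//   val v2r0 = RoundConfig(2, 0, lineMaster = null, peerWorkers = Map.empty, workerId = 0)
//   assert(v1r9 < v2r0)   // version takes precedence over round number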
-------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/binder/AllreduceBinder.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.binder 2 | 3 | import BIDMach.allreduce.binder.AllreduceBinder.{DataSink, DataSource} 4 | 5 | /** 6 | * Trait to specify source and sink, allowing binding data input/output to the all-reduce process. 7 | */ 8 | trait AllreduceBinder { 9 | 10 | def totalDataSize: Int 11 | 12 | def dataSource: DataSource 13 | 14 | def dataSink: DataSink 15 | 16 | } 17 | 18 | object AllreduceBinder { 19 | 20 | type DataSink = AllReduceOutput => Unit 21 | type DataSource = AllReduceInputRequest => AllReduceInput 22 | var updateCounts = 100 23 | 24 | } 25 | 26 | case class AllReduceInputRequest(iteration: Int) 27 | 28 | case class AllReduceInput(data: Array[Float]) 29 | 30 | case class AllReduceOutput(data: Array[Float], iteration: Int) 31 | 32 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/binder/AssertCorrectnessBinder.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.binder 2 | 3 | import BIDMach.allreduce.binder.AllreduceBinder.{DataSink, DataSource} 4 | 5 | 6 | class AssertCorrectnessBinder(dataSize: Int, checkpoint: Int) extends AllreduceBinder { 7 | 8 | val random = new scala.util.Random(100) 9 | val totalInputSample = 8 10 | 11 | lazy val randomFloats = { 12 | val nestedArray = new Array[Array[Float]](totalInputSample) 13 | for (i <- 0 until totalInputSample) { 14 | nestedArray(i) = Array.range(0, dataSize).toList.map(_ => random.nextFloat()).toArray 15 | } 16 | nestedArray 17 | } 18 | 19 | private def ~=(x: Double, y: Double, precision: Double = 1e-5) = { 20 | if ((x - y).abs < precision) true else false 21 | } 22 | 23 | override def dataSource: DataSource = r => { 24 | AllReduceInput(randomFloats(r.iteration % totalInputSample)) 25 | } 26 | 27 | override def dataSink: DataSink = r => { 28 | 29 | if (r.iteration % checkpoint == 0) { 30 | val inputUsed = randomFloats(r.iteration % totalInputSample) 31 | println(s"\n----Asserting #${r.iteration} output...") 32 | for (i <- 0 until dataSize) { 33 | val meanActual = r.data(i) 34 | val expected = inputUsed(i) 35 | assert(~=(expected, meanActual), s"Expected [$expected], but actual [$meanActual] at pos $i for iteration #${r.iteration}") 36 | } 37 | println("OK: Means match the expected value!") 38 | } 39 | 40 | } 41 | 42 | override def totalDataSize: Int = dataSize 43 | } 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/binder/NoOpBinder.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.binder 2 | import BIDMach.allreduce.binder.AllreduceBinder.{DataSink, DataSource} 3 | 4 | /** 5 | * For experimentation only; can be opted out or refactored. 
6 | */ 7 | class NoOpBinder(dataSize: Int, printFrequency: Int = 10) extends AllreduceBinder { 8 | 9 | 10 | val random = new scala.util.Random(100) 11 | val totalInputSample = 4 12 | 13 | lazy val randomFloats = { 14 | val nestedArray: Array[Array[Float]] = Array.ofDim(totalInputSample, dataSize) 15 | for (i <- 0 until totalInputSample) { 16 | for (j <- 0 until dataSize) 17 | nestedArray(i)(j) = random.nextFloat() 18 | } 19 | nestedArray 20 | } 21 | 22 | 23 | override def dataSource: DataSource = { inputRequest => 24 | if (inputRequest.iteration % printFrequency == 0) { 25 | println(s"--NoOpBinder: dumping model data at iteration ${inputRequest.iteration}--") 26 | } 27 | 28 | AllReduceInput(randomFloats(inputRequest.iteration % totalInputSample)) 29 | } 30 | 31 | override def dataSink: DataSink = { output => 32 | if (output.iteration % printFrequency == 0) { 33 | println(s"--NoOpBinder: reduce completed at iteration ${output.iteration}--") 34 | } 35 | 36 | } 37 | 38 | override def totalDataSize: Int = dataSize 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/allreduce/buffer/AllReduceBuffer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.allreduce.buffer 2 | 3 | 4 | abstract class AllReduceBuffer(dataSize: Int, 5 | peerSize: Int, 6 | maxChunkSize: Int) { 7 | 8 | type Buffer = Array[Array[Float]] 9 | 10 | val peerBuffer: Buffer = Array.ofDim(peerSize, dataSize) 11 | 12 | val numChunks = getNumChunk(dataSize) 13 | 14 | protected def store(data: Array[Float], srcId: Int, chunkId: Int) = { 15 | 16 | val array = peerBuffer(srcId) 17 | System.arraycopy( 18 | data, 0, 19 | array, chunkId * maxChunkSize, 20 | data.size) 21 | } 22 | 23 | protected def getNumChunk(size: Int) = { 24 | math.ceil(1f * size / maxChunkSize).toInt 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/caffe/Classifier.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.caffe 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,GMat,GIMat,GSMat,HMat,Image,IMat,ND,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.datasources._ 6 | import edu.berkeley.bvlc.SGDSOLVER 7 | import edu.berkeley.bvlc.NET 8 | import edu.berkeley.bvlc.CAFFE 9 | 10 | class Classifier { 11 | 12 | val net = new Net 13 | 14 | def init(model_file:String, pretrained_file:String, image_dims:Array[Int] = Array(256, 256), 15 | gpu:Boolean = false, mean_file:String = null, input_scale:Float = 1f, channel_swap:IMat = 2\1\0) = { 16 | 17 | net.init(model_file, pretrained_file); 18 | 19 | CAFFE.set_phase(1); 20 | 21 | CAFFE.set_mode(if (gpu) 1 else 0) 22 | 23 | if (image_dims != null) { 24 | net.set_image_dims(image_dims) 25 | } else { 26 | net.set_image_dims(Array(net.inwidth, net.inheight)) 27 | } 28 | 29 | if (mean_file != null) net.set_mean(mean_file) 30 | 31 | if (input_scale != 1f) net.set_input_scale(input_scale) 32 | 33 | if (channel_swap.asInstanceOf[AnyRef] != null) net.set_channel_swap(channel_swap) 34 | 35 | } 36 | 37 | def classify(im:Image):FMat = { 38 | val fnd = net.preprocess(im) 39 | net.clear_inputs 40 | net.add_input(fnd, 0, 0) 41 | net.forward 42 | net.output_data(0)(?,?,?,0) 43 | } 44 | 45 | 46 | } 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/caffe/SGDSolver.scala: 
-------------------------------------------------------------------------------- 1 | package BIDMach.caffe 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,GMat,GIMat,GSMat,HMat,Image,IMat,ND,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.datasources._ 6 | import edu.berkeley.bvlc.SGDSOLVER 7 | import edu.berkeley.bvlc.NET 8 | import edu.berkeley.bvlc.CAFFE 9 | 10 | class SGDSolver (val sgd:SGDSOLVER) { 11 | val net = sgd.net 12 | 13 | def Solve = sgd.Solve 14 | 15 | def SolveResume(fname:String) = sgd.SolveResume(fname) 16 | 17 | } 18 | 19 | object SGDSolver { 20 | def apply(paramFile:String):SGDSolver = new SGDSolver(new SGDSOLVER(paramFile)) 21 | } 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasinks/DataSink.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasinks 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import java.io._ 6 | 7 | @SerialVersionUID(100L) 8 | abstract class DataSink(val opts:DataSink.Opts = new DataSink.Options) extends Serializable { 9 | private var _GUID = Mat.myrand.nextLong 10 | def setGUID(v:Long):Unit = {_GUID = v} 11 | def GUID:Long = _GUID 12 | def put; 13 | def init:Unit = {} 14 | def close = {} 15 | private var _nmats = 0; 16 | def nmats = _nmats; 17 | def setnmats(k:Int) = {_nmats = k;} 18 | var omats:Array[Mat] = null 19 | } 20 | 21 | @SerialVersionUID(100L) 22 | object DataSink { 23 | trait Opts extends BIDMat.Opts { 24 | } 25 | 26 | class Options extends Opts {} 27 | } 28 | 29 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasinks/FileSink.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasinks 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,LMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.datasources._ 6 | import scala.collection.mutable.ListBuffer 7 | 8 | @SerialVersionUID(100L) 9 | class FileSink(override val opts:FileSink.Opts = new FileSink.Options) extends MatSink(opts) { 10 | var ifile = 0; 11 | var colsdone = 0; 12 | 13 | override def init = { 14 | blocks = new ListBuffer[Array[Mat]](); 15 | setnmats(opts.ofnames.length); 16 | omats = new Array[Mat](nmats); 17 | ifile = 0; 18 | opts match { 19 | case fopts:FileSource.Opts => { 20 | ifile = fopts.nstart; 21 | } 22 | } 23 | colsdone = 0; 24 | } 25 | 26 | override def put = { 27 | blocks += omats.map(MatSink.copyCPUmat); 28 | colsdone += omats(0).ncols; 29 | if (colsdone >= opts.ofcols) { 30 | mergeSaveBlocks; 31 | colsdone = 0; 32 | ifile += 1; 33 | blocks = new ListBuffer[Array[Mat]](); 34 | } 35 | } 36 | 37 | override def close () = { 38 | mergeSaveBlocks; 39 | } 40 | 41 | def mergeSaveBlocks = { 42 | mergeBlocks 43 | if (blocks.size > 0) { 44 | for (i <- 0 until opts.ofnames.length) { 45 | saveMat(opts.ofnames(i)(ifile), mats(i)); 46 | } 47 | } 48 | } 49 | } 50 | 51 | @SerialVersionUID(100L) 52 | object FileSink { 53 | trait Opts extends MatSink.Opts { 54 | var ofnames:List[(Int)=>String] = null; 55 | var ofcols = 100000; 56 | } 57 | 58 | class Options extends Opts { 59 | 60 | } 61 | } 62 | 63 | 
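// NOTE (added usage sketch, not part of the original source; the output path below is
// made up): FileSink accumulates output blocks and rolls over to a new numbered file
// every opts.ofcols columns, using one filename-generating function per output matrix:
//
//   val sopts = new FileSink.Options
//   sopts.ofnames = List((i:Int) => "/tmp/preds%04d.fmat.lz4" format i)
//   sopts.ofcols  = 100000   // columns (samples) written before starting the next file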
-------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasources/ArraySource.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasources 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMat.MatIOtrait 6 | import scala.concurrent.Future 7 | import scala.concurrent.ExecutionContextExecutor 8 | import java.io._ 9 | 10 | @SerialVersionUID(100L) 11 | class ArraySource(override val opts:ArraySource.Opts = new ArraySource.Options) extends IteratorSource(opts) { 12 | @transient var dataArray:Array[_ <: AnyRef] = null 13 | 14 | override def init = { 15 | dataArray = opts.dataArray 16 | super.init 17 | } 18 | 19 | override def iterHasNext:Boolean = { 20 | iblock += 1 21 | iblock < dataArray.length 22 | } 23 | 24 | override def hasNext:Boolean = { 25 | val matq = inMats(0) 26 | val matqnr = if (opts.dorows) matq.nrows else matq.ncols 27 | val ihn = iblock < dataArray.length 28 | if (! ihn && iblock > 0) { 29 | nblocks = iblock 30 | } 31 | (ihn || (matqnr - samplesDone) == 0); 32 | } 33 | 34 | override def iterNext() = { 35 | val marr = dataArray(iblock) 36 | marr match { 37 | case (key:AnyRef,v:MatIOtrait) => {inMats = v.get} 38 | case m:Mat => { 39 | if (inMats == null) inMats = Array[Mat](1); 40 | inMats(0) = m; 41 | } 42 | case ma:Array[Mat] => inMats = ma; 43 | } 44 | } 45 | 46 | override def close = { 47 | iblock = 0 48 | } 49 | } 50 | 51 | @SerialVersionUID(100L) 52 | object ArraySource { 53 | def apply(opts:ArraySource.Opts):ArraySource = { 54 | new ArraySource(opts); 55 | } 56 | 57 | trait Opts extends IteratorSource.Opts { 58 | @transient var dataArray:Array[_ <: AnyRef] = null 59 | } 60 | 61 | @SerialVersionUID(100L) 62 | class Options extends Opts {} 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/datasources/DataSource.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.datasources 2 | import BIDMat.{Mat,SBMat,CMat,CSMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import java.io._ 6 | 7 | @SerialVersionUID(100L) 8 | abstract class DataSource(val opts:DataSource.Opts = new DataSource.Options) extends Serializable { 9 | private var _GUID = Mat.myrand.nextLong 10 | def setGUID(v:Long):Unit = {_GUID = v} 11 | def GUID:Long = _GUID 12 | def next:Array[Mat] 13 | def hasNext:Boolean 14 | def reset:Unit 15 | def putBack(mats:Array[Mat],i:Int):Unit = {throw new RuntimeException("putBack not implemented")} 16 | def setupPutBack(n:Int,dim:Int):Unit = {throw new RuntimeException("putBack not implemented")} 17 | def nmats:Int 18 | def init:Unit 19 | def progress:Float 20 | def close = {} 21 | var omats:Array[Mat] = null 22 | var endmats:Array[Mat] = null 23 | var fullmats:Array[Mat] = null 24 | } 25 | 26 | @SerialVersionUID(100L) 27 | object DataSource { 28 | trait Opts extends BIDMat.Opts { 29 | var batchSize = 10000 30 | var sizeMargin = 3f 31 | var sample = 1f 32 | var addConstFeat:Boolean = false 33 | var featType:Int = 1 // 0 = binary features, 1 = linear features, 2 = threshold features 34 | var featThreshold:Mat = null 35 | var putBack = -1 36 | } 37 | 38 | class Options extends Opts {} 39 | } 40 | 41 | 
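// NOTE (added illustrative sketch, not part of the original source): concrete sources
// (e.g. MatSource, FileSource) all follow the pull contract defined above -- init once,
// drain with hasNext/next, then reset to rewind for another pass:
//
//   val ds: DataSource = ...    // any concrete source
//   ds.init
//   while (ds.hasNext) {
//     val mats = ds.next        // one mini-batch of up to opts.batchSize columns
//   }
//   ds.reset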
-------------------------------------------------------------------------------- /src/main/scala/BIDMach/mixins/Mixin.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.mixins 2 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat} 3 | import BIDMat.MatFunctions._ 4 | import BIDMat.SciFunctions._ 5 | import BIDMach.models._ 6 | 7 | @SerialVersionUID(100L) 8 | abstract class Mixin(val opts:Mixin.Opts = new Mixin.Options) extends Serializable { 9 | val options = opts 10 | var modelmats:Array[Mat] = null 11 | var updatemats:Array[Mat] = null 12 | var counter = 0 13 | 14 | def compute(mats:Array[Mat], step:Float) 15 | 16 | def score(mats:Array[Mat], step:Float):FMat 17 | 18 | def init(model:Model) = { 19 | modelmats = model.modelmats 20 | updatemats = model.updatemats 21 | } 22 | } 23 | 24 | object Mixin { 25 | trait Opts extends BIDMat.Opts { 26 | var mixinInterval = 1 27 | } 28 | 29 | class Options extends Opts {} 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/ForwardLayer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat} 4 | import BIDMat.MatFunctions._ 5 | import BIDMat.SciFunctions._ 6 | import BIDMach.datasources._ 7 | import BIDMach.updaters._ 8 | import BIDMach.mixins._ 9 | import BIDMach.models._ 10 | import BIDMach._ 11 | import edu.berkeley.bid.CPUMACH 12 | import edu.berkeley.bid.CUMACH 13 | import scala.util.hashing.MurmurHash3; 14 | import java.util.HashMap; 15 | import BIDMach.networks._ 16 | 17 | 18 | @SerialVersionUID(100L) 19 | class ForwardLayer(override val net:Net, override val opts:ForwardNodeOpts = new ForwardNode) extends Layer(net, opts) { 20 | 21 | override def forward = { 22 | val start = toc; 23 | inplaceNoConnectGetOutput(); 24 | 25 | output <-- inputData; 26 | // clearDeriv; 27 | forwardtime += toc - start; 28 | } 29 | 30 | override def backward = { 31 | } 32 | 33 | override def toString = { 34 | "forward@"+Integer.toHexString(hashCode % 0x10000).toString 35 | } 36 | } 37 | 38 | trait ForwardNodeOpts extends NodeOpts { 39 | } 40 | 41 | @SerialVersionUID(100L) 42 | class ForwardNode extends Node with ForwardNodeOpts { 43 | 44 | override def clone:ForwardNode = {copyTo(new ForwardNode).asInstanceOf[ForwardNode];} 45 | 46 | override def create(net:Net):ForwardLayer = {ForwardLayer(net, this);} 47 | 48 | override def toString = { 49 | "forward@"+Integer.toHexString(hashCode % 0x10000).toString 50 | } 51 | } 52 | 53 | @SerialVersionUID(100L) 54 | object ForwardLayer { 55 | 56 | def apply(net:Net) = new ForwardLayer(net, new ForwardNode); 57 | 58 | def apply(net:Net, opts:ForwardNode) = new ForwardLayer(net, opts); 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/MaxIndexLayer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat} 4 | import BIDMat.MatFunctions._ 5 | import BIDMat.SciFunctions._ 6 | import BIDMach.datasources._ 7 | import BIDMach.updaters._ 8 | import BIDMach.mixins._ 9 | import BIDMach.models._ 10 | import BIDMach._ 11 | import 
edu.berkeley.bid.CPUMACH 12 | import edu.berkeley.bid.CUMACH 13 | import scala.util.hashing.MurmurHash3; 14 | import java.util.HashMap; 15 | import BIDMach.networks._ 16 | 17 | @SerialVersionUID(100L) 18 | class MaxIndexLayer(override val net:Net, override val opts:MaxIndexNodeOpts = new MaxIndexNode) extends Layer(net, opts) { 19 | 20 | override def forward = { 21 | val start = toc; 22 | output = maxi2(inputData, 1)._2; 23 | forwardtime += toc - start; 24 | } 25 | 26 | override def backward = { 27 | val start = toc; 28 | backwardtime += toc - start; 29 | } 30 | 31 | override def toString = { 32 | "maxidx@"+Integer.toHexString(hashCode % 0x10000).toString 33 | } 34 | } 35 | 36 | trait MaxIndexNodeOpts extends NodeOpts { 37 | } 38 | 39 | @SerialVersionUID(100L) 40 | class MaxIndexNode extends Node with MaxIndexNodeOpts { 41 | 42 | override def clone:MaxIndexNode = {copyTo(new MaxIndexNode).asInstanceOf[MaxIndexNode];} 43 | 44 | override def create(net:Net):MaxIndexLayer = {MaxIndexLayer(net, this);} 45 | 46 | override def toString = { 47 | "maxidx@"+Integer.toHexString(hashCode % 0x10000).toString 48 | } 49 | } 50 | 51 | @SerialVersionUID(100L) 52 | object MaxIndexLayer { 53 | 54 | def apply(net:Net) = new MaxIndexLayer(net, new MaxIndexNode); 55 | 56 | def apply(net:Net, opts:MaxIndexNode) = new MaxIndexLayer(net, opts); 57 | } 58 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/NodeSet.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | @SerialVersionUID(100L) 4 | class NodeSet(val nnodes:Int, val nodes:Array[Node]) extends Serializable { 5 | 6 | def this(nnodes:Int) = this(nnodes, new Array[Node](nnodes)); 7 | 8 | def this(nodes:Array[Node]) = this(nodes.length, nodes); 9 | 10 | def apply(i:Int):Node = nodes(i); 11 | 12 | def update(i:Int, lopts:Node) = {nodes(i) = lopts; this} 13 | 14 | def size = nnodes; 15 | 16 | def length = nnodes; 17 | 18 | override def clone = copyTo(new NodeSet(nnodes)); 19 | 20 | def copyTo(lopts:NodeSet):NodeSet = { 21 | for (i <- 0 until nnodes) { 22 | lopts.nodes(i) = nodes(i).clone; 23 | nodes(i).myGhost = lopts.nodes(i); 24 | } 25 | for (i <- 0 until nnodes) { 26 | for (j <- 0 until nodes(i).inputs.length) { 27 | if (nodes(i).inputs(j) != null) lopts.nodes(i).inputs(j) = nodes(i).inputs(j).node.myGhost; 28 | } 29 | } 30 | lopts; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/BIDMach/networks/layers/SignLayer.scala: -------------------------------------------------------------------------------- 1 | package BIDMach.networks.layers 2 | 3 | import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat} 4 | import BIDMat.MatFunctions._ 5 | import BIDMat.SciFunctions._ 6 | import BIDMach.datasources._ 7 | import BIDMach.updaters._ 8 | import BIDMach.mixins._ 9 | import BIDMach.models._ 10 | import BIDMach._ 11 | import edu.berkeley.bid.CPUMACH 12 | import edu.berkeley.bid.CUMACH 13 | import scala.util.hashing.MurmurHash3; 14 | import java.util.HashMap; 15 | import BIDMach.networks._ 16 | 17 | 18 | /** 19 | * Sign layer. 
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/networks/layers/SignLayer.scala:
--------------------------------------------------------------------------------
package BIDMach.networks.layers

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,LMat,HMat,GMat,GDMat,GIMat,GLMat,GSMat,GSDMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.datasources._
import BIDMach.updaters._
import BIDMach.mixins._
import BIDMach.models._
import BIDMach._
import edu.berkeley.bid.CPUMACH
import edu.berkeley.bid.CUMACH
import scala.util.hashing.MurmurHash3;
import java.util.HashMap;
import BIDMach.networks._

/**
 * Sign layer. Applies the elementwise sign function to its input.
 */

@SerialVersionUID(100L)
class SignLayer(override val net:Net, override val opts:SignNodeOpts = new SignNode) extends Layer(net, opts) {

  override def forward = {
    val start = toc;
    inplaceNoConnectGetOutput();

    sign(inputData, output);

    forwardtime += toc - start;
  }

  override def backward = {
    val start = toc;

    backwardtime += toc - start;
  }

  override def toString = {
    "sign@"+Integer.toHexString(hashCode % 0x10000)
  }
}


trait SignNodeOpts extends NodeOpts {
}

@SerialVersionUID(100L)
class SignNode extends Node with SignNodeOpts {

  override def clone:SignNode = {copyTo(new SignNode).asInstanceOf[SignNode];}

  override def create(net:Net):SignLayer = {SignLayer(net, this);}

  override def toString = {
    "sign@"+Integer.toHexString(hashCode % 0x10000)
  }
}

@SerialVersionUID(100L)
object SignLayer {

  def apply(net:Net) = new SignLayer(net, new SignNode);

  def apply(net:Net, opts:SignNode) = new SignLayer(net, opts);
}
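The forward pass maps each element to -1, 0, or +1. A small illustrative snippet (assuming BIDMat's one-argument sign form; exact call forms may vary by version):

import BIDMat.MatFunctions._
import BIDMat.SciFunctions._

val x = row(-2.5f, 0f, 3.1f)
val s = sign(x)   // expected: (-1, 0, 1)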
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/Batch.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class Batch(override val opts:Batch.Opts = new Batch.Options) extends Updater {

  override def init(model0:Model) = {
    super.init(model0)
  }

  override def update(ipass:Int, step:Long) = {}
}

@SerialVersionUID(100L)
object Batch {
  trait Opts extends Updater.Opts {
    var beps = 1e-5f
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/BatchNorm.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class BatchNorm(override val opts:BatchNorm.Opts = new BatchNorm.Options) extends Updater {
  var accumulators:Array[Mat] = null

  override def init(model0:Model) = {
    super.init(model0)
    val modelmats = model.modelmats
    val updatemats = model.updatemats
    accumulators = new Array[Mat](updatemats.length)
    for (i <- 0 until accumulators.length) {
      accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
    }
  }

  // Accumulate the update matrices over the pass.
  override def update(ipass:Int, step:Long) = {
    val updatemats = model.updatemats
    for (i <- 0 until accumulators.length) {
      accumulators(i) ~ accumulators(i) + updatemats(i)
    }
  }

  override def clear() = {
    for (i <- 0 until accumulators.length) {
      accumulators(i).clear
    }
  }

  // At the end of a pass, set the model to the ratio of the two accumulators
  // and normalize each row to sum to 1.
  override def updateM(ipass:Int):Unit = {
    val mm = model.modelmats(0)
    mm ~ accumulators(0) / accumulators(1)
    mm ~ mm / sum(mm,2)
    clear
  }
}

@SerialVersionUID(100L)
object BatchNorm {
  trait Opts extends Updater.Opts {
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/IncMult.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class IncMult(override val opts:IncMult.Opts = new IncMult.Options) extends Updater {

  var firstStep = 0f
  var rm:Mat = null

  override def init(model0:Model) = {
    super.init(model0)
    rm = model0.modelmats(0).zeros(1,1)
  }

  override def update(ipass:Int, step:Long) = {
    val modelmats = model.modelmats
    val updatemats = model.updatemats
    val mm = modelmats(0)
    val ms = modelmats(1)
    val um = updatemats(0)
    val ums = updatemats(1)
    // Decaying rate rr = (firstStep / step)^power, with rr = 1 on the first step.
    val rr = if (step == 0) 1f else {
      if (firstStep == 0f) {
        firstStep = step
        1f
      } else {
        (math.pow(firstStep / step, opts.power)).toFloat
      }
    }

    // Multiplicative update in log space: mm <- exp((1-rr)*ln(mm) + rr*um).
    um ~ um *@ rm.set(rr)
    ln(mm, mm)
    mm ~ mm *@ rm.set(1-rr)
    mm ~ mm + um
    exp(mm, mm)
    if (opts.isprob) mm ~ mm / sum(mm,2)
  }

  override def clear() = {
    firstStep = 0f
  }
}

@SerialVersionUID(100L)
object IncMult {
  trait Opts extends Updater.Opts {
    var warmup = 0L
    var power = 0.3f
    var isprob = true
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/Telescoping.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._

@SerialVersionUID(100L)
class Telescoping(override val opts:Telescoping.Opts = new Telescoping.Options) extends Updater {
  var accumulators:Array[Mat] = null
  var firstStep = 0L
  var nextStep = 10L
  var nextCount = 0L
  var rm:Mat = null

  override def init(model0:Model) = {
    super.init(model0)
    val modelmats = model0.modelmats
    val updatemats = model0.updatemats
    rm = model0.modelmats(0).zeros(1,1)
    accumulators = new Array[Mat](updatemats.length)
    for (i <- 0 until updatemats.length) {
      accumulators(i) = updatemats(i).zeros(updatemats(i).nrows, updatemats(i).ncols)
    }
    firstStep = 0L
    nextStep = 10L
    nextCount = 0L
  }

  override def update(ipass:Int, step:Long) = {
    if (firstStep == 0 && step > 0) {
      firstStep = step
    }
    val updatemats = model.updatemats
    for (i <- 0 until updatemats.length) {
      accumulators(i) ~ accumulators(i) + updatemats(i)
    }
    // When step passes the next threshold, write the ratio of accumulators
    // into the model and grow the interval geometrically by opts.factor.
    if (step >= nextCount) {
      model.modelmats(0) ~ accumulators(0) / accumulators(1)
      nextStep = (nextStep * opts.factor).toLong
      nextCount = step + nextStep
    }
  }

  override def clear() = {
    for (i <- 0 until accumulators.length) {
      accumulators(i).clear
    }
  }
}

@SerialVersionUID(100L)
object Telescoping {
  trait Opts extends Updater.Opts {
    var factor = 1.5f
  }

  class Options extends Opts {}
}
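The two schedules above are easy to misread inside the accumulator code, so here is a small standalone sketch of just the scheduling arithmetic (plain Scala; function names and starting values are hypothetical, and it assumes updates arrive exactly at each threshold, which the real updater does not require):

// IncMult's decaying rate: rr = (firstStep / step)^power.
def rate(firstStep:Float, step:Long, power:Float):Float =
  if (step == 0) 1f else math.pow(firstStep / step, power).toFloat

// Telescoping-style snapshot points: each interval grows by `factor`.
def snapshots(factor:Float, n:Int):Seq[Long] = {
  var next = 10L; var count = 0L
  (0 until n).map { _ =>
    count += next
    next = (next * factor).toLong
    count
  }
}

// e.g. snapshots(1.5f, 5) == Seq(10, 25, 47, 80, 129)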
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/updaters/Updater.scala:
--------------------------------------------------------------------------------
package BIDMach.updaters

import BIDMat.{Mat,SBMat,CMat,DMat,FMat,IMat,HMat,GMat,GIMat,GSMat,SMat,SDMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models._


abstract class Updater(val opts:Updater.Opts = new Updater.Options) extends Serializable {
  var model:Model = null;
  var runningtime = 0.0;

  def init(model0:Model) = {
    model = model0
  }

  def clear():Unit = {}

  def update(ipass:Int, step:Long):Unit = {}

  def update(ipass:Int, step:Long, gprogress:Float):Unit = update(ipass, step)

  def updateM(ipass:Int):Unit = {
    model.updatePass(ipass)
  }

  def preupdate(ipass:Int, step:Long, gprogress:Float):Unit = {}
}

@SerialVersionUID(100L)
object Updater {
  trait Opts extends BIDMat.Opts {
  }

  class Options extends Opts {}
}
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/viz/LogViz.scala:
--------------------------------------------------------------------------------
package BIDMach.viz;
import BIDMat.{BMat,Mat,SBMat,CMat,DMat,FMat,FFilter,IMat,HMat,GDMat,GFilter,GLMat,GMat,GIMat,GSDMat,GSMat,LMat,SMat,SDMat,TMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models.Model;
import BIDMach.networks.Net;
import BIDMach.networks.layers._;
import BIDMach.Learner;
import scala.collection.mutable.ListBuffer;

/**
 * Collect and visualize some logged values.
 */

class LogViz(val name: String = "varName") extends Visualization {
  val data:ListBuffer[FMat] = new ListBuffer[FMat];
  interval = 1;

  // Override one of these to collect some log data.
  def collect(model:Model, mats:Array[Mat], ipass:Int, pos:Long):FMat = {
    collect(model);
  }

  def collect(model:Model):FMat = {
    collect();
  }

  def collect():FMat = {
    row(0);
  }

  override def doUpdate(model:Model, mats:Array[Mat], ipass:Int, pos:Long) = {
    data.synchronized {
      data += FMat(collect(model, mats, ipass, pos));
    }
  }

  def snapshot = {
    Learner.scores2FMat(data);
  }

  // Return columns n0 until n1 of the collected data as a single matrix.
  def fromto(n0:Int, n1:Int) = {
    data.synchronized {
      val len = data.length;
      val na = math.min(n0, len);
      val nb = math.min(n1, len);
      val out = zeros(data(0).nrows, nb - na);
      var i = 0;
      data.foreach(f => {
        if (i >= na && i < nb) out(?, i - na) = f;
        i += 1;
      })
      out
    }
  }

  def lastn(n0:Int) = {
    val len = data.synchronized {data.length};
    fromto(math.max(0, len - n0), len);
  }
}
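A concrete LogViz only needs to override one of the collect methods. A hypothetical subclass (illustrative only; it assumes BIDMat's mean reduction) that records the mean of the first model matrix on every update:

import BIDMat.{Mat,FMat}
import BIDMat.MatFunctions._
import BIDMat.SciFunctions._
import BIDMach.models.Model
import BIDMach.viz.LogViz

// Hypothetical: log mean(modelmats(0)) once per interval.
class MeanLogger extends LogViz("meanWeight") {
  override def collect(model:Model):FMat = {
    FMat(mean(mean(model.modelmats(0), 2)))
  }
}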
--------------------------------------------------------------------------------
/src/main/scala/BIDMach/viz/Visualization.scala:
--------------------------------------------------------------------------------
package BIDMach.viz
import BIDMach.models.Model;
import BIDMat.Mat


/**
 * Abstract class for visualizations. Extend this class to get correct behavior.
 */

abstract class Visualization {
  var interval = 10;
  var cnt = 0
  var checkStatus = -1

  def doUpdate(model:Model, mats:Array[Mat], ipass:Int, pos:Long)

  // Perform an initial check to make sure the data type is correct.
  def check(model:Model, mats:Array[Mat]):Int = 0

  // Initialize variables and state during the first update.
  def init(model:Model, mats:Array[Mat]) {}

  // Update the visualization once every `interval` batches.
  def update(model:Model, mats:Array[Mat], ipass:Int, pos:Long) {
    if (checkStatus == -1) {
      checkStatus = check(model, mats)
      if (checkStatus == 0) init(model, mats)
    }
    if (checkStatus == 0) {
      if (cnt == 0) {
        try {
          doUpdate(model, mats, ipass, pos)
        }
        catch {
          case e:Exception => {
            // Disable this visualization after a failure.
            checkStatus = 2
            println(e.toString)
            println(e.getStackTrace.mkString("\n"))
          }
        }
      }
      cnt = (cnt + 1) % interval
    }
  }
}
--------------------------------------------------------------------------------
/src/test/scala/BIDMach/BIDMachSpec.scala:
--------------------------------------------------------------------------------
package BIDMach

import org.scalatest._

abstract class BIDMachSpec extends FlatSpec
  with Matchers
  with BeforeAndAfterAll {

  override def beforeAll {
    BIDMat.Mat.checkMKL(false);
  }

  // Compare two float arrays elementwise, scaling the tolerance by magnitude.
  def assert_approx_eq(a: Array[Float], b: Array[Float], eps: Float = 1e-4f) = {
    (a, b).zipped foreach {
      case (x, y) => {
        val scale = (math.abs(x) + math.abs(y) + eps).toFloat;
        x / scale should equal ((y / scale) +- eps)
      }
    }
  }

  def assert_approx_eq_double(a: Array[Double], b: Array[Double], eps: Double = 1e-6) = {
    (a, b).zipped foreach {
      case (x, y) => {
        val scale = (math.abs(x) + math.abs(y) + eps);
        x / scale should equal ((y / scale) +- eps)
      }
    }
  }

}
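A test built on this base class might look like the following sketch (class name and values hypothetical; FlatSpec style as used above):

package BIDMach

class ApproxEqSpec extends BIDMachSpec {
  "assert_approx_eq" should "accept arrays that differ by less than eps" in {
    val a = Array(1.0f, 2.0f, 3.0f)
    val b = Array(1.00001f, 2.00001f, 3.00001f)
    assert_approx_eq(a, b, 1e-4f)
  }
}
--------------------------------------------------------------------------------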