├── .gitignore ├── README.md ├── apps ├── cpp │ ├── Makefile │ ├── graph │ │ ├── graph.cpp │ │ ├── graph.hpp │ │ ├── pagerank │ │ │ ├── Makefile │ │ │ └── pagerank.cpp │ │ ├── timer.hpp │ │ └── trianglecounting │ │ │ ├── Makefile │ │ │ └── trianglecounting.cpp │ ├── ml │ │ ├── OptiML.hpp │ │ ├── Random.cpp │ │ ├── gda │ │ │ ├── GDA_arma.cpp │ │ │ ├── GDA_ublas.cpp │ │ │ ├── Makefile.arma │ │ │ └── Makefile.ublas │ │ ├── kmeans │ │ │ ├── Makefile │ │ │ └── kmeans.cpp │ │ ├── linreg │ │ │ ├── Makefile │ │ │ └── linreg.cpp │ │ ├── logreg │ │ │ ├── Makefile │ │ │ └── logreg.cpp │ │ ├── nb │ │ │ ├── Makefile │ │ │ └── nb.cpp │ │ ├── rbm │ │ │ ├── Makefile │ │ │ └── rbm.cpp │ │ └── svm │ │ │ ├── Makefile │ │ │ └── svm.cpp │ ├── ql │ │ ├── Makefile │ │ ├── Table.scala │ │ ├── q1.cpp │ │ └── q1.scala │ ├── run_experiments.sh │ └── wrangler │ │ ├── Makefile │ │ └── gene_wrangler.cpp ├── matlab │ ├── autoencoder │ │ └── autoencoder.m │ ├── gda │ │ ├── gda.m │ │ ├── gdagpu.m │ │ └── gdajacket.m │ ├── ica │ │ ├── bellsej.m │ │ └── ica.m │ ├── kmeans │ │ ├── kmeans.m │ │ ├── kmeansgpu.m │ │ ├── kmeansjacket.m │ │ ├── kmeansparfor.m │ │ └── kmeansvec.m │ ├── lbp │ │ ├── bpfactorgraph.m │ │ ├── count_spaces.m │ │ ├── get_graph_values.m │ │ ├── lbp.m │ │ ├── read_beliefs.m │ │ ├── read_graph_lr.m │ │ ├── struct │ │ │ ├── bpfactorgraph.m │ │ │ ├── count_spaces.m │ │ │ ├── get_graph_values.m │ │ │ ├── lbp.m │ │ │ ├── read_beliefs.m │ │ │ └── read_graph_lr.m │ │ └── sum_out_product.m │ ├── linreg │ │ ├── linreg.m │ │ ├── linreggpu.m │ │ ├── linregjacket.m │ │ └── linregjacket2.m │ ├── matmult │ │ ├── matmult.m │ │ ├── matmultgpu.m │ │ └── matmultjacket.m │ ├── nb │ │ ├── nb_test.m │ │ ├── nb_train.m │ │ ├── nb_traingpu.m │ │ ├── nb_trainjacket.m │ │ └── readMatrix.m │ ├── newton-ralphson │ │ └── newton.m │ ├── rbm │ │ ├── rbm.m │ │ ├── rbmgpu.m │ │ └── rbmjacket.m │ ├── rl │ │ ├── cart_pole.m │ │ ├── control.m │ │ ├── get_state.m │ │ ├── plot_learning_curve.m │ │ └── show_cart.m │ 
├── runautoencoder.m │ ├── rungda.m │ ├── runkmeans.m │ ├── runlbp.m │ ├── runlinreg.m │ ├── runmatmult.m │ ├── runnb.m │ ├── runrbm.m │ ├── runsvm.m │ ├── runsvm2.m │ ├── svm │ │ ├── readMatrix.m │ │ ├── smo_train.m │ │ ├── smo_traingpu.m │ │ ├── smo_trainjacket.m │ │ ├── smo_verify.m │ │ ├── svm.m │ │ ├── svm_test.m │ │ ├── svm_train.m │ │ ├── svm_traingpu.m │ │ └── svm_trainjacket.m │ ├── timeapp.m │ └── timeapps.m ├── multi-dsl │ └── src │ │ └── ppl │ │ └── apps │ │ └── interop │ │ ├── ClosedWorld.scala │ │ ├── CustomerPricing.scala │ │ ├── GIterable.scala │ │ ├── Graph.scala │ │ ├── MDP.scala │ │ ├── MDPBase.scala │ │ ├── NodeGIterable.scala │ │ ├── ParProperty.scala │ │ ├── Property.scala │ │ ├── Regression.scala │ │ ├── SimpleMatrix.scala │ │ ├── SimpleParMatrix.scala │ │ ├── SimpleParVector.scala │ │ ├── SimpleVector.scala │ │ ├── Twitter.scala │ │ ├── Twitter2.scala │ │ ├── TwitterParallel.scala │ │ └── Types.scala ├── optiml │ └── src │ │ ├── Examples.scala │ │ ├── HelloWorld.scala │ │ ├── Scratchpad.scala │ │ ├── StencilExperiments.scala │ │ └── ppl │ │ └── apps │ │ ├── GraphExample.scala │ │ ├── bio │ │ └── spade │ │ │ ├── Clustering.scala │ │ │ ├── Downsampling.scala │ │ │ ├── Spade.scala │ │ │ └── Upsampling.scala │ │ ├── minimsmbuilder │ │ ├── Cluster.scala │ │ ├── DirectSolver.scala │ │ └── TheoData.scala │ │ ├── ml │ │ ├── arff │ │ │ └── arff.scala │ │ ├── cf │ │ │ ├── TopN.scala │ │ │ └── userdefined │ │ │ │ ├── PairwiseRating.scala │ │ │ │ └── Similarity.scala │ │ ├── gda │ │ │ ├── GDA.scala │ │ │ └── GDAVectorized.scala │ │ ├── kmeans │ │ │ └── kmeans.scala │ │ ├── lbpdenoise │ │ │ ├── LBPDenoise.scala │ │ │ └── LBPDenoiseRaw.scala │ │ ├── linreg │ │ │ └── LinReg.scala │ │ ├── logreg │ │ │ └── LogReg.scala │ │ ├── nb │ │ │ ├── NaiveBayes.scala │ │ │ └── NaiveBayesVectorized.scala │ │ ├── rbm │ │ │ └── RBM.scala │ │ └── svm │ │ │ ├── SVM.scala │ │ │ ├── SVMModel.scala │ │ │ └── SVMRelaxedModel.scala │ │ ├── robotics │ │ └── gradient │ │ │ ├── 
BinarizedGradientGrid.scala │ │ │ ├── BinarizedGradientPyramid.scala │ │ │ ├── BinarizedGradientTemplate.scala │ │ │ ├── gradient.scala │ │ │ └── userdefined │ │ │ ├── BiGGDetection.scala │ │ │ ├── BinarizedGradientPyramid.scala │ │ │ ├── BinarizedGradientTemplate.scala │ │ │ └── Rect.scala │ │ └── tests │ │ ├── AnyType.scala │ │ ├── GPUWhileLoop.scala │ │ ├── MutableOps.scala │ │ ├── Sigmoid.scala │ │ ├── UntilConverged.scala │ │ └── VerticesForeach.scala └── optiql │ └── src │ └── ppl │ └── apps │ ├── cidr │ └── CIDR.scala │ ├── dataquery │ └── tpch │ │ ├── TPCH.scala │ │ └── Types.scala │ └── traceroute │ └── TraceRoute.scala ├── bin ├── build-interop ├── build-optigraph ├── build-optiml ├── build-optiql ├── cloc.pl ├── delite ├── delitec ├── delitecommon.py ├── delitep ├── dlines ├── lift_user_class_optiml.py ├── lift_user_class_optiql.py ├── make_data ├── make_data_clustered ├── make_doc_data ├── make_logistic_data.py ├── netflix-gen │ └── src │ │ └── ppl │ │ └── ert │ │ └── scripts │ │ └── netflixERTgen.scala ├── parse_perf_report ├── server.py ├── tokenlist └── windows │ ├── README.md │ ├── delite │ ├── delite.bat │ ├── delitec │ ├── delitec.bat │ ├── delitecfg │ ├── delitecfg.bat │ ├── delites │ ├── delites.bat │ └── shared ├── build.sbt ├── config └── delite │ ├── BLAS.xml.linux │ ├── BLAS.xml.ubuntu-mkl │ ├── CPP.xml.linux │ ├── CPP.xml.linux.pcm │ ├── CUDA.xml.linux │ ├── LAPACK.xml.macos-accelerate │ ├── LAPACK.xml.ubuntu-lapack │ ├── LAPACK.xml.ubuntu-mkl │ ├── OpenCL.xml.linux │ ├── clBLAS.xml.linux │ ├── cuBLAS.xml.linux │ └── protobuf.xml.example ├── delite.properties.sample ├── dsls ├── optiql │ ├── src │ │ └── ppl │ │ │ └── dsl │ │ │ └── optiql │ │ │ ├── OptiQL.scala │ │ │ ├── Types.scala │ │ │ ├── datastruct │ │ │ └── scala │ │ │ │ └── container │ │ │ │ └── Table.scala │ │ │ └── ops │ │ │ ├── DSArrayOps.scala │ │ │ ├── DateOps.scala │ │ │ ├── InputReaderOps.scala │ │ │ ├── OptiQLMiscOps.scala │ │ │ ├── QueryableOps.scala │ │ │ └── TableOps.scala 
│ └── tests │ │ └── QuerySuite.scala └── simple │ └── src │ └── ppl │ ├── apps │ └── assignment2 │ │ └── SimpleVectorApp.scala │ └── dsl │ └── assignment2 │ ├── SimpleVector.scala │ └── Vector.scala ├── framework ├── delite-test │ ├── failing │ │ └── ppl │ │ │ └── tests │ │ │ └── scalatest │ │ │ └── delite │ │ │ └── MutabilitySuite.scala │ ├── src │ │ └── ppl │ │ │ └── tests │ │ │ └── scalatest │ │ │ ├── DeliteSuite.scala │ │ │ ├── DeliteTestApplication.scala │ │ │ └── DeliteTestDSL.scala │ └── tests │ │ └── ppl │ │ └── tests │ │ └── scalatest │ │ ├── delite │ │ ├── DeliteGPUSuite.scala │ │ ├── DeliteGPUSyncSuite.scala │ │ ├── DeliteOpSuite.scala │ │ └── test.txt │ │ └── firstdsl │ │ ├── ProfileArrayOps.scala │ │ ├── ProfileDSL.scala │ │ ├── ProfileOps.scala │ │ ├── ProfileTest.scala │ │ └── datastruct │ │ └── scala │ │ └── ProfileArray.scala └── src │ └── ppl │ └── delite │ └── framework │ ├── Cache.scala │ ├── Config.scala │ ├── DeliteApplication.scala │ ├── DeliteIL.scala │ ├── DeliteILOps.scala │ ├── DeliteInteractive.scala │ ├── DeliteRestage.scala │ ├── Interfaces.scala │ ├── Util.scala │ ├── analysis │ ├── LoopAnalysis.scala │ ├── StencilAnalysis.scala │ └── TraversalAnalysis.scala │ ├── codegen │ ├── Target.scala │ ├── Utils.scala │ ├── cpp │ │ └── TargetCpp.scala │ ├── cuda │ │ └── TargetCuda.scala │ ├── delite │ │ ├── DeliteCodegen.scala │ │ ├── DeliteKernelCodegen.scala │ │ ├── TargetDelite.scala │ │ ├── generators │ │ │ ├── DeliteGenScalaVariables.scala │ │ │ └── DeliteGenTaskGraph.scala │ │ └── overrides │ │ │ ├── DeliteIfThenElse.scala │ │ │ ├── DeliteOverrides.scala │ │ │ ├── DeliteRangeOps.scala │ │ │ ├── DeliteVariables.scala │ │ │ └── DeliteWhile.scala │ ├── opencl │ │ └── TargetOpenCL.scala │ ├── restage │ │ ├── DeliteCodeGenRestage.scala │ │ └── DeliteILGenExternal.scala │ └── scala │ │ └── TargetScala.scala │ ├── datastructures │ ├── DeliteArray.scala │ ├── DeliteArrayBuffer.scala │ ├── DeliteCppHostTransfer.scala │ ├── 
DeliteCudaDeviceTransfer.scala │ ├── DeliteMap.scala │ └── DeliteStruct.scala │ ├── extern │ ├── codegen │ │ ├── GenericGenExternal.scala │ │ ├── cpp │ │ │ └── CGenExternalBase.scala │ │ ├── cuda │ │ │ └── CudaGenExternalBase.scala │ │ ├── opencl │ │ │ └── OpenCLGenExternalBase.scala │ │ └── scala │ │ │ └── ScalaGenExternalBase.scala │ └── lib │ │ ├── BLAS.scala │ │ ├── ExternalLibrary.scala │ │ ├── LAPACK.scala │ │ ├── ProtoBuf.scala │ │ ├── clBLAS.scala │ │ └── cuBLAS.scala │ ├── ops │ ├── DeliteCollectionOps.scala │ ├── DeliteFileReader.scala │ ├── DeliteFileWriter.scala │ ├── DeliteInternalOps.scala │ ├── DeliteOps.scala │ ├── DeliteOpsBaseGenericGen.scala │ ├── DeliteOpsCGen.scala │ ├── DeliteOpsGPUGen.scala │ ├── DeliteOpsIR.scala │ ├── DeliteOpsScalaGen.scala │ ├── DeliteReduction.scala │ └── RuntimeService.scala │ └── transform │ ├── DeliteTransform.scala │ ├── DistributedArrayTransformer.scala │ ├── ForeachReduceTransformer.scala │ ├── ForwardPassTransformer.scala │ ├── LoweringTransformer.scala │ └── MultiLoopSoA.scala ├── profiler ├── README.md ├── assets │ └── kMeans_2.png ├── bar-chart-controller.js ├── bar-chart.js ├── common.js ├── compare-run-summaries.js ├── compare-runs.js ├── dataflow-deprecated.js ├── dataflow.js ├── datamodel.js ├── datastructures.js ├── editor.js ├── execution-profile.js ├── gcStats-graph.js ├── gcStats.js ├── grid.html ├── lib │ ├── ace.js │ ├── c3.min.js │ ├── cola.v1.min.js │ ├── d3.min.js │ ├── graphlib-dot.min.js │ ├── highlight.pack.js │ ├── jquery-ui-1.11.0 │ │ ├── external │ │ │ └── jquery │ │ │ │ └── jquery.js │ │ ├── images │ │ │ ├── animated-overlay.gif │ │ │ ├── ui-bg_diagonals-thick_18_b81900_40x40.png │ │ │ ├── ui-bg_diagonals-thick_20_666666_40x40.png │ │ │ ├── ui-bg_flat_10_000000_40x100.png │ │ │ ├── ui-bg_glass_100_f6f6f6_1x400.png │ │ │ ├── ui-bg_glass_100_fdf5ce_1x400.png │ │ │ ├── ui-bg_glass_65_ffffff_1x400.png │ │ │ ├── ui-bg_gloss-wave_35_f6a828_500x100.png │ │ │ ├── 
ui-bg_highlight-soft_100_eeeeee_1x100.png │ │ │ ├── ui-bg_highlight-soft_75_ffe45c_1x100.png │ │ │ ├── ui-icons_222222_256x240.png │ │ │ ├── ui-icons_228ef1_256x240.png │ │ │ ├── ui-icons_ef8c08_256x240.png │ │ │ ├── ui-icons_ffd27a_256x240.png │ │ │ └── ui-icons_ffffff_256x240.png │ │ ├── index.html │ │ ├── jquery-ui.css │ │ ├── jquery-ui.js │ │ ├── jquery-ui.min.css │ │ ├── jquery-ui.min.js │ │ ├── jquery-ui.structure.css │ │ ├── jquery-ui.structure.min.css │ │ ├── jquery-ui.theme.css │ │ └── jquery-ui.theme.min.css │ ├── jquery.min.js │ ├── jquery.min.map │ ├── mode-scala.js │ ├── sql.js │ └── theme-twilight.js ├── main.js ├── qunit │ ├── qunit-1.14.0.css │ └── qunit-1.14.0.js ├── stack-graph.js ├── styles │ ├── bar-chart.css │ ├── c3.css │ ├── compare-run-summaries.css │ ├── compare-runs.css │ ├── dataflow.css │ ├── editor.css │ ├── grid.css │ ├── sidebar.css │ ├── stack-graph.css │ ├── tab-view.css │ ├── table.css │ └── timeline.css ├── table.js ├── tests │ ├── datamodel_tests.js │ ├── inputs │ │ └── test_1_profileData.js │ └── test.html └── timeline.js ├── project ├── Build.scala └── build.properties ├── runtime ├── src │ ├── ppl │ │ └── delite │ │ │ └── runtime │ │ │ ├── Config.scala │ │ │ ├── Delite.scala │ │ │ ├── DeliteMesosExecutor.scala │ │ │ ├── DeliteMesosScheduler.scala │ │ │ ├── Exceptions.scala │ │ │ ├── codegen │ │ │ ├── AliasTable.scala │ │ │ ├── CCompile.scala │ │ │ ├── CodeCache.scala │ │ │ ├── Compilers.scala │ │ │ ├── ConditionGenerator.scala │ │ │ ├── CppCompile.scala │ │ │ ├── CppExecutableGenerator.scala │ │ │ ├── CudaCompile.scala │ │ │ ├── CudaExecutableGenerator.scala │ │ │ ├── ExecutableGenerator.scala │ │ │ ├── NestedGenerator.scala │ │ │ ├── OpenCLCompile.scala │ │ │ ├── OpenCLExecutableGenerator.scala │ │ │ ├── SavedEnvironmentGenerator.scala │ │ │ ├── ScalaCompile.scala │ │ │ ├── ScalaExecutableGenerator.scala │ │ │ ├── WhileGenerator.scala │ │ │ ├── examples │ │ │ │ ├── ExampleContainer.scala │ │ │ │ ├── ExampleExecutable0.scala │ 
│ │ │ ├── ExampleExecutable1.scala │ │ │ │ └── ExampleGPUExecutable.scala │ │ │ ├── kernels │ │ │ │ ├── MultiLoop_SMP_Array_Generator.scala │ │ │ │ ├── cpp │ │ │ │ │ └── CppMultiLoopGenerator.scala │ │ │ │ ├── cuda │ │ │ │ │ ├── MultiLoop_GPU_Array_Generator.scala │ │ │ │ │ └── SingleTask_GPU_Generator.scala │ │ │ │ ├── opencl │ │ │ │ │ └── MultiLoop_GPU_Array_Generator.scala │ │ │ │ └── scala │ │ │ │ │ ├── Foreach_SMP_Array_Generator.scala │ │ │ │ │ ├── RPC_Generator.scala │ │ │ │ │ ├── ScalaMultiLoopGenerator.scala │ │ │ │ │ └── examples │ │ │ │ │ ├── ExampleForeach.scala │ │ │ │ │ └── ExampleMultiLoop.scala │ │ │ └── sync │ │ │ │ ├── CppSyncGenerator.scala │ │ │ │ ├── CppSyncObjectGenerator.scala │ │ │ │ ├── CudaSyncGenerator.scala │ │ │ │ ├── ScalaSyncGenerator.scala │ │ │ │ ├── ScalaSyncObjectGenerator.scala │ │ │ │ └── SyncGenerator.scala │ │ │ ├── data │ │ │ ├── Data.scala │ │ │ ├── DeliteArray.scala │ │ │ └── RaggedArray.scala │ │ │ ├── executor │ │ │ ├── DeliteExecutable.scala │ │ │ ├── ExecutionThread.scala │ │ │ ├── Executor.scala │ │ │ ├── MultiAccExecutor.scala │ │ │ ├── NativeExecutionThread.scala │ │ │ ├── Shutdown.scala │ │ │ └── ThreadPool.scala │ │ │ ├── graph │ │ │ ├── DeliteTaskGraph.scala │ │ │ ├── Stencil.scala │ │ │ ├── TestGraph.scala │ │ │ ├── TestKernels.scala │ │ │ ├── cost │ │ │ │ └── StaticCostModel.scala │ │ │ ├── ops │ │ │ │ ├── Arguments.scala │ │ │ │ ├── DeliteOP.scala │ │ │ │ ├── EOP.scala │ │ │ │ ├── OP_Condition.scala │ │ │ │ ├── OP_Control.scala │ │ │ │ ├── OP_Executable.scala │ │ │ │ ├── OP_External.scala │ │ │ │ ├── OP_Foreach.scala │ │ │ │ ├── OP_Input.scala │ │ │ │ ├── OP_MultiLoop.scala │ │ │ │ ├── OP_Nested.scala │ │ │ │ ├── OP_Single.scala │ │ │ │ ├── OP_While.scala │ │ │ │ ├── Sync.scala │ │ │ │ └── TestOP.scala │ │ │ └── targets │ │ │ │ ├── CudaMetadata.scala │ │ │ │ ├── GPUMetadata.scala │ │ │ │ ├── OS.scala │ │ │ │ ├── OpenCLMetadata.scala │ │ │ │ ├── Resource.scala │ │ │ │ └── Targets.scala │ │ │ ├── messages │ │ │ 
├── Connection.scala │ │ │ ├── ConnectionManager.scala │ │ │ ├── Future.scala │ │ │ ├── Message.scala │ │ │ ├── Messages.java │ │ │ ├── Serialization.scala │ │ │ └── messages.proto │ │ │ ├── profiler │ │ │ ├── MemoryProfiler.scala │ │ │ ├── PerformanceTimer.scala │ │ │ ├── PostProcessor.scala │ │ │ ├── PostProcessorDataStructures.scala │ │ │ ├── Profiling.scala │ │ │ ├── SamplerThread.scala │ │ │ └── Timing.scala │ │ │ ├── scheduler │ │ │ ├── AccStaticScheduler.scala │ │ │ ├── GraphUtil.scala │ │ │ ├── OpHelper.scala │ │ │ ├── ScheduleOptimizer.scala │ │ │ ├── StaticSchedule.scala │ │ │ └── StaticScheduler.scala │ │ │ └── sync │ │ │ └── MultiLoopSync.scala │ └── static │ │ ├── cpp │ │ ├── Config.cpp │ │ ├── Config.h │ │ ├── DeliteCpp.cpp │ │ ├── DeliteCpp.h │ │ ├── DeliteCppProfiler.cpp │ │ ├── DeliteCppProfiler.h │ │ ├── DeliteCppRandom.h │ │ ├── DeliteDatastructures.h │ │ ├── DeliteFileInputStream.h │ │ ├── DeliteFileOutputStream.h │ │ ├── DeliteFileSystem.h │ │ ├── DeliteMemory.cpp │ │ ├── DeliteMemory.h │ │ ├── DeliteNamespaces.h │ │ ├── DeliteString.h │ │ ├── DeliteThreadPool.cpp │ │ ├── MultiLoopSync.h │ │ ├── cppDeliteArray.cpp │ │ ├── cppHashMap.h │ │ ├── cppInit.cpp │ │ ├── cppInit.h │ │ ├── cppList.cpp │ │ ├── cppList.h │ │ ├── cppRef.cpp │ │ ├── cppRef.h │ │ ├── pcmHelper.cpp │ │ ├── pcmHelper.h │ │ └── standalone │ │ │ ├── DeliteStandaloneMain.cpp │ │ │ ├── DeliteStandaloneMain.h │ │ │ └── Makefile.standalone │ │ ├── cuda │ │ ├── CudaIOBuffer.cu │ │ ├── CudaIOBuffer.h │ │ ├── DeliteCuda.cu │ │ ├── DeliteCuda.h │ │ ├── DeliteCudaProfiler.cu │ │ ├── DeliteCudaProfiler.h │ │ ├── HostcudaDeliteArray.h │ │ ├── HostcudaList.h │ │ ├── HostcudaRef.h │ │ ├── cudaDeliteArray.h │ │ ├── cudaHashMap.h │ │ ├── cudaInit.cu │ │ ├── cudaInit.sh │ │ ├── cudaList.h │ │ ├── cudaProfiler.cu │ │ └── cudaRef.h │ │ ├── opencl │ │ ├── DeliteArray.cl │ │ ├── DeliteArray.h │ │ ├── DeliteOpenCL.cpp │ │ ├── DeliteOpenCL.h │ │ ├── blas │ │ │ ├── README │ │ │ ├── blasgen.sh │ │ │ ├── 
clblas.cpp │ │ │ ├── clblas.h │ │ │ └── libclblas.so │ │ ├── openclInit.cpp │ │ └── openclInit.sh │ │ └── scala │ │ ├── DeliteInterfaces.scala │ │ ├── ResourceInfo.scala │ │ ├── container │ │ ├── HashMapImpl.scala │ │ ├── IntComparator.java │ │ ├── IntTimSort.java │ │ ├── LongComparator.java │ │ ├── LongTimSort.java │ │ └── SortingImpl.scala │ │ └── io │ │ ├── DeliteFileInputStream.scala │ │ ├── DeliteFileOutputStream.scala │ │ └── LineReader.java └── tests │ └── ppl │ └── tests │ └── scalatest │ ├── MultiLoopSuite.scala │ ├── ProfilerSuite.scala │ └── inputs │ └── ProfilerSuite │ └── input_1 │ ├── input_1.deg │ ├── profile_t_0.csv │ ├── profile_t_1.csv │ ├── profile_t_2.csv │ ├── profile_tic_toc_cpp.csv │ └── profile_tic_toc_scala.csv └── unsafe_classes ├── unsafe.jar └── unsafe └── UnsafeAccessor.java /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.iml 3 | *.ipr 4 | *.iws 5 | *.idea 6 | *.DS_Store 7 | *.~ 8 | *.o 9 | *.so 10 | Makefile 11 | /local.properties 12 | delite.properties 13 | target/ 14 | config/delite/*.xml 15 | core-site.xml 16 | *lib_managed 17 | *src_managed 18 | project/boot 19 | project/plugins/project 20 | runtime/project/boot 21 | generated 22 | generatedCache 23 | *.deg 24 | *.debug 25 | *.pgm 26 | *.pyc 27 | *.checksum 28 | benchmark/times 29 | test-out/*/*-log 30 | test-out/*/*-skel 31 | test-out/*/*-src 32 | dsls/deliszt/lisztlib 33 | profiler/less 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Delite 2.0 2 | ========== 3 | 4 | [Setup instructions](http://stanford-ppl.github.com/Delite/source.html) 5 | -------------------------------------------------------------------------------- /apps/cpp/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | .PHONY: apps 4 | apps: 5 | cd graph/pagerank && 
$(MAKE) 6 | cd graph/trianglecounting && $(MAKE) 7 | cd ml/gda && $(MAKE) -f Makefile.arma 8 | cd ml/kmeans && $(MAKE) 9 | cd ml/logreg && $(MAKE) 10 | cd ql && $(MAKE) 11 | cd wrangler && $(MAKE) 12 | 13 | -------------------------------------------------------------------------------- /apps/cpp/graph/graph.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class graph { 6 | public: 7 | uint32_t num_nodes; 8 | uint32_t num_edges; 9 | const uint32_t* edge_ptr; 10 | const uint32_t* ngbr_idx; 11 | const uint32_t* out_degree; 12 | 13 | public: 14 | graph(uint32_t nn, uint32_t ne, const uint32_t* ep, const uint32_t* ni, const uint32_t* od): 15 | num_nodes(nn), num_edges(ne), edge_ptr(ep), ngbr_idx(ni), out_degree(od) { } 16 | 17 | ~graph() { 18 | delete edge_ptr; 19 | delete ngbr_idx; 20 | delete out_degree; 21 | } 22 | }; 23 | 24 | struct edge { 25 | uint32_t n1; 26 | uint32_t n2; 27 | }; 28 | 29 | 30 | std::vector load_edgelist(const char* path); 31 | const graph* graph_from_edgelist(std::vector& edges); 32 | -------------------------------------------------------------------------------- /apps/cpp/graph/pagerank/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | all: pagerank 4 | .PHONY: all 5 | 6 | pagerank: pagerank.cpp ../graph.cpp ../graph.hpp ../timer.hpp 7 | g++ -std=gnu++11 -O3 -fopenmp -o pagerank ../graph.cpp pagerank.cpp 8 | 9 | -------------------------------------------------------------------------------- /apps/cpp/graph/timer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | class timer { 8 | private: 9 | timeval start; 10 | timeval end; 11 | 12 | public: 13 | void tic() { 14 | gettimeofday(&start,NULL); 15 | } 16 | 17 | double toc(const char* msg, bool dump = false) { 18 | gettimeofday(&end, NULL); 19 | uint64_t total_usec 
= (end.tv_sec * 1000000L + end.tv_usec) - (start.tv_sec * 1000000L + start.tv_usec); 20 | double total_sec = total_usec * 1e-6; 21 | if(msg != NULL) { 22 | fprintf(stderr, "timer: %f sec for %s\n", total_sec, msg); 23 | } 24 | 25 | if(dump) { 26 | const char* timer_path = getenv("TIMER_PATH"); 27 | if(timer_path != NULL) { 28 | FILE* timer_file = fopen(timer_path, "a"); 29 | if(timer_file != NULL) { 30 | fprintf(timer_file, "%f\n", total_sec); 31 | fclose(timer_file); 32 | } 33 | else { 34 | fprintf(stderr, "warning: unable to open timing file \"%s\"\n", timer_path); 35 | } 36 | } 37 | } 38 | 39 | return total_sec; 40 | } 41 | }; 42 | 43 | class autotimer { 44 | private: 45 | timer t; 46 | const char* msg; 47 | bool dump; 48 | 49 | public: 50 | autotimer(): msg("autotimer"), dump(false) { 51 | t.tic(); 52 | } 53 | 54 | autotimer(const char* m, bool d = false): msg(m), dump(d) { 55 | t.tic(); 56 | } 57 | 58 | ~autotimer() { 59 | t.toc(msg, dump); 60 | } 61 | }; 62 | -------------------------------------------------------------------------------- /apps/cpp/graph/trianglecounting/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | all: trianglecounting 4 | .PHONY: all 5 | 6 | trianglecounting: trianglecounting.cpp ../graph.cpp ../graph.hpp ../timer.hpp 7 | g++ -std=gnu++11 -O3 -fopenmp -o trianglecounting ../graph.cpp trianglecounting.cpp 8 | 9 | -------------------------------------------------------------------------------- /apps/cpp/ml/gda/GDA_arma.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "armadillo" 5 | 6 | using std::cout; 7 | using namespace arma; 8 | 9 | void print_usage() { 10 | cout << "Usage: GDA \n"; 11 | exit(-1); 12 | } 13 | 14 | int main(int argc,char *argv[]) { 15 | 16 | if(argc != 3) { 17 | print_usage(); 18 | } 19 | 20 | mat x_in = OptiML::armad::MLInputReader::readDoubleMatrix(argv[1]); 21 | //transpose 
the matrix to get performance 22 | mat x = trans(x_in); 23 | bvec y = OptiML::armad::MLInputReader::readBooleanVectorAsCol(argv[2]); 24 | 25 | 26 | 27 | 28 | 29 | cout << "Computing GDA using armadillo and MKL\n"; 30 | 31 | OptiML::tic(); 32 | 33 | uint m = y.n_rows; 34 | uint n = x.n_rows; 35 | 36 | double y_ones = 0.0, y_zeros=0.0; 37 | 38 | vec mu0_num(n); 39 | vec mu1_num(n); 40 | 41 | for(uint i=0; i 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using std::cout; 9 | using namespace boost::numeric::ublas; 10 | using boost::numeric::ublas::vector; 11 | 12 | void print_usage() { 13 | cout << "Usage: GDA \n"; 14 | exit(-1); 15 | } 16 | 17 | int main(int argc,char *argv[]) { 18 | 19 | if(argc != 3) { 20 | print_usage(); 21 | } 22 | 23 | matrix x = OptiML::ublas::MLInputReader::readDoubleMatrix(argv[1]); 24 | vector y = OptiML::ublas::MLInputReader::readBooleanVector(argv[2]); 25 | 26 | cout << "Computing GDA using ublas\n"; 27 | 28 | OptiML::tic(); 29 | 30 | uint m = y.size(); 31 | uint n = x.size2(); 32 | 33 | double y_ones = 0.0, y_zeros=0.0; 34 | 35 | vector mu0_num(n); 36 | vector mu1_num(n); 37 | 38 | for(uint i=0; i mu0 = mu0_num / y_zeros; 50 | vector mu1 = mu1_num / y_ones; 51 | 52 | matrix sigma(n,n); 53 | for(uint i=0; i out = outer_prod(trans(row(x,i) - mu0), row(x,i)- mu0); 56 | noalias(sigma) += out; 57 | } else { 58 | matrix out = outer_prod(trans(row(x,i) - mu1), row(x,i)- mu1); 59 | noalias(sigma) += out; 60 | } 61 | } 62 | 63 | OptiML::toc(); 64 | // cout << "phi: " << phi << endl << "sigma: "; 65 | //OptiML::ublas::Matrix::pprint(sigma); 66 | 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /apps/cpp/ml/gda/Makefile.arma: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -O3 -Wall -I../../ 3 | LDFLAGS= -O3 -larmadillo 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=GDA_arma.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | 
EXECUTABLE=GDA_arma 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) -------------------------------------------------------------------------------- /apps/cpp/ml/gda/Makefile.ublas: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -O3 -Wall -I../../ 3 | LDFLAGS= -O3 -larmadillo 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=GDA_ublas.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | EXECUTABLE=GDA_ublas 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) -------------------------------------------------------------------------------- /apps/cpp/ml/kmeans/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-fopenmp -pthread -c -O3 -Wall -I../../ 3 | LDFLAGS= -O3 -fopenmp -pthread 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=kmeans.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | EXECUTABLE=kmeans 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) 21 | -------------------------------------------------------------------------------- /apps/cpp/ml/linreg/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -O3 -Wall -I../../ 3 | LDFLAGS= -O3 -larmadillo 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=linreg.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | EXECUTABLE=linreg 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) 
-o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) -------------------------------------------------------------------------------- /apps/cpp/ml/linreg/linreg.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "armadillo" 7 | 8 | using std::cout; 9 | using namespace arma; 10 | 11 | void print_usage() { 12 | cout << "Usage: LinReg \n"; 13 | exit(-1); 14 | } 15 | 16 | int main(int argc,char *argv[]) { 17 | 18 | if(argc != 3) { 19 | print_usage(); 20 | } 21 | 22 | vec x = OptiML::armad::MLInputReader::readDoubleVectorAsCol(argv[1]); 23 | vec y = OptiML::armad::MLInputReader::readDoubleVectorAsCol(argv[2]); 24 | 25 | OptiML::tic(); 26 | int tau = 10; 27 | mat X(x.n_rows,2); 28 | vec ones(x.n_rows); 29 | ones.ones(); 30 | OptiML::armad::Matrix::insertColIntoMatrix(0,ones,X); 31 | OptiML::armad::Matrix::insertColIntoMatrix(1,x,X); 32 | 33 | double xstep = 25.0 / X.n_rows; 34 | vec xref_pts = OptiML::armad::Vector::uniformCol(-10,xstep,14.99); 35 | mat xref(xref_pts.n_rows, 2); 36 | OptiML::armad::Matrix::insertColIntoMatrix(0,ones,xref); 37 | OptiML::armad::Matrix::insertColIntoMatrix(1,xref_pts,xref); 38 | mat Xt = trans(X); 39 | 40 | rowvec guess(xref.n_rows); 41 | for(uint e = 0; e < xref.n_rows; e++) { 42 | double x_cur = xref(e,1); 43 | vec weights(x.n_rows); 44 | for(uint i = 0; i < x.n_rows; i++) { 45 | weights(i) = exp(-0.1*pow(x_cur-x(i),2)/(2.0*pow(tau,2))); 46 | } 47 | mat W = OptiML::armad::Matrix::doubleMatrixDiag(weights.n_rows, weights); 48 | mat t1 = Xt*W; 49 | vec theta = inv(t1*X)*(t1*y); 50 | guess.at(e) = dot(trans(theta),(trans(xref.row(e)))); 51 | } 52 | 53 | OptiML::toc(); 54 | //guess.print("guess = "); 55 | } 56 | 57 | -------------------------------------------------------------------------------- /apps/cpp/ml/logreg/Makefile: 
-------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -O3 -Wall -I../../ -fopenmp 3 | LDFLAGS= -O3 -fopenmp 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=logreg.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | EXECUTABLE=logreg 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) 21 | -------------------------------------------------------------------------------- /apps/cpp/ml/nb/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -g -O3 -Wall -I../../ 3 | LDFLAGS= -O3 -larmadillo 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=nb.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | EXECUTABLE=nb 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) -------------------------------------------------------------------------------- /apps/cpp/ml/rbm/Makefile: -------------------------------------------------------------------------------- 1 | CC = /opt/intel/bin/icc 2 | LDFLAGS = -c -g -O3 -Wall -I../../ 3 | SOURCES = rbm.cpp 4 | OBJECTS = $(SOURCES:.cpp=.o) 5 | EXECUTABLE = rbm 6 | 7 | MKL_INCLUDE_PATH = /opt/intel/mkl/include 8 | MKL_LIBS = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lmkl_mc3 -lmkl_def 9 | 10 | all: $(EXECUTABLE) 11 | 12 | $(EXECUTABLE): 13 | $(CC) -o $@ -I$(MKL_INCLUDE_PATH) rbm.cpp -L/opt/intel/mkl/lib/intel64 -L/opt/intel/lib/intel64 $(MKL_LIBS) 14 | 15 | $(OBJECTS): $(HEADERS) 16 | 17 | .cpp.o: 18 | $(CC) $(CFLAGS) $< -o $@ 19 | 20 | clean: 21 | rm -f *.o *~ $(EXECUTABLE) 22 | -------------------------------------------------------------------------------- 
/apps/cpp/ml/svm/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -g -O3 -Wall -I../../ 3 | LDFLAGS= -O3 -larmadillo 4 | HEADERS= ../OptiML.hpp 5 | SOURCES=svm.cpp 6 | OBJECTS=$(SOURCES:.cpp=.o) 7 | EXECUTABLE=svm 8 | 9 | all: $(SOURCES) $(EXECUTABLE) 10 | 11 | $(EXECUTABLE): $(OBJECTS) 12 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 13 | 14 | $(OBJECTS): $(HEADERS) 15 | 16 | .cpp.o: 17 | $(CC) $(CFLAGS) $< -o $@ 18 | 19 | clean: 20 | rm -f *.o *~ $(EXECUTABLE) -------------------------------------------------------------------------------- /apps/cpp/ql/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS= -O3 -Wall -fopenmp -shared -fPIC -std=c++0x -I/usr/lib/jvm/default-java/include -I/usr/lib/jvm/default-java/include/linux 3 | CPP_SOURCES=q1.cpp 4 | SCALA_SOURCES=q1.scala Table.scala 5 | 6 | all: q1.so Table.class Query1.class 7 | 8 | Table.class: $(SCALA_SOURCES) 9 | scalac $(SCALA_SOURCES) 10 | 11 | Query1.class: 12 | scalac $(SCALA_SOURCES) 13 | 14 | q1.so: $(CPP_SOURCES) 15 | $(CC) $(CFLAGS) $< -o $@ 16 | 17 | clean: 18 | rm -f *.so *.class 19 | -------------------------------------------------------------------------------- /apps/cpp/wrangler/Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-c -std=gnu++11 -O3 -Wall -I../../ -fopenmp 3 | LDFLAGS= -O3 -fopenmp 4 | SOURCES=gene_wrangler.cpp 5 | OBJECTS=$(SOURCES:.cpp=.o) 6 | EXECUTABLE=gene_wrangler 7 | 8 | all: $(SOURCES) $(EXECUTABLE) 9 | 10 | $(EXECUTABLE): $(OBJECTS) 11 | $(CC) $(LDFLAGS) $(OBJECTS) -o $@ 12 | 13 | $(OBJECTS): 14 | 15 | .cpp.o: 16 | $(CC) $(CFLAGS) $< -o $@ 17 | 18 | clean: 19 | rm -f *.o *~ $(EXECUTABLE) 20 | -------------------------------------------------------------------------------- /apps/matlab/autoencoder/autoencoder.m: -------------------------------------------------------------------------------- 1 
| function exectime = autoencoder(datafile, Ninp, Nex, Nhid) 2 | 3 | %x = load(datafile); 4 | x = randn(Ninp, Nex); 5 | 6 | alpha = 0.01; 7 | 8 | tic 9 | 10 | W1 = randn(Nhid,Ninp); 11 | W2 = randn(Ninp,Nhid); 12 | 13 | b1 = randn(Nhid,1); 14 | b2 = randn(Ninp,1); 15 | 16 | for i = 1:1000 17 | z2 = W1*x+repmat(b1,1,Nex); 18 | a2 = tanh(z2); 19 | 20 | z3 = W2*a2 + repmat(b2,1,Nex); 21 | 22 | % Error 23 | %error = norm(z3-x,'fro')/Nex 24 | 25 | delta3 = -(x - z3)/Nex; 26 | delta2 = (W2'*delta3).*(1-a2.^2); 27 | 28 | 29 | grad_W2 = delta3*a2'; 30 | grad_W1 = delta2*x'; 31 | 32 | grad_b2 = sum(delta3,2); 33 | grad_b1 = sum(delta2,2); 34 | 35 | 36 | W2 = W2 - alpha*grad_W2; 37 | W1 = W1 - alpha*grad_W1; 38 | 39 | b2 = b2 - alpha*grad_b2; 40 | b1 = b1 - alpha*grad_b1; 41 | end 42 | exectime = toc -------------------------------------------------------------------------------- /apps/matlab/gda/gda.m: -------------------------------------------------------------------------------- 1 | function exectime = gda(xfile, yfile) 2 | 3 | x = load(xfile); 4 | y = load(yfile); 5 | 6 | tic 7 | 8 | % num of training samples 9 | m = length(y); 10 | 11 | % dimensionality of training data 12 | n = size(x, 2); 13 | 14 | % phi, mu0, mu1, and sigma parameterize the GDA model, where we assume the input features are continuous-valued random variables with a multivariate normal distribution. 15 | % phi is a scalar, mu0 and mu1 are n dimensional vectors, where n is the width of x, and sigma is an n x n matrix. 
16 | 17 | % Count the samples in each class and split x by class label 18 | y_zeros = sum(y==0); 19 | y_ones = sum(y==1); 20 | 21 | x0 = x(y==0,:); 22 | x1 = x(y==1,:); 23 | % Sum down the rows explicitly: plain sum() would collapse a single-row class across its columns 24 | mu0_num = sum(x0, 1); 25 | mu1_num = sum(x1, 1); 26 | 27 | % y_ones = 0; 28 | % y_zeros = 0; 29 | % mu0_num = zeros(1,n); 30 | % mu1_num = zeros(1,n); 31 | % 32 | % parfor i=1:m 33 | % if (y(i) == 0) 34 | % y_zeros = y_zeros + 1; 35 | % mu0_num = mu0_num + x(i,:); 36 | % else 37 | % y_ones = y_ones + 1; 38 | % mu1_num = mu1_num + x(i,:); 39 | % end 40 | % end 41 | 42 | phi = 1/m * y_ones; 43 | mu0 = mu0_num / y_zeros; 44 | mu1 = mu1_num / y_ones; 45 | % Center each class on its own mean, then accumulate the (unnormalized) sum of outer products 46 | x0_mu0_tmp = x0-repmat(mu0,y_zeros,1); 47 | x1_mu1_tmp = x1-repmat(mu1,y_ones,1); 48 | 49 | sigma = (x0_mu0_tmp)'*(x0_mu0_tmp) + (x1_mu1_tmp)'*(x1_mu1_tmp); 50 | 51 | % % calculate covariance matrix sigma 52 | % 53 | % sigma = zeros(n, n); 54 | % parfor i=1:m 55 | % if (y(i) == 0) 56 | % sigma = sigma + (x(i,:)-mu0)'*(x(i,:)-mu0); 57 | % else 58 | % sigma = sigma + (x(i,:)-mu1)'*(x(i,:)-mu1); 59 | % end 60 | % end 61 | exectime = toc; 62 | -------------------------------------------------------------------------------- /apps/matlab/gda/gdagpu.m: -------------------------------------------------------------------------------- 1 | function exectime = gdagpu(xfile, yfile) 2 | 3 | x = load(xfile); 4 | y = load(yfile); 5 | 6 | tic 7 | 8 | % num of training samples 9 | m = length(y); 10 | 11 | % dimensionality of training data 12 | n = size(x, 2); 13 | 14 | % phi, mu0, mu1, and sigma parameterize the GDA model, where we assume the input features are continuous-valued random variables with a multivariate normal distribution. 15 | % phi is a scalar, mu0 and mu1 are n dimensional vectors, where n is the width of x, and sigma is an n x n matrix. 
16 | 17 | y_ones = 0; 18 | y_zeros = 0; 19 | mu0_num = zeros(1,n); 20 | mu1_num = zeros(1,n); 21 | 22 | for i=1:m 23 | if (y(i) == 0) 24 | y_zeros = y_zeros + 1; 25 | mu0_num = mu0_num + x(i,:); 26 | else 27 | y_ones = y_ones + 1; 28 | mu1_num = mu1_num + x(i,:); 29 | end 30 | end 31 | 32 | phi = 1/m * y_ones; 33 | mu0 = mu0_num / y_zeros; 34 | mu1 = mu1_num / y_ones; 35 | 36 | % calculate covariance matrix sigma 37 | 38 | sigma = gpuArray(zeros(n, n)); 39 | for i=1:m 40 | if (y(i) == 0) 41 | sigma = sigma + gpuArray(x(i,:)-mu0)'*gpuArray(x(i,:)-mu0); 42 | else 43 | sigma = sigma + gpuArray(x(i,:)-mu1)'*gpuArray(x(i,:)-mu1); 44 | end 45 | end 46 | exectime = toc; -------------------------------------------------------------------------------- /apps/matlab/gda/gdajacket.m: -------------------------------------------------------------------------------- 1 | function exectime = gdagpu(xfile, yfile) 2 | addpath /usr/local/jacket/engine 3 | 4 | x = load(xfile); 5 | y = load(yfile); 6 | 7 | tic 8 | 9 | % num of training samples 10 | m = length(y); 11 | 12 | % dimensionality of training data 13 | n = size(x, 2); 14 | 15 | % phi, mu0, mu1, and sigma parameterize the GDA model, where we assume the input features are continuous-valued random variables with a multivariate normal distribution. 16 | % phi is a scalar, mu0 and mu1 are n dimensional vectors, where n is the width of x, and sigma is an n x n matrix. 
17 | 18 | y_ones = 0; 19 | y_zeros = 0; 20 | mu0_num = zeros(1,n); 21 | mu1_num = zeros(1,n); 22 | 23 | for i=1:m 24 | if (y(i) == 0) 25 | y_zeros = y_zeros + 1; 26 | mu0_num = mu0_num + x(i,:); 27 | else 28 | y_ones = y_ones + 1; 29 | mu1_num = mu1_num + x(i,:); 30 | end 31 | end 32 | 33 | phi = 1/m * y_ones; 34 | mu0 = mu0_num / y_zeros; 35 | mu1 = mu1_num / y_ones; 36 | 37 | % calculate covariance matrix sigma 38 | % NOTE(review): gzeros/gdouble are Jacket calls, presumably creating/moving the data on the GPU -- confirm against the Jacket docs 39 | sigma = gzeros(n, n); 40 | y = gdouble(y); 41 | x = gdouble(x); 42 | for i=1:m 43 | if (y(i) == 0) 44 | sigma = sigma + (x(i,:)-mu0)'*(x(i,:)-mu0); 45 | else 46 | sigma = sigma + (x(i,:)-mu1)'*(x(i,:)-mu1); 47 | end 48 | end 49 | exectime = toc; -------------------------------------------------------------------------------- /apps/matlab/ica/ica.m: -------------------------------------------------------------------------------- 1 | load mix.dat % load mixed sources 2 | 3 | anneal = [0.1 0.1 0.1 0.05 0.05 0.05 0.02 0.02 0.01 0.01 ... 4 | 0.005 0.005 0.002 0.002 0.001 0.001]; 5 | 6 | % Initialize parameters: n = columns of mix, m = rows of mix, W starts as the n x n identity 7 | n = size(mix, 2); 8 | m = size(mix, 1); 9 | W = eye(n); 10 | 11 | % Iterate through the annealing schedule (one pass over the data per step size) 12 | tic 13 | for iter=1:length(anneal) 14 | iter 15 | % Randomly iterate through the samples running stochastic gradient descent 16 | rowIndices = randperm(m); 17 | for i = 1:length(rowIndices) 18 | rowIndex = rowIndices(i); 19 | % Perform the ICA stochastic gradient descent update: (1 - 2*sigmoid(W*x')) * x plus the inverse of W' 20 | W = W + anneal(iter) * ((ones(n,1)-2*ones(n,1)./(ones(n,1)+exp(-W*mix(rowIndex,:)')))*mix(rowIndex,:) + (W')^-1); 21 | end 22 | end; 23 | toc -------------------------------------------------------------------------------- /apps/matlab/kmeans/kmeans.m: -------------------------------------------------------------------------------- 1 | function exectime = kmeans(inputfile, mufile) 2 | 3 | tol = 0.001; 4 | 5 | x = load(inputfile); 6 | x = reshape(x, size(x, 1), 1, 3); 7 | m = size(x,1); 8 | 9 | % Initialize mu values 10 | %indices = randi(m, k, 1); 11 | 
%mu = shiftdim(x(indices, :), -1); 12 | mu = load(mufile); 13 | k = size(mu, 1); 14 | mu = shiftdim(mu, -1); 15 | 16 | oldmu = zeros(size(mu)); 17 | 18 | tic 19 | 20 | xExt = repmat(x, 1, k); 21 | 22 | iters = 0; 23 | 24 | % Run K-means until convergence: total absolute change in mu (all clusters, all channels) must drop to tol 25 | while (sum(abs(oldmu(:) - mu(:))) > tol) 26 | % Update c's to reflect closest mu to each point 27 | muExt = repmat(mu, m, 1); 28 | distances = sum((xExt - muExt).^2, 3); 29 | [junk, c] = min(distances, [], 2); 30 | 31 | % Update mu's to average of their respective points (sum along dim 1 so a single-member cluster keeps its 3 channels) 32 | oldmu = mu; 33 | for j = 1:k 34 | mu(1,j,:) = sum(x(c==j,1,:), 1)/sum(c==j); 35 | end 36 | iters = iters + 1; 37 | end 38 | 39 | iters 40 | mu 41 | exectime = toc; 42 | -------------------------------------------------------------------------------- /apps/matlab/kmeans/kmeansgpu.m: -------------------------------------------------------------------------------- 1 | function exectime = kmeansgpu(inputfile, mufile) 2 | 3 | tol = 0.001; 4 | 5 | x = load(inputfile); 6 | x = reshape(x, size(x, 1), 1, 3); 7 | m = size(x,1); 8 | 9 | % Initialize mu values 10 | %indices = randi(m, k, 1); 11 | %mu = shiftdim(x(indices, :), -1); 12 | mu = load(mufile); 13 | k = size(mu, 1); 14 | mu = shiftdim(mu, -1); 15 | 16 | oldmu = zeros(size(mu)); 17 | 18 | tic 19 | 20 | xExt = gpuArray(repmat(x, 1, k)); 21 | 22 | iters = 0; 23 | 24 | % Run K-means until convergence: total absolute change in mu (all clusters, all channels) must drop to tol 25 | while (sum(abs(oldmu(:) - mu(:))) > tol) 26 | % Update c's to reflect closest mu to each point 27 | muExt = gpuArray(repmat(mu, m, 1)); 28 | distances = sum(gather((xExt - muExt).^2), 3); 29 | [junk, c] = min(distances, [], 2); 30 | 31 | % Update mu's to average of their respective points (sum along dim 1 so a single-member cluster keeps its 3 channels) 32 | oldmu = mu; 33 | for j = 1:k 34 | mu(1,j,:) = sum(x(c==j,1,:), 1)/sum(c==j); 35 | end 36 | iters = iters + 1; 37 | end 38 | 39 | iters 40 | exectime = toc; 41 | -------------------------------------------------------------------------------- /apps/matlab/kmeans/kmeansjacket.m: 
-------------------------------------------------------------------------------- 1 | function exectime = kmeansgpu(inputfile, mufile) 2 | addpath /usr/local/jacket/engine 3 | 4 | tol = 0.001; 5 | 6 | x = load(inputfile); 7 | x = reshape(x, size(x, 1), 1, 3); 8 | m = size(x,1); 9 | 10 | % Initialize mu values 11 | %indices = randi(m, k, 1); 12 | %mu = shiftdim(x(indices, :), -1); 13 | mu = load(mufile); 14 | k = size(mu, 1); 15 | mu = shiftdim(mu, -1); 16 | 17 | oldmu = zeros(size(mu)); 18 | 19 | tic 20 | 21 | xExt = gdouble(repmat(x, 1, k)); 22 | 23 | iters = 0; 24 | 25 | % Run K-means until convergence: total absolute change in mu (all clusters, all channels) must drop to tol 26 | while (sum(abs(oldmu(:) - mu(:))) > tol) 27 | % Update c's to reflect closest mu to each point 28 | muExt = gdouble(repmat(mu, m, 1)); 29 | distances = sum(double((xExt - muExt).^2), 3); 30 | [junk, c] = min(distances, [], 2); 31 | 32 | % Update mu's to average of their respective points (sum along dim 1 so a single-member cluster keeps its 3 channels) 33 | oldmu = mu; 34 | for j = 1:k 35 | mu(1,j,:) = sum(x(c==j,1,:), 1)/sum(c==j); 36 | end 37 | iters = iters + 1; 38 | end 39 | 40 | iters 41 | exectime = toc; 42 | -------------------------------------------------------------------------------- /apps/matlab/kmeans/kmeansparfor.m: -------------------------------------------------------------------------------- 1 | function exectime = kmeans(inputfile, mufile) 2 | 3 | tol = 0.001; 4 | 5 | x = load(inputfile); 6 | x = reshape(x, size(x, 1), 3); 7 | m = size(x,1); 8 | 9 | % Initialize mu values 10 | %indices = randi(m, k, 1); 11 | %mu = shiftdim(x(indices, :), -1); 12 | mu = load(mufile); 13 | k = size(mu, 1); 14 | 15 | oldmu = zeros(size(mu)); 16 | 17 | tic 18 | iters = 0; 19 | 20 | % Run K-means until convergence: total absolute change in mu (all clusters, all columns) must drop to tol 21 | distances = zeros(m, k); 22 | while (sum(abs(oldmu(:) - mu(:))) > tol) 23 | % % Update c's to reflect closest mu to each point 24 | % parfor i=1:m 25 | % xrep = repmat(x(i,:), k, 1); 26 | % distances(i,:) = sum((xrep-mu).^2,2); 27 | % end 28 | 29 | parfor j=1:k 30 | muJrep = repmat(mu(j,:), m, 1); 31 | distances(:,j) = 
sum((x-muJrep).^2, 2); 32 | end 33 | 34 | % for j=1:k 35 | % for i=1:m 36 | % distances(i, j) = (x(i,:)-mu(j,:))*(x(i,:)-mu(j,:))'; 37 | % end 38 | % end 39 | 40 | % muExt = repmat(mu, m, 1); 41 | % distances = sum((xExt - muExt).^2, 3); 42 | [junk, c] = min(distances, [], 2); 43 | 44 | 45 | 46 | % non-vectorized version of step 1 47 | %for i=1:m 48 | % mindist = 1000000000; minj = -1; 49 | % for j=1:k 50 | % dist = (x(i,:)-u(j,:))*(x(i,:)-u(j,:)).'; 51 | % if (dist < mindist) 52 | % minj = j; 53 | % mindist = dist; 54 | % end 55 | % end 56 | % c(i) = minj; 57 | %end; 58 | 59 | 60 | % Update mu's to average of their respective points (sum along dim 1 so a single-member cluster keeps its columns) 61 | oldmu = mu; 62 | for j = 1:k 63 | mu(j,:) = sum(x(c==j,:), 1)/sum(c==j); 64 | end 65 | iters = iters + 1 66 | end 67 | 68 | iters 69 | mu 70 | exectime = toc; 71 | -------------------------------------------------------------------------------- /apps/matlab/kmeans/kmeansvec.m: -------------------------------------------------------------------------------- 1 | function exectime = kmeans(inputfile, mufile) 2 | 3 | tol = 0.001; 4 | 5 | x = load(inputfile); 6 | x = reshape(x, size(x, 1), 1, 3); 7 | m = size(x,1); 8 | 9 | % Initialize mu values 10 | %indices = randi(m, k, 1); 11 | %mu = shiftdim(x(indices, :), -1); 12 | mu = load(mufile); 13 | k = size(mu, 1); 14 | mu = shiftdim(mu, -1); 15 | 16 | oldmu = zeros(size(mu)); 17 | 18 | tic 19 | 20 | xExt = repmat(x, 1, k); 21 | 22 | iters = 0; 23 | 24 | % Run K-means until convergence: total absolute change in mu (all clusters, all channels) must drop to tol 25 | while (sum(abs(oldmu(:) - mu(:))) > tol) 26 | % Update c's to reflect closest mu to each point 27 | muExt = repmat(mu, m, 1); 28 | distances = sum((xExt - muExt).^2, 3); 29 | [junk, c] = min(distances, [], 2); 30 | 31 | % Update mu's to average of their respective points (sum along dim 1 so a single-member cluster keeps its 3 channels) 32 | oldmu = mu; 33 | for j = 1:k 34 | mu(1,j,:) = sum(x(c==j,1,:), 1)/sum(c==j); 35 | end 36 | iters = iters + 1; 37 | end 38 | 39 | iters 40 | mu 41 | exectime = toc; 42 | 
-------------------------------------------------------------------------------- /apps/matlab/lbp/count_spaces.m: -------------------------------------------------------------------------------- 1 | function num_spaces = count_spaces(s) 2 | num_spaces = sum(s == ' '); 3 | end -------------------------------------------------------------------------------- /apps/matlab/lbp/get_graph_values.m: -------------------------------------------------------------------------------- 1 | % Parse pertinent statistics from an edge file representing a graph (e.g. onlyedges-stanford) 2 | function [num_nodes, num_edges, max_line_size, max_node_num] = get_graph_values(filename) 3 | num_nodes = 0; 4 | num_edges = 0; 5 | max_line_size = 0; 6 | 7 | max_node_num = 0; 8 | cur_line_size = 0; 9 | node_num_index = 0; 10 | 11 | % first pass: find number of nodes, edges, and max line size 12 | fid = fopen(filename); 13 | while ~feof(fid) 14 | line = fgetl(fid); 15 | 16 | fields = regexp(line,'\t','split'); 17 | node = str2double(fields{1}); 18 | edges = sscanf(fields{2}, '%f'); 19 | 20 | num_nodes = num_nodes + 1; 21 | num_edges = num_edges + length(edges); 22 | 23 | max_node_num = max([max_node_num; node; edges]); 24 | max_line_size = max(max_line_size, length(line)); 25 | end 26 | fclose(fid); 27 | end -------------------------------------------------------------------------------- /apps/matlab/lbp/read_graph_lr.m: -------------------------------------------------------------------------------- 1 | % Parse an entire edge file representing a graph (e.g. onlyedges-stanford). 
2 | function [read_edges, node_ids, edges] = read_graph_lr(filename, max_node_num, num_edges) 3 | 4 | node_ids = zeros(max_node_num, 1); 5 | edges = zeros(num_edges, 2); 6 | 7 | % second pass: build graph 8 | edge_index = 1; 9 | 10 | fid = fopen(filename); 11 | while ~feof(fid) 12 | line = fgetl(fid); 13 | 14 | fields = regexp(line,'\t','split'); 15 | node = str2double(fields{1}); 16 | f_edges = sscanf(fields{2}, '%f'); 17 | 18 | node_ids(node) = node; 19 | 20 | for i=1:size(f_edges, 1) 21 | node_ids(f_edges(i)) = f_edges(i); 22 | edges(edge_index, :) = [node, f_edges(i)]; 23 | edge_index = edge_index + 1; 24 | end 25 | end 26 | fclose(fid); 27 | 28 | read_edges = edge_index - 1; 29 | end -------------------------------------------------------------------------------- /apps/matlab/lbp/struct/count_spaces.m: -------------------------------------------------------------------------------- 1 | function num_spaces = count_spaces(s) 2 | num_spaces = sum(s == ' '); 3 | end -------------------------------------------------------------------------------- /apps/matlab/lbp/struct/get_graph_values.m: -------------------------------------------------------------------------------- 1 | % Parse pertinent statistics from an edge file representing a graph (e.g. 
onlyedges-stanford) 2 | function [num_nodes, num_edges, max_line_size, max_node_num] = get_graph_values(filename) 3 | num_nodes = 0; 4 | num_edges = 0; 5 | max_line_size = 0; 6 | 7 | max_node_num = 0; 8 | cur_line_size = 0; 9 | node_num_index = 0; 10 | 11 | % first pass: find number of nodes, edges, and max line size 12 | fid = fopen(filename); 13 | while ~feof(fid) 14 | line = fgetl(fid); 15 | 16 | fields = regexp(line,'\t','split'); 17 | node = str2double(fields{1}); 18 | edges = sscanf(fields{2}, '%f'); 19 | 20 | num_nodes = num_nodes + 1; 21 | num_edges = num_edges + length(edges); 22 | 23 | max_node_num = max([max_node_num; node; edges]); 24 | max_line_size = max(max_line_size, length(line)); 25 | end 26 | fclose(fid); 27 | end -------------------------------------------------------------------------------- /apps/matlab/lbp/struct/read_graph_lr.m: -------------------------------------------------------------------------------- 1 | % Parse an entire edge file representing a graph (e.g. onlyedges-stanford). 
2 | function [read_edges, node_ids, edges] = read_graph_lr(filename, max_node_num, num_edges) 3 | 4 | node_ids = zeros(max_node_num, 1); 5 | edges = zeros(num_edges, 2); 6 | 7 | % second pass: build graph 8 | edge_index = 1; 9 | 10 | fid = fopen(filename); 11 | while ~feof(fid) 12 | line = fgetl(fid); 13 | 14 | fields = regexp(line,'\t','split'); 15 | node = str2double(fields{1}); 16 | f_edges = sscanf(fields{2}, '%f'); 17 | 18 | node_ids(node) = node; 19 | 20 | for i=1:size(f_edges, 1) 21 | node_ids(f_edges(i)) = f_edges(i); 22 | edges(edge_index, :) = [node, f_edges(i)]; 23 | edge_index = edge_index + 1; 24 | end 25 | end 26 | fclose(fid); 27 | 28 | read_edges = edge_index - 1; 29 | end -------------------------------------------------------------------------------- /apps/matlab/lbp/sum_out_product.m: -------------------------------------------------------------------------------- 1 | function vals = sum_out_product(edge_potential, that_belief, that_message, that_size, this_size, first) 2 | % Returns a this_size x 1 vector: for each i, the sum over j of belief(j)/(message(j)/min_msg) times an edge_potential entry, normalized to sum to 1 3 | huge_value = 1e200; 4 | min_prec = 1e-100; 5 | % The flag 'first' selects which index gets stride 6 and which gets stride 1 in the flat edge_potential lookup 6 | if (first) 7 | this_mult = 6; 8 | that_mult = 1; 9 | else 10 | this_mult = 1; 11 | that_mult = 6; 12 | end 13 | % Smallest strictly positive incoming message, used below to rescale the denominators (named min_msg to avoid shadowing the builtin min) 14 | min_msg = huge_value; 15 | for i = 1 : that_size 16 | if ((that_message(i) > 0) && (that_message(i) < min_msg)) 17 | min_msg = that_message(i); 18 | end 19 | end 20 | % Clamp the scale factor at min_prec 21 | if (min_msg < min_prec) 22 | min_msg = min_prec; 23 | end 24 | % total accumulates the normalization constant (named total to avoid shadowing the builtin sum) 25 | vals = zeros(this_size, 1); 26 | total = 0; 27 | for i = 1 : this_size 28 | num = 0; 29 | denum = 0; 30 | entry = 0; 31 | v = 0; 32 | for j = 1 : that_size 33 | num = that_belief(j); 34 | if (num > 0) 35 | denum = that_message(j); 36 | if (denum > min_prec) 37 | entry = num / (denum / min_msg); 38 | v = v + entry * edge_potential((i-1)*this_mult + (j-1)*that_mult + 1); 39 | end 40 | end 41 | end 42 | vals(i) = v; 43 | total = total + v; 44 | end 45 | 46 | vals = vals / total; 47 | end -------------------------------------------------------------------------------- /apps/matlab/linreg/linreg.m: 
-------------------------------------------------------------------------------- 1 | function exectime = linreg(xfile, yfile) 2 | 3 | % CS 229 Problem Set 1 4 | % Arvind Sujeeth 5 | % developed and tested on octave 3.0.0 in ubuntu 8.04 6 | 7 | % problem 2 8 | 9 | % (d.i) 10 | % unweighted linear regression using the normal equations to minimize 11 | % J(theta). This value of theta = inv(X.'X)*X.'*y 12 | % the resulting fitted line is given by the equation 13 | % h(x) = theta_0 + theta_1*x_1 14 | 15 | % the input vector x and output vector y are stored in these files 16 | q2x = load(xfile); 17 | q2y = load(yfile); 18 | 19 | tic 20 | 21 | dims = size(q2x); 22 | m = dims(1); 23 | n = dims(2)+1; 24 | % initialize x and theta vector with the intercept term. 25 | % (by convention, x_0 = 1.) 26 | x = [ones(m,1),q2x]; 27 | 28 | xstep = 25/dims(1); 29 | xref = -10:xstep:14.99; % reasonable sampling range for this data with 30 | % same number of points as the input vector q2x 31 | xref = [ones(dims(1),1),xref.']; 32 | 33 | % (d.ii) 34 | % locally weighted linear regression using the normal equations to minimize 35 | % J(theta). 
This value of theta = inv(x.'*W*x)*x.'*W*q2y; 36 | 37 | % initialize the W matrix based on the hand calculated results 38 | % and the given formula, w_i = exp(-(x-x_i)^2/2*tau^2) 39 | 40 | tau = 10; 41 | 42 | 43 | % for every query point, ALL of the weights must be re-calculated 44 | dims_ref=size(xref); 45 | guess = zeros(dims_ref(1), 1); 46 | parfor i=1:dims_ref(1) 47 | weights = zeros(dims(1),1); 48 | for j=1:dims(1) 49 | weights(j) = exp(-1*(xref(i,2)-q2x(j))^2/(2*tau^2)); 50 | end 51 | % we can vectorize this only in the special case that 52 | % dims_ref = dims (num query pts = num sample pts) 53 | %weights = exp(-1*(xref(i,2)-q2x(:)).^2./(2*tau^2)); 54 | W = diag(1/2.*weights); 55 | % directly calculate theta using the weighted normal equations 56 | t1 = x.'*W; 57 | theta = inv(t1*x)*t1*q2y; 58 | guess(i) = theta.'*xref(i,:).'; 59 | end 60 | 61 | guess(1) 62 | guess(end) 63 | 64 | exectime = toc; 65 | -------------------------------------------------------------------------------- /apps/matlab/linreg/linreggpu.m: -------------------------------------------------------------------------------- 1 | function exectime = linreg(xfile, yfile) 2 | 3 | % CS 229 Problem Set 1 4 | % Arvind Sujeeth 5 | % developed and tested on octave 3.0.0 in ubuntu 8.04 6 | 7 | % problem 2 8 | 9 | % (d.i) 10 | % unweighted linear regression using the normal equations to minimize 11 | % J(theta). This value of theta = inv(X.'X)*X.'*y 12 | % the resulting fitted line is given by the equation 13 | % h(x) = theta_0 + theta_1*x_1 14 | 15 | % the input vector x and output vector y are stored in these files 16 | q2x = load(xfile); 17 | q2y = load(yfile); 18 | 19 | tic 20 | 21 | dims = size(q2x); 22 | m = dims(1); 23 | n = dims(2)+1; 24 | % initialize x and theta vector with the intercept term. 25 | % (by convention, x_0 = 1.) 
26 | x = [ones(m,1),q2x]; 27 | 28 | xstep = 25/dims(1); 29 | xref = -10:xstep:14.99; % reasonable sampling range for this data with 30 | % same number of points as the input vector q2x 31 | xref = [ones(dims(1),1),xref.']; 32 | 33 | % (d.ii) 34 | % locally weighted linear regression using the normal equations to minimize 35 | % J(theta). This value of theta = inv(x.'*W*x)*x.'*W*q2y; 36 | 37 | % initialize the W matrix based on the hand calculated results 38 | % and the given formula, w_i = exp(-(x-x_i)^2/2*tau^2) 39 | 40 | tau = 10; 41 | 42 | weights = zeros(dims(1),1); 43 | 44 | x=gpuArray(x); 45 | 46 | % for every query point, ALL of the weights must be re-calculated 47 | dims_ref=size(xref); 48 | guess = zeros(dims_ref(1), 1); 49 | for i=1:dims_ref(1) 50 | for j=1:dims(1) 51 | weights(j) = exp(-1*(xref(i,2)-q2x(j))^2/(2*tau^2)); 52 | end 53 | % we can vectorize this only in the special case that 54 | % dims_ref = dims (num query pts = num sample pts) 55 | %weights = exp(-1*(xref(i,2)-q2x(:)).^2./(2*tau^2)); 56 | W = gpuArray(diag(1/2.*weights)); 57 | % directly calculate theta using the weighted normal equations 58 | t1 = x.'*W; 59 | theta = inv(gather(t1*x))*t1*q2y; 60 | guess(i) = gather(theta.'*xref(i,:).'); 61 | end 62 | 63 | guess(1) 64 | guess(end) 65 | 66 | exectime = toc; 67 | -------------------------------------------------------------------------------- /apps/matlab/matmult/matmult.m: -------------------------------------------------------------------------------- 1 | function exectime = matmult(n) 2 | 3 | a = rand(n, n, 'single'); 4 | b = rand(n, n, 'single'); 5 | 6 | tic 7 | for i=1:5 8 | z = a * b; 9 | end 10 | exectime = toc; 11 | -------------------------------------------------------------------------------- /apps/matlab/matmult/matmultgpu.m: -------------------------------------------------------------------------------- 1 | function exectime = matmultgpu(n) 2 | 3 | a = gpuArray(rand(n, n, 'single')); 4 | b = gpuArray(rand(n, n, 'single')); 5 | 
6 | tic 7 | for i=1:5 8 | z = a * b; 9 | end 10 | exectime = toc; 11 | -------------------------------------------------------------------------------- /apps/matlab/matmult/matmultjacket.m: -------------------------------------------------------------------------------- 1 | function exectime = matmult(n) 2 | addpath /usr/local/jacket/engine 3 | 4 | a = gsingle(rand(n, n, 'single')); 5 | b = gsingle(rand(n, n, 'single')); 6 | 7 | tic 8 | for i=1:5 9 | z = a * b; 10 | end 11 | exectime = toc; 12 | -------------------------------------------------------------------------------- /apps/matlab/nb/readMatrix.m: -------------------------------------------------------------------------------- 1 | function [matrix, tokenlist, category] = readMatrix(filename) 2 | 3 | fid = fopen(filename); 4 | 5 | %Read the header line 6 | headerline = fgetl(fid); 7 | 8 | %Read number of documents and tokens 9 | rowscols = fscanf(fid, '%d %d\n', 2); 10 | 11 | %Read the list of tokens - just a long string! 12 | blah = fscanf(fid, '%s', 1); % required for octave 13 | tokenlist = fgetl(fid); 14 | 15 | % Document word matrix 16 | % Each row represents a document (mail) 17 | % Each column represents a distinct token 18 | % The (i,j)-th element represents the number of times token j appeared in 19 | % document i 20 | matrix = sparse(1, 1, 0, rowscols(2), rowscols(1)); % the transpose! 21 | 22 | % Vector containing the categories corresponding to each row in the 23 | % document word matrix 24 | % The i-th component is 1 if the i-th document (row) in the document word 25 | % matrix is SPAM, and 0 otherwise. 
26 | category = matrix(rowscols(1)); 27 | 28 | %Read in the matrix and the categories 29 | for m = 1:rowscols(1) % as many rows as number of documents 30 | line = fgetl(fid); 31 | nums = sscanf(line, '%d'); 32 | category(m) = nums(1); 33 | matrix(1 + cumsum(nums(2:2:end - 1)), m) = nums(3:2:end - 1); 34 | end 35 | 36 | matrix = matrix'; % flip it back 37 | 38 | fclose(fid); 39 | 40 | -------------------------------------------------------------------------------- /apps/matlab/rl/cart_pole.m: -------------------------------------------------------------------------------- 1 | function [new_x, new_x_dot, new_theta, new_theta_dot] = cart_pole(action, x, x_dot, theta, theta_dot) 2 | 3 | % Parameters for simulation dynamics 4 | GRAVITY = 9.8; 5 | MASSCART = 1.0; 6 | MASSPOLE = 0.3; 7 | TOTAL_MASS = (MASSPOLE + MASSCART); 8 | LENGTH = 0.7; % actually half the pole's length 9 | POLEMASS_LENGTH = (MASSPOLE * LENGTH); 10 | FORCE_MAG = 10.0; % WAS 10.0 11 | TAU = 0.02; % seconds between state updates 12 | FOURTHIRDS = 1.3333333333333; 13 | 14 | % Noise parameters 15 | action_flip_prob = 0.00; 16 | force_noise_factor = 0.0; % multiplied by between 1-.. and 1+.. 
17 | no_control_prob = 0.00; % Force is 0 with this probability 18 | 19 | action = action - 1; 20 | 21 | % Flip action with action_flip_prob 22 | if (rand(1)0) 27 | force=FORCE_MAG; 28 | else 29 | force=-FORCE_MAG; 30 | end 31 | 32 | force = force * (1 - force_noise_factor + rand(1) * 2*force_noise_factor); 33 | 34 | if (rand(1) 2.4 || theta < -twelve_degrees || theta > twelve_degrees) 18 | state=total_states-1; % to signal failure 19 | else 20 | if (x < -1.5) 21 | state = 0; 22 | elseif (x < 1.5) 23 | state = 1; 24 | else 25 | state = 2; 26 | end 27 | 28 | if (x_dot < -0.5) 29 | ; 30 | elseif (x_dot < 0.5) 31 | state = state + 3; 32 | else 33 | state = state + 6; 34 | end 35 | 36 | if (theta < -six_degrees) 37 | ; 38 | elseif (theta < -one_degree) 39 | state = state + 9; 40 | elseif (theta < 0) 41 | state = state + 18; 42 | elseif (theta < one_degree) 43 | state = state + 27; 44 | elseif (theta < six_degrees) 45 | state = state + 36; 46 | else 47 | state = state + 45; 48 | end 49 | 50 | if (theta_dot < -fifty_degrees) 51 | ; 52 | elseif (theta_dot < fifty_degrees) 53 | state = state + 54; 54 | else 55 | state = state + 108; 56 | end 57 | 58 | end 59 | 60 | state=state+1; 61 | -------------------------------------------------------------------------------- /apps/matlab/rl/plot_learning_curve.m: -------------------------------------------------------------------------------- 1 | 2 | % A log plot may show the convergence better, as the learning curve is 3 | % typically jagged even on convergence. 
4 | figure; 5 | hold on; 6 | 7 | log_tstf = log(time_steps_to_failure); 8 | plot(log_tstf,'k'); 9 | 10 | % compute simple moving average 11 | window = 50; 12 | i = 1:window; 13 | w = ones(1,window) ./ window; 14 | weights = filter(w,1,log_tstf); 15 | 16 | x1 = window/2:size(log_tstf,2)-(window/2); 17 | h = plot(x1,weights(window:size(log_tstf,2)), 'r--'); 18 | set(h, 'LineWidth', 2); 19 | 20 | -------------------------------------------------------------------------------- /apps/matlab/rl/show_cart.m: -------------------------------------------------------------------------------- 1 | % This function displays the "animation" 2 | function [] = show_cart(x, x_dot, theta, theta_dot, pause_time) 3 | 4 | set(gcf,'DoubleBuffer','on'); 5 | 6 | length = 3; 7 | 8 | plotx(1) = x; 9 | ploty(1) = 0; 10 | 11 | %plotx(2) = x + length * cos(theta + pi/2.0); 12 | %ploty(2) = length * sin(theta + pi/2.0); 13 | 14 | plotx(2) = x + length * sin(theta); 15 | ploty(2) = length * cos(theta); 16 | 17 | plot(plotx, ploty); 18 | rectangle('Position', [x-0.4, -0.25, 0.8, 0.25], 'FaceColor', 'cyan'); 19 | rectangle('Position', [x-0.01, -0.5, 0.02, 0.25], 'FaceColor', 'r'); 20 | axis([-3 3 -0.5 3.5]); 21 | 22 | drawnow; 23 | pause(pause_time); 24 | 25 | -------------------------------------------------------------------------------- /apps/matlab/runautoencoder.m: -------------------------------------------------------------------------------- 1 | function exectime = runautoencoder(gpu) 2 | addpath autoencoder 3 | if (gpu==1) 4 | exectime = autoencodergpu('../../data/ml/autoencoder/naturalimages.dat', 2000, 2000); 5 | elseif (gpu==2) 6 | exectime = autoencoderjacket('../../data/ml/autoencoder/naturalimages.dat', 2000, 2000); 7 | else 8 | %exectime = autoencoder('../../data/ml/autoencoder/naturalimages.dat', 2000, 2000); 9 | exectime = autoencoder('RAND', 100, 100, 500); 10 | end 11 | -------------------------------------------------------------------------------- /apps/matlab/rungda.m: 
-------------------------------------------------------------------------------- 1 | function exectime = rungda(gpu) 2 | addpath gda 3 | if (gpu==1) 4 | exectime = gdagpu('/kunle/ppl/delite/data/ml/gda/2048-1200x.dat','/kunle/ppl/delite/data/ml/gda/q1y.dat'); 5 | elseif (gpu==2) 6 | exectime = gdajacket('/kunle/ppl/delite/data/ml/gda/2048-1200x.dat','/kunle/ppl/delite/data/ml/gda/q1y.dat'); 7 | else 8 | exectime = gda('/kunle/ppl/delite/data/ml/gda/2048-1200x.dat','/kunle/ppl/delite/data/ml/gda/q1y.dat'); 9 | end 10 | -------------------------------------------------------------------------------- /apps/matlab/runkmeans.m: -------------------------------------------------------------------------------- 1 | function exectime = runkmeans(gpu) 2 | addpath kmeans 3 | if (gpu == 1) 4 | exectime = kmeansgpu('/kunle/ppl/delite/data/ml/kmeans/mandrill-xlarge.dat', '/kunle/ppl/delite/data/ml/kmeans/initmu.dat'); 5 | elseif (gpu == 2) 6 | exectime = kmeansjacket('/kunle/ppl/delite/data/ml/kmeans/mandrill-xlarge.dat', '/kunle/ppl/delite/data/ml/kmeans/initmu.dat'); 7 | else 8 | exectime = kmeans('/kunle/ppl/delite/data/ml/kmeans/mandrill-xlarge.dat', '/kunle/ppl/delite/data/ml/kmeans/initmu.dat'); 9 | end 10 | -------------------------------------------------------------------------------- /apps/matlab/runlbp.m: -------------------------------------------------------------------------------- 1 | function exectime = runlbp(gpu) 2 | addpath lbp 3 | f1 = '/kunle/ppl/delite/data/ml/lbp/onlyedges1'; 4 | f2 = '/kunle/ppl/delite/data/ml/lbp/graphprint1'; 5 | rand('state', 0); 6 | if (gpu == 1) 7 | exectime = lbpgpu(f1, f2); 8 | elseif (gpu == 2) 9 | exectime = lbpjacket(f1, f2); 10 | else 11 | exectime = lbp(f1, f2); 12 | end 13 | 14 | -------------------------------------------------------------------------------- /apps/matlab/runlinreg.m: -------------------------------------------------------------------------------- 1 | function exectime = runlinreg(gpu) 2 | addpath linreg 3 | 
xfile = '/kunle/ppl/delite/data/ml/linreg/x-2048.dat'; 4 | yfile = '/kunle/ppl/delite/data/ml/linreg/y-2048.dat'; 5 | if (gpu == 1) 6 | exectime = linreggpu(xfile, yfile); 7 | elseif (gpu == 2) 8 | exectime = linregjacket(xfile, yfile); 9 | else 10 | exectime = linreg(xfile, yfile); 11 | end 12 | -------------------------------------------------------------------------------- /apps/matlab/runmatmult.m: -------------------------------------------------------------------------------- 1 | function exectime = runmatmult(gpu) 2 | addpath matmult 3 | if (gpu==1) 4 | disp 'GPU' 5 | exectime = matmultgpu(5000) 6 | elseif (gpu==2) 7 | disp 'Jacket' 8 | exectime = matmultjacket(5000) 9 | else 10 | disp 'CPU' 11 | exectime = matmult(5000) 12 | end 13 | -------------------------------------------------------------------------------- /apps/matlab/runnb.m: -------------------------------------------------------------------------------- 1 | function exectime = runnb(gpu) 2 | addpath nb 3 | trainfile = '/kunle/ppl/delite/data/ml/nb/MATRIX.TRAIN.50k'; 4 | if (gpu == 1) 5 | exectime = nb_traingpu(trainfile); 6 | elseif(gpu == 2) 7 | exectime = nb_trainjacket(trainfile); 8 | else 9 | exectime = nb_train(trainfile); 10 | end 11 | -------------------------------------------------------------------------------- /apps/matlab/runrbm.m: -------------------------------------------------------------------------------- 1 | function exectime = runrbm(gpu) 2 | addpath rbm 3 | if (gpu==1) 4 | exectime = rbmgpu('/kunle/ppl/delite/data/ml/rbm/mnist2000x10.dat', 2000, 2000); 5 | elseif (gpu==2) 6 | exectime = rbmjacket('/kunle/ppl/delite/data/ml/rbm/mnist2000x10.dat', 2000, 2000); 7 | else 8 | exectime = rbm('/kunle/ppl/delite/data/ml/rbm/mnist2000x10.dat', 2000, 2000); 9 | end 10 | -------------------------------------------------------------------------------- /apps/matlab/runsvm.m: -------------------------------------------------------------------------------- 1 | function exectime = 
runsvm(gpu)
2 | % RUNSVM  Run the SVM (SMO) training benchmark with tol = 0.001.
3 | %   gpu == 1 -> svm_traingpu, gpu == 2 -> svm_trainjacket, anything else -> svm_train.
4 |   addpath svm
5 |   trainfile = '/kunle/ppl/delite/data/ml/svm/MATRIX.TRAIN.400';
6 |   tol = 0.001;
7 |   rand('state', 0);   % fixed seed so every variant sees the same random stream
8 |   if (gpu == 1)
9 |     exectime = svm_traingpu(trainfile, tol);
10 |   elseif (gpu == 2)
11 |     exectime = svm_trainjacket(trainfile, tol);
12 |   else
13 |     exectime = svm_train(trainfile, tol);
14 |   end
15 |
--------------------------------------------------------------------------------
/apps/matlab/runsvm2.m:
--------------------------------------------------------------------------------
1 | function exectime = runsvm2(gpu)
2 | % RUNSVM2  Same benchmark as runsvm but with the tighter tolerance tol = 0.0001.
3 | %   The declared name now matches the file name (runsvm2.m); MATLAB dispatches
4 | %   on the file name, so callers are unaffected.
5 |   addpath svm
6 |   trainfile = '/kunle/ppl/delite/data/ml/svm/MATRIX.TRAIN.400';
7 |   tol = 0.0001;
8 |   rand('state', 0);   % fixed seed so every variant sees the same random stream
9 |   if (gpu == 1)
10 |     exectime = svm_traingpu(trainfile, tol);
11 |   elseif (gpu == 2)
12 |     exectime = svm_trainjacket(trainfile, tol);
13 |   else
14 |     exectime = svm_train(trainfile, tol);
15 |   end
--------------------------------------------------------------------------------
/apps/matlab/svm/readMatrix.m:
--------------------------------------------------------------------------------
1 | function [matrix, tokenlist, category] = readMatrix(filename)
2 | % READMATRIX  Load a document-word matrix file.
3 | %   matrix    - sparse (documents x tokens) matrix of token counts
4 | %   tokenlist - the token line of the file, returned as one long string
5 | %   category  - 1 x documents row vector holding the first number of each data row
6 | %   Raises readMatrix:cannotOpen when the file cannot be opened.
7 |
8 | fid = fopen(filename);
9 | if (fid == -1)
10 |   error('readMatrix:cannotOpen', 'Cannot open file: %s', filename);
11 | end
12 |
13 | %Read the header line
14 | headerline = fgetl(fid);
15 |
16 | %Read number of documents and tokens
17 | rowscols = fscanf(fid, '%d %d\n', 2);
18 |
19 | %Read the list of tokens - just a long string!
20 | blah = fscanf(fid, '%s', 1); % required for octave
21 | tokenlist = fgetl(fid);
22 |
23 | % Document word matrix
24 | % Each row represents a document (mail)
25 | % Each column represents a distinct token
26 | % The (i,j)-th element represents the number of times token j appeared in
27 | % document i
28 | matrix = sparse(1, 1, 0, rowscols(2), rowscols(1)); % the transpose!
29 |
30 | % Vector containing the categories corresponding to each row in the
31 | % document word matrix
32 | % The i-th component is 1 if the i-th document (row) in the document word
33 | % matrix is SPAM, and 0 otherwise.
34 | % Preallocated as a full row vector instead of being grown from a sparse scalar.
35 | category = zeros(1, rowscols(1));
36 |
37 | %Read in the matrix and the categories
38 | for m = 1:rowscols(1) % as many rows as number of documents
39 |   rowText = fgetl(fid);
40 |   nums = sscanf(rowText, '%d');
41 |   category(m) = nums(1);
42 |   % Remaining numbers are (offset, count) pairs; offsets are cumulative and the
43 |   % last number of the row is skipped (presumably a terminator -- confirm).
44 |   matrix(1 + cumsum(nums(2:2:end - 1)), m) = nums(3:2:end - 1);
45 | end
46 |
47 | matrix = matrix'; % flip it back
48 |
49 | fclose(fid);
50 |
51 |
--------------------------------------------------------------------------------
/apps/matlab/svm/svm.m:
--------------------------------------------------------------------------------
1 | function exectime = svm()
2 |
--------------------------------------------------------------------------------
/apps/matlab/svm/svm_test.m:
--------------------------------------------------------------------------------
1 |
2 | [spmatrix, tokenlist, category] = readMatrix('/kunle/ppl/delite/data/ml/svm/MATRIX.TEST');
3 |
4 | testMatrix = full(spmatrix);
5 | numTestDocs = size(testMatrix, 1);
6 | numTokens = size(testMatrix, 2);
7 |
8 | % Assume classify.m has just been executed, and all the parameters computed/needed
9 | % by your classifier are in memory through that execution. You can also assume
10 | % that the columns in the test set are arranged in exactly the same way as for the
11 | % training set (i.e., the j-th column represents the same token in the test data
12 | % matrix as in the original training data matrix).
13 |
14 | % Write code below to classify each document in the test set (ie, each row
15 | % in the current document word matrix) as 1 for SPAM and 0 for NON-SPAM.
16 |
17 | % Construct the (numTestDocs x 1) vector 'output' such that the i-th entry
18 | % of this vector is the predicted class (1/0) for the i-th email (i-th row
19 | % in testMatrix) in the test set.
20 | output = zeros(numTestDocs, 1); 21 | 22 | %--------------- 23 | % a prediction is made by the equation w'*X + b 24 | for i=1:numTestDocs 25 | if ((w'*testMatrix(i,:)' + b) >= 0) 26 | output(i) = 1; 27 | end 28 | end 29 | 30 | %--------------- 31 | 32 | 33 | % Compute the error on the test set 34 | error=0; 35 | for i=1:numTestDocs 36 | if (category(i) ~= output(i)) 37 | error=error+1; 38 | end 39 | end 40 | 41 | %Print out the classification error on the test set 42 | error/numTestDocs 43 | 44 | 45 | -------------------------------------------------------------------------------- /apps/matlab/svm/svm_train.m: -------------------------------------------------------------------------------- 1 | function exectime = svm_train(trainfile, tol) 2 | 3 | [spmatrix, tokenlist, trainCategory] = readMatrix(trainfile); 4 | 5 | trainMatrix = full(spmatrix); 6 | numTrainDocs = size(trainMatrix, 1); 7 | numTokens = size(trainMatrix, 2); 8 | 9 | % trainMatrix is now a (numTrainDocs x numTokens) matrix. 10 | % Each row represents a unique document (email). 11 | % The j-th column of the row $i$ represents the number of times the j-th 12 | % token appeared in email $i$. 13 | 14 | % tokenlist is a long string containing the list of all tokens (words). 15 | % These tokens are easily known by position in the file TOKENS_LIST 16 | 17 | % trainCategory is a (numTrainDocs x 1) vector containing the true 18 | % classifications for the documents just read in. The i-th entry gives the 19 | % correct class for the i-th email (which corresponds to the i-th row in 20 | % the document word matrix). 21 | 22 | % Spam documents are indicated as class 1, and non-spam as class 0. 23 | % Note that for the SVM, you would want to convert these to +1 and -1. 24 | 25 | % convert labels 26 | %Y(:) = (trainCategory(:) == 1)*(1) + (trainCategory(:) == 0)*(-1); 27 | Y = (trainCategory(:) == 1)*(1) + (trainCategory(:) == 0)*(-1); 28 | Y=Y'; 29 | % format trainMatrix for SMO algorithm? 
30 | 31 | % SMO algorithm 32 | tic 33 | [b, alphas] = smo_train(trainMatrix, Y', 1, tol, 10); 34 | exectime = toc; 35 | 36 | % compute the weights (assuming a linear kernel) 37 | dim = size(trainMatrix,2); 38 | w = zeros(dim, 1); 39 | for i=1:size(trainMatrix,1) 40 | w = w + alphas(i)*Y(i)*trainMatrix(i,:)'; 41 | end 42 | 43 | svm_test 44 | -------------------------------------------------------------------------------- /apps/matlab/svm/svm_traingpu.m: -------------------------------------------------------------------------------- 1 | function exectime = svm_traingpu(trainfile, tol) 2 | 3 | [spmatrix, tokenlist, trainCategory] = readMatrix(trainfile); 4 | 5 | trainMatrix = full(spmatrix); 6 | numTrainDocs = size(trainMatrix, 1); 7 | numTokens = size(trainMatrix, 2); 8 | 9 | % trainMatrix is now a (numTrainDocs x numTokens) matrix. 10 | % Each row represents a unique document (email). 11 | % The j-th column of the row $i$ represents the number of times the j-th 12 | % token appeared in email $i$. 13 | 14 | % tokenlist is a long string containing the list of all tokens (words). 15 | % These tokens are easily known by position in the file TOKENS_LIST 16 | 17 | % trainCategory is a (numTrainDocs x 1) vector containing the true 18 | % classifications for the documents just read in. The i-th entry gives the 19 | % correct class for the i-th email (which corresponds to the i-th row in 20 | % the document word matrix). 21 | 22 | % Spam documents are indicated as class 1, and non-spam as class 0. 23 | % Note that for the SVM, you would want to convert these to +1 and -1. 24 | 25 | % convert labels 26 | Y = (trainCategory(:) == 1)*(1) + (trainCategory(:) == 0)*(-1); 27 | Y=Y'; 28 | % format trainMatrix for SMO algorithm? 
29 | 30 | % SMO algorithm 31 | tic 32 | [b, alphas] = smo_traingpu(trainMatrix, Y', 1, tol, 10); 33 | exectime = toc; 34 | 35 | % compute the weights (assuming a linear kernel) 36 | dim = size(trainMatrix,2); 37 | w = gpuArray(zeros(dim, 1)); 38 | for i=1:size(trainMatrix,1) 39 | w = w + alphas(i)*Y(i)*trainMatrix(i,:)'; 40 | end 41 | w = gather(w); 42 | 43 | svm_test 44 | -------------------------------------------------------------------------------- /apps/matlab/svm/svm_trainjacket.m: -------------------------------------------------------------------------------- 1 | function exectime = svm_trainjacket(trainfile, tol) 2 | addpath /usr/local/jacket/engine 3 | 4 | [spmatrix, tokenlist, trainCategory] = readMatrix(trainfile); 5 | 6 | trainMatrix = full(spmatrix); 7 | numTrainDocs = size(trainMatrix, 1); 8 | numTokens = size(trainMatrix, 2); 9 | 10 | % trainMatrix is now a (numTrainDocs x numTokens) matrix. 11 | % Each row represents a unique document (email). 12 | % The j-th column of the row $i$ represents the number of times the j-th 13 | % token appeared in email $i$. 14 | 15 | % tokenlist is a long string containing the list of all tokens (words). 16 | % These tokens are easily known by position in the file TOKENS_LIST 17 | 18 | % trainCategory is a (numTrainDocs x 1) vector containing the true 19 | % classifications for the documents just read in. The i-th entry gives the 20 | % correct class for the i-th email (which corresponds to the i-th row in 21 | % the document word matrix). 22 | 23 | % Spam documents are indicated as class 1, and non-spam as class 0. 24 | % Note that for the SVM, you would want to convert these to +1 and -1. 25 | 26 | % convert labels 27 | Y = (trainCategory(:) == 1)*(1) + (trainCategory(:) == 0)*(-1); 28 | Y=Y'; 29 | % format trainMatrix for SMO algorithm? 
30 |
31 | % SMO algorithm
32 | tic
33 | [b, alphas] = smo_trainjacket(trainMatrix, Y', 1, tol, 10);
34 | exectime = toc;
35 |
36 | % compute the weights (assuming a linear kernel)
37 | dim = size(trainMatrix,2);
38 | w = gzeros(dim, 1);
39 | for i=1:size(trainMatrix,1)
40 |   w = w + alphas(i)*Y(i)*trainMatrix(i,:)';
41 | end
42 | w = double(w);
43 |
44 | svm_test
45 |
--------------------------------------------------------------------------------
/apps/matlab/timeapp.m:
--------------------------------------------------------------------------------
1 | function exectime = timeapp(appname, procs)
2 | % TIMEAPP  Run one benchmark driver (run<appname>) and return its reported time.
3 | %   procs == -2 : Jacket variant  (driver is called with 2)
4 | %   procs == -1 : GPU variant     (driver is called with 1)
5 | %   otherwise   : CPU variant     (driver is called with 0) using procs
6 | %                 computational threads, plus a local pool when procs ~= 1.
7 | %   The thread limit and the pool are restored/closed even if the driver errors.
8 |   runner = ['run' appname];   % dispatched with feval instead of eval on a string
9 |   if (procs == -2)
10 |     exectime = feval(runner, 2);
11 |   elseif (procs == -1)
12 |     exectime = feval(runner, 1);
13 |   else
14 |     oldprocs = maxNumCompThreads(procs);
15 |     restoreThreads = onCleanup(@() maxNumCompThreads(oldprocs));
16 |     if (procs ~= 1)
17 |       matlabpool('local',procs);
18 |       closePool = onCleanup(@() matlabpool('close'));
19 |     end
20 |     disp(appname)
21 |     exectime = feval(runner, 0);
22 |   end
23 | end
24 |
--------------------------------------------------------------------------------
/apps/matlab/timeapps.m:
--------------------------------------------------------------------------------
1 | function timeapps(app, gpu)
2 | % TIMEAPPS  Time one benchmark (or all of them when app is 'all') and display
3 | %   the resulting matrix of times: one row per entry of proclist, one column
4 | %   per application. Entries start at -1 and are overwritten by timeapp results.
5 |
6 | if (gpu==2)
7 |   proclist = -2;
8 | elseif (gpu==1)
9 |   proclist = -1;
10 | else
11 |   proclist = [1, 2, 4, 8]; % -1 denotes gpu, -2 denotes jacket
12 | end
13 |
14 | if (strcmpi(app, 'all'))
15 |   % apps = {'gda', 'nb', 'linreg', 'kmeans', 'svm', 'lbp', 'matmult' 'rbm'};
16 |   apps = {'linreg', 'kmeans', 'svm', 'lbp', 'matmult'};
17 | else
18 |   apps = {app};
19 | end
20 |
21 | outputTimes = -1 * ones(length(proclist), length(apps));
22 | for j=1:length(apps)
23 |   for i=1:length(proclist)
24 |     outputTimes(i, j) = timeapp(apps{j}, proclist(i));
25 |   end
26 | end
27 |
28 | outputTimes   % left unsuppressed on purpose: this displays the result table
29 |
--------------------------------------------------------------------------------
/apps/multi-dsl/src/ppl/apps/interop/ParProperty.scala:
-------------------------------------------------------------------------------- 1 | package ppl.apps.interop 2 | 3 | import scala.collection.parallel.mutable.ParArray 4 | import Types._ 5 | /** 6 | * A Property object is used to associate data with graph nodes or edges 7 | * When the Property object is first created, the data value of each node/edge is set 8 | * to the default of the data type (e.g. Boolean -> False, Int -> 0, String -> null) 9 | * Note: properties can be associated only with immutable graph instances 10 | */ 11 | 12 | class ParProperty[@specialized T: ClassManifest](val g: Graph, val size: Int) { 13 | 14 | /* Stores the property value for each graph node/edge */ 15 | var data: ParArray[T] = new ParArray[T](size) 16 | 17 | def update(n: Node, x: T): Unit = dcUpdate(n, x) 18 | def apply(n: Node): T = dcApply(n) 19 | 20 | // DeliteCollection ops 21 | /* Returns the property value of the node/edge with the given id */ 22 | def dcApply(idx: Int) : T = data(idx) 23 | /* Updates the property value of the node/edge with the given id */ 24 | def dcUpdate(idx: Int, x: T) : Unit = { data(idx) = x } 25 | /* Returns the length of the property data array (which is the number of nodes or edges in the graph)*/ 26 | def dcSize : Int = data.length 27 | } 28 | -------------------------------------------------------------------------------- /apps/multi-dsl/src/ppl/apps/interop/Property.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.interop 2 | 3 | import Types._ 4 | /** 5 | * A Property object is used to associate data with graph nodes or edges 6 | * When the Property object is first created, the data value of each node/edge is set 7 | * to the default of the data type (e.g. 
Boolean -> False, Int -> 0, String -> null) 8 | * Note: properties can be associated only with immutable graph instances 9 | */ 10 | 11 | class Property[@specialized T: ClassManifest](val g: Graph, val size: Int) { 12 | 13 | /* Stores the property value for each graph node/edge */ 14 | var data: Array[T] = new Array[T](size) 15 | 16 | def update(n: Node, x: T): Unit = dcUpdate(n, x) 17 | def apply(n: Node): T = dcApply(n) 18 | 19 | // DeliteCollection ops 20 | /* Returns the property value of the node/edge with the given id */ 21 | def dcApply(idx: Int) : T = data(idx) 22 | /* Updates the property value of the node/edge with the given id */ 23 | def dcUpdate(idx: Int, x: T) : Unit = { data(idx) = x } 24 | /* Returns the length of the property data array (which is the number of nodes or edges in the graph)*/ 25 | def dcSize : Int = data.length 26 | } 27 | -------------------------------------------------------------------------------- /apps/multi-dsl/src/ppl/apps/interop/Types.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.interop 2 | 3 | import scala.collection.mutable._ 4 | 5 | object Types { 6 | type Node = Int 7 | } 8 | -------------------------------------------------------------------------------- /apps/optiml/src/HelloWorld.scala: -------------------------------------------------------------------------------- 1 | import ppl.dsl.optiml._ 2 | 3 | object HelloWorldRunner extends OptiMLApplicationRunner with HelloWorld 4 | trait HelloWorld extends OptiMLApplication { 5 | def main() = { 6 | println("hello world") 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/GraphExample.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps 2 | 3 | import scala.virtualization.lms.common.Record 4 | import ppl.dsl.optiml._ 5 | 6 | object GraphExampleRunner extends OptiMLApplicationRunner 
with GraphExample 7 | trait GraphExample extends OptiMLApplication { 8 | 9 | def vertexData(x: Rep[String], n: Rep[Int]) = new Record { 10 | val count = n 11 | val name = x 12 | } 13 | type VD = Record{val count: Int; val name: String} 14 | 15 | def edgeData(x: Rep[String]) = new Record { 16 | val name = x 17 | } 18 | type ED = Record{val name: String} 19 | 20 | def main() = { 21 | // simple diamond-shaped graph 22 | val g = Graph[VD,ED]() 23 | 24 | val a = Vertex(g, vertexData("a",0)) 25 | g.addVertex(a) 26 | val b = Vertex(g, vertexData("b",1)) 27 | g.addVertex(b) 28 | val c = Vertex(g, vertexData("c",2)) 29 | g.addVertex(c) 30 | val d = Vertex(g, vertexData("d",3)) 31 | g.addVertex(d) 32 | 33 | val ab = Edge(g, edgeData("inAB"), edgeData("outAB"), a, b) 34 | g.addEdge(ab,a,b) 35 | 36 | val ac = Edge(g, edgeData("inAC"), edgeData("outAC"), a, c) 37 | g.addEdge(ac,a,c) 38 | 39 | val bd = Edge(g, edgeData("inBD"), edgeData("outBD"), b, d) 40 | g.addEdge(bd,b,d) 41 | 42 | val cd = Edge(g, edgeData("inCD"), edgeData("outCD"), c, d) 43 | g.addEdge(cd,c,d) 44 | 45 | g.freeze() 46 | 47 | for (v <- g.vertices) { 48 | println("vertex " + v.data.name) 49 | println(" has edges: ") 50 | for (e <- v.edges) { 51 | println(" " + e.inData.name + " / " + e.outData.name) 52 | } 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/bio/spade/Upsampling.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.bio.spade 2 | 3 | import ppl.dsl.optiml._ 4 | 5 | trait Upsampling { 6 | this: OptiMLApplication => 7 | 8 | /* 9 | def upsample(data: Rep[DenseMatrix[Double]], 10 | cluster_data: Rep[DenseMatrix[Double]], 11 | cluster_assign: Rep[DenseVector[Int]]) = { 12 | 13 | println("obs = "+data.numRows+", dim = "+data.numCols+", cls = "+cluster_data.numRows) 14 | 15 | val assign = data.mapRows {row1:Rep[DenseVector[Double]] => 16 | val distances = 
cluster_data.mapRows{row2:Rep[DenseVector[Double]]=> dist(row1,row2)} 17 | val idx = distances.minIndex 18 | cluster_assign(idx) 19 | } 20 | assign 21 | 22 | /* 23 | val assign = data.mapRows {row1:Rep[DenseVector[Double]] => 24 | val distances = cluster_data.mapRows{row2:Rep[DenseVector[Double]]=> dist(row1,row2)} 25 | val idx = distances.minIndex 26 | cluster_assign(idx) 27 | } 28 | assign 29 | */ 30 | /* 31 | var assign = DenseVector[Int](data.numRows) 32 | for( idx <- (0::data.numRows)){ 33 | if(idx%1000 == 0) println(" (imperative) # processed node = " + idx) 34 | var min_idx = 0 35 | var min_dist = scala.Double.MaxValue 36 | var j = 0 37 | while(j < cluster_data.numRows){ 38 | val d = dist(data(idx), cluster_data(j)) 39 | if (d < min_dist) { 40 | min_idx = j 41 | min_dist = d 42 | } 43 | j += 1 44 | } 45 | assign(idx) = cluster_assign(min_idx) 46 | } 47 | assign 48 | */ 49 | } 50 | */ 51 | 52 | } 53 | 54 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/ml/arff/arff.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.ml.arff 2 | 3 | import scala.virtualization.lms.common.Record 4 | import ppl.dsl.optiml._ 5 | import ppl.delite.framework.DeliteApplication 6 | 7 | object ARFFRunner extends OptiMLApplicationRunner with ARFF 8 | 9 | /** 10 | * Example program showing how to use OptiML to read ARFF files. 11 | * 12 | * This program is meant to work with the 'iris.arff' dataset. OptiML does not currently 13 | * support reading arbitrary (no pre-defined schema) ARFF files, as that would result in the 14 | * loss of static types and potentially degrade performance without improved stage-time support. 
15 | */ 16 | 17 | trait ARFF extends OptiMLApplication { 18 | def printUsage = { 19 | println("Usage: arff ") 20 | exit(-1) 21 | } 22 | 23 | def mySchema(v: Rep[DenseVector[String]]) = 24 | new Record { 25 | val sepalLength = v(0).toDouble 26 | val sepalWidth = v(1).toDouble 27 | val petalLength = v(2).toDouble 28 | val petalWidth = v(3).toDouble 29 | val cls = v(4) 30 | } 31 | 32 | def main() = { 33 | if (args.length < 1) printUsage 34 | 35 | val in = readARFF(args(0), mySchema) 36 | 37 | val sep0: Rep[Double] = in(0).sepalLength 38 | 39 | println("My first row is: ") 40 | println("sepalLength: " + in(0).sepalLength + ", sepalWidth: " + in(0).sepalWidth + ", petalLength: " + in(0).petalLength + ", petalWidth: " + in(0).petalWidth + ", class: " + in(0).cls) 41 | 42 | // create a Matrix[Double] out of the first four elements of the schema 43 | val m = Matrix(in map { row => DenseVector(row.sepalLength, row.sepalWidth, row.petalLength, row.petalWidth) }) 44 | m.sliceRows(0, 10).pprint 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/ml/cf/userdefined/PairwiseRating.scala: -------------------------------------------------------------------------------- 1 | import ppl.dsl.optiml.datastruct.scala._ 2 | 3 | class PairwiseRating ( 4 | val profileA: Int, 5 | val profileB: Int, 6 | val scoreA: Int, 7 | val scoreB: Int 8 | ) -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/ml/cf/userdefined/Similarity.scala: -------------------------------------------------------------------------------- 1 | import ppl.dsl.optiml.datastruct.scala._ 2 | 3 | class Similarity ( 4 | val a: Int, 5 | val b: Int, 6 | val value: Double 7 | ) -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/ml/kmeans/kmeans.scala: -------------------------------------------------------------------------------- 1 | package 
ppl.apps.ml.kmeans
2 |
3 | import ppl.dsl.optiml._
4 | import ppl.delite.framework.DeliteApplication
5 |
6 | object kmeansRunnerNCNR extends OptiMLApplicationRunnerBase with OptiMLNoCSE with OptiMLExp with kmeansApp
7 | object kmeansRunnerNC extends OptiMLApplicationRunner with OptiMLNoCSE with kmeansApp
8 | object kmeansRunnerNR extends OptiMLApplicationRunnerBase with OptiMLExp with kmeansApp
9 | object kmeansRunner extends OptiMLApplicationRunner with kmeansApp
10 |
11 | /**
12 |  * k-means benchmark: reads the data matrix from args(0) and the initial
13 |  * cluster centers from args(1), runs kmeans.cluster between tic/toc, then
14 |  * prints the iteration count and the resulting centers.
15 |  */
16 | trait kmeansApp extends OptiMLApplication {
17 |
18 |   def print_usage = {
19 |     println("Usage: kmeans <input data file> <initial mu file>")
20 |     exit(-1)
21 |   }
22 |
23 |   private val tol = 0.001 // tolerance (for convergence)
24 |   private val k = 16 // num clusters
25 |
26 |   def main() {
27 |
28 |     // both args(0) and args(1) are read below, so two arguments are required
29 |     if (args.length < 2) print_usage
30 |
31 |     val x = UnsupervisedTrainingSet(readMatrix(args(0)))
32 |     val mu = readMatrix(args(1))
33 |
34 |     tic(mu)
35 |     val (iter, mu2) = kmeans.cluster(x, k, tol, Some(mu))
36 |     toc(mu2)
37 |     println("finished in " + iter + " iterations")
38 |     mu2.pprint
39 |
40 |   }
41 | }
42 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/ml/lbpdenoise/LBPDenoiseRaw.scala:
--------------------------------------------------------------------------------
1 | package ppl.apps.ml.lbpdenoise
2 |
3 | import ppl.dsl.optiml.OptiMLApplicationRunner
4 |
5 | /**
6 |  * author: Michael Wu (mikemwu@stanford.edu)
7 |  * last modified: 01/19/2011
8 |  *
9 |  * Pervasive Parallelism Laboratory (PPL)
10 |  * Stanford University
11 |  */
12 |
13 | object LBPDenoiseRawRunner extends OptiMLApplicationRunner with LBPDenoiseRaw
14 |
15 | trait LBPDenoiseRaw extends LBPDenoise {
16 |   override def print_usage = {
17 |     println("Usage: LBPDenoiseRaw ")
18 |     println("Example: LBPDenoiseRaw noisy200x200.raw")
19 |     exit(-1)
20 |   }
21 |
22 |   override def loadImage(args: Rep[Array[String]], colors: Rep[Int], sigma: Rep[Int]) = {
23 |     val img = GrayscaleImage(readMatrix(args(0)))
24 | 
MLOutputWriter.writeImgPgm(img, "check.pgm")
25 |     img
26 |   }
27 | }
28 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/ml/linreg/LinReg.scala:
--------------------------------------------------------------------------------
1 | /* locally weighted linear regression
2 |  *
3 |  * author: Arvind Sujeeth (asujeeth@stanford.edu)
4 |  *
5 |  * Pervasive Parallelism Laboratory (PPL)
6 |  * Stanford University
7 |  */
8 |
9 | package ppl.apps.ml.linreg
10 |
11 | import ppl.dsl.optiml._
12 | import ppl.delite.framework.DeliteApplication
13 |
14 | object LinRegRunner extends OptiMLApplicationRunner with LinReg
15 |
16 | trait LinReg extends OptiMLApplication {
17 |
18 |
19 |   // file format is m lines with n floats per line, each float separated by 2 spaces
20 |   // (same as matlab .dat)
21 |   def print_usage = {
22 |     println("Usage: LinRegSerial ")
23 |     exit(-1)
24 |   }
25 |
26 |   // Reads x from args(0) and y from args(1), prints the unweighted fit, then
27 |   // times (tic/toc) and prints the locally weighted fit.
28 |   def main() = {
29 |     if (args.length < 2) print_usage
30 |
31 |     val x = readMatrix(args(0))
32 |     val y = readVector(args(1)).t
33 |
34 |     // logElapsed("Input Section Complete")
35 |
36 |     val theta = linreg.unweighted(x,y)
37 |     println("Unweighted linear regression")
38 |     println("theta: ")
39 |     theta.pprint
40 |     print("\n")
41 |
42 |     // only the weighted regression is inside the timed region
43 |     tic()
44 |     val guess = linreg.weighted(x,y)
45 |     toc(guess)
46 |
47 |     println("Locally weighted linear regression")
48 |     println("guess: ")
49 |     guess.pprint
50 |     print("\n")
51 |
52 |     //PerformanceTimer.save("LinReg")
53 |   }
54 | }
55 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/ml/logreg/LogReg.scala:
--------------------------------------------------------------------------------
1 | package ppl.apps.ml.logreg
2 |
3 | import ppl.dsl.optiml._
4 | import ppl.delite.framework.DeliteApplication
5 |
6 | object LogRegRunner extends OptiMLApplicationRunner with LogReg
7 |
8 | trait LogReg extends OptiMLApplication {
9 |
10 |   // file format is m lines with n floats per line, each float separated by 2 spaces
11 |   // (same as matlab .dat)
12 |   def print_usage = {
13 |     println("Usage: LogReg ")
14 |     exit(-1)
15 |   }
16 |
17 |   // Reads x from args(0) and y from args(1), prints their sizes, then times
18 |   // (tic/toc) the gradient call and prints the resulting w.
19 |   def main() = {
20 |     if (args.length < 2) print_usage
21 |
22 |     val x = readMatrix(args(0))
23 |     val y = readVector(args(1)).t
24 |
25 |     println("x.numRows: " + x.numRows)
26 |     println("x.numCols: " + x.numCols)
27 |     println("y.length: " + y.length)
28 |
29 |     // gradient descent with logistic function
30 |     // NOTE(review): `*:*` is presumably the vector dot product -- confirm in OptiML
31 |     tic()
32 |     val w = gradient (SupervisedTrainingSet(x,y), alpha = 1, tol = .001, maxIter = 30) { (t,xi) => 1.0 / (1.0 + exp(t*(-1.0) *:* xi)) }
33 |     toc(w)
34 |     println("w:")
35 |     w.pprint
36 |   }
37 | }
38 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/robotics/gradient/BinarizedGradientPyramid.scala:
--------------------------------------------------------------------------------
1 | package ppl.apps.robotics.gradient
2 |
3 | import ppl.dsl.optiml._
4 | import ppl.dsl.optiml.application._
5 | import ppl.delite.framework.DeliteApplication
6 |
7 | trait BinarizedGradientPyramidFuncs {
8 |   this: OptiMLApplication =>
9 |
10 |
11 |   // Builds a BinarizedGradientPyramid from gradientImage. With the constants
12 |   // below (startLevel = 3, levels = 1) the loop runs for levels 0..3: the image
13 |   // is downsampled with bitwiseOrDownsample after levels 0, 1 and 2, and only
14 |   // the level-3 image is added to the pyramid vector.
15 |   def makePyramid(gradientImage: Rep[GrayscaleImage]) = {
16 |     var crt = gradientImage
17 |     var currentLevel = 0
18 |
19 |     val pyramid = DenseVector[GrayscaleImage](0, true)
20 |     val startLevel = 3
21 |     val levels = 1
22 |     val fixedLevelIndex = 3
23 |
24 |     while (currentLevel < startLevel + levels) {
25 |       if (currentLevel >= startLevel) {
26 |         // presumably appends crt to the vector -- confirm `<<=` semantics
27 |         pyramid <<= crt
28 |       }
29 |       // no downsample after the last level: its result would never be used
30 |       if (currentLevel != (startLevel + levels - 1)) {
31 |         crt = crt.bitwiseOrDownsample
32 |       }
33 |       currentLevel += 1
34 |     }
35 |
36 |     BinarizedGradientPyramid(pyramid, startLevel, levels, fixedLevelIndex)
37 |   }
38 |
39 |   // Looks up the image stored for pyramid level `index`; stored entries are
40 |   // offset by the pyramid's start_level.
41 |   def getIndex(pyramid: Rep[BinarizedGradientPyramid], index: Rep[Int]) = pyramid.pyramid(index - pyramid.start_level)
42 | }
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/robotics/gradient/gradient.scala:
--------------------------------------------------------------------------------
1 | package ppl.apps.robotics.gradient
2 |
3 | import ppl.dsl.optiml._
4 | import ppl.dsl.optiml.application._
5 | import ppl.delite.framework.DeliteApplication
6 |
7 | object gradientRunner extends OptiMLApplicationRunner with gradient
8 |
9 | /**
10 |  * Object-detection benchmark: reads a grayscale image from args(0) and the
11 |  * template models from args(1), then times detectAllObjects with tic/toc.
12 |  */
13 | trait gradient extends OptiMLApplication
14 |   with BinarizedGradientGridFuncs with BinarizedGradientPyramidFuncs with BinarizedGradientTemplateFuncs {
15 |
16 |   def print_usage = {
17 |     println("Usage: gradient <image file> <template models file>")
18 |     exit(-1)
19 |   }
20 |
21 |   def main() = {
22 |     // both args(0) and args(1) are read below, so two arguments are required
23 |     if (args.length < 2) print_usage
24 |
25 |     val image = readGrayscaleImage(args(0))
26 |     val all_templates = readTemplateModels(args(1))
27 |     // input loading is kept outside the timed region
28 |     tic()
29 |     detectAllObjects(all_templates, image)
30 |     toc()
31 |   }
32 | }
33 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/robotics/gradient/userdefined/BiGGDetection.scala:
--------------------------------------------------------------------------------
1 | import ppl.dsl.optiml.datastruct.scala._
2 |
3 | // Plain immutable record describing one detection; every field is a
4 | // constructor val. Field meanings are not documented in this file.
5 | class BiGGDetection (
6 |   val name: String,
7 |   val score: Float,
8 |   val roi: Rect,
9 |   val mask: GrayscaleImage,
10 |   val index: Int,
11 |   val x: Int,
12 |   val y: Int,
13 |   val tpl: BinarizedGradientTemplate,
14 |   val crt_tpl: BinarizedGradientTemplate
15 | )
16 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/robotics/gradient/userdefined/BinarizedGradientPyramid.scala:
--------------------------------------------------------------------------------
1 | import ppl.dsl.optiml.datastruct.scala._
2 | import ppl.dsl.optila.datastruct.scala._
3 |
4 | // Plain immutable record: a vector of per-level images plus the level
5 | // bookkeeping values it was built with.
6 | class BinarizedGradientPyramid (
7 |   val pyramid: DenseVector[GrayscaleImage],
8 |   val start_level: Int,
9 |   val levels: Int,
10 |   val fixedLevelIndex: Int
11 | )
12 |
13 |
--------------------------------------------------------------------------------
/apps/optiml/src/ppl/apps/robotics/gradient/userdefined/BinarizedGradientTemplate.scala:
-------------------------------------------------------------------------------- 1 | import ppl.dsl.optiml.datastruct.scala._ 2 | import ppl.dsl.optila.datastruct.scala._ 3 | 4 | // object BinarizedGradientTemplate { 5 | // def apply(val radius: Int, ...) = newStruct(("radius","Int", radius), 6 | // } 7 | 8 | class BinarizedGradientTemplate ( 9 | // In the reduced image. The side of the template square is then 2*r+1. 10 | val radius: Int, 11 | 12 | // Holds a tighter bounding box of the object in the original image scale 13 | val rect: Rect, 14 | val mask_list: DenseVector[Int], 15 | 16 | // Pyramid level of the template (reduction_factor = 2^level) 17 | val level: Int, 18 | 19 | // The list of gradients in the template 20 | val binary_gradients: DenseVector[Double], 21 | 22 | // indices to use for matching (skips zeros inside binary_gradients) 23 | val match_list: IndexVectorDenseC, 24 | 25 | // This is a match list of list of sub-parts. Currently unused. 26 | val occlusions: DenseVector[DenseVector[Int]], 27 | 28 | val templates: DenseVector[BinarizedGradientTemplate], 29 | 30 | val hist: DenseVector[Float] 31 | ) 32 | 33 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/robotics/gradient/userdefined/Rect.scala: -------------------------------------------------------------------------------- 1 | import ppl.dsl.optiml.datastruct.scala._ 2 | 3 | class Rect ( 4 | val x: Int, 5 | val y: Int, 6 | val width: Int, 7 | val height: Int 8 | ) 9 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/tests/AnyType.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.tests 2 | 3 | import ppl.delite.framework.DeliteApplication 4 | import ppl.dsl.optiml._ 5 | 6 | object AnyTypeRunner extends OptiMLApplicationRunner with AnyType 7 | 8 | trait AnyType extends OptiMLApplication { 9 | 10 | def main() = { 11 | var i 
= 0 12 | while ( i < 5) { 13 | println("ITER: " + i) 14 | i += 1 15 | } 16 | } 17 | 18 | 19 | } 20 | 21 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/tests/GPUWhileLoop.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.tests 2 | 3 | import ppl.delite.framework.DeliteApplication 4 | import ppl.dsl.optiml._ 5 | 6 | object GPUWhileLoopRunner extends OptiMLApplicationRunner with GPUWhileLoop 7 | 8 | trait GPUWhileLoop extends OptiMLApplication { 9 | 10 | def main() = { 11 | var i = 0 12 | while ( i < 3) { 13 | val a = Matrix.onesf(5,10).mutable 14 | a(i,i) = 3.4f 15 | val b = Vector.onesf(10).mutable 16 | b(i) = 1.2f 17 | val c = a * b 18 | c.pprint 19 | i += 1 20 | } 21 | } 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/tests/MutableOps.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.tests 2 | 3 | import ppl.delite.framework.DeliteApplication 4 | import ppl.dsl.optiml._ 5 | 6 | object MutableOpsRunner extends OptiMLApplicationRunner with MutableOps 7 | 8 | trait MutableOps extends OptiMLApplication { 9 | 10 | def main() = { 11 | val acc = Vector.zerosf(10).mutable 12 | val in = Matrix.onesf(5,10).mutable 13 | 14 | var i = 0 15 | while ( i < 5) { 16 | acc += in(i) * i 17 | i += 1 18 | } 19 | acc.pprint 20 | } 21 | 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /apps/optiml/src/ppl/apps/tests/Sigmoid.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.tests 2 | 3 | import ppl.delite.framework.DeliteApplication 4 | import ppl.dsl.optiml._ 5 | 6 | object SigmoidOpRunner extends OptiMLApplicationRunner with SigmoidOp 7 | 8 | trait SigmoidOp extends OptiMLApplication { 9 | 10 | def main() = { 11 | val 
/**
 * OptiQL example over CIDR traffic records.
 *
 * Reads a comma-separated file of `Content` records, prints the number of
 * records, then prints per-category totals (hits, bytes, record count) for
 * the records whose country is "United States".
 */
trait Example extends OptiQLApplication {

  /** One input row; field order matches the `Content` constructor below. */
  type Content = Record {
    val cidr: String
    val startTime: Int
    val hitCount: Long
    val byteCount: Long
    val category: String
    val subCategory: String
    val country: String
    val continent: String
  }

  /** Builds a staged `Content` record from its individual staged fields. */
  def Content(_cidr: Rep[String], start_time: Rep[Int], hit_count: Rep[Long], byte_count: Rep[Long], _category: Rep[String], sub_category: Rep[String], _country: Rep[String], _continent: Rep[String]): Rep[Content] = new Record {
    val cidr = _cidr
    val startTime = start_time
    val hitCount = hit_count
    val byteCount = byte_count
    val category = _category
    val subCategory = sub_category
    val country = _country
    val continent = _continent
  }

  /** Prints the command-line usage and terminates with a failure exit code. */
  def printUsage() = {
    // The runner object for this app is ExampleRunner and main() requires
    // exactly one thing from the user: the path of the input file (args(0)).
    println("Usage: ExampleRunner <input CIDR file>")
    exit(-1)
  }

  def main() = {
    if (args.length < 1) printUsage()
    val path = args(0)
    val data = Table.fromFile[Content](path, ",")

    // Counts every record (the predicate is constantly true).
    val c = data.Count(e => unit(true)) //why?
    println("total records processed: " + c)

    // Per-category aggregates restricted to United States traffic.
    val q = data Where(_.country == "United States") GroupBy(_.category) Select(g => new Record {
      val category = g.key
      val totalHits = g.Sum(_.hitCount)
      val totalBytes = g.Sum(_.byteCount)
      val count = g.Count
    })

    q.printAsTable()
  }

}
#!/bin/bash
#
# dlines: line-of-code report for the OptiML apps and their MATLAB counterparts.
#
# Runs cloc.pl (expected next to this script) over a fixed list of application
# sources under $DELITE_HOME and writes a summary plus one "files,loc" line per
# application to $PWD/dlines.out.

# Directory containing this script and cloc.pl. This is deliberately not called
# HOME: HOME is an exported variable, so reassigning it would change the home
# directory seen by perl and every other child process.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
OUT=$PWD/dlines.out

if [ -z "$DELITE_HOME" ]; then
  echo "dlines: DELITE_HOME is not set" >&2
  exit 1
fi

# -f: a missing report from a previous run is not an error.
rm -f "$OUT"

# report LABEL ENTRY...
#   Appends "<LABEL> SUMMARY" (cloc over all files) and
#   "<LABEL> INDIVIDUAL REPORTS" (one line per ENTRY) to $OUT.
#   An ENTRY may hold several space-separated files that together make up one
#   application, which is why entries are expanded unquoted below.
report() {
  local label=$1
  shift
  local app line loc

  echo "$label SUMMARY :: " >> "$OUT"
  # Unquoted on purpose: split multi-file entries into separate arguments.
  perl "$SCRIPT_DIR/cloc.pl" $* >> "$OUT"

  echo -e "\n" >> "$OUT"
  echo "$label INDIVIDUAL REPORTS :: " >> "$OUT"
  for app in "$@"; do
    line=$(perl "$SCRIPT_DIR/cloc.pl" --quiet --csv $app)
    # Unquoted echo joins the CSV header and data rows into one line; field 11
    # of that joined line is the value reported as the line count.
    loc=$(echo $line | awk -F, '{print $11}')
    echo "$app,$loc" >> "$OUT"
  done
}

# optiml
OPTIML_APPS=$DELITE_HOME/apps/scala/src/ppl/apps/ml/
report "OPTIML" \
  "$OPTIML_APPS/gda/GDA.scala" \
  "$OPTIML_APPS/kmeans/kmeans.scala" \
  "$OPTIML_APPS/linreg/LinReg.scala" \
  "$OPTIML_APPS/nb/NaiveBayes.scala" \
  "$OPTIML_APPS/rbm/RBM.scala" \
  "$OPTIML_APPS/svm/SVMModel.scala $OPTIML_APPS/svm/SVM.scala"

# matlab
MATLAB_APPS=$DELITE_HOME/apps/matlab/
echo -e "\n" >> "$OUT"
report "MATLAB" \
  "$MATLAB_APPS/gda/gda.m" \
  "$MATLAB_APPS/kmeans/kmeans.m" \
  "$MATLAB_APPS/linreg/linreg.m" \
  "$MATLAB_APPS/nb/nb_test.m $MATLAB_APPS/nb/nb_train.m" \
  "$MATLAB_APPS/rbm/rbm.m" \
  "$MATLAB_APPS/svm/smo_train.m $MATLAB_APPS/svm/svm.m $MATLAB_APPS/svm/svm_test.m"
parser = argparse.ArgumentParser()
parser.add_argument("dimension", help="the dimension of the generated data", type=int)
parser.add_argument("examples", help="the number of generated examples", type=int)
parser.add_argument("-w" , "--wcoeff", help="coefficient to multiply the generated logistic parameter w by",
                    type=float, default=10.0)
parser.add_argument("-b" , "--bcoeff", help="coefficient to multiply the generated logistic parameter b by",
                    type=float, default=0.1)
parser.add_argument("-c", "--chunk", help="size of chunks to compute at once",
                    type=int, default=512*1024)


def main(argv=None):
    """Print randomly generated logistic-regression examples to stdout.

    A random model (bias ``b`` and weight vector ``w``) is drawn first. Then
    ``examples`` rows are emitted, one per line, as
    ``<label> <x_1> ... <x_dimension>`` where the label is ``1.0`` or ``0.0``.
    Rows are produced in batches of at most ``--chunk`` rows to bound memory.

    argv: optional list of command-line arguments (without the program name).
        ``None`` (the default) keeps the original behaviour of reading
        ``sys.argv``.

    Exits through ``parser.error`` (status 2) when ``dimension`` or ``--chunk``
    is not positive.
    """
    args = parser.parse_args(argv)
    # dimension <= 0 would divide by zero (or take sqrt of a negative number).
    if args.dimension <= 0:
        parser.error("dimension must be a positive integer")
    # chunk == 0 would never make progress in the loop below (infinite loop);
    # a negative chunk is rejected by numpy with an unhelpful traceback.
    if args.chunk <= 0:
        parser.error("chunk must be a positive integer")

    scale = math.sqrt(args.dimension)
    # select model parameters at random
    b = -args.bcoeff * numpy.random.randn() / scale
    w = args.wcoeff * numpy.random.randn(args.dimension, 1) / scale
    # generate data points
    remaining_examples = args.examples
    while remaining_examples > 0:
        cur_examples = min(remaining_examples, args.chunk)
        data = numpy.random.randn(cur_examples, args.dimension) / scale
        u = data.dot(w) + b
        p = 1.0 / (numpy.exp(-u) + 1.0)
        r = numpy.random.rand(cur_examples, 1)
        # Label is 1 with probability p: compare against a uniform draw.
        labels = (p >= r).ravel()
        for i in range(cur_examples):
            print(("1.0" if labels[i] else "0.0") + ' ' + ' '.join(str(v) for v in data[i, :]))
        remaining_examples -= cur_examples


if __name__ == "__main__":
    main()
@echo off
REM Windows launcher for the extension-less Scala script that sits next to this
REM .bat file (same base name). It assembles a temporary Scala script as
REM   <shared prelude> + "try { /*" + <companion script> + "} catch { ... }"
REM and runs it with the `scala` command, forwarding all arguments.

REM %~n0 is this file's name without extension; the temp script lives in %TEMP%.
SET SCRIPT=%TEMP%\%~n0
if exist "%SCRIPT%" del "%SCRIPT%"
REM Prefix: the common "shared" file from this directory, then the start of a try block.
copy "%~dp0\shared" "%SCRIPT%.prefix" > NUL
echo try { >> "%SCRIPT%.prefix"
REM "set /P" prints "/" with no trailing newline, so the following "*" forms "/*".
REM NOTE(review): presumably this opens a block comment that swallows the
REM companion script's shell header up to its "!#*/" line -- confirm.
echo.|set /P ="/" >> "%SCRIPT%.prefix"
echo * >> "%SCRIPT%.prefix"
REM Middle: %~dpn0 is this file's full path without ".bat", i.e. the companion script.
copy "%~dpn0" "%SCRIPT%.middle" > NUL
REM Postfix: close the try block and print the exception message (or its cause).
echo } catch { > "%SCRIPT%.postfix"
echo case e: Exception =^> ^{ >> "%SCRIPT%.postfix"
echo var message = ^"%%s^".format^(if ^(e.getMessage != null^) e.getMessage else e.getCause^) >> "%SCRIPT%.postfix"
echo println(message) >> "%SCRIPT%.postfix"
echo } >> "%SCRIPT%.postfix"
echo } >> "%SCRIPT%.postfix"
REM Concatenate the three parts into the final script.
copy "%SCRIPT%.prefix"+"%SCRIPT%.middle"+"%SCRIPT%.postfix" "%SCRIPT%" > NUL

REM Environment variables set before the script is launched.
SET SCRIPT_HOME=%~dp0
SET SCRIPT_NAME=%~nx0
scala "%SCRIPT%" %*
| echo '/\c' >> "$synthetic" 12 | echo '*\c' >> "$synthetic" 13 | cat "$0" >> "$synthetic" 14 | echo '} catch {' >> "$synthetic" 15 | echo 'case e: Exception => {' >> "$synthetic" 16 | echo 'var message = "%s".format(if (e.getMessage != null) e.getMessage else e.getCause )' >> "$synthetic" 17 | echo 'message = "%s%n%s".format(message, "It may help to run delitecfg or manually adjust %s." .format(config.file.getAbsolutePath))' >> "$synthetic" 18 | echo 'println(message)' >> "$synthetic" 19 | echo '}' >> "$synthetic" 20 | echo '}' >> "$synthetic" 21 | exec scala "$synthetic" "$@" 22 | !#*/ 23 | 24 | args = javaopt("delite.build.dir", config.delite_home + File.separator + "generated") +: args 25 | args = javaopt("delite.home.dir", config.delite_home) +: args 26 | var (java_opts, runner_opts, other_opts, other_args) = parse_args_with_runner(args, verbose = false) 27 | if (other_args.length != 0) runner_opts = other_args.toIterator 28 | if (windows) { java_opts = java_opts map { arg => wrap(arg) }; env("JAVA_OPTS") = java_opts mkString " " } 29 | 30 | args = java_opts ++ other_opts ++ runner_opts 31 | classpath = env_classpath + config.optiml_classes + config.framework_classes + config.lms_classes + config.app_classes 32 | val (_, lines) = run(config.scala_virtualized_interpreter) 33 | if (lines.length > 2) sys.exit(1) 34 | println("%s".format("Successfully compiled")) 35 | 36 | -------------------------------------------------------------------------------- /bin/windows/delites.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | SET SCRIPT=%TEMP%\%~n0 4 | if exist "%SCRIPT%" del "%SCRIPT%" 5 | copy "%~dp0\shared" "%SCRIPT%.prefix" > NUL 6 | echo try { >> "%SCRIPT%.prefix" 7 | echo.|set /P ="/" >> "%SCRIPT%.prefix" 8 | echo * >> "%SCRIPT%.prefix" 9 | copy "%~dpn0" "%SCRIPT%.middle" > NUL 10 | echo } catch { > "%SCRIPT%.postfix" 11 | echo case e: Exception =^> ^{ >> "%SCRIPT%.postfix" 12 | echo var message = 
^"%%s^".format^(if ^(e.getMessage != null^) e.getMessage else e.getCause^) >> "%SCRIPT%.postfix" 13 | echo message = ^"%%s%%n%%s^".format^(message, ^"It may help to run delitecfg or manually adjust %%s.^" .format^(config.file.getAbsolutePath^)^) >> "%SCRIPT%.postfix" 14 | echo println(message) >> "%SCRIPT%.postfix" 15 | echo } >> "%SCRIPT%.postfix" 16 | echo } >> "%SCRIPT%.postfix" 17 | copy "%SCRIPT%.prefix"+"%SCRIPT%.middle"+"%SCRIPT%.postfix" "%SCRIPT%" > NUL 18 | 19 | SET SCRIPT_HOME=%~dp0 20 | SET SCRIPT_NAME=%~nx0 21 | scala "%SCRIPT%" %* 22 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | version := "0.2" 2 | 3 | organization := "stanford-ppl" 4 | 5 | retrieveManaged := true 6 | 7 | //resolvers := Seq(mavenLocal) 8 | -------------------------------------------------------------------------------- /config/delite/BLAS.xml.linux: -------------------------------------------------------------------------------- 1 | 2 | gcc 3 | 4 | -I 5 | 6 | ${JAVA_HOME}/include 7 | ${JAVA_HOME}/include/linux 8 | 9 | /usr/include 10 | 11 | cblas.h 12 | 13 | 14 | -L 15 | 16 | /usr/lib/libblas 17 | 18 | -lblas 19 | 20 | 21 | -------------------------------------------------------------------------------- /config/delite/BLAS.xml.ubuntu-mkl: -------------------------------------------------------------------------------- 1 | 2 | icc 3 | 4 | -I 5 | 6 | ${JAVA_HOME}/include 7 | ${JAVA_HOME}/include/linux 8 | 9 | /opt/intel/mkl/include 10 | 11 | mkl.h 12 | mkl_lapacke.h 13 | math.h 14 | 15 | 16 | -L 17 | 18 | /opt/intel/mkl/lib/intel64 19 | /opt/intel/lib/intel64 20 | 21 | -lmkl_intel_lp64 22 | -lmkl_intel_thread 23 | -lmkl_core 24 | -liomp5 25 | -lmkl_mc3 26 | -lmkl_def 27 | 28 | 29 | -------------------------------------------------------------------------------- /config/delite/CPP.xml.linux: 
-------------------------------------------------------------------------------- 1 | 2 | g++ 3 | make -s -j 4 | -w -O3 -fPIC -pthread -ffast-math 5 | 6 | -I 7 | ${JAVA_HOME}/include 8 | ${JAVA_HOME}/include/linux 9 | 10 | -shared -fPIC -pthread 11 | 12 | -------------------------------------------------------------------------------- /config/delite/CPP.xml.linux.pcm: -------------------------------------------------------------------------------- 1 | 2 | g++ 3 | make -s -j 4 | -w -O3 -fPIC -pthread -ffast-math 5 | 6 | -I 7 | ${JAVA_HOME}/include 8 | ${JAVA_HOME}/include/linux 9 | ${PCM_HOME} 10 | 11 | -shared -fPIC -pthread 12 | 13 | -L 14 | ${PCM_HOME}/cpucounters.o 15 | ${PCM_HOME}/msr.o 16 | ${PCM_HOME}/pci.o 17 | ${PCM_HOME}/client_bw.o 18 | 19 | 20 | -------------------------------------------------------------------------------- /config/delite/CUDA.xml.linux: -------------------------------------------------------------------------------- 1 | 2 | /usr/local/cuda/bin/nvcc 3 | /usr/bin/make -s -j 4 | 2.0,3.0 5 | 6 | -I 7 | ${JAVA_HOME}/include 8 | ${JAVA_HOME}/include/linux 9 | 10 | 11 | -------------------------------------------------------------------------------- /config/delite/LAPACK.xml.macos-accelerate: -------------------------------------------------------------------------------- 1 | 2 | gcc 3 | 4 | -I 5 | ${JAVA_HOME}/include/ 6 | ${JAVA_HOME}/include/darwin/ 7 | ${LAPACK_HOME}/lapacke/include 8 | cblas.h 9 | lapacke.h 10 | 11 | 12 | -L 13 | ${LAPACK_HOME} 14 | -llapacke 15 | -framework Accelerate 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /config/delite/LAPACK.xml.ubuntu-lapack: -------------------------------------------------------------------------------- 1 | 2 | gcc 3 | 4 | -I 5 | ${JAVA_HOME}/include/ 6 | ${JAVA_HOME}/include/linux/ 7 | cblas.h 8 | lapacke.h 9 | 10 | 11 | -L 12 | -llapacke 13 | -lblas 14 | 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /config/delite/LAPACK.xml.ubuntu-mkl: -------------------------------------------------------------------------------- 1 | 2 | icc 3 | 4 | -I 5 | ${JAVA_HOME}/include/ 6 | ${JAVA_HOME}/include/linux/ 7 | 8 | /opt/intel/mkl/include 9 | 10 | mkl.h 11 | mkl_lapacke.h 12 | 13 | 14 | -L 15 | 16 | /opt/intel/mkl/lib/intel64 17 | /opt/intel/lib/intel64 18 | 19 | -lmkl_intel_lp64 20 | -lmkl_intel_thread 21 | -lmkl_core 22 | -liomp5 23 | -lmkl_mc3 24 | -lmkl_def 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /config/delite/OpenCL.xml.linux: -------------------------------------------------------------------------------- 1 | 2 | /usr/bin/g++ 3 | 4 | -I 5 | ${JAVA_HOME}/include 6 | ${JAVA_HOME}/include/linux 7 | /usr/local/cuda/include 8 | 9 | 10 | -------------------------------------------------------------------------------- /config/delite/clBLAS.xml.linux: -------------------------------------------------------------------------------- 1 | 2 | /usr/bin/g++ 3 | 4 | -I 5 | /usr/local/cuda/include 6 | ${DELITE_HOME}/runtime/opencl/blas 7 | 8 | 9 | -L 10 | ${DELITE_HOME}/runtime/opencl/blas 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /config/delite/cuBLAS.xml.linux: -------------------------------------------------------------------------------- 1 | 2 | /usr/local/cuda/bin/nvcc 3 | 2.0,3.0 4 | 5 | -I 6 | cublas.h 7 | 8 | 9 | -L 10 | -lcublas 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /config/delite/protobuf.xml.example: -------------------------------------------------------------------------------- 1 | 2 | ~/mesos-0.9.0/third_party/protobuf-2.4.1/src/protoc 3 | 4 | -------------------------------------------------------------------------------- /delite.properties.sample: 
-------------------------------------------------------------------------------- 1 | # Delite project properties 2 | [delite] 3 | 4 | # location of the JVM to use with Delite. 5 | # equivalent to setting the JAVA_HOME environment variable. 6 | #java.home=/home/user/java/jdk1.7.0_b121_x86_64 7 | 8 | # uncomment to override default lightweight modular staging (LMS) location. 9 | # equivalent to setting the LMS_HOME environment variable. 10 | #libs.lms.home=/home/user/projects/virtualization-lms-core 11 | 12 | # used for the benchmark/gather-numbers.py benchmarking script. not 13 | # required for typical Delite usage. 14 | apps.data=/home/user/delite 15 | 16 | # test options for sbt tests 17 | tests.verbose=false 18 | tests.threads=1 19 | -------------------------------------------------------------------------------- /dsls/optiql/src/ppl/dsl/optiql/Types.scala: -------------------------------------------------------------------------------- 1 | package ppl.dsl.optiql 2 | 3 | import scala.virtualization.lms.common.Record 4 | import ppl.delite.framework.ops.DeliteCollection 5 | import ppl.delite.framework.datastructures._ 6 | 7 | trait Types { this: OptiQL => 8 | 9 | trait Table[T] extends DeliteCollection[T] 10 | 11 | trait Grouping[K,V] extends DeliteCollection[V] 12 | 13 | trait Date 14 | 15 | type Result = Record 16 | 17 | } 18 | -------------------------------------------------------------------------------- /dsls/simple/src/ppl/apps/assignment2/SimpleVectorApp.scala: -------------------------------------------------------------------------------- 1 | package ppl.apps.assignment2 2 | 3 | import ppl.dsl.assignment2.{SimpleVectorApplicationRunner, SimpleVectorApplication} 4 | 5 | object SimpleVectorAppRunner extends SimpleVectorApplicationRunner with SimpleVectorApp 6 | 7 | trait SimpleVectorApp extends SimpleVectorApplication { 8 | 9 | def main() { 10 | val x = Vector[Int](100) + 1 11 | val y = Vector[Int](100) + 2 12 | 13 | val z = x + y 14 | z.pprint 15 | 16 | 
/**
 * Test application exposing `testFunction` both as a plain `main` and as a
 * registered function (see the runner objects that mix this trait in).
 */
trait DeliteTestFunction extends DeliteTestDSLApplication {

  /** Builds the array 0.0, 1.0, ..., 99.0, applies `testFunction` and prints the result. */
  def main() = {
    val input = DeliteArray.fromFunction(100){ idx => idx.toDouble }
    println(testFunction(input, 1, 2.0))
  }

  /** Adds `b` to every element of `a`, sums the results starting from 0.0, and divides by `c`. */
  def testFunction(a: Rep[DeliteArray[Double]], b: Rep[Int], c: Rep[Double]) = {
    val shifted = a.map(elem => elem + b)
    val total = shifted.reduce((x, y) => x + y, 0.0)
    total / c
  }

}
/** IR-level implementation of the profiling operations. */
trait ProfileOpsExp extends ProfileOps with EffectExp {
  /** IR node: run `body` `n` times, producing a ProfileArray. */
  case class Profile(n: Exp[Int], body: Block[Any]) extends Def[ProfileArray]

  /** Reifies the by-name `func` into a block and reflects a `Profile` node for it. */
  def profile_body(n: Exp[Int], func: => Exp[Any]) = {
    val reifiedBody = reifyEffects(func)
    // create an IR node
    reflectEffect(Profile(n, reifiedBody))
  }

  /** For a `Profile` node the bound symbols are the effect symbols of its body. */
  override def boundSyms(e: Any): List[Sym[Any]] = e match {
    case Profile(_, blk) => effectSyms(blk)
    case _ => super.boundSyms(e)
  }
}
/** Small file-system and reflection helpers. */
object Util {

  /**
   * Recursively deletes `path`.
   *
   * Works for directories, regular files and non-existent paths alike.
   * `File.listFiles` returns null when `path` is not a directory (or cannot
   * be read); that case is treated as "no children" instead of failing with
   * a NullPointerException.
   *
   * Failures of individual `delete` calls are ignored, as before.
   */
  def deleteDirectory(path: File): Unit = {
    val children = path.listFiles
    if (children != null) {
      children.foreach { f =>
        if (f.isDirectory) deleteDirectory(f)
        else f.delete()
      }
    }
    path.delete()
  }

  /**
   * Reflective subtype test: true when `x` is `cls`, or `cls` can be reached
   * from `x` by following superclasses and implemented interfaces.
   */
  // better way to do this? manifest <:< comparisons seem to fail
  def isSubtype(x: java.lang.Class[_], cls: java.lang.Class[_]): Boolean = {
    if (x == cls) true
    else {
      val superclass = x.getSuperclass()
      // A direct parent equal to `cls` is caught by the `x == cls` test one level down.
      (superclass != null && isSubtype(superclass, cls)) ||
        x.getInterfaces().exists(s => isSubtype(s, cls))
    }
  }
}
/**
 * This trait is deprecated, and is only being used in old DSL implementations that have
 * not been updated to use DeliteStructs yet.
 */
trait Utils {
  /**
   * Copies every file found (recursively) under `from` into the single flat
   * directory `to`, rewriting each line on the way.
   *
   * Each line is first passed through `dsmap`, then every match of the
   * pattern "ppl.delite.framework.datastruct" is replaced by "generated".
   * Does nothing when `from` does not exist or is not a directory.
   *
   * @param from  root directory of the data-structure sources
   * @param to    output directory (created if missing); sub-directory structure is not preserved
   * @param dsmap per-line rewrite applied before the package rename (identity by default)
   */
  def copyDataStructures(from: String, to: String, dsmap: String => String = s => s): Unit = {
    val lineSeparator = System.getProperty("line.separator")

    // Rewrites one source file into `to`; both handles are closed even if a
    // read, write or `dsmap` call throws.
    def copyFile(f: File): Unit = {
      val in = _root_.scala.io.Source.fromFile(f)
      try {
        val out = new BufferedWriter(new FileWriter(to + File.separator + f.getName))
        try {
          for (line <- in.getLines()) {
            // NOTE: replaceAll takes a regex, so each '.' matches any character (kept as in the original).
            val remappedLine = dsmap(line).replaceAll("ppl.delite.framework.datastruct", "generated")
            out.write(remappedLine + lineSeparator)
          }
        } finally {
          out.close()
        }
      } finally {
        in.close()
      }
    }

    // Walks `dir` depth-first; listFiles is null for non-directories and on I/O errors.
    def copyDirectory(dir: File): Unit = {
      val entries = dir.listFiles
      if (entries != null) {
        for (f <- entries) {
          if (f.isDirectory) copyDirectory(f)
          else copyFile(f)
        }
      }
    }

    val dsDir = new File(from)
    if (dsDir.exists) {
      new File(to).mkdirs()
      copyDirectory(dsDir)
    }
  }
}
/**
 * Scala code generation for variable nodes.
 *
 * Variables are emitted as `generated.scala.Ref` cells: reads become `.get`,
 * writes become `.set(...)`. Any node not matched here is delegated to the
 * parent generator.
 */
trait DeliteGenScalaVariables extends ScalaGenEffect {
  val IR: VariablesExp
  import IR._

  override def emitNode(sym: Sym[Any], rhs: Def[Any]) = rhs match {
    // read: <var>.get
    case ReadVar(Variable(cell)) =>
      val readExpr = quote(cell) + ".get"
      emitValDef(sym, readExpr)

    // allocation: wrap the initial value in a Ref
    case NewVar(initial) =>
      val allocExpr = "generated.scala.Ref(" + quote(initial) + ")"
      emitValDef(sym, allocExpr)

    // plain assignment is emitted as a statement, not bound to a val
    case Assign(Variable(cell), value) =>
      val target = quote(cell)
      stream.println(target + ".set(" + quote(value) + ")")

    // += : read the current value, add, and store the result back
    case VarPlusEquals(Variable(cell), delta) =>
      val target = quote(cell)
      val updateExpr = target + ".set(" + target + ".get +" + quote(delta) + ")"
      emitValDef(sym, updateExpr)

    case _ =>
      super.emitNode(sym, rhs)
  }
}
/**
 * Settings describing the BLAS external library.
 *
 * The members below are plain constants consumed through `ExternalLibrary`
 * (not visible here); the comments describe only what the values themselves show.
 */
object BLAS extends ExternalLibrary {
  // name of the library
  val libName = "BLAS"
  // XML configuration file associated with this library
  val configFile = "BLAS.xml"
  // extension of the generated source file (C++)
  val ext = "cpp"
  // extension of the produced library (shared object)
  val libExt = "so"
  // should we consider library linking machine dependent? do we have a different external lib
  // for unix and windows?
  val compileFlags = List( "-w", "-O3", "-shared", "-fPIC") // dynamic shared library
  // switch placed in front of the output file name
  val outputSwitch = "-o"

  // Fixed preamble followed by one `#include "<h>"` line for every entry of configHeader.
  // NOTE(review): the #include directives inside the literal show no header names in
  // this view - confirm against the checked-in file before relying on this text.
  override lazy val header = """
#include
#include

#include
""" + configHeader.map(h => "#include \"" + h + "\"\n").mkString("")
}
/**
 * Settings describing the protobuf external library: `.proto` inputs,
 * Java outputs selected through `--java_out=`.
 */
object ProtoBuf extends ExternalLibrary {
  val libName = "protobuf"
  val configFile = "protobuf.xml"
  val ext = "proto"
  val libExt = "java"
  // should we consider library linking machine dependent? do we have a different external lib
  // for unix and windows?

  // <homeDir>/runtime/src/ppl/delite/runtime/messages
  val runtimePath = (Config.homeDir +: Seq("runtime", "src", "ppl", "delite", "runtime", "messages")).mkString(sep)
  // <buildDir>/scala/datastructures
  val structsPath = (Config.buildDir +: Seq("scala", "datastructures")).mkString(sep)

  // no extra compiler flags
  val compileFlags = List()
  val outputSwitch = "--java_out="

  // the two project paths come first as -I entries, then whatever the config file supplies
  override lazy val headerDir = List(structsPath, runtimePath).map(p => "-I" + p) ++ config.headerDir
  override lazy val destFileName = ""
  override val separateOutput = false

}
/**
 * Declares the `DeliteReduction` node interface on top of `BaseFatExp`.
 */
trait DeliteReductionOpsExp extends BaseFatExp {

  /**
   * Interface for binary reduction ops that write to the lhs. Instances of this interface
   * will be checked for and safely handled inside DeliteOpForeachReduce.
   *
   * The node itself has result type `Unit`.
   *
   * @tparam L type of the expression being updated (`lhs`)
   * @tparam R type of the values being reduced (`zero`, `rhs`, and both operands of `reduce`)
   */
  abstract class DeliteReduction[L:Manifest, R:Manifest] extends Def[Unit] {
    /* variable being reduced - this must be mutable! */
    val lhs: Exp[L]
    /* zero value of the reduction */
    def zero: Exp[R]
    /* expression that produces the next value to reduce */
    def rhs: Exp[R]
    /* reduction function: combines two values of type R into one */
    def reduce: (Exp[R], Exp[R]) => Exp[R]
    /* updates lhs with result rhs */
    // TODO: this becomes redundant in practice - something like: a.setValue(reduce(a.value), b).
    // can we rework this API to better focus on the necessary info? maybe:
    //   set(x: Exp[L]): Exp[Unit]
    //   get(x: Exp[L]): Exp[R]
    //   updateValue = set(x, reduce(get(x), rhs))
    def updateValue: (Exp[L], Exp[R]) => Exp[Unit]

    /* manifests of L and R, captured from the context bounds */
    val mL = manifest[L]
    val mR = manifest[R]
  }

}
/**
 * A worklist transformer that performs exactly one forward pass: it reports
 * itself as done as soon as `runOnce` has been invoked at least once.
 */
trait ForwardPassTransformer extends WorklistTransformer /*with BaseDeliteOpsTraversalFat*/ {
  val IR: DeliteOpsExp
  import IR._

  // Look the expression up in the substitution table a single time;
  // the replacement is returned as-is and is not itself substituted again.
  override def apply[A](x: Exp[A]): Exp[A] =
    subst.get(x).map(_.asInstanceOf[Exp[A]]).getOrElse(x)

  // number of passes executed so far
  var runs = 0

  // single pass forward transform: finished once any pass has run
  override def isDone = runs > 0

  override def runOnce[A:Manifest](s: Block[A]): Block[A] = {
    runs = runs + 1
    super.runOnce(s)
  }
}
// Convert 'name' to HTML class selector string
function toClassSelector(name) {
	return '.' + name;
}

// Convert 'name' to HTML id selector string
function toIdSelector(name) {
	return '#' + name;
}

// Build the label "<time><unit> : <pct>%" for a time given in milliseconds.
//   abs - absolute time in ms; shown as-is below 1000, otherwise as whole seconds
//   pct - percentage (number); shown rounded to a whole number
function getDisplayTextForTimeAbsPctPair(abs, pct) {
	var displayInMs = abs < 1000;
	var timeAbs = displayInMs ? abs : (abs / 1000).toFixed(0);
	var timeUnit = displayInMs ? "ms" : "s";
	return timeAbs + timeUnit + " : " + pct.toFixed(0) + "%";
}

// Format a byte count as a whole number followed by the largest fitting unit
// out of B / KB / MB / GB (1024-based).
//   memUsage - number of bytes; values below 1 (and non-numeric input) yield "0B"
// Values beyond the GB range stay expressed in GB.
function memUsageValueToStr(memUsage) {
	var value = Number(memUsage);

	// written as a negated >= so that NaN also falls through to "0B"
	if (!(value >= 1)) {
		return "0B";
	}

	var labels = ["B", "KB", "MB", "GB"];
	var i = 0;

	// step up one unit for every full factor of 1024, stopping at the last label
	while ((i < labels.length - 1) && (value >= 1024)) {
		value = value / 1024;
		i++;
	}

	return value.toFixed(0) + labels[i];
}
| 16 | var annotationMarkers = svg.selectAll(".annotation") 17 | .data(gcStats) 18 | .enter() 19 | .append("rect") 20 | .attr("class", "annotation") 21 | .attr("x", function(d) {return x(d.start)}) 22 | .attr("y", 0) 23 | .attr("width", function(d) {return x(d.start + d.duration) - x(d.start) }) 24 | .attr("height", height) 25 | .style("fill", getFill) 26 | .style("stroke", "black") 27 | .style("stroke-width", 1) 28 | .on("click", clickHandler) 29 | 30 | function getFill(d) { 31 | switch(d.type) { 32 | case MAJOR_GC: return "red" 33 | case MINOR_GC: return "purple" 34 | default : console.error("Unexpected value for GC Event type") 35 | } 36 | } 37 | 38 | var previouslyClickedNode = 0; 39 | function clickHandler(d, i) { 40 | var data = []; 41 | var y = d.youngGenStats.sizeStats; 42 | data.push(["Young Gen", y.beforeGC, y.afterGC, y.committed]); 43 | 44 | if (d.type == MAJOR_GC) { 45 | var o = d.oldGenStats.sizeStats; 46 | data.push(["Old Gen", o.beforeGC, o.afterGC, o.committed]); 47 | } else { 48 | data.push(["Old Gen", "N/A", "N/A", "N/A"]); 49 | } 50 | 51 | var h = d.heapStats; 52 | data.push(["Heap", h.beforeGC, h.afterGC, h.committed]); 53 | 54 | config.populateGCEventInfoTable(data); 55 | $(".annotation")[previouslyClickedNode].style["stroke-width"] = "1px" 56 | $(".annotation")[i].style["stroke-width"] = "5px" 57 | previouslyClickedNode = i; 58 | } 59 | } -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/animated-overlay.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/animated-overlay.gif -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_diagonals-thick_18_b81900_40x40.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_diagonals-thick_18_b81900_40x40.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_diagonals-thick_20_666666_40x40.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_diagonals-thick_20_666666_40x40.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_flat_10_000000_40x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_flat_10_000000_40x100.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_glass_100_f6f6f6_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_glass_100_f6f6f6_1x400.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_glass_100_fdf5ce_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_glass_100_fdf5ce_1x400.png -------------------------------------------------------------------------------- 
/profiler/lib/jquery-ui-1.11.0/images/ui-bg_glass_65_ffffff_1x400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_glass_65_ffffff_1x400.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_gloss-wave_35_f6a828_500x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_gloss-wave_35_f6a828_500x100.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_highlight-soft_100_eeeeee_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_highlight-soft_100_eeeeee_1x100.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-bg_highlight-soft_75_ffe45c_1x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-bg_highlight-soft_75_ffe45c_1x100.png -------------------------------------------------------------------------------- /profiler/lib/jquery-ui-1.11.0/images/ui-icons_222222_256x240.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/profiler/lib/jquery-ui-1.11.0/images/ui-icons_222222_256x240.png 
/* Bar chart container. Non-zero lengths need an explicit unit to be valid CSS. */
.barChart {
	margin: 5px 0 0 5px;
}

/* Bars */
.barChart rect {
	fill: steelblue;
}

/* Bar labels: right-aligned against the anchor point */
.barChart text {
	fill: white;
	font: 10px sans-serif;
	text-anchor: end;
}

/* Hover feedback on a bar */
.bar:hover {
	fill: brown;
}
/*
 * Layout of the "compare run summaries" view.
 * Every non-zero length carries an explicit px unit; unitless lengths are
 * invalid CSS and are dropped when the page renders in standards mode.
 */
#containerRunSummaries {
	width: 100%;
	height: 100%;
	font-family: Nunito, arial, verdana;
	font-size: 15px;
}

#compareRunSummariesHeader {
	width: 100%;
	height: 50px;
}

#compareRunSummariesDiv {
	width: 100%;
	height: 320px;
	border: solid 1px black;
}

#uploadRunSummariesBtn {
	width: 150px;
	height: 20px;
	margin: 12px 0 0 0;
}

#initializeViewsBtn {
	width: 120px;
	height: 19px;
	margin: 10px 0 0 0;
}

/* Metric selectors, right-aligned in their header */
#compareRunSummariesMetricOptions, #compareKernelSummariesMetricOptions {
	float: right;
	margin: 4px 4px 0 0;
	font-size: 11px;
}

#compareRunSummariesMetricOptionsLabel, #compareKernelSummariesMetricOptionsLabel {
	float: right;
	margin: 4px 25px 0 0;
	font-family: arial;
	font-size: 11px;
}

/* Kernel name input and its label, left-aligned */
#compareSummariesOfKernelTxtBx {
	width: 150px;
	text-align: left;
	float: left;
	margin: 15px 5px 0 5px;
	font-size: 11px;
}

#compareSummariesOfKernelLabel {
	float: left;
	margin: 15px 25px 0 0;
	font-family: arial;
	font-size: 11px;
}

#compareKernelSummariesHeader {
	width: 100%;
	height: 50px;
}

#compareKernelSummariesDiv {
	width: 100%;
	height: 320px;
	border: solid 1px black;
}
/* Canvas behind the dataflow graph */
.background {
	fill: #DDD;
	stroke: white;
	stroke-width: 1px;
}

/* Draggable graph node */
.node {
	fill: lightcoral;
	stroke: black;
	stroke-width: 1.5px;
	cursor: move;
}

.dataflow-kernel {
	stroke: black;
}

/* Node caption */
.label {
	fill: black;
	font-family: Verdana;
	font-size: 25px;
	text-anchor: middle;
	cursor: move;
}

/*
 * Edge between nodes. The stylesheet used to declare .link twice; this is the
 * combined result, keeping the values of the later declaration wherever the
 * two disagreed (fill, stroke, stroke-width).
 */
.link {
	fill: white;
	stroke: black;
	stroke-width: 1.5px;
	opacity: 0.7;
	marker-end: url(#end-arrow);
}
/* All editor layers are absolutely positioned */
#editor,
.ace_editor,
.ace_selection,
.ace_text-input {
	position: absolute;
}

/* The editor fills its positioned ancestor */
#editor {
	top: 0;
	right: 0;
	bottom: 0;
	left: 0;
}

/* Highlighted (selected) lines */
.ace_selection {
	background: red;
}

/* Invisible text input stretched across the full width */
.ace_text-input {
	right: 0;
	left: 0;
	opacity: 0;
}
/* Occupies the bottom 32% (from top: 68%) and scrolls horizontally. */
#timeline {
  position: absolute;
  background: #DDD;
  width: 100%;
  height: 32%;
  top: 68%;
  overflow-x: auto;
}

/* Same `top: 68%` as #timeline, so the two start at the same vertical offset. */
#statusBar {
  position: absolute;
  width: 100%;
  height: 20px;
  top: 68%;
}

/*
 * NOTE(review): `margin-top: 7`, `font-size: 11` and `margin: 12 15 0 10` have
 * no unit; CSS requires one on non-zero lengths, so standards-mode rendering
 * is expected to ignore them -- confirm the intended unit (presumably px).
 */
#globalViewOptions {
  margin-top: 7;
  opacity: 0;
  font-size: 11;
}

#globalViewOptionsLabel {
  margin: 12 15 0 10;
  font-family: arial;
  font-size: 11;
}
--------------------------------------------------------------------------------
/profiler/styles/stack-graph.css:
--------------------------------------------------------------------------------

.axis path, .axis line {
  fill: none;
  stroke: #000;
  shape-rendering: crispEdges;
}
--------------------------------------------------------------------------------
/profiler/styles/tab-view.css:
--------------------------------------------------------------------------------
#tabs {
  padding: 0px;
  background: none;
  border-width: 0px;
}

/* The last declaration has no trailing semicolon; that is valid CSS. */
#tabs .ui-widget-header {
  border: 1px solid black
}

#tabs .ui-tabs-panel {
  padding: 0;
}

/* Only a bottom border remains; all three border-radius spellings are zeroed. */
#tabs .ui-tabs-nav {
  padding-left: 0px;
  background: transparent;
  border-width: 0px 0px 1px 0px;
  -moz-border-radius: 0px;
  -webkit-border-radius: 0px;
  border-radius: 0px;

}

#tabs .ui-tabs-nav li {
  font-family: Nunito, arial, verdana;
  font-size: 12px;
}

#tabs .ui-tabs-nav li.ui-state-active a,
#tabs .ui-tabs-nav li.ui-state-default a {
  color: black;
}

/* NOTE(review): `width: 100` / `height: 15` are unitless -- confirm the intended unit. */
.ui-widget button, .ui-button-text {
  width: 100;
  height: 15;
  font-family: Nunito, arial, verdana;
  font-size: 10px;
}
--------------------------------------------------------------------------------
/profiler/styles/table.css:
--------------------------------------------------------------------------------

/* NOTE(review): `font-size: 14` is unitless -- confirm the intended unit. */
#infoTable {
  font-family: sans-serif;
  font-size: 14;
  position: absolute;
  margin: 0;
  width: 100%;
  /*border: 1px solid black;*/
  border-collapse: collapse;
}

#infoTable .tHead {
  font-weight: bold;
  background: grey;
}

#infoTable .tBody {
  font-weight: normal;
}

#infoTable .tBody .tr {
  font-weight: normal;
  background: lightsteelblue;
}

/* Hover highlight for body rows. */
#infoTable .tBody .tr:hover {
  background: rgb(146, 181, 235);
}

/* Applies to every td under #infoTable. */
#infoTable td {
  /*border: 1px solid black;*/
  padding: 10px;
  border-top: 1px solid white;
}
--------------------------------------------------------------------------------
/profiler/styles/timeline.css:
--------------------------------------------------------------------------------
/*html,body
{
  height: 100%;
  margin: 0;
  padding: 0;
  overflow-y:hidden;
}*/

.chart {
  shape-rendering: crispEdges;
}

.mini {
  /*width: 100%;*/
  height: 100%;
}

.mini text {
  font: 9px sans-serif;
}

.main text {
  font: 12px sans-serif;
}

.kernelNode {
  stroke-width: 1;
  stroke: black;
}

/* Horizontal overflow is inherited from the parent; vertical overflow scrolls. */
.timelineWrapper {
  /*overflow-x: auto;*/
  overflow-x: inherit;
  overflow-y: auto;
}

.brush .extent {
  stroke: gray;
  fill: dodgerblue;
  fill-opacity: .365;
}

/* `pointer-events: none` keeps the tooltip from intercepting mouse events. */
.tooltip {
  position: absolute;
  text-align: left;
  width: 100px;
  color:white;
  padding: 10px;
  font: 8px sans-serif;
  background: lightslategray;
  pointer-events: none;
  border-collapse: collapse;
}

.tooltipValue {
  border:1px solid black;
}

.sync-node {
  opacity: 0.4;
  stroke-width: 1;
  stroke: grey;
}
--------------------------------------------------------------------------------
/profiler/table.js:
-------------------------------------------------------------------------------- 1 | 2 | // rowData = [["Col 1", "Col 2"], ["Row 1 Col 1", "Row 1 Col 2"]] 3 | function createTable(id, parentId, rowData) { 4 | var table = d3.select(parentId).append("table").attr("id", id) 5 | 6 | table.selectAll('.tHead').data([0]).enter().append('thead').attr("class", "tHead"); 7 | var thead = table.select('thead'); 8 | 9 | table.selectAll('.tbody').data([0]).enter().append('tbody').attr("class", "tBody"); 10 | var tbody = table.select('tbody'); 11 | 12 | var columnNames = rowData[0] 13 | var headers = thead.selectAll(".header") 14 | .data(columnNames) 15 | .enter().append("td") 16 | .attr("class", "header") 17 | .text(function (d) {return d}) 18 | 19 | // create a row for each object in the data 20 | var rows = tbody.selectAll(".tr") 21 | .data(rowData.slice(1, rowData.length)) 22 | .enter() 23 | .append("tr") 24 | .attr("class", "tr"); 25 | 26 | // create a cell in each row for each column 27 | var cells = rows.selectAll(".td") 28 | .data(function(row) {return row}) 29 | .enter() 30 | .append("td") 31 | .attr("class", "td") 32 | .text(function(d) { return d}); 33 | 34 | return table; 35 | } -------------------------------------------------------------------------------- /profiler/tests/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Debugger Test Suite 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |

QUnit Test Suite

18 |

19 |
20 |

21 |
    22 | 23 | 24 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | #Project properties 2 | sbt.version=0.13.8 3 | -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/Exceptions.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime 2 | 3 | import scala.collection.mutable.HashMap 4 | 5 | 6 | class DeliteException(message: String, cause: Throwable) extends RuntimeException(message, cause, true, false) 7 | 8 | object Exceptions { 9 | 10 | //TODO: using the 'original' line number and a full dump of all SourceContext from the compiler we could provide exception locations for all symbols rather than just DEG symbols 11 | val sourceInfo = new HashMap[String, (String, Int, String)]() 12 | 13 | def translate(original: Throwable): Throwable = { 14 | def stripKernel(name: String) = name.stripPrefix("generated.scala.kernel_").takeWhile(_ != '$') 15 | var translated = worker(original) 16 | for (elem <- original.getStackTrace) { 17 | sourceInfo.get(stripKernel(elem.getClassName)) match { 18 | case Some((fileName, line, opName)) => 19 | val message = s"Error within $opName: $fileName:$line" 20 | if (translated.getMessage != message) translated = new DeliteException(message, translated) 21 | case _ => //keep looking 22 | } 23 | } 24 | translated 25 | } 26 | 27 | def worker(cause: Throwable) = new DeliteException(s"Exception in thread ${Thread.currentThread.getName}", cause) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/codegen/AliasTable.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime.codegen 2 | 3 | import collection.mutable.ArrayBuffer 4 | 5 | final class 
AliasTable[T] { 6 | 7 | private val table = new ArrayBuffer[Set[T]] 8 | 9 | private[codegen] def get(name: T): Set[T] = { 10 | val sets = table.filter(s => s.contains(name)) 11 | assert(sets.length < 2) 12 | if (sets.length == 0) Set(name) 13 | else sets(0) 14 | } 15 | 16 | private[codegen] def add(name1: T, name2: T) { 17 | val sets = table.filter(s => s.contains(name1) || s.contains(name2)) 18 | assert(sets.length < 3) 19 | if (sets.length == 0) { 20 | table += Set(name1,name2) 21 | } 22 | else { 23 | val newSet = sets.reduceLeft(_ union _) 24 | for (s <- sets) table -= s 25 | table += newSet 26 | } 27 | } 28 | 29 | private[codegen] def remove(name: T) { 30 | val sets = table.filter(s => s.contains(name)) 31 | assert(sets.length == 1) 32 | table -= sets(0) 33 | table += (sets(0)-name) 34 | } 35 | 36 | private[codegen] def clear { 37 | table.clear 38 | } 39 | 40 | } -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/codegen/CppCompile.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime.codegen 2 | 3 | import xml.XML 4 | import ppl.delite.runtime.Config 5 | import ppl.delite.runtime.graph.targets.Targets 6 | import tools.nsc.io._ 7 | 8 | object CppCompile extends CCompile { 9 | 10 | def target = Targets.Cpp 11 | override def ext = "cpp" 12 | 13 | protected def configFile = "CPP.xml" 14 | protected def compileFlags = Array() 15 | protected def linkFlags = Array() 16 | override protected def optionalFeatures = { 17 | config.features.collect { 18 | case "numa" => "__DELITE_CPP_NUMA__" 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/codegen/CudaCompile.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime.codegen 2 | 3 | import xml.XML 4 | import ppl.delite.runtime.Config 5 | import 
ppl.delite.runtime.graph.targets.Targets
import tools.nsc.io._

/**
 * Author: Kevin J. Brown
 * Date: Dec 2, 2010
 * Time: 9:39:10 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * CCompile settings for the CUDA target: sources use the "cu" extension and the
 * configuration file is "CUDA.xml".
 */
object CudaCompile extends CCompile {

  def target = Targets.Cuda
  override def ext = "cu"

  /**
   * Architecture identifiers read (once, lazily) from the `arch` element(s) of
   * the configuration file: the text is split on ',' and every entry has its
   * dots removed (e.g., 2.0 => 20).
   */
  lazy val arch = {
    val configPath = Config.deliteHome + sep + "config" + sep + "delite" + sep + configFile
    val archText = (XML.loadFile(configPath) \\ "arch").text.trim
    archText.split(',').map { entry =>
      entry.trim.split('.').reduceLeft(_ + _) //remove 'dot' if it exists
    }
  }

  protected def configFile = "CUDA.xml"

  // fixed flags followed by one "-gencode arch=compute_X,code=sm_X" pair per configured architecture
  protected def compileFlags = {
    val gencodeFlags = arch.flatMap(a => Array("-gencode", "arch=compute_"+a+",code=sm_"+a))
    Array("-m64", "-w", "-O3", "-shared", "-Xcompiler", "\'-fPIC\'") ++ gencodeFlags
  }

  protected def linkFlags = Array("-lcublas", "-shared", "-Xcompiler", "\'-fPIC\'")
}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/codegen/examples/ExampleExecutable0.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.codegen.examples

import ppl.delite.runtime.data.Data
import ppl.delite.runtime.executor.DeliteExecutable
import ppl.delite.runtime.graph.ops.EOP_Kernel

/**
 * Author: Kevin J.
Brown
 * Date: Oct 25, 2010
 * Time: 11:24:51 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * This is an example of what a generated DeliteExecutable should look like; it should not be invoked
 * Ex0 shows how to consume results from other threads
 * Ex1 shows how to produce results for other threads
 */

object ExampleExecutable0 extends DeliteExecutable {

  //the other executable objects this object can communicate with
  val thread1 = ExampleExecutable1
  //val thread2 = ExampleExecutable2

  /**
   * Template body: alternates local kernel calls with values fetched from
   * `thread1`, then finishes with the end-of-program kernel.
   */
  def run() {
    val x1 = kernel1_run(0) //to be replaced with a call to the appropriate kernel object

    val x2 = thread1.get1 //this call blocks
    val x3 = kernel2_run(x1,x2)

    val x4 = thread1.get2
    val x5 = kernel3_run(x3,x4)

    val x6 = EOP_Kernel()

  }

  //ignore everything below
  // placeholder kernels: each returns null and exists only so the example type-checks
  def kernel1_run(arg0: Int): Data[Int] = null

  def kernel2_run(arg0: Data[Int], arg1: Data[Int]): Data[Int] = null

  def kernel3_run(arg0: Data[Int], arg1: Int): Data[Int] = null

}
--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/data/Data.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.data

/**
 * Author: Kevin J.
Brown
 * Date: Oct 11, 2010
 * Time: 1:52:32 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * Abstract indexed container of `T`: a size plus element read (`apply`) and
 * element write (`update`) by Int index.
 */
abstract class Data[T] {

  /** Number of elements. */
  def size : Int

  /** Reads the element at index `i`. */
  def apply(i: Int) : T

  /** Writes `x` at index `i`. */
  def update(i: Int, x: T) : Unit

}
--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/DeliteExecutable.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

/**
 * Author: Kevin J. Brown
 * Date: Oct 11, 2010
 * Time: 2:12:33 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * A unit of work that can be placed on an execution thread's queue;
 * subclasses supply the work in `run()`.
 */
abstract class DeliteExecutable {

  /** Performs the work of this executable. */
  def run() : Unit

  /** Returns this instance. */
  def self = this

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/ExecutionThread.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

import java.util.concurrent.{BrokenBarrierException, LinkedBlockingQueue}
import ppl.delite.runtime.{Config, Delite, Exceptions}

/**
 * Author: Kevin J. Brown
 * Date: Oct 10, 2010
 * Time: 10:24:14 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 *
 * A Runnable that represents the work of an execution thread for the CPU
 *
 * @author Kevin J.
Brown
 */

class ExecutionThread extends Runnable {

  // Bounded work queue (capacity Config.queueSize) for this ExecutionThread;
  // synchronization is handled by the queue implementation
  private[executor] val queue = new LinkedBlockingQueue[DeliteExecutable](Config.queueSize) //work queue

  // loop flag; cleared by the handlers below and by the Shutdown executable
  private[executor] var continue: Boolean = true

  /**
   * Takes executables off the queue and runs them until `continue` is cleared.
   * This loop should be terminated by interrupting the thread.
   */
  def run {
    while(continue) {
      try {
        executeWork(queue.take)
      }
      catch {
        //another thread threw an exception -> exit silently
        case _: InterruptedException | _: BrokenBarrierException =>
          continue = false
        //this thread's own failure -> report it through Delite.shutdown, then stop
        case e: Throwable =>
          Delite.shutdown(Exceptions.translate(e))
          continue = false
      }
    }
  }

  // how to execute work
  protected def executeWork(work: DeliteExecutable) = work.run()

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/Executor.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

import ppl.delite.runtime.scheduler.StaticSchedule

/**
 * Author: Kevin J.
Brown
 * Date: Dec 4, 2010
 * Time: 2:41:22 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * The base class of all executors
 * Defines the public interface for the rest of the Delite Runtime
 */

abstract class Executor {

  /** Submits a complete static schedule for execution. */
  def run(schedule: StaticSchedule)

  /** Submits a single executable to the resource identified by `location`. */
  def runOne(location: Int, item: DeliteExecutable)

  /** Starts the executor's resources. */
  def init()

  /** Stops the executor's resources. */
  def shutdown()

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/MultiAccExecutor.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

import ppl.delite.runtime.scheduler.StaticSchedule
import ppl.delite.runtime.Config
import ppl.delite.runtime.graph.targets.Targets

/**
 * Author: Kevin J. Brown
 * Date: Dec 4, 2010
 * Time: 2:40:47 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * An executor for a single machine consisting of multiple CPUs and GPUs
 * Supports an SMP consisting of an arbitrary number of threads, specified by Config.numThreads, Config.numCpp
 * Supports an arbitrary number of accelerators, specified by Config.numCuda, Config.numOpenCL
 */
class MultiAccExecutor extends Executor {

  // one ThreadPool per target; the Scala pool uses pinned threads when Config.pinThreads is set
  private val threadPools = Map(Targets.Scala -> new ThreadPool(Config.numThreads, i => if (Config.pinThreads) new PinnedExecutionThread(i, Config.numThreads) else new ExecutionThread()),
                                Targets.Cpp -> new ThreadPool(Config.numCpp, i => new CppExecutionThread(i, Config.numCpp)),
                                Targets.Cuda -> new ThreadPool(Config.numCuda, i => new CudaExecutionThread(i, Config.numCuda)),
                                Targets.OpenCL -> new ThreadPool(Config.numOpenCL, i => new OpenCLExecutionThread(i, Config.numOpenCL))
                               )

  /** Initializes every pool. */
  def init() {
    for ((t,pool) <- threadPools) pool.init()
  }

  /** Hands each pool the slice of `schedule` that belongs to its target's resource ids. */
  def run(schedule: StaticSchedule) {
    for ((t,pool) <- threadPools) pool.submitAll(schedule.slice(Targets.resourceIDs(t)))
  }

  /** Routes `item` to the pool owning `location`, using the location relative to that pool. */
  def runOne(location: Int, item: DeliteExecutable) {
    threadPools(Targets.getByLocation(location)).submitOne(Targets.getRelativeLocation(location), item)
  }

  /** Shuts down every pool. */
  def shutdown() {
    for ((t,pool) <- threadPools) pool.shutdown()
  }

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/NativeExecutionThread.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

import ppl.delite.runtime.Config
import ppl.delite.runtime.graph.targets.OS
import ppl.delite.runtime.codegen.{CCompile, CppCompile, CudaCompile, OpenCLCompile}
import java.io.File

/**
 * Author: Kevin J. Brown
 * Date: 12/14/10
 * Time: 12:42 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * An ExecutionThread that calls `initializeThread` once on the worker thread
 * before entering the normal work loop.
 */
abstract class NativeExecutionThread(threadId: Int, numThreads: Int) extends ExecutionThread {

  override def run {
    initializeThread(threadId, numThreads)
    super.run
  }

  /** Per-thread initialization hook; the subclasses in this file declare it `@native`. */
  def initializeThread(threadId: Int, numThreads:Int): Unit

  /**
   * Loads the native library `<compiler.staticResources><fileName>.<OS.libExt>`,
   * first compiling it from the matching `.<compiler.ext>` source when the
   * library file does not exist yet.
   *
   * @param fileName base name of the library, without extension
   * @param compiler compiler used to build the library when it is missing
   */
  protected def loadNative(fileName: String, compiler: CCompile) = {
    val root = compiler.staticResources + fileName
    val path = root + "." + OS.libExt
    val lib = new File(path)
    if (!lib.exists) compiler.compile(path,Array(root+"."+compiler.ext), false)
    System.load(path) //TODO: doesn't work properly with sbt test suite
  }

}

// accepts the same constructor arguments as the native threads but performs no native initialization
class CppExecutionThread(threadId: Int, numThreads: Int) extends ExecutionThread

// each class below loads its native library from the constructor
class PinnedExecutionThread(threadId: Int, numThreads: Int) extends NativeExecutionThread(threadId, numThreads) {
  @native def initializeThread(threadId: Int, numThreads: Int): Unit
  loadNative("cppInit", CppCompile)
}

class CudaExecutionThread(threadId: Int, numThreads: Int) extends NativeExecutionThread(threadId, numThreads) {
  @native def initializeThread(threadId: Int, numThreads:Int): Unit
  loadNative("cudaInit", CudaCompile)
}

class OpenCLExecutionThread(threadId: Int, numThreads: Int) extends NativeExecutionThread(threadId, numThreads) {
  @native def initializeThread(threadId: Int, numThreads:Int): Unit
  loadNative("openclInit", OpenCLCompile)
}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/Shutdown.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

/**
 * This Executable will cause an ExecutionThread to terminate immediately after its execution
 */

final class Shutdown(thread: ExecutionThread) extends DeliteExecutable {

  // clears the flag checked by the thread's work loop
  def run() {
    thread.continue = false
  }

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/executor/ThreadPool.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.executor

import ppl.delite.runtime.scheduler.StaticSchedule

/**
 * An interface for initializing and submitting work to execution threads
 *
 * @param numThreads number of workers in the pool
 * @param executor   factory producing the worker for a given index
 */

class ThreadPool(numThreads: Int, executor: Int => ExecutionThread) {

  private val pool = new Array[ExecutionThread](numThreads)
  private val threads = new Array[Thread](numThreads)

  /** Creates one worker per index and starts a thread named after the worker's class plus the index. */
  def init() {
    for (i <- 0 until numThreads) {
      val worker = executor(i)
      pool(i) = worker
      val thread = new Thread(worker, worker.getClass.getSimpleName+i) //spawn new machine thread
      threads(i) = thread
      thread.start
    }
  }

  /** Interrupts every thread that has been created; slots not yet filled by `init` are skipped. */
  def shutdown() {
    for (thread <- threads if thread != null) {
      thread.interrupt()
    }
  }

  /** Puts `item` on the queue of the worker at index `location`. */
  def submitOne(location: Int, item: DeliteExecutable) {
    pool(location).queue.put(item)
  }

  /**
   * Puts a static schedule into the appropriate thread queues for execution
   *
   * @param schedule the StaticSchedule to be submitted for execution
   */
  def submitAll(schedule: StaticSchedule) {
    assert(pool.length >= schedule.resources.length)
    for (i <- 0 until schedule.resources.length) {
      for (exec <- schedule.resources(i)) {
        pool(i).queue.put(exec)
      }
    }
  }

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/TestGraph.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph

import ops._
import targets._

/**
 * Author: Kevin J.
Brown
 * Date: Oct 20, 2010
 * Time: 2:32:43 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * SingleGraph with an EOP node appended: the EOP depends on the graph's current
 * result node, is registered as that node's consumer, and then becomes the new result.
 */
class TestGraph extends SingleGraph { //test choice
  val EOP = new EOP("eop", Map(Targets.Scala->Map("eop" -> "Unit", "functionReturn"->"Unit")), ("",""))
  // `_result` still refers to the result set by the superclass constructor at this point
  EOP.addDependency(_result._1)
  _result._1.addConsumer(EOP)
  _result = (EOP, EOP.id)
}

//Scheduling & Optimized Execution Test
/**
 * Two independent four-node chains (1a..1d and 2a..2d) joined by a final node
 * ("3") that depends on the last node of each chain.
 */
abstract class SingleGraph extends DeliteTaskGraph {
  val base = "ppl.delite.runtime.graph.TestKernel"
  val node1 = new TestOP(base+"1a")()
  val node2 = new TestOP(base+"1b")(node1)
  val node3 = new TestOP(base+"1c")(node2)
  val node4 = new TestOP(base+"1d")(node3)
  val node5 = new TestOP(base+"2a")()
  val node6 = new TestOP(base+"2b")(node5)
  val node7 = new TestOP(base+"2c")(node6)
  val node8 = new TestOP(base+"2d")(node7)
  val node9 = new TestOP(base+"3")(node4,node8)

  _ops ++= Map[String,DeliteOP]("node1"->node1, "node2"->node2, "node3"->node3, "node4"->node4, "node5"->node5,
    "node6"->node6, "node7"->node7, "node8"->node8, "node9"->node9)
  _result = (node9, node9.id)
}

//simple foreach test
/**
 * Two source nodes ("Begin", "Out"), a foreach node depending on both, and a
 * final "Print0" node that depends on the foreach and takes "Out" as input.
 */
abstract class ForeachGraph extends DeliteTaskGraph {
  val base = "ppl.delite.runtime.graph.TestKernel"
  val node1 = new TestSingle[ArrayColl[Int]](base+"Begin")()()
  val node2 = new TestSingle[ArrayColl[Int]](base+"Out")()()
  val node3 = new TestForeach(base+"Foreach")(node1,node2)(node1,node2)
  val node4 = new TestSingle[Unit](base+"Print0")(node3)(node2)

  _ops ++= Map[String,DeliteOP]("node1"->node1, "node2"->node2, "node3"->node3, "node4"->node4)
  _result = (node4, node4.id)

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/Arguments.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

import ppl.delite.runtime.graph.targets.Targets
import ppl.delite.runtime.Config

/**
 * Author: Kevin J. Brown
 * Date: Nov 30, 2010
 * Time: 3:56:38 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/** Global holder for the application's input arguments and static data. */
object Arguments {
  var args: Seq[Any] = Nil
  var staticDataMap: Map[String,_] = _

  /**
   * Returns argument `idx` cast to `T`.
   * Throws a RuntimeException when fewer than `idx + 1` arguments were supplied.
   */
  def getArg[T](idx: Int): T = {
    if (args.length <= idx)
      throw new RuntimeException("Insufficient input arguments supplied: " + args.mkString(","))
    args(idx).asInstanceOf[T]
  }

  /** Returns the static data registered under `id`, cast to `T`. */
  def staticData[T](id: String): T = {
    val entry = staticDataMap(id)
    entry.asInstanceOf[T]
  }
}

/**
 * OP that exposes input argument number `argIdx` to the task graph.
 * Only indices 0 to 4 are accepted (asserted on construction), matching the
 * five ArgsKernel objects below.
 */
final class Arguments(val id: String, val argIdx: Int, var outputTypesMap: Map[Targets.Value,Map[String,String]]) extends OP_Executable {

  def isDataParallel = false

  // Scala: name of the matching ArgsKernel object; Cpp: name of the function input
  def task = {
    if (scheduledOn(Targets.Scala)) "ppl.delite.runtime.graph.ops.ArgsKernel" + argIdx
    else if (scheduledOn(Targets.Cpp)) "in"+argIdx //function input
    else throw new RuntimeException("Unsupported target for Arguments")
  }

  def cost = 0
  def size = 0

  assert(argIdx < 5, "Cannot currently support DEG with more than 5 Arguments")

}

// One kernel object per supported argument index; each returns the argument at its index.
object ArgsKernel0 {
  def apply[T](info: generated.scala.ResourceInfo): T = Arguments.getArg(0)
}
object ArgsKernel1 {
  def apply[T](info: generated.scala.ResourceInfo): T = Arguments.getArg(1)
}
object ArgsKernel2 {
  def apply[T](info: generated.scala.ResourceInfo): T = Arguments.getArg(2)
}
object ArgsKernel3 {
  def apply[T](info: generated.scala.ResourceInfo): T = Arguments.getArg(3)
}
object ArgsKernel4 {
  def apply[T](info: generated.scala.ResourceInfo): T = Arguments.getArg(4)
}

--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

import java.util.concurrent.locks.ReentrantLock
import ppl.delite.runtime.graph.targets.Targets
import java.util.concurrent.CyclicBarrier

/**
 * Author: Kevin J. Brown
 * Date: Nov 29, 2010
 * Time: 11:21:14 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * This is a special singleton OP that signifies the end of the application
 * This OP should always be inserted by the scheduler such that it is the last to run (it depends on the "result" node of the task graph)
 * Execution of the kernel will shut down the Delite Runtime
 * NOTE(review): the kernel visible in this file only stores the result in EOP_Global; the shutdown itself presumably happens elsewhere -- confirm.
 */
class EOP(val id: String, var outputTypesMap: Map[Targets.Value,Map[String,String]], val result: (String,String)) extends OP_Executable {

  /**
   * OP features
   */

  def isDataParallel = false

  // Scala: name of the EOP_Kernel object; Cpp: "cppDeepCopy"; any other target is rejected
  def task = if (scheduledOn(Targets.Scala)) "ppl.delite.runtime.graph.ops.EOP_Kernel"
             else if (scheduledOn(Targets.Cpp)) "cppDeepCopy"
             else throw new RuntimeException("Unsupported target for EOP")

  def cost = 0
  def size = 0

}

/**
 * Global slot for the application's final result, plus a barrier.
 * NOTE(review): `result` and `barrier` are plain vars with no synchronization
 * in this file; confirm how callers order their accesses.
 */
object EOP_Global {

  private var result: Any = null
  private var barrier: CyclicBarrier = null

  /** Stores `res` as the current result. */
  def put(res: Any) { result = res }

  /** Returns the stored result and resets the slot to null. */
  def take(): Any = {
    val res = result
    result = null
    res
  }

  /** Replaces the barrier with a new CyclicBarrier for `count` parties. */
  def initializeBarrier(count: Int) {
    barrier = new CyclicBarrier(count)
  }

  /** Waits on the current barrier; `initializeBarrier` must have been called first, otherwise `barrier` is still null. */
  def awaitBarrier() { barrier.await() }

}

/**
 * Kernel invoked for the EOP node. All overloads end in the two-argument form,
 * which stores `result` in EOP_Global and returns it.
 */
object EOP_Kernel {

  // forwards with a null resourceInfo
  def apply[T](): T = apply(null)

  // forwards with a null result
  def apply[T](resourceInfo: generated.scala.ResourceInfo): T = apply(resourceInfo, null.asInstanceOf[T])

  def apply[T](resourceInfo: generated.scala.ResourceInfo, result: T): T = {
    EOP_Global.put(result)
    result
  }

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/OP_Control.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

/**
 * Author: Kevin J. Brown
 * Date: 1/5/11
 * Time: 6:58 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/** Marker base class for control OPs; adds nothing to OP_Nested in this file. */
abstract class OP_Control extends OP_Nested

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/OP_Executable.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

/**
 * Author: Kevin J. Brown
 * Date: 1/23/11
 * Time: 5:36 AM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/** Base class for executable OPs; both `toString` and `opName` default to the OP's id. */
abstract class OP_Executable extends DeliteOP {
  override def toString = id
  def opName: String = id
}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/OP_External.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

import ppl.delite.runtime.graph.targets._

/** Non-data-parallel OP whose task is the given `kernel` name; cost and size are reported as 0. */
class OP_External(val id: String, kernel: String, private[graph] var outputTypesMap: Map[Targets.Value, Map[String,String]]) extends OP_Executable {

  final def isDataParallel = false

  def task = kernel

  def cost = 0
  def size = 0

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/OP_Input.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

import ppl.delite.runtime.graph.targets.Targets

/**
 * Author: Kevin J. Brown
 * Date: 1/20/11
 * Time: 8:36 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * An input to a DeliteTaskGraph
 * Used to register OPs' dependencies on the input to the (sub)graph
 */
class OP_Input(val op: DeliteOP) extends DeliteOP {

  def id = "input_" + op.id

  // output types, stencils and partitions are all taken from the wrapped op
  private[graph] var outputTypesMap = op.outputTypesMap
  private[graph] override val stencilMap = op.stencilMap

  override def partition = op.partition
  override def outputPartition = op.outputPartition

  // marked schedulable and already scheduled on construction
  isSchedulable = true
  isScheduled = true

  def task = null
  def isDataParallel = false
  def cost = 0
  def size = 0

  override def toString = id

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/OP_Single.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

import ppl.delite.runtime.graph.targets._
import ppl.delite.runtime.graph._


/**
 * Author: Kevin J. Brown
 * Date: Nov 14, 2010
 * Time: 10:12:48 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/** Non-data-parallel OP whose task is the given `kernel` name; cost and size are reported as 0. */
class OP_Single(val id: String, kernel: String, private[graph] var outputTypesMap: Map[Targets.Value, Map[String,String]]) extends OP_Executable {

  final def isDataParallel = false

  // What does it mean for a SingleTask to be partitioned non-locally?
  //
  // override def partition(sym: String) = {
  //   if (getInputs.isEmpty || DeliteTaskGraph.isPrimitiveType(outputType)) Local //TODO: this seems like an approximation?
  //   else getInputs.map(i => i._1.partition(i._2)).reduceLeft(_ combine _)
  // }

  def task = kernel

  def cost = 0
  def size = 0

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/ops/TestOP.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.ops

import ppl.delite.runtime.graph.targets.Targets

/**
 * Author: Kevin J. Brown
 * Date: Oct 20, 2010
 * Time: 2:23:30 PM
 *
 * Pervasive Parallelism Laboratory (PPL)
 * Stanford University
 */

/**
 * Test OP running `kernel`. Its id is the instance's identity hash code, and
 * construction wires it to `deps` in both directions (dependency and consumer).
 */
class TestOP(kernel: String)(deps: DeliteOP*) extends OP_Executable {

  def task = kernel

  def id = System.identityHashCode(this).toString

  private[graph] var outputTypesMap = Map(Targets.Scala -> Map(id -> "Unit", "functionReturn" -> "Unit"))

  //initialize
  for (dep <- deps) {
    this.addDependency(dep)
    dep.addConsumer(this)
  }

  def cost = 0
  def size = 0
  def isDataParallel = false

}

/**
 * Test OP_Single whose Scala output type is the manifest of `T`.
 * The superclass receives "" as id and null as output map; both are replaced
 * here (`id` by override, `outputTypesMap` by assignment).
 */
class TestSingle[T: Manifest](kernel: String)(deps: DeliteOP*)(inputs: DeliteOP*)
        extends OP_Single("", kernel, null) {

  override val id = System.identityHashCode(this).toString
  outputTypesMap = Map(Targets.Scala -> Map(id -> manifest[T].toString, "functionReturn" -> manifest[T].toString))

  for (dep <- deps) {
    this.addDependency(dep)
    dep.addConsumer(this)
  }

  for (input <- inputs.reverse) { //need a reverse to preserve order (addInput prepends)
    this.addInput(input, input.id)
  }

}

/**
 * Test OP_Foreach with Unit output. The free variables are added as inputs
 * first (in reverse), then `input` is added last.
 */
class TestForeach(func: String)(deps: DeliteOP*)(input: DeliteOP, free: DeliteOP*)
        extends OP_Foreach("", func, null) {

  override val id = System.identityHashCode(this).toString
  outputTypesMap = Map(Targets.Scala -> Map(id -> "Unit", "functionReturn" -> "Unit"))

  for (dep <- deps) {
    this.addDependency(dep)
    dep.addConsumer(this)
  }

  for (f <- free.reverse) { //need a reverse to preserve order (addInput prepends)
    this.addInput(f, f.id)
  }
  this.addInput(input, input.id)

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/targets/CudaMetadata.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.targets

/** CUDA flavour of GPUMetadata; adds no members of its own. */
final class CudaMetadata extends GPUMetadata {
}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/targets/OS.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.targets

/** Detects the host operating system once and exposes OS-dependent file extensions. */
object OS {

  sealed abstract class SupportedOS
  abstract class Unix extends SupportedOS
  case object Linux extends Unix
  case object Mac extends Unix
  case object Solaris extends Unix
  case object Windows extends SupportedOS

  // resolved from the "os.name" system property by substring match; unknown systems abort via sys.error
  private val theOS = {
    val os = System.getProperty("os.name")
    if (os.contains("Linux")) Linux
    else if (os.contains("Windows")) Windows
    else if (os.contains("Mac")) Mac
    else if (os.contains("SunOS")) Solaris
    else sys.error("OS " + os + " is not currently supported")
  }

  /** Name of the detected OS case object (e.g. "Linux"). */
  def currentOS: String = theOS.toString

  /** Shared-library extension: "so" for every Unix variant (including Mac), "dll" for Windows. */
  def libExt: String = theOS match {
    case u:Unix => "so"
    case Windows => "dll"
  }

  /** Object-file extension: "o" for every Unix variant, "obj" for Windows. */
  def objExt: String = theOS match {
    case u:Unix => "o"
    case Windows => "obj"
  }

}

--------------------------------------------------------------------------------
/runtime/src/ppl/delite/runtime/graph/targets/OpenCLMetadata.scala:
--------------------------------------------------------------------------------
package ppl.delite.runtime.graph.targets

final class OpenCLMetadata extends GPUMetadata { 4 | 5 | } -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/graph/targets/Resource.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime.graph.targets 2 | 3 | sealed abstract class Resource { 4 | //val location: Int 5 | } 6 | 7 | case object Scala extends Resource 8 | case object Cpp extends Resource 9 | case object Cuda extends Resource 10 | case object OpenCL extends Resource 11 | -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/messages/Future.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime.messages 2 | 3 | import java.util.concurrent.locks.ReentrantLock 4 | 5 | class Future[T] { 6 | // notReady must be volatile: get checks it without taking the lock, and the volatile write in set (made after _result is stored) is what publishes _result to that lock-free reader 7 | @volatile private var notReady: Boolean = true 8 | private var _result: T = _ 9 | 10 | private val lock = new ReentrantLock 11 | private val cond = lock.newCondition 12 | // returns the value passed to set, blocking the caller until set has been invoked 13 | def get: T = { 14 | if (notReady) block 15 | _result 16 | } 17 | // waits on cond (under lock) until set clears notReady 18 | private def block { 19 | val lock = this.lock 20 | lock.lock 21 | try { 22 | while (notReady) 23 | cond.await 24 | } 25 | finally { 26 | lock.unlock 27 | } 28 | } 29 | // stores the result, marks the future ready and wakes every thread blocked in get 30 | def set(result: T) { 31 | val lock = this.lock 32 | lock.lock 33 | try { 34 | _result = result 35 | notReady = false 36 | cond.signalAll 37 | } 38 | finally { 39 | lock.unlock 40 | } 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/profiler/Profiling.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime 2 | package profiler 3 | 4 | import ppl.delite.runtime.Config 5 | import ppl.delite.runtime.graph.DeliteTaskGraph 6 | import java.lang.management.ManagementFactory 7 | import tools.nsc.io.Path 8 | 9 | //front-facing
interface to activate all profiling tools 10 | object Profiling { 11 | 12 | private var globalStartNanos = 0L 13 | 14 | def init(graph: DeliteTaskGraph) { 15 | val totalResources = Config.numThreads + Config.numCpp + Config.numCuda + Config.numOpenCL 16 | PerformanceTimer.initializeStats(totalResources) 17 | MemoryProfiler.initializeStats(Config.numThreads, Config.numCpp, Config.numCuda, Config.numOpenCL) 18 | } 19 | 20 | def startRun() { 21 | PerformanceTimer.isFinalRun = true 22 | PerformanceTimer.recordAppStartTimeStats() 23 | 24 | PerformanceTimer.clearAll() 25 | MemoryProfiler.clearAll() 26 | 27 | globalStartNanos = System.nanoTime() 28 | 29 | if (Config.dumpProfile) SamplerThread.start() 30 | } 31 | 32 | def endRun() { 33 | if (Config.dumpProfile) SamplerThread.stop() 34 | PerformanceTimer.stop() 35 | if (Config.dumpStats) PerformanceTimer.dumpStats() 36 | if (Config.dumpProfile) PostProcessor.postProcessProfileData(globalStartNanos, Config.degFilePath) 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /runtime/src/ppl/delite/runtime/profiler/Timing.scala: -------------------------------------------------------------------------------- 1 | package ppl.delite.runtime.profiler 2 | 3 | /** 4 | * Timing of a Delite op. 
5 | * 6 | * @author Philipp Haller 7 | */ 8 | 9 | class Timing(val startTime: Long, val component: String) { 10 | private var _endTime: Long = 0L 11 | private var done = false 12 | 13 | def endTime: Long = _endTime 14 | def endTime_=(time: Long) { 15 | _endTime = time 16 | done = true 17 | } 18 | 19 | def isDone: Boolean = done 20 | def elapsedMillis: Long = endTime - startTime 21 | override def toString = elapsedMillis + "ms" 22 | } 23 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/Config.h: -------------------------------------------------------------------------------- 1 | #ifndef _DELITE_CONFIG_H_ 2 | #define _DELITE_CONFIG_H_ 3 | 4 | class Config { 5 | public: 6 | int numThreads; 7 | int numCoresPerSocket; 8 | int numSockets; 9 | 10 | Config(int _numThreads) { 11 | numThreads = _numThreads; 12 | numCoresPerSocket = _numThreads; 13 | numSockets = 1; 14 | } 15 | 16 | Config(int _numThreads, int _numCoresPerSocket, int _numSockets) { 17 | numThreads = _numThreads; 18 | numCoresPerSocket = _numCoresPerSocket; 19 | numSockets = _numSockets; 20 | } 21 | 22 | int numCores() { 23 | return numCoresPerSocket * numSockets; 24 | } 25 | 26 | //current strategy is to spill threads to next socket when all cores are filled 27 | //then repeat for numThreads > numCores 28 | int threadToSocket(int threadId) { 29 | int socketId = threadId / numCoresPerSocket % numSockets; 30 | return socketId; 31 | } 32 | 33 | int activeSockets() { 34 | if (numThreads >= numCores()) return numSockets; 35 | else return threadToSocket(numThreads-1)+1; 36 | } 37 | 38 | }; 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/DeliteCppProfiler.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELITE_CPP_PROFILER_H__ 2 | #define __DELITE_CPP_PROFILER_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 
| #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "DeliteNamespaces.h" 22 | #include "pcmHelper.h" 23 | #ifndef __DELITE_CPP_STANDALONE__ 24 | #include 25 | #endif 26 | 27 | typedef struct { 28 | std::string name; 29 | double start; 30 | } cpptimer_t; 31 | 32 | typedef struct { 33 | void* startAddr; 34 | uint64_t size; 35 | } cpparray_layout_info; 36 | 37 | #ifndef DELITE_NUM_CUDA 38 | 39 | class BufferedFileWriter { 40 | 41 | public: 42 | BufferedFileWriter(const char* fileName); 43 | void writeTimer(std::string kernel, long start, double duration, int32_t level, int32_t tid, bool isMultiLoop); 44 | void close(); 45 | 46 | private: 47 | std::ofstream fs; 48 | }; 49 | 50 | #endif 51 | 52 | void InitDeliteCppTimer(int32_t lowestCppTid, int32_t numCppThreads); 53 | void DeliteCppTimerTic(string name); 54 | void DeliteCppTimerToc(string name); 55 | void DeliteCppTimerStart(int32_t tid, string name); 56 | void DeliteCppTimerStop(int32_t tid, string name); 57 | void DeliteCppTimerStopMultiLoop(int32_t tid, string name); 58 | void DeliteCppTimerClose(); 59 | void DeliteUpdateMemoryAccessStats( int32_t tid, std::string sourceContext, PCMStats* stats ); 60 | void DeliteSendMemoryAccessStatsToJVM( int32_t offset, JNIEnv* env ); 61 | void DeliteSendStartTimeToJVM( JNIEnv* env ); 62 | void SendKernelMemUsageStatsToJVM( JNIEnv* env ); 63 | void DeliteLogArrayAllocation(int32_t tid, void* startAddr, int32_t length, std::string elemType, std::string sourceContext); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/DeliteDatastructures.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELITE_DATASTRUCTURES_H__ 2 | #define __DELITE_DATASTRUCTURES_H__ 3 | 4 | #include "DeliteCppRandom.h" 5 | 6 | // structure to keep thread-local 
resource information 7 | typedef struct resourceInfo_t { 8 | int threadId; 9 | int numThreads; 10 | int socketId; 11 | int numSockets; 12 | //int slaveId; 13 | //int numSlaves; 14 | int groupId; 15 | int groupSize; 16 | int availableThreads; 17 | //TODO: move thread-local rand to somewhere 18 | DeliteCppRandom *rand; 19 | 20 | resourceInfo_t() { } 21 | 22 | resourceInfo_t(int _threadId, int _numThreads, int _socketId, int _numSockets) { 23 | threadId = _threadId; numThreads = _numThreads; socketId = _socketId; numSockets = _numSockets; 24 | groupId = -1; groupSize = -1; availableThreads = _numThreads; 25 | rand = NULL; 26 | } 27 | 28 | } resourceInfo_t; 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/DeliteFileSystem.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELITE_FILE_SYSTEM__ 2 | #define __DELITE_FILE_SYSTEM__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "DeliteNamespaces.h" 10 | 11 | class DeliteFileSystem { 12 | 13 | public: 14 | 15 | // deletes the given path recursively 16 | static void deleteRecursive(string pathname) { 17 | if (!exists(pathname)) return; 18 | struct stat st; 19 | const char *path = pathname.c_str(); 20 | lstat(path, &st); 21 | if (S_ISDIR(st.st_mode)) { 22 | DIR *dir = opendir(path); 23 | struct dirent *dp; 24 | while ((dp = readdir(dir)) != NULL) { 25 | struct stat st; 26 | string filename = string(path) + string("/") + string(dp->d_name); 27 | lstat(filename.c_str(), &st); 28 | if (S_ISREG(st.st_mode)) remove(filename.c_str()); 29 | // TODO: Make sure code below is safe for all platforms! 
(Need to be careful about parent directory) 30 | //else if(S_ISDIR(st.st_mode) && dp->d_name[0] != '.') deleteRecursive(filename); 31 | } 32 | closedir(dir); 33 | remove(path); 34 | } 35 | else if (S_ISREG(st.st_mode)) { 36 | remove(path); 37 | } 38 | else { 39 | fprintf(stderr, "[DeliteFileSystem] Path %s does not appear to be a valid file or directory\n", path); 40 | exit(-1); 41 | } 42 | } 43 | 44 | // checks existence of a file/directory 45 | static bool exists(string pathname) { 46 | struct stat st; 47 | if (lstat(pathname.c_str(),&st) != 0) { 48 | if(errno == ENOENT) return false; 49 | else { 50 | fprintf(stderr, "Path %s exists but cannot be accessed\n", pathname.c_str()); 51 | exit(-1); 52 | } 53 | } 54 | else return true; 55 | } 56 | }; 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/DeliteMemory.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELITE_MEMORY_H__ 2 | #define __DELITE_MEMORY_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "DeliteDatastructures.h" 13 | 14 | //#define DHEAP_DEBUG(...) fprintf(stderr, "[DEBUG-DeliteHeap] "); fprintf(stderr, __VA_ARGS__) 15 | #define DHEAP_DEBUG(...) 
16 | 17 | // Delite Custom Memory APIs 18 | void DeliteHeapInit(int numThreads, size_t heapSize); 19 | void DeliteHeapClear(int numThreads); 20 | char *DeliteHeapAlloc(size_t sz, int idx, bool initialize); 21 | void DeliteHeapMark(int idx); 22 | void DeliteHeapReset(int idx); 23 | void DeliteHeapReset2(int idx); 24 | void delite_barrier(unsigned int count); 25 | 26 | // globally overloaded new operators 27 | void *operator new(size_t sz, const resourceInfo_t *resourceInfo); 28 | void *operator new[](size_t sz, const resourceInfo_t *resourceInfo); 29 | 30 | class DeliteMemory { 31 | public: 32 | void* operator new(size_t sz) { 33 | DHEAP_DEBUG("Non-local allocation with size %d\n", sz); 34 | return malloc(sz); 35 | } 36 | 37 | void* operator new(size_t sz, const resourceInfo_t *resourceInfo) { 38 | DHEAP_DEBUG("Allocation from thread %d with size %d\n", resourceInfo->threadId, sz); 39 | return DeliteHeapAlloc(sz, resourceInfo->threadId, true); 40 | } 41 | 42 | /* 43 | void operator delete(void*) { 44 | } 45 | */ 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/DeliteNamespaces.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELITE_NAMESPACES__ 2 | #define __DELITE_NAMESPACES__ 3 | 4 | //#define __USE_STD_STRING__ 5 | #ifdef __USE_STD_STRING__ 6 | #include 7 | using std::string; 8 | #else 9 | #include "DeliteString.h" 10 | using delite::string; 11 | #endif 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/DeliteThreadPool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "DeliteCpp.h" 3 | 4 | 5 | pthread_t* threadPool; 6 | pthread_mutex_t* locks; 7 | pthread_cond_t* readyConds; 8 | pthread_cond_t* doneConds; 9 | 10 | void** workPool; 11 | void** argPool; 12 | 13 | void submitWork(int threadId, 
void *(*work) (void *), void *arg) { 14 | pthread_mutex_lock(&locks[threadId]); 15 | while (argPool[threadId] != NULL) { 16 | pthread_cond_wait(&doneConds[threadId], &locks[threadId]); 17 | } 18 | workPool[threadId] = (void*)work; 19 | argPool[threadId] = arg; 20 | pthread_cond_signal(&readyConds[threadId]); 21 | pthread_mutex_unlock(&locks[threadId]); 22 | } 23 | 24 | void* processWork(void* threadId) { 25 | int id = *(int*)threadId; 26 | VERBOSE("Initialized thread with id %d\n", id); 27 | initializeThread(id); 28 | 29 | void *(*work) (void *); 30 | void *arg; 31 | while(true) { 32 | pthread_mutex_lock(&locks[id]); 33 | while (argPool[id] == NULL) { 34 | pthread_cond_wait(&readyConds[id], &locks[id]); 35 | } 36 | work = (void *(*)(void *))workPool[id]; 37 | workPool[id] = NULL; 38 | arg = argPool[id]; 39 | argPool[id] = NULL; 40 | pthread_cond_signal(&doneConds[id]); 41 | pthread_mutex_unlock(&locks[id]); 42 | work(arg); 43 | } 44 | } 45 | 46 | void initializeThreadPool(int numThreads) { 47 | threadPool = new pthread_t[numThreads]; 48 | locks = new pthread_mutex_t[numThreads]; 49 | readyConds = new pthread_cond_t[numThreads]; 50 | doneConds = new pthread_cond_t[numThreads]; 51 | workPool = new void*[numThreads]; 52 | argPool = new void*[numThreads]; 53 | 54 | for (int i=1; i void cppDeliteArray::release(void) { free(data); } 5 | template<> void cppDeliteArray::release(void) { free(data); } 6 | template<> void cppDeliteArray::release(void) { free(data); } 7 | template<> void cppDeliteArray::release(void) { free(data); } 8 | template<> void cppDeliteArray::release(void) { free(data); } 9 | template<> void cppDeliteArray::release(void) { free(data); } 10 | template<> void cppDeliteArray::release(void) { 11 | for(int i=0; i void cppDeliteArray::release(void) { 17 | for(int i=0; irelease(); 19 | free(data); 20 | } 21 | 22 | // compiler generated template releases 23 | #include "cppDeliteArrayRelease.h" 24 | */ 
-------------------------------------------------------------------------------- /runtime/src/static/cpp/cppInit.cpp: -------------------------------------------------------------------------------- 1 | #include "cppInit.h" 2 | /* Pins the calling thread to the CPU whose index equals threadId and, when NUMA support is compiled in, binds its memory allocations to the socket that Config maps the thread to. */ 3 | void initializeThread(int threadId) { 4 | #ifdef __linux__ 5 | cpu_set_t cpu; 6 | CPU_ZERO(&cpu); 7 | CPU_SET(threadId, &cpu); 8 | sched_setaffinity(0, sizeof(cpu_set_t), &cpu); 9 | 10 | #ifdef __DELITE_CPP_NUMA__ 11 | if (numa_available() >= 0) { 12 | int socketId = config->threadToSocket(threadId); 13 | if (socketId < numa_num_configured_nodes()) { 14 | bitmask* nodemask = numa_allocate_nodemask(); 15 | numa_bitmask_setbit(nodemask, socketId); 16 | numa_set_membind(nodemask); numa_free_nodemask(nodemask); /* the policy is installed by the call above, so release the mask instead of leaking it */ 17 | } 18 | //VERBOSE("Binding thread %d to cpu %d, socket %d\n", threadId, threadId, socketId); 19 | } 20 | #endif 21 | #endif 22 | 23 | #ifdef __sun 24 | processor_bind(P_LWPID, P_MYID, threadId, NULL); 25 | #endif 26 | } 27 | 28 | #ifndef __DELITE_CPP_STANDALONE__ 29 | extern "C" JNIEXPORT void JNICALL Java_ppl_delite_runtime_executor_PinnedExecutionThread_initializeThread(JNIEnv* env, jobject obj, jint threadId, jint numThreads); 30 | JNIEXPORT void JNICALL Java_ppl_delite_runtime_executor_PinnedExecutionThread_initializeThread(JNIEnv* env, jobject obj, jint threadId, jint numThreads) { 31 | initializeThread(threadId); 32 | } 33 | #endif 34 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/cppInit.h: -------------------------------------------------------------------------------- 1 | #ifndef __CPP_INIT_H__ 2 | #define __CPP_INIT_H__ 3 | 4 | #ifndef __DELITE_CPP_STANDALONE__ 5 | #include 6 | #endif 7 | 8 | #include 9 | 10 | #ifdef __DELITE_CPP_NUMA__ 11 | #include 12 | #endif 13 | 14 | #ifdef __sun 15 | #include 16 | #endif 17 | 18 | void initializeThread(int threadId); 19 | 20 | #endif 21 | --------------------------------------------------------------------------------
/runtime/src/static/cpp/cppList.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | #include "cppList.h" 3 | 4 | template<> void cppList::release(void) { free(data); } 5 | template<> void cppList::release(void) { free(data); } 6 | template<> void cppList::release(void) { free(data); } 7 | template<> void cppList::release(void) { free(data); } 8 | template<> void cppList::release(void) { free(data); } 9 | template<> void cppList::release(void) { free(data); } 10 | 11 | template void cppList::release(void) { 12 | for(int i=0; irelease(); 14 | free(data); 15 | } 16 | 17 | */ -------------------------------------------------------------------------------- /runtime/src/static/cpp/cppRef.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | #include "cppRef.h" 3 | 4 | template<> void cppRef::release(void) { } 5 | template<> void cppRef::release(void) { } 6 | template<> void cppRef::release(void) { } 7 | template<> void cppRef::release(void) { } 8 | template<> void cppRef::release(void) { } 9 | template<> void cppRef::release(void) { } 10 | 11 | template void cppRef::release(void) { 12 | data->release(); 13 | } 14 | */ -------------------------------------------------------------------------------- /runtime/src/static/cpp/cppRef.h: -------------------------------------------------------------------------------- 1 | /* 2 | #ifndef _CPP_REF_H_ 3 | #define _CPP_REF_H_ 4 | 5 | template 6 | class cppRef { 7 | public: 8 | T data; 9 | 10 | cppRef(T _data) { 11 | data = _data; 12 | } 13 | 14 | T get(void) { 15 | return data; 16 | } 17 | 18 | void set(T newVal) { 19 | data = newVal; 20 | } 21 | 22 | void release(void); 23 | 24 | }; 25 | 26 | #endif 27 | */ -------------------------------------------------------------------------------- /runtime/src/static/cpp/pcmHelper.cpp: -------------------------------------------------------------------------------- 1 | #include "pcmHelper.h" 2 | #include 3 
| 4 | #ifdef DELITE_ENABLE_PCM 5 | 6 | void pcmInit() { 7 | std::cout << "Initializing PCM" << std::endl; 8 | PCM* m = PCM::getInstance(); 9 | m->disableJKTWorkaround(); 10 | 11 | switch( m->program() ) { 12 | case PCM::Success: 13 | std::cout << "PCM Initialized" << std::endl; 14 | return; 15 | 16 | case PCM::PMUBusy: 17 | std::cout << "PCM::PMU Busy!" << std::endl; 18 | m->resetPMU(); 19 | return; 20 | 21 | default: 22 | return; 23 | } 24 | } 25 | 26 | PCMStats* getPCMStats(CoreCounterState& before, CoreCounterState& after) { 27 | struct PCMStats* stats = new PCMStats(); 28 | stats->l2CacheHitRatio = getL2CacheHitRatio( before, after ); 29 | stats->l3CacheHitRatio = getL3CacheHitRatio( before, after ); 30 | stats->l2Misses = getL2CacheMisses( before, after ); 31 | stats->l3Misses = getL3CacheMisses( before, after ); 32 | 33 | return stats; 34 | } 35 | 36 | CoreCounterState getCoreCounterState(int32_t tid) { 37 | PCM * inst = PCM::getInstance(); 38 | CoreCounterState result; 39 | if (inst) result = inst->getCoreCounterState(tid); 40 | return result; 41 | } 42 | 43 | void pcmCleanup() { 44 | PCM::getInstance()->cleanup(); 45 | } 46 | 47 | #else 48 | 49 | void pcmInit() { } 50 | void pcmCleanup() { } 51 | 52 | #endif 53 | 54 | void printPCMStats(PCMStats* stats) { 55 | std::cout 56 | << "L2 Hit Ratio: " << stats->l2CacheHitRatio << std::endl 57 | << "L2 Misses : " << stats->l2Misses << std::endl 58 | << "L3 Hit Ratio: " << stats->l3CacheHitRatio << std::endl 59 | << "L3 Misses : " << stats->l3Misses << std::endl; 60 | } 61 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/pcmHelper.h: -------------------------------------------------------------------------------- 1 | #ifndef PCM_HELPER_H 2 | #define PCM_HELPER_H 3 | 4 | #ifdef DELITE_ENABLE_PCM 5 | #include "cpucounters.h" 6 | #endif 7 | 8 | struct PCMStats { 9 | double l2CacheHitRatio; 10 | double l3CacheHitRatio; 11 | unsigned int l2Misses; 12 | unsigned 
int l3Misses; 13 | 14 | PCMStats() : 15 | l2CacheHitRatio(0.0), 16 | l3CacheHitRatio(0.0), 17 | l2Misses(0), 18 | l3Misses(0) 19 | { } 20 | }; 21 | 22 | void pcmInit(); 23 | void pcmCleanup(); 24 | void printPCMStats(PCMStats* stats); 25 | 26 | #ifdef DELITE_ENABLE_PCM 27 | PCMStats* getPCMStats(CoreCounterState& before, CoreCounterState& after); 28 | CoreCounterState getCoreCounterState(int32_t tid); 29 | #endif 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/standalone/DeliteStandaloneMain.cpp: -------------------------------------------------------------------------------- 1 | #include "DeliteStandaloneMain.h" 2 | 3 | extern void Application(cppDeliteArray< string > *x0); 4 | 5 | int main(int argc, char *argv[]) { 6 | 7 | printf("** Starting Standalone C++ Execution **\n"); 8 | 9 | // set the number of threads for parallel execution 10 | omp_set_num_threads(DELITE_CPP); 11 | 12 | // create x0 symbol for user inputs 13 | cppDeliteArray< string > *x0 = new cppDeliteArray< string >(argc-1); 14 | for(int i=0; idata[i] = string(argv[i+1]); 16 | } 17 | 18 | // Start the generated application 19 | Application(x0); 20 | 21 | printf("** Finished Standalone C++ Execution **\n"); 22 | 23 | return 0; 24 | } 25 | -------------------------------------------------------------------------------- /runtime/src/static/cpp/standalone/DeliteStandaloneMain.h: -------------------------------------------------------------------------------- 1 | #ifndef __DELITE_STANDALONE_MAIN_H__ 2 | #define __DELITE_STANDALONE_MAIN_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "DeliteCpp.h" 14 | #include "cpptypes.h" 15 | #include "library.h" 16 | #include "cppDeliteStructs.h" 17 | #include "cppDeliteArray.h" 18 | #include "cppFileStream.h" 19 | #include "cppRef.h" 20 | 21 | #endif 22 | 
-------------------------------------------------------------------------------- /runtime/src/static/cpp/standalone/Makefile.standalone: -------------------------------------------------------------------------------- 1 | # Makefile for compiling the generated standalone C++ 2 | # The variables below (TARGET, GENERATED_HOME) must be set correctly before building 3 | # Also assumes the DELITE_HOME and FORGE_HOME env variables are set (FORGE_HOME is used by the default GENERATED_HOME) 4 | 5 | # Set TARGET variable to the name of the generated cpp source 6 | TARGET = CGCompiler 7 | 8 | # Set the generated directory 9 | GENERATED_HOME = $(FORGE_HOME)/published/OptiML/generated 10 | 11 | # Set the number of threads for parallel execution 12 | NUM_THREADS = 1 13 | 14 | CC = g++ 15 | CFLAGS = -std=c++0x -fopenmp -O3 16 | LDFLAGS = -fopenmp $(wildcard $(GENERATED_HOME)/libraries/cpp/*.so) 17 | INCLUDES = -I$(GENERATED_HOME)/cpp/datastructures -I$(GENERATED_HOME)/cpp/kernels -I$(DELITE_HOME)/runtime/src/static/cpp 18 | SOURCES = $(TARGET).cpp $(DELITE_HOME)/runtime/src/static/cpp/standalone/DeliteStandaloneMain.cpp $(DELITE_HOME)/runtime/src/static/cpp/DeliteCpp.cpp 19 | OBJECTS = $(SOURCES:.cpp=.o) 20 | 21 | all: $(TARGET) 22 | 23 | $(TARGET): $(OBJECTS) 24 | $(CC) $(OBJECTS) $(LDFLAGS) -o $(TARGET) 25 | 26 | %.o: %.cpp 27 | $(CC) -c -O3 -D__DELITE_CPP_STANDALONE__ -DDELITE_CPP=$(NUM_THREADS) $(INCLUDES) $(CFLAGS) $< -o $@ 28 | 29 | .PHONY: all clean 30 | clean: 31 | rm -f $(OBJECTS) $(TARGET) 32 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/CudaIOBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef CUDAIOBUFFER_H 2 | #define CUDAIOBUFFER_H 3 | 4 | #include 5 | 6 | 7 | typedef unsigned char byte; 8 | 9 | // Free items for our Cuda Allocator 10 | class AllocationInfo { 11 | public: 12 | cudaEvent_t event; 13 | size_t size; 14 | byte * ptr; 15 | }; 16 | 17 | 18 | class CudaIOBuffer { 19 | public: 20 | byte *head; 21 | size_t capacity; 22 | byte *free; 23 | byte *used;
24 | 25 | 26 | CudaIOBuffer(size_t size, cudaMemcpyKind kind, cudaStream_t stream); 27 | byte * allocate(size_t size); 28 | void copyAsync(byte* dst, byte* src, size_t size); 29 | size_t remainingCapacity(); 30 | 31 | private: 32 | std::queue allocations; 33 | AllocationInfo * pendingAlloc; 34 | cudaMemcpyKind direction; 35 | cudaStream_t cudaStream; 36 | 37 | }; 38 | 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/DeliteCudaProfiler.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #ifndef DELITE_NUM_CUDA 15 | #define DELITE_NUM_CUDA 1 16 | #endif 17 | 18 | typedef struct { 19 | struct timeval start; 20 | } cudatimer_t; 21 | 22 | #ifndef __DELITE_CPP_PROFILER_H__ 23 | 24 | class BufferedFileWriter { 25 | 26 | public: 27 | BufferedFileWriter(const char* fileName); 28 | void writeTimer(std::string kernel, long start, double duration, int32_t level, int32_t tid, bool isMultiLoop); 29 | void close(); 30 | 31 | private: 32 | std::ofstream fs; 33 | }; 34 | 35 | #endif 36 | 37 | void InitDeliteCudaTimer(int32_t tid, int32_t lowestCudaTid); 38 | void DeliteCudaTimerStart(int32_t tid, std::string name); 39 | void DeliteCudaTimerStop(int32_t tid, std::string name, bool isMultiLoop = false); 40 | void DeliteCudaTimerClose(int32_t tid, int32_t rid, JNIEnv* env); 41 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/HostcudaDeliteArray.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDAHOST_DELITEARRAY_H_ 2 | #define _CUDAHOST_DELITEARRAY_H_ 3 | 4 | #include "cppDeliteArray.h" 5 | #include "cudaDeliteArray.h" 6 | 7 | template 8 | class HostcudaDeliteArray { 9 | public: 10 | cppDeliteArray 
*host; 11 | cudaDeliteArray *device; 12 | }; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/HostcudaList.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDAHOST_LIST_H_ 2 | #define _CUDAHOST_LIST_H_ 3 | 4 | #include "cppList.h" 5 | #include "cudaList.h" 6 | 7 | template 8 | class HostcudaList { 9 | public: 10 | cppList *host; 11 | cudaList *device; 12 | }; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/HostcudaRef.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDAHOST_REF_H_ 2 | #define _CUDAHOST_REF_H_ 3 | 4 | #include "cppRef.h" 5 | #include "cudaRef.h" 6 | 7 | template 8 | class HostcudaRef { 9 | public: 10 | cppRef *host; 11 | cudaRef *device; 12 | }; 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/cudaHashMap.h: -------------------------------------------------------------------------------- 1 | //TODO: Implement HashMap for CUDA here 2 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/cudaInit.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | extern "C" JNIEXPORT void JNICALL Java_ppl_delite_runtime_executor_CudaExecutionThread_initializeThread(JNIEnv* env, jobject obj, jint threadId, jint numThreads); 5 | 6 | JNIEXPORT void JNICALL Java_ppl_delite_runtime_executor_CudaExecutionThread_initializeThread(JNIEnv* env, jobject obj, jint threadId, jint numThreads) { 7 | //chose device num 8 | if(cudaSuccess != cudaSetDevice(threadId)) { 9 | printf("FATAL : GPU device could not be initialized. 
\n"); 10 | exit(1); 11 | } 12 | 13 | // Using blocksynchronization slows down some kepler devices due to power management. Default is polling. 14 | /* 15 | //reset the device 16 | if(cudaSuccess != cudaDeviceReset()) { 17 | printf("FATAL : cudaDeviceReset() failed \n"); 18 | exit(1); 19 | } 20 | //set device options 21 | if(cudaSuccess != cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync)) { 22 | printf("FATAL : GPU device has crashed (cudaSetDviceFlags). \n"); 23 | exit(1); 24 | } 25 | */ 26 | 27 | //called to initialize the device (can take a while); cudaDeviceSynchronize replaces the deprecated cudaThreadSynchronize 28 | if(cudaSuccess != cudaDeviceSynchronize()) { 29 | printf("FATAL : GPU device has crashed (cudaDeviceSynchronize). \n"); 30 | exit(1); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/cudaInit.sh: -------------------------------------------------------------------------------- 1 | nvcc -I$JAVA_HOME/include/ -I$JAVA_HOME/include/linux/ -arch compute_20 -code sm_20 -shared -Xcompiler '-fPIC' -o cudaInit.so cudaInit.cu 2 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/cudaList.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_LIST_H_ 2 | #define _CUDA_LIST_H_ 3 | 4 | #include "DeliteCuda.h" 5 | 6 | #include 7 | 8 | template 9 | class cudaList { 10 | public: 11 | T *data; 12 | int length; 13 | 14 | // Constructor 15 | __host__ __device__ cudaList(void) { 16 | length = 0; 17 | data = NULL; 18 | } 19 | 20 | __device__ cudaList(int _length, T *ptr, int idx) { 21 | length = _length; 22 | data = ptr + idx*_length; 23 | } 24 | 25 | __host__ cudaList(int _length) { 26 | length = _length; 27 | DeliteCudaMalloc((void**)&data,length*sizeof(T)); 28 | } 29 | 30 | __host__ __device__ cudaList(int _length, T *_data) { 31 | length = _length; 32 | data = _data; 33 | } 34 | 35 | __host__ __device__ T apply(int idx) { 36 | return data[idx]; 37 | } 38
| 39 | __host__ __device__ void update(int idx, T value) { 40 | data[idx] = value; 41 | } 42 | 43 | // DeliteCollection 44 | __host__ __device__ int size() { 45 | return length; 46 | } 47 | 48 | __host__ __device__ T dcApply(int idx) { 49 | return data[idx]; 50 | } 51 | 52 | __host__ __device__ void dcUpdate(int idx, T value) { 53 | data[idx] = value; 54 | } 55 | 56 | }; 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/cudaProfiler.cu: -------------------------------------------------------------------------------- 1 | #ifndef DELITE_CUDA_PROFILER_INCLUDED 2 | #define DELITE_CUDA_PROFILER_INCLUDED 3 | 4 | #include 5 | #include 6 | // Timestamps written by mytic() and mytoc(); printTime reports their difference in microseconds 7 | struct timeval myprofiler_start, myprofiler_end; 8 | void printTime(void) { 9 | printf("Time : %ld [us]\n", ((myprofiler_end.tv_sec * 1000000 + myprofiler_end.tv_usec) - (myprofiler_start.tv_sec * 1000000 + myprofiler_start.tv_usec))); 10 | } 11 | void printTime(char *str) { 12 | printf("%s : Time : %ld [us]\n", str, ((myprofiler_end.tv_sec * 1000000 + myprofiler_end.tv_usec) - (myprofiler_start.tv_sec * 1000000 + myprofiler_start.tv_usec))); 13 | } 14 | /* 15 | void printTime(char *str) { 16 | sprintf(str, "%s : Time : %ld [us]\n", str, ((myprofiler_end.tv_sec * 1000000 + myprofiler_end.tv_usec) - (myprofiler_start.tv_sec * 1000000 + myprofiler_start.tv_usec))); 17 | } 18 | */ 19 | void mytic(void) { 20 | cudaDeviceSynchronize(); 21 | gettimeofday(&myprofiler_start,NULL); 22 | } 23 | // Synchronizes the device, records the end timestamp and prints the elapsed time 24 | void mytoc(void) { 25 | cudaDeviceSynchronize(); 26 | gettimeofday(&myprofiler_end,NULL); 27 | printTime(); 28 | } 29 | // Same as mytoc(void), but the printed line is prefixed with str 30 | void mytoc(char *str) { 31 | cudaDeviceSynchronize(); 32 | gettimeofday(&myprofiler_end,NULL); 33 | printTime(str); 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /runtime/src/static/cuda/cudaRef.h: -------------------------------------------------------------------------------- 1 |
#ifndef _CUDA_REF_H_ 2 | #define _CUDA_REF_H_ 3 | 4 | template 5 | class cudaRef { 6 | public: 7 | T data; 8 | 9 | __host__ __device__ cudaRef(void) { 10 | data = NULL; 11 | } 12 | 13 | __host__ __device__ cudaRef(T _data) { 14 | data = _data; 15 | } 16 | 17 | __host__ __device__ T get(void) { 18 | return data; 19 | } 20 | 21 | __host__ __device__ void set(T newVal) { 22 | data = newVal; 23 | } 24 | }; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /runtime/src/static/opencl/DeliteOpenCL.h: -------------------------------------------------------------------------------- 1 | #ifndef _DELITE_OPENCL_H_ 2 | #define _DELITE_OPENCL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | // Second element in pair indicates that void* points to GPU device memory, 14 | // so should not call free() on it. 15 | struct FreeItem { 16 | //cudaEvent_t event; 17 | list< pair >* keys; // List of pointers to the DSL Objects (e.g. 
pointer to DoubleVector object on the host side) 18 | }; 19 | 20 | extern cl_context context; 21 | extern cl_command_queue command_queue; 22 | extern list* lastAlloc; 23 | extern queue* freeList; 24 | extern map*>* clMemoryMap; 25 | 26 | extern void hostInit(); 27 | extern void DeliteOpenCLMemcpyHtoDAsync(cl_mem dest, void* sptr, size_t size); 28 | extern void DeliteOpenCLMemcpyDtoHAsync(void* dptr, cl_mem src, size_t size); 29 | extern void DeliteOpenCLMemcpyDtoDAsync(cl_mem dst, cl_mem src, size_t size); 30 | extern void freeCLMemory(FreeItem item); 31 | extern cl_mem DeliteOpenCLMallocImage(size_t width, size_t height); 32 | extern cl_mem DeliteOpenCLMalloc(size_t size); 33 | extern cl_mem DeliteOpenCLHostMalloc(size_t size, void *ptr); 34 | //TODO: Implement this 35 | //extern void DeliteCudaMemcpyDtoDAsync(void *dptr, void* sptr, size_t size); 36 | 37 | #endif 38 | 39 | -------------------------------------------------------------------------------- /runtime/src/static/opencl/blas/README: -------------------------------------------------------------------------------- 1 | Since BLAS for OpenCL is not installed in the default location, 2 | after building this library, you need to add this path to the dynamic library search path: 
3 | Linux: $LD_LIBRARY_PATH or /etc/ld.so.conf.d/ 4 | Windows: %PATH% 5 | -------------------------------------------------------------------------------- /runtime/src/static/opencl/blas/blasgen.sh: -------------------------------------------------------------------------------- 1 | g++ -I/usr/local/cuda/include/ -shared -fPIC clblas.cpp -o libclblas.so 2 | -------------------------------------------------------------------------------- /runtime/src/static/opencl/blas/clblas.h: -------------------------------------------------------------------------------- 1 | #ifndef _OPENCLBLAS_ 2 | #define _OPENCLBLAS_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | //extern "C" { 10 | void clblasInit(cl_context context, cl_device_id device_id); 11 | void clblasSetQueue(cl_command_queue cmd_q); 12 | void clblasSgemm(char transa, char transb, int m, int n, int k, float alpha, cl_mem A, int lda, cl_mem B, int ldb, float beta, cl_mem C, int ldc); 13 | void clblasSgemv(char transa, int m, int n, float alpha, cl_mem A, int lda, cl_mem x, int incx, float beta, cl_mem y, int incy); 14 | void clblasDgemm(char transa, char transb, int m, int n, int k, double alpha, cl_mem A, int lda, cl_mem B, int ldb, double beta, cl_mem C, int ldc); 15 | void clblasDgemv(char transa, int m, int n, double alpha, cl_mem A, int lda, cl_mem x, int incx, double beta, cl_mem y, int incy); 16 | //}; 17 | //TODO: On Windows, the declarations below should be used to build a dynamic-link library (DLL) 18 | //__declspec(dllexport) void clblasInit(cl_context context, cl_device_id device_id); 19 | //__declspec(dllexport) void clblasSetQueue(cl_command_queue cmd_q); 20 | //__declspec(dllexport) void clblasSgemm(char transa, char transb, int m, int n, int k, float alpha, cl_mem A, int lda, cl_mem B, int ldb, float beta, cl_mem C, int ldc); 21 | //__declspec(dllexport) void clblasSgemv(char transa, int m, int n, float alpha, cl_mem A, int lda, cl_mem x, int incx, float beta, cl_mem y, int incy); 22 | 23 | #endif 24 | 
-------------------------------------------------------------------------------- /runtime/src/static/opencl/blas/libclblas.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/runtime/src/static/opencl/blas/libclblas.so -------------------------------------------------------------------------------- /runtime/src/static/opencl/openclInit.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern "C" JNIEXPORT void JNICALL Java_ppl_delite_runtime_executor_OpenCLExecutionThread_initializeThread(JNIEnv* env, jobject obj, jint threadId, jint numThreads); 4 | 5 | //TODO: Check initialization errors! 6 | //TODO: Move more setup work into the initialization phase 7 | JNIEXPORT void JNICALL Java_ppl_delite_runtime_executor_OpenCLExecutionThread_initializeThread(JNIEnv* env, jobject obj, jint threadId, jint numThreads) { 8 | 9 | printf("OpenCL device is being initialized....\n"); 10 | 11 | /* 12 | char buf[1024]; 13 | cl_uint numPlatforms; 14 | cl_uint numDevices; 15 | clGetPlatformIDs(0, NULL, &numPlatforms); 16 | cl_platform_id *platformList = (cl_platform_id *)malloc(sizeof(cl_platform_id) * numPlatforms); 17 | cl_int ret = clGetPlatformIDs(numPlatforms, platformList, NULL); 18 | 19 | clGetPlatformInfo(platformList[0],CL_PLATFORM_NAME,1024,buf,NULL); 20 | printf("\nDelite Runtime is using OpenCL platform : %s\n\n", buf); 21 | 22 | cl_device_id device_id = NULL; 23 | 24 | //ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, 25 | ret = clGetDeviceIDs( platformList[0], CL_DEVICE_TYPE_GPU, 1, &device_id, &numDevices); 26 | */ 27 | 28 | } 29 | -------------------------------------------------------------------------------- /runtime/src/static/opencl/openclInit.sh: -------------------------------------------------------------------------------- 1 | g++ -I$JAVA_HOME/include 
-I$JAVA_HOME/include/linux -shared -fPIC openclInit.cpp -o openclInit.so 2 | -------------------------------------------------------------------------------- /runtime/src/static/scala/DeliteInterfaces.scala: -------------------------------------------------------------------------------- 1 | package generated.scala 2 | 3 | 4 | /** 5 | * Delite 6 | */ 7 | 8 | abstract class DeliteOpMultiLoop[A] { 9 | def size(info: ResourceInfo): Long 10 | var loopStart: Long 11 | var loopSize: Long 12 | def alloc(info: ResourceInfo): A 13 | def processRange(info: ResourceInfo, __act: A, start: Long, end: Long): A //init+process 14 | def combine(info: ResourceInfo, __act: A, rhs: A): Unit 15 | def postCombine(info: ResourceInfo, __act: A, rhs: A): Unit 16 | def postProcInit(info: ResourceInfo, __act: A): Unit 17 | def postProcess(info: ResourceInfo, __act: A): Unit 18 | def finalize(info: ResourceInfo, __act: A): Unit 19 | def initAct(info: ResourceInfo): A 20 | } 21 | 22 | /** 23 | * Ref 24 | */ 25 | 26 | final class Ref[@specialized T](v: T) { 27 | private[this] var _v = v 28 | 29 | final def get = _v 30 | final def set(v: T) = _v = v 31 | } 32 | -------------------------------------------------------------------------------- /runtime/src/static/scala/ResourceInfo.scala: -------------------------------------------------------------------------------- 1 | package generated.scala 2 | 3 | final case class ResourceInfo ( 4 | final val threadId: Int, 5 | final val numThreads: Int, 6 | final val slaveId: Int, 7 | final val numSlaves: Int, 8 | final val groupId: Int, 9 | final val groupSize: Int, 10 | final val availableThreads: Int 11 | ) { 12 | //workaround for a weird scoping issue with copy constructors in MultiLoopSync.scala 13 | final def copySync(threadId: Int, groupId: Int, groupSize: Int, availableThreads: Int) = copy(threadId = threadId, groupId = groupId, groupSize = groupSize, availableThreads = availableThreads) 14 | } 15 | 16 | object ResourceInfo { 17 | def apply(threadId: 
Int, numThreads: Int, slaveId: Int, numSlaves: Int): ResourceInfo = apply(threadId, numThreads, slaveId, numSlaves, -1, -1, numThreads) 18 | } 19 | -------------------------------------------------------------------------------- /runtime/src/static/scala/container/IntComparator.java: -------------------------------------------------------------------------------- 1 | package generated.scala.container; 2 | 3 | public interface IntComparator { 4 | int compare(int o1, int o2); 5 | } 6 | -------------------------------------------------------------------------------- /runtime/src/static/scala/container/LongComparator.java: -------------------------------------------------------------------------------- 1 | package generated.scala.container; 2 | 3 | public interface LongComparator { 4 | int compare(long o1, long o2); 5 | } 6 | -------------------------------------------------------------------------------- /runtime/tests/ppl/tests/scalatest/inputs/ProfilerSuite/input_1/profile_t_0.csv: -------------------------------------------------------------------------------- 1 | x0,990,7,0 2 | x1,997,4,0 3 | x0,1001,5,0 4 | x3,1006,7,1 5 | x4,1013,3,1 6 | x7,1016,12,2 7 | x8,1030,9,2 8 | x9,1040,0,2 9 | x10,1041,0,2 10 | x5_0,1015,28,1 11 | x6,1044,3,1 12 | x2_0,1006,41,0 13 | x11_0,1049,31,0 14 | eop,1080,0,0 15 | -------------------------------------------------------------------------------- /runtime/tests/ppl/tests/scalatest/inputs/ProfilerSuite/input_1/profile_t_1.csv: -------------------------------------------------------------------------------- 1 | x11_1,1049,28,0 2 | -------------------------------------------------------------------------------- /runtime/tests/ppl/tests/scalatest/inputs/ProfilerSuite/input_1/profile_t_2.csv: -------------------------------------------------------------------------------- 1 | x11_2,1049,29,0 2 | -------------------------------------------------------------------------------- 
/runtime/tests/ppl/tests/scalatest/inputs/ProfilerSuite/input_1/profile_tic_toc_cpp.csv: -------------------------------------------------------------------------------- 1 | reg1,1025,4 2 | reg2,1030,3 3 | reg2,1034,4 4 | reg3,1040,7 5 | app,1020,24 -------------------------------------------------------------------------------- /runtime/tests/ppl/tests/scalatest/inputs/ProfilerSuite/input_1/profile_tic_toc_scala.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/runtime/tests/ppl/tests/scalatest/inputs/ProfilerSuite/input_1/profile_tic_toc_scala.csv -------------------------------------------------------------------------------- /unsafe_classes/unsafe.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stanford-ppl/Delite/88021e777d76ca8dd2bdab84983897ade17be47b/unsafe_classes/unsafe.jar -------------------------------------------------------------------------------- /unsafe_classes/unsafe/UnsafeAccessor.java: -------------------------------------------------------------------------------- 1 | package unsafe; 2 | 3 | import sun.misc.Unsafe; 4 | 5 | public class UnsafeAccessor { 6 | public static final Unsafe unsafe = Unsafe.getUnsafe(); 7 | } 8 | --------------------------------------------------------------------------------