├── .github ├── CONTRIBUTING.md ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── ---report-a-bug.md │ ├── ---request-a-new-feature.md │ ├── --ask-a-question.md │ └── config.yml ├── auto_pr_team.yml ├── no-response.yml ├── pull_request_template.md └── stale.yml ├── .gitignore ├── .vscode └── settings.json ├── Cargo.lock ├── Cargo.toml ├── README.md ├── coaster-blas ├── .github │ └── CONTRIBUTING.md ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches │ └── rblas_overhead.rs ├── perf │ ├── README.md │ ├── perf_rblas.sh │ └── run_perf.sh ├── rustfmt.toml ├── src │ ├── binary.rs │ ├── frameworks │ │ ├── cuda │ │ │ ├── helper.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── native.rs │ │ └── opencl.rs │ ├── lib.rs │ ├── operation.rs │ ├── plugin.rs │ └── transpose.rs └── tests │ └── blas_specs.rs ├── coaster-nn ├── .github │ └── CONTRIBUTING.md ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── perf │ ├── README.md │ ├── perf_rblas.sh │ └── run_perf.sh ├── rustfmt.toml └── src │ ├── frameworks │ ├── cuda │ │ ├── helper.rs │ │ └── mod.rs │ ├── mod.rs │ ├── native │ │ ├── helper.rs │ │ └── mod.rs │ └── opencl.rs │ ├── lib.rs │ ├── plugin.rs │ └── tests │ ├── activation.rs │ ├── bench_all.rs │ ├── convolutional.rs │ ├── dropout.rs │ ├── mod.rs │ ├── pooling.rs │ ├── rnn.rs │ └── softmax.rs ├── coaster ├── .github │ └── CONTRIBUTING.md ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches │ └── shared_tensor.rs ├── examples │ └── readme.rs ├── index.html ├── perf │ ├── README.md │ └── run_perf.sh ├── rustfmt.toml ├── src │ ├── backend.rs │ ├── binary.rs │ ├── device.rs │ ├── error.rs │ ├── framework.rs │ ├── frameworks │ │ ├── cuda │ │ │ ├── api │ │ │ │ ├── driver │ │ │ │ │ ├── context.rs │ │ │ │ │ ├── device.rs │ │ │ │ │ ├── error.rs │ │ │ │ │ ├── ffi.rs │ │ │ │ │ ├── memory.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── utils.rs │ │ │ │ └── mod.rs │ │ │ ├── context.rs │ │ │ ├── device.rs │ │ │ ├── function.rs │ │ │ ├── memory.rs │ │ │ ├── mod.rs │ │ │ └── module.rs │ │ ├── mod.rs │ │ ├── native │ │ │ ├── binary.rs │ │ │ ├── device.rs │ │ │ ├── error.rs │ │ │ ├── flatbox.rs │ │ │ ├── function.rs │ │ │ ├── hardware.rs │ │ │ ├── mod.rs │ │ │ ├── stable_alloc.rs │ │ │ └── unstable_alloc.rs │ │ └── opencl │ │ │ ├── api │ │ │ ├── context.rs │ │ │ ├── device.rs │ │ │ ├── error.rs │ │ │ ├── ffi.rs │ │ │ ├── memory.rs │ │ │ ├── mod.rs │ │ │ ├── platform.rs │ │ │ ├── queue.rs │ │ │ └── types.rs │ │ │ ├── context.rs │ │ │ ├── device.rs │ │ │ ├── event.rs │ │ │ ├── kernel.rs │ │ │ ├── memory.rs │ │ │ ├── mod.rs │ │ │ ├── platform.rs │ │ │ ├── program.rs │ │ │ └── queue.rs │ ├── hardware.rs │ ├── lib.rs │ ├── operation.rs │ ├── plugin.rs │ └── tensor.rs └── tests │ ├── backend_specs.rs │ ├── framework_cuda_specs.rs │ ├── framework_native_specs.rs │ ├── framework_opencl_specs.rs │ ├── hardware_specs.rs │ ├── shared_memory_specs.rs │ ├── tensor_specs.rs │ ├── ui.rs │ └── ui │ ├── err-02-drop_live_memory.rs │ ├── err-02-drop_live_memory.stderr │ ├── err-03-leak_read_reference.rs │ ├── err-03-leak_read_reference.stderr │ ├── err-04-leak_write_reference.rs │ ├── err-04-leak_write_reference.stderr │ ├── err-05-read_write_borrows.rs │ ├── err-05-read_write_borrows.stderr │ ├── err-06-two_write_borrows.rs │ ├── err-06-two_write_borrows.stderr │ └── ok-01-multiple_read_only_borrows.rs ├── greenglas ├── .github │ └── CONTRIBUTING.md ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── 
LICENSE-MIT ├── README.md ├── rustfmt.toml ├── src │ ├── image │ │ ├── mod.rs │ │ └── modifiers.rs │ ├── lib.rs │ ├── transformer.rs │ └── word │ │ ├── mod.rs │ │ └── modifiers.rs └── tests │ ├── assets │ ├── test_image.baseline.jpeg │ ├── test_image.bmp │ ├── test_image.gif │ ├── test_image.jpeg │ └── test_image.png │ ├── image_spec.rs │ ├── transformer_spec.rs │ └── word_spec.rs ├── juice-examples ├── README.md ├── juice-utils │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── mackey-glass-rnn-regression │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── assets │ │ ├── .gitignore │ │ ├── norm_mackeyglass_test.csv │ │ └── norm_mackeyglass_train.csv │ ├── rnn.juice │ └── src │ │ ├── args.rs │ │ └── main.rs └── mnist-image-multiclass-classification │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── assets │ └── .gitignore │ └── src │ └── main.rs ├── juice ├── .github │ ├── CONTRIBUTING.md │ └── ISSUE_TEMPLATE.md ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml ├── FEATURE-FLAGS.md ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── RELEASE.md ├── benches │ └── network_benches.rs ├── build.rs ├── capnp │ └── juice.capnp ├── doc │ ├── book.toml │ ├── book │ │ ├── _FontAwesome │ │ │ ├── css │ │ │ │ └── font-awesome.css │ │ │ └── fonts │ │ │ │ ├── FontAwesome.ttf │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ └── fontawesome-webfont.woff2 │ │ ├── backend.html │ │ ├── book.css │ │ ├── book.js │ │ ├── building-networks.html │ │ ├── create-new-layer.html │ │ ├── deep-learning-glossary.html │ │ ├── distributed-optimization.html │ │ ├── favicon.png │ │ ├── highlight.css │ │ ├── highlight.js │ │ ├── http │ │ │ └── spearow.github.io │ │ │ │ └── juice │ │ │ │ └── juice │ │ │ │ └── index.html │ │ ├── index.html │ │ ├── jquery.js │ │ ├── juice.html │ │ ├── layer-lifecycle.html │ │ ├── layers.html │ │ ├── multi-device-optimization.html │ │ ├── optimize-layers.html │ │ ├── print.html │ │ ├── solvers.html │ │ └── tomorrow-night.css │ └── src │ │ ├── SUMMARY.md │ │ ├── api-docs.md │ │ ├── backend.md │ │ ├── building-networks.md │ │ ├── create-new-layer.md │ │ ├── deep-learning-glossary.md │ │ ├── distributed-optimization.md │ │ ├── http │ │ └── spearow.github.io │ │ │ └── spearow │ │ │ └── juice │ │ │ └── index.html │ │ ├── juice.md │ │ ├── layer-lifecycle.md │ │ ├── layers.md │ │ ├── multi-device-optimization.md │ │ ├── optimize-layers.md │ │ └── solvers.md ├── examples │ └── benchmarks.rs ├── perf │ ├── README.md │ └── run_perf.sh ├── rustfmt.toml ├── src │ ├── capnp_util.rs │ ├── layer.rs │ ├── layers │ │ ├── activation │ │ │ ├── mod.rs │ │ │ ├── relu.rs │ │ │ ├── sigmoid.rs │ │ │ └── tanh.rs │ │ ├── common │ │ │ ├── convolution.rs │ │ │ ├── dropout.rs │ │ │ ├── linear.rs │ │ │ ├── log_softmax.rs │ │ │ ├── mod.rs │ │ │ ├── pooling.rs │ │ │ ├── rnn.rs │ │ │ └── softmax.rs │ │ ├── container │ │ │ ├── mod.rs │ │ │ └── sequential.rs │ │ ├── loss │ │ │ ├── mean_squared_error.rs │ │ │ ├── mod.rs │ │ │ └── negative_log_likelihood.rs │ │ ├── mod.rs │ │ └── utility │ │ │ ├── flatten.rs │ │ │ ├── mod.rs │ │ │ └── reshape.rs │ ├── lib.rs │ ├── solver │ │ ├── confusion_matrix.rs │ │ ├── mod.rs │ │ └── regression_evaluator.rs │ ├── solvers │ │ ├── mod.rs │ │ └── sgd │ │ │ ├── mod.rs │ │ │ └── momentum.rs │ ├── util.rs │ └── weight.rs └── tests │ ├── layer_specs.rs │ └── solver_specs.rs ├── magic.yml ├── rcublas ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml.FIXME ├── 
README.md ├── cublas-sys │ ├── Cargo.toml │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── README.md │ ├── build.rs │ ├── src │ │ ├── generated.rs │ │ └── lib.rs │ └── wrapper.h └── cublas │ ├── Cargo.toml │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ └── src │ ├── api │ ├── context.rs │ ├── enums.rs │ ├── level1.rs │ ├── level3.rs │ ├── mod.rs │ └── util.rs │ ├── chore.rs │ ├── error.rs │ └── lib.rs ├── rcudnn ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Cargo.toml.FIXME ├── README.md ├── cudnn-sys │ ├── Cargo.toml │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── build.rs │ ├── src │ │ ├── generated.rs │ │ └── lib.rs │ └── wrapper.h ├── cudnn │ ├── Cargo.toml │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── benches │ │ └── cudnn_overhead.rs │ ├── src │ │ ├── activation_descriptor.rs │ │ ├── api │ │ │ ├── activation.rs │ │ │ ├── convolution.rs │ │ │ ├── cuda.rs │ │ │ ├── dropout.rs │ │ │ ├── mod.rs │ │ │ ├── normalization.rs │ │ │ ├── pooling.rs │ │ │ ├── rnn.rs │ │ │ ├── softmax.rs │ │ │ ├── tensor.rs │ │ │ └── utils.rs │ │ ├── convolution_descriptor.rs │ │ ├── cuda.rs │ │ ├── cudnn.rs │ │ ├── dropout_descriptor.rs │ │ ├── error.rs │ │ ├── filter_descriptor.rs │ │ ├── lib.rs │ │ ├── normalization_descriptor.rs │ │ ├── pooling_descriptor.rs │ │ ├── rnn_descriptor.rs │ │ ├── tensor_descriptor.rs │ │ └── utils.rs │ └── tests │ │ ├── cudnn_specs.rs │ │ └── tensor_descriptor_specs.rs └── rustfmt.toml ├── remote-test ├── README.md ├── concourse-crashtest-job.png ├── launch.sh └── test.yml └── rust-blas ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── examples ├── math.rs └── readme.rs └── src ├── attribute.rs ├── default.rs ├── lib.rs ├── math ├── bandmat.rs ├── mat.rs ├── matrix.rs ├── matrix_vector.rs ├── mod.rs └── vector.rs ├── matrix ├── ll.rs ├── mod.rs └── ops.rs ├── matrix_vector ├── ll.rs ├── mod.rs └── ops.rs ├── pointer.rs ├── prefix.rs ├── scalar.rs └── vector ├── ll.rs ├── mod.rs └── ops.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: drahnr 2 | patreon: drahnr 3 | liberapay: drahnr 4 | open_collective: bernhard-schuster 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---report-a-bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Report a bug" 3 | about: Create a report to help us fix bugs 4 | title: '' 5 | labels: bug 6 | assignees: dev 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 12 | 22 | 23 | **To Reproduce** 24 | 25 | Steps to reproduce the behaviour: 26 | 27 | 1. Setting up preconditions with `...` 28 | 2. Run `cargo ...` 29 | 3. Execute `...` 30 | 4. ... 
31 | 32 | **Expected behavior** 33 | 34 | 35 | 36 | **Screenshots** 37 | 38 | 40 | 41 | **Please complete the following information:** 42 | 43 | - System: 44 | - Version: 45 | - Rust: 46 | - Environment: 47 | - Backends (if relevant): 48 | * opencl: 49 | * cuda: 50 | 51 | **Additional context** 52 | 53 | 54 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/---request-a-new-feature.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F331 Request a new feature" 3 | about: Suggest a feature you would like to see implemented 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a particular use-case?** 11 | 12 | 13 | 14 | **Describe the solution you'd like to implement/see implemented** 15 | 16 | 17 | 18 | **Describe alternatives you've considered** 19 | 20 | 21 | 22 | **Additional context** 23 | 24 | 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/--ask-a-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓ Ask a question" 3 | about: Something is not clear to you from the documentation 4 | title: '' 5 | labels: documentation, question 6 | assignees: drahnr 7 | 8 | --- 9 | 10 | ** Question: ** 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /.github/auto_pr_team.yml: -------------------------------------------------------------------------------- 1 | org: spearow 2 | team: dev 3 | -------------------------------------------------------------------------------- /.github/no-response.yml: -------------------------------------------------------------------------------- 1 | daysUntilClose: 124 2 | 3 | responseRequiredLabel: needs-more-information 4 | 5 | closeComment: > 6 | Feel free to re-open once there is more information available. 7 | 8 | If you are not the original author, please create a new issue. 9 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## What does this PR accomplish? 6 | 7 | 10 | 11 | * 🩹 Bug Fix 12 | * 🦚 Feature 13 | * 🧭 Architecture 14 | * 📙 Documentation 15 | * 🦣 Legacy 16 | * 🪆 Dependency 17 | * ⏱ Performance 18 | * 🪣 Misc 19 | 20 | 24 | Closes # . 
25 | 26 | ## Changes proposed by this PR: 27 | 28 | 31 | 32 | ## Notes to reviewer: 33 | 34 | 40 | 41 | 42 | ## 📜 Checklist 43 | 44 | * [ ] Test coverage is excellent 45 | * [ ] _All_ unit tests pass 46 | * [ ] The `juice-examples` run just fine 47 | * [ ] Documentation is thorough, extensive and explicit 48 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/.github/stale.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # will have compiled files and executables 2 | target 3 | 4 | # These are backup files generated by rustfmt 5 | **/*.rs.bk 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "workbench.colorCustomizations": { 3 | "activityBar.activeBackground": "#19c1ab", 4 | "activityBar.activeBorder": "#af1ac6", 5 | "activityBar.background": "#19c1ab", 6 | "activityBar.foreground": "#15202b", 7 | "activityBar.inactiveForeground": "#15202b99", 8 | "activityBarBadge.background": "#af1ac6", 9 | "activityBarBadge.foreground": "#e7e7e7", 10 | "statusBar.background": "#139483", 11 | "statusBar.foreground": "#e7e7e7", 12 | "statusBarItem.hoverBackground": "#19c1ab", 13 | "titleBar.activeBackground": "#139483", 14 | "titleBar.activeForeground": "#e7e7e7", 15 | "titleBar.inactiveBackground": "#13948399", 16 | "titleBar.inactiveForeground": "#e7e7e799" 17 | }, 18 | "peacock.remoteColor": "#139483" 19 | } -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "greenglas", "coaster", "coaster-nn", "coaster-blas", "juice", "rust-blas", 4 | "rcudnn/cudnn", "rcudnn/cudnn-sys", "rcublas/cublas", "rcublas/cublas-sys", 5 | "juice-examples/juice-utils", "juice-examples/mackey-glass-rnn-regression", 6 | "juice-examples/mnist-image-multiclass-classification"] 7 | 8 | exclude = [ "./rcudnn", "./rcublas", "./juice-examples"] 9 | 10 | [patch.crates-io] 11 | coaster-nn = { path = "./coaster-nn" } 12 | coaster-blas = { path = "./coaster-blas" } 13 | coaster = { path = "./coaster" } 14 | greenglas = { path = "./greenglas" } 15 | juice = { path = "./juice" } 16 | rust-blas = { path = "./rust-blas" } 17 | rcublas = { path = "./rcublas/cublas" } 18 | rcublas-sys = { path = "./rcublas/cublas-sys" } 19 | rcudnn = { path = "./rcudnn/cudnn" } 20 | rcudnn-sys = { path = "./rcudnn/cudnn-sys" } 21 | 22 | [profile.bench] 23 | opt-level = 3 24 | debug = false 25 | rpath = false 26 | lto = false 27 | debug-assertions = false 28 | codegen-units = 1 29 | 30 | [profile.dev] 31 | opt-level = 0 32 | debug = true 33 | rpath = false 34 | lto = false 35 | debug-assertions = true 36 | codegen-units = 2 37 | -------------------------------------------------------------------------------- /coaster-blas/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /coaster-blas/CHANGELOG.md: 
-------------------------------------------------------------------------------- 1 | 2 | ## 0.2.0 (2016-02-22) 3 | 4 | 5 | #### Features 6 | 7 | * **cuda:** add CUDA support for all existing operations ([6e98efb0](https://github.com/autumnai/collenchyma-blas/commit/6e98efb0a5c8799dad4947fa64210d36d88548ab)) 8 | * **feature_flags:** add framework feature groups ([74c86c42](https://github.com/autumnai/collenchyma-blas/commit/74c86c4291da55bb48855fcb19f3791c50291874)) 9 | * **gemm:** add Level 3 operation GEMM ([89674b17](https://github.com/autumnai/collenchyma-blas/commit/89674b17621a57673da5decf43624cc8e05cf317)) 10 | * **iblas_trait:** remove trait bounds for IBlas ([44cbacf2](https://github.com/autumnai/collenchyma-blas/commit/44cbacf21c0857dc32acba714c973bf7ffae8dff)) 11 | * **license:** change license to dual MIT/Apache-2.0 ([66ec21b9](https://github.com/autumnai/collenchyma-blas/commit/66ec21b96db28ed94e72704338e39e2be4d685a8)) 12 | 13 | #### Bug Fixes 14 | 15 | * **native:** fix writing into rblas matrix memory ([6b184fbc](https://github.com/autumnai/collenchyma-blas/commit/6b184fbcfc6272e6b762997e80479233964620aa), [24bf8175](https://github.com/autumnai/collenchyma-blas/commit/24bf8175452922268f6a041a7dab9f73298c8c2a)) 16 | 17 | 18 | 19 | ## 0.1.0 (2015-12-16) 20 | 21 | 22 | #### Bug Fixes 23 | 24 | * **travis:** require stable and beta builds to pass ([95300544](https://github.com/autumnai/collenchyma-nn/commit/95300544608310750b467f984f0506e65416f483)) 25 | 26 | #### Features 27 | 28 | * **blas:** add L1 BLAS for native ([08d1da0c](https://github.com/autumnai/collenchyma-nn/commit/08d1da0cbe86cb91c8a68a80c36dc7eb26cc9e95)) 29 | * **features:** add framework feature groups ([74c86c42](https://github.com/autumnai/collenchyma-nn/commit/74c86c4291da55bb48855fcb19f3791c50291874)) 30 | -------------------------------------------------------------------------------- /coaster-blas/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "coaster-blas" 3 | description = "Coaster library for full BLAS support" 4 | version = "0.4.0" 5 | authors = [ 6 | "Bernhard Schuster ", 7 | "Lissa Hyacinth ", 8 | "Paul Kassianik ", 9 | "Michael Hirn", 10 | "Maximilian Goisser", 11 | ] 12 | repository = "https://github.com/spearow/coaster-blas" 13 | homepage = "https://spearow.io/projects/coaster-blas" 14 | documentation = "https://spearow.github.com/coaster-blas" 15 | readme = "README.md" 16 | keywords = ["blas", "coaster", "computation", "hpc", "plugin"] 17 | license = "MIT OR Apache-2.0" 18 | edition = "2018" 19 | 20 | [dependencies] 21 | coaster = { version = "0.2", path = "../coaster", default-features = false } 22 | log = "0.4" 23 | # native 24 | rust-blas = { version = "0.2", path = "../rust-blas", optional = true } 25 | # cuda 26 | rcublas = { version = "0.6", path = "../rcublas/cublas", optional = true } 27 | 28 | [features] 29 | default = ["native", "cuda"] 30 | native = ["coaster/native", "rust-blas"] 31 | cuda = ["coaster/cuda", "rcublas"] 32 | opencl = ["coaster/opencl"] 33 | -------------------------------------------------------------------------------- /coaster-blas/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2016 The collenchyma-blas developers. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /coaster-blas/benches/rblas_overhead.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | use coaster as co; 4 | use coaster_blas as co_blas; 5 | use rust_blas as rblas; 6 | 7 | use co::prelude::*; 8 | use co_blas::plugin::*; 9 | use test::Bencher; 10 | 11 | use rand::distributions::Standard; 12 | use rand::{thread_rng, Rng}; 13 | 14 | fn backend() -> Backend { 15 | Backend::::default().unwrap() 16 | } 17 | 18 | fn bench_dot_rblas(b: &mut Bencher, n: usize) { 19 | let rng = thread_rng(); 20 | let slice_a: Vec = rng.sample_iter(Standard).take(n).collect(); 21 | let slice_b: Vec = rng.sample_iter(Standard).take(n).collect(); 22 | 23 | b.iter(|| { 24 | let res = rblas::Dot::dot(slice_a.as_slice(), slice_b.as_slice()); 25 | test::black_box(res); 26 | }); 27 | } 28 | 29 | fn bench_dot_coaster(b: &mut Bencher, n: usize) { 30 | let rng = thread_rng(); 31 | let slice_a: Vec = rng.sample_iter(Standard).take(n).collect(); 32 | let slice_b: Vec = rng.sample_iter(Standard).take(n).collect(); 33 | 34 | let backend = backend(); 35 | let shared_a = &mut SharedTensor::::new(&[n]); 36 | let shared_b = &mut SharedTensor::::new(&[n]); 37 | let shared_res = &mut SharedTensor::::new(&[1]); 38 | shared_a 39 | .write_only(backend.device()) 40 | .unwrap() 41 | .as_mut_slice() 42 | .clone_from_slice(slice_a.as_slice()); 43 | shared_b 44 | .write_only(backend.device()) 45 | .unwrap() 46 | .as_mut_slice() 47 | .clone_from_slice(slice_b.as_slice()); 48 | let _ = backend.dot(shared_a, shared_b, shared_res); 49 | 50 | b.iter(|| backend.dot(shared_a, shared_b, shared_res).unwrap()); 51 | } 52 | 53 | #[bench] 54 | fn bench_dot_100_rblas(b: &mut Bencher) { 55 | bench_dot_rblas(b, 100); 56 | } 57 | 58 | #[bench] 59 | fn bench_dot_100_coaster(b: &mut Bencher) { 60 | bench_dot_coaster(b, 100); 61 | } 62 | 63 | #[bench] 64 | fn bench_dot_1000_rblas(b: &mut Bencher) { 65 | bench_dot_rblas(b, 1000); 66 | } 67 | 68 | #[bench] 69 | fn bench_dot_1000_coaster(b: &mut Bencher) { 70 | bench_dot_coaster(b, 1000); 71 | } 72 | 73 | #[bench] 74 | fn bench_dot_2000_rblas(b: &mut Bencher) { 75 | bench_dot_rblas(b, 2000); 76 | } 77 | 78 | #[bench] 79 | fn bench_dot_2000_coaster(b: &mut Bencher) { 80 | bench_dot_coaster(b, 2000); 81 | } 82 | 83 | #[bench] 84 | fn bench_dot_10000_rblas(b: &mut Bencher) { 85 | 
bench_dot_rblas(b, 10000); 86 | } 87 | 88 | #[bench] 89 | fn bench_dot_10000_coaster(b: &mut Bencher) { 90 | bench_dot_coaster(b, 10000); 91 | } 92 | 93 | #[bench] 94 | fn bench_dot_20000_rblas(b: &mut Bencher) { 95 | bench_dot_rblas(b, 20000); 96 | } 97 | 98 | #[bench] 99 | fn bench_dot_20000_coaster(b: &mut Bencher) { 100 | bench_dot_coaster(b, 20000); 101 | } 102 | -------------------------------------------------------------------------------- /coaster-blas/perf/README.md: -------------------------------------------------------------------------------- 1 | # Profiling 2 | 3 | Collenchyma comes with scripts to help with profiling performance problems. 4 | 5 | Run [perf](http://www.brendangregg.com/perf.html) on one of the benchmark test: 6 | 7 | ```sh 8 | # compile latest version of benchmarks with DWARF information 9 | cargo rustc --bench rblas_overhead -- -g 10 | sudo ./perf/run_perf.sh bench_1000_dot_100_collenchyma # perf needs sudo 11 | ``` 12 | -------------------------------------------------------------------------------- /coaster-blas/perf/perf_rblas.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | perf record -a -g --output perf_rblas_data.perf target/debug/rblas_overhead-cf1a2670c118749d --bench bench_1000_dot_100_rblas 3 | perf script -f -i perf_rblas_data.perf > perf_rblas_script.perf 4 | /home/hobofan/stuff/FlameGraph/stackcollapse-perf.pl perf_rblas_script.perf > perf_rblas_folded.perf 5 | /home/hobofan/stuff/FlameGraph/flamegraph.pl perf_rblas_folded.perf > perf_rblas_graph.svg 6 | -------------------------------------------------------------------------------- /coaster-blas/perf/run_perf.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | if [ $# -eq 0 ] 3 | then 4 | echo "No benchmark name supplied" 5 | exit 1 6 | fi 7 | benchname=$1 8 | mkdir -p target/perf 9 | perf record -a -g --output target/perf/${benchname}.data target/debug/rblas_overhead-c02a41a1401d43da --bench ${benchname} 10 | perf script -f -i target/perf/${benchname}.data > target/perf/${benchname}.scripted 11 | stackcollapse-perf target/perf/${benchname}.scripted | grep ${benchname} > target/perf/${benchname}.folded 12 | flamegraph target/perf/${benchname}.folded > target/perf/${benchname}.svg 13 | -------------------------------------------------------------------------------- /coaster-blas/rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_strings = false 2 | reorder_imports = true 3 | -------------------------------------------------------------------------------- /coaster-blas/src/binary.rs: -------------------------------------------------------------------------------- 1 | //! Provides the IBlasBinary binary trait for Coaster's Framework implementation. 2 | 3 | use super::operation::*; 4 | 5 | /// Describes the operation binding for a Blas Binary implementation. 6 | pub trait IBlasBinary { 7 | /// Describes the Asum Operation. 8 | type Asum: IOperationAsum; 9 | /// Describes the Axpy Operation. 10 | type Axpy: IOperationAxpy; 11 | /// Describes the Copy Operation. 12 | type Copy: IOperationCopy; 13 | /// Describes the Dot Operation. 14 | type Dot: IOperationDot; 15 | /// Describes the Nrm2 Operation. 16 | type Nrm2: IOperationNrm2; 17 | /// Describes the Scale Operation. 18 | type Scale: IOperationScale; 19 | /// Describes the Swap Operation. 20 | type Swap: IOperationSwap; 21 | 22 | /// Returns an initialized Asum operation. 
23 | fn asum(&self) -> Self::Asum; 24 | /// Returns an initialized Axpy operation. 25 | fn axpy(&self) -> Self::Axpy; 26 | /// Returns an initialized Copy operation. 27 | fn copy(&self) -> Self::Copy; 28 | /// Returns an initialized Dot operation. 29 | fn dot(&self) -> Self::Dot; 30 | /// Returns an initialized Nrm2 operation. 31 | fn nrm2(&self) -> Self::Nrm2; 32 | /// Returns an initialized Scale operation. 33 | fn scale(&self) -> Self::Scale; 34 | /// Returns an initialized Swap operation. 35 | fn swap(&self) -> Self::Swap; 36 | } 37 | -------------------------------------------------------------------------------- /coaster-blas/src/frameworks/cuda/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides BLAS for a CUDA backend. 2 | #![allow(missing_docs)] 3 | use crate::cublas; 4 | use crate::plugin::*; 5 | use crate::transpose::Transpose; 6 | use coaster::backend::Backend; 7 | use coaster::frameworks::cuda::Cuda; 8 | use coaster::plugin::Error as PluginError; 9 | use coaster::tensor::{ITensorDesc, SharedTensor}; 10 | 11 | #[macro_use] 12 | pub mod helper; 13 | 14 | impl Asum for Backend { 15 | iblas_asum_for_cuda!(f32); 16 | } 17 | 18 | impl Axpy for Backend { 19 | iblas_axpy_for_cuda!(f32); 20 | } 21 | 22 | impl Copy for Backend { 23 | iblas_copy_for_cuda!(f32); 24 | } 25 | 26 | impl Dot for Backend { 27 | iblas_dot_for_cuda!(f32); 28 | } 29 | 30 | impl Nrm2 for Backend { 31 | iblas_nrm2_for_cuda!(f32); 32 | } 33 | 34 | impl Scal for Backend { 35 | iblas_scal_for_cuda!(f32); 36 | } 37 | 38 | impl Swap for Backend { 39 | iblas_swap_for_cuda!(f32); 40 | } 41 | 42 | impl Gbmv for Backend { 43 | iblas_gbmv_for_cuda!(f32); 44 | } 45 | 46 | impl Gemm for Backend { 47 | iblas_gemm_for_cuda!(f32); 48 | } 49 | -------------------------------------------------------------------------------- /coaster-blas/src/frameworks/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides the specific Framework implementations for the Library Operations. 2 | 3 | #[cfg(feature = "cuda")] 4 | pub mod cuda; 5 | #[cfg(feature = "native")] 6 | pub mod native; 7 | #[cfg(feature = "opencl")] 8 | pub mod opencl; 9 | 10 | #[cfg(not(feature = "cuda"))] 11 | use log as _; 12 | -------------------------------------------------------------------------------- /coaster-blas/src/operation.rs: -------------------------------------------------------------------------------- 1 | //! Provides the IOperationX operation traits for Coaster's Framework implementation. 2 | 3 | use crate::transpose::Transpose; 4 | use coaster::plugin::Error; 5 | use coaster::tensor::SharedTensor; 6 | 7 | /// Describes a Asum Operation. 8 | pub trait IOperationAsum { 9 | /// Computes the Asum operation. 10 | fn compute(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error>; 11 | } 12 | 13 | /// Describes a Axpy Operation. 14 | pub trait IOperationAxpy { 15 | /// Computes the Axpy operation. 16 | fn compute( 17 | &self, 18 | a: &SharedTensor, 19 | x: &SharedTensor, 20 | y: &mut SharedTensor, 21 | ) -> Result<(), Error>; 22 | } 23 | 24 | /// Describes a Copy Operation. 25 | pub trait IOperationCopy { 26 | /// Computes the Copy operation. 27 | fn compute(&self, x: &SharedTensor, y: &mut SharedTensor) -> Result<(), Error>; 28 | } 29 | 30 | /// Describes a Dot Operation. 31 | pub trait IOperationDot { 32 | /// Computes the Dot operation. 
33 | fn compute( 34 | &self, 35 | x: &SharedTensor, 36 | y: &SharedTensor, 37 | result: &mut SharedTensor, 38 | ) -> Result<(), Error>; 39 | } 40 | 41 | /// Describes a Nrm2 Operation. 42 | pub trait IOperationNrm2 { 43 | /// Computes the Nrm2 operation. 44 | fn compute(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error>; 45 | } 46 | 47 | /// Describes a Scale Operation. 48 | pub trait IOperationScale { 49 | /// Computes the Scale operation. 50 | fn compute(&self, a: &SharedTensor, x: &mut SharedTensor) -> Result<(), Error>; 51 | } 52 | 53 | /// Describes a Swap Operation. 54 | pub trait IOperationSwap { 55 | /// Computes the Swap operation. 56 | fn compute(&self, x: &mut SharedTensor, y: &mut SharedTensor) -> Result<(), Error>; 57 | } 58 | 59 | /// Describes a Gemm Operation. 60 | pub trait IOperationGemm { 61 | /// Computes the Gemm operation. 62 | #[allow(clippy::too_many_arguments)] 63 | #[allow(clippy::many_single_char_names)] 64 | fn compute( 65 | &self, 66 | alpha: &SharedTensor, 67 | at: Transpose, 68 | a_dims: &[usize], 69 | a: &SharedTensor, 70 | bt: Transpose, 71 | b_dims: &[usize], 72 | b: &SharedTensor, 73 | beta: &SharedTensor, 74 | c_dims: &[usize], 75 | c: &mut SharedTensor, 76 | ) -> Result<(), ::coaster::error::Error>; 77 | } 78 | -------------------------------------------------------------------------------- /coaster-blas/src/transpose.rs: -------------------------------------------------------------------------------- 1 | //! Provides the Transpose functionality for Matrix operations. 2 | #[cfg(feature = "cuda")] 3 | use crate::cublas::api::Operation; 4 | #[cfg(feature = "cuda")] 5 | use std::convert::From; 6 | 7 | #[derive(Debug, Copy, Clone)] 8 | /// Possible transpose operations that can be applied in Level 2 and Level 3 BLAS operations. 9 | pub enum Transpose { 10 | /// Take the matrix as it is. 11 | NoTrans, 12 | /// Take the transpose of the matrix. 13 | Trans, 14 | /// Take the conjugate transpose of the matrix. 15 | ConjTrans, 16 | } 17 | 18 | #[cfg(feature = "native")] 19 | impl Transpose { 20 | /// Create a rust-blas `Transpose` from coaster-blas `Transpose`. 
21 | pub fn to_rblas(self) -> ::rblas::attribute::Transpose { 22 | match self { 23 | Transpose::NoTrans => ::rblas::attribute::Transpose::NoTrans, 24 | Transpose::Trans => ::rblas::attribute::Transpose::Trans, 25 | Transpose::ConjTrans => ::rblas::attribute::Transpose::ConjTrans, 26 | } 27 | } 28 | } 29 | 30 | #[cfg(feature = "cuda")] 31 | impl From for Transpose { 32 | fn from(op: Operation) -> Self { 33 | match op { 34 | Operation::NoTrans => Transpose::NoTrans, 35 | Operation::Trans => Transpose::Trans, 36 | Operation::ConjTrans => Transpose::ConjTrans, 37 | } 38 | } 39 | } 40 | 41 | #[cfg(feature = "cuda")] 42 | impl From for Operation { 43 | fn from(op: Transpose) -> Self { 44 | match op { 45 | Transpose::NoTrans => Operation::NoTrans, 46 | Transpose::Trans => Operation::Trans, 47 | Transpose::ConjTrans => Operation::ConjTrans, 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /coaster-nn/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /coaster-nn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "coaster-nn" 3 | description = "coaster plugin providing Neural Network operations" 4 | version = "0.5.0" 5 | authors = [ 6 | "Bernhard Schuster ", 7 | "Lissa Hyacinth ", 8 | "Michael Hirn", 9 | "Maximilian Goisser", 10 | ] 11 | repository = "https://github.com/spearow/coaster-nn" 12 | homepage = "https://spearow.io/coaster-nn" 13 | documentation = "https://spearow.io/projects/coaster-nn/documentation" 14 | readme = "README.md" 15 | keywords = ["neural-network", "coaster", "computation", "hpc", "plugin"] 16 | license = "MIT OR Apache-2.0" 17 | edition = "2018" 18 | 19 | [dependencies] 20 | coaster = { path = "../coaster", version = "0.2", default-features = false } 21 | rcudnn = { path = "../rcudnn/cudnn", version = "1.7", optional = true } 22 | libc = { version = "0.2", optional = true } 23 | log = "0.4" 24 | rand = { version = "0.8", optional = true } 25 | rand_chacha = { version = "0.3.0", optional = true } 26 | thiserror = { version = "1.0", optional = true } 27 | 28 | [dev-dependencies] 29 | env_logger = "0.9" 30 | num = "0.4" 31 | 32 | [features] 33 | default = ["native", "cuda"] 34 | native = ["coaster/native", "rand", "rand_chacha"] 35 | cuda = ["coaster/cuda", "rcudnn", "libc", "thiserror"] 36 | opencl = ["coaster/opencl"] 37 | unstable = [] 38 | -------------------------------------------------------------------------------- /coaster-nn/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2017-2020 Bernhard Schuster 4 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /coaster-nn/perf/README.md: -------------------------------------------------------------------------------- 1 | # Profiling 2 | 3 | Collenchyma comes with scripts to help with profiling performance problems. 4 | 5 | Run [perf](http://www.brendangregg.com/perf.html) on one of the benchmark test: 6 | 7 | ```sh 8 | # compile latest version of benchmarks with DWARF information 9 | cargo rustc --bench [bench_file_name] -- -g 10 | sudo ./perf/run_perf.sh [bench_fn_name] # perf needs sudo 11 | ``` 12 | -------------------------------------------------------------------------------- /coaster-nn/perf/perf_rblas.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | perf record -a -g --output perf_rblas_data.perf target/debug/rblas_overhead-cf1a2670c118749d --bench bench_1000_dot_100_rblas 3 | perf script -f -i perf_rblas_data.perf > perf_rblas_script.perf 4 | /home/hobofan/stuff/FlameGraph/stackcollapse-perf.pl perf_rblas_script.perf > perf_rblas_folded.perf 5 | /home/hobofan/stuff/FlameGraph/flamegraph.pl perf_rblas_folded.perf > perf_rblas_graph.svg 6 | -------------------------------------------------------------------------------- /coaster-nn/perf/run_perf.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | if [ $# -eq 0 ] 3 | then 4 | echo "No benchmark name supplied" 5 | exit 1 6 | fi 7 | benchname=$1 8 | mkdir -p target/perf 9 | perf record -a -g --output target/perf/${benchname}.data target/debug/rblas_overhead-c02a41a1401d43da --bench ${benchname} 10 | perf script -f -i target/perf/${benchname}.data > target/perf/${benchname}.scripted 11 | stackcollapse-perf target/perf/${benchname}.scripted | grep ${benchname} > target/perf/${benchname}.folded 12 | flamegraph target/perf/${benchname}.folded > target/perf/${benchname}.svg 13 | -------------------------------------------------------------------------------- /coaster-nn/rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_strings = false 2 | reorder_imports = true 3 | -------------------------------------------------------------------------------- /coaster-nn/src/frameworks/cuda/helper.rs: -------------------------------------------------------------------------------- 1 | //! Provides useful macros for easier NN implementation for CUDA/cuDNN. 2 | 3 | macro_rules! read { 4 | ($x:ident, $slf:ident) => { 5 | $x.read($slf.device()).unwrap() 6 | }; 7 | } 8 | 9 | macro_rules! read_write { 10 | ($x:ident, $slf:ident) => { 11 | $x.read_write($slf.device()).unwrap() 12 | }; 13 | } 14 | 15 | macro_rules! write_only { 16 | ($x:ident, $slf:ident) => { 17 | $x.write_only($slf.device()).unwrap() 18 | }; 19 | } 20 | 21 | // trans! cannot be inlined into macros above, because `$mem` would become 22 | intermediate variable and `*mut $t` will outlive it. 23 | macro_rules!
trans { 24 | ($mem:ident) => { 25 | unsafe { ::std::mem::transmute::(*$mem.id_c()) } 26 | }; 27 | } 28 | 29 | macro_rules! trans_mut { 30 | ($mem:ident) => { 31 | unsafe { ::std::mem::transmute::(*$mem.id_c()) } 32 | }; 33 | } 34 | 35 | macro_rules! exec { 36 | ($f:expr => $msg:literal) => {{ 37 | let res = $f; 38 | res.map_err(|e| { 39 | log::debug!("Unable to execute operation {}: {:?}", $msg, e); 40 | co::plugin::Error::PluginInner(Box::new(e)) 41 | }) 42 | }}; 43 | ($f:expr => $msg:literal as $err:ty) => {{ 44 | exec!($f => $msg).map_err(<$err>::from) 45 | }}; 46 | } 47 | 48 | macro_rules! exec2 { 49 | ($f:expr => $msg:literal) => {{ 50 | exec!($f => $msg as co::Error) 51 | }}; 52 | } 53 | -------------------------------------------------------------------------------- /coaster-nn/src/frameworks/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides the specific Framework implementations for the Library Operations. 2 | 3 | #[cfg(feature = "native")] 4 | pub mod native; 5 | //#[cfg(feature = "opencl")] 6 | //pub mod opencl; 7 | 8 | #[cfg(feature = "cuda")] 9 | #[macro_use] 10 | pub mod cuda; 11 | -------------------------------------------------------------------------------- /coaster-nn/src/frameworks/opencl.rs: -------------------------------------------------------------------------------- 1 | //! Provides NN for a OpenCL backend. 2 | 3 | use binary::*; 4 | use co::Error; 5 | use co::prelude::*; 6 | use operation::*; 7 | use plugin::*; 8 | 9 | impl INnBinary for Program { 10 | type Sigmoid = Kernel; 11 | 12 | fn sigmoid(&self) -> Self::Sigmoid { 13 | unimplemented!() 14 | } 15 | } 16 | 17 | impl IOperationSigmoid for Kernel { 18 | fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> { 19 | unimplemented!() 20 | } 21 | } 22 | 23 | impl INn for Backend { 24 | type B = Program; 25 | 26 | fn binary(&self) -> &Self::B { 27 | self.binary() 28 | } 29 | 30 | fn device(&self) -> &DeviceType { 31 | self.device() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /coaster-nn/src/tests/dropout.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | 3 | use crate::co::plugin::numeric_helpers::Float; 4 | use crate::co::prelude::*; 5 | 6 | use crate::plugin::Dropout; 7 | use crate::tests::{filled_tensor, tensor_assert_eq_tensor, tensor_assert_ne_tensor, Epsilon}; 8 | 9 | pub fn test_dropout(backend: Backend) 10 | where 11 | T: Float + Epsilon + fmt::Debug, 12 | Backend: Dropout + IBackend, 13 | { 14 | let test = 15 | |dims: &[usize], 16 | probability: f32, 17 | seed: u64, 18 | tensor_assert_func: &dyn Fn(&SharedTensor, &SharedTensor, f64)| { 19 | let conf = Dropout::::new_dropout_config(&backend, probability, seed).unwrap(); 20 | 21 | let inp_element_num = dims.iter().fold(1, |factorial, f| factorial * f); 22 | 23 | let inp_vals: Vec = (0..inp_element_num).map(|i| (i * i) as f64).collect(); 24 | 25 | let x = filled_tensor(&backend, dims, &inp_vals); 26 | let mut r = SharedTensor::::new(&dims); 27 | 28 | backend.dropout(&x, &mut r, &conf).unwrap(); 29 | 30 | tensor_assert_func(&x, &r, 0.0); 31 | }; 32 | 33 | test(&[1, 5, 5, 2], 0.999, 77777, &tensor_assert_ne_tensor); 34 | test(&[1, 1, 1, 1], 0.000, 77777, &tensor_assert_eq_tensor); 35 | test(&[5, 200, 200, 4], 0.5, 77777, &tensor_assert_ne_tensor); 36 | } 37 | 38 | // TODO 39 | // pub fn test_dropout_grad(backend: Backend) 40 | // where T: Float + Epsilon + fmt::Debug, 41 | 
// Backend: Dropout + IBackend { 42 | // 43 | // } 44 | 45 | mod cuda { 46 | use super::*; 47 | test_cuda!(test_dropout, dropout_f32, dropout_f64); 48 | // TODO test_cuda!(test_dropout_grad, dropout_grad_f32, dropout_grad_f64); 49 | } 50 | 51 | mod native { 52 | use super::*; 53 | test_native!(test_dropout, dropout_f32, dropout_f64); 54 | // TODO test_native!(test_dropout_grad, dropout_grad_f32, dropout_grad_f64); 55 | } 56 | -------------------------------------------------------------------------------- /coaster/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /coaster/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "coaster" 3 | description = "high-performance computation on any hardware" 4 | version = "0.2.0" 5 | authors = [ 6 | "Bernhard Schuster ", 7 | "Lissa Hyacinth ", 8 | "Michael Hirn", 9 | "Maximilian Goisser", 10 | ] 11 | edition = "2018" 12 | repository = "https://github.com/spearow/coaster" 13 | homepage = "https://spearow.io/projects/coaster" 14 | documentation = "https://spearow.github.io/coaster" 15 | readme = "README.md" 16 | keywords = ["backend", "computation", "opencl", "cuda", "hpc"] 17 | license = "MIT OR Apache-2.0" 18 | 19 | [dependencies] 20 | libc = "0.2" 21 | bitflags = "1" 22 | enum_primitive = "0.1" 23 | byteorder = "1" 24 | num = "0.4" 25 | lazy_static = "1" 26 | thiserror = "1.0" 27 | rcudnn = { version = "1.7", path = "../rcudnn/cudnn", optional = true } 28 | rcublas = { version = "0.6", path = "../rcublas/cublas", optional = true } 29 | 30 | [dev-dependencies] 31 | coaster-nn = { path = "../coaster-nn", default-features = false } 32 | trybuild = "1.0" 33 | 34 | [features] 35 | default = ["native"] 36 | native = [] 37 | cuda = ["rcudnn", "rcublas"] 38 | opencl = [] 39 | unstable_alloc = [] # faster but unstable memory allocation on native machines 40 | -------------------------------------------------------------------------------- /coaster/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2017-2020 Bernhard Schuster 4 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /coaster/examples/readme.rs: -------------------------------------------------------------------------------- 1 | use coaster as co; 2 | use coaster_nn as nn; 3 | #[cfg(feature = "cuda")] 4 | use rcublas; 5 | 6 | #[cfg(feature = "cuda")] 7 | use co::frameworks::cuda::get_cuda_backend; 8 | use co::frameworks::native::flatbox::FlatBox; 9 | #[cfg(not(feature = "cuda"))] 10 | use co::frameworks::native::get_native_backend; 11 | use co::prelude::*; 12 | use nn::*; 13 | 14 | fn write_to_memory(mem: &mut FlatBox, data: &[T]) { 15 | let mem_buffer = mem.as_mut_slice::(); 16 | for (index, datum) in data.iter().enumerate() { 17 | mem_buffer[index] = *datum; 18 | } 19 | } 20 | 21 | fn main() { 22 | #[cfg(feature = "cuda")] 23 | let backend = get_cuda_backend(); 24 | 25 | #[cfg(not(feature = "cuda"))] 26 | let backend = get_native_backend(); 27 | 28 | // Initialize two SharedTensors. 29 | let mut x = SharedTensor::::new(&(1, 1, 3)); 30 | // let mut result = SharedTensor::::new(&(1, 1, 3)); 31 | // Fill `x` with some data. 32 | let payload: &[f32] = &::std::iter::repeat(1f32) 33 | .take(x.capacity()) 34 | .collect::>(); 35 | let native = Backend::::default().unwrap(); 36 | write_to_memory(x.write_only(native.device()).unwrap(), payload); // Write to native host memory. 37 | // Run the sigmoid operation, provided by the NN Plugin, on your CUDA enabled GPU. 38 | // FIXME: Sigmoid cannot be included from coaster-nn without using cuda and native features 39 | // from coaster-nn. This causes the error https://github.com/rust-lang/cargo/issues/6915 , 40 | // and so sigmoid has been disabled for now. 41 | // backend.sigmoid(&mut x, &mut result).unwrap(); 42 | // See the result. 43 | // println!("{:?}", result.read(native.device()).unwrap().as_slice::()); 44 | } 45 | -------------------------------------------------------------------------------- /coaster/index.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coaster/perf/README.md: -------------------------------------------------------------------------------- 1 | # Profiling 2 | 3 | Collenchyma comes with scripts to help with profiling performance problems. 4 | 5 | Run [perf](http://www.brendangregg.com/perf.html) on one of the benchmark test: 6 | 7 | ```sh 8 | # compile latest version of benchmarks with DWARF information 9 | cargo rustc --bench rblas_overhead -- -g 10 | # benchmark binary is at target/debug/shared_memory-54e69b24ec0c2d04 11 | # benchmark is called bench_256_sync_1mb_native_cuda 12 | sudo ./perf/run_perf.sh shared_memory-54e69b24ec0c2d04 bench_256_sync_1mb_native_cuda # perf needs sudo 13 | ``` 14 | -------------------------------------------------------------------------------- /coaster/perf/run_perf.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -e 3 | if [ $# -lt 2 ] 4 | then 5 | echo "No binary name or benchmark name supplied" 6 | exit 1 7 | fi 8 | binaryname=$1 9 | benchname=$2 10 | mkdir -p target/perf 11 | perf record -a -g --output target/perf/${benchname}.data target/debug/${binaryname} --bench ${benchname} 12 | perf script -f -i target/perf/${benchname}.data > target/perf/${benchname}.scripted 13 | stackcollapse-perf target/perf/${benchname}.scripted | grep ${benchname} > target/perf/${benchname}.folded 14 | flamegraph target/perf/${benchname}.folded > target/perf/${benchname}.svg 15 | -------------------------------------------------------------------------------- /coaster/rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_strings = false 2 | reorder_imports = true 3 | -------------------------------------------------------------------------------- /coaster/src/binary.rs: -------------------------------------------------------------------------------- 1 | //! Provides the generic functionality for a backend-specific implementation of a 2 | //! [library][libraries]. 3 | //! [libraries]: ../libraries/index.html 4 | //! 5 | //! A binary defines one or (more often) many operations, which share related functionalities and are 6 | //! provided by a specific [library][libraries] such as [Blas][blas]. 7 | //! 8 | //! A binary needs to be 'built', which is handled by the specific framework implementation of a 9 | //! binary representation, and returns initialized operations based on a [library][libraries]. 10 | //! 11 | //! You are usually not interacting with a binary itself, but rather use it to construct the 12 | //! backend-agnostic operations, which can then be run and parallelized via a 13 | //! unified interface - `backend.__name_of_the_operation__`. 14 | //! 15 | //! ## Development 16 | //! 17 | //! The functionality provided here is used to construct specific Coaster binaries, which are 18 | //! used to construct the basic computation behavior that comes shipped with Coaster. However, it should 19 | //! allow you to define and run your own backend-agnostic programs as well. 20 | //! 21 | //! [blas]: ../libraries/blas/index.html 22 | 23 | /// Defines the functionality for turning a library into backend-specific, executable operations. 24 | pub trait IBinary { 25 | // Returns the unique identifier of the Binary. 26 | //fn id(&self) -> isize; 27 | // Creates a HashMap of available, ready-to-use operations, based on the provided library and 28 | // tailored for a framework. 29 | //fn create_operations(); 30 | } 31 | -------------------------------------------------------------------------------- /coaster/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Defines the general set of error types in Coaster. 2 | 3 | #[derive(Debug, thiserror::Error)] 4 | /// Defines the set of available Coaster error types. 5 | pub enum Error { 6 | /// Failure related to the Framework implementation. 7 | #[error("Framework error")] 8 | Framework(#[from] crate::framework::Error), 9 | /// Failure related to the Tensor. 10 | #[error("Tensor error")] 11 | Tensor(#[from] crate::tensor::Error), 12 | /// Failure at Plugin Operation. 13 | #[error("Plugin error")] 14 | Plugin(#[from] crate::plugin::Error), 15 | /// Failure related to a Device.
16 | #[error("Device error")] 17 | Device(#[from] crate::device::Error), 18 | } 19 | -------------------------------------------------------------------------------- /coaster/src/frameworks/cuda/api/driver/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides a safe wrapper around the CUDA Driver API. 2 | 3 | pub use self::error::Error; 4 | 5 | #[derive(Debug, Copy, Clone)] 6 | /// Defines the Cuda API. 7 | pub struct API; 8 | 9 | mod context; 10 | mod device; 11 | mod error; 12 | pub mod ffi; 13 | mod memory; 14 | mod utils; 15 | -------------------------------------------------------------------------------- /coaster/src/frameworks/cuda/api/driver/utils.rs: -------------------------------------------------------------------------------- 1 | //! Provides Cuda Driver API utility functionality. 2 | 3 | use super::ffi::*; 4 | use super::{Error, API}; 5 | 6 | impl API { 7 | /// Initialize the Cuda Driver API. 8 | /// 9 | /// Must be called before any other function from the driver API. 10 | pub fn init() -> Result<(), Error> { 11 | Ok(unsafe { API::ffi_init() }?) 12 | } 13 | 14 | unsafe fn ffi_init() -> Result<(), Error> { 15 | const FLAGS: u32 = 0u32; 16 | match cuInit(FLAGS) { 17 | CUresult::CUDA_SUCCESS => Ok(()), 18 | CUresult::CUDA_ERROR_INVALID_VALUE => { 19 | Err(Error::InvalidValue("Invalid value provided.")) 20 | } 21 | CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidDevice("Invalid device.")), 22 | CUresult::CUDA_ERROR_NO_DEVICE => Err(Error::NoDevice( 23 | "Unable to find a CUDA device. Try running `nvidia-smi` on your console.", 24 | )), 25 | status => Err(Error::Unknown( 26 | "Unable to initialize the Cuda Driver API.", 27 | status as i32 as u64, 28 | )), 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /coaster/src/frameworks/cuda/api/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides safe wrappers around various CUDA APIs. 2 | //! 3 | //! You can find wrappers for the
4 | //! * CUDA Driver API 5 | //! * CUDA cuDNN API 6 | 7 | pub use self::driver::ffi as DriverFFI; 8 | pub use self::driver::Error as DriverError; 9 | pub use self::driver::API as Driver; 10 | 11 | pub mod driver; 12 | -------------------------------------------------------------------------------- /coaster/src/frameworks/cuda/function.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around Cuda's Function. 2 | 3 | use crate::operation::IOperation; 4 | 5 | #[derive(Debug, Copy, Clone)] 6 | /// Defines a Cuda Function. 7 | /// 8 | /// A Function is Cuda's version of Coaster's [operation][operation]. 9 | /// [operation]: ../../operation/index.html 10 | pub struct Function { 11 | id: isize, 12 | } 13 | 14 | impl Function { 15 | /// Initializes a new Cuda Function. 16 | pub fn from_isize(id: isize) -> Function { 17 | Function { id } 18 | } 19 | 20 | /* 21 | /// Initializes a new OpenCL device from its C type. 22 | //pub fn from_c(id: cl::kernel_id) -> Function { 23 | Function { id: id as isize } 24 | } 25 | 26 | /// Returns the id as its C type. 27 | pub fn id_c(&self) -> cl::kernel_id { 28 | self.id as cl::kernel_id 29 | } 30 | */ 31 | } 32 | 33 | impl IOperation for Function {} 34 | -------------------------------------------------------------------------------- /coaster/src/frameworks/cuda/memory.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around Cuda's memory. 2 | 3 | use super::api::{Driver, DriverError, DriverFFI}; 4 | use crate::device::IMemory; 5 | 6 | use std::{fmt, ptr}; 7 | 8 | /// Defines a Cuda Memory. 9 | pub struct Memory { 10 | id: DriverFFI::CUdeviceptr, 11 | /// Pointer to host memory that is used for pinned host memory. 12 | host_ptr: *mut u8, 13 | } 14 | 15 | impl fmt::Debug for Memory { 16 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 17 | write!(f, "Memory({})", self.id) 18 | } 19 | } 20 | 21 | impl Drop for Memory { 22 | #[allow(unused_must_use)] 23 | fn drop(&mut self) { 24 | Driver::mem_free(*self.id_c()); 25 | } 26 | } 27 | 28 | #[allow(unused_mut)] 29 | impl Memory { 30 | /// Initializes a new Cuda memory. 31 | pub fn new(size: usize) -> Result { 32 | Driver::mem_alloc(size) 33 | } 34 | 35 | /// Initializes a new Cuda memory from its C type. 36 | pub fn from_c(id: DriverFFI::CUdeviceptr) -> Memory { 37 | Memory { 38 | id, 39 | host_ptr: ptr::null_mut(), 40 | } 41 | } 42 | 43 | /// Returns the memory id as its C type. 44 | pub fn id_c(&self) -> &DriverFFI::CUdeviceptr { 45 | &self.id 46 | } 47 | } 48 | 49 | impl IMemory for Memory {} 50 | -------------------------------------------------------------------------------- /coaster/src/frameworks/cuda/module.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around Cuda's Module. 2 | 3 | use crate::binary::IBinary; 4 | 5 | #[derive(Debug, Copy, Clone)] 6 | /// Defines a Cuda Module. 7 | /// 8 | /// A Module is Cuda's version of Coaster's [binary][binary]. 9 | /// [binary]: ../../binary/index.html 10 | pub struct Module { 11 | id: isize, 12 | } 13 | 14 | impl Module { 15 | /// Initializes a new Cuda Module. 16 | pub fn from_isize(id: isize) -> Module { 17 | Module { id } 18 | } 19 | 20 | // /// Initializes a new Cuda Module from its C type.
21 | // pub fn from_c(id: cl::kernel_id) -> Module { 22 | // Module { 23 | // id: id as isize, 24 | // blas_dot: Function::from_isize(1), 25 | // blas_scale: Function::from_isize(1), 26 | // blas_axpy: Function::from_isize(1), 27 | // } 28 | // } 29 | // 30 | // /// Returns the id as its C type. 31 | // pub fn id_c(&self) -> cl::kernel_id { 32 | // self.id as cl::kernel_id 33 | // } 34 | } 35 | 36 | impl IBinary for Module {} 37 | -------------------------------------------------------------------------------- /coaster/src/frameworks/mod.rs: -------------------------------------------------------------------------------- 1 | //! Exposes the specific Framework implementations. 2 | 3 | #[cfg(feature = "cuda")] 4 | pub use self::cuda::Cuda; 5 | pub use self::native::Native; 6 | #[cfg(feature = "opencl")] 7 | pub use self::opencl::OpenCL; 8 | 9 | #[cfg(feature = "cuda")] 10 | pub mod cuda; 11 | pub mod native; 12 | #[cfg(feature = "opencl")] 13 | pub mod opencl; 14 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/binary.rs: -------------------------------------------------------------------------------- 1 | //! Provides a binary on native CPU. 2 | 3 | use crate::binary::IBinary; 4 | 5 | #[derive(Debug, Default, Copy, Clone)] 6 | /// Defines a host CPU binary. 7 | pub struct Binary { 8 | id: isize, 9 | } 10 | 11 | impl Binary { 12 | /// Initializes the native CPU binary. 13 | pub fn new() -> Binary { 14 | Binary { id: 0 } 15 | } 16 | } 17 | 18 | impl IBinary for Binary {} 19 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/device.rs: -------------------------------------------------------------------------------- 1 | //! Provides a hardware aka. the host CPU. 2 | use std::any::Any; 3 | use std::hash::{Hash, Hasher}; 4 | 5 | use super::allocate_boxed_slice; 6 | use super::flatbox::FlatBox; 7 | use super::hardware::Hardware; 8 | use crate::device::Error as DeviceError; 9 | use crate::device::{IDevice, MemorySync}; 10 | 11 | #[derive(Debug, Clone)] 12 | /// Defines the host CPU Hardware. 13 | /// 14 | /// Can later be transformed into a [Coaster hardware][hardware]. 15 | /// [hardware]: ../../hardware/index.html 16 | pub struct Cpu { 17 | hardwares: Vec, 18 | } 19 | 20 | impl Cpu { 21 | /// Initializes a new OpenCL hardware. 
22 | pub fn new(hardwares: Vec) -> Cpu { 23 | Cpu { hardwares } 24 | } 25 | } 26 | 27 | impl IDevice for Cpu { 28 | type H = Hardware; 29 | type M = FlatBox; 30 | 31 | fn id(&self) -> &isize { 32 | static ID: isize = 0; 33 | &ID 34 | } 35 | 36 | fn hardwares(&self) -> &Vec { 37 | &self.hardwares 38 | } 39 | 40 | fn alloc_memory(&self, size: usize) -> Result { 41 | let bx: Box<[u8]> = allocate_boxed_slice(size); 42 | Ok(FlatBox::from_box(bx)) 43 | } 44 | } 45 | 46 | impl MemorySync for Cpu { 47 | // transfers from/to Cuda and OpenCL are defined on their MemorySync traits 48 | fn sync_in( 49 | &self, 50 | my_memory: &mut dyn Any, 51 | src_device: &dyn Any, 52 | src_memory: &dyn Any, 53 | ) -> Result<(), DeviceError> { 54 | if src_device.downcast_ref::().is_some() { 55 | let my_mem = my_memory.downcast_mut::().unwrap(); 56 | let src_mem = src_memory.downcast_ref::().unwrap(); 57 | my_mem 58 | .as_mut_slice::() 59 | .clone_from_slice(src_mem.as_slice::()); 60 | return Ok(()); 61 | } 62 | 63 | Err(DeviceError::NoMemorySyncRoute) 64 | } 65 | 66 | fn sync_out( 67 | &self, 68 | my_memory: &dyn Any, 69 | dst_device: &dyn Any, 70 | dst_memory: &mut dyn Any, 71 | ) -> Result<(), DeviceError> { 72 | if dst_device.downcast_ref::().is_some() { 73 | let my_mem = my_memory.downcast_ref::().unwrap(); 74 | let dst_mem = dst_memory.downcast_mut::().unwrap(); 75 | dst_mem 76 | .as_mut_slice::() 77 | .clone_from_slice(my_mem.as_slice::()); 78 | return Ok(()); 79 | } 80 | 81 | Err(DeviceError::NoMemorySyncRoute) 82 | } 83 | } 84 | 85 | impl PartialEq for Cpu { 86 | fn eq(&self, _: &Self) -> bool { 87 | true 88 | } 89 | } 90 | 91 | impl Eq for Cpu {} 92 | 93 | impl Hash for Cpu { 94 | fn hash(&self, state: &mut H) { 95 | self.id().hash(state); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/error.rs: -------------------------------------------------------------------------------- 1 | /// Defines a generic set of Native Errors. 2 | use std::{error, fmt}; 3 | 4 | #[derive(Debug, Copy, Clone, PartialEq, Eq)] 5 | /// Defines the Native Error. 6 | pub enum Error { 7 | /// Failure related to allocation, syncing memory 8 | Memory(&'static str), 9 | } 10 | 11 | impl fmt::Display for Error { 12 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 13 | match *self { 14 | Error::Memory(ref err) => write!(f, "{}", err), 15 | } 16 | } 17 | } 18 | 19 | impl error::Error for Error { 20 | fn description(&self) -> &str { 21 | match *self { 22 | Error::Memory(ref err) => err, 23 | } 24 | } 25 | 26 | fn cause(&self) -> Option<&dyn error::Error> { 27 | match *self { 28 | Error::Memory(_) => None, 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/flatbox.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Box without any knowledge of its underlying type. 2 | 3 | use crate::device::IMemory; 4 | use std::fmt; 5 | use std::mem; 6 | use std::slice; 7 | 8 | /// A Box without any knowledge of its underlying type. 9 | pub struct FlatBox { 10 | len: usize, 11 | raw_box: *mut [u8], 12 | } 13 | 14 | impl FlatBox { 15 | /// Create FlatBox from Box, consuming it. 16 | pub fn from_box(b: Box<[u8]>) -> FlatBox { 17 | FlatBox { 18 | len: b.len(), 19 | raw_box: Box::into_raw(b), 20 | } 21 | } 22 | 23 | /// Access memory as slice. 24 | /// 25 | /// The preffered way to access native memory. 
26 | pub fn as_slice(&self) -> &[T] { 27 | unsafe { slice::from_raw_parts_mut(self.raw_box as *mut T, self.len / mem::size_of::()) } 28 | } 29 | 30 | /// Access memory as mutable slice. 31 | /// 32 | /// The preferred way to access native memory. 33 | pub fn as_mut_slice(&mut self) -> &mut [T] { 34 | unsafe { slice::from_raw_parts_mut(self.raw_box as *mut T, self.len / mem::size_of::()) } 35 | } 36 | 37 | /// Returns memory size of the Flatbox. 38 | pub fn byte_size(&self) -> usize { 39 | self.len 40 | } 41 | } 42 | 43 | impl Drop for FlatBox { 44 | fn drop(&mut self) { 45 | unsafe { 46 | drop(Box::from_raw(self.raw_box)); 47 | } 48 | } 49 | } 50 | 51 | impl fmt::Debug for FlatBox { 52 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 53 | write!(f, "FlatBox of length {}", &self.len) 54 | } 55 | } 56 | 57 | impl IMemory for FlatBox {} 58 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/function.rs: -------------------------------------------------------------------------------- 1 | //! Provides an operation on the native CPU. 2 | 3 | use crate::operation::IOperation; 4 | 5 | #[derive(Debug, Default, Copy, Clone)] 6 | /// Defines a host CPU operation. 7 | pub struct Function; 8 | 9 | impl Function { 10 | /// Initializes a native CPU operation. 11 | pub fn new() -> Function { 12 | Function 13 | } 14 | } 15 | 16 | impl IOperation for Function {} 17 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/hardware.rs: -------------------------------------------------------------------------------- 1 | //! Provides the hardware representation of the host CPU. 2 | 3 | use crate::hardware::{HardwareType, IHardware}; 4 | 5 | #[derive(Debug, Clone)] 6 | /// Defines the host CPU Hardware. 7 | /// 8 | /// Can later be transformed into a [Coaster hardware][hardware]. 9 | /// [hardware]: ../../hardware/index.html 10 | pub struct Hardware { 11 | id: isize, 12 | name: Option, 13 | hardware_type: Option, 14 | compute_units: Option, 15 | } 16 | 17 | impl Default for Hardware { 18 | fn default() -> Self { 19 | Hardware { 20 | id: -1, 21 | name: None, 22 | hardware_type: None, 23 | compute_units: None, 24 | } 25 | } 26 | } 27 | 28 | impl Hardware { 29 | /// Initializes a new native hardware. 30 | pub fn new(id: isize) -> Hardware { 31 | Hardware { 32 | id, 33 | ..Hardware::default() 34 | } 35 | } 36 | } 37 | 38 | impl IHardware for Hardware { 39 | fn id(&self) -> isize { 40 | self.id 41 | } 42 | 43 | fn name(&self) -> Option { 44 | self.name.clone() 45 | } 46 | 47 | fn set_name(&mut self, name: Option) -> Self { 48 | self.name = name; 49 | self.clone() 50 | } 51 | 52 | fn hardware_type(&self) -> Option { 53 | self.hardware_type 54 | } 55 | 56 | fn set_hardware_type(&mut self, hardware_type: Option) -> Self { 57 | self.hardware_type = hardware_type; 58 | self.clone() 59 | } 60 | 61 | fn compute_units(&self) -> Option { 62 | self.compute_units 63 | } 64 | 65 | fn set_compute_units(&mut self, compute_units: Option) -> Self { 66 | self.compute_units = compute_units; 67 | self.clone() 68 | } 69 | 70 | fn build(self) -> Hardware { 71 | Hardware { 72 | id: self.id(), 73 | name: self.name(), 74 | hardware_type: self.hardware_type(), 75 | compute_units: self.compute_units(), 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/mod.rs: -------------------------------------------------------------------------------- 1 | //!
Provides informations about the software system, such as OpenCL, CUDA, that contains the set of 2 | //! components to support [hardwares][hardware] with kernel execution. 3 | //! [hardware]: ../hardware/index.html 4 | //! 5 | //! 6 | 7 | pub use self::binary::Binary; 8 | pub use self::device::Cpu; 9 | pub use self::error::Error; 10 | pub use self::function::Function; 11 | use self::hardware::Hardware; 12 | #[cfg(not(feature = "unstable_alloc"))] 13 | pub use self::stable_alloc::allocate_boxed_slice; 14 | #[cfg(feature = "unstable_alloc")] 15 | pub use self::unstable_alloc::allocate_boxed_slice; 16 | use crate::backend::{Backend, IBackend}; 17 | use crate::framework::IFramework; 18 | use crate::hardware::{HardwareType, IHardware}; 19 | 20 | pub mod binary; 21 | pub mod device; 22 | mod error; 23 | pub mod flatbox; 24 | pub mod function; 25 | pub mod hardware; 26 | #[cfg(not(feature = "unstable_alloc"))] 27 | mod stable_alloc; 28 | #[cfg(feature = "unstable_alloc")] 29 | mod unstable_alloc; 30 | 31 | /// Initialise the Native Backend for running Tensor Operations 32 | pub fn get_native_backend() -> Backend { 33 | Backend::::default().unwrap() 34 | } 35 | 36 | #[derive(Debug, Clone)] 37 | /// Provides the Native framework. 38 | /// 39 | /// Native means host CPU only. The setup one relies on by default. 40 | pub struct Native { 41 | hardwares: Vec, 42 | binary: Binary, 43 | } 44 | 45 | /// Provides the Native framework trait for explicit Backend behaviour. 46 | /// 47 | /// You usually would not need to care about this trait. 48 | pub trait INative {} 49 | 50 | impl INative for Native {} 51 | 52 | impl IFramework for Native { 53 | type H = Hardware; 54 | type D = Cpu; 55 | type B = Binary; 56 | 57 | fn ID() -> &'static str { 58 | "NATIVE" 59 | } 60 | 61 | fn new() -> Native { 62 | let hardwares = Native::load_hardwares().expect("Native hardwares are always ok. qed"); 63 | Self { 64 | hardwares, 65 | binary: Binary::new(), 66 | } 67 | } 68 | 69 | fn load_hardwares() -> Result, crate::framework::Error> { 70 | let cpu = Hardware::new(1) 71 | .set_name(Some(String::from("Host CPU"))) 72 | .set_hardware_type(Some(HardwareType::CPU)) 73 | .set_compute_units(Some(1)) 74 | .build(); 75 | Ok(vec![cpu]) 76 | } 77 | 78 | fn hardwares(&self) -> &[Hardware] { 79 | &self.hardwares 80 | } 81 | 82 | fn binary(&self) -> &Binary { 83 | &self.binary 84 | } 85 | 86 | fn new_device(&self, devices: &[Hardware]) -> Result { 87 | Ok(Cpu::new(devices.to_vec())) 88 | } 89 | } 90 | 91 | impl IBackend for Backend { 92 | type F = Native; 93 | 94 | fn device(&self) -> &Cpu { 95 | &self.device() 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/stable_alloc.rs: -------------------------------------------------------------------------------- 1 | /// Traditional allocation via creation of a filled Vec. 2 | pub fn allocate_boxed_slice(cap: usize) -> Box<[u8]> { 3 | let vec: Vec = vec![0; cap]; 4 | let bx: Box<[u8]> = vec.into_boxed_slice(); 5 | bx 6 | } 7 | -------------------------------------------------------------------------------- /coaster/src/frameworks/native/unstable_alloc.rs: -------------------------------------------------------------------------------- 1 | use alloc::raw_vec::RawVec; 2 | 3 | /// Alternative way to allocate memory, requiring unstable RawVec. 
4 | pub fn allocate_boxed_slice(cap: usize) -> Box<[u8]> { 5 | let raw = RawVec::with_capacity(cap); 6 | unsafe { raw.into_box() } 7 | } 8 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/api/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides a safe wrapper around OpenCL. 2 | 3 | pub use self::error::Error; 4 | 5 | #[derive(Debug, Copy, Clone)] 6 | /// Defines the OpenCL API. 7 | pub struct API; 8 | 9 | mod context; 10 | mod device; 11 | mod error; 12 | mod ffi; 13 | mod memory; 14 | mod platform; 15 | mod queue; 16 | pub mod types; 17 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/api/platform.rs: -------------------------------------------------------------------------------- 1 | //! Provides the OpenCL API with its platform functionality. 2 | 3 | use super::ffi::*; 4 | use super::types as cl; 5 | use frameworks::opencl::Platform; 6 | use frameworks::opencl::{Error, API}; 7 | use std::iter::repeat; 8 | use std::ptr; 9 | use std::sync::Mutex; 10 | 11 | impl API { 12 | /// Returns a list of available platforms. 13 | /// 14 | /// The OpenCL platform layer which implements platform-specific features that allow 15 | /// applications to 16 | /// 17 | /// * query OpenCL devices, 18 | /// * obtain device configuration information and 19 | /// * create OpenCL contexts using one or more devices. 20 | pub fn load_platforms() -> Result, Error> { 21 | let mut num_platforms = 0; 22 | // This mutex is used to work around weak OpenCL implementations. 23 | // On some implementations concurrent calls to clGetPlatformIDs 24 | // will cause the implementation to return an invalid status. 25 | lazy_static! { 26 | static ref PLATFORM_MUTEX: Mutex<()> = Mutex::new(()); 27 | } 28 | 29 | let guard = PLATFORM_MUTEX.lock(); 30 | unsafe { API::ffi_get_platform_ids(0, ptr::null_mut(), &mut num_platforms) }?; 31 | 32 | let mut ids: Vec = repeat(0 as cl::device_id) 33 | .take(num_platforms as usize) 34 | .collect(); 35 | 36 | unsafe { API::ffi_get_platform_ids(num_platforms, ids.as_mut_ptr(), &mut num_platforms) }?; 37 | 38 | let _ = guard; 39 | 40 | Ok(ids.into_iter().map(Platform::from_c).collect()) 41 | } 42 | 43 | unsafe fn ffi_get_platform_ids( 44 | num_entries: cl::uint, 45 | platforms: *mut cl::platform_id, 46 | num_platforms: *mut cl::uint, 47 | ) -> Result<(), Error> { 48 | match clGetPlatformIDs(num_entries, platforms, num_platforms) { 49 | cl::Status::SUCCESS => Ok(()), 50 | cl::Status::INVALID_VALUE => Err(Error::InvalidValue("`num_entries` is equal to zero and `platforms` is not NULL or if both `num_platforms` and `platforms` are NULL")), 51 | cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory("Failure to allocate resources on the host")), 52 | _status => Err(Error::Other("Unable to get platform ids")) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/event.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around OpenCL's events. 2 | //! 3 | //! ## OpenCL Event 4 | //! 5 | //! Most OpenCL operations happen asynchronously on the OpenCL Device. 6 | //! To provide the possibility to order and synchronize multiple operations, 7 | //! the execution of an operation yields an Event object. 8 | //! This Event can be used as input to other operations 9 | //! 
which will wait until this Event has finished executing to run. 10 | 11 | use super::api::types as cl; 12 | 13 | #[derive(Debug, Copy, Clone)] 14 | /// Defines an OpenCL Event. 15 | pub struct Event { 16 | id: isize, 17 | } 18 | 19 | impl Event { 20 | /// Initializes a new OpenCL event from its C type. 21 | pub fn from_c(id: cl::event) -> Event { 22 | Event { id: id as isize } 23 | } 24 | 25 | /// Returns the id as its C type. 26 | pub fn id_c(&self) -> cl::event { 27 | self.id as cl::event 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/kernel.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around OpenCL's Kernel. 2 | 3 | use super::api::types as cl; 4 | use operation::IOperation; 5 | 6 | #[derive(Debug, Copy, Clone)] 7 | /// Defines an OpenCL Kernel. 8 | /// 9 | /// A Kernel is OpenCL's version of Coaster's [operation][operation]. 10 | /// [operation]: ../../operation/index.html 11 | pub struct Kernel { 12 | id: isize, 13 | } 14 | 15 | impl Kernel { 16 | /// Initializes a new OpenCL kernel. 17 | pub fn from_isize(id: isize) -> Kernel { 18 | Kernel { id: id } 19 | } 20 | 21 | /// Initializes a new OpenCL kernel from its C type. 22 | pub fn from_c(id: cl::kernel_id) -> Kernel { 23 | Kernel { id: id as isize } 24 | } 25 | 26 | /// Returns the id as its C type. 27 | pub fn id_c(&self) -> cl::kernel_id { 28 | self.id as cl::kernel_id 29 | } 30 | } 31 | 32 | impl IOperation for Kernel {} 33 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/memory.rs: -------------------------------------------------------------------------------- 1 | #![allow(missing_docs)] 2 | use super::api::types as cl; 3 | use super::api::{Error, API}; 4 | use super::Context; 5 | use device::IMemory; 6 | 7 | use std::{fmt, ptr}; 8 | 9 | /// Holds an OpenCL memory id and manages its deallocation 10 | pub struct Memory { 11 | /// The underlying memory id. 12 | memory: cl::memory_id, 13 | memory_flags: MemoryFlags, 14 | 15 | /// Pointer to host memory that is used for pinned host memory. 16 | host_ptr: *mut u8, 17 | } 18 | 19 | impl fmt::Debug for Memory { 20 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 21 | write!(f, "Memory({:?}, {:?})", self.memory, self.memory_flags) 22 | } 23 | } 24 | 25 | impl Drop for Memory { 26 | #[allow(unused_must_use)] 27 | fn drop(&mut self) { 28 | API::release_memory(self); 29 | if self.memory_flags.contains(MemoryFlags::MEM_USE_HOST_PTR) { 30 | unsafe { 31 | Box::from_raw(self.host_ptr); 32 | } 33 | } 34 | } 35 | } 36 | 37 | bitflags!
{ 38 | pub struct MemoryFlags: cl::bitfield { 39 | const MEM_READ_WRITE = 1 << 0; 40 | const MEM_WRITE_ONLY = 1 << 1; 41 | const MEM_READ_ONLY = 1 << 2; 42 | const MEM_USE_HOST_PTR = 1 << 3; 43 | const MEM_ALLOC_HOST_PTR = 1 << 4; 44 | const MEM_COPY_HOST_PTR = 1 << 5; 45 | } 46 | } 47 | 48 | impl Default for MemoryFlags { 49 | fn default() -> MemoryFlags { 50 | MemoryFlags::MEM_READ_WRITE 51 | } 52 | } 53 | 54 | #[allow(unused_mut)] 55 | impl Memory { 56 | pub fn new(context: &Context, size: usize) -> Result { 57 | API::create_buffer(context, MemoryFlags::default(), size, None) 58 | } 59 | 60 | pub fn id_c(&self) -> cl::memory_id { 61 | self.memory 62 | } 63 | 64 | pub fn from_c(id: cl::memory_id) -> Memory { 65 | Memory { 66 | memory: id, 67 | memory_flags: MemoryFlags::default(), 68 | host_ptr: ptr::null_mut(), 69 | } 70 | } 71 | } 72 | 73 | impl IMemory for Memory {} 74 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/platform.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around OpenCL's platform. 2 | 3 | use super::api::types as cl; 4 | 5 | #[derive(Debug, Copy, Clone, PartialEq)] 6 | /// Defines a OpenCL Platform. 7 | pub struct Platform { 8 | id: isize, 9 | } 10 | 11 | impl Platform { 12 | /// Initializes a new OpenCL platform. 13 | pub fn from_isize(id: isize) -> Platform { 14 | Platform { id: id } 15 | } 16 | 17 | /// Initializes a new OpenCL platform from its C type. 18 | pub fn from_c(id: cl::platform_id) -> Platform { 19 | Platform { id: id as isize } 20 | } 21 | 22 | /// Returns the id as isize. 23 | pub fn id(&self) -> isize { 24 | self.id 25 | } 26 | 27 | /// Returns the id as its C type. 28 | pub fn id_c(&self) -> cl::platform_id { 29 | self.id as cl::platform_id 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /coaster/src/frameworks/opencl/program.rs: -------------------------------------------------------------------------------- 1 | //! Provides a Rust wrapper around OpenCL's Program. 2 | 3 | use super::api::types as cl; 4 | use binary::IBinary; 5 | 6 | #[derive(Debug, Copy, Clone)] 7 | /// Defines a OpenCL Program. 8 | /// 9 | /// A Program is OpenCL's version of Coaster's [binary][binary]. 10 | /// [binary]: ../../binary/index.html 11 | pub struct Program { 12 | id: isize, 13 | } 14 | 15 | impl Program { 16 | /// Initializes a new OpenCL device. 17 | pub fn from_isize(id: isize) -> Program { 18 | Program { id: id } 19 | } 20 | 21 | /// Initializes a new OpenCL device from its C type. 22 | pub fn from_c(id: cl::kernel_id) -> Program { 23 | Program { id: id as isize } 24 | } 25 | 26 | /// Returns the id as its C type. 27 | pub fn id_c(&self) -> cl::kernel_id { 28 | self.id as cl::kernel_id 29 | } 30 | } 31 | 32 | impl IBinary for Program {} 33 | -------------------------------------------------------------------------------- /coaster/src/hardware.rs: -------------------------------------------------------------------------------- 1 | //! Provides a representation for a collection of available compute units e.g. CPUs or GPUs. 2 | //! 3 | //! Hardware can be GPUs, multi-core CPUs or DSPs, Cell/B.E. processor or whatever else 4 | //! is supported by the provided frameworks such as OpenCL, CUDA, etc. The struct holds all 5 | //! important information about the hardware. 6 | //! To execute code on hardware, turn hardware into a [device][device]. 7 | //! 8 | //! 
[device]: ../device/index.html 9 | 10 | #[derive(Debug, Copy, Clone, PartialEq, Hash)] 11 | /// Specifies the available Hardware types. 12 | pub enum HardwareType { 13 | /// CPU devices 14 | CPU, 15 | /// GPU devices 16 | GPU, 17 | /// Hardware Accelerator devices 18 | ACCELERATOR, 19 | /// Used for anything else 20 | OTHER, 21 | } 22 | 23 | /// Specifies Hardware behavior across frameworks. 24 | pub trait IHardware { 25 | /// Returns the ID of the Hardware 26 | fn id(&self) -> isize; 27 | 28 | /// Returns the name of the Hardware 29 | fn name(&self) -> Option; 30 | 31 | /// Defines the name of the Hardware 32 | fn set_name(&mut self, name: Option) -> Self; 33 | 34 | /// Returns the device_type of the Hardware 35 | fn hardware_type(&self) -> Option; 36 | 37 | /// Defines the hardware_type of the Hardware 38 | fn set_hardware_type(&mut self, hardware_type: Option) -> Self; 39 | 40 | /// Returns the compute_units of the Hardware 41 | fn compute_units(&self) -> Option; 42 | 43 | /// Defines the compute_units of the Hardware 44 | fn set_compute_units(&mut self, compute_units: Option) -> Self; 45 | 46 | /// Builds an immutable Hardware 47 | fn build(self) -> Self; 48 | } 49 | -------------------------------------------------------------------------------- /coaster/src/operation.rs: -------------------------------------------------------------------------------- 1 | //! Provides the generic functionality for backend-agnostic operations. 2 | //! 3 | //! An Operation describes the execution of a [library][library] provided functionality for a 4 | //! specific [framework][frameworks]. An Operation can easily be executed in parallel on multi-core 5 | //! devices. An Operation is very similar to an ordinary function and usually defines one or more 6 | //! arguments over which the operation will happen. 7 | //! 8 | //! You usually do not interact with an operation directly. To execute an operation you would 9 | //! use the [backend][backend]. Also you will rarely initialize your operations directly, 10 | //! as this happens automatically at the initialization of a [binary][binary]. 11 | //! 12 | //! ## Development 13 | //! 14 | //! The functionality provided by this module is used to construct the basic operations that come 15 | //! shipped with Coaster, but should also allow you to define and run your own backend-agnostic 16 | //! operations as well. 17 | //! 18 | //! [frameworks]: ../frameworks/index.html 19 | //! [backend]: ../backend/index.html 20 | //! [library]: ../library/index.html 21 | //! [binary]: ../binary/index.html 22 | 23 | /// Defines the functionality of an operation. 
24 | pub trait IOperation {} 25 | -------------------------------------------------------------------------------- /coaster/tests/backend_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | extern crate libc; 3 | 4 | #[cfg(test)] 5 | mod backend_spec { 6 | #[cfg(feature = "native")] 7 | mod native { 8 | use crate::co::prelude::*; 9 | use std::rc::Rc; 10 | 11 | #[test] 12 | fn it_can_create_default_backend() { 13 | assert!(Backend::::default().is_ok()); 14 | } 15 | 16 | #[test] 17 | fn it_can_use_ibackend_trait_object() { 18 | let framework = Native::new(); 19 | let hardwares = framework.hardwares().to_vec(); 20 | let backend_config = BackendConfig::new(framework, &hardwares); 21 | let backend = Rc::new(Backend::new(backend_config).unwrap()); 22 | use_ibackend(backend); 23 | } 24 | 25 | fn use_ibackend(backend: Rc) { 26 | let backend: Rc> = backend.clone(); 27 | backend.device(); 28 | } 29 | } 30 | 31 | #[cfg(feature = "cuda")] 32 | mod cuda { 33 | use crate::co::*; 34 | 35 | #[test] 36 | fn it_can_create_default_backend() { 37 | assert!(Backend::::default().is_ok()); 38 | } 39 | } 40 | 41 | #[cfg(feature = "opencl")] 42 | mod opencl { 43 | use co::*; 44 | 45 | #[test] 46 | fn it_can_create_default_backend() { 47 | assert!(Backend::::default().is_ok()); 48 | } 49 | 50 | #[test] 51 | fn it_can_manually_create_backend() { 52 | let framework = OpenCL::new(); 53 | let hardwares = framework.hardwares().to_vec(); 54 | let backend_config = BackendConfig::new(framework, &hardwares); 55 | let backend = Backend::new(backend_config); 56 | println!("{:?}", backend); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /coaster/tests/framework_cuda_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | extern crate libc; 3 | 4 | #[cfg(test)] 5 | #[cfg(feature = "cuda")] 6 | mod framework_cuda_spec { 7 | use crate::co::frameworks::cuda::memory::*; 8 | use crate::co::prelude::*; 9 | 10 | #[test] 11 | fn it_works() { 12 | let frm = Cuda::new(); 13 | println!("{:?}", frm.hardwares()); 14 | } 15 | 16 | #[test] 17 | fn it_creates_context() { 18 | let frm = Cuda::new(); 19 | let hardwares = &frm.hardwares()[0..1]; 20 | println!("{:?}", frm.new_device(hardwares)); 21 | } 22 | 23 | #[test] 24 | #[allow(unused_must_use)] 25 | fn it_allocates_memory() { 26 | let vec_a = vec![0isize, 1, 2, -3, 4, 5, 6, 7]; 27 | let frm = Cuda::new(); 28 | let _ctx = frm.new_device(&frm.hardwares()[0..1]).unwrap(); 29 | // Cuda memory 30 | Memory::new(vec_a.len()).unwrap(); 31 | } 32 | 33 | #[test] 34 | #[allow(unused_must_use)] 35 | // Create a lot of new CUDA devices, tests for correct dropping of device 36 | fn it_creates_a_lot_of_devices() { 37 | for _ in 0..256 { 38 | let cuda = Cuda::new(); 39 | let _ = cuda.new_device(&cuda.hardwares()[0..1]).unwrap(); 40 | } 41 | } 42 | 43 | #[test] 44 | #[allow(unused_must_use)] 45 | // Allocate 128mb blocks with dropping them in between, tests for correct freeing of memory 46 | fn it_allocates_4gb_memory_same_device() { 47 | let cuda = Cuda::new(); 48 | let device = cuda.new_device(&cuda.hardwares()[0..1]).unwrap(); 49 | for _ in 0..256 { 50 | let mut x = SharedTensor::::new(&vec![256, 1024, 128]); 51 | x.write_only(&device).unwrap(); 52 | } 53 | } 54 | 55 | #[test] 56 | fn it_can_synchronize_context() { 57 | let backend = Backend::::default().unwrap(); 58 | backend.synchronize().unwrap(); 59 
| } 60 | } 61 | -------------------------------------------------------------------------------- /coaster/tests/framework_native_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | 3 | #[cfg(test)] 4 | #[cfg(feature = "native")] 5 | mod framework_native_spec { 6 | use crate::co::prelude::*; 7 | 8 | #[test] 9 | fn it_works() { 10 | let frm = Native::new(); 11 | assert_eq!(frm.hardwares().len(), 1); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /coaster/tests/framework_opencl_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | extern crate libc; 3 | 4 | #[cfg(test)] 5 | #[cfg(feature = "opencl")] 6 | mod framework_opencl_spec { 7 | use co::frameworks::opencl::context::*; 8 | use co::frameworks::opencl::memory::*; 9 | use co::frameworks::opencl::queue::*; 10 | use co::prelude::*; 11 | 12 | #[test] 13 | fn it_works() { 14 | let frm = OpenCL::new(); 15 | println!("{:?}", frm.hardwares()); 16 | } 17 | 18 | #[test] 19 | fn it_creates_context() { 20 | let frm = OpenCL::new(); 21 | let hardwares = &frm.hardwares()[0..1]; 22 | println!("{:?}", frm.new_device(hardwares)); 23 | } 24 | 25 | #[test] 26 | #[allow(unused_must_use)] 27 | fn it_creates_memory() { 28 | let frm = OpenCL::new(); 29 | let ctx = frm.new_device(&frm.hardwares()[0..1]).unwrap(); 30 | Memory::new(&ctx, 8); 31 | } 32 | 33 | #[test] 34 | fn it_creates_queue() { 35 | let frm = OpenCL::new(); 36 | let ctx = frm.new_device(&frm.hardwares()[0..1]).unwrap(); 37 | Queue::new(&ctx, &frm.hardwares()[0..1][0], None).unwrap(); 38 | } 39 | 40 | #[test] 41 | fn it_queries_context_info() { 42 | let frm = OpenCL::new(); 43 | let ctx = frm.new_device(&frm.hardwares()[0..1]).unwrap(); 44 | println!( 45 | "ReferenceCount: {:?}", 46 | ctx.get_context_info(ContextInfoQuery::ReferenceCount) 47 | ); 48 | println!( 49 | "NumDevices: {:?}", 50 | ctx.get_context_info(ContextInfoQuery::NumDevices) 51 | ); 52 | println!( 53 | "Devices: {:?}", 54 | ctx.get_context_info(ContextInfoQuery::Devices) 55 | ); 56 | println!( 57 | "Properties: {:?}", 58 | ctx.get_context_info(ContextInfoQuery::Properties) 59 | ); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /coaster/tests/hardware_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | extern crate libc; 3 | 4 | #[cfg(test)] 5 | #[cfg(feature = "opencl")] 6 | mod hardware_spec { 7 | use co::frameworks::opencl::Device; 8 | use co::prelude::*; 9 | 10 | #[test] 11 | fn it_works() { 12 | Device::from_isize(42) 13 | .set_hardware_type(Some(HardwareType::GPU)) 14 | .set_name(Some(String::from("Test Hardware"))) 15 | .set_compute_units(Some(450)) 16 | .build(); 17 | } 18 | 19 | #[test] 20 | fn it_returns_correct_id() { 21 | let hardware = Device::from_isize(42); 22 | assert_eq!(hardware.id(), 42); 23 | } 24 | 25 | #[test] 26 | fn it_sets_hardware_type() { 27 | let hardware = Device::from_isize(42) 28 | .set_hardware_type(Some(HardwareType::CPU)) 29 | .build(); 30 | 31 | assert!(match hardware.hardware_type() { 32 | Some(HardwareType::CPU) => true, 33 | _ => false, 34 | }); 35 | } 36 | 37 | #[test] 38 | fn it_sets_name() { 39 | let hardware = Device::from_isize(42) 40 | .set_name(Some(String::from("Test Hardware"))) 41 | .build(); 42 | 43 | assert!(match hardware.name() { 44 | Some(_) => true, 45 | _ 
=> false, 46 | }); 47 | } 48 | 49 | #[test] 50 | fn it_sets_compute_units() { 51 | let hardware = Device::from_isize(42).set_compute_units(Some(400)).build(); 52 | 53 | assert!(match hardware.compute_units() { 54 | Some(400) => true, 55 | _ => false, 56 | }); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /coaster/tests/tensor_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | 3 | #[cfg(test)] 4 | mod tensor_spec { 5 | use crate::co::prelude::*; 6 | 7 | #[test] 8 | fn it_returns_correct_tensor_desc_stride() { 9 | let tensor_desc_r0: TensorDesc = vec![]; 10 | let tensor_desc_r1: TensorDesc = vec![5]; 11 | let tensor_desc_r2: TensorDesc = vec![2, 4]; 12 | let tensor_desc_r3: TensorDesc = vec![2, 2, 4]; 13 | let tensor_desc_r4: TensorDesc = vec![2, 2, 4, 4]; 14 | let r0: Vec = vec![]; 15 | assert_eq!(r0, tensor_desc_r0.default_stride()); 16 | assert_eq!(vec![1], tensor_desc_r1.default_stride()); 17 | assert_eq!(vec![4, 1], tensor_desc_r2.default_stride()); 18 | assert_eq!(vec![8, 4, 1], tensor_desc_r3.default_stride()); 19 | assert_eq!(vec![32, 16, 4, 1], tensor_desc_r4.default_stride()); 20 | } 21 | 22 | #[test] 23 | fn it_returns_correct_size_for_rank_0() { 24 | // In order for correct memory allocation of scala Tensor, the size should never return less than 1. 25 | let tensor_desc_r0: TensorDesc = vec![]; 26 | assert_eq!(1, tensor_desc_r0.size()); 27 | 28 | let tensor_desc_r0_into = <() as IntoTensorDesc>::into(&()); 29 | assert_eq!(1, tensor_desc_r0_into.size()); 30 | } 31 | 32 | #[test] 33 | fn it_resizes_tensor() { 34 | let mut tensor = SharedTensor::::new(&(10, 20, 30)); 35 | assert_eq!(tensor.desc(), &[10, 20, 30]); 36 | tensor.resize(&(2, 3, 4, 5)).unwrap(); 37 | assert_eq!(tensor.desc(), &[2, 3, 4, 5]); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /coaster/tests/ui.rs: -------------------------------------------------------------------------------- 1 | #[test] 2 | fn ui() { 3 | let t = trybuild::TestCases::new(); 4 | t.compile_fail("tests/ui/err-*.rs"); 5 | t.pass("tests/ui/ok-*.rs"); 6 | } 7 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-02-drop_live_memory.rs: -------------------------------------------------------------------------------- 1 | use coaster::prelude::*; 2 | 3 | fn main() { 4 | let ntv = Native::new(); 5 | let dev = ntv.new_device(ntv.hardwares()).unwrap(); 6 | 7 | let x = &mut SharedTensor::::new(&10); 8 | let m = x.write_only(&dev).unwrap(); 9 | x.drop(&dev); 10 | //~^ ERROR error: cannot borrow `*x` as mutable more than once at a time 11 | 12 | // need additional bindings, so rust knows it's used afterwards 13 | let _foo = m; 14 | } 15 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-02-drop_live_memory.stderr: -------------------------------------------------------------------------------- 1 | error[E0499]: cannot borrow `*x` as mutable more than once at a time 2 | --> $DIR/err-02-drop_live_memory.rs:9:5 3 | | 4 | 8 | let m = x.write_only(&dev).unwrap(); 5 | | ------------------ first mutable borrow occurs here 6 | 9 | x.drop(&dev); 7 | | ^^^^^^^^^^^^ second mutable borrow occurs here 8 | ... 
9 | 13 | let _foo = m; 10 | | - first borrow later used here 11 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-03-leak_read_reference.rs: -------------------------------------------------------------------------------- 1 | use coaster::prelude::*; 2 | 3 | fn main() { 4 | let ntv = Native::new(); 5 | let dev = ntv.new_device(ntv.hardwares()).unwrap(); 6 | 7 | let mem = { 8 | let x = &mut SharedTensor::::new(&10); 9 | //~^ ERROR error: borrowed value does not live long enough 10 | x.write_only(&dev).unwrap(); 11 | let m = x.read(&dev).unwrap(); 12 | m 13 | }; 14 | } 15 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-03-leak_read_reference.stderr: -------------------------------------------------------------------------------- 1 | warning: unused variable: `mem` 2 | --> $DIR/err-03-leak_read_reference.rs:7:9 3 | | 4 | 7 | let mem = { 5 | | ^^^ help: if this is intentional, prefix it with an underscore: `_mem` 6 | | 7 | = note: `#[warn(unused_variables)]` on by default 8 | 9 | error[E0716]: temporary value dropped while borrowed 10 | --> $DIR/err-03-leak_read_reference.rs:8:22 11 | | 12 | 7 | let mem = { 13 | | --- borrow later stored here 14 | 8 | let x = &mut SharedTensor::::new(&10); 15 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ creates a temporary which is freed while still in use 16 | ... 17 | 13 | }; 18 | | - temporary value is freed at the end of this statement 19 | | 20 | = note: consider using a `let` binding to create a longer lived value 21 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-04-leak_write_reference.rs: -------------------------------------------------------------------------------- 1 | use coaster::prelude::*; 2 | 3 | fn main() { 4 | let ntv = Native::new(); 5 | let dev = ntv.new_device(ntv.hardwares()).unwrap(); 6 | 7 | let _mem = { 8 | let x = &mut SharedTensor::::new(&10); 9 | //~^ ERROR error: borrowed value does not live long enough 10 | let m = x.write_only(&dev).unwrap(); 11 | m 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-04-leak_write_reference.stderr: -------------------------------------------------------------------------------- 1 | error[E0716]: temporary value dropped while borrowed 2 | --> $DIR/err-04-leak_write_reference.rs:8:22 3 | | 4 | 8 | let x = &mut SharedTensor::::new(&10); 5 | | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ creates a temporary which is freed while still in use 6 | ... 
7 | 11 | m 8 | | - borrow later used here 9 | 12 | }; 10 | | - temporary value is freed at the end of this statement 11 | | 12 | = note: consider using a `let` binding to create a longer lived value 13 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-05-read_write_borrows.rs: -------------------------------------------------------------------------------- 1 | use coaster::prelude::*; 2 | 3 | fn main() { 4 | let ntv = Native::new(); 5 | let dev = ntv.new_device(ntv.hardwares()).unwrap(); 6 | 7 | let x = &mut SharedTensor::::new(&10); 8 | let m1 = x.write_only(&dev).unwrap(); 9 | let m2 = x.read(&dev).unwrap(); 10 | //~^ ERROR cannot borrow `*x` as immutable because it is also borrowed as mutable 11 | 12 | // need additional bindings, so rust knows it's used afterwards 13 | let _foo = m1; 14 | let _bar = m2; 15 | } 16 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-05-read_write_borrows.stderr: -------------------------------------------------------------------------------- 1 | error[E0502]: cannot borrow `*x` as immutable because it is also borrowed as mutable 2 | --> $DIR/err-05-read_write_borrows.rs:9:14 3 | | 4 | 8 | let m1 = x.write_only(&dev).unwrap(); 5 | | ------------------ mutable borrow occurs here 6 | 9 | let m2 = x.read(&dev).unwrap(); 7 | | ^^^^^^^^^^^^ immutable borrow occurs here 8 | ... 9 | 13 | let _foo = m1; 10 | | -- mutable borrow later used here 11 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-06-two_write_borrows.rs: -------------------------------------------------------------------------------- 1 | use coaster::prelude::*; 2 | 3 | fn main() { 4 | let ntv = Native::new(); 5 | let dev = ntv.new_device(ntv.hardwares()).unwrap(); 6 | 7 | let x = &mut SharedTensor::::new(&10); 8 | let m1 = x.write_only(&dev).unwrap(); 9 | let m2 = x.write_only(&dev).unwrap(); 10 | //~^ ERROR error: cannot borrow `*x` as mutable more than once at a time 11 | 12 | // need additional bindings, so rust knows it's used afterwards 13 | let _foo = m1; 14 | let _bar = m2; 15 | } 16 | -------------------------------------------------------------------------------- /coaster/tests/ui/err-06-two_write_borrows.stderr: -------------------------------------------------------------------------------- 1 | error[E0499]: cannot borrow `*x` as mutable more than once at a time 2 | --> $DIR/err-06-two_write_borrows.rs:9:14 3 | | 4 | 8 | let m1 = x.write_only(&dev).unwrap(); 5 | | ------------------ first mutable borrow occurs here 6 | 9 | let m2 = x.write_only(&dev).unwrap(); 7 | | ^^^^^^^^^^^^^^^^^^ second mutable borrow occurs here 8 | ... 
9 | 13 | let _foo = m1; 10 | | -- first borrow later used here 11 | -------------------------------------------------------------------------------- /coaster/tests/ui/ok-01-multiple_read_only_borrows.rs: -------------------------------------------------------------------------------- 1 | use coaster::prelude::*; 2 | 3 | fn main() { 4 | let ntv = Native::new(); 5 | let dev = ntv.new_device(ntv.hardwares()).unwrap(); 6 | 7 | let x = &mut SharedTensor::::new(&10); 8 | x.write_only(&dev).unwrap(); 9 | 10 | let _m1 = x.read(&dev); 11 | let _m2 = x.read(&dev); 12 | let _m3 = x.read(&dev); 13 | } 14 | -------------------------------------------------------------------------------- /greenglas/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /greenglas/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "greenglas" 3 | description = "Data Preprocessing library for Machine Learning" 4 | authors = ["Bernhard Schuster ", 5 | "Michael Hirn"] 6 | version = "0.3.0" 7 | repository = "https://github.com/spearow/greenglas" 8 | homepage = "https://spearow.io/projects/greenglas" 9 | documentation = "https://spearow.github.com/greenglas" 10 | readme = "README.md" 11 | keywords = ["preprocessing", "data-preprocessing", "transformation", "encoding", "machine-learning"] 12 | license = "MIT OR Apache-2.0" 13 | edition = "2018" 14 | 15 | [dependencies] 16 | coaster = { path = "../coaster", version = "0.2", default-features = false, features = ["native"] } # native feature to read/write data into tensors 17 | 18 | # can't upgrade beyond due to: 19 | # ISSUE: https://github.com/image-rs/image/issues/1468 20 | # ISSUE: https://github.com/image-rs/image/issues/1466 21 | image = "=0.23.12" 22 | murmurhash3 = "0.0.5" 23 | modifier = "0.1" 24 | 25 | [features] 26 | default = ["native", "cuda"] 27 | native = [] 28 | cuda = ["coaster/cuda"] 29 | opencl = ["coaster/opencl"] 30 | -------------------------------------------------------------------------------- /greenglas/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2017-2020 Bernhard Schuster 4 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /greenglas/rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_strings = false 2 | reorder_imports = true 3 | -------------------------------------------------------------------------------- /greenglas/src/image/modifiers.rs: -------------------------------------------------------------------------------- 1 | use super::Image; 2 | use crate::image_crate::imageops::FilterType; 3 | use crate::modifier::Modifier; 4 | 5 | #[derive(Debug, Clone, Copy)] 6 | /// Resize Modifier for `Image` 7 | pub struct Resize { 8 | /// The resized width of the new Image 9 | pub width: u32, 10 | /// The resized heigt of the new Image 11 | pub height: u32, 12 | } 13 | 14 | impl Modifier for Resize { 15 | fn modify(self, image: &mut Image) { 16 | image.value = image 17 | .value 18 | .resize(self.width, self.height, FilterType::Triangle) 19 | } 20 | } 21 | 22 | #[derive(Debug, Clone, Copy)] 23 | /// Crop Modifier for `Image` 24 | pub struct Crop { 25 | /// The x value from where the new Image should start 26 | pub x: u32, 27 | /// The y value from where the new Image should start 28 | pub y: u32, 29 | /// The width for the new Image 30 | pub width: u32, 31 | /// The height for the new Image 32 | pub height: u32, 33 | } 34 | 35 | impl Modifier for Crop { 36 | fn modify(self, image: &mut Image) { 37 | image.value = image.value.crop(self.x, self.y, self.width, self.height) 38 | } 39 | } 40 | 41 | #[derive(Debug, Clone, Copy)] 42 | /// Grayscale Modifier for `Image` 43 | pub struct Grayscale; 44 | 45 | impl Modifier for Grayscale { 46 | fn modify(self, image: &mut Image) { 47 | image.value = image.value.grayscale(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /greenglas/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Greenglas provides convenient and universal Machine Learning Transformer 2 | //! for non-numeric data types such as: `Strings`, `Images` and `Audio`. 3 | #![allow(dead_code)] 4 | #![deny( 5 | unused_crate_dependencies, 6 | clippy::missing_docs, 7 | clippy::missing_debug_implementations, 8 | clippy::missing_copy_implementations, 9 | clippy::trivial_casts, 10 | clippy::trivial_numeric_casts, 11 | clippy::unsafe_code, 12 | clippy::unused_import_braces, 13 | clippy::unused_qualifications, 14 | clippy::complexity 15 | )] 16 | 17 | extern crate coaster as co; 18 | extern crate image as image_crate; 19 | extern crate murmurhash3 as murmur3; 20 | 21 | pub use crate::image::Image; 22 | pub use crate::transformer::Transformer; 23 | pub use crate::word::Word; 24 | 25 | pub use crate::modifier::Set; 26 | 27 | /// The Image Struct and its Modifiers 28 | pub mod image; 29 | /// Transformer 30 | pub mod transformer; 31 | /// The Word Struct and its Modifiers 32 | pub mod word; 33 | 34 | /// Re-exports from the modifier crate. 35 | pub mod modifier { 36 | extern crate modifier as modifier_crate; 37 | pub use self::modifier_crate::*; 38 | } 39 | -------------------------------------------------------------------------------- /greenglas/src/transformer.rs: -------------------------------------------------------------------------------- 1 | use crate::co::plugin::numeric_helpers::*; 2 | use crate::co::prelude::*; 3 | 4 | /// The Transformer Trait 5 | /// 6 | /// Gets implemented for all Transformable Data Types. 
7 | /// Allows all Transformable Data Types to get transformed into a `SharedTensor`. 8 | pub trait Transformer { 9 | /// Transforms non-numeric data into a numeric `SharedTensor` 10 | /// 11 | /// The shape attribute is used to control the dimensions/shape of the resulting tensor. 12 | /// It returns an Error when the expected capacity (defined by the shape) differs from the 13 | /// observed one. 14 | fn transform(&self, shape: &[usize]) -> Result, TransformerError> { 15 | let native_backend = Backend::::default().unwrap(); 16 | let mut tensor = SharedTensor::::new(&shape); 17 | 18 | { 19 | let mut native_tensor = tensor.write_only(native_backend.device()).unwrap(); 20 | Self::write_to_memory(&mut native_tensor, &self.transform_to_vec())?; 21 | } 22 | Ok(tensor) 23 | } 24 | 25 | /// Transforms the non-numeric data into a numeric `Vec` 26 | fn transform_to_vec(&self) -> Vec; 27 | 28 | /// Writes into a native Coaster Memory. 29 | fn write_to_memory( 30 | mem: &mut FlatBox, 31 | data: &[T], 32 | ) -> Result<(), TransformerError> { 33 | Self::write_to_memory_offset(mem, data, 0) 34 | } 35 | 36 | /// Writes into a native Coaster Memory with an offset. 37 | fn write_to_memory_offset( 38 | mem: &mut FlatBox, 39 | data: &[T], 40 | offset: usize, 41 | ) -> Result<(), TransformerError> { 42 | let mem_buffer = mem.as_mut_slice::(); 43 | if offset == 0 && mem_buffer.len() != data.len() { 44 | return Err(TransformerError::InvalidShape); 45 | } 46 | for (index, datum) in data.iter().enumerate() { 47 | let old_val = mem_buffer 48 | .get_mut(index + offset) 49 | .ok_or(TransformerError::InvalidShape)?; 50 | *old_val = cast(*datum).unwrap(); 51 | } 52 | Ok(()) 53 | } 54 | } 55 | 56 | #[derive(Debug, Copy, Clone)] 57 | /// The Transformer Errors 58 | pub enum TransformerError { 59 | /// When the specified shape capacity differs from the actual capacity of the numeric Vec 60 | InvalidShape, 61 | /// When the Image Pixel Buffer can't be converted to an RGB Image 62 | InvalidRgbPixels, 63 | /// When the Image Pixel Buffer can't be converted to an RGBA Image 64 | InvalidRgbaPixels, 65 | /// When the Image Pixel Buffer can't be converted to a greyscale Image 66 | InvalidLumaPixels, 67 | /// When the Image Pixel Buffer can't be converted to a greyscale Alpha Image 68 | InvalidLumaAlphaPixels, 69 | } 70 | -------------------------------------------------------------------------------- /greenglas/src/word/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::murmur3::murmurhash3_x86_32 as murmur3; 2 | use crate::{Set, Transformer}; 3 | 4 | /// The Modifiers for `Word` 5 | pub mod modifiers; 6 | 7 | #[derive(Debug)] 8 | /// The Transformable Data Type `Word` 9 | pub struct Word { 10 | value: String, 11 | } 12 | 13 | impl Set for Word {} 14 | 15 | impl Transformer for Word { 16 | fn transform_to_vec(&self) -> Vec { 17 | vec![murmur3(self.value.as_bytes(), 0) as f32] 18 | } 19 | } 20 | 21 | impl Word { 22 | /// Creates a new `Word` 23 | pub fn new(word: String) -> Word { 24 | Word { value: word } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /greenglas/src/word/modifiers.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /greenglas/tests/assets/test_image.baseline.jpeg: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/greenglas/tests/assets/test_image.baseline.jpeg -------------------------------------------------------------------------------- /greenglas/tests/assets/test_image.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/greenglas/tests/assets/test_image.bmp -------------------------------------------------------------------------------- /greenglas/tests/assets/test_image.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/greenglas/tests/assets/test_image.gif -------------------------------------------------------------------------------- /greenglas/tests/assets/test_image.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/greenglas/tests/assets/test_image.jpeg -------------------------------------------------------------------------------- /greenglas/tests/assets/test_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/greenglas/tests/assets/test_image.png -------------------------------------------------------------------------------- /greenglas/tests/transformer_spec.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster; 2 | extern crate greenglas; 3 | 4 | #[cfg(test)] 5 | mod transformer_spec { 6 | 7 | use coaster::prelude::*; 8 | use greenglas::image::Crop; 9 | use greenglas::transformer::TransformerError; 10 | use greenglas::{Image, Set, Transformer}; 11 | use std::path::Path; 12 | 13 | fn expected_result() -> Vec { 14 | vec![ 15 | 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 0.0, 0.0, 0.0, 16 | ] 17 | } 18 | 19 | #[test] 20 | fn transform_returns_a_valid_result() { 21 | let path = Path::new("tests/assets/test_image.png"); 22 | let img = Image::from_path(&path); 23 | match img.transform(&vec![2, 2, 3]) { 24 | Ok(_) => assert!(true), 25 | _ => assert!(false), 26 | } 27 | } 28 | 29 | #[test] 30 | fn transform_returns_a_tensor() { 31 | let path = Path::new("tests/assets/test_image.png"); 32 | let img = Image::from_path(&path); 33 | match img.transform(&vec![2, 2, 3]) { 34 | Ok(tensor) => { 35 | let native_backend = Backend::::default().unwrap(); 36 | let data = tensor.read(native_backend.device()).unwrap().as_slice(); 37 | assert_eq!(expected_result(), data); 38 | } 39 | _ => assert!(false), 40 | } 41 | } 42 | 43 | #[test] 44 | fn transform_returns_an_error_when_different_shape() { 45 | let path = Path::new("tests/assets/test_image.png"); 46 | let img = Image::from_path(&path); 47 | match img.transform(&vec![3, 3, 3]) { 48 | Err(TransformerError::InvalidShape) => assert!(true), 49 | _ => assert!(false), 50 | } 51 | } 52 | 53 | #[test] 54 | fn transform_returns_a_valid_result_with_modifiers() { 55 | let path = Path::new("tests/assets/test_image.png"); 56 | let img = Image::from_path(&path); 57 | let crop = Crop { 58 | x: 0, 59 | y: 0, 60 | width: 1, 61 | height: 1, 62 | }; 63 | match img.set(crop).transform(&vec![1, 1, 3]) { 64 | Ok(_) => assert!(true), 65 | _ => assert!(false), 66 | } 67 | } 68 | } 69 | 
-------------------------------------------------------------------------------- /greenglas/tests/word_spec.rs: -------------------------------------------------------------------------------- 1 | extern crate greenglas; 2 | 3 | #[cfg(test)] 4 | mod word_spec { 5 | 6 | use greenglas::{Transformer, Word}; 7 | 8 | fn expected_result() -> Vec { 9 | vec![3127628307.0] 10 | } 11 | 12 | #[test] 13 | fn it_works() { 14 | assert_eq!( 15 | expected_result(), 16 | Word::new("test".to_string()).transform_to_vec() 17 | ); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /juice-examples/README.md: -------------------------------------------------------------------------------- 1 | # [Juice](https://github.com/spearow/juice) Examples 2 | 3 | CLI for running [juice](https://github.com/spearow/juice) examples. More examples and benchmark tests can be found at the [juice examples directory](https://github.com/spearow/juice#examples). 4 | 5 | ## Install CLI 6 | 7 | **DISCLAIMER: Currently both CUDA and cuDNN are required for the examples to build.** 8 | 9 | Compile and call the build. 10 | ```bash 11 | # install rust, if you need to 12 | curl -sSf https://static.rust-lang.org/rustup.sh | sh 13 | # download the code 14 | git clone git@github.com:spearow/juice-examples.git && cd juice-examples 15 | # build the binary 16 | cargo build --release 17 | # and you should see the CLI help page 18 | ../target/release/juice-examples --help 19 | # which means, you can run the examples from below 20 | ``` 21 | *Note for OSX El Capitan users: `openssl` no longer ships with OSX by default. `brew link --force openssl` should fix the problem. If not, [see this Github issue](https://github.com/sfackler/rust-openssl/issues/255) for more details.* 22 | 23 | ## MNIST 24 | 25 | The MNIST Datasets comes not shipped with this repository (it's too big), but you can load it directly via the 26 | CLI. 27 | 28 | ```bash 29 | # download the MNIST dataset. 30 | ../target/release/juice-examples load-dataset mnist 31 | 32 | # run the MNIST linear example 33 | ../target/release/juice-examples mnist linear --batch-size 10 34 | # run the MNIST MLP (Multilayer Perceptron) example 35 | ../target/release/juice-examples mnist mlp --batch-size 5 --learning-rate 0.001 36 | # run the MNIST Convolutional Neural Network example 37 | ../target/release/juice-examples mnist conv --batch-size 10 --learning-rate 0.002 38 | ``` 39 | 40 | ## Fashion-MNIST 41 | 42 | The [fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is 43 | also too big to be included, but it can be downloaded in the same way as MNIST: 44 | 45 | ```bash 46 | # download the fashion-MNIST dataset. 
47 | ../target/release/juice-examples load-dataset fashion 48 | 49 | # run the fashion-MNIST linear example 50 | ../target/release/juice-examples fashion linear --batch-size 10 51 | # run the fashion-MNIST MLP (Multilayer Perceptron) example 52 | ../target/release/juice-examples fashion mlp --batch-size 5 --learning-rate 0.001 53 | # run the fashion-MNIST Convolutional Neural Network example 54 | ../target/release/juice-examples fashion conv --batch-size 10 --learning-rate 0.002 55 | ``` 56 | -------------------------------------------------------------------------------- /juice-examples/juice-utils/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "juice-utils" 3 | description = "Utilities for running Juice Examples" 4 | version = "0.0.1" 5 | authors = ["Lissa Hyacinth ", "Bernhard Schuster "] 6 | edition = "2018" 7 | publish = false 8 | 9 | [dependencies] 10 | reqwest= { version = "0.11", features = ["blocking", "rustls-tls"]} 11 | flate2 = "1" 12 | fs-err = "2" 13 | log = "0.4" 14 | -------------------------------------------------------------------------------- /juice-examples/juice-utils/src/lib.rs: -------------------------------------------------------------------------------- 1 | use flate2::read::GzDecoder; 2 | use fs_err as fs; 3 | use reqwest::blocking::Client; 4 | use std::io; 5 | use std::io::prelude::*; 6 | 7 | pub fn download_datasets( 8 | datasets: &[&str], 9 | asset_path: &str, 10 | base_url: &str, 11 | ) -> Result<(), Box> { 12 | let client = Client::new(); 13 | std::fs::create_dir_all(asset_path)?; 14 | for dataset in datasets { 15 | let url = format!("{}/{}", base_url, dataset); 16 | log::info!("Downloading {}", dataset); 17 | let resp = client.get(&url).send()?.bytes()?; 18 | let name = format!("{}/{}", asset_path, dataset); 19 | let mut f = fs::File::create(name.clone()).expect("Failed to create file"); 20 | f.write_all(&resp)?; 21 | log::info!("Download of {} complete.", dataset); 22 | } 23 | Ok(()) 24 | } 25 | 26 | pub fn unzip_datasets(datasets: &[&str], asset_path: &str) -> io::Result<()> { 27 | for filename in datasets { 28 | log::info!("Decompressing: {}", filename); 29 | 30 | let file_handle = fs::File::open(&format!("{}/{}", asset_path, filename)).unwrap(); 31 | let mut decoder = GzDecoder::new(file_handle); 32 | 33 | let filename_string = filename.split(".").nth(0).unwrap(); 34 | 35 | let mut dest = fs::File::create(format!("{}/{}", asset_path, filename_string))?; 36 | std::io::copy(&mut decoder, &mut dest)?; 37 | log::info!("Decompression of {} complete.", filename); 38 | } 39 | Ok(()) 40 | } 41 | -------------------------------------------------------------------------------- /juice-examples/mackey-glass-rnn-regression/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /juice-examples/mackey-glass-rnn-regression/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "example-rnn-regression" 3 | description = "Juice Framework example for RNN Regression using Mackey-Glass Dataset" 4 | version = "0.0.1" 5 | authors = ["Lissa Hyacinth "] 6 | edition = "2018" 7 | publish = false 8 | 9 | [dependencies] 10 | greenglas = { path = "../../greenglas" , version = "0.3.0"} 11 | juice = { path = "../../juice", default-features = false , version = "0.3.0"} 12 | coaster = { path = "../../coaster", 
default-features = false , version = "0.2.0"} 13 | coaster-nn = { path = "../../coaster-nn", default-features = false , version = "0.5.0"} 14 | 15 | csv = "1.1" 16 | serde = { version = "1", features = ["derive"] } 17 | env_logger = "0.9" 18 | log = "0.4" 19 | docopt= "1.1" 20 | fs-err = "2" 21 | 22 | [features] 23 | default = ["native", "cuda"] 24 | native = ["juice/native", "coaster/native"] 25 | opencl = ["juice/opencl", "coaster/opencl"] 26 | cuda = ["juice/cuda", "coaster/cuda"] 27 | -------------------------------------------------------------------------------- /juice-examples/mackey-glass-rnn-regression/README.md: -------------------------------------------------------------------------------- 1 | # [Juice](https://github.com/spearow/juice) Examples 2 | 3 | CLI for running [juice](https://github.com/spearow/juice) examples. 4 | 5 | ## Install CLI 6 | 7 | ```bash 8 | # install rust, if you need to 9 | curl -sSf https://static.rust-lang.org/rustup.sh | sh 10 | # Download repository, and navigate to this example 11 | git clone git@github.com:spearow/juice.git && cd juice/juice-examples/mackey-glass-rnn-regression 12 | # build the binary 13 | cargo build --release 14 | ``` 15 | 16 | ## Environmental Variables 17 | 18 | This example relies upon CUDA and CUDNN to build, and must be able to find both on your machine at build and runtime. The easiest way to ensure this 19 | is to set the following environmental variables; 20 | 21 | ### RUSTFLAGS 22 | 23 | Rustflags must be set to link natively to `cuda.lib` and `cudnn.h` in the pattern 24 | ```RUSTFLAGS=-L native={ CUDA LIB DIR} -L native={CUDNN HEADER DIRECTORY}```, or a single pattern of `-L native` if both files are located in the same directory. 25 | 26 | ### LLVM_CONFIG_PATH 27 | 28 | `LLVM_CONFIG_PATH` must point to your llvm-config binary, including the binary itself, i.e. 29 | `LLVM_CONFIG_PATH=D:\llvm\llvm-9.0.1.src\Release\bin\llvm-config.exe` 30 | 31 | ### CUDNN_INCLUDE_DIR 32 | 33 | `CUDNN_INCLUDE_DIR` must point at the `\include` directory for your version of CUDA, i.e. for CUDA version 11.2 on windows it would be: 34 | 35 | `CUDNN_INCLUDE_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\include` 36 | 37 | ## [Mackey-Glass](http://www.scholarpedia.org/article/Mackey-Glass_equation) Dataset 38 | 39 | A generated version of Mackey-Glass is packaged with Juice, and packaged in a way suitable for RNN networks. 
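Putting the variables above together, a minimal *nix shell setup might look like the sketch below before invoking the training commands. Every path here is an assumption; substitute the locations of your own CUDA, cuDNN and LLVM installations.

```bash
# assumed install locations -- adjust to your system
export RUSTFLAGS="-L native=/usr/local/cuda/lib64 -L native=/usr/include"
export LLVM_CONFIG_PATH=/usr/bin/llvm-config
export CUDNN_INCLUDE_DIR=/usr/local/cuda/include
cargo build --release
```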
40 | 41 | ```bash 42 | cd juice-examples/mackey-glass-rnn-regression 43 | # Train a RNN Network (*nix) 44 | ../../target/release/example-rnn-regression train --learning-rate=0.01 --batch-size=40 SavedRNNNetwork.juice 45 | # Train a RNN Network (Windows) 46 | ..\..\target\release\example-rnn-regression.exe train --learning-rate=0.01 --batch-size=40 SavedRNNNetwork.juice 47 | 48 | # Test the RNN Network (*nix) 49 | ../../target/release/example-rnn-regression test --batch-size=40 SavedRNNNetwork.juice 50 | # Test the RNN Network (Windows) 51 | ..\..\target\release\example-rnn-regression.exe test --batch-size=40 SavedRNNNetwork.juice 52 | ``` 53 | -------------------------------------------------------------------------------- /juice-examples/mackey-glass-rnn-regression/assets/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /juice-examples/mackey-glass-rnn-regression/rnn.juice: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice-examples/mackey-glass-rnn-regression/rnn.juice -------------------------------------------------------------------------------- /juice-examples/mackey-glass-rnn-regression/src/args.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | use std::path::PathBuf; 3 | 4 | pub const MAIN_USAGE: &str = " 5 | Demonstrate RNN caps of juice with the cuda backend. 6 | 7 | Usage: 8 | mackey-glass-example train [--batch-size=] [--learning-rate=] [--momentum=] 9 | mackey-glass-example test [--batch-size=] 10 | 11 | Options: 12 | -b, --batch-size= Network Batch Size. 13 | -l, --learning-rate= Learning Rate. 14 | -m, --momentum= Momentum. 15 | -h, --help Show this screen. 16 | "; 17 | 18 | #[allow(non_snake_case)] 19 | #[derive(Deserialize, Debug, Default)] 20 | pub struct Args { 21 | pub cmd_train: bool, 22 | pub cmd_test: bool, 23 | pub flag_batch_size: Option, 24 | pub flag_learning_rate: Option, 25 | pub flag_momentum: Option, 26 | /// Path to the stored network. 
27 | pub arg_networkfile: PathBuf, 28 | } 29 | 30 | impl Args { 31 | pub(crate) fn data_mode(&self) -> DataMode { 32 | assert_ne!(self.cmd_train, self.cmd_test); 33 | if self.cmd_train { 34 | return DataMode::Train; 35 | } 36 | if self.cmd_test { 37 | return DataMode::Test; 38 | } 39 | unreachable!("nope"); 40 | } 41 | } 42 | 43 | pub const fn default_learning_rate() -> f32 { 44 | 0.10_f32 45 | } 46 | 47 | pub const fn default_momentum() -> f32 { 48 | 0.00 49 | } 50 | 51 | pub const fn default_batch_size() -> usize { 52 | 10 53 | } 54 | 55 | impl std::cmp::PartialEq for Args { 56 | fn eq(&self, other: &Self) -> bool { 57 | match (self.flag_learning_rate, other.flag_learning_rate) { 58 | (Some(lhs), Some(rhs)) if (rhs - lhs).abs() < 1e6 => {} 59 | (None, None) => {} 60 | _ => return false, 61 | } 62 | match (self.flag_momentum, other.flag_momentum) { 63 | (Some(lhs), Some(rhs)) if (rhs - lhs).abs() < 1e6 => {} 64 | (None, None) => {} 65 | _ => return false, 66 | } 67 | self.cmd_test == other.cmd_test 68 | && self.cmd_train == other.cmd_train 69 | && self.arg_networkfile == other.arg_networkfile 70 | && self.flag_batch_size == other.flag_batch_size 71 | } 72 | } 73 | 74 | impl std::cmp::Eq for Args {} 75 | 76 | pub enum DataMode { 77 | Train, 78 | Test, 79 | } 80 | 81 | impl DataMode { 82 | pub fn as_path(&self) -> &'static str { 83 | match self { 84 | DataMode::Train => "assets/norm_mackeyglass_train.csv", 85 | DataMode::Test => "assets/norm_mackeyglass_test.csv", 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /juice-examples/mnist-image-multiclass-classification/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /juice-examples/mnist-image-multiclass-classification/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "example-mnist-classification" 3 | description = "MNIST Image Multiclass Classification example of Juice" 4 | version = "0.0.1" 5 | authors = ["Bernhard Schuster ", 6 | "Michael Hirn", 7 | "Maximilian Goisser"] 8 | edition = "2018" 9 | publish = false 10 | 11 | [dependencies] 12 | greenglas = { path = "../../greenglas" , version = "0.3.0"} 13 | juice = { path = "../../juice", default-features = false , version = "0.3.0"} 14 | coaster = { path = "../../coaster", default-features = false , version = "0.2.0"} 15 | coaster-nn = { path = "../../coaster-nn", default-features = false , version = "0.5.0"} 16 | 17 | juice-utils = {path = "../juice-utils"} 18 | 19 | csv = "1" 20 | hyper = "0.14" 21 | hyper-rustls = "0.23" 22 | futures = "0.3" 23 | futures-util = "0.3" 24 | 25 | tokio = "1" 26 | flate2 = "1" 27 | mnist = "0.5.0" 28 | 29 | log = "0.4" 30 | env_logger = "0.9" 31 | timeit = "0.1.2" 32 | 33 | docopt = "1" 34 | serde = { version = "1", features = ["derive"] } 35 | 36 | 37 | [features] 38 | default = ["native", "cuda"] 39 | native = ["juice/native", "coaster/native"] 40 | opencl = ["juice/opencl", "coaster/opencl"] 41 | cuda = ["juice/cuda", "coaster/cuda"] 42 | -------------------------------------------------------------------------------- /juice-examples/mnist-image-multiclass-classification/assets/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- 
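The `[features]` table in the Cargo.toml above mirrors the backend features of the core crates. As a hedged sketch of how backend selection for this example would look on the command line (the disclaimer in the examples README still applies: CUDA and cuDNN are currently required for the examples to build, so a CPU-only build may not succeed):

```bash
cd juice-examples/mnist-image-multiclass-classification
# default features: "native" and "cuda"
cargo build --release
# hypothetical native-only build -- may fail while CUDA/cuDNN remain hard requirements
cargo build --release --no-default-features --features native
```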
/juice/.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/.github/CONTRIBUTING.md -------------------------------------------------------------------------------- /juice/.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Error Description 2 | 3 | > error logs, as verbose as possible 4 | > describe which command you ran 5 | 6 | 7 | ### What should have happened 8 | 9 | > your expectations 10 | 11 | 12 | ### Describe in which context it happened 13 | 14 | > custom app, demo compilation, test run 15 | 16 | 17 | ### Environment 18 | 19 | OS: 20 | 21 | > `uname -a` 22 | > `cat /etc/os-release | grep PRETTY_NAME` 23 | 24 | GPU Devices: 25 | 26 | > `lspci | grep "\(VGA\|GPU\)"` 27 | > `lsmod | grep "\(amdgpu\|radeon\|nvidia\|nouveau\|i915\)"` 28 | 29 | Native-related issues: 30 | 31 | > `pkg-config --libs --cflags blas` 32 | 33 | CUDA-related issues: 34 | 35 | > `pkg-config --cflags cublas cudnn` 36 | > `env | grep "CU\(BLAS\|DNN\|DA\)_.*"` 37 | 38 | OpenCL-related issues: 39 | 40 | > `clinfo` 41 | -------------------------------------------------------------------------------- /juice/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | 4 | mynetwork 5 | -------------------------------------------------------------------------------- /juice/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "juice" 3 | description = "Machine Learning Framework for Hackers" 4 | version = "0.3.0" 5 | authors = [ 6 | "Bernhard Schuster ", 7 | "Lissa Hyacinth ", 8 | "Maximilian Goisser", 9 | "Michael Hirn", 10 | ] 11 | repository = "https://github.com/spearow/juice" 12 | # TODO get the website back on track 13 | homepage = "https://github.com/spearow/juice" 14 | #documentation = "https://spearow.github.io/juice" 15 | readme = "README.md" 16 | keywords = ["deep-learning", "neural-networks", "machine-learning", "framework"] 17 | license = "MIT OR Apache-2.0" 18 | edition = "2018" 19 | build = "build.rs" 20 | 21 | [dependencies] 22 | coaster = { default-features = false, version = "0.2", path = "../coaster" } 23 | coaster-blas = { default-features = false, version = "0.4", path = "../coaster-blas" } 24 | coaster-nn = { default-features = false, version = "0.5", path = "../coaster-nn" } 25 | 26 | log = "0.4" 27 | rand = "0.8" 28 | num = "0.4" 29 | capnp = "0.14" 30 | timeit = "0.1" 31 | 32 | [build-dependencies] 33 | capnpc = "0.14" 34 | 35 | [dev-dependencies] 36 | env_logger = "0.9" 37 | 38 | [features] 39 | default = ["native", "cuda"] 40 | native = ["coaster-blas/native", "coaster-nn/native"] 41 | cuda = ["coaster/cuda", "coaster-blas/cuda", "coaster-nn/cuda"] 42 | opencl = ["coaster/opencl", "coaster-blas/opencl", "coaster-nn/opencl"] 43 | # When enabled, all weights are initialized in a deterministic way.
44 | deterministic = [] 45 | -------------------------------------------------------------------------------- /juice/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2017-2020 Bernhard Schuster 4 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /juice/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | capnpc::CompilerCommand::new() 3 | .src_prefix("capnp") 4 | .file("capnp/juice.capnp") 5 | .run() 6 | .expect("capnpc schema compiler command must succeed"); 7 | } 8 | -------------------------------------------------------------------------------- /juice/doc/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | title = "juice documentation" 3 | description = "Introduction book to the juice framework" 4 | authors = [ 5 | "Maximilian Goisser", 6 | "Michael Hirn", 7 | "Bernhard Schuster ", 8 | ] 9 | 10 | language = "en" 11 | 12 | [output.html] 13 | mathjax-support = true 14 | 15 | [output.html.playpen] 16 | editable = true 17 | line-numbers = true 18 | 19 | [output.html.search] 20 | limit-results = 20 21 | use-boolean-and = true 22 | boost-title = 2 23 | boost-hierarchy = 2 24 | boost-paragraph = 1 25 | expand = true 26 | heading-split-level = 2 27 | 28 | [output.linkcheck] 29 | follow-web-links = true 30 | cache-timeout = 43200 31 | warning-policy = "warn" 32 | 33 | [output.linkcheck.http-headers] 34 | 'crates\.io' = ["Accept: text/html"] 35 | -------------------------------------------------------------------------------- /juice/doc/book/_FontAwesome/fonts/FontAwesome.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/doc/book/_FontAwesome/fonts/FontAwesome.ttf -------------------------------------------------------------------------------- /juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.eot 
-------------------------------------------------------------------------------- /juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/doc/book/_FontAwesome/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /juice/doc/book/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/juice/doc/book/favicon.png -------------------------------------------------------------------------------- /juice/doc/book/highlight.css: -------------------------------------------------------------------------------- 1 | /* Modified Base16 Atelier Dune Light - Theme 2 | /* Original by Bram de Haan (http://atelierbram.github.io/syntax-highlighting/atelier-schemes/dune) */ 3 | 4 | .hljs { 5 | display: block; 6 | overflow-x: auto; 7 | background: #f1f1f1; 8 | color: #6e6b5e; 9 | padding: 0.5em; 10 | -webkit-text-size-adjust: none; 11 | } 12 | 13 | 14 | /* Atelier-Dune Comment */ 15 | .hljs-comment { 16 | color: #AAA; 17 | } 18 | 19 | /* Atelier-Dune Red */ 20 | .hljs-variable, 21 | 22 | .hljs-tag, 23 | .hljs-regexp, 24 | .hljs-name, 25 | .ruby .hljs-constant, 26 | .xml .hljs-tag .hljs-title, 27 | .xml .hljs-pi, 28 | .xml .hljs-doctype, 29 | .html .hljs-doctype, 30 | .css .hljs-id, 31 | .css .hljs-class, 32 | .css .hljs-pseudo { 33 | color: #d73737; 34 | } 35 | 36 | /* Atelier-Dune Orange */ 37 | .hljs-number, 38 | .hljs-preprocessor, 39 | .hljs-built_in, 40 | .hljs-literal, 41 | .hljs-params, 42 | .hljs-attribute, 43 | .hljs-constant { 44 | color: #b65611; 45 | } 46 | 47 | /* Atelier-Dune Yellow */ 48 | .ruby .hljs-class .hljs-title, 49 | .css .hljs-rule .hljs-attribute { 50 | color: #ae9513; 51 | } 52 | 53 | /* Atelier-Dune Green */ 54 | .hljs-string, 55 | .hljs-value, 56 | .hljs-inheritance, 57 | .ruby .hljs-symbol, 58 | .xml .hljs-cdata { 59 | color: #2a9292; 60 | } 61 | 62 | /* Atelier-Dune Aqua */ 63 | .hljs-title, 64 | .css .hljs-hexcolor { 65 | color: #1fad83; 66 | } 67 | 68 | /* Atelier-Dune Blue */ 69 | .hljs-function, 70 | .python .hljs-decorator, 71 | .python .hljs-title, 72 | .ruby .hljs-function .hljs-title, 73 | .ruby .hljs-title .hljs-keyword, 74 | .perl .hljs-sub, 75 | .javascript .hljs-title, 76 | .coffeescript .hljs-title { 77 | color: #6684e1; 78 | } 79 | 80 | /* Atelier-Dune Purple */ 81 | .hljs-keyword, 82 | .javascript .hljs-function { 83 | color: #b854d4; 84 | } 85 | 86 | .coffeescript .javascript, 87 | .javascript .xml, 88 | .tex .hljs-formula, 89 | .xml .javascript, 90 | .xml .vbscript, 91 | .xml .css, 
92 | .xml .hljs-cdata { 93 | opacity: 0.5; 94 | } 95 | 96 | /* markdown */ 97 | .hljs-header { 98 | color: #A30000; 99 | } 100 | 101 | .hljs-link_label { 102 | color: #33CCCC; 103 | } 104 | 105 | .hljs-link_url { 106 | color: #CC66FF; 107 | } 108 | -------------------------------------------------------------------------------- /juice/doc/book/http/spearow.github.io/juice/juice/index.html: -------------------------------------------------------------------------------- 1 | # Rust API Documentation 2 | -------------------------------------------------------------------------------- /juice/doc/book/tomorrow-night.css: -------------------------------------------------------------------------------- 1 | /* Tomorrow Night Theme */ 2 | /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ 3 | /* Original theme - https://github.com/chriskempson/tomorrow-theme */ 4 | /* http://jmblog.github.com/color-themes-for-google-code-highlightjs */ 5 | 6 | /* Tomorrow Comment */ 7 | .hljs-comment { 8 | color: #969896; 9 | } 10 | 11 | /* Tomorrow Red */ 12 | .hljs-variable, 13 | .hljs-attribute, 14 | .hljs-tag, 15 | .hljs-regexp, 16 | .ruby .hljs-constant, 17 | .xml .hljs-tag .hljs-title, 18 | .xml .hljs-pi, 19 | .xml .hljs-doctype, 20 | .html .hljs-doctype, 21 | .css .hljs-id, 22 | .css .hljs-class, 23 | .css .hljs-pseudo { 24 | color: #cc6666; 25 | } 26 | 27 | /* Tomorrow Orange */ 28 | .hljs-number, 29 | .hljs-preprocessor, 30 | .hljs-pragma, 31 | .hljs-built_in, 32 | .hljs-literal, 33 | .hljs-params, 34 | .hljs-constant { 35 | color: #de935f; 36 | } 37 | 38 | /* Tomorrow Yellow */ 39 | .ruby .hljs-class .hljs-title, 40 | .css .hljs-rule .hljs-attribute { 41 | color: #f0c674; 42 | } 43 | 44 | /* Tomorrow Green */ 45 | .hljs-string, 46 | .hljs-value, 47 | .hljs-inheritance, 48 | .hljs-header, 49 | .hljs-name, 50 | .ruby .hljs-symbol, 51 | .xml .hljs-cdata { 52 | color: #b5bd68; 53 | } 54 | 55 | /* Tomorrow Aqua */ 56 | .hljs-title, 57 | .css .hljs-hexcolor { 58 | color: #8abeb7; 59 | } 60 | 61 | /* Tomorrow Blue */ 62 | .hljs-function, 63 | .python .hljs-decorator, 64 | .python .hljs-title, 65 | .ruby .hljs-function .hljs-title, 66 | .ruby .hljs-title .hljs-keyword, 67 | .perl .hljs-sub, 68 | .javascript .hljs-title, 69 | .coffeescript .hljs-title { 70 | color: #81a2be; 71 | } 72 | 73 | /* Tomorrow Purple */ 74 | .hljs-keyword, 75 | .javascript .hljs-function { 76 | color: #b294bb; 77 | } 78 | 79 | .hljs { 80 | display: block; 81 | overflow-x: auto; 82 | background: #1d1f21; 83 | color: #c5c8c6; 84 | padding: 0.5em; 85 | -webkit-text-size-adjust: none; 86 | } 87 | 88 | .coffeescript .javascript, 89 | .javascript .xml, 90 | .tex .hljs-formula, 91 | .xml .javascript, 92 | .xml .vbscript, 93 | .xml .css, 94 | .xml .hljs-cdata { 95 | opacity: 0.5; 96 | } 97 | -------------------------------------------------------------------------------- /juice/doc/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [Juice](./juice.md) 4 | - [Layers](./layers.md) 5 | - [Layer Lifecycle](./layer-lifecycle.md) 6 | - [Create a Network](./building-networks.md) 7 | - [Create a new Layer](./create-new-layer.md) 8 | - [Solvers](./solvers.md) 9 | - [Optimize Layers](./optimize-layers.md) 10 | - [Multi-Device Optimization](./multi-device-optimization.md) 11 | - [Distributed Optimization](./distributed-optimization.md) 12 | - [Backend](./backend.md) 13 | - [Glossary](./deep-learning-glossary.md) 14 | ----------- 15 | - [Rust API 
Documentation](./api-docs.md) 16 | -------------------------------------------------------------------------------- /juice/doc/src/api-docs.md: -------------------------------------------------------------------------------- 1 | 2 | # Rust API Documentation 3 | 4 | The latest and greatest [API Documentation][api-docs] based on the current git master status. 5 | 6 | [api-docs]: https://spearow.github.io/juice/juice/index.html 7 | -------------------------------------------------------------------------------- /juice/doc/src/backend.md: -------------------------------------------------------------------------------- 1 | # Backend 2 | 3 | Via the concept of a backend we can abstract over the platform we will execute 4 | or optimize a network on. The construction of a backend is trivial. The backend 5 | is passed to the `Solver` (one backend for the `network` and one for the 6 | `objective`). The `Solver` then executes all operations on the provided backend. 7 | 8 | ```rust 9 | let backend = ::std::rc::Rc::new(Backend::::default().unwrap()); 10 | 11 | // set up solver 12 | let mut solver_cfg = SolverConfig { minibatch_size: batch_size, base_lr: learning_rate, momentum: momentum, .. SolverConfig::default() }; 13 | solver_cfg.network = LayerConfig::new("network", net_cfg); 14 | solver_cfg.objective = LayerConfig::new("classifier", classifier_cfg); 15 | let mut solver = Solver::from_config(backend.clone(), backend.clone(), &solver_cfg); 16 | ``` 17 | 18 | The backend is a concept of 19 | [Coaster](https://github.com/spearow/coaster), to which you can refer 20 | for now, until this chapter becomes more fleshed out. 21 | -------------------------------------------------------------------------------- /juice/doc/src/create-new-layer.md: -------------------------------------------------------------------------------- 1 | # Create a new Layer 2 | 3 | A layer in Juice can implement any behavior as long as it takes an input and 4 | produces an output. As Juice is new, there are still many valuable layers that 5 | are not yet implemented. This is why this chapter shows how you can add new 6 | layers to Juice. 7 | 8 | A non-exhaustive list of steps to take in order to implement a new layer (a rough sketch of the resulting worker struct follows the list): 9 | 10 | > The Rust compiler is also very helpful with pointing out the necessary steps 11 | > for implementing a new layer struct. It might be beneficial to start the 12 | > implementation of a new layer from a copied file of an already existing layer. 13 | 14 | 1. Decide to which of the [five types](./layers.html#What can Layers do?) 15 | the new layer belongs. This determines under which directory to put the layer 16 | implementation in the Juice project. 17 | 18 | 2. Create the `Layer` worker struct. 19 | 20 | 3. Expose the `Layer` worker struct in the `mod.rs` of the layer type directory. 21 | 22 | 4. Expose the `Layer` worker struct in the `mod.rs` of the `/layers` directory. 23 | 24 | 5. Implement `ILayer` and its trait boundaries for the new `Layer` worker struct. 25 | 26 | 6. Add the new layer to the `LayerType` in `layer.rs` and add the matching 27 | for `.support_in_place` and `.worker_from_config`. 28 | 29 | 7. If the new layer relies on a coaster operation, also add the coaster 30 | trait boundary. 31 | 32 | 8. Add documentation and serialization to the new layer. 33 | 34 | 9. (optional) Depending on how complex the layer is, you might also add tests and more 35 | advanced implementations for its `.from_config`, `.reshape` or other helper 36 | methods.
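To make steps 2 and 5 concrete, here is a rough sketch of a worker struct, modeled on the existing `Softmax`/`LogSoftmax` layers. It is illustrative only: the layer name is made up, the trait bounds and type parameters are abbreviated, and a real implementation also needs `ComputeInputGradient` and `ComputeParametersGradient` impls plus the `LayerType` wiring from step 6. Take the exact signatures from `layer.rs` and the existing layers, not from this sketch.

```rust
use crate::co::{IBackend, SharedTensor};
use crate::layer::*;
use crate::util::ArcLock;

#[derive(Debug, Clone)]
/// Hypothetical pass-through layer, used only to illustrate the steps above.
pub struct Identity;

impl<B: IBackend> ILayer<B> for Identity {
    fn reshape(
        &mut self,
        _backend: ::std::rc::Rc<B>,
        input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
        _input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
        _weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
        _weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
        output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
        _output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
    ) {
        // Give the output blob the same shape as the input blob,
        // just like the Softmax layer does.
        let input_desc = input_data[0].read().unwrap().desc().clone();
        output_data[0].write().unwrap().resize(&input_desc).unwrap();
    }
}

impl<B: IBackend> ComputeOutput<f32, B> for Identity {
    fn compute_output(
        &self,
        _backend: &B,
        _weights: &[&SharedTensor<f32>],
        _input_data: &[&SharedTensor<f32>],
        _output_data: &mut [&mut SharedTensor<f32>],
    ) {
        // A real layer would call into a coaster operation here,
        // e.g. backend.softmax(input_data[0], output_data[0]).unwrap();
    }
}
```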
37 | -------------------------------------------------------------------------------- /juice/doc/src/deep-learning-glossary.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | ### Layer 4 | 5 | **In General** 6 | 7 | A layer is the highest-level building block in a (Deep) Neural Network. A layer 8 | is a container that usually receives weighted input, transforms it and returns 9 | the result as output to the next layer. A layer usually contains one type of 10 | function like ReLU, pooling, convolution etc. so that it can be easily compared 11 | to other parts of the network. The first and last layers in a network are called 12 | input and output layers, respectively, and all layers in between are called 13 | hidden layers. 14 | 15 | **In Juice** 16 | 17 | In Juice, a layer is very similar to the general understanding of a layer. A layer 18 | in Juice, like a layer in a (Deep) Neural Network, 19 | 20 | * is the highest-level building block 21 | * needs to receive input, might transform it and needs to return the result 22 | * should be uniform (it does one type of function) 23 | 24 | Additionally to a Neural Network layer, a Juice layer can implement any 25 | functionality, not only those related to Neural Networks like ReLU, pooling, 26 | LSTM, etc. For example, the `Sequential` layer in Juice, allows it to connect 27 | multiple layers, creating a network. 28 | 29 | ### Network 30 | 31 | **In General** 32 | 33 | A network, also often called Neural Network (NN) or Artificial Neural Network 34 | (ANN) is a subset of Machine Learning methods. 35 | 36 | A not exhaustive list of other Machine Learning methods: 37 | *Linear Regression, SVM, Genetic/Evolution Algorithms, dynamic programming, 38 | deterministic algorithmic optimization methods.* 39 | 40 | **In Juice** 41 | 42 | In Juice, a network means a graph (a connected set) of one or more 43 | [layers](./layers.html). This network can consist of Artificial Neural Network 44 | methods, other Machine Learning methods or any other (not Machine Learning 45 | related) methods. As described in [2. Layers](./layers.html) a network in Juice 46 | is actually a layer which connects other layers. 47 | 48 | An initialized network is a network, which is ready to be executed, meaning it 49 | is fully constructed e.g. all necessary memory is allocated on the host or device. 50 | -------------------------------------------------------------------------------- /juice/doc/src/distributed-optimization.md: -------------------------------------------------------------------------------- 1 | # Distributed Optimization 2 | 3 | The distributed optimization of networks will (very likely) be managed by a 4 | standalone crate on top of Juice. Although distributed optimization will not be a 5 | core part of Juice itself, we will cover the topic of distributed optimization 6 | with Juice here in this chapter of the book. 7 | -------------------------------------------------------------------------------- /juice/doc/src/http/spearow.github.io/spearow/juice/index.html: -------------------------------------------------------------------------------- 1 | # Rust API Documentation 2 | -------------------------------------------------------------------------------- /juice/doc/src/multi-device-optimization.md: -------------------------------------------------------------------------------- 1 | # Multi-Device Optimization 2 | 3 | Optimization of a Layer over multiple devices is planned for the Juice 0.3 4 | release. 
Thanks to the decoupling of computation and representation through 5 | Coaster, multi-device optimization is fairly straightforward to implement. 6 | 7 | Pull Requests are welcome :) 8 | -------------------------------------------------------------------------------- /juice/doc/src/solvers.md: -------------------------------------------------------------------------------- 1 | # Solvers 2 | 3 | Solvers optimize the layer with a given objective. This might happen 4 | by updating the weights of the layer, which is the usual practice for 5 | Neural Networks but is not limited to this kind of learning. 6 | 7 | A solver can have different learning (solving) policies. With Neural Networks, it 8 | is common to use a Stochastic Gradient Descent-based approach 9 | like Adagrad, whereas for a classical regression the solving might be 10 | done via a maximum likelihood estimation. 11 | 12 | Similar to `Layer`s, we can construct a [`Solver` (_/src/solver/mod.rs_)][solver] 13 | from a [`SolverConfig` (_/src/solver/mod.rs_)][solver-config]. 14 | When passing this `SolverConfig` (e.g. an Adagrad `SolverConfig`) to the 15 | `Solver::from_config` method, a `Solver` with the behavior 16 | of the config is returned. 17 | 18 | The most characteristic feature of the `SolverConfig` is its `network` 19 | and `objective` fields. These two fields expect one `LayerConfig` each. When 20 | passing the `SolverConfig` to the `Solver::from_config` method, the 21 | `LayerConfig` of the `network` and `objective` fields are turned into 22 | an initialized `Layer` and provided to the returned `Solver`. 23 | 24 | ```rust 25 | // set up a Solver 26 | let mut solver_cfg = SolverConfig { minibatch_size: batch_size, base_lr: learning_rate, momentum: momentum, .. SolverConfig::default() }; 27 | solver_cfg.network = LayerConfig::new("network", net_cfg); 28 | solver_cfg.objective = LayerConfig::new("classifier", classifier_cfg); 29 | let mut solver = Solver::from_config(backend.clone(), backend.clone(), &solver_cfg); 30 | ``` 31 | 32 | The now initialized `Solver` can be fed with data to optimize the `network`. 33 | 34 | [solver]: https://github.com/spearow/juice/blob/master/juice/src/solver/mod.rs 35 | [solver-config]: https://github.com/spearow/juice/blob/master/juice/src/solver/mod.rs 36 | -------------------------------------------------------------------------------- /juice/perf/README.md: -------------------------------------------------------------------------------- 1 | # Profiling 2 | 3 | Coaster comes with scripts to help with profiling performance problems. 4 | 5 | Run [perf](http://www.brendangregg.com/perf.html) on one of the benchmark tests: 6 | 7 | ```sh 8 | # compile latest version of benchmarks with DWARF information 9 | cargo rustc --bench rblas_overhead -- -g 10 | # benchmark binary is at target/debug/shared_memory-54e69b24ec0c2d04 11 | # benchmark is called bench_256_sync_1mb_native_cuda 12 | sudo ./perf/run_perf.sh target/debug/shared_memory-54e69b24ec0c2d04 bench_256_sync_1mb_native_cuda # perf needs sudo 13 | ``` 14 | -------------------------------------------------------------------------------- /juice/perf/run_perf.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/bash 2 | set -e 3 | if [ $# -lt 2 ] 4 | then 5 | echo "No binary name or benchmark name supplied" 6 | exit 1 7 | fi 8 | binaryname=$1 9 | benchname=$2 10 | mkdir -p target/perf 11 | perf record -a -g --output target/perf/${benchname}.data ${binaryname} --bench ${benchname} 12 | perf script -f -i target/perf/${benchname}.data > target/perf/${benchname}.scripted 13 | stackcollapse-perf target/perf/${benchname}.scripted | grep ${benchname} > target/perf/${benchname}.folded 14 | flamegraph target/perf/${benchname}.folded > target/perf/${benchname}.svg 15 | -------------------------------------------------------------------------------- /juice/rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_strings = false 2 | reorder_imports = true 3 | max_width = 120 4 | -------------------------------------------------------------------------------- /juice/src/capnp_util.rs: -------------------------------------------------------------------------------- 1 | //! Provides functionality for Cap'n Proto (de)serialization. 2 | 3 | pub trait CapnpWrite<'a> { 4 | /// The Builder that was autogenerated by capnp. 5 | type Builder; 6 | 7 | /// Write the struct into the message that is being built by the Builder. 8 | fn write_capnp(&self, builder: &mut Self::Builder); 9 | } 10 | 11 | pub trait CapnpRead<'a> { 12 | /// The Reader that was autogenerated by capnp. 13 | type Reader; 14 | 15 | /// Read the struct from the Reader. 16 | fn read_capnp(reader: Self::Reader) -> Self; 17 | } 18 | -------------------------------------------------------------------------------- /juice/src/layers/activation/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides nonlinear activation methods. 2 | //! 3 | //! Activation Layers take a input tensor, provide the activation operation and 4 | //! produce a output tensor. 5 | //! Thanks to the nonlinearity of the activation methods, we can 'learn' and 6 | //! detect nonlinearities 7 | //! in our (complex) datasets. 8 | //! 9 | //! The activation operation used should depend on the task at hand. For binary 10 | //! classification a 11 | //! step function might be very useful. For more complex tasks continious 12 | //! activation functions such 13 | //! as [Sigmoid][mod_sigmoid], TanH, [ReLU][mod_relu] should be used. In most cases ReLU might 14 | //! provide the best results. 15 | //! 16 | //! If you supply the same blob as input and output to a layer via the [LayerConfig][struct_layerconfig], 17 | //! computations will be done in-place, requiring less memory. 18 | //! 19 | //! The activation function is also sometimes called transfer function. 20 | //! 21 | //! [mod_sigmoid]: ./sigmoid/index.html 22 | //! [mod_relu]: ./relu/index.html 23 | //! [struct_layerconfig]: ../../layer/struct.LayerConfig.html 24 | 25 | /// macro helper to implement activation trait 26 | /// TODO see common 27 | #[macro_export] 28 | macro_rules! 
impl_ilayer_activation { 29 | () => { 30 | fn exact_num_output_blobs(&self) -> Option { 31 | Some(1) 32 | } 33 | fn exact_num_input_blobs(&self) -> Option { 34 | Some(1) 35 | } 36 | }; 37 | } 38 | 39 | pub use self::relu::ReLU; 40 | pub use self::sigmoid::Sigmoid; 41 | pub use self::tanh::TanH; 42 | 43 | pub mod relu; 44 | pub mod sigmoid; 45 | pub mod tanh; 46 | -------------------------------------------------------------------------------- /juice/src/layers/common/log_softmax.rs: -------------------------------------------------------------------------------- 1 | //! Computes the logarithmic softmax of its input. 2 | //! 3 | 4 | use crate::co::{IBackend, SharedTensor}; 5 | use crate::conn; 6 | use crate::layer::*; 7 | use crate::util::ArcLock; 8 | 9 | #[derive(Debug, Clone)] 10 | #[allow(missing_copy_implementations)] 11 | /// LogSoftmax Layer 12 | pub struct LogSoftmax; 13 | 14 | impl> ILayer for LogSoftmax { 15 | fn reshape( 16 | &mut self, 17 | backend: ::std::rc::Rc, 18 | input_data: &mut Vec>>, 19 | input_gradient: &mut Vec>>, 20 | weights_data: &mut Vec>>, 21 | weights_gradient: &mut Vec>>, 22 | output_data: &mut Vec>>, 23 | output_gradient: &mut Vec>>, 24 | ) { 25 | let input_desc = input_data[0].read().unwrap().desc().clone(); 26 | input_gradient[0].write().unwrap().resize(&input_desc).unwrap(); 27 | output_data[0].write().unwrap().resize(&input_desc).unwrap(); 28 | output_gradient[0].write().unwrap().resize(&input_desc).unwrap(); 29 | } 30 | } 31 | 32 | impl> ComputeOutput for LogSoftmax { 33 | fn compute_output( 34 | &self, 35 | backend: &B, 36 | _weights: &[&SharedTensor], 37 | input_data: &[&SharedTensor], 38 | output_data: &mut [&mut SharedTensor], 39 | ) { 40 | backend.log_softmax(input_data[0], output_data[0]).unwrap(); 41 | } 42 | } 43 | 44 | impl> ComputeInputGradient for LogSoftmax { 45 | fn compute_input_gradient( 46 | &self, 47 | backend: &B, 48 | weights_data: &[&SharedTensor], 49 | output_data: &[&SharedTensor], 50 | output_gradients: &[&SharedTensor], 51 | input_data: &[&SharedTensor], 52 | input_gradients: &mut [&mut SharedTensor], 53 | ) { 54 | backend 55 | .log_softmax_grad(output_data[0], output_gradients[0], input_gradients[0]) 56 | .unwrap(); 57 | } 58 | } 59 | 60 | impl> ComputeParametersGradient for LogSoftmax {} 61 | 62 | impl ::std::default::Default for LogSoftmax { 63 | fn default() -> LogSoftmax { 64 | LogSoftmax 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /juice/src/layers/common/softmax.rs: -------------------------------------------------------------------------------- 1 | //! Computes the softmax of its input. 2 | //! 3 | //! For the logarithmic softmax see the `LogSoftmax` layer. 
4 | 5 | use crate::co::{IBackend, SharedTensor}; 6 | use crate::conn; 7 | use crate::layer::*; 8 | use crate::util::ArcLock; 9 | 10 | #[derive(Debug, Clone)] 11 | #[allow(missing_copy_implementations)] 12 | /// Softmax Layer 13 | pub struct Softmax; 14 | 15 | impl> ILayer for Softmax { 16 | fn reshape( 17 | &mut self, 18 | backend: ::std::rc::Rc, 19 | input_data: &mut Vec>>, 20 | input_gradient: &mut Vec>>, 21 | weights_data: &mut Vec>>, 22 | weights_gradient: &mut Vec>>, 23 | output_data: &mut Vec>>, 24 | output_gradient: &mut Vec>>, 25 | ) { 26 | let input_desc = input_data[0].read().unwrap().desc().clone(); 27 | input_gradient[0].write().unwrap().resize(&input_desc).unwrap(); 28 | output_data[0].write().unwrap().resize(&input_desc).unwrap(); 29 | output_gradient[0].write().unwrap().resize(&input_desc).unwrap(); 30 | } 31 | } 32 | 33 | impl> ComputeOutput for Softmax { 34 | fn compute_output( 35 | &self, 36 | backend: &B, 37 | _weights: &[&SharedTensor], 38 | input_data: &[&SharedTensor], 39 | output_data: &mut [&mut SharedTensor], 40 | ) { 41 | backend.softmax(input_data[0], output_data[0]).unwrap(); 42 | } 43 | } 44 | 45 | impl> ComputeInputGradient for Softmax { 46 | fn compute_input_gradient( 47 | &self, 48 | backend: &B, 49 | weights_data: &[&SharedTensor], 50 | output_data: &[&SharedTensor], 51 | output_gradients: &[&SharedTensor], 52 | input_data: &[&SharedTensor], 53 | input_gradients: &mut [&mut SharedTensor], 54 | ) { 55 | backend 56 | .softmax_grad(output_data[0], output_gradients[0], input_gradients[0]) 57 | .unwrap(); 58 | } 59 | } 60 | 61 | impl> ComputeParametersGradient for Softmax {} 62 | 63 | impl ::std::default::Default for Softmax { 64 | fn default() -> Softmax { 65 | Softmax 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /juice/src/layers/container/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides container layers. 2 | //! 3 | //! For now layers in container should be discribed as layers that are used 4 | //! to connect multiple layers together to create 'networks'. 5 | 6 | pub use self::sequential::{Sequential, SequentialConfig}; 7 | 8 | pub mod sequential; 9 | -------------------------------------------------------------------------------- /juice/src/layers/loss/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides methods to calculate the loss (cost) of some output. 2 | //! 3 | //! A loss function is also sometimes called cost function. 4 | 5 | /// macro helper for default loss 6 | #[macro_export] 7 | macro_rules! impl_ilayer_loss { 8 | () => { 9 | fn exact_num_output_blobs(&self) -> Option { 10 | Some(1) 11 | } 12 | fn exact_num_input_blobs(&self) -> Option { 13 | Some(1) 14 | } 15 | fn auto_output_blobs(&self) -> bool { 16 | true 17 | } 18 | 19 | fn loss_weight(&self, output_id: usize) -> Option { 20 | if output_id == 0 { 21 | Some(1f32) 22 | } else { 23 | None 24 | } 25 | } 26 | }; 27 | } 28 | 29 | pub use self::mean_squared_error::MeanSquaredError; 30 | pub use self::negative_log_likelihood::{NegativeLogLikelihood, NegativeLogLikelihoodConfig}; 31 | pub mod mean_squared_error; 32 | pub mod negative_log_likelihood; 33 | -------------------------------------------------------------------------------- /juice/src/layers/utility/flatten.rs: -------------------------------------------------------------------------------- 1 | //! Flattens the bottom Blob into a simpler top Blob. 2 | //! 3 | //! 
Input of shape n * c * h * w becomes 4 | //! a simple vector output of shape n * (c*h*w). 5 | //! 6 | #[derive(Debug, Clone)] 7 | #[allow(missing_copy_implementations)] 8 | /// Flattening Utility Layer 9 | pub struct Flatten; 10 | -------------------------------------------------------------------------------- /juice/src/layers/utility/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides various helpful layers, which might be not directly related to 2 | //! neural networks in general. 3 | //! 4 | //! These layers do not have to necesarrely manipulate the data flowing through 5 | //! them and might have 6 | //! no effect on the Networks' capabilities to learn (e.g. loging) but obey all 7 | //! the rules of a [Layer][1]. 8 | //! The type of these layers can vary a lot. From data normalization to 9 | //! specific data access layers for e.g. a database like LevelDB. 10 | //! 11 | //! [1]: ../../layer/index.html 12 | 13 | pub use self::flatten::Flatten; 14 | pub use self::reshape::{Reshape, ReshapeConfig}; 15 | 16 | pub mod flatten; 17 | pub mod reshape; 18 | -------------------------------------------------------------------------------- /juice/tests/solver_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate coaster as co; 2 | extern crate juice; 3 | 4 | #[cfg(all(test, whatever))] 5 | // #[cfg(test)] 6 | mod solver_specs { 7 | use co::backend::Backend; 8 | use co::frameworks::Native; 9 | use juice::solver::*; 10 | 11 | #[test] 12 | // fixed: always return base_lr. 13 | fn lr_fixed() { 14 | let cfg = SolverConfig { 15 | lr_policy: LRPolicy::Fixed, 16 | base_lr: 5f32, 17 | gamma: 0.5f32, 18 | ..SolverConfig::default() 19 | }; 20 | assert!(cfg.get_learning_rate(0) == 5f32); 21 | assert!(cfg.get_learning_rate(100) == 5f32); 22 | assert!(cfg.get_learning_rate(1000) == 5f32); 23 | } 24 | 25 | #[test] 26 | // step: return base_lr * gamma ^ (floor(iter / step)) 27 | fn lr_step() { 28 | let cfg = SolverConfig { 29 | lr_policy: LRPolicy::Step, 30 | base_lr: 5f32, 31 | gamma: 0.5f32, 32 | stepsize: 10, 33 | ..SolverConfig::default() 34 | }; 35 | assert!(cfg.get_learning_rate(0) == 5f32); 36 | assert!(cfg.get_learning_rate(10) == 2.5f32); 37 | assert!(cfg.get_learning_rate(20) == 1.25f32); 38 | } 39 | 40 | #[test] 41 | // exp: return base_lr * gamma ^ iter 42 | fn lr_exp() { 43 | let cfg = SolverConfig { 44 | lr_policy: LRPolicy::Exp, 45 | base_lr: 5f32, 46 | gamma: 0.5f32, 47 | ..SolverConfig::default() 48 | }; 49 | assert!(cfg.get_learning_rate(0) == 5f32); 50 | assert!(cfg.get_learning_rate(1) == 2.5f32); 51 | assert!(cfg.get_learning_rate(2) == 1.25f32); 52 | assert!(cfg.get_learning_rate(3) == 0.625f32); 53 | 54 | let cfg2 = SolverConfig { 55 | lr_policy: LRPolicy::Exp, 56 | base_lr: 5f32, 57 | gamma: 0.25f32, 58 | ..SolverConfig::default() 59 | }; 60 | assert!(cfg2.get_learning_rate(0) == 5f32); 61 | assert!(cfg2.get_learning_rate(1) == 1.25f32); 62 | assert!(cfg2.get_learning_rate(2) == 0.3125f32); 63 | } 64 | 65 | #[test] 66 | fn instantiate_solver_sgd_momentum() { 67 | let cfg = SolverConfig { 68 | solver: SolverKind::SGD(SGDKind::Momentum), 69 | ..SolverConfig::default() 70 | }; 71 | Solver::>>, Backend>::from_config(&cfg); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /magic.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | 3 | image_resource: 4 | type: registry-image 5 | 
source: 6 | repository: quay.io/spearow/machine-learning-container-fedora-cuda 7 | tag: latest 8 | 9 | inputs: 10 | - name: juice 11 | 12 | caches: 13 | - path: cargo_home 14 | 15 | run: 16 | path: bash 17 | dir: juice 18 | 19 | -------------------------------------------------------------------------------- /rcublas/.gitignore: -------------------------------------------------------------------------------- 1 | cublas/target 2 | cublas/Cargo.lock 3 | cublas-sys/target 4 | cublas-sys/Cargo.lock 5 | target 6 | Cargo.lock 7 | -------------------------------------------------------------------------------- /rcublas/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ## 0.2.0 (2016-02-04) 3 | 4 | #### Features 5 | 6 | * **context:** expose operations as methods of Context ([3abfbc60](https://github.com/autumnai/rust-cublas/commit/3abfbc6090f632879dd6dfdfda0015fd0a392ee9)) 7 | 8 | 9 | ## 0.1.0 (2016-01-18) 10 | 11 | #### Bug Fixes 12 | 13 | * **manifest:** add missing description to Cargo.toml ([6bc0ee67](https://github.com/autumnai/rust-cublas/commit/6bc0ee67573468d31f81119b526a579a7fe51fbf)) 14 | * **typo:** change forgotten CUDNN to CUBLAS ([6dbe0923](https://github.com/autumnai/rust-cublas/commit/6dbe09233a5daa22a98cf8d11de956fccefd54b8)) 15 | -------------------------------------------------------------------------------- /rcublas/Cargo.toml.FIXME: -------------------------------------------------------------------------------- 1 | # until 2 | # https://github.com/rust-lang/cargo/issues/5042 3 | # is resolved 4 | # nested workspaces are unuseable 5 | [workspace] 6 | members = ["cublas", "cublas-sys"] 7 | 8 | [patch.crates-io] 9 | rcublas-sys = { path = "./cublas-sys" } 10 | 11 | [profile.bench] 12 | opt-level = 3 13 | debug = false 14 | rpath = false 15 | lto = false 16 | debug-assertions = false 17 | codegen-units = 1 18 | 19 | [profile.dev] 20 | opt-level = 0 21 | debug = true 22 | rpath = false 23 | lto = false 24 | debug-assertions = true 25 | codegen-units = 2 26 | -------------------------------------------------------------------------------- /rcublas/cublas-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rcublas-sys" 3 | description = "FFI bindings to cuBLAS" 4 | version = "0.5.0" 5 | edition = "2018" 6 | authors = [ 7 | "Bernhard Schuster ", 8 | "Maximilian Goisser" 9 | ] 10 | repository = "https://github.com/spearow/juice" 11 | homepage = "https://github.com/spearow/juice/tree/master/rcublas/cublas-sys" 12 | keywords = ["cublas", "cuda", "nvidia", "sys"] 13 | license = "MIT OR Apache-2.0" 14 | links = "cublas" 15 | build = "build.rs" 16 | 17 | [dependencies] 18 | libc = "0.2" 19 | 20 | [build-dependencies] 21 | pkg-config = "0.3" 22 | bindgen = { version = "^0.60.1", optional = true } 23 | 24 | [features] 25 | default = [] 26 | generate = ["bindgen"] 27 | -------------------------------------------------------------------------------- /rcublas/cublas-sys/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 The rust-cublas Developers 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit 
persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /rcublas/cublas-sys/README.md: -------------------------------------------------------------------------------- 1 | The file `src/generated.rs` was created with [bindgen](https://github.com/crabtw/rust-bindgen) using `build.rs` 2 | -------------------------------------------------------------------------------- /rcublas/cublas-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod generated; 2 | 3 | pub use crate::generated::*; 4 | 5 | unsafe impl std::marker::Send for crate::generated::cublasContext {} 6 | -------------------------------------------------------------------------------- /rcublas/cublas-sys/wrapper.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /rcublas/cublas/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rcublas" 3 | description = "safe Rust wrapper for CUDA's cuBLAS" 4 | version = "0.6.0" 5 | edition = "2018" 6 | authors = [ 7 | "Bernhard Schuster ", 8 | "Lissa Hyacinth ", 9 | "Maximilian Goisser", 10 | ] 11 | homepage = "https://github.com/spearow/juice/tree/master/rcublas" 12 | repository = "https://github.com/spearow/juice" 13 | readme = "../README.md" 14 | keywords = ["cublas", "cuda", "nvidia", "blas"] 15 | license = "MIT OR Apache-2.0" 16 | 17 | [dependencies] 18 | libc = "0.2" 19 | rcublas-sys = { version = "0.5.0", path = "../cublas-sys" } 20 | lazy_static = "1" 21 | log = "0.4" 22 | thiserror = "1.0" 23 | 24 | [dev-dependencies] 25 | coaster = { path = "../../coaster", features = ["cuda"] } 26 | env_logger = "0.9" 27 | -------------------------------------------------------------------------------- /rcublas/cublas/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 The rust-cublas Developers 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /rcublas/cublas/src/api/enums.rs: -------------------------------------------------------------------------------- 1 | use crate::ffi::*; 2 | 3 | #[derive(Debug, PartialEq, Clone, Copy)] 4 | pub enum PointerMode { 5 | Host, 6 | Device, 7 | } 8 | 9 | impl PointerMode { 10 | pub fn from_c(in_mode: cublasPointerMode_t) -> PointerMode { 11 | match in_mode { 12 | cublasPointerMode_t::CUBLAS_POINTER_MODE_HOST => PointerMode::Host, 13 | cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE => PointerMode::Device, 14 | _ => unreachable!("wrapping library is newer than this impl, please file a BUG"), 15 | } 16 | } 17 | 18 | pub fn as_c(self) -> cublasPointerMode_t { 19 | match self { 20 | PointerMode::Host => cublasPointerMode_t::CUBLAS_POINTER_MODE_HOST, 21 | PointerMode::Device => cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE, 22 | } 23 | } 24 | } 25 | 26 | #[derive(Debug, PartialEq, Clone, Copy)] 27 | pub enum Operation { 28 | NoTrans, 29 | Trans, 30 | ConjTrans, 31 | } 32 | 33 | impl Operation { 34 | pub fn from_c(in_mode: cublasOperation_t) -> Operation { 35 | match in_mode { 36 | cublasOperation_t::CUBLAS_OP_N => Operation::NoTrans, 37 | cublasOperation_t::CUBLAS_OP_T => Operation::Trans, 38 | cublasOperation_t::CUBLAS_OP_C => Operation::ConjTrans, 39 | _ => unreachable!("wrapping library is newer than this impl, please file a BUG"), 40 | } 41 | } 42 | 43 | pub fn as_c(self) -> cublasOperation_t { 44 | match self { 45 | Operation::NoTrans => cublasOperation_t::CUBLAS_OP_N, 46 | Operation::Trans => cublasOperation_t::CUBLAS_OP_T, 47 | Operation::ConjTrans => cublasOperation_t::CUBLAS_OP_C, 48 | } 49 | } 50 | } 51 | 52 | // TODO: cublasFillMode_t 53 | // TODO: cublasDiagType_t 54 | // TODO: cublasSideMode_t 55 | // TODO: cublasAtomicsMode_t 56 | // TODO: cublasDataType_t 57 | -------------------------------------------------------------------------------- /rcublas/cublas/src/api/mod.rs: -------------------------------------------------------------------------------- 1 | pub use self::context::Context; 2 | 3 | pub use self::enums::{Operation, PointerMode}; 4 | 5 | pub use self::level1::*; 6 | pub use self::level3::*; 7 | 8 | mod context; 9 | 10 | mod level1; 11 | mod level3; 12 | mod util; 13 | 14 | mod enums; 15 | -------------------------------------------------------------------------------- /rcublas/cublas/src/chore.rs: -------------------------------------------------------------------------------- 1 | use crate::co::backend::{Backend, IBackend}; 2 | use crate::co::frameworks::native::flatbox::FlatBox; 3 | use crate::co::frameworks::{Cuda, Native}; 4 | use crate::co::tensor::SharedTensor; 5 | use env_logger; 6 | 7 | pub fn test_setup() { 8 | let _ = env_logger::builder().is_test(true).try_init(); 9 | } 10 | 11 | pub fn test_teardown() {} 12 | 13 | pub fn get_native_backend() -> Backend { 14 | Backend::::default().unwrap() 15 | } 16 | 17 | pub fn get_cuda_backend() -> Backend { 18 | Backend::::default().unwrap() 19 | } 
20 | 21 | pub fn write_to_memory<T: Copy>(mem: &mut FlatBox, data: &[T]) { 22 | let mem_buffer = mem.as_mut_slice::<T>(); 23 | for (index, datum) in data.iter().enumerate() { 24 | mem_buffer[index] = *datum; 25 | } 26 | } 27 | 28 | pub fn filled_tensor<B: IBackend, T: Copy>(_backend: &B, n: usize, val: T) -> SharedTensor<T> { 29 | let mut x = SharedTensor::<T>::new(&vec![n]); 30 | let values: &[T] = &::std::iter::repeat(val) 31 | .take(x.capacity()) 32 | .collect::<Vec<T>>(); 33 | write_to_memory(x.write_only(get_native_backend().device()).unwrap(), values); 34 | x 35 | } 36 | -------------------------------------------------------------------------------- /rcublas/cublas/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Provides Rust Errors for every cuBLAS status. 2 | 3 | #[allow(unused)] 4 | pub type Result<T> = std::result::Result<T, Error>; 5 | 6 | #[derive(Debug, Copy, Clone, thiserror::Error)] 7 | /// Defines cuBLAS errors. 8 | pub enum Error { 9 | /// Failure with cuBLAS initialization. 10 | #[error("CUDA Driver/Runtime API not initialized.")] 11 | NotInitialized, 12 | /// Failure with allocation. 13 | #[error("The resources could not be allocated.")] 14 | AllocFailed, 15 | /// Internal cuBLAS failure. 16 | #[error("Internal: {0}")] 17 | InternalError(&'static str), 18 | /// Failure with provided value. 19 | #[error("Invalid value: {0}")] 20 | InvalidValue(&'static str), 21 | /// Failure with the hardware architecture. 22 | #[error( 23 | "cuBLAS only supports devices with compute capabilities greater than or equal to 1.3." 24 | )] 25 | ArchMismatch, 26 | /// Failure with memory access or internal error/bug. 27 | #[error("There was an error accessing GPU memory.")] 28 | MappingError, 29 | /// Failure with Kernel execution. 30 | #[error("Execution failed to launch on the GPU.")] 31 | ExecutionFailed, 32 | /// Failure with an unsupported request. 33 | #[error("Not supported: {0}")] 34 | NotSupported(&'static str), 35 | /// Failure with the CUDA license. 36 | #[error("There is an error with the license. Check that it is present, unexpired and the NVIDIA_LICENSE_FILE environment variable has been set correctly.")] 37 | LicenseError, 38 | /// Failure for an unknown reason. 39 | #[error("Unknown error: {0} - code {1}")] 40 | Unknown(&'static str, u64), 41 | } 42 | -------------------------------------------------------------------------------- /rcublas/cublas/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | pub(crate) use rcublas_sys as ffi; 4 | 5 | #[cfg(test)] 6 | use coaster as co; 7 | 8 | pub use api::Context; 9 | pub use error::Error; 10 | 11 | #[derive(Debug, Copy, Clone)] 12 | /// Defines the cuBLAS API.
13 | pub struct API; 14 | 15 | pub mod api; 16 | pub mod error; 17 | 18 | #[cfg(test)] 19 | pub(crate) mod chore; 20 | -------------------------------------------------------------------------------- /rcudnn/.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | /cudnn/target 3 | /cudnn-sys/target 4 | /cudnn-sys/target-install 5 | .travis/libcudnn.so 6 | target 7 | -------------------------------------------------------------------------------- /rcudnn/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | ## 1.3.1 (2016-03-02) 3 | 4 | `cudnn-sys` was bumped to `0.0.3` 5 | 6 | #### Features 7 | 8 | * **cudnnv4:** add passive cuDNNv4 support ([de6ced69](https://github.com/autumnai/rust-cudnn/commit/de6ced6922213619cff97b0258d4bf24bdbfcc77)) 9 | 10 | 11 | ## 1.3.0 (2016-03-01) 12 | 13 | #### Breaking Changes 14 | 15 | * **convolution:** 16 | * remove workspace from the ConvolutionConfig, changing its constructor([d1e106cc](https://github.com/autumnai/rust-cudnn/commit/d1e106ccb28ae7727dc42235f2d6d34927c1532f)) 17 | * convolution operations now require a workspace pointer to be passed; allows shared workspace ([d1e106cc](https://github.com/autumnai/rust-cudnn/commit/d1e106ccb28ae7727dc42235f2d6d34927c1532f)) 18 | 19 | 20 | ## 1.2.1 (2016-02-21) 21 | 22 | #### Fix 23 | 24 | * **dependeny:** bump collenchyma to 0.0.8 ([00559f63](https://github.com/autumnai/rust-cudnn/commit/00559f63faaa178d2bfe221fa074c905cf3b07e6)) 25 | 26 | 27 | 28 | ## 1.2.0 (2016-02-21) 29 | 30 | #### Features 31 | 32 | * **log_softmax:** add logarithmic softmax (`log_softmax`) ([2147ccec](https://github.com/autumnai/rust-cudnn/commit/2147ccec328f79662f9662ce0659f228964c2533)) 33 | 34 | 35 | 36 | ## 1.1.0 (2016-02-02) 37 | 38 | #### Breaking Changes 39 | 40 | * **convolution:** 41 | * make parameter ordering more consistent ([bb314bdd](https://github.com/autumnai/rust-cudnn/commit/bb314bdd1ddd8213539252bb4bc0f5ba514e5888)) 42 | * implement backward _data _filter _bias ([09cdeb7a](https://github.com/autumnai/rust-cudnn/commit/09cdeb7ac48dc77aae1db30b70579b030349bd4f)) 43 | 44 | #### Bug Fixes 45 | 46 | * **convolution:** make parameter ordering more consistent ([bb314bdd](https://github.com/autumnai/rust-cudnn/commit/bb314bdd1ddd8213539252bb4bc0f5ba514e5888)) 47 | 48 | #### Features 49 | 50 | * **convolution:** implement backward _data _filter _bias ([09cdeb7a](https://github.com/autumnai/rust-cudnn/commit/09cdeb7ac48dc77aae1db30b70579b030349bd4f)) 51 | * **license:** change license to dual MIT/Apache-2.0 ([1d37a0fe](https://github.com/autumnai/rust-cudnn/commit/1d37a0fe149f95b2b895876aa811d3dc86a957f9)), closes [#10](https://github.com/autumnai/rust-cudnn/issues/10) 52 | 53 | 54 | ## 1.0.1 (2015-12-21) 55 | 56 | #### Bug Fixes 57 | 58 | * **pooling:** fix pooling and convolution ([5b8c94b0](https://github.com/autumnai/rust-cudnn/commit/5b8c94b06673ca4f9ef0c218addf774fcab578d7)) 59 | 60 | 61 | ## 1.0.0 (2015-12-16) 62 | 63 | First working release 64 | -------------------------------------------------------------------------------- /rcudnn/Cargo.toml.FIXME: -------------------------------------------------------------------------------- 1 | # until 2 | # https://github.com/rust-lang/cargo/issues/5042 3 | # is resolved 4 | # nested workspaces are unusable 5 | [workspace] 6 | members = ["cudnn", "cudnn-sys"] 7 | 8 | [patch.crates-io] 9 | rcudnn = { path = "./cudnn" } 10 | rcudnn-sys = { path = "./cudnn-sys" } 11 | 12 | 
[profile.bench] 13 | opt-level = 3 14 | debug = false 15 | rpath = false 16 | lto = false 17 | debug-assertions = false 18 | codegen-units = 1 19 | 20 | [profile.dev] 21 | opt-level = 0 22 | debug = true 23 | rpath = false 24 | lto = false 25 | debug-assertions = true -------------------------------------------------------------------------------- /rcudnn/cudnn-sys/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rcudnn-sys" 3 | description = "FFI bindings to cuDNN" 4 | version = "0.5.0" 5 | edition = "2018" 6 | authors = [ 7 | "Bernhard Schuster ", 8 | "Maximilian Goisser" 9 | ] 10 | repository = "https://github.com/spearow/juice" 11 | homepage = "https://github.com/spearow/juice/tree/master/rcudnn/cublas-sys" 12 | keywords = ["cudnn", "cuda", "nvidia", "sys"] 13 | license = "MIT OR Apache-2.0" 14 | links = "cudnn" 15 | build = "build.rs" 16 | 17 | [dependencies] 18 | libc = "0.2" 19 | 20 | [build-dependencies] 21 | pkg-config = "0.3" 22 | bindgen = { version = "^0.60.1", optional = true } 23 | 24 | [features] 25 | default = [] 26 | generate = ["bindgen"] 27 | -------------------------------------------------------------------------------- /rcudnn/cudnn-sys/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2017-2020 Bernhard Schuster 4 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /rcudnn/cudnn-sys/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod generated; 2 | 3 | pub use crate::generated::*; 4 | 5 | impl Default for cudnnConvolutionFwdAlgoPerf_t { 6 | fn default() -> Self { 7 | Self { 8 | algo: cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_DIRECT, 9 | status: cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED, 10 | time: 0.0 as f32, 11 | memory: 0, 12 | determinism: cudnnDeterminism_t::CUDNN_NON_DETERMINISTIC, 13 | mathType: cudnnMathType_t::CUDNN_DEFAULT_MATH, 14 | reserved: [0; 3usize], 15 | } 16 | } 17 | } 18 | 19 | impl Default for cudnnConvolutionBwdFilterAlgoPerf_t { 20 | fn default() -> Self { 21 | Self { 22 | algo: cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, 23 | status: cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED, 24 | time: 0.0 as f32, 25 | memory: 0, 26 | determinism: cudnnDeterminism_t::CUDNN_NON_DETERMINISTIC, 27 | mathType: cudnnMathType_t::CUDNN_DEFAULT_MATH, 28 | reserved: [0; 3usize], 29 | } 30 | } 31 | } 32 | 33 | impl Default for cudnnConvolutionBwdDataAlgoPerf_t { 34 | fn default() -> Self { 35 | Self { 36 | algo: cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, 37 | status: cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED, 38 | time: 0.0 as f32, 39 | memory: 0, 40 | determinism: cudnnDeterminism_t::CUDNN_NON_DETERMINISTIC, 41 | mathType: cudnnMathType_t::CUDNN_DEFAULT_MATH, 42 | reserved: [0; 3usize], 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /rcudnn/cudnn-sys/wrapper.h: -------------------------------------------------------------------------------- 1 | #include 2 | -------------------------------------------------------------------------------- /rcudnn/cudnn/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rcudnn" 3 | description = "safe Rust wrapper for CUDA's cuDNN" 4 | version = "1.8.0" 5 | edition = "2018" 6 | authors = [ 7 | "Bernhard Schuster ", 8 | "Lissa Hyacinth ", 9 | "MichaelHirn", 10 | "Maximilian Goisser", 11 | ] 12 | repository = "https://github.com/spearow/juice" 13 | homepage = "https://github.com/spearow/juice/tree/master/rcudnn" 14 | readme = "../README.md" 15 | keywords = ["cudnn", "cuda", "nvidia", "neural-network"] 16 | license = "MIT OR Apache-2.0" 17 | 18 | [dependencies] 19 | libc = "0.2" 20 | rcudnn-sys = { version = "0.5.0", path = "../cudnn-sys" } 21 | num = "0.4" 22 | thiserror = "1.0" 23 | 24 | [dev-dependencies] 25 | coaster = { default-features = false, features = ["native", "cuda"], path = "../../coaster" , version = "0.2.0"} 26 | env_logger = "0.9" 27 | log = "0.4" 28 | -------------------------------------------------------------------------------- /rcudnn/cudnn/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT LICENSE 2 | 3 | Copyright (c) 2017-2020 Bernhard Schuster 4 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject 
to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /rcudnn/cudnn/benches/cudnn_overhead.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate coaster as co; 4 | extern crate rcudnn; 5 | extern crate test; 6 | 7 | #[cfg(test)] 8 | mod cudnn_spec { 9 | 10 | use rcudnn::Cudnn; 11 | 12 | #[test] 13 | fn it_initializes_correctly() { 14 | /* 15 | match Cudnn::new() { 16 | Ok(_) => assert!(true), 17 | Err(err) => { 18 | println!("{:?}", err); 19 | assert!(false); 20 | } 21 | } 22 | */ 23 | } 24 | 25 | #[test] 26 | fn it_returns_version() { 27 | println!("cuDNN Version: {:?}", Cudnn::version()); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/activation_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Activation Descriptor. 2 | //! 3 | //! A Activation Descriptor is used to hold information about the rule, 4 | //! which describes how to transform the data. 5 | 6 | use super::{Error, API}; 7 | use crate::ffi::*; 8 | 9 | #[derive(Debug, Clone)] 10 | /// Describes a ActivationDescriptor. 11 | pub struct ActivationDescriptor { 12 | id: cudnnActivationDescriptor_t, 13 | } 14 | 15 | impl Drop for ActivationDescriptor { 16 | #[allow(unused_must_use)] 17 | fn drop(&mut self) { 18 | API::destroy_activation_descriptor(*self.id_c()); 19 | } 20 | } 21 | 22 | impl ActivationDescriptor { 23 | /// Initializes a new CUDA cuDNN Activation Descriptor. 24 | pub fn new(mode: cudnnActivationMode_t) -> Result { 25 | let generic_activation_desc = API::create_activation_descriptor()?; 26 | API::set_activation_descriptor( 27 | generic_activation_desc, 28 | mode, 29 | cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN, // FIXME check if this makes sense 30 | ::std::f64::MAX, // FIXME make this public API 31 | )?; 32 | 33 | Ok(ActivationDescriptor::from_c(generic_activation_desc)) 34 | } 35 | 36 | /// Initializes a new CUDA cuDNN Activation Descriptor from its C type. 37 | pub fn from_c(id: cudnnActivationDescriptor_t) -> ActivationDescriptor { 38 | ActivationDescriptor { id } 39 | } 40 | 41 | /// Returns the CUDA cuDNN Activation Descriptor as its C type. 42 | pub fn id_c(&self) -> &cudnnActivationDescriptor_t { 43 | &self.id 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/api/cuda.rs: -------------------------------------------------------------------------------- 1 | //! Provides utility functionality for the CUDA cuDNN API. 2 | 3 | use crate::ffi::*; 4 | use crate::{Error, API}; 5 | use std::ptr; 6 | 7 | impl API { 8 | /// Initialize the CUDA cuDNN API with needed context and resources. 
9 | /// 10 | /// The returned pointer references the freshly allocated CUDA device memory and must be released with `cuda_free_device_memory`. 11 | /// Call this method outside of performance critical routines. 12 | pub fn cuda_allocate_device_memory(bytes: usize) -> Result<*mut ::libc::c_void, Error> { 13 | unsafe { API::ffi_cuda_allocate_device_memory(bytes) } 14 | } 15 | 16 | /// Frees the CUDA device memory behind `ptr`. 17 | /// 18 | /// Frees up resources and will call `cudaDeviceSynchronize` internally. 19 | /// Therefore, use this method outside of performance critical routines. 20 | pub fn cuda_free_device_memory(ptr: *mut ::libc::c_void) -> Result<(), Error> { 21 | unsafe { API::ffi_cuda_free_device_memory(ptr) } 22 | } 23 | 24 | unsafe fn ffi_cuda_allocate_device_memory(bytes: usize) -> Result<*mut ::libc::c_void, Error> { 25 | let mut ptr: *mut ::libc::c_void = ptr::null_mut(); 26 | match cudaMalloc(&mut ptr, bytes) { 27 | cudaError_t::cudaSuccess => Ok(ptr), 28 | cudaError_t::cudaErrorMemoryAllocation => { 29 | Err(Error::AllocFailed("Unable to allocate CUDA device memory.")) 30 | } 31 | status => Err(Error::Unknown( 32 | "Unable to allocate CUDA device memory for unknown reasons.", 33 | status as i32 as u64, 34 | )), 35 | } 36 | } 37 | 38 | unsafe fn ffi_cuda_free_device_memory(ptr: *mut ::libc::c_void) -> Result<(), Error> { 39 | match cudaFree(ptr) { 40 | cudaError_t::cudaSuccess => Ok(()), 41 | // TODO: handle the remaining error enum variants 42 | cudaError_t::cudaErrorInvalidDevicePointer => Err(Error::InvalidValue( 43 | "Unable to free the CUDA device memory due to invalid device pointer.", 44 | )), 45 | cudaError_t::cudaErrorInitializationError => Err(Error::NotInitialized( 46 | "CUDA Driver/Runtime API not initialized.", 47 | )), 48 | status => Err(Error::Unknown( 49 | "Unable to free the CUDA device memory.", 50 | status as i32 as u64, 51 | )), 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/api/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides safe API calls to CUDA's cuDNN library. 2 | //! 3 | //! Usually you will not use those calls directly, but access them through 4 | //! the higher-level structs exposed at the root of this crate, which provide 5 | //! a more convenient and "rusty" interface. 6 | 7 | pub mod activation; 8 | pub mod convolution; 9 | pub mod cuda; 10 | pub mod dropout; 11 | pub mod normalization; 12 | pub mod pooling; 13 | pub mod rnn; 14 | pub mod softmax; 15 | pub mod tensor; 16 | pub mod utils; 17 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/convolution_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Convolution Descriptor. 2 | //! 3 | //! A Convolution Descriptor is used to hold information about the convolution, 4 | //! which is needed for forward and backward convolutional operations. 5 | 6 | use super::utils::DataType; 7 | use super::{Error, API}; 8 | use crate::ffi::*; 9 | 10 | #[derive(Debug, Clone)] 11 | /// Describes a Convolution Descriptor. 12 | pub struct ConvolutionDescriptor { 13 | id: cudnnConvolutionDescriptor_t, 14 | } 15 | 16 | impl Drop for ConvolutionDescriptor { 17 | #[allow(unused_must_use)] 18 | fn drop(&mut self) { 19 | API::destroy_convolution_descriptor(*self.id_c()); 20 | } 21 | } 22 | 23 | impl ConvolutionDescriptor { 24 | /// Initializes a new CUDA cuDNN ConvolutionDescriptor.
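///
/// `pad` and `filter_stride` are expected to carry one entry per spatial dimension;
/// an up-scaling factor of 1 (no dilation) is used for every dimension. A hedged call
/// sketch with purely illustrative values: `ConvolutionDescriptor::new(&[1, 1], &[1, 1], DataType::Float)`.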
25 | pub fn new( 26 | pad: &[i32], 27 | filter_stride: &[i32], 28 | data_type: DataType, 29 | ) -> Result<ConvolutionDescriptor, Error> { 30 | let array_length = pad.len() as i32; 31 | let upscale: Vec<i32> = ::std::iter::repeat(1i32) 32 | .take(array_length as usize) 33 | .collect(); 34 | 35 | let generic_convolution_desc = API::create_convolution_descriptor()?; 36 | let data_type = API::cudnn_data_type(data_type); 37 | 38 | API::set_convolution_descriptor( 39 | generic_convolution_desc, 40 | data_type, 41 | cudnnConvolutionMode_t::CUDNN_CONVOLUTION, 42 | array_length, 43 | pad.as_ptr(), 44 | filter_stride.as_ptr(), 45 | upscale.as_ptr(), 46 | )?; 47 | Ok(ConvolutionDescriptor::from_c(generic_convolution_desc)) 48 | } 49 | 50 | /// Initializes a new CUDA cuDNN ConvolutionDescriptor from its C type. 51 | pub fn from_c(id: cudnnConvolutionDescriptor_t) -> ConvolutionDescriptor { 52 | ConvolutionDescriptor { id } 53 | } 54 | 55 | /// Returns the CUDA cuDNN ConvolutionDescriptor as its C type. 56 | pub fn id_c(&self) -> &cudnnConvolutionDescriptor_t { 57 | &self.id 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/cuda.rs: -------------------------------------------------------------------------------- 1 | //! Defines CUDA Device Memory. 2 | //! 3 | //! Holds a pointer to, and the size of, a chunk of CUDA device memory. 4 | //! This is only a convenience wrapper to interact in a 5 | //! defined manner with cuDNN, which requires scratch/temporary 6 | //! memory for some operations, e.g. dropout. 7 | 8 | use super::{Error, API}; 9 | 10 | #[derive(Debug)] 11 | /// A pointer to memory existing on an NVIDIA GPU 12 | pub struct CudaDeviceMemory { 13 | ptr: *mut ::libc::c_void, 14 | size: usize, 15 | } 16 | 17 | impl CudaDeviceMemory { 18 | /// Allocates a new chunk of CUDA device memory of `size` bytes. 19 | pub fn new(size: usize) -> Result<CudaDeviceMemory, Error> { 20 | let ptr = API::cuda_allocate_device_memory(size)?; 21 | Ok(CudaDeviceMemory { ptr, size }) 22 | } 23 | 24 | /// Initializes a new CUDA Device Memory from its C type. 25 | pub fn from_c(ptr: *mut ::libc::c_void, size: usize) -> CudaDeviceMemory { 26 | CudaDeviceMemory { ptr, size } 27 | } 28 | 29 | /// Returns the CUDA Device Memory ptr as its C type. 30 | pub fn id_c(&self) -> &*mut ::libc::c_void { 31 | &self.ptr 32 | } 33 | 34 | /// Returns the size of the CUDA Device Memory chunk. 35 | pub fn size(&self) -> usize { 36 | self.size 37 | } 38 | } 39 | 40 | impl Drop for CudaDeviceMemory { 41 | #[allow(unused_must_use)] 42 | fn drop(&mut self) { 43 | self.size = 0; 44 | API::cuda_free_device_memory(*self.id_c()).unwrap() 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/dropout_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Dropout Descriptor. 2 | //! 3 | //! A Dropout Descriptor is used to hold information about the probability 4 | //! of dropping a value as well as an initial seed. 5 | 6 | use super::{Error, API}; 7 | use crate::cudnn::Cudnn; 8 | use crate::ffi::*; 9 | 10 | #[derive(Debug, Clone)] 11 | /// Describes a DropoutDescriptor. 12 | pub struct DropoutDescriptor { 13 | id: cudnnDropoutDescriptor_t, 14 | } 15 | 16 | impl Drop for DropoutDescriptor { 17 | #[allow(unused_must_use)] 18 | fn drop(&mut self) { 19 | API::destroy_dropout_descriptor(*self.id_c()); 20 | } 21 | } 22 | 23 | impl DropoutDescriptor { 24 | /// Initializes a new CUDA cuDNN Dropout Descriptor.
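///
/// `dropout` is the probability of dropping a value, `seed` initialises the random
/// number generator, and `reserve`/`reserve_size` describe caller-provided GPU state
/// memory handed through to cuDNN (a `CudaDeviceMemory` is a convenient owner for
/// such a buffer).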
25 | pub fn new( 26 | handle: &Cudnn, 27 | dropout: f32, 28 | seed: u64, 29 | reserve: *mut libc::c_void, 30 | reserve_size: usize, 31 | ) -> Result { 32 | let generic_dropout_desc = API::create_dropout_descriptor()?; 33 | API::set_dropout_descriptor( 34 | generic_dropout_desc, 35 | *handle.id_c(), 36 | dropout, 37 | reserve, 38 | reserve_size, 39 | seed, 40 | )?; 41 | 42 | Ok(DropoutDescriptor::from_c(generic_dropout_desc)) 43 | } 44 | 45 | /// Get the size for a tensor 46 | pub fn get_required_size() -> usize { 47 | unimplemented!() 48 | } 49 | 50 | /// Initializes a new CUDA cuDNN Tensor Descriptor from its C type. 51 | pub fn from_c(id: cudnnDropoutDescriptor_t) -> DropoutDescriptor { 52 | DropoutDescriptor { id } 53 | } 54 | 55 | /// Returns the CUDA cuDNN Tensor Descriptor as its C type. 56 | pub fn id_c(&self) -> &cudnnDropoutDescriptor_t { 57 | &self.id 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/error.rs: -------------------------------------------------------------------------------- 1 | //! Provides Rust Errors for CUDA's cuDNN status. 2 | 3 | #[allow(unused)] 4 | pub type Result = std::result::Result; 5 | 6 | #[non_exhaustive] 7 | #[derive(Debug, Copy, Clone, thiserror::Error)] 8 | /// Defines CUDA's cuDNN errors. 9 | pub enum Error { 10 | /// Failure with CUDA cuDNN initialization. 11 | #[error("{0:?}")] 12 | NotInitialized(&'static str), 13 | /// Failure with allocation. 14 | #[error("{0:?}")] 15 | AllocFailed(&'static str), 16 | /// Failure with a provided parameter. 17 | #[error("{0:?}")] 18 | BadParam(&'static str), 19 | /// Failure with cuDNN. 20 | #[error("{0:?}")] 21 | InternalError(&'static str), 22 | /// Failure with provided value. 23 | #[error("{0:?}")] 24 | InvalidValue(&'static str), 25 | /// Failure with the hardware architecture. 26 | #[error("{0:?}")] 27 | ArchMismatch(&'static str), 28 | /// Failure with memory access or internal error/bug. 29 | #[error("{0:?}")] 30 | MappingError(&'static str), 31 | /// Failure with Kernel execution. 32 | #[error("{0:?}")] 33 | ExecutionFailed(&'static str), 34 | /// Failure with an unsupported request. 35 | #[error("{0:?}")] 36 | NotSupported(&'static str), 37 | /// Failure CUDA License. 38 | #[error("{0:?}")] 39 | LicenseError(&'static str), 40 | /// Failure 41 | #[error("{0:?}: {1}")] 42 | Unknown(&'static str, u64), 43 | } 44 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/filter_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Filter Descriptor. 2 | //! 3 | //! A Filter Descriptor is used to hold information about the Filter, 4 | //! which is needed for forward and backward convolutional operations. 5 | 6 | use super::utils::DataType; 7 | use super::{Error, API}; 8 | use crate::ffi::*; 9 | 10 | #[derive(Debug, Clone)] 11 | /// Describes a Filter Descriptor. 12 | pub struct FilterDescriptor { 13 | id: cudnnFilterDescriptor_t, 14 | } 15 | 16 | impl Drop for FilterDescriptor { 17 | #[allow(unused_must_use)] 18 | fn drop(&mut self) { 19 | API::destroy_filter_descriptor(*self.id_c()); 20 | } 21 | } 22 | 23 | impl FilterDescriptor { 24 | /// Initializes a new CUDA cuDNN FilterDescriptor. 
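///
/// `filter_dim` lists the filter dimensions in NCHW order (the only layout this
/// wrapper sets), e.g. `[k, c, h, w]` for output maps, input maps, height and width.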
25 | pub fn new(filter_dim: &[i32], data_type: DataType) -> Result { 26 | let nb_dims = filter_dim.len() as i32; 27 | let tensor_format = cudnnTensorFormat_t::CUDNN_TENSOR_NCHW; 28 | let data_type = API::cudnn_data_type(data_type); 29 | let generic_filter_desc = API::create_filter_descriptor()?; 30 | API::set_filter_descriptor( 31 | generic_filter_desc, 32 | data_type, 33 | tensor_format, 34 | nb_dims, 35 | filter_dim.as_ptr(), 36 | )?; 37 | Ok(FilterDescriptor::from_c(generic_filter_desc)) 38 | } 39 | 40 | /// Initializes a new CUDA cuDNN FilterDescriptor from its C type. 41 | pub fn from_c(id: cudnnFilterDescriptor_t) -> FilterDescriptor { 42 | FilterDescriptor { id } 43 | } 44 | 45 | /// Returns the CUDA cuDNN FilterDescriptor as its C type. 46 | pub fn id_c(&self) -> &cudnnFilterDescriptor_t { 47 | &self.id 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/normalization_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a LRN Descriptor. 2 | 3 | use super::{Error, API}; 4 | use crate::ffi::*; 5 | 6 | #[derive(Debug, Clone)] 7 | /// Describes a LRN Descriptor. 8 | pub struct NormalizationDescriptor { 9 | id: cudnnLRNDescriptor_t, 10 | } 11 | 12 | impl Drop for NormalizationDescriptor { 13 | #[allow(unused_must_use)] 14 | fn drop(&mut self) { 15 | API::destroy_lrn_descriptor(*self.id_c()); 16 | } 17 | } 18 | 19 | impl NormalizationDescriptor { 20 | /// Initializes a new CUDA cuDNN LRNDescriptor. 21 | pub fn new( 22 | lrn_n: u32, 23 | lrn_alpha: f64, 24 | lrn_beta: f64, 25 | lrn_k: f64, 26 | ) -> Result { 27 | let generic_lrn_desc = API::create_lrn_descriptor()?; 28 | API::set_lrn_descriptor(generic_lrn_desc, lrn_n, lrn_alpha, lrn_beta, lrn_k)?; 29 | Ok(NormalizationDescriptor::from_c(generic_lrn_desc)) 30 | } 31 | 32 | /// Initializes a new CUDA cuDNN NormalizationDescriptor from its C type. 33 | pub fn from_c(id: cudnnLRNDescriptor_t) -> NormalizationDescriptor { 34 | NormalizationDescriptor { id } 35 | } 36 | 37 | /// Returns the CUDA cuDNN NormalizationDescriptor as its C type. 38 | pub fn id_c(&self) -> &cudnnLRNDescriptor_t { 39 | &self.id 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/pooling_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Pooling Descriptor. 2 | 3 | use super::{Error, API}; 4 | use crate::ffi::*; 5 | 6 | #[derive(Debug, Clone)] 7 | /// Describes a Pooling Descriptor. 8 | pub struct PoolingDescriptor { 9 | id: cudnnPoolingDescriptor_t, 10 | } 11 | 12 | impl Drop for PoolingDescriptor { 13 | #[allow(unused_must_use)] 14 | fn drop(&mut self) { 15 | API::destroy_pooling_descriptor(*self.id_c()).unwrap(); 16 | } 17 | } 18 | 19 | impl PoolingDescriptor { 20 | /// Initializes a new CUDA cuDNN Pooling Descriptor. 21 | pub fn new( 22 | mode: cudnnPoolingMode_t, 23 | window: &[i32], 24 | padding: &[i32], 25 | stride: &[i32], 26 | ) -> Result { 27 | let generic_pooling_desc = API::create_pooling_descriptor()?; 28 | API::set_pooling_descriptor( 29 | generic_pooling_desc, 30 | mode, 31 | cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN, // TODO check if this is sane to do 32 | window.len() as i32, 33 | window.as_ptr(), 34 | padding.as_ptr(), 35 | stride.as_ptr(), 36 | )?; 37 | 38 | Ok(PoolingDescriptor::from_c(generic_pooling_desc)) 39 | } 40 | 41 | /// Initializes a new CUDA cuDNN PoolingDescriptor from its C type. 
42 | pub fn from_c(id: cudnnPoolingDescriptor_t) -> PoolingDescriptor { 43 | PoolingDescriptor { id } 44 | } 45 | 46 | /// Returns the CUDA cuDNN Pooling Descriptor as its C type. 47 | pub fn id_c(&self) -> &cudnnPoolingDescriptor_t { 48 | &self.id 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/rnn_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Recurrent Descriptor. 2 | //! 3 | //! A Recurrent Descriptor is used to hold information about the rnn, 4 | //! which is needed for forward and backward rnnal operations. 5 | 6 | use super::{Error, API}; 7 | use crate::utils::DataType; 8 | use crate::utils::DropoutConfig; 9 | use crate::Cudnn; 10 | use ffi::*; 11 | 12 | /// Describes a Recurrent Descriptor. 13 | #[derive(Debug)] 14 | pub struct RnnDescriptor { 15 | id: cudnnRNNDescriptor_t, 16 | dropout_config: DropoutConfig, 17 | } 18 | 19 | impl Drop for RnnDescriptor { 20 | #[allow(unused_must_use)] 21 | fn drop(&mut self) { 22 | API::destroy_rnn_descriptor(*self.id_c()); 23 | } 24 | } 25 | 26 | impl RnnDescriptor { 27 | /// Initializes a new CUDA cuDNN RnnDescriptor. 28 | #[allow(clippy::too_many_arguments)] 29 | pub fn new( 30 | handle: &Cudnn, 31 | hidden_size: i32, 32 | num_layers: i32, 33 | dropout_config: DropoutConfig, 34 | input_mode: cudnnRNNInputMode_t, 35 | direction: cudnnDirectionMode_t, 36 | mode: cudnnRNNMode_t, 37 | algorithm: cudnnRNNAlgo_t, 38 | data_type: DataType, 39 | padding_mode: cudnnRNNPaddingMode_t, 40 | ) -> Result { 41 | let generic_rnn_desc = API::create_rnn_descriptor()?; 42 | API::set_rnn_descriptor( 43 | *handle.id_c(), 44 | generic_rnn_desc, 45 | hidden_size, 46 | num_layers, 47 | *dropout_config.dropout_desc().id_c(), 48 | input_mode, 49 | direction, 50 | mode, 51 | algorithm, 52 | data_type, 53 | )?; 54 | 55 | API::set_rnn_padding_mode(generic_rnn_desc, padding_mode)?; 56 | 57 | Ok(RnnDescriptor { 58 | id: generic_rnn_desc, 59 | dropout_config, 60 | }) 61 | } 62 | 63 | /// Initializes a new CUDA cuDNN RnnDescriptor from its C type. 64 | pub fn from_c(id: cudnnRNNDescriptor_t, dropout_config: DropoutConfig) -> RnnDescriptor { 65 | RnnDescriptor { id, dropout_config } 66 | } 67 | 68 | /// Returns the CUDA cuDNN RnnDescriptor as its C type. 69 | pub fn id_c(&self) -> &cudnnRNNDescriptor_t { 70 | &self.id 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /rcudnn/cudnn/src/tensor_descriptor.rs: -------------------------------------------------------------------------------- 1 | //! Defines a Tensor Descriptor. 2 | //! 3 | //! A Tensor Descriptor is used to hold information about the data, 4 | //! which is needed for the operations to obtain information about 5 | //! the structure and dimensionality of the data. 6 | 7 | use super::utils::DataType; 8 | use super::{Error, API}; 9 | use crate::ffi::*; 10 | 11 | #[derive(Debug, Clone)] 12 | /// Describes a TensorDescriptor. 
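///
/// Wraps a raw `cudnnTensorDescriptor_t`; the underlying descriptor is destroyed
/// again when this value is dropped.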
13 | pub struct TensorDescriptor { 14 | id: cudnnTensorDescriptor_t, 15 | } 16 | 17 | /// Return C Handle for a Vector of Tensor Descriptors 18 | pub fn tensor_vec_id_c(tensor_vec: &[TensorDescriptor]) -> Vec { 19 | tensor_vec.iter().map(|tensor| *tensor.id_c()).collect() 20 | } 21 | 22 | impl Drop for TensorDescriptor { 23 | #[allow(unused_must_use)] 24 | fn drop(&mut self) { 25 | API::destroy_tensor_descriptor(*self.id_c()); 26 | } 27 | } 28 | 29 | impl TensorDescriptor { 30 | /// Initializes a new CUDA cuDNN Tensor Descriptor. 31 | pub fn new( 32 | dims: &[i32], 33 | strides: &[i32], 34 | data_type: DataType, 35 | ) -> Result { 36 | let nb_dims = dims.len() as i32; 37 | if nb_dims < 3 { 38 | return Err(Error::BadParam( 39 | "CUDA cuDNN only supports Tensors with 3 to 8 dimensions.", 40 | )); 41 | } 42 | 43 | let dims_ptr = dims.as_ptr(); 44 | let strides_ptr = strides.as_ptr(); 45 | let generic_tensor_desc = API::create_tensor_descriptor()?; 46 | let data_type = API::cudnn_data_type(data_type); 47 | 48 | API::set_tensor_descriptor( 49 | generic_tensor_desc, 50 | data_type, 51 | nb_dims, 52 | dims_ptr, 53 | strides_ptr, 54 | )?; 55 | Ok(TensorDescriptor::from_c(generic_tensor_desc)) 56 | } 57 | 58 | /// Initializes a new CUDA cuDNN Tensor Descriptor from its C type. 59 | pub fn from_c(id: cudnnTensorDescriptor_t) -> TensorDescriptor { 60 | TensorDescriptor { id } 61 | } 62 | 63 | /// Returns the CUDA cuDNN Tensor Descriptor as its C type. 64 | pub fn id_c(&self) -> &cudnnTensorDescriptor_t { 65 | &self.id 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /rcudnn/cudnn/tests/tensor_descriptor_specs.rs: -------------------------------------------------------------------------------- 1 | extern crate rcudnn as cudnn; 2 | 3 | #[cfg(test)] 4 | mod tensor_descriptor_spec { 5 | 6 | use crate::cudnn::utils::DataType; 7 | use crate::cudnn::TensorDescriptor; 8 | 9 | #[test] 10 | fn it_initializes_a_tensor_descriptor() { 11 | match TensorDescriptor::new(&[2, 2, 2], &[4, 2, 1], DataType::Float) { 12 | Ok(_) => assert!(true), 13 | Err(err) => { 14 | println!("{:?}", err); 15 | assert!(false); 16 | } 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /rcudnn/rustfmt.toml: -------------------------------------------------------------------------------- 1 | format_strings = false 2 | reorder_imports = true 3 | -------------------------------------------------------------------------------- /remote-test/concourse-crashtest-job.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fff-rs/juice/09253d3d89b4f9cd6b221e5a2c268df5036187eb/remote-test/concourse-crashtest-job.png -------------------------------------------------------------------------------- /remote-test/launch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | cargo check --tests 4 | fly -t spearow login -n juice-crashtesters --concourse-url https://ci.spearow.io 5 | fly -t spearow execute \ 6 | --tag simsalabim \ 7 | --tag framework:cuda \ 8 | --tag framework:opencl \ 9 | -c ./remote-test/test.yml \ 10 | --input juice=. 
11 | -------------------------------------------------------------------------------- /remote-test/test.yml: -------------------------------------------------------------------------------- 1 | platform: linux 2 | 3 | image_resource: 4 | type: registry-image 5 | source: 6 | repository: quay.io/spearow/machine-learning-container-fedora-cuda 7 | # tag: latest 8 | # repository: docker.io/nvidia/cuda 9 | # tag: 11.7.1-devel-centos7 10 | 11 | inputs: 12 | - name: juice 13 | 14 | caches: 15 | - path: cargo_home 16 | - path: cargo_target 17 | 18 | run: 19 | path: sh 20 | args: 21 | - -exc 22 | - | 23 | set -e 24 | prepare 25 | cargo-override-injection 26 | export CARGO_HOME=$(pwd)/../cargo_home 27 | mkdir -p ${CARGO_HOME} 28 | export CARGO_TARGET_DIR=$(pwd)/../cargo_target 29 | mkdir -p ${CARGO_TARGET_DIR} 30 | export RUST_LOG=rcudnn=trace,rcudnn-sys=trace 31 | export DIRX="rcudnn/cudnn coaster-nn" 32 | export RUST_BACKTRACE=full 33 | 34 | cargo t -p coaster-nn cuda::rnn_f32 -- --nocapture 35 | cargo t -p juice rnn_roundtrip_pass -- --nocapture 36 | cd juice-examples/mackey-glass-rnn-regression 37 | 38 | # FIXME batch-size should not have to match 39 | # https://github.com/spearow/juice/issues/140 40 | cargo run -p example-rnn-regression -- train --batch-size=23 --learning-rate=0.1 --momentum=0.2 netstate.capnp 41 | cargo run -p example-rnn-regression -- test --batch-size=23 netstate.capnp 42 | 43 | # for D in $DIRX; do 44 | # echo ">>>>> $D" 45 | # cd $D 46 | # # do not generate with bindgen 47 | # cargo test rnn_f32 -- --nocapture 48 | # cd - 49 | # echo "<<<<< $D" 50 | # done 51 | dir: juice 52 | -------------------------------------------------------------------------------- /rust-blas/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | -------------------------------------------------------------------------------- /rust-blas/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-blas" 3 | version = "0.2.0" 4 | authors = [ 5 | "Paul Kassianik ", 6 | "Lissa Hyacinth ", 7 | "Bernhard Schuster ", 8 | "Michael Yang ", 9 | ] 10 | description = "BLAS bindings and wrappers, fork of rblas" 11 | documentation = "http://mikkyang.github.io/rust-blas/doc/rblas/index.html" 12 | homepage = "https://github.com/spearow/juice/tree/master/rust-blas" 13 | repository = "https://github.com/spearow/juice" 14 | readme = "README.md" 15 | keywords = ["BLAS"] 16 | license = "MIT" 17 | build = "build.rs" 18 | edition = "2018" 19 | 20 | [dependencies] 21 | num = "0.4" 22 | num-complex = "0.4" 23 | libc = "0.2" 24 | 25 | [build-dependencies] 26 | pkg-config = "0.3" 27 | -------------------------------------------------------------------------------- /rust-blas/LICENSE: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2013 Michael Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | 'Software'), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial 
portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /rust-blas/README.md: -------------------------------------------------------------------------------- 1 | # rust-blas 2 | 3 | [![MIT licensed](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE) 4 | [![crates.io](http://meritbadge.herokuapp.com/rust-blas)](https://crates.io/crates/rust-blas) 5 | 6 | Rust bindings and wrappers for BLAS (Basic Linear Algebra Subprograms). 7 | 8 | ## Overview 9 | 10 | RBLAS wraps each external call in a trait with the same name (but capitalized). 11 | This trait contains a single static method, of the same name. These traits are 12 | generic over the four main types of numbers BLAS supports: `f32`, `f64`, 13 | `Complex32`, and `Complex64`. 14 | 15 | For example the functions `cblas_saxpy`, `cblas_daxpy`, `cblas_caxypy`, and 16 | `cblas_zaxpy` are called with the function `Axpy::axpy`. 17 | 18 | Additionally, RBLAS introduces a few traits to shorten calls to these BLAS 19 | functions: `Vector` for types that implement vector-like characteristics and 20 | `Matrix` for types that implement matrix-like characteristics. The `Vector` 21 | trait is already implemented by `Vec` and `[]` types. 22 | 23 | [Documentation](http://mikkyang.github.io/rust-blas/doc/rblas/index.html) 24 | 25 | ## Installation 26 | 27 | By default, the library links with `blas` dynamically. To link to an alternate 28 | implementation, like OpenBLAS, use the environment variable `CARGO_BLAS`. If 29 | you've already built the bindings, you may need to clean and build again. 
30 | 31 | ``` 32 | export CARGO_BLAS=openblas 33 | ``` 34 | 35 | ## Example 36 | 37 | ```rust 38 | extern crate rblas; 39 | 40 | use rblas::Dot; 41 | 42 | fn main() { 43 | let x = vec![1.0, -2.0, 3.0, 4.0]; 44 | let y = [1.0, 1.0, 1.0, 1.0, 7.0]; 45 | 46 | let d = Dot::dot(&x, &y[..x.len()]); 47 | assert_eq!(d, 6.0); 48 | } 49 | ``` 50 | 51 | ## Sugared Example (Soon to be Deprecated) 52 | 53 | ```rust 54 | #[macro_use] 55 | extern crate rblas as blas; 56 | use blas::math::Mat; 57 | use blas::{Matrix, Vector}; 58 | use blas::math::Marker::T; 59 | 60 | fn main() { 61 | let x = vec![1.0, 2.0]; 62 | let xr = &x as &Vector<_>; 63 | let i = mat![1.0, 0.0; 0.0, 1.0]; 64 | let ir = &i as &Matrix<_>; 65 | 66 | assert!(xr + &x == 2.0 * xr); 67 | assert!(ir * xr == x); 68 | 69 | let dot = (xr ^ T) * xr; 70 | assert!(dot == 5.0); 71 | } 72 | ``` 73 | -------------------------------------------------------------------------------- /rust-blas/build.rs: -------------------------------------------------------------------------------- 1 | extern crate pkg_config; 2 | use std::env; 3 | 4 | fn main() { 5 | let variant = env::var("BLAS_VARIANT").unwrap_or_else(|_| { 6 | if let Ok("x86_64-apple-darwin") = std::env::var("TARGET").as_ref().map(|s| &s[..]) { 7 | "BLAS".to_string() 8 | } else { 9 | "openblas".to_string() 10 | } 11 | }); 12 | let lib_dir = env::var("BLAS_LIB_DIR").ok(); 13 | let include_dir = env::var("BLAS_INCLUDE_DIR").ok(); 14 | 15 | if lib_dir.is_none() && include_dir.is_none() { 16 | if let Ok(info) = pkg_config::find_library(variant.as_str()) { 17 | // avoid empty include paths as they are not supported by GCC 18 | if !info.include_paths.is_empty() { 19 | let paths = env::join_paths(info.include_paths).unwrap(); 20 | println!("cargo:include={}", paths.to_str().unwrap()); 21 | } 22 | return; 23 | } 24 | } 25 | 26 | let mode = if env::var_os("BLAS_STATIC").is_some() { 27 | "static" 28 | } else { 29 | "dylib" 30 | }; 31 | 32 | if let Some(lib_dir) = lib_dir { 33 | println!("cargo:rustc-link-search=native={}", lib_dir); 34 | } 35 | 36 | println!("cargo:rustc-link-lib={}={}", mode, variant); 37 | 38 | if let Some(include_dir) = include_dir { 39 | println!("cargo:include={}", include_dir); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /rust-blas/examples/math.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate rust_blas; 3 | use rust_blas as blas; 4 | 5 | use blas::math::Marker::T; 6 | use blas::math::Mat; 7 | use blas::{Matrix, Vector}; 8 | 9 | fn main() { 10 | let x = vec![1.0, 2.0]; 11 | let xr = &x as &dyn Vector<_>; 12 | let i = mat![1.0, 0.0; 0.0, 1.0]; 13 | let ir = &i as &dyn Matrix<_>; 14 | 15 | assert!(xr + &x == 2.0 * xr); 16 | assert!(ir * xr == x); 17 | 18 | let dot = (xr ^ T) * xr; 19 | assert!(dot == 5.0); 20 | } 21 | -------------------------------------------------------------------------------- /rust-blas/examples/readme.rs: -------------------------------------------------------------------------------- 1 | extern crate rust_blas; 2 | use rust_blas as blas; 3 | 4 | use blas::Dot; 5 | 6 | fn main() { 7 | let x = vec![1.0, -2.0, 3.0, 4.0]; 8 | let y = [1.0, 1.0, 1.0, 1.0, 7.0]; 9 | 10 | let d = Dot::dot(&x, &y[..x.len()]); 11 | assert_eq!(d, 6.0); 12 | } 13 | -------------------------------------------------------------------------------- /rust-blas/src/attribute.rs: -------------------------------------------------------------------------------- 1 | // 
Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //! Various attributes of vectors and matrices. 6 | 7 | #[repr(C)] 8 | #[derive(Copy, Clone)] 9 | pub enum Order { 10 | RowMajor = 101, 11 | ColMajor = 102, 12 | } 13 | 14 | #[repr(C)] 15 | #[derive(Copy, Clone)] 16 | pub enum Transpose { 17 | NoTrans = 111, 18 | Trans = 112, 19 | ConjTrans = 113, 20 | } 21 | 22 | #[repr(C)] 23 | #[derive(Copy, Clone)] 24 | pub enum Symmetry { 25 | Upper = 121, 26 | Lower = 122, 27 | } 28 | 29 | #[repr(C)] 30 | #[derive(Copy, Clone)] 31 | pub enum Diagonal { 32 | NonUnit = 131, 33 | Unit = 132, 34 | } 35 | 36 | #[repr(C)] 37 | #[derive(Copy, Clone)] 38 | pub enum Side { 39 | Left = 141, 40 | Right = 142, 41 | } 42 | -------------------------------------------------------------------------------- /rust-blas/src/default.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | use num_complex::Complex; 6 | 7 | pub trait Default { 8 | fn one() -> Self; 9 | fn zero() -> Self; 10 | fn neg_one() -> Self; 11 | } 12 | 13 | macro_rules! default_impl( 14 | ($t:ty, $ov:expr, $zv:expr) => { 15 | impl Default for $t { 16 | #[inline] 17 | fn one() -> $t { $ov } 18 | #[inline] 19 | fn zero() -> $t { $zv } 20 | #[inline] 21 | fn neg_one() -> $t { -$ov } 22 | } 23 | 24 | impl Default for Complex<$t> { 25 | #[inline] 26 | fn one() -> Complex<$t> { Complex::new($ov, $zv) } 27 | #[inline] 28 | fn zero() -> Complex<$t> { Complex::new($zv, $zv) } 29 | #[inline] 30 | fn neg_one() -> Complex<$t> { Complex::new(-$ov, $zv) } 31 | } 32 | } 33 | ); 34 | 35 | default_impl!(f32, 1f32, 0f32); 36 | default_impl!(f64, 1f64, 0f64); 37 | -------------------------------------------------------------------------------- /rust-blas/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //! BLAS bindings and wrappers. 6 | //! 7 | //! Bindings are split by BLAS level and contained in a module named `ll` 8 | //! (stands for low level, not sure if that's the best name, but that's 9 | //! what it is). 10 | //! 11 | //! Wrappers are split likewise. They are named after the function they wrap, 12 | //! but capitalized and contained in their respective `ops` modules. To use 13 | //! these wrappers, the appropriate traits must be implemented for the type. 14 | //! These are either `Vector` or `Matrix`. 15 | //! 16 | //! * Level 1: `vector` 17 | //! * Level 2: `matrix_vector` 18 | //! 
* Level 3: `matrix` 19 | 20 | pub use crate::matrix::ops::*; 21 | pub use crate::matrix::Matrix; 22 | pub use crate::matrix_vector::ops::*; 23 | pub use crate::vector::ops::*; 24 | pub use crate::vector::Vector; 25 | pub use crate::vector::VectorOperations; 26 | 27 | #[macro_use] 28 | mod prefix; 29 | mod pointer; 30 | mod scalar; 31 | 32 | pub mod attribute; 33 | pub mod default; 34 | pub mod matrix; 35 | pub mod matrix_vector; 36 | pub mod vector; 37 | 38 | pub mod math; 39 | -------------------------------------------------------------------------------- /rust-blas/src/math/matrix_vector.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | use crate::attribute::Transpose; 6 | use crate::default::Default; 7 | use crate::math::Mat; 8 | use crate::math::Trans; 9 | use crate::matrix::Matrix; 10 | use crate::matrix_vector::ops::*; 11 | use crate::vector::Vector; 12 | use std::ops::Mul; 13 | 14 | impl<'a, T> Mul<&'a dyn Vector> for &'a dyn Matrix 15 | where 16 | T: Default + Copy + Gemv, 17 | { 18 | type Output = Vec; 19 | 20 | fn mul(self, x: &dyn Vector) -> Vec { 21 | let n = self.rows() as usize; 22 | let mut result = Vec::with_capacity(n); 23 | unsafe { 24 | result.set_len(n); 25 | } 26 | let scale = Default::one(); 27 | let clear = Default::zero(); 28 | let t = Transpose::NoTrans; 29 | 30 | Gemv::gemv(t, &scale, self, x, &clear, &mut result); 31 | result 32 | } 33 | } 34 | 35 | impl<'a, T> Mul>> for &'a dyn Vector 36 | where 37 | T: Default + Ger + Gerc + Clone, 38 | { 39 | type Output = Mat; 40 | 41 | fn mul(self, x: Trans<&dyn Vector>) -> Mat { 42 | let n = self.len() as usize; 43 | let m = (*x).len() as usize; 44 | let mut result = Mat::fill(Default::zero(), n, m); 45 | let scale = Default::one(); 46 | 47 | match x { 48 | Trans::T(v) => Ger::ger(&scale, self, v, &mut result), 49 | Trans::H(v) => Gerc::gerc(&scale, self, v, &mut result), 50 | } 51 | 52 | result 53 | } 54 | } 55 | 56 | #[cfg(test)] 57 | mod tests { 58 | use crate::math::Marker::T; 59 | use crate::math::Mat; 60 | use crate::Matrix; 61 | use crate::Vector; 62 | 63 | #[test] 64 | fn mul() { 65 | let a = mat![2f32, -2.0; 2.0, -4.0]; 66 | let x = vec![2f32, 1.0]; 67 | 68 | let y = { 69 | let ar = &a as &dyn Matrix; 70 | let xr = &x as &dyn Vector; 71 | ar * xr 72 | }; 73 | 74 | assert_eq!(y, vec![2.0, 0.0]); 75 | } 76 | 77 | #[test] 78 | fn outer() { 79 | let x = vec![2.0, 1.0, 4.0]; 80 | let y = vec![3.0, 6.0, -1.0]; 81 | 82 | let a = { 83 | let xr = &x as &dyn Vector<_>; 84 | let yr = &y as &dyn Vector<_>; 85 | 86 | xr * (yr ^ T) 87 | }; 88 | 89 | let result = mat![ 6.0, 12.0, -2.0; 90 | 3.0, 6.0, -1.0; 91 | 12.0, 24.0, -4.0]; 92 | assert_eq!(a, result); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /rust-blas/src/math/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | use crate::matrix::Matrix; 6 | use crate::vector::Vector; 7 | use std::ops::{BitXor, Deref}; 8 | 9 | pub use self::mat::Mat; 10 | 11 | pub mod bandmat; 12 | pub mod mat; 13 | pub mod matrix; 14 | pub mod matrix_vector; 15 | pub mod vector; 16 | 17 | pub enum Trans<A> { 18 | T(A), 19 | H(A), 20 | } 21 | 22 | impl<A> Deref for Trans<A> { 23 | type Target = A; 24 | 25 | fn deref(&self) -> &A { 26 | match *self { 27 | Trans::T(ref v) => v, 28 | Trans::H(ref v) => v, 29 | } 30 | } 31 | } 32 | 33 | pub enum Marker { 34 | T, 35 | H, 36 | } 37 | 38 | impl<'a, T> BitXor<Marker> for &'a dyn Vector<T> { 39 | type Output = Trans<&'a dyn Vector<T>>; 40 | 41 | fn bitxor(self, m: Marker) -> Trans<&'a dyn Vector<T>> { 42 | match m { 43 | Marker::T => Trans::T(self), 44 | Marker::H => Trans::H(self), 45 | } 46 | } 47 | } 48 | 49 | impl<'a, T> BitXor<Marker> for &'a dyn Matrix<T> { 50 | type Output = Trans<&'a dyn Matrix<T>>; 51 | 52 | fn bitxor(self, m: Marker) -> Trans<&'a dyn Matrix<T>> { 53 | match m { 54 | Marker::T => Trans::T(self), 55 | Marker::H => Trans::H(self), 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /rust-blas/src/matrix/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //! Matrix operations. 6 | use crate::attribute::Order; 7 | 8 | pub mod ll; 9 | pub mod ops; 10 | 11 | /// Methods that allow a type to be used in BLAS functions as a matrix. 12 | pub trait Matrix<T> { 13 | /// The leading dimension of the matrix. Defaults to `cols` for `RowMajor` 14 | /// order and `rows` for `ColMajor` order. 15 | fn lead_dim(&self) -> u32 { 16 | match self.order() { 17 | Order::RowMajor => self.cols(), 18 | Order::ColMajor => self.rows(), 19 | } 20 | } 21 | /// The order of the matrix. Defaults to `RowMajor`. 22 | fn order(&self) -> Order { 23 | Order::RowMajor 24 | } 25 | /// Returns the number of rows. 26 | fn rows(&self) -> u32; 27 | /// Returns the number of columns. 28 | fn cols(&self) -> u32; 29 | /// An unsafe pointer to a contiguous block of memory. 30 | fn as_ptr(&self) -> *const T; 31 | /// An unsafe pointer to a contiguous block of memory. 32 | fn as_mut_ptr(&mut self) -> *mut T; 33 | } 34 | 35 | pub trait BandMatrix<T>: Matrix<T> { 36 | fn sub_diagonals(&self) -> u32; 37 | fn sup_diagonals(&self) -> u32; 38 | 39 | fn as_matrix(&self) -> &dyn Matrix<T>; 40 | } 41 | 42 | #[cfg(test)] 43 | pub mod tests { 44 | use crate::Matrix; 45 | 46 | pub struct M<T>(pub u32, pub u32, pub Vec<T>); 47 | 48 | impl<T> Matrix<T> for M<T> { 49 | fn rows(&self) -> u32 { 50 | self.0 51 | } 52 | 53 | fn cols(&self) -> u32 { 54 | self.1 55 | } 56 | 57 | #[inline] 58 | fn as_ptr(&self) -> *const T { 59 | self.2[..].as_ptr() 60 | } 61 | 62 | #[inline] 63 | fn as_mut_ptr(&mut self) -> *mut T { 64 | (&mut self.2[..]).as_mut_ptr() 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /rust-blas/src/matrix_vector/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | //! Matrix-vector operations.
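//!
//! The `ops` module exposes the level-2 wrappers (`Gemv`, `Ger`, `Gerc`, ...). A
//! hedged sketch of the call shape, mirroring the operator implementations in
//! `math::matrix_vector` (the concrete argument types are assumptions made for
//! illustration only):
//!
//! ```ignore
//! // y = alpha * a * x + beta * y
//! Gemv::gemv(Transpose::NoTrans, &alpha, &a, &x, &beta, &mut y);
//! ```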
6 | 7 | pub mod ll; 8 | pub mod ops; 9 | -------------------------------------------------------------------------------- /rust-blas/src/pointer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | use libc::{c_double, c_float, c_long, c_void}; 6 | use num_complex::{Complex32, Complex64}; 7 | 8 | pub trait CPtr { 9 | fn as_c_ptr(self) -> T; 10 | } 11 | 12 | macro_rules! c_ptr_impl( 13 | ($t: ty, $c_type: ty) => ( 14 | impl CPtr<*const $c_type> for *const $t { 15 | #[inline] 16 | fn as_c_ptr(self) -> *const $c_type { 17 | self as *const $c_type 18 | } 19 | } 20 | 21 | impl CPtr<*mut $c_type> for *mut $t { 22 | #[inline] 23 | fn as_c_ptr(self) -> *mut $c_type { 24 | self as *mut $c_type 25 | } 26 | } 27 | ); 28 | ); 29 | 30 | c_ptr_impl!(i32, u32); 31 | c_ptr_impl!(i64, c_long); 32 | c_ptr_impl!(f32, c_float); 33 | c_ptr_impl!(f64, c_double); 34 | c_ptr_impl!(Complex32, c_void); 35 | c_ptr_impl!(Complex64, c_void); 36 | -------------------------------------------------------------------------------- /rust-blas/src/prefix.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | macro_rules! prefix( 6 | (f32, $f: ident) => (cblas_s::$f); 7 | (f64, $f: ident) => (cblas_d::$f); 8 | (Complex, $f: ident) => (cblas_c::$f); 9 | (Complex, $f: ident) => (cblas_z::$f); 10 | (Complex32, $f: ident) => (cblas_c::$f); 11 | (Complex64, $f: ident) => (cblas_z::$f); 12 | ); 13 | -------------------------------------------------------------------------------- /rust-blas/src/scalar.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2014 Michael Yang. All rights reserved. 2 | // Use of this source code is governed by a MIT-style 3 | // license that can be found in the LICENSE file. 4 | 5 | use libc::{c_double, c_float, c_void}; 6 | use num_complex::Complex; 7 | 8 | pub trait Scalar { 9 | fn as_const(self) -> T; 10 | fn as_mut(self) -> S; 11 | } 12 | 13 | macro_rules! scalar_impl( 14 | ($t: ty, $c_type: ty) => ( 15 | impl<'a> Scalar<$t, *mut $t> for &'a $t { 16 | #[inline] 17 | fn as_const(self) -> $t { 18 | *self as $c_type 19 | } 20 | 21 | #[inline] 22 | fn as_mut(self) -> *mut $t { 23 | &self as *const _ as *mut $c_type 24 | } 25 | } 26 | 27 | impl<'a> Scalar<*const c_void, *mut c_void> for &'a Complex<$t> { 28 | #[inline] 29 | fn as_const(self) -> *const c_void { 30 | self as *const _ as *const c_void 31 | } 32 | 33 | #[inline] 34 | fn as_mut(self) -> *mut c_void { 35 | self as *const _ as *mut c_void 36 | } 37 | } 38 | ); 39 | ); 40 | 41 | scalar_impl!(f32, c_float); 42 | scalar_impl!(f64, c_double); 43 | --------------------------------------------------------------------------------