├── .clog.toml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── benches
│   ├── relu.rs
│   ├── sigmoid.rs
│   ├── softmax.rs
│   └── tanh.rs
├── perf
│   ├── README.md
│   ├── perf_rblas.sh
│   └── run_perf.sh
├── rustfmt.toml
├── src
│   ├── frameworks
│   │   ├── cuda
│   │   │   ├── helper.rs
│   │   │   └── mod.rs
│   │   ├── mod.rs
│   │   ├── native
│   │   │   ├── helper.rs
│   │   │   └── mod.rs
│   │   └── opencl.rs
│   ├── lib.rs
│   └── plugin.rs
└── tests
    ├── convolution_specs.rs
    ├── lrn_specs.rs
    ├── pooling_specs.rs
    ├── relu_pointwise_specs.rs
    ├── relu_specs.rs
    ├── sigmoid_pointwise_specs.rs
    ├── sigmoid_specs.rs
    ├── softmax_specs.rs
    ├── tanh_pointwise_specs.rs
    └── tanh_specs.rs
/.clog.toml:
--------------------------------------------------------------------------------
1 | [clog]
2 | # A repository link with the trailing '.git' which will be used to generate
3 | # all commit and issue links
4 | repository = "https://github.com/autumnai/collenchyma-nn"
5 |
6 | # specify the style of commit links to generate, defaults to "github" if omitted
7 | link-style = "github"
8 |
9 | # The preferred way to set a constant changelog. This file will be read for old changelog
10 | # data, then prepended to for new changelog data. It's the equivalent to setting
11 | # both infile and outfile to the same file.
12 | #
13 | # Do not use with outfile or infile fields!
14 | #
15 | # Defaults to stdout when omitted
16 | changelog = "CHANGELOG.md"
17 |
18 | # This sets the output format. There are two options "json" or "markdown" and
19 | # defaults to "markdown" when omitted
20 | output-format = "markdown"
21 |
22 | # If you use tags, you can set the following if you wish to only pick
23 | # up changes since your latest tag
24 | from-latest-tag = true
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: rust
3 | rust:
4 | - nightly
5 | - beta
6 | - stable
7 | matrix:
8 | allow_failures:
9 | - rust: nightly
10 | env: TRAVIS_CARGO_NIGHTLY_FEATURE=lint
11 | exclude:
12 | - rust: beta
13 | env: TRAVIS_CARGO_NIGHTLY_FEATURE=lint
14 | - rust: stable
15 | env: TRAVIS_CARGO_NIGHTLY_FEATURE=lint
16 | branches:
17 | only:
18 | - master
19 | before_script:
20 | - |
21 | pip install 'travis-cargo<0.2' --user &&
22 | export PATH=$HOME/.local/bin:$PATH
23 | script:
24 | - |
25 | 
26 | travis-cargo build -- --no-default-features --features "travis" &&
27 | travis-cargo test -- --no-default-features --features "travis"
28 | travis-cargo doc -- --no-default-features --features "travis"
29 | addons:
30 | apt:
31 | packages:
32 | - libcurl4-openssl-dev
33 | - libelf-dev
34 | - libdw-dev
35 | after_success:
36 | - travis-cargo doc-upload
37 | - travis-cargo coveralls --no-sudo
38 | notifications:
39 | email:
40 | on_success: never
41 | env:
42 | global:
43 | secure: hTYDLFNmzEazAYd0eBY0HearyERReUHUSSmwGvdVBIX0Vbltvq1vKmUJAneDwzRiehL9HyrX5HV4OkbA6XNseFXXuTYjC/vbSjyN+TcXDLUWIhTMTsLEdh3h5g/XQN9lv03ovmqPDA2owoggxNosRt/10dclV9GiyYHF3ozbJOur4DIMqCi9ta9FpE9KMHsC6HSSdFgW5vTcrKsk9M2GBWzy52lAUQjm1qw1zHG2FmopbzXruaeFHIV0V2owww2FxLp6Hh592/WTX0gj6AMR1M8DfvALV5vDB+F49EWWHnC64RHGW74muQXrGPmG4nk0oUE4EzjX+XWTaRUCQ9p1nkPxFcWOCqykASOCnXNLfdDH47mqRmpjvHwvS5Ivd0FWaHPWmHbxu9CJ9zJImijHPgRpKVVmxh0BqKMG72QCUkONr4nKCW/vbCOvJgnwXpFXLLhYgqQsjaT/kqGR4VbB6PxKeI0+z8AnKE6RAzZmvN1U3Bx3kZ5xEaJCfytpXGBROTyXV4gvhyyDmdG8MnYuCPlY4Ov8LC7vWAmyp7nbE/IYtGePz6B6ec5bl9qrv9zD14FOT+MFvxqZkYaNUgImTouUG/MvH1lmSrPjqalxdmq8YTiGFgmh8vFZ2ovPbfPRl6usEMcgd8CjNuewb4Dz/XNYEmsS0C3+o3HMCNJ/YVc=
44 | matrix:
45 | - TRAVIS_CARGO_NIGHTLY_FEATURE=travis
46 | - TRAVIS_CARGO_NIGHTLY_FEATURE=lint
47 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | ## 0.3.4 (2016-03-03)
3 |
4 |
5 | #### Bug Fixes
6 |
7 | * **cuda/convolution:** workaround for 0 memory allocation ([e30b59de](https://github.com/autumnai/collenchyma-nn/commit/e30b59decfe7ca6663a42fd27e19e55fcee83552))
8 |
9 |
10 |
11 |
12 | ## 0.3.3 (2016-03-03)
13 |
14 |
15 | #### Features
16 |
17 | * **cudnnv4:** passive support for cuDNNv4 ([0dc46301](https://github.com/autumnai/collenchyma-nn/commit/0dc463011c0ae261baee201e1b9cb540309349c5))
18 |
19 |
20 |
21 |
22 | ## 0.3.2 (2016-03-02)
23 |
24 |
25 | #### Breaking Changes
26 |
27 | * **convolution:** change convolution functions to require workspace ([f9d40136](https://github.com/autumnai/collenchyma-nn/commit/f9d401360c54bac6a253925d90625b0a1393ea17))
28 |
29 |
30 |
31 |
32 | ## 0.3.1 (2016-02-23)
33 |
34 |
35 | #### Features
36 |
37 | * **pointwise:** add pointwise activation functions (cuDNN) ([d74821b5](https://github.com/autumnai/collenchyma-nn/commit/d74821b582056f9acd3bdb4acd98f72668d070f8))
38 |
39 |
40 |
41 |
42 | ## 0.3.0 (2016-02-22)
43 |
44 |
45 | #### Features
46 |
47 | * **log_softmax:** add LogSoftmax operations ([86a8ae67](https://github.com/autumnai/collenchyma-nn/commit/86a8ae67727e0a5d28c901a7a32940fd7e2250f2))
48 | * **cuda:**
49 | * share workspace between CUDA convolution operations ([7f5f3207](https://github.com/autumnai/collenchyma-nn/commit/7f5f3207873874accb7a5a16d637e2701161ac04))
50 | * allow CUDA activations to work with 1D/2D tensors ([f4effe7d](https://github.com/autumnai/collenchyma-nn/commit/f4effe7d66d96537251d86bf24968b521a951121))
51 | * allow CUDA softmax to work with 1-3D tensors ([f74f72b6](https://github.com/autumnai/collenchyma-nn/commit/f74f72b6207505f4c29c7c44a9748d83972e7f72))
52 | * **nn_trait:** remove trait bounds for NN ([9ad08d9f](https://github.com/autumnai/collenchyma-nn/commit/9ad08d9f97cc382699c78c1397b52509d2e98969))
53 | * **license:** change license to dual MIT/Apache-2.0 ([8a940690](https://github.com/autumnai/collenchyma-nn/commit/8a940690e21bae269c44b9501e956bbf066cdcc1))
54 |
55 | #### Breaking Changes
56 |
57 | * **convolution:** implement convolutions correctly ([24b164b5](https://github.com/autumnai/collenchyma-nn/commit/24b164b55a913f522d79832308cf2e4a7996612a))
58 |
59 | #### Performance
60 |
61 | * **convolution:** don't do a memAlloc for a zero size workspace ([73612bb5](https://github.com/autumnai/collenchyma-nn/commit/73612bb56ab70500b4670b7a9a12390e2facee37))
62 |
63 |
64 |
65 | ## 0.2.1 (2016-01-21)
66 |
67 |
68 | #### Features
69 |
70 | * **native:** Add support for softmax w/ test and benches. ([14d6d1bc](https://github.com/autumnai/collenchyma-nn/commit/14d6d1bcda8bbc0ffa368527633f592862517200))
71 |
72 | #### Bug Fixes
73 |
74 | * **native:** Fix sigmoid_grad to use x_diff instead of x for dx ([c25a32aa](https://github.com/autumnai/collenchyma-nn/commit/c25a32aa272ff3c753ee8be2ea89457367b38734))
75 |
76 |
77 |
78 |
79 | ## 0.2.0 (2016-01-15)
80 |
81 |
82 | #### Features
83 |
84 | * **bench:** add bench and perf utilities ([0e2d34c6](https://github.com/autumnai/collenchyma-nn/commit/0e2d34c67acba38c6910cdff6e983b5285dfb852))
85 | * **native:** implement Sigmoid, ReLU, tanh for Native backend. ([ece54e37](https://github.com/autumnai/collenchyma-nn/commit/ece54e37a241f81b45888225ab0ee28c538950f6))
86 |
87 |
88 |
89 | ## 0.1.0 (2015-12-21)
90 |
91 |
92 | #### Bug Fixes
93 |
94 | * **scale_params:** fix ScalParams default to work on stable ([43654dca](https://github.com/autumnai/collenchyma-nn/commit/43654dca7cb92826ffecd4f0cd251fb7071d11c5))
95 |
96 | #### Features
97 |
98 | * **activation:** add most popular NN activation functions ([3311bb43](https://github.com/autumnai/collenchyma-nn/commit/3311bb43d78c850db8322c9ea8c1a5f2ca189cd1))
99 | * **features:** add framework feature groups ([08629ea8](https://github.com/autumnai/collenchyma-nn/commit/08629ea8f1c38047a5d7fec24601e21ba79d704f))
100 | * **nn:**
101 | * add all cudnn available operations to collenchyma-nn ([03384763](https://github.com/autumnai/collenchyma-nn/commit/033847630a0674c372666db209d436a80ecabe1b))
102 | * add basic nn implementation structure ([aa17ef0f](https://github.com/autumnai/collenchyma-nn/commit/aa17ef0f5064e479152ac3e398bf64887e03b6e2))
103 | * **sigmoid:**
104 | * add full sigmoid CUDA implementation ([8ea1a290](https://github.com/autumnai/collenchyma-nn/commit/8ea1a29016c364536755e2fb5d13a52352b059ab))
105 | * add CUDA Sigmoid ([6aceb957](https://github.com/autumnai/collenchyma-nn/commit/6aceb957d05a0ee625b48bab38693b99c9e09f01))
106 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Leaf
2 |
3 | We love that you are interested in contributing to Leaf. There are many ways
4 | to contribute and we appreciate all of them. This document gives a rough
5 | overview of how you can contribute to Leaf.
6 |
7 | * [Pull Requests](#pull-requests)
8 | * [Bug Reports](#bug-reports)
9 | * [Feature Requests](#feature-requests)
10 | * [Appendix](#appendix)
11 | * [Git Commit Guidelines](#git-commit-guidelines)
12 | * [Documentation Guidelines](#documentation-guidelines)
13 |
14 |
15 | If you have questions, hop on the [Leaf Chat](https://gitter.im/autumnai/leaf),
16 | or reach out to {@[MJ](https://twitter.com/mjhirn), @[Max](https://twitter.com/hobofan)}.
17 |
18 | ## Pull Requests
19 |
20 | #### Preparation
21 |
22 | Before you get started, please find the page of the project you're looking to
23 | improve. We encourage you to poke around in the code a little bit, familiarize
24 | yourself with its development style, and check the commit log to see who is
25 | contributing.
26 |
27 | Before you start working, you might check out the **Network** tab on the project
28 | to see all the other forks other people have made. Somebody might be already
29 | working on the problem you would love to solve.
30 |
31 | #### Making a PR
32 |
33 | Pull requests are the primary mechanism we use to change Leaf repos. GitHub
34 | itself has some [great documentation](https://help.github.com/articles/using-pull-requests/)
35 | on using the Pull Request feature. We use the 'fork and pull' model described
36 | there.
37 |
38 | Please make pull requests against the `master` branch.
39 |
40 | All pull requests are reviewed by another person.
41 |
42 | > **Highfive not yet integrated**:
43 | > *We have a bot, @rust-highfive, that will automatically assign a random*
44 | > *person to review your request.*
45 | >
46 | > *If you want to request that a specific person reviews your pull request,*
47 | > *you can add an `r?` to the message. For example, MJ usually reviews*
48 | > *documentation changes. So if you were to make a documentation change, add*
49 | >
50 | > r? @MichaelHirn
51 | >
52 | > *to the end of the message, and @rust-highfive will assign @MichaelHirn*
53 | > *instead of a random person. This is entirely optional.*
54 |
55 | After someone has reviewed your pull request, they will leave an annotation
56 | on the pull request with an `r+`. It will look something like this:
57 |
58 | @homu: r+ 38fe8d2
59 |
60 | This tells @homu, our lovable integration bot, that your pull request has
61 | been approved. The PR then enters the
62 | [merge queue](http://buildbot.rust-lang.org/homu/queue/rust), where
63 | @homu will run all the tests on every platform we support. If it all works
64 | out, @homu will merge your code into `master` and close the pull request.
65 |
66 | ## Bug Reports
67 |
68 | While bugs are unfortunate, they're a reality in software. We can't fix what we
69 | don't know about, so please report liberally. If you're not sure if something
70 | is a bug or not, feel free to file a bug anyway.
71 |
72 | If you have the chance, before reporting a bug, please search existing issues,
73 | as it's possible that someone else has already reported your error. This doesn't
74 | always work, and sometimes it's hard to know what to search for, so consider this
75 | extra credit. We won't mind if you accidentally file a duplicate report.
76 |
77 | [Opening an issue is easy](https://guides.github.com/features/issues/).
78 | Here's a template that you can use to file a bug, though it's not necessary to
79 | use it exactly:
80 |
81 | <short summary of the bug>
82 | 
83 | I tried this code:
84 | 
85 | <code sample that causes the bug>
86 | 
87 | I expected to see this happen: <explanation>
88 | 
89 | Instead, this happened: <explanation>
90 | 
91 | ## Meta
92 |
93 | {Library, Rust, OS} versions
94 |
95 | Backtrace:
96 |
97 | All three components are important: what you did, what you expected, what
98 | happened instead. Please include information about what platform you're on, what
99 | version of Rust and library you're using, etc.
100 |
101 | Sometimes, a backtrace is helpful, and so including that is nice. To get
102 | a backtrace, set the `RUST_BACKTRACE` environment variable. The easiest way
103 | to do this is to invoke `rustc` like this:
104 |
105 | ```bash
106 | $ RUST_BACKTRACE=1 rustc ...
107 | ```
108 |
109 | ## Feature Requests
110 |
111 | To request a change to the way that one of the Leaf libraries work, please
112 | open an issue in the repository.
113 |
114 | ## Appendix
115 |
116 | ### Git Commit Guidelines
117 |
118 | We have very precise rules over how git commit messages should be formatted.
119 | This leads to more readable messages that are easy to follow when looking
120 | through the project history. But also, we may use the git commit messages to
121 | auto-generate the Leaf change log.
122 |
123 | #### Commit Message Format
124 |
125 | Each commit message consists of a header, a body and a footer. The header has a
126 | special format that includes a type, a scope and a subject:
127 |
128 | <type>/<scope>: <subject>
129 | \n
130 | <body>
131 | \n
132 | <footer>
133 |
134 | No line of the commit message can be longer than 100 characters! This allows the
135 | message to be easier to read on GitHub as well as in various git tools.
136 |
137 | <**type**>:
138 |
139 | Must be one of the following:
140 |
141 | - *`feat`*: A new feature
142 | - *`fix`*: A bug fix
143 | - *`docs`*: Documentation only changes
144 | - *`style`*: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc)
145 | - *`refactor`*: A code change that neither fixes a bug nor adds a feature
146 | - *`perf`*: A code change that improves performance
147 | - *`test`*: Adding missing tests
148 | - *`chore`*: Changes to the build process or auxiliary tools and libraries such as documentation generation
149 |
150 | <**scope**>:
151 |
152 | The scope could be anything that specifies the place of the commit change.
153 | For example: `feature1`, `tests`, `lib`, etc...
154 |
155 | <**subject**>:
156 |
157 | The subject contains a succinct description of the change:
158 | - use the imperative, present tense: "change" not "changed" nor "changes"
159 | - don't capitalize first letter
160 | - no dot (.) at the end
161 |
162 | <**body**>:
163 |
164 | The body should include the motivation for the change, contrast it with the
165 | previous behaviour, and explain why the commit matters.
166 |
167 | - Just as in the `subject`, use the imperative, present tense
168 |
169 | <**footer**>:
170 |
171 | The footer should contain any information about Breaking Changes and is also the
172 | place to reference GitHub issues that this commit closes. For Example:
173 |
174 | BREAKING CHANGE: [specify what is breaking]
175 |
176 | { REFERENCE, CLOSE, FIX } #Issue
177 |
178 |
179 | #### Revert
180 |
181 | If the commit reverts a previous commit, it should begin with `revert:`,
182 | followed by the header of the reverted commit. In the body it should say:
183 | `This reverts commit <hash>.`, where the hash is the SHA of the commit being
184 | reverted.
185 |
186 | ### Documentation Guidelines
187 |
188 | We created an extensive [Documentation Guide][1] for you, which outlines an easy
189 | and efficient communication framework for providing developers and users with
190 | helpful documentation about the deep learning framework.
191 |
192 | [1] https://medium.com/@autumn_eng/increasing-open-source-engagement-with-structural-communication-guidelines-for-code-documentation-e72533de8e45
193 |
--------------------------------------------------------------------------------
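As an illustration of the Git Commit Guidelines above, a commit message following the `<type>/<scope>: <subject>` format might look like this (the scope and issue number are hypothetical):

    feat/cuda: add pointwise tanh operation

    Implement TanhPointwise for the CUDA backend so activations can be
    applied in place without allocating a separate output tensor.

    CLOSE #123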
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "collenchyma-nn"
3 | description = "collenchyma plugin providing Neural Network operations"
4 | version = "0.3.4"
5 | authors = ["Michael Hirn ",
6 | "Maximilian Goisser "]
7 |
8 | repository = "https://github.com/autumnai/collenchyma-nn"
9 | homepage = "https://github.com/autumnai/collenchyma-nn"
10 | documentation = "https://autumnai.github.io/collenchyma-nn"
11 | readme = "README.md"
12 |
13 | keywords = ["neural-network", "collenchyma", "computation", "hpc", "plugin"]
14 | license = "MIT OR Apache-2.0"
15 |
16 | [dependencies]
17 | collenchyma = { version = "0.0.8", default-features = false }
18 | cudnn = { version = "1.3.1", optional = true }
19 | libc = "0.2"
20 | lazy_static = "0.1"
21 | log = "0.3.2"
22 |
23 | clippy = { version = "0.0.27", optional = true }
24 |
25 | [dev-dependencies]
26 |
27 | rand = "0.3"
28 |
29 | [features]
30 | default = ["native", "cuda", "opencl"]
31 | native = ["collenchyma/native"]
32 | cuda = ["collenchyma/cuda", "cudnn"]
33 | opencl = ["collenchyma/opencl"]
34 |
35 | travis = ["native"]
36 | dev = []
37 | lint = ["clippy"]
38 |
--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT LICENSE
2 |
3 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # collenchyma-NN • [](https://gitter.im/autumnai/collenchyma?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [](https://travis-ci.org/autumnai/collenchyma-nn) [](https://crates.io/crates/collenchyma-nn) [](LICENSE)
2 |
3 | collenchyma-NN provides Neural Network related algorithms for [Collenchyma][collenchyma].
4 | Run NN operations on servers, desktops or mobiles, GPUs, FPGAs or CPUs, without
5 | caring about OpenCL or CUDA support on the machine.
6 |
7 | collenchyma-NN was started at [Autumn][autumn] to support the Machine Intelligence
8 | Framework [Leaf][leaf] with backend-agnostic, state-of-the-art performance.
9 |
10 | For more information,
11 |
12 | * see collenchyma-NN's [Documentation](http://autumnai.github.io/collenchyma-nn)
13 | * visit [Collenchyma][collenchyma] for more information about portable operations and other Plugins.
14 | * or get in touch on [Twitter][twitter-autumn] or [Gitter][gitter-collenchyma]
15 |
16 | [collenchyma]: https://github.com/autumnai/collenchyma
17 | [autumn]: http://autumnai.com
18 | [leaf]: https://github.com/autumnai/leaf
19 | [twitter-autumn]: https://twitter.com/autumn_eng
20 |
21 | ## Provided Operations
22 |
23 | This plugin provides the following operations to the Collenchyma Backend.
24 | Every operation includes forward + backward. A `-` means not yet implemented.
25 | More information can be found in the [Documentation][docs-ops].
26 |
27 | | Operation | CUDA | OpenCL | Native |
28 | |--- |--- |--- |--- |
29 | | Sigmoid | { cuDNN v3, v4 } | - | Rust |
30 | | SigmoidPointwise | { cuDNN v3, v4 } | - | |
31 | | ReLU | { cuDNN v3, v4 } | - | Rust |
32 | | ReLUPointwise | { cuDNN v3, v4 } | - | |
33 | | Tanh | { cuDNN v3, v4 } | - | Rust |
34 | | TanhPointwise | { cuDNN v3, v4 } | - | |
35 | | | | | |
36 | | Normalization (LRN) | { cuDNN v3, v4 } | - | - |
37 | | | | | |
38 | | Convolution | { cuDNN v3, v4 } | - | - |
39 | | | | | |
40 | | Softmax | { cuDNN v3, v4 } | - | Rust |
41 | | LogSoftmax | { cuDNN v3, v4 } | - | Rust |
42 | | | | | |
43 | | Pooling Max | { cuDNN v3, v4 } | - | - |
44 | | Pooling Avg | { cuDNN v3, v4 } | - | - |
45 |
46 | Kudos to [ehiggs][ehiggs] for implementing the native Rust operations.
47 |
48 | [docs-ops]: http://autumnai.github.io/collenchyma-nn/collenchyma_nn/trait.NN.html
49 | [ehiggs]: https://github.com/ehiggs
50 |
51 | ## Getting Started
52 |
53 | If you're using Cargo, just add collenchyma-NN to your Cargo.toml:
54 |
55 | [dependencies]
56 | collenchyma = "0.0.8"
57 | collenchyma-nn = "0.3.4"
58 |
59 | If you're using [Cargo Edit][cargo-edit], you can call:
60 |
61 | $ cargo add collenchyma-nn
62 |
63 | [cargo-edit]: https://github.com/killercup/cargo-edit
64 |
65 | ## Usage
66 |
67 | Bring the plugin trait and the other important Collenchyma traits/structs into scope,
68 | and you will be able to execute the operations provided here on your Collenchyma Backend.
69 |
70 | ```rust
71 | extern crate collenchyma as co;
72 | extern crate collenchyma_nn as nn;
73 | use co::prelude::*;
74 | use nn::*;
75 | fn main() {
76 | // Initialize a CUDA Backend.
77 | let backend = Backend::<Cuda>::default().unwrap();
78 | // Initialize two SharedTensors.
79 | // Usually you would also want to fill them with data.
80 | // More info about that in the Collenchyma README.md.
81 | let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
82 | let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
83 | // Use the operation provided by this Plugin.
84 | backend.sigmoid(&mut x, &mut result);
85 | }
86 | ```
87 |
88 | ## Contributing
89 |
90 | Want to contribute? Awesome! We have
91 | [instructions to help you get started contributing code or documentation][contributing],
92 | and high-priority issues that we could use your help with.
93 |
94 | We have a mostly real-time collaboration culture, which happens here on GitHub and
95 | on the [Collenchyma Gitter Channel][gitter-collenchyma].
96 | You can also reach out to the Maintainers
97 | {[@MJ][mj], [@hobofan][hobofan]}.
98 |
99 | Unless you explicitly state otherwise, any contribution intentionally
100 | submitted for inclusion in the work by you, as defined in the Apache-2.0
101 | license, shall be dual licensed as below, without any additional terms or
102 | conditions.
103 |
104 | [contributing]: CONTRIBUTING.md
105 | [gitter-collenchyma]: https://gitter.im/autumnai/collenchyma
106 | [mj]: https://twitter.com/mjhirn
107 | [hobofan]: https://twitter.com/hobofan
108 |
109 | ## Changelog
110 |
111 | > *A changelog is a log or record of all the changes made to a project, such as a website or software project, usually including such records as bug fixes, new features, etc.* - [Wikipedia][changelog-quote]
112 |
113 | You can find the release history at the [CHANGELOG][changelog] file.
114 |
115 | We are using [Clog][clog], the Rust tool for auto generating CHANGELOG files.
116 |
117 | [changelog]: CHANGELOG.md
118 | [changelog-quote]: https://en.wikipedia.org/wiki/Changelog
119 | [Clog]: https://github.com/clog-tool/clog-cli
120 |
121 | ## License
122 |
123 | Licensed under either of
124 |
125 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
126 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
127 |
128 | at your option.
129 |
--------------------------------------------------------------------------------
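The Usage example in the README above targets the CUDA backend. Below is a minimal sketch, assuming the Native backend and the tensor-filling accessors that appear in the `benches/` files of this repository; the shape and input values are illustrative, and the read-back path (a non-mutable counterpart to `get_mut`/`as_mut_native`) is not shown because only the mutable accessors appear in this dump.

```rust
extern crate collenchyma as co;
extern crate collenchyma_nn as nn;
use co::prelude::*;
use nn::*;

fn main() {
    // Native (CPU) backend: works without a CUDA or OpenCL device.
    let backend = Backend::<Native>::default().unwrap();
    let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    // Fill the input tensor, following the pattern from benches/*.rs.
    x.get_mut(backend.device()).unwrap()
        .as_mut_native().unwrap()
        .as_mut_slice()
        .clone_from_slice(&[-1.0f32, 0.0, 1.0]);
    // Run the sigmoid operation provided by this plugin (see the README's Usage section).
    let _ = backend.sigmoid(&mut x, &mut result);
}
```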
/benches/relu.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let mut out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
36 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
38 | out.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
39 | (x, dx, out, dout)
40 | }
41 |
42 | #[inline(never)]
43 | fn bench_profile<F: FnMut() -> ()>(
44 | b: &mut Bencher,
45 | mut bench_func: F,
46 | times: usize
47 | ) {
48 | b.iter(|| { for _ in 0..times { bench_func(); } });
49 | }
50 |
51 | #[bench]
52 | fn bench_1000_relu_100_native(b: &mut Bencher) {
53 | let backend = backend();
54 | let (mut x, mut out) = arguments(&backend, 100);
55 | let mut func = || { let _ = backend.relu_plain(&mut x, &mut out); };
56 | { func(); bench_profile(b, func, 1000); }
57 | }
58 |
59 | #[bench]
60 | fn bench_10_relu_10000_native(b: &mut Bencher) {
61 | let backend = backend();
62 | let (mut x, mut out) = arguments(&backend, 10000);
63 | let mut func = || { let _ = backend.relu_plain(&mut x, &mut out); };
64 | { func(); bench_profile(b, func, 10); }
65 | }
66 |
67 | #[bench]
68 | fn bench_1000_relu_grad_100_native(b: &mut Bencher) {
69 | let backend = backend();
70 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 100);
71 | let mut func = || { let _ = backend.relu_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
72 | { func(); bench_profile(b, func, 1000); }
73 | }
74 |
75 | #[bench]
76 | fn bench_10_relu_grad_10000_native(b: &mut Bencher) {
77 | let backend = backend();
78 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 10000);
79 | let mut func = || { let _ = backend.relu_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
80 | { func(); bench_profile(b, func, 10); }
81 | }
82 |
--------------------------------------------------------------------------------
/benches/sigmoid.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let mut out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
36 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
38 | out.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
39 | (x, dx, out, dout)
40 | }
41 |
42 | #[inline(never)]
43 | fn bench_profile<F: FnMut() -> ()>(
44 | b: &mut Bencher,
45 | mut bench_func: F,
46 | times: usize
47 | ) {
48 | b.iter(|| { for _ in 0..times { bench_func(); } });
49 | }
50 |
51 | #[bench]
52 | fn bench_1000_sigmoid_100_native(b: &mut Bencher) {
53 | let backend = backend();
54 | let (mut x, mut out) = arguments(&backend, 100);
55 | let mut func = || { let _ = backend.sigmoid_plain(&mut x, &mut out); };
56 | { func(); bench_profile(b, func, 1000); }
57 | }
58 |
59 | #[bench]
60 | fn bench_10_sigmoid_10000_native(b: &mut Bencher) {
61 | let backend = backend();
62 | let (mut x, mut out) = arguments(&backend, 10000);
63 | let mut func = || { let _ = backend.sigmoid_plain(&mut x, &mut out); };
64 | { func(); bench_profile(b, func, 10); }
65 | }
66 |
67 | #[bench]
68 | fn bench_1000_sigmoid_grad_100_native(b: &mut Bencher) {
69 | let backend = backend();
70 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 100);
71 | let mut func = || { let _ = backend.sigmoid_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
72 | { func(); bench_profile(b, func, 1000); }
73 | }
74 |
75 | #[bench]
76 | fn bench_10_sigmoid_grad_10000_native(b: &mut Bencher) {
77 | let backend = backend();
78 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 10000);
79 | let mut func = || { let _ = backend.sigmoid_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
80 | { func(); bench_profile(b, func, 10); }
81 | }
82 |
--------------------------------------------------------------------------------
/benches/softmax.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
36 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | (x, dx, dout)
38 | }
39 |
40 | #[inline(never)]
41 | fn bench_profile<F: FnMut() -> ()>(
42 | b: &mut Bencher,
43 | mut bench_func: F,
44 | times: usize
45 | ) {
46 | b.iter(|| { for _ in 0..times { bench_func(); } });
47 | }
48 |
49 | #[bench]
50 | fn bench_1000_softmax_100_native(b: &mut Bencher) {
51 | let backend = backend();
52 | let (mut x, mut out) = arguments(&backend, 100);
53 | let mut func = || { let _ = backend.softmax_plain(&mut x, &mut out); };
54 | { func(); bench_profile(b, func, 1000); }
55 | }
56 |
57 | #[bench]
58 | fn bench_10_softmax_10000_native(b: &mut Bencher) {
59 | let backend = backend();
60 | let (mut x, mut out) = arguments(&backend, 10000);
61 | let mut func = || { let _ = backend.softmax_plain(&mut x, &mut out); };
62 | { func(); bench_profile(b, func, 10); }
63 | }
64 |
65 | #[bench]
66 | fn bench_1000_softmax_grad_100_native(b: &mut Bencher) {
67 | let backend = backend();
68 | let (mut x, mut dx, mut dout) = arguments_grad(&backend, 100);
69 | let mut func = || { let _ = backend.softmax_grad_plain(&mut x, &mut dx, &mut dout); };
70 | { func(); bench_profile(b, func, 1000); }
71 | }
72 |
73 | #[bench]
74 | fn bench_10_softmax_grad_10000_native(b: &mut Bencher) {
75 | let backend = backend();
76 | let (mut x, mut dx, mut dout) = arguments_grad(&backend, 10000);
77 | let mut func = || { let _ = backend.softmax_grad_plain(&mut x, &mut dx, &mut dout); };
78 | { func(); bench_profile(b, func, 10); }
79 | }
80 |
--------------------------------------------------------------------------------
/benches/tanh.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let mut out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
36 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
38 | out.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
39 | (x, dx, out, dout)
40 | }
41 |
42 | #[inline(never)]
43 | fn bench_profile<F: FnMut() -> ()>(
44 | b: &mut Bencher,
45 | mut bench_func: F,
46 | times: usize
47 | ) {
48 | b.iter(|| { for _ in 0..times { bench_func(); } });
49 | }
50 |
51 | #[bench]
52 | fn bench_1000_tanh_100_native(b: &mut Bencher) {
53 | let backend = backend();
54 | let (mut x, mut out) = arguments(&backend, 100);
55 | let mut func = || { let _ = backend.tanh_plain(&mut x, &mut out); };
56 | { func(); bench_profile(b, func, 1000); }
57 | }
58 |
59 | #[bench]
60 | fn bench_10_tanh_10000_native(b: &mut Bencher) {
61 | let backend = backend();
62 | let (mut x, mut out) = arguments(&backend, 10000);
63 | let mut func = || { let _ = backend.tanh_plain(&mut x, &mut out); };
64 | { func(); bench_profile(b, func, 10); }
65 | }
66 |
67 | #[bench]
68 | fn bench_1000_tanh_grad_100_native(b: &mut Bencher) {
69 | let backend = backend();
70 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 100);
71 | let mut func = || { let _ = backend.tanh_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
72 | { func(); bench_profile(b, func, 1000); }
73 | }
74 |
75 | #[bench]
76 | fn bench_10_tanh_grad_10000_native(b: &mut Bencher) {
77 | let backend = backend();
78 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 10000);
79 | let mut func = || { let _ = backend.tanh_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
80 | { func(); bench_profile(b, func, 10); }
81 | }
82 |
--------------------------------------------------------------------------------
/perf/README.md:
--------------------------------------------------------------------------------
1 | # Profiling
2 |
3 | Collenchyma comes with scripts to help with profiling performance problems.
4 |
5 | Run [perf](http://www.brendangregg.com/perf.html) on one of the benchmark tests:
6 |
7 | ```sh
8 | # compile latest version of benchmarks with DWARF information
9 | cargo rustc --bench [bench_file_name] -- -g
10 | sudo ./perf/run_perf.sh [bench_fn_name] # perf needs sudo
11 | ```
12 |
--------------------------------------------------------------------------------
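As a concrete instance of the profiling steps described in perf/README.md above, using the ReLU benchmark shipped in `benches/` (the benchmark file and function names are taken from this repository):

    cargo rustc --bench relu -- -g
    sudo ./perf/run_perf.sh bench_1000_relu_100_native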
/perf/perf_rblas.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | perf record -a -g --output perf_rblas_data.perf target/debug/rblas_overhead-cf1a2670c118749d --bench bench_1000_dot_100_rblas
3 | perf script -f -i perf_rblas_data.perf > perf_rblas_script.perf
4 | /home/hobofan/stuff/FlameGraph/stackcollapse-perf.pl perf_rblas_script.perf > perf_rblas_folded.perf
5 | /home/hobofan/stuff/FlameGraph/flamegraph.pl perf_rblas_folded.perf > perf_rblas_graph.svg
6 |
--------------------------------------------------------------------------------
/perf/run_perf.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | if [ $# -eq 0 ]
3 | then
4 | echo "No benchmark name supplied"
5 | exit 1
6 | fi
7 | benchname=$1
8 | mkdir -p target/perf
9 | perf record -a -g --output target/perf/${benchname}.data target/debug/rblas_overhead-c02a41a1401d43da --bench ${benchname}
10 | perf script -f -i target/perf/${benchname}.data > target/perf/${benchname}.scripted
11 | stackcollapse-perf target/perf/${benchname}.scripted | grep ${benchname} > target/perf/${benchname}.folded
12 | flamegraph target/perf/${benchname}.folded > target/perf/${benchname}.svg
13 |
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | format_strings = false
2 | reorder_imports = true
3 |
--------------------------------------------------------------------------------
/src/frameworks/cuda/mod.rs:
--------------------------------------------------------------------------------
1 | //! Provides NN for a CUDA backend.
2 | #![allow(missing_docs)]
3 | use ::plugin::*;
4 | use co::prelude::*;
5 | use co::plugin::Error as PluginError;
6 | use cudnn::*;
7 |
8 | #[macro_use]
9 | pub mod helper;
10 |
11 | lazy_static! {
12 | static ref CUDNN: Cudnn = Cudnn::new().unwrap();
13 | }
14 |
15 | pub trait ICudnnDesc<T> {
16 | fn cudnn_tensor_desc(&self) -> Result<TensorDescriptor, PluginError>;
17 | /// Creates a TensorDescriptor similar to `cudnn_tensor_desc`,
18 | /// but will create a fitting 4D tensor if the actual tensor would be 1D-3D.
19 | fn cudnn_tensor_desc_softmax(&self) -> Result<TensorDescriptor, PluginError>;
20 | /// Creates a TensorDescriptor similar to `cudnn_tensor_desc`,
21 | /// but will create a fitting 3D tensor if the actual tensor would be 1D/2D.
22 | ///
23 | /// This should be used in operations where the shape doesn't really matter,
24 | /// e.g. activations like ReLU.
25 | fn cudnn_tensor_desc_flat(&self) -> Result<TensorDescriptor, PluginError>;
26 | 
27 | fn cudnn_filter_desc(&self) -> Result<FilterDescriptor, PluginError>;
28 | 
29 | fn cudnn_convolution_desc(&self, filter: &SharedTensor<T>) -> Result<ConvolutionDescriptor, PluginError>;
30 | }
31 |
32 | macro_rules! impl_icudnndesc_for_sharedtensor {
33 | ($t:ty, $cutype:path) => (
34 | impl ICudnnDesc<$t> for SharedTensor<$t> {
35 | fn cudnn_tensor_desc(&self) -> Result<TensorDescriptor, PluginError> {
36 | match TensorDescriptor::new(&self.desc().dims_i32().clone(), &self.desc().default_stride_i32().clone(), $cutype) {
37 | Ok(desc) => Ok(desc),
38 | Err(_) => {
39 | Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor."))
40 | }
41 | }
42 | }
43 |
44 | fn cudnn_tensor_desc_softmax(&self) -> Result<TensorDescriptor, PluginError> {
45 | let actual_desc = self.desc().clone();
46 | let override_desc = match actual_desc.len() {
47 | // not batched and single dimension softmax
48 | 1 => vec![1, actual_desc[0], 1, 1],
49 | // batched and single dimension softmax
50 | 2 => vec![actual_desc[0], actual_desc[1], 1, 1],
51 | // neither batched nor single dimension
52 | 3 => vec![1, actual_desc[0], actual_desc[1], actual_desc[2]],
53 | _ => actual_desc
54 | };
55 | match TensorDescriptor::new(&override_desc.dims_i32().clone(),
56 | &override_desc.default_stride_i32().clone(),
57 | $cutype) {
58 | Ok(desc) => Ok(desc),
59 | Err(_) => {
60 | Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor."))
61 | }
62 | }
63 | }
64 |
65 | fn cudnn_tensor_desc_flat(&self) -> Result<TensorDescriptor, PluginError> {
66 | let actual_desc = self.desc().clone();
67 | let mut override_desc = match actual_desc.len() {
68 | 1 => vec![1, 1],
69 | 2 => vec![1],
70 | _ => vec![]
71 | };
72 | for dim in actual_desc {
73 | override_desc.push(dim);
74 | }
75 | match TensorDescriptor::new(&override_desc.dims_i32().clone(),
76 | &override_desc.default_stride_i32().clone(),
77 | $cutype) {
78 | Ok(desc) => Ok(desc),
79 | Err(_) => {
80 | Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor."))
81 | }
82 | }
83 | }
84 |
85 | fn cudnn_filter_desc(&self) -> Result<FilterDescriptor, PluginError> {
86 | match FilterDescriptor::new(&self.desc().dims_i32().clone(), $cutype) {
87 | Ok(desc) => Ok(desc),
88 | Err(_) => {
89 | Err(PluginError::Plugin("Unable to create CuDNN FilterDescriptor."))
90 | }
91 | }
92 | }
93 |
94 | fn cudnn_convolution_desc(&self, filter: &SharedTensor<$t>) -> Result<ConvolutionDescriptor, PluginError> {
95 | match ConvolutionDescriptor::new(&self.desc().dims_i32().clone(), &filter.desc().default_stride_i32().clone(), $cutype) {
96 | Ok(desc) => Ok(desc),
97 | Err(_) => {
98 | Err(PluginError::Plugin("Unable to create CuDNN ConvolutionDescriptor."))
99 | }
100 | }
101 | }
102 | }
103 | )
104 | }
105 |
106 | impl_icudnndesc_for_sharedtensor!(f32, ::cudnn::utils::DataType::Float);
107 | impl_icudnndesc_for_sharedtensor!(f64, ::cudnn::utils::DataType::Double);
108 |
109 | impl_oconf_for_cc!(f32, f64);
110 | impl_oconf_for_clrn!(f32, f64);
111 | impl_oconf_for_pooling!(f32, f64);
112 |
113 | impl ConvForwardAlgo {
114 | /// Tries to return the matching cuDNN type for the enum value.
115 | fn as_cudnn(&self) -> Result<cudnnConvolutionFwdAlgo_t, ::co::error::Error> {
116 | Ok(match *self {
117 | ConvForwardAlgo::Auto => return Err(::co::error::Error::Plugin(::co::plugin::Error::Plugin("Can't create cuDNN convolution forward algorithm from ConvForwardAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))),
118 | ConvForwardAlgo::GEMM => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
119 | ConvForwardAlgo::ImplicitGEMM => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM,
120 | ConvForwardAlgo::ImplicitPrecompiledGEMM => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
121 | ConvForwardAlgo::FFT => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT,
122 | ConvForwardAlgo::FFTTiling => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING,
123 | ConvForwardAlgo::Direct => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_DIRECT,
124 | })
125 | }
126 |
127 | /// Returns the matching enum value for a cuDNN algo.
128 | fn from_cudnn(algo: &cudnnConvolutionFwdAlgo_t) -> ConvForwardAlgo {
129 | match *algo {
130 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_GEMM => ConvForwardAlgo::GEMM,
131 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM => ConvForwardAlgo::ImplicitGEMM,
132 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM => ConvForwardAlgo::ImplicitPrecompiledGEMM,
133 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT => ConvForwardAlgo::FFT,
134 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING => ConvForwardAlgo::FFTTiling,
135 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_DIRECT => ConvForwardAlgo::Direct,
136 | }
137 | }
138 |
139 | /// Tries to find the best algorithm for an operation that uses the provided descriptors.
140 | fn find_cudnn_algo(
141 | &self,
142 | filter_desc: &FilterDescriptor,
143 | conv_desc: &ConvolutionDescriptor,
144 | src_desc: &TensorDescriptor,
145 | dest_desc: &TensorDescriptor,
146 | ) -> Result<ConvForwardAlgo, ::co::error::Error> {
147 | if !self.is_auto() {
148 | return Ok(*self);
149 | }
150 | let algos = API::find_convolution_forward_algorithm(*CUDNN.id_c(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
151 | let algo = match algos.len() {
152 | 0 => return Err(::co::error::Error::Plugin(::co::plugin::Error::Operation("Unable to find CUDA cuDNN convolution forward algorithm."))),
153 | _ => algos[0].algo
154 | };
155 | Ok(ConvForwardAlgo::from_cudnn(&algo))
156 | }
157 | }
158 |
159 | impl ConvBackwardFilterAlgo {
160 | /// Tries to return the matching cuDNN type for the enum value.
161 | fn as_cudnn(&self) -> Result<::cudnn::cudnnConvolutionBwdFilterAlgo_t, ::co::error::Error> {
162 | Ok(match *self {
163 | ConvBackwardFilterAlgo::Auto => return Err(::co::error::Error::Plugin(::co::plugin::Error::Plugin("Can't create cuDNN convolution backward filter algorithm from ConvBackwardFilterAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))),
164 | ConvBackwardFilterAlgo::ImplicitGEMM => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1,
165 | ConvBackwardFilterAlgo::ImplicitGEMMSum => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0,
166 | ConvBackwardFilterAlgo::ImplicitPrecompiledGEMMSum => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3,
167 | ConvBackwardFilterAlgo::FFT => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT,
168 | })
169 | }
170 |
171 | /// Returns the matching enum value for a cuDNN algo.
172 | fn from_cudnn(algo: &cudnnConvolutionBwdFilterAlgo_t) -> ConvBackwardFilterAlgo {
173 | match *algo {
174 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 => ConvBackwardFilterAlgo::ImplicitGEMMSum,
175 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 => ConvBackwardFilterAlgo::ImplicitGEMM,
176 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT => ConvBackwardFilterAlgo::FFT,
177 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 => ConvBackwardFilterAlgo::ImplicitPrecompiledGEMMSum,
178 | }
179 | }
180 |
181 | /// Tries to find the best algorithm for an operation that uses the provided descriptors.
182 | fn find_cudnn_algo(
183 | &self,
184 | filter_desc: &FilterDescriptor,
185 | conv_desc: &ConvolutionDescriptor,
186 | src_desc: &TensorDescriptor,
187 | dest_desc: &TensorDescriptor,
188 | ) -> Result<ConvBackwardFilterAlgo, ::co::error::Error> {
189 | if !self.is_auto() {
190 | return Ok(*self);
191 | }
192 | let algos = API::find_convolution_backward_filter_algorithm(*CUDNN.id_c(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
193 | let algo = match algos.len() {
194 | 0 => return Err(::co::error::Error::Plugin(::co::plugin::Error::Operation("Unable to find CUDA cuDNN convolution backward filter algorithm."))),
195 | _ => algos[0].algo
196 | };
197 | Ok(ConvBackwardFilterAlgo::from_cudnn(&algo))
198 | }
199 | }
200 |
201 | impl ConvBackwardDataAlgo {
202 | /// Tries to return the matching cuDNN type for the enum value.
203 | fn as_cudnn(&self) -> Result<::cudnn::cudnnConvolutionBwdDataAlgo_t, ::co::error::Error> {
204 | Ok(match *self {
205 | ConvBackwardDataAlgo::Auto => return Err(::co::error::Error::Plugin(::co::plugin::Error::Plugin("Can't create cuDNN convolution backward data algorithm from ConvBackwardDataAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))),
206 | ConvBackwardDataAlgo::ImplicitGEMM => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1,
207 | ConvBackwardDataAlgo::ImplicitGEMMSum => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0,
208 | ConvBackwardDataAlgo::FFT => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT,
209 | ConvBackwardDataAlgo::FFTTiling => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING,
210 | })
211 | }
212 |
213 | /// Returns the matching enum value for a cuDNN algo.
214 | fn from_cudnn(algo: &cudnnConvolutionBwdDataAlgo_t) -> ConvBackwardDataAlgo {
215 | match *algo {
216 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 => ConvBackwardDataAlgo::ImplicitGEMMSum,
217 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 => ConvBackwardDataAlgo::ImplicitGEMM,
218 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT => ConvBackwardDataAlgo::FFT,
219 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING => ConvBackwardDataAlgo::FFTTiling,
220 | }
221 | }
222 |
223 | /// Tries to find the best algorithm for an operation that uses the provided descriptors.
224 | fn find_cudnn_algo(
225 | &self,
226 | filter_desc: &FilterDescriptor,
227 | conv_desc: &ConvolutionDescriptor,
228 | src_desc: &TensorDescriptor,
229 | dest_desc: &TensorDescriptor,
230 | ) -> Result<ConvBackwardDataAlgo, ::co::error::Error> {
231 | if !self.is_auto() {
232 | return Ok(*self);
233 | }
234 | let algos = API::find_convolution_backward_data_algorithm(*CUDNN.id_c(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
235 | let algo = match algos.len() {
236 | 0 => return Err(::co::error::Error::Plugin(::co::plugin::Error::Operation("Unable to find CUDA cuDNN convolution backward data algorithm."))),
237 | _ => algos[0].algo
238 | };
239 | Ok(ConvBackwardDataAlgo::from_cudnn(&algo))
240 | }
241 | }
242 |
243 | macro_rules! impl_convolution_for_cuda_backend {
244 | ($t:ty, $cutype:path) => (
245 | impl ConvolutionConfig<$t> for ::cudnn::utils::ConvolutionConfig {
246 | fn workspace_size(&self) -> usize {
247 | *self.largest_workspace_size()
248 | }
249 | }
250 |
251 | impl Convolution<$t> for Backend<Cuda> {
252 | fn new_convolution_config(
253 | &self,
254 | src: &::co::tensor::SharedTensor<$t>,
255 | dest: &::co::tensor::SharedTensor<$t>,
256 | filter: &mut ::co::tensor::SharedTensor<$t>,
257 | algo_fwd: ConvForwardAlgo,
258 | algo_bwd_filter: ConvBackwardFilterAlgo,
259 | algo_bwd_data: ConvBackwardDataAlgo,
260 | stride: &[i32],
261 | zero_padding: &[i32],
262 | ) -> Result<Self::CC, ::co::error::Error> {
263 | let src_desc = try!(src.cudnn_tensor_desc());
264 | let dest_desc = try!(dest.cudnn_tensor_desc());
265 | let filter_desc = try!(filter.cudnn_filter_desc());
266 | let conv_desc = ::cudnn::ConvolutionDescriptor::new(zero_padding, stride, $cutype).unwrap();
267 |
268 | let useable_algo_fwd = try!(algo_fwd.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
269 | let useable_algo_bwd_filter = try!(algo_bwd_filter.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
270 | let useable_algo_bwd_data = try!(algo_bwd_data.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
271 |
272 | let mut workspace_size_fwd = API::get_convolution_forward_workspace_size(*CUDNN.id_c(), useable_algo_fwd.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
273 | let mut workspace_size_bwd_filter = API::get_convolution_backward_filter_workspace_size(*CUDNN.id_c(), useable_algo_bwd_filter.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
274 | let mut workspace_size_bwd_data = API::get_convolution_backward_data_workspace_size(*CUDNN.id_c(), useable_algo_bwd_data.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
275 |
276 | if workspace_size_fwd == 0 {
277 | workspace_size_fwd = 8;
278 | }
279 | if workspace_size_bwd_filter == 0 {
280 | workspace_size_bwd_filter = 8;
281 | }
282 | if workspace_size_bwd_data == 0 {
283 | workspace_size_bwd_data = 8;
284 | }
285 |
286 | Ok(
287 | ::cudnn::utils::ConvolutionConfig::new(
288 | useable_algo_fwd.as_cudnn().unwrap(), workspace_size_fwd,
289 | useable_algo_bwd_filter.as_cudnn().unwrap(), workspace_size_bwd_filter,
290 | useable_algo_bwd_data.as_cudnn().unwrap(), workspace_size_bwd_data,
291 | conv_desc, filter_desc
292 | )
293 | )
294 | }
295 |
296 | impl_ops_convolution_for!($t, Backend<Cuda>);
297 | }
298 | )
299 | }
300 |
301 | impl NN<f32> for Backend<Cuda> {
302 | type CC = utils::ConvolutionConfig;
303 | type CLRN = utils::NormalizationConfig;
304 | type CPOOL = utils::PoolingConfig;
305 |
306 | fn init_nn() { let _ = CUDNN.id_c(); }
307 | fn device(&self) -> &DeviceType { self.device() }
308 | }
309 |
310 | impl_convolution_for_cuda_backend!(f32, ::cudnn::utils::DataType::Float);
311 | impl_ops_sigmoid_for!(f32, Backend<Cuda>);
312 | impl_ops_relu_for!(f32, Backend<Cuda>);
313 | impl_ops_tanh_for!(f32, Backend<Cuda>);
314 | impl_ops_softmax_for!(f32, Backend<Cuda>);
315 | impl_ops_log_softmax_for!(f32, Backend<Cuda>);
316 | impl_ops_lrn_for!(f32, Backend<Cuda>);
317 | impl_ops_pooling_for!(f32, Backend<Cuda>);
318 |
319 | impl_ops_sigmoid_pointwise_for!(f32, Backend<Cuda>);
320 | impl_ops_relu_pointwise_for!(f32, Backend<Cuda>);
321 | impl_ops_tanh_pointwise_for!(f32, Backend<Cuda>);
322 |
323 | impl NN<f64> for Backend<Cuda> {
324 | type CC = utils::ConvolutionConfig;
325 | type CLRN = utils::NormalizationConfig;
326 | type CPOOL = utils::PoolingConfig;
327 |
328 | fn init_nn() { let _ = CUDNN.id_c(); }
329 | fn device(&self) -> &DeviceType { self.device() }
330 | }
331 |
332 | impl_convolution_for_cuda_backend!(f64, ::cudnn::utils::DataType::Double);
333 | impl_ops_sigmoid_for!(f64, Backend<Cuda>);
334 | impl_ops_relu_for!(f64, Backend<Cuda>);
335 | impl_ops_tanh_for!(f64, Backend<Cuda>);
336 | impl_ops_softmax_for!(f64, Backend<Cuda>);
337 | impl_ops_log_softmax_for!(f64, Backend<Cuda>);
338 | impl_ops_lrn_for!(f64, Backend<Cuda>);
339 | impl_ops_pooling_for!(f64, Backend<Cuda>);
340 |
341 | impl_ops_sigmoid_pointwise_for!(f64, Backend<Cuda>);
342 | impl_ops_relu_pointwise_for!(f64, Backend<Cuda>);
343 | impl_ops_tanh_pointwise_for!(f64, Backend<Cuda>);
344 |
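Note: the convolution tests further below pass explicit algorithms, but the `Auto` variants handled above are resolved to concrete cuDNN algorithms by `find_cudnn_algo` inside `new_convolution_config`. A minimal sketch of that path, assuming the same `backend`, `x`, `result`, `filter` and `workspace` setup as in tests/convolution_specs.rs:

    // Let cuDNN pick the forward and backward algorithms instead of naming them.
    let conf = backend.new_convolution_config(
        &x, &result, &mut filter,
        ConvForwardAlgo::Auto,
        ConvBackwardFilterAlgo::Auto,
        ConvBackwardDataAlgo::Auto,
        &[1, 1], // stride
        &[0, 0]  // zero padding
    ).unwrap();
    backend.convolution(&mut filter, &mut x, &mut result, &mut workspace, &conf).unwrap();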
--------------------------------------------------------------------------------
/src/frameworks/mod.rs:
--------------------------------------------------------------------------------
1 | //! Provides the specific Framework implementations for the Library Operations.
2 |
3 | #[cfg(feature = "native")]
4 | pub mod native;
5 | //#[cfg(feature = "opencl")]
6 | //pub mod opencl;
7 | #[cfg(feature = "cuda")]
8 | pub mod cuda;
9 |
--------------------------------------------------------------------------------
/src/frameworks/native/mod.rs:
--------------------------------------------------------------------------------
1 | //! Provides NN for a Native backend.
2 |
3 | #![allow(unused_imports)]
4 | #![allow(unused_variables)]
5 | #![allow(unreachable_code)]
6 |
7 | use ::plugin::*;
8 | use co::prelude::*;
9 | use co::Error;
10 | use co::plugin::Error as PluginError;
11 |
12 | #[macro_use]
13 | pub mod helper;
14 |
15 | impl_oconf_for_cc!(f32, f64);
16 | impl_oconf_for_clrn!(f32, f64);
17 | impl_oconf_for_pooling!(f32, f64);
18 |
19 | impl NN<f32> for Backend<Native> {
20 | type CC = helper::ConvolutionConfig;
21 | type CLRN = helper::NormalizationConfig;
22 | type CPOOL = helper::PoolingConfig;
23 |
24 | fn init_nn() { }
25 | fn device(&self) -> &DeviceType { self.device() }
26 | }
27 |
28 | impl_ops_sigmoid_for!(f32, Backend<Native>);
29 | impl_ops_relu_for!(f32, Backend<Native>);
30 | impl_ops_tanh_for!(f32, Backend<Native>);
31 | // impl_ops_convolution_for!(f32, Backend<Native>);
32 | impl_ops_softmax_for!(f32, Backend<Native>);
33 | impl_ops_log_softmax_for!(f32, Backend<Native>);
34 | // impl_ops_lrn_for!(f32, Backend<Native>);
35 | // impl_ops_pooling_for!(f32, Backend<Native>);
36 |
37 | impl NN<f64> for Backend<Native> {
38 | type CC = helper::ConvolutionConfig;
39 | type CLRN = helper::NormalizationConfig;
40 | type CPOOL = helper::PoolingConfig;
41 |
42 | fn init_nn() { }
43 | fn device(&self) -> &DeviceType { self.device() }
44 | }
45 |
46 | impl_ops_sigmoid_for!(f64, Backend<Native>);
47 | impl_ops_relu_for!(f64, Backend<Native>);
48 | impl_ops_tanh_for!(f64, Backend<Native>);
49 | // impl_ops_convolution_for!(f64, Backend<Native>);
50 | impl_ops_softmax_for!(f64, Backend<Native>);
51 | impl_ops_log_softmax_for!(f64, Backend<Native>);
52 | // impl_ops_lrn_for!(f64, Backend<Native>);
53 | // impl_ops_pooling_for!(f64, Backend<Native>);
54 |
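Note: the Native backend above only wires up the activation and (log-)softmax operations; convolution, LRN and pooling are still commented out. A minimal sketch of driving it, reusing only calls that appear in this crate's docs and tests (shape and fill value are illustrative):

    let backend = Backend::<Native>::default().unwrap();
    let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    // Fill `x` directly in native host memory, then run the plugin-provided sigmoid.
    if let &mut MemoryType::Native(ref mut mem) = x.get_mut(backend.device()).unwrap() {
        for v in mem.as_mut_slice::<f32>() { *v = 1.0; }
    }
    backend.sigmoid(&mut x, &mut result).unwrap();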
--------------------------------------------------------------------------------
/src/frameworks/opencl.rs:
--------------------------------------------------------------------------------
1 | //! Provides NN for an OpenCL backend.
2 |
3 | use ::operation::*;
4 | use ::binary::*;
5 | use ::plugin::*;
6 | use co::prelude::*;
7 | use co::Error;
8 |
9 | impl INnBinary<f32> for Program {
10 | type Sigmoid = Kernel;
11 |
12 | fn sigmoid(&self) -> Self::Sigmoid {
13 | unimplemented!()
14 | }
15 | }
16 |
17 | impl IOperationSigmoid<f32> for Kernel {
18 | fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
19 | unimplemented!()
20 | }
21 | }
22 |
23 | impl INn<f32> for Backend<OpenCL> {
24 | type B = Program;
25 |
26 | fn binary(&self) -> &Self::B {
27 | self.binary()
28 | }
29 |
30 | fn device(&self) -> &DeviceType {
31 | self.device()
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! Provides a [Collenchyma][collenchyma] Plugin, to extend Collenchyma with Neural Network related
2 | //! operations such as convolutions, pooling, ReLU, etc. A full list of operations provided by this Plugin
3 | //! can be found at the [provided Operations section](#operations).
4 | //!
5 | //! ## Overview
6 | //!
7 | //! This Collenchyma Plugin extends Collenchyma's Backend with NN related methods/operations. This allows
8 | //! you to run these operations (and therefore your application) on your local machine as well as on servers,
9 | //! mobiles or any other machine (as if they were written for common CPU execution), while
10 | //! receiving significant performance increases (usually one to two orders of magnitude) by
11 | //! executing the operations on special-purpose hardware such as GPUs - if they are available. Usage examples
12 | //! can be found in the next section.
13 | //!
14 | //! The architecture of a Plugin is quite simple. It defines one Plugin Trait, in this case the `NN`
15 | //! trait, which provides basic functionality for initialization, and multiple Plugin Operation Traits, which define the
16 | //! methods that become available on the Backend, as both the Plugin Trait and the Plugin Operation Traits
17 | //! are implemented for the Collenchyma Backends (CUDA, OpenCL, Native). The operations take as arguments one or many
18 | //! SharedTensors, holding the data over which the operation should happen, and none or one Operation Configuration.
19 | //!
20 | //! ## Usage
21 | //!
22 | //! An example on how to write some data into a SharedTensor and compute the result of the
23 | //! sigmoid function for each value:
24 | //!
25 | //! ```rust
26 | //! # #![allow(dead_code)]
27 | //! extern crate collenchyma as co;
28 | //! extern crate collenchyma_nn as nn;
29 | //! # #[cfg(feature = "cuda")]
30 | //! # mod cuda {
31 | //! use co::prelude::*;
32 | //! use nn::*;
33 | //!
34 | //! fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
35 | //! if let &mut MemoryType::Native(ref mut mem) = mem {
36 | //! let mut mem_buffer = mem.as_mut_slice::<T>();
37 | //! for (index, datum) in data.iter().enumerate() {
38 | //! mem_buffer[index] = *datum;
39 | //! }
40 | //! }
41 | //! }
42 | //!
43 | //! pub fn main() {
44 | //! // Initialize a CUDA Backend.
45 | //! // Usually you would not use CUDA but let Collenchyma pick what is available on the machine.
46 | //! let backend = Backend::<Cuda>::default().unwrap();
47 | //! // Initialize two SharedTensors.
48 | //! let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
49 | //! let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
50 | //! // Fill `x` with some data.
51 | //! let payload: &[f32] = &::std::iter::repeat(1f32).take(x.capacity()).collect::<Vec<f32>>();
52 | //! let native = Native::new();
53 | //! let cpu = native.new_device(native.hardwares()).unwrap();
54 | //! x.add_device(&cpu).unwrap(); // Add native host memory
55 | //! x.sync(&cpu).unwrap(); // Sync to native host memory
56 | //! write_to_memory(x.get_mut(&cpu).unwrap(), payload); // Write to native host memory.
57 | //! x.sync(backend.device()).unwrap(); // Sync the data to the CUDA device.
58 | //! // Run the sigmoid operation, provided by the NN Plugin, on your CUDA enabled GPU.
59 | //! backend.sigmoid(&mut x, &mut result).unwrap();
60 | //! // See the result.
61 | //! result.add_device(&cpu).unwrap(); // Add native host memory
62 | //! result.sync(&cpu).unwrap(); // Sync the result to host memory.
63 | //! println!("{:?}", result.get(&cpu).unwrap().as_native().unwrap().as_slice::<f32>());
64 | //! }
65 | //! # }
66 | //! # #[cfg(not(feature = "cuda"))]
67 | //! # mod cuda {
68 | //! # pub fn main() {}
69 | //! # }
70 | //! #
71 | //! # fn main() {
72 | //! # if cfg!(feature = "cuda") {
73 | //! # ::cuda::main();
74 | //! # }
75 | //! # }
76 | //! ```
77 | //!
78 | //! ## Provided Operations
79 | //!
80 | //! This Plugin provides the following operations (Forward + Backward).
81 | //! A `-` means not yet implemented.
82 | //!
83 |
84 | //! | Operation | CUDA | OpenCL | Native |
85 | //! |--- |--- |--- |--- |
86 | //! | Sigmoid | { cuDNN v3, v4 } | - | Rust |
87 | //! | SigmoidPointwise | { cuDNN v3, v4 } | - | |
88 | //! | ReLU | { cuDNN v3, v4 } | - | Rust |
89 | //! | ReLUPointwise | { cuDNN v3, v4 } | - | |
90 | //! | Tanh | { cuDNN v3, v4 } | - | Rust |
91 | //! | TanhPointwise | { cuDNN v3, v4 } | - | |
92 | //! | | | | |
93 | //! | Normalization (LRN) | { cuDNN v3, v4 } | - | - |
94 | //! | | | | |
95 | //! | Convolution | { cuDNN v3, v4 } | - | - |
96 | //! | | | | |
97 | //! | Softmax | { cuDNN v3, v4 } | - | Rust |
98 | //! | LogSoftmax | { cuDNN v3, v4 } | - | Rust |
99 | //! | | | | |
100 | //! | Pooling Max | { cuDNN v3, v4 } | - | - |
101 | //! | Pooling Avg | { cuDNN v3, v4 } | - | - |
102 | //!
103 | //! [collenchyma]: https://github.com/autumnai/collenchyma
104 | //! [collenchyma-docs]: http://autumnai.github.io/collenchyma
105 | //! [blas-source]: https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms
106 | #![cfg_attr(lint, feature(plugin))]
107 | #![cfg_attr(lint, plugin(clippy))]
108 | #![allow(dead_code)]
109 | #![deny(missing_docs,
110 | missing_debug_implementations, missing_copy_implementations,
111 | trivial_casts, trivial_numeric_casts,
112 | unused_import_braces, unused_qualifications)]
113 |
114 | extern crate collenchyma as co;
115 | #[cfg(feature = "cuda")]
116 | extern crate cudnn;
117 | extern crate libc;
118 | #[macro_use]
119 | extern crate lazy_static;
120 | #[macro_use]
121 | extern crate log;
122 |
123 | pub use plugin::*;
124 |
125 | mod plugin;
126 | pub mod frameworks;
127 |
--------------------------------------------------------------------------------
/tests/convolution_specs.rs:
--------------------------------------------------------------------------------
1 | extern crate collenchyma_nn as co_nn;
2 | extern crate collenchyma as co;
3 |
4 | #[cfg(test)]
5 | #[cfg(feature = "cuda")]
6 | mod convolution_spec_cuda {
7 |
8 | use co::prelude::*;
9 | use co_nn::*;
10 | use co::plugin::numeric_helpers::{cast, Float};
11 |
12 | fn get_native_backend() -> Backend<Native> {
13 | Backend::<Native>::default().unwrap()
14 | }
15 |
16 | fn get_cuda_backend() -> Backend<Cuda> {
17 | Backend::<Cuda>::default().unwrap()
18 | }
19 |
20 | fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
21 | match mem {
22 | &mut MemoryType::Native(ref mut mem) => {
23 | let mut mem_buffer = mem.as_mut_slice::<T>();
24 | for (index, datum) in data.iter().enumerate() {
25 | mem_buffer[index] = *datum;
26 | }
27 | },
28 | #[cfg(any(feature = "opencl", feature = "cuda"))]
29 | _ => {}
30 | }
31 | }
32 |
33 | fn get_memory<T: Float>(backend: &Backend<Cuda>, native: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
34 | let val = cast::<f64, T>(1f64).unwrap();
35 | let val2 = cast::<f64, T>(2f64).unwrap();
36 | let batch = 4;
37 | let w1 = 9;
38 | let h1 = 9;
39 | let d1 = 3;
40 | let k = 6;
41 | let f = 3;
42 | let w2 = (w1 - f + 0) / 1;
43 | let h2 = (h1 - f + 0) / 1;
44 | let mut x = SharedTensor::<T>::new(backend.device(), &(batch, d1, h1, w1)).unwrap();
45 | let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x.capacity()).collect::<Vec<T>>();
46 | payload[0] = val2;
47 | x.add_device(native.device()).unwrap();
48 | x.sync(native.device()).unwrap();
49 | write_to_memory(x.get_mut(native.device()).unwrap(), payload);
50 | x.sync(backend.device()).unwrap();
51 |
52 | let mut filter = SharedTensor::<T>::new(backend.device(), &(k, d1, f, f)).unwrap();
53 | let payload: &[T] = &::std::iter::repeat(val).take(filter.capacity()).collect::<Vec<T>>();
54 | filter.add_device(native.device()).unwrap();
55 | filter.sync(native.device()).unwrap();
56 | write_to_memory(filter.get_mut(native.device()).unwrap(), payload);
57 | filter.sync(backend.device()).unwrap();
58 |
59 | let mut result = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
60 | let payload: &[T] = &::std::iter::repeat(val2).take(result.capacity()).collect::<Vec<T>>();
61 | result.add_device(native.device()).unwrap();
62 | result.sync(native.device()).unwrap();
63 | write_to_memory(result.get_mut(native.device()).unwrap(), payload);
64 | result.sync(backend.device()).unwrap();
65 |
66 | let workspace = SharedTensor::<T>::new(backend.device(), &(4)).unwrap();
67 |
68 | (x, result, filter, workspace)
69 | }
70 |
71 | #[allow(dead_code)]
72 | fn get_grad_memory<T: Float>(backend: &Backend<Cuda>, native: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
73 | let val = cast::<f64, T>(1f64).unwrap();
74 | let val2 = cast::<f64, T>(2f64).unwrap();
75 | let batch = 4;
76 | let w1 = 9;
77 | let h1 = 9;
78 | let d1 = 3;
79 | let k = 6;
80 | let f = 3;
81 | let w2 = (w1 - f + 0) / 1;
82 | let h2 = (h1 - f + 0) / 1;
83 |
84 | let mut x = SharedTensor::<T>::new(backend.device(), &(batch, d1, h1, w1)).unwrap();
85 | let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x.capacity()).collect::<Vec<T>>();
86 | payload[0] = val2;
87 | x.add_device(native.device()).unwrap();
88 | x.sync(native.device()).unwrap();
89 | write_to_memory(x.get_mut(native.device()).unwrap(), payload);
90 | x.sync(backend.device()).unwrap();
91 |
92 | let mut x_diff = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
93 | let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x_diff.capacity()).collect::<Vec<T>>();
94 | payload[0] = val2;
95 | x_diff.add_device(native.device()).unwrap();
96 | x_diff.sync(native.device()).unwrap();
97 | write_to_memory(x_diff.get_mut(native.device()).unwrap(), payload);
98 | x_diff.sync(backend.device()).unwrap();
99 |
100 | let mut filter = SharedTensor::<T>::new(backend.device(), &(k, d1, f, f)).unwrap();
101 | let payload: &[T] = &::std::iter::repeat(val).take(filter.capacity()).collect::<Vec<T>>();
102 | filter.add_device(native.device()).unwrap();
103 | filter.sync(native.device()).unwrap();
104 | write_to_memory(filter.get_mut(native.device()).unwrap(), payload);
105 | filter.sync(backend.device()).unwrap();
106 |
107 | let mut result = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
108 | let payload: &[T] = &::std::iter::repeat(val).take(result.capacity()).collect::<Vec<T>>();
109 | result.add_device(native.device()).unwrap();
110 | result.sync(native.device()).unwrap();
111 | write_to_memory(result.get_mut(native.device()).unwrap(), payload);
112 | result.sync(backend.device()).unwrap();
113 |
114 | let mut result_diff = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
115 | result_diff.add_device(native.device()).unwrap();
116 |
117 | (x, x_diff, result, result_diff, filter)
118 | }
119 |
120 | #[test]
121 | fn it_computes_correct_convolution_on_cuda_for_f32() {
122 | let backend = get_cuda_backend();
123 | let native = get_native_backend();
124 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f32>(&backend, &native);
125 |
126 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
127 | match backend.convolution(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
128 | Ok(_) => {
129 | result.sync(native.device()).unwrap();
130 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
131 | let mut payload: &mut [f32] = &mut ::std::iter::repeat(27f32).take(result.capacity()).collect::<Vec<f32>>(); // an all-ones 3x3 filter over 3 all-ones input channels sums to 27 per output
132 | payload[0] = 28f32;
133 | assert_eq!(payload, mem.as_slice::<f32>());
134 | }
135 | },
136 | Err(err) => { println!("{:?}", err); assert!(false) }
137 | }
138 | }
139 |
140 | #[test]
141 | fn it_computes_correct_convolution_on_cuda_for_f64() {
142 | let backend = get_cuda_backend();
143 | let native = get_native_backend();
144 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f64>(&backend, &native);
145 |
146 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
147 | match backend.convolution(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
148 | Ok(_) => {
149 | result.sync(native.device()).unwrap();
150 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
151 | let mut payload: &mut [f64] = &mut ::std::iter::repeat(27f64).take(result.capacity()).collect::<Vec<f64>>();
152 | payload[0] = 28f64;
153 | assert_eq!(payload, mem.as_slice::<f64>());
154 | }
155 | },
156 | Err(err) => { println!("{:?}", err); assert!(false) }
157 | }
158 | }
159 |
160 | #[test]
161 | fn it_computes_correct_convolution_on_cuda_for_f32_plain() {
162 | let backend = get_cuda_backend();
163 | let native = get_native_backend();
164 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f32>(&backend, &native);
165 |
166 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
167 | match backend.convolution_plain(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
168 | Ok(_) => {
169 | result.sync(native.device()).unwrap();
170 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
171 | let mut payload: &mut [f32] = &mut ::std::iter::repeat(27f32).take(result.capacity()).collect::<Vec<f32>>();
172 | payload[0] = 28f32;
173 | assert_eq!(payload, mem.as_slice::<f32>());
174 | }
175 | },
176 | Err(err) => { println!("{:?}", err); assert!(false) }
177 | }
178 | }
179 |
180 | #[test]
181 | fn it_computes_correct_convolution_on_cuda_for_f64_plain() {
182 | let backend = get_cuda_backend();
183 | let native = get_native_backend();
184 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f64>(&backend, &native);
185 |
186 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
187 | match backend.convolution_plain(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
188 | Ok(_) => {
189 | result.sync(native.device()).unwrap();
190 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
191 | let mut payload: &mut [f64] = &mut ::std::iter::repeat(27f64).take(result.capacity()).collect::<Vec<f64>>();
192 | payload[0] = 28f64;
193 | assert_eq!(payload, mem.as_slice::<f64>());
194 | }
195 | },
196 | Err(err) => { println!("{:?}", err); assert!(false) }
197 | }
198 | }
199 |
200 | /*
201 | #[test]
202 | fn it_computes_correct_convolution_grad_on_cuda_for_f32() {
203 | let backend = get_cuda_backend();
204 | let native = get_native_backend();
205 | let (mut x, mut x_diff, mut result, mut result_diff, mut filter) = get_grad_memory::<f32>(&backend, &native);
206 |
207 | let conf = backend.new_convolution_config(&x, &result, &mut filter, &vec!(1,1), &vec!(0,0)).unwrap();
208 | match backend.convolution_grad(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
209 | Ok(_) => {
210 | result_diff.sync(native.device()).unwrap();
211 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
212 | assert_eq!(&[0f32, 0f32, -6f32], mem.as_slice::<f32>());
213 | }
214 | },
215 | Err(err) => { println!("{:?}", err); assert!(false) }
216 | }
217 | }
218 |
219 | #[test]
220 | fn it_computes_correct_convolution_grad_on_cuda_for_f64() {
221 | let backend = get_cuda_backend();
222 | let native = get_native_backend();
223 | let (mut x, mut x_diff, mut result, mut result_diff, filter, conv) = get_grad_memory::<f64>(&backend, &native);
224 |
225 | let conf = backend.new_convolution_config(&x, &result, &filter, &conv).unwrap();
226 | match backend.convolution_grad(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
227 | Ok(_) => {
228 | result_diff.sync(native.device()).unwrap();
229 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
230 | assert_eq!(&[0f64, 0f64, -6f64], mem.as_slice::<f64>());
231 | }
232 | },
233 | Err(err) => { println!("{:?}", err); assert!(false) }
234 | }
235 | }
236 |
237 | #[test]
238 | fn it_computes_correct_convolution_grad_on_cuda_for_f32_plain() {
239 | let backend = get_cuda_backend();
240 | let native = get_native_backend();
241 | let (mut x, mut x_diff, mut result, mut result_diff, filter, conv) = get_grad_memory::<f32>(&backend, &native);
242 |
243 | let conf = backend.new_convolution_config(&x, &result, &filter, &conv).unwrap();
244 | match backend.convolution_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
245 | Ok(_) => {
246 | result_diff.sync(native.device()).unwrap();
247 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
248 | assert_eq!(&[0f32, 0f32, -6f32], mem.as_slice::<f32>());
249 | }
250 | },
251 | Err(err) => { println!("{:?}", err); assert!(false) }
252 | }
253 | }
254 |
255 | #[test]
256 | fn it_computes_correct_convolution_grad_on_cuda_for_f64_plain() {
257 | let backend = get_cuda_backend();
258 | let native = get_native_backend();
259 | let (mut x, mut x_diff, mut result, mut result_diff, filter, conv) = get_grad_memory::<f64>(&backend, &native);
260 |
261 | let conf = backend.new_convolution_config(&x, &result, &filter, &conv).unwrap();
262 | match backend.convolution_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
263 | Ok(_) => {
264 | result_diff.sync(native.device()).unwrap();
265 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
266 | assert_eq!(&[0f64, 0f64, -6f64], mem.as_slice::<f64>());
267 | }
268 | },
269 | Err(err) => { println!("{:?}", err); assert!(false) }
270 | }
271 | }
272 | */
273 | }
274 |
275 | #[cfg(test)]
276 | #[cfg(feature = "native")]
277 | mod convolution_spec_native {
278 |
279 | // use co::backend::{Backend, BackendConfig};
280 | // use co::framework::IFramework;
281 | // use co::frameworks::Native;
282 | // use co_nn::*;
283 | // use co::memory::MemoryType;
284 | // use co::tensor::SharedTensor;
285 | // use co::plugin::numeric_helpers::{cast, Float};
286 | //
287 | // fn get_native_backend() -> Backend<Native> {
288 | // let framework = Native::new();
289 | // let hardwares = framework.hardwares();
290 | // let backend_config = BackendConfig::new(framework, hardwares);
291 | // Backend::new(backend_config).unwrap()
292 | // }
293 | //
294 | // fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
295 | // match mem {
296 | // &mut MemoryType::Native(ref mut mem) => {
297 | // let mut mem_buffer = mem.as_mut_slice::<T>();
298 | // for (index, datum) in data.iter().enumerate() {
299 | // mem_buffer[index] = *datum;
300 | // }
301 | // },
302 | // #[cfg(any(feature = "opencl", feature = "cuda"))]
303 | // _ => {}
304 | // }
305 | // }
306 | //
307 | // fn get_memory<T: Float>(backend: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
308 | // let val = cast::<f64, T>(1f64).unwrap();
309 | // let val2 = cast::<f64, T>(2f64).unwrap();
310 | // let batch = 4;
311 | // let w1 = 9;
312 | // let h1 = 9;
313 | // let d1 = 3;
314 | // let k = 6;
315 | // let f = 3;
316 | // let w2 = (w1 - f + 0) / 1;
317 | // let h2 = (h1 - f + 0) / 1;
318 | // let mut x = SharedTensor::<T>::new(backend.device(), &(batch, d1, h1, w1)).unwrap();
319 | // let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x.capacity()).collect::<Vec<T>>();
320 | // payload[0] = val2;
321 | // write_to_memory(x.get_mut(backend.device()).unwrap(), payload);
322 | //
323 | // let mut filter = SharedTensor::