├── .clog.toml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── benches
│   ├── relu.rs
│   ├── sigmoid.rs
│   ├── softmax.rs
│   └── tanh.rs
├── perf
│   ├── README.md
│   ├── perf_rblas.sh
│   └── run_perf.sh
├── rustfmt.toml
├── src
│   ├── frameworks
│   │   ├── cuda
│   │   │   ├── helper.rs
│   │   │   └── mod.rs
│   │   ├── mod.rs
│   │   ├── native
│   │   │   ├── helper.rs
│   │   │   └── mod.rs
│   │   └── opencl.rs
│   ├── lib.rs
│   └── plugin.rs
└── tests
    ├── convolution_specs.rs
    ├── lrn_specs.rs
    ├── pooling_specs.rs
    ├── relu_pointwise_specs.rs
    ├── relu_specs.rs
    ├── sigmoid_pointwise_specs.rs
    ├── sigmoid_specs.rs
    ├── softmax_specs.rs
    ├── tanh_pointwise_specs.rs
    └── tanh_specs.rs
/.clog.toml:
--------------------------------------------------------------------------------
1 | [clog]
2 | # A repository link with the trailing '.git' which will be used to generate
3 | # all commit and issue links
4 | repository = "https://github.com/autumnai/collenchyma-nn"
5 |
6 | # specify the style of commit links to generate, defaults to "github" if omitted
7 | link-style = "github"
8 |
9 | # The preferred way to set a constant changelog. This file will be read for old changelog
10 | # data, then prepended to for new changelog data. It's the equivalent to setting
11 | # both infile and outfile to the same file.
12 | #
13 | # Do not use with outfile or infile fields!
14 | #
15 | # Defaults to stdout when omitted
16 | changelog = "CHANGELOG.md"
17 |
18 | # This sets the output format. There are two options "json" or "markdown" and
19 | # defaults to "markdown" when omitted
20 | output-format = "markdown"
21 |
22 | # If you use tags, you can set the following if you wish to only pick
23 | # up changes since your latest tag
24 | from-latest-tag = true
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | Cargo.lock
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: rust
3 | rust:
4 | - nightly
5 | - beta
6 | - stable
7 | matrix:
8 | allow_failures:
9 | - rust: nightly
10 | env: TRAVIS_CARGO_NIGHTLY_FEATURE=lint
11 | exclude:
12 | - rust: beta
13 | env: TRAVIS_CARGO_NIGHTLY_FEATURE=lint
14 | - rust: stable
15 | env: TRAVIS_CARGO_NIGHTLY_FEATURE=lint
16 | branches:
17 | only:
18 | - master
19 | before_script:
20 | - |
21 | pip install 'travis-cargo<0.2' --user &&
22 | export PATH=$HOME/.local/bin:$PATH
23 | script:
24 | - |
25 | 
26 | travis-cargo build -- --no-default-features --features "travis" &&
27 | travis-cargo test -- --no-default-features --features "travis"
28 | travis-cargo doc -- --no-default-features --features "travis"
29 | addons:
30 | apt:
31 | packages:
32 | - libcurl4-openssl-dev
33 | - libelf-dev
34 | - libdw-dev
35 | after_success:
36 | - travis-cargo doc-upload
37 | - travis-cargo coveralls --no-sudo
38 | notifications:
39 | email:
40 | on_success: never
41 | env:
42 | global:
43 | secure: hTYDLFNmzEazAYd0eBY0HearyERReUHUSSmwGvdVBIX0Vbltvq1vKmUJAneDwzRiehL9HyrX5HV4OkbA6XNseFXXuTYjC/vbSjyN+TcXDLUWIhTMTsLEdh3h5g/XQN9lv03ovmqPDA2owoggxNosRt/10dclV9GiyYHF3ozbJOur4DIMqCi9ta9FpE9KMHsC6HSSdFgW5vTcrKsk9M2GBWzy52lAUQjm1qw1zHG2FmopbzXruaeFHIV0V2owww2FxLp6Hh592/WTX0gj6AMR1M8DfvALV5vDB+F49EWWHnC64RHGW74muQXrGPmG4nk0oUE4EzjX+XWTaRUCQ9p1nkPxFcWOCqykASOCnXNLfdDH47mqRmpjvHwvS5Ivd0FWaHPWmHbxu9CJ9zJImijHPgRpKVVmxh0BqKMG72QCUkONr4nKCW/vbCOvJgnwXpFXLLhYgqQsjaT/kqGR4VbB6PxKeI0+z8AnKE6RAzZmvN1U3Bx3kZ5xEaJCfytpXGBROTyXV4gvhyyDmdG8MnYuCPlY4Ov8LC7vWAmyp7nbE/IYtGePz6B6ec5bl9qrv9zD14FOT+MFvxqZkYaNUgImTouUG/MvH1lmSrPjqalxdmq8YTiGFgmh8vFZ2ovPbfPRl6usEMcgd8CjNuewb4Dz/XNYEmsS0C3+o3HMCNJ/YVc=
44 | matrix:
45 | - TRAVIS_CARGO_NIGHTLY_FEATURE=travis
46 | - TRAVIS_CARGO_NIGHTLY_FEATURE=lint
47 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 |
2 | ## 0.3.4 (2016-03-03)
3 |
4 |
5 | #### Bug Fixes
6 |
7 | * **cuda/convolution:** workaround for 0 memory allocation ([e30b59de](https://github.com/autumnai/collenchyma-nn/commit/e30b59decfe7ca6663a42fd27e19e55fcee83552))
8 |
9 |
10 |
11 |
12 | ## 0.3.3 (2016-03-03)
13 |
14 |
15 | #### Features
16 |
17 | * **cudnnv4:** passive support for cuDNNv4 ([0dc46301](https://github.com/autumnai/collenchyma-nn/commit/0dc463011c0ae261baee201e1b9cb540309349c5))
18 |
19 |
20 |
21 |
22 | ## 0.3.2 (2016-03-02)
23 |
24 |
25 | #### Breaking Changes
26 |
27 | * **convolution:** change convolution functions to require workspace ([f9d40136](https://github.com/autumnai/collenchyma-nn/commit/f9d401360c54bac6a253925d90625b0a1393ea17))
28 |
29 |
30 |
31 |
32 | ## 0.3.1 (2016-02-23)
33 |
34 |
35 | #### Features
36 |
37 | * **pointwise:** add pointwise activation functions (cuDNN) ([d74821b5](https://github.com/autumnai/collenchyma-nn/commit/d74821b582056f9acd3bdb4acd98f72668d070f8))
38 |
39 |
40 |
41 |
42 | ## 0.3.0 (2016-02-22)
43 |
44 |
45 | #### Features
46 |
47 | * **log_softmax:** add LogSoftmax operations ([86a8ae67](https://github.com/autumnai/collenchyma-nn/commit/86a8ae67727e0a5d28c901a7a32940fd7e2250f2))
48 | * **cuda:**
49 | * share workspace between CUDA convolution operations ([7f5f3207](https://github.com/autumnai/collenchyma-nn/commit/7f5f3207873874accb7a5a16d637e2701161ac04))
50 | * allow CUDA activations to work with 1D/2D tensors ([f4effe7d](https://github.com/autumnai/collenchyma-nn/commit/f4effe7d66d96537251d86bf24968b521a951121))
51 | * allow CUDA softmax to work with 1-3D tensors ([f74f72b6](https://github.com/autumnai/collenchyma-nn/commit/f74f72b6207505f4c29c7c44a9748d83972e7f72))
52 | * **nn_trait:** remove trait bounds for NN ([9ad08d9f](https://github.com/autumnai/collenchyma-nn/commit/9ad08d9f97cc382699c78c1397b52509d2e98969))
53 | * **license:** change license to dual MIT/Apache-2.0 ([8a940690](https://github.com/autumnai/collenchyma-nn/commit/8a940690e21bae269c44b9501e956bbf066cdcc1))
54 |
55 | #### Breaking Changes
56 |
57 | * **convolution:** implement convolutions correctly ([24b164b5](https://github.com/autumnai/collenchyma-nn/commit/24b164b55a913f522d79832308cf2e4a7996612a))
58 |
59 | #### Performance
60 |
61 | * **convolution:** don't do a memAlloc for a zero size workspace ([73612bb5](https://github.com/autumnai/collenchyma-nn/commit/73612bb56ab70500b4670b7a9a12390e2facee37))
62 |
63 |
64 |
65 | ## 0.2.1 (2016-01-21)
66 |
67 |
68 | #### Features
69 |
70 | * **native:** Add support for softmax w/ test and benches. ([14d6d1bc](https://github.com/autumnai/collenchyma-nn/commit/14d6d1bcda8bbc0ffa368527633f592862517200))
71 |
72 | #### Bug Fixes
73 |
74 | * **native:** Fix sigmoid_grad to use x_diff instead of x for dx ([c25a32aa](https://github.com/autumnai/collenchyma-nn/commit/c25a32aa272ff3c753ee8be2ea89457367b38734))
75 |
76 |
77 |
78 |
79 | ## 0.2.0 (2016-01-15)
80 |
81 |
82 | #### Features
83 |
84 | * **bench:** add bench and perf utilities ([0e2d34c6](https://github.com/autumnai/collenchyma-nn/commit/0e2d34c67acba38c6910cdff6e983b5285dfb852))
85 | * **native:** implement Sigmoid, ReLU, tanh for Native backend. ([ece54e37](https://github.com/autumnai/collenchyma-nn/commit/ece54e37a241f81b45888225ab0ee28c538950f6))
86 |
87 |
88 |
89 | ## 0.1.0 (2015-12-21)
90 |
91 |
92 | #### Bug Fixes
93 |
94 | * **scale_params:** fix ScalParams default to work on stable ([43654dca](https://github.com/autumnai/collenchyma-nn/commit/43654dca7cb92826ffecd4f0cd251fb7071d11c5))
95 |
96 | #### Features
97 |
98 | * **activation:** add most popular NN activation functions ([3311bb43](https://github.com/autumnai/collenchyma-nn/commit/3311bb43d78c850db8322c9ea8c1a5f2ca189cd1))
99 | * **features:** add framework feature groups ([08629ea8](https://github.com/autumnai/collenchyma-nn/commit/08629ea8f1c38047a5d7fec24601e21ba79d704f))
100 | * **nn:**
101 | * add all cudnn available operations to collenchyma-nn ([03384763](https://github.com/autumnai/collenchyma-nn/commit/033847630a0674c372666db209d436a80ecabe1b))
102 | * add basic nn implementation structure ([aa17ef0f](https://github.com/autumnai/collenchyma-nn/commit/aa17ef0f5064e479152ac3e398bf64887e03b6e2))
103 | * **sigmoid:**
104 | * add full sigmoid CUDA implementation ([8ea1a290](https://github.com/autumnai/collenchyma-nn/commit/8ea1a29016c364536755e2fb5d13a52352b059ab))
105 | * add CUDA Sigmoid ([6aceb957](https://github.com/autumnai/collenchyma-nn/commit/6aceb957d05a0ee625b48bab38693b99c9e09f01))
106 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Leaf
2 |
3 | We love that you are interested in contributing to Leaf. There are many ways
4 | to contribute and we appreciate all of them. This document gives a rough
5 | overview of how you can contribute to Leaf.
6 |
7 | * [Pull Requests](#pull-requests)
8 | * [Bug Reports](#bug-reports)
9 | * [Feature Requests](#feature-requests)
10 | * [Appendix](#appendix)
11 | * [Git Commit Guidelines](#git-commit-guidelines)
12 | * [Documentation Guidelines](#documentation-guidelines)
13 |
14 |
15 | If you have questions, hop on the [Leaf Chat](https://gitter.im/autumnai/leaf),
16 | or reach out to {@[MJ](https://twitter.com/mjhirn), @[Max](https://twitter.com/hobofan)}.
17 |
18 | ## Pull Requests
19 |
20 | #### Preparation
21 |
22 | Before you get started, please find the page of the project you're looking to
23 | improve. We encourage you to poke around in the code a little bit, familiarize
24 | yourself with its development style, and check the commit log to see who is
25 | contributing.
26 |
27 | Before you start working, you might check out the **Network** tab on the project
28 | to see all the other forks other people have made. Somebody might be already
29 | working on the problem you would love to solve.
30 |
31 | #### Making a PR
32 |
33 | Pull requests are the primary mechanism we use to change Leaf repos. GitHub
34 | itself has some [great documentation](https://help.github.com/articles/using-pull-requests/)
35 | on using the Pull Request feature. We use the 'fork and pull' model described
36 | there.
37 |
38 | Please make pull requests against the `master` branch.
39 |
40 | All pull requests are reviewed by another person.
41 |
42 | > **Highfive not yet integrated**:
43 | > *We have a bot, @rust-highfive, that will automatically assign a random*
44 | > *person to review your request.*
45 | >
46 | > *If you want to request that a specific person reviews your pull request,*
47 | > *you can add an `r?` to the message. For example, MJ usually reviews*
48 | > *documentation changes. So if you were to make a documentation change, add*
49 | >
50 | > r? @MichaelHirn
51 | >
52 | > *to the end of the message, and @rust-highfive will assign @MichaelHirn*
53 | > *instead of a random person. This is entirely optional.*
54 |
55 | After someone has reviewed your pull request, they will leave an annotation
56 | on the pull request with an `r+`. It will look something like this:
57 |
58 | @homu: r+ 38fe8d2
59 |
60 | This tells @homu, our lovable integration bot, that your pull request has
61 | been approved. The PR then enters the
62 | [merge queue](http://buildbot.rust-lang.org/homu/queue/rust), where
63 | @homu will run all the tests on every platform we support. If it all works
64 | out, @homu will merge your code into `master` and close the pull request.
65 |
66 | ## Bug Reports
67 |
68 | While bugs are unfortunate, they're a reality in software. We can't fix what we
69 | don't know about, so please report liberally. If you're not sure if something
70 | is a bug or not, feel free to file a bug anyway.
71 |
72 | If you have the chance, before reporting a bug, please search existing issues,
73 | as it's possible that someone else has already reported your error. This doesn't
74 | always work, and sometimes it's hard to know what to search for, so consider this
75 | extra credit. We won't mind if you accidentally file a duplicate report.
76 |
77 | [Opening an issue is easy](https://guides.github.com/features/issues/).
78 | Here's a template that you can use to file a bug, though it's not necessary to
79 | use it exactly:
80 |
81 | <short summary of the bug>
82 | 
83 | I tried this code:
84 | 
85 | <code sample that causes the bug>
86 | 
87 | I expected to see this happen: <explanation>
88 | 
89 | Instead, this happened: <explanation>
90 | 
91 | ## Meta
92 |
93 | {Library, Rust, OS} versions
94 |
95 | Backtrace:
96 |
97 | All three components are important: what you did, what you expected, what
98 | happened instead. Please include information about what platform you're on, what
99 | version of Rust and library you're using, etc.
100 |
101 | Sometimes, a backtrace is helpful, and so including that is nice. To get
102 | a backtrace, set the `RUST_BACKTRACE` environment variable. The easiest way
103 | to do this is to invoke `rustc` like this:
104 |
105 | ```bash
106 | $ RUST_BACKTRACE=1 rustc ...
107 | ```
108 |
109 | ## Feature Requests
110 |
111 | To request a change to the way that one of the Leaf libraries work, please
112 | open an issue in the repository.
113 |
114 | ## Appendix
115 |
116 | ### Git Commit Guidelines
117 |
118 | We have very precise rules over how git commit messages should be formatted.
119 | This leads to more readable messages that are easy to follow when looking
120 | through the project history. But also, we may use the git commit messages to
121 | auto-generate the Leaf change log.
122 |
123 | #### Commit Message Format
124 |
125 | Each commit message consists of a header, a body and a footer. The header has a
126 | special format that includes a type, a scope and a subject:
127 |
128 | <type>/<scope>: <subject>
129 | \n
130 | <body>
131 | \n
132 | <footer>
133 |
134 | No line of the commit message can be longer than 100 characters! This allows the
135 | message to be easier to read on GitHub as well as in various git tools.
136 |
137 | <**type**>:
138 |
139 | Must be one of the following:
140 |
141 | - *`feat`*: A new feature
142 | - *`fix`*: A bug fix
143 | - *`docs`*: Documentation only changes
144 | - *`style`*: Changes that do not affect the meaning of the code (white-space, formatting, missing semi-colons, etc)
145 | - *`refactor`*: A code change that neither fixes a bug nor adds a feature
146 | - *`perf`*: A code change that improves performance
147 | - *`test`*: Adding missing tests
148 | - *`chore`*: Changes to the build process or auxiliary tools and libraries such as documentation generation
149 |
150 | <**scope**>:
151 |
152 | The scope could be anything that specifies the place of the commit change.
153 | For example: `feature1`, `tests`, `lib`, etc...
154 |
155 | <**subject**>:
156 |
157 | The subject contains a succinct description of the change:
158 | - use the imperative, present tense: "change" not "changed" nor "changes"
159 | - don't capitalize first letter
160 | - no dot (.) at the end
161 |
162 | <**body**>:
163 |
164 | The body should include the motivation for the change, contrast it with the
165 | previous behaviour, and explain why the commit matters.
166 |
167 | - Just as in the `subject`, use the imperative, present tense
168 |
169 | <**footer**>:
170 |
171 | The footer should contain any information about Breaking Changes and is also the
172 | place to reference GitHub issues that this commit closes. For Example:
173 |
174 | BREAKING CHANGE: [specify what is breaking]
175 |
176 | { REFERENCE, CLOSE, FIX } #Issue
177 |
178 |
179 | #### Revert
180 |
181 | If the commit reverts a previous commit, it should begin with `revert:`,
182 | followed by the header of the reverted commit. In the body it should say:
183 | `This reverts commit <hash>.`, where the hash is the SHA of the commit being
184 | reverted.
185 |
186 | ### Documentation Guidelines
187 |
188 | We created an extensive [Documentation Guide][1] for you, which outlines an easy
189 | and efficient communication framework for providing developers and users with
190 | helpful documentation about the deep learning framework.
191 |
192 | [1] https://medium.com/@autumn_eng/increasing-open-source-engagement-with-structural-communication-guidelines-for-code-documentation-e72533de8e45
193 |
--------------------------------------------------------------------------------
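As an illustration of the Git Commit Guidelines above, a commit message following the `<type>/<scope>: <subject>` format might look like this (the scope and issue number are hypothetical):

    feat/cuda: add pointwise tanh operation

    Implement TanhPointwise for the CUDA backend so activations can be
    applied in place without allocating a separate output tensor.

    CLOSE #123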
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "collenchyma-nn"
3 | description = "collenchyma plugin providing Neural Network operations"
4 | version = "0.3.4"
5 | authors = ["Michael Hirn ",
6 | "Maximilian Goisser "]
7 |
8 | repository = "https://github.com/autumnai/collenchyma-nn"
9 | homepage = "https://github.com/autumnai/collenchyma-nn"
10 | documentation = "https://autumnai.github.io/collenchyma-nn"
11 | readme = "README.md"
12 |
13 | keywords = ["neural-network", "collenchyma", "computation", "hpc", "plugin"]
14 | license = "MIT OR Apache-2.0"
15 |
16 | [dependencies]
17 | collenchyma = { version = "0.0.8", default-features = false }
18 | cudnn = { version = "1.3.1", optional = true }
19 | libc = "0.2"
20 | lazy_static = "0.1"
21 | log = "0.3.2"
22 |
23 | clippy = { version = "0.0.27", optional = true }
24 |
25 | [dev-dependencies]
26 |
27 | rand = "0.3"
28 |
29 | [features]
30 | default = ["native", "cuda", "opencl"]
31 | native = ["collenchyma/native"]
32 | cuda = ["collenchyma/cuda", "cudnn"]
33 | opencl = ["collenchyma/opencl"]
34 |
35 | travis = ["native"]
36 | dev = []
37 | lint = ["clippy"]
38 |
--------------------------------------------------------------------------------
/LICENSE-APACHE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | MIT LICENSE
2 |
3 | Copyright (c) 2015 Storeness UG (haftungsbeschraenkt)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # collenchyma-NN • [](https://gitter.im/autumnai/collenchyma?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) [](https://travis-ci.org/autumnai/collenchyma-nn) [](https://crates.io/crates/collenchyma-nn) [](LICENSE)
2 |
3 | collenchyma-NN provides Neural Network related algorithms for [Collenchyma][collenchyma].
4 | Run NN operations on servers, desktops or mobiles, GPUs, FPGAs or CPUs, without
5 | caring about OpenCL or CUDA support on the machine.
6 |
7 | collenchyma-NN was started at [Autumn][autumn] to support the Machine Intelligence
8 | Framework [Leaf][leaf] with backend-agnostic, state-of-the-art performance.
9 |
10 | For more information,
11 |
12 | * see collenchyma-NN's [Documentation](http://autumnai.github.io/collenchyma-nn)
13 | * visit [Collenchyma][collenchyma] for more information about portable operations and other Plugins.
14 | * or get in touch on [Twitter][twitter-autumn] or [Gitter][gitter-collenchyma]
15 |
16 | [collenchyma]: https://github.com/autumnai/collenchyma
17 | [autumn]: http://autumnai.com
18 | [leaf]: https://github.com/autumnai/leaf
19 | [twitter-autumn]: https://twitter.com/autumn_eng
20 |
21 | ## Provided Operations
22 |
23 | This plugin provides the following operations to the Collenchyma Backend.
24 | Every operation includes forward + backward. A `-` means not yet implemented.
25 | More information can be found in the [Documentation][docs-ops].
26 |
27 | | Operation | CUDA | OpenCL | Native |
28 | |--- |--- |--- |--- |
29 | | Sigmoid | { cuDNN v3, v4 } | - | Rust |
30 | | SigmoidPointwise | { cuDNN v3, v4 } | - | |
31 | | ReLU | { cuDNN v3, v4 } | - | Rust |
32 | | ReLUPointwise | { cuDNN v3, v4 } | - | |
33 | | Tanh | { cuDNN v3, v4 } | - | Rust |
34 | | TanhPointwise | { cuDNN v3, v4 } | - | |
35 | | | | | |
36 | | Normalization (LRN) | { cuDNN v3, v4 } | - | - |
37 | | | | | |
38 | | Convolution | { cuDNN v3, v4 } | - | - |
39 | | | | | |
40 | | Softmax | { cuDNN v3, v4 } | - | Rust |
41 | | LogSoftmax | { cuDNN v3, v4 } | - | Rust |
42 | | | | | |
43 | | Pooling Max | { cuDNN v3, v4 } | - | - |
44 | | Pooling Avg | { cuDNN v3, v4 } | - | - |
45 |
46 | Kudos to [ehiggs][ehiggs] for implementing the native Rust operations.
47 |
48 | [docs-ops]: http://autumnai.github.io/collenchyma-nn/collenchyma_nn/trait.NN.html
49 | [ehiggs]: https://github.com/ehiggs
50 |
51 | ## Getting Started
52 |
53 | If you're using Cargo, just add collenchyma-NN to your Cargo.toml:
54 |
55 | [dependencies]
56 | collenchyma = "0.0.8"
57 | collenchyma-nn = "0.3.4"
58 |
59 | If you're using [Cargo Edit][cargo-edit], you can call:
60 |
61 | $ cargo add collenchyma-nn
62 |
63 | [cargo-edit]: https://github.com/killercup/cargo-edit
64 |
65 | ## Usage
66 |
67 | Bring the plugin trait and the other important Collenchyma traits/structs into scope,
68 | and you will be able to execute the operations provided here on your Collenchyma Backend.
69 |
70 | ```rust
71 | extern crate collenchyma as co;
72 | extern crate collenchyma_nn as nn;
73 | use co::prelude::*;
74 | use nn::*;
75 | fn main() {
76 | // Initialize a CUDA Backend.
77 | let backend = Backend::<Cuda>::default().unwrap();
78 | // Initialize two SharedTensors.
79 | // Usually you would also want to fill them with data.
80 | // More info about that in the Collenchyma README.md.
81 | let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
82 | let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
83 | // Use the operation provided by this Plugin.
84 | backend.sigmoid(&mut x, &mut result);
85 | }
86 | ```
87 |
88 | ## Contributing
89 |
90 | Want to contribute? Awesome! We have
91 | [instructions to help you get started contributing code or documentation][contributing],
92 | and high-priority issues that we could use your help with.
93 |
94 | We have a mostly real-time collaboration culture, which happens here on GitHub and
95 | on the [Collenchyma Gitter Channel][gitter-collenchyma].
96 | You can also reach out to the Maintainers
97 | {[@MJ][mj], [@hobofan][hobofan]}.
98 |
99 | Unless you explicitly state otherwise, any contribution intentionally
100 | submitted for inclusion in the work by you, as defined in the Apache-2.0
101 | license, shall be dual licensed as below, without any additional terms or
102 | conditions.
103 |
104 | [contributing]: CONTRIBUTING.md
105 | [gitter-collenchyma]: https://gitter.im/autumnai/collenchyma
106 | [mj]: https://twitter.com/mjhirn
107 | [hobofan]: https://twitter.com/hobofan
108 |
109 | ## Changelog
110 |
111 | > *A changelog is a log or record of all the changes made to a project, such as a website or software project, usually including such records as bug fixes, new features, etc.* - [Wikipedia][changelog-quote]
112 |
113 | You can find the release history at the [CHANGELOG][changelog] file.
114 |
115 | We are using [Clog][clog], the Rust tool for auto generating CHANGELOG files.
116 |
117 | [changelog]: CHANGELOG.md
118 | [changelog-quote]: https://en.wikipedia.org/wiki/Changelog
119 | [Clog]: https://github.com/clog-tool/clog-cli
120 |
121 | ## License
122 |
123 | Licensed under either of
124 |
125 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
126 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
127 |
128 | at your option.
129 |
--------------------------------------------------------------------------------
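The Usage example in the README above targets the CUDA backend. Below is a minimal sketch, assuming the Native backend and the tensor-filling accessors that appear in the `benches/` files of this repository; the shape and input values are illustrative, and the read-back path (a non-mutable counterpart to `get_mut`/`as_mut_native`) is not shown because only the mutable accessors appear in this dump.

```rust
extern crate collenchyma as co;
extern crate collenchyma_nn as nn;
use co::prelude::*;
use nn::*;

fn main() {
    // Native (CPU) backend: works without a CUDA or OpenCL device.
    let backend = Backend::<Native>::default().unwrap();
    let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    // Fill the input tensor, following the pattern from benches/*.rs.
    x.get_mut(backend.device()).unwrap()
        .as_mut_native().unwrap()
        .as_mut_slice()
        .clone_from_slice(&[-1.0f32, 0.0, 1.0]);
    // Run the sigmoid operation provided by this plugin (see the README's Usage section).
    let _ = backend.sigmoid(&mut x, &mut result);
}
```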
/benches/relu.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let mut out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
36 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
38 | out.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
39 | (x, dx, out, dout)
40 | }
41 |
42 | #[inline(never)]
43 | fn bench_profile<F: FnMut() -> ()>(
44 | b: &mut Bencher,
45 | mut bench_func: F,
46 | times: usize
47 | ) {
48 | b.iter(|| { for _ in 0..times { bench_func(); } });
49 | }
50 |
51 | #[bench]
52 | fn bench_1000_relu_100_native(b: &mut Bencher) {
53 | let backend = backend();
54 | let (mut x, mut out) = arguments(&backend, 100);
55 | let mut func = || { let _ = backend.relu_plain(&mut x, &mut out); };
56 | { func(); bench_profile(b, func, 1000); }
57 | }
58 |
59 | #[bench]
60 | fn bench_10_relu_10000_native(b: &mut Bencher) {
61 | let backend = backend();
62 | let (mut x, mut out) = arguments(&backend, 10000);
63 | let mut func = || { let _ = backend.relu_plain(&mut x, &mut out); };
64 | { func(); bench_profile(b, func, 10); }
65 | }
66 |
67 | #[bench]
68 | fn bench_1000_relu_grad_100_native(b: &mut Bencher) {
69 | let backend = backend();
70 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 100);
71 | let mut func = || { let _ = backend.relu_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
72 | { func(); bench_profile(b, func, 1000); }
73 | }
74 |
75 | #[bench]
76 | fn bench_10_relu_grad_10000_native(b: &mut Bencher) {
77 | let backend = backend();
78 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 10000);
79 | let mut func = || { let _ = backend.relu_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
80 | { func(); bench_profile(b, func, 10); }
81 | }
82 |
--------------------------------------------------------------------------------
/benches/sigmoid.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let mut out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
36 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
38 | out.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
39 | (x, dx, out, dout)
40 | }
41 |
42 | #[inline(never)]
43 | fn bench_profile<F: FnMut() -> ()>(
44 | b: &mut Bencher,
45 | mut bench_func: F,
46 | times: usize
47 | ) {
48 | b.iter(|| { for _ in 0..times { bench_func(); } });
49 | }
50 |
51 | #[bench]
52 | fn bench_1000_sigmoid_100_native(b: &mut Bencher) {
53 | let backend = backend();
54 | let (mut x, mut out) = arguments(&backend, 100);
55 | let mut func = || { let _ = backend.sigmoid_plain(&mut x, &mut out); };
56 | { func(); bench_profile(b, func, 1000); }
57 | }
58 |
59 | #[bench]
60 | fn bench_10_sigmoid_10000_native(b: &mut Bencher) {
61 | let backend = backend();
62 | let (mut x, mut out) = arguments(&backend, 10000);
63 | let mut func = || { let _ = backend.sigmoid_plain(&mut x, &mut out); };
64 | { func(); bench_profile(b, func, 10); }
65 | }
66 |
67 | #[bench]
68 | fn bench_1000_sigmoid_grad_100_native(b: &mut Bencher) {
69 | let backend = backend();
70 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 100);
71 | let mut func = || { let _ = backend.sigmoid_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
72 | { func(); bench_profile(b, func, 1000); }
73 | }
74 |
75 | #[bench]
76 | fn bench_10_sigmoid_grad_10000_native(b: &mut Bencher) {
77 | let backend = backend();
78 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 10000);
79 | let mut func = || { let _ = backend.sigmoid_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
80 | { func(); bench_profile(b, func, 10); }
81 | }
82 |
--------------------------------------------------------------------------------
/benches/softmax.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
36 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | (x, dx, dout)
38 | }
39 |
40 | #[inline(never)]
41 | fn bench_profile<F: FnMut() -> ()>(
42 | b: &mut Bencher,
43 | mut bench_func: F,
44 | times: usize
45 | ) {
46 | b.iter(|| { for _ in 0..times { bench_func(); } });
47 | }
48 |
49 | #[bench]
50 | fn bench_1000_softmax_100_native(b: &mut Bencher) {
51 | let backend = backend();
52 | let (mut x, mut out) = arguments(&backend, 100);
53 | let mut func = || { let _ = backend.softmax_plain(&mut x, &mut out); };
54 | { func(); bench_profile(b, func, 1000); }
55 | }
56 |
57 | #[bench]
58 | fn bench_10_softmax_10000_native(b: &mut Bencher) {
59 | let backend = backend();
60 | let (mut x, mut out) = arguments(&backend, 10000);
61 | let mut func = || { let _ = backend.softmax_plain(&mut x, &mut out); };
62 | { func(); bench_profile(b, func, 10); }
63 | }
64 |
65 | #[bench]
66 | fn bench_1000_softmax_grad_100_native(b: &mut Bencher) {
67 | let backend = backend();
68 | let (mut x, mut dx, mut dout) = arguments_grad(&backend, 100);
69 | let mut func = || { let _ = backend.softmax_grad_plain(&mut x, &mut dx, &mut dout); };
70 | { func(); bench_profile(b, func, 1000); }
71 | }
72 |
73 | #[bench]
74 | fn bench_10_softmax_grad_10000_native(b: &mut Bencher) {
75 | let backend = backend();
76 | let (mut x, mut dx, mut dout) = arguments_grad(&backend, 10000);
77 | let mut func = || { let _ = backend.softmax_grad_plain(&mut x, &mut dx, &mut dout); };
78 | { func(); bench_profile(b, func, 10); }
79 | }
80 |
--------------------------------------------------------------------------------
/benches/tanh.rs:
--------------------------------------------------------------------------------
1 | #![feature(test)]
2 |
3 | extern crate test;
4 | extern crate collenchyma as co;
5 | extern crate collenchyma_nn as co_nn;
6 | extern crate rand;
7 |
8 | use test::Bencher;
9 | use co::prelude::*;
10 | use co_nn::*;
11 |
12 | use rand::{thread_rng, Rng};
13 |
14 | fn backend() -> Backend<Native> {
15 | Backend::<Native>::default().unwrap()
16 | }
17 |
18 | fn arguments(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>) {
19 | let mut rng = thread_rng();
20 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
21 | 
22 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
23 | let out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
24 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
25 | (x, out)
26 | }
27 |
28 | fn arguments_grad(backend: &Backend<Native>, size: usize) -> (SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>, SharedTensor<f32>) {
29 | let mut rng = thread_rng();
30 | let slice_x = rng.gen_iter::<f32>().take(size).collect::<Vec<f32>>();
31 | 
32 | let mut x = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
33 | let mut dx = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
34 | let mut out = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
35 | let dout = SharedTensor::<f32>::new(backend.device(), &size).unwrap();
36 | x.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
37 | dx.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
38 | out.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&slice_x);
39 | (x, dx, out, dout)
40 | }
41 |
42 | #[inline(never)]
43 | fn bench_profile<F: FnMut() -> ()>(
44 | b: &mut Bencher,
45 | mut bench_func: F,
46 | times: usize
47 | ) {
48 | b.iter(|| { for _ in 0..times { bench_func(); } });
49 | }
50 |
51 | #[bench]
52 | fn bench_1000_tanh_100_native(b: &mut Bencher) {
53 | let backend = backend();
54 | let (mut x, mut out) = arguments(&backend, 100);
55 | let mut func = || { let _ = backend.tanh_plain(&mut x, &mut out); };
56 | { func(); bench_profile(b, func, 1000); }
57 | }
58 |
59 | #[bench]
60 | fn bench_10_tanh_10000_native(b: &mut Bencher) {
61 | let backend = backend();
62 | let (mut x, mut out) = arguments(&backend, 10000);
63 | let mut func = || { let _ = backend.tanh_plain(&mut x, &mut out); };
64 | { func(); bench_profile(b, func, 10); }
65 | }
66 |
67 | #[bench]
68 | fn bench_1000_tanh_grad_100_native(b: &mut Bencher) {
69 | let backend = backend();
70 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 100);
71 | let mut func = || { let _ = backend.tanh_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
72 | { func(); bench_profile(b, func, 1000); }
73 | }
74 |
75 | #[bench]
76 | fn bench_10_tanh_grad_10000_native(b: &mut Bencher) {
77 | let backend = backend();
78 | let (mut x, mut dx, mut out, mut dout) = arguments_grad(&backend, 10000);
79 | let mut func = || { let _ = backend.tanh_grad_plain(&mut x, &mut dx, &mut out, &mut dout); };
80 | { func(); bench_profile(b, func, 10); }
81 | }
82 |
--------------------------------------------------------------------------------
/perf/README.md:
--------------------------------------------------------------------------------
1 | # Profiling
2 |
3 | Collenchyma comes with scripts to help with profiling performance problems.
4 |
5 | Run [perf](http://www.brendangregg.com/perf.html) on one of the benchmark tests:
6 |
7 | ```sh
8 | # compile latest version of benchmarks with DWARF information
9 | cargo rustc --bench [bench_file_name] -- -g
10 | sudo ./perf/run_perf.sh [bench_fn_name] # perf needs sudo
11 | ```
12 |
--------------------------------------------------------------------------------
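As a concrete instance of the profiling steps described in perf/README.md above, using the ReLU benchmark shipped in `benches/` (the benchmark file and function names are taken from this repository):

    cargo rustc --bench relu -- -g
    sudo ./perf/run_perf.sh bench_1000_relu_100_native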
/perf/perf_rblas.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | perf record -a -g --output perf_rblas_data.perf target/debug/rblas_overhead-cf1a2670c118749d --bench bench_1000_dot_100_rblas
3 | perf script -f -i perf_rblas_data.perf > perf_rblas_script.perf
4 | /home/hobofan/stuff/FlameGraph/stackcollapse-perf.pl perf_rblas_script.perf > perf_rblas_folded.perf
5 | /home/hobofan/stuff/FlameGraph/flamegraph.pl perf_rblas_folded.perf > perf_rblas_graph.svg
6 |
--------------------------------------------------------------------------------
/perf/run_perf.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | if [ $# -eq 0 ]
3 | then
4 | echo "No benchmark name supplied"
5 | exit 1
6 | fi
7 | benchname=$1
8 | mkdir -p target/perf
9 | perf record -a -g --output target/perf/${benchname}.data target/debug/rblas_overhead-c02a41a1401d43da --bench ${benchname}
10 | perf script -f -i target/perf/${benchname}.data > target/perf/${benchname}.scripted
11 | stackcollapse-perf target/perf/${benchname}.scripted | grep ${benchname} > target/perf/${benchname}.folded
12 | flamegraph target/perf/${benchname}.folded > target/perf/${benchname}.svg
13 |
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | format_strings = false
2 | reorder_imports = true
3 |
--------------------------------------------------------------------------------
/src/frameworks/cuda/mod.rs:
--------------------------------------------------------------------------------
1 | //! Provides NN for a CUDA backend.
2 | #![allow(missing_docs)]
3 | use ::plugin::*;
4 | use co::prelude::*;
5 | use co::plugin::Error as PluginError;
6 | use cudnn::*;
7 |
8 | #[macro_use]
9 | pub mod helper;
10 |
11 | lazy_static! {
12 | static ref CUDNN: Cudnn = Cudnn::new().unwrap();
13 | }
14 |
15 | pub trait ICudnnDesc<T> {
16 | fn cudnn_tensor_desc(&self) -> Result<TensorDescriptor, PluginError>;
17 | /// Creates a TensorDescriptor similar to `cudnn_tensor_desc`,
18 | /// but will create a fitting 4D tensor if the actual tensor would be 1D-3D.
19 | fn cudnn_tensor_desc_softmax(&self) -> Result<TensorDescriptor, PluginError>;
20 | /// Creates a TensorDescriptor similar to `cudnn_tensor_desc`,
21 | /// but will create a fitting 3D tensor if the actual tensor would be 1D/2D.
22 | ///
23 | /// This should be used in operations where the shape doesn't really matter,
24 | /// e.g. activations like ReLU.
25 | fn cudnn_tensor_desc_flat(&self) -> Result<TensorDescriptor, PluginError>;
26 | 
27 | fn cudnn_filter_desc(&self) -> Result<FilterDescriptor, PluginError>;
28 | 
29 | fn cudnn_convolution_desc(&self, filter: &SharedTensor<T>) -> Result<ConvolutionDescriptor, PluginError>;
30 | }
31 |
32 | macro_rules! impl_icudnndesc_for_sharedtensor {
33 | ($t:ty, $cutype:path) => (
34 | impl ICudnnDesc<$t> for SharedTensor<$t> {
35 | fn cudnn_tensor_desc(&self) -> Result<TensorDescriptor, PluginError> {
36 | match TensorDescriptor::new(&self.desc().dims_i32().clone(), &self.desc().default_stride_i32().clone(), $cutype) {
37 | Ok(desc) => Ok(desc),
38 | Err(_) => {
39 | Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor."))
40 | }
41 | }
42 | }
43 |
44 | fn cudnn_tensor_desc_softmax(&self) -> Result<TensorDescriptor, PluginError> {
45 | let actual_desc = self.desc().clone();
46 | let override_desc = match actual_desc.len() {
47 | // not batched and single dimension softmax
48 | 1 => vec![1, actual_desc[0], 1, 1],
49 | // batched and single dimension softmax
50 | 2 => vec![actual_desc[0], actual_desc[1], 1, 1],
51 | // neither batched nor single dimension
52 | 3 => vec![1, actual_desc[0], actual_desc[1], actual_desc[2]],
53 | _ => actual_desc
54 | };
55 | match TensorDescriptor::new(&override_desc.dims_i32().clone(),
56 | &override_desc.default_stride_i32().clone(),
57 | $cutype) {
58 | Ok(desc) => Ok(desc),
59 | Err(_) => {
60 | Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor."))
61 | }
62 | }
63 | }
64 |
65 | fn cudnn_tensor_desc_flat(&self) -> Result<TensorDescriptor, PluginError> {
66 | let actual_desc = self.desc().clone();
67 | let mut override_desc = match actual_desc.len() {
68 | 1 => vec![1, 1],
69 | 2 => vec![1],
70 | _ => vec![]
71 | };
72 | for dim in actual_desc {
73 | override_desc.push(dim);
74 | }
75 | match TensorDescriptor::new(&override_desc.dims_i32().clone(),
76 | &override_desc.default_stride_i32().clone(),
77 | $cutype) {
78 | Ok(desc) => Ok(desc),
79 | Err(_) => {
80 | Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor."))
81 | }
82 | }
83 | }
84 |
85 | fn cudnn_filter_desc(&self) -> Result<FilterDescriptor, PluginError> {
86 | match FilterDescriptor::new(&self.desc().dims_i32().clone(), $cutype) {
87 | Ok(desc) => Ok(desc),
88 | Err(_) => {
89 | Err(PluginError::Plugin("Unable to create CuDNN FilterDescriptor."))
90 | }
91 | }
92 | }
93 |
94 | fn cudnn_convolution_desc(&self, filter: &SharedTensor<$t>) -> Result<ConvolutionDescriptor, PluginError> {
95 | match ConvolutionDescriptor::new(&self.desc().dims_i32().clone(), &filter.desc().default_stride_i32().clone(), $cutype) {
96 | Ok(desc) => Ok(desc),
97 | Err(_) => {
98 | Err(PluginError::Plugin("Unable to create CuDNN ConvolutionDescriptor."))
99 | }
100 | }
101 | }
102 | }
103 | )
104 | }
105 |
106 | impl_icudnndesc_for_sharedtensor!(f32, ::cudnn::utils::DataType::Float);
107 | impl_icudnndesc_for_sharedtensor!(f64, ::cudnn::utils::DataType::Double);
108 |
109 | impl_oconf_for_cc!(f32, f64);
110 | impl_oconf_for_clrn!(f32, f64);
111 | impl_oconf_for_pooling!(f32, f64);
112 |
113 | impl ConvForwardAlgo {
114 | /// Tries to return the matching cuDNN type for the enum value.
115 | fn as_cudnn(&self) -> Result<cudnnConvolutionFwdAlgo_t, ::co::error::Error> {
116 | Ok(match *self {
117 | ConvForwardAlgo::Auto => return Err(::co::error::Error::Plugin(::co::plugin::Error::Plugin("Can't create cuDNN convolution forward algorithm from ConvForwardAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))),
118 | ConvForwardAlgo::GEMM => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
119 | ConvForwardAlgo::ImplicitGEMM => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM,
120 | ConvForwardAlgo::ImplicitPrecompiledGEMM => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
121 | ConvForwardAlgo::FFT => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT,
122 | ConvForwardAlgo::FFTTiling => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING,
123 | ConvForwardAlgo::Direct => ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_DIRECT,
124 | })
125 | }
126 |
127 | /// Returns the matching enum value for a cuDNN algo.
128 | fn from_cudnn(algo: &cudnnConvolutionFwdAlgo_t) -> ConvForwardAlgo {
129 | match *algo {
130 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_GEMM => ConvForwardAlgo::GEMM,
131 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM => ConvForwardAlgo::ImplicitGEMM,
132 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM => ConvForwardAlgo::ImplicitPrecompiledGEMM,
133 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT => ConvForwardAlgo::FFT,
134 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING => ConvForwardAlgo::FFTTiling,
135 | ::cudnn::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_DIRECT => ConvForwardAlgo::Direct,
136 | }
137 | }
138 |
139 | /// Tries to find the best algorithm for an operation that uses the provided descriptors.
140 | fn find_cudnn_algo(
141 | &self,
142 | filter_desc: &FilterDescriptor,
143 | conv_desc: &ConvolutionDescriptor,
144 | src_desc: &TensorDescriptor,
145 | dest_desc: &TensorDescriptor,
146 | ) -> Result<ConvForwardAlgo, ::co::error::Error> {
147 | if !self.is_auto() {
148 | return Ok(*self);
149 | }
150 | let algos = API::find_convolution_forward_algorithm(*CUDNN.id_c(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
151 | let algo = match algos.len() {
152 | 0 => return Err(::co::error::Error::Plugin(::co::plugin::Error::Operation("Unable to find CUDA cuDNN convolution forward algorithm."))),
153 | _ => algos[0].algo
154 | };
155 | Ok(ConvForwardAlgo::from_cudnn(&algo))
156 | }
157 | }
158 |
159 | impl ConvBackwardFilterAlgo {
160 | /// Tries to return the matching cuDNN type for the enum value.
161 | fn as_cudnn(&self) -> Result<::cudnn::cudnnConvolutionBwdFilterAlgo_t, ::co::error::Error> {
162 | Ok(match *self {
163 | ConvBackwardFilterAlgo::Auto => return Err(::co::error::Error::Plugin(::co::plugin::Error::Plugin("Can't create cuDNN convolution backward filter algorithm from ConvBackwardFilterAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))),
164 | ConvBackwardFilterAlgo::ImplicitGEMM => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1,
165 | ConvBackwardFilterAlgo::ImplicitGEMMSum => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0,
166 | ConvBackwardFilterAlgo::ImplicitPrecompiledGEMMSum => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3,
167 | ConvBackwardFilterAlgo::FFT => ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT,
168 | })
169 | }
170 |
171 | /// Returns the matching enum value for a cuDNN algo.
172 | fn from_cudnn(algo: &cudnnConvolutionBwdFilterAlgo_t) -> ConvBackwardFilterAlgo {
173 | match *algo {
174 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 => ConvBackwardFilterAlgo::ImplicitGEMMSum,
175 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 => ConvBackwardFilterAlgo::ImplicitGEMM,
176 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT => ConvBackwardFilterAlgo::FFT,
177 | ::cudnn::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 => ConvBackwardFilterAlgo::ImplicitPrecompiledGEMMSum,
178 | }
179 | }
180 |
181 | /// Tries to find the best algorithm for an operation that uses the provided descriptors.
182 | fn find_cudnn_algo(
183 | &self,
184 | filter_desc: &FilterDescriptor,
185 | conv_desc: &ConvolutionDescriptor,
186 | src_desc: &TensorDescriptor,
187 | dest_desc: &TensorDescriptor,
188 | ) -> Result<ConvBackwardFilterAlgo, ::co::error::Error> {
189 | if !self.is_auto() {
190 | return Ok(*self);
191 | }
192 | let algos = API::find_convolution_backward_filter_algorithm(*CUDNN.id_c(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
193 | let algo = match algos.len() {
194 | 0 => return Err(::co::error::Error::Plugin(::co::plugin::Error::Operation("Unable to find CUDA cuDNN convolution backward filter algorithm."))),
195 | _ => algos[0].algo
196 | };
197 | Ok(ConvBackwardFilterAlgo::from_cudnn(&algo))
198 | }
199 | }
200 |
201 | impl ConvBackwardDataAlgo {
202 | /// Tries to return the matching cuDNN type for the enum value.
203 | fn as_cudnn(&self) -> Result<::cudnn::cudnnConvolutionBwdDataAlgo_t, ::co::error::Error> {
204 | Ok(match *self {
205 | ConvBackwardDataAlgo::Auto => return Err(::co::error::Error::Plugin(::co::plugin::Error::Plugin("Can't create cuDNN convolution backward data algorithm from ConvBackwardDataAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))),
206 | ConvBackwardDataAlgo::ImplicitGEMM => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1,
207 | ConvBackwardDataAlgo::ImplicitGEMMSum => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0,
208 | ConvBackwardDataAlgo::FFT => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT,
209 | ConvBackwardDataAlgo::FFTTiling => ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING,
210 | })
211 | }
212 |
213 | /// Returns the matching enum value for a cuDNN algo.
214 | fn from_cudnn(algo: &cudnnConvolutionBwdDataAlgo_t) -> ConvBackwardDataAlgo {
215 | match *algo {
216 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 => ConvBackwardDataAlgo::ImplicitGEMMSum,
217 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 => ConvBackwardDataAlgo::ImplicitGEMM,
218 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT => ConvBackwardDataAlgo::FFT,
219 | ::cudnn::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING => ConvBackwardDataAlgo::FFTTiling,
220 | }
221 | }
222 |
223 | /// Tries to find the best algorithm for an operation that uses the provided descriptors.
224 | fn find_cudnn_algo(
225 | &self,
226 | filter_desc: &FilterDescriptor,
227 | conv_desc: &ConvolutionDescriptor,
228 | src_desc: &TensorDescriptor,
229 | dest_desc: &TensorDescriptor,
230 | ) -> Result<ConvBackwardDataAlgo, ::co::error::Error> {
231 | if !self.is_auto() {
232 | return Ok(*self);
233 | }
234 | let algos = API::find_convolution_backward_data_algorithm(*CUDNN.id_c(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
235 | let algo = match algos.len() {
236 | 0 => return Err(::co::error::Error::Plugin(::co::plugin::Error::Operation("Unable to find CUDA cuDNN convolution backward data algorithm."))),
237 | _ => algos[0].algo
238 | };
239 | Ok(ConvBackwardDataAlgo::from_cudnn(&algo))
240 | }
241 | }
242 |
243 | macro_rules! impl_convolution_for_cuda_backend {
244 | ($t:ty, $cutype:path) => (
245 | impl ConvolutionConfig<$t> for ::cudnn::utils::ConvolutionConfig {
246 | fn workspace_size(&self) -> usize {
247 | *self.largest_workspace_size()
248 | }
249 | }
250 |
251 | impl Convolution<$t> for Backend<Cuda> {
252 | fn new_convolution_config(
253 | &self,
254 | src: &::co::tensor::SharedTensor<$t>,
255 | dest: &::co::tensor::SharedTensor<$t>,
256 | filter: &mut ::co::tensor::SharedTensor<$t>,
257 | algo_fwd: ConvForwardAlgo,
258 | algo_bwd_filter: ConvBackwardFilterAlgo,
259 | algo_bwd_data: ConvBackwardDataAlgo,
260 | stride: &[i32],
261 | zero_padding: &[i32],
262 | ) -> Result<Self::CC, ::co::error::Error> {
263 | let src_desc = try!(src.cudnn_tensor_desc());
264 | let dest_desc = try!(dest.cudnn_tensor_desc());
265 | let filter_desc = try!(filter.cudnn_filter_desc());
266 | let conv_desc = ::cudnn::ConvolutionDescriptor::new(zero_padding, stride, $cutype).unwrap();
267 |
268 | let useable_algo_fwd = try!(algo_fwd.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
269 | let useable_algo_bwd_filter = try!(algo_bwd_filter.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
270 | let useable_algo_bwd_data = try!(algo_bwd_data.find_cudnn_algo(&filter_desc, &conv_desc, &src_desc, &dest_desc));
271 |
272 | let mut workspace_size_fwd = API::get_convolution_forward_workspace_size(*CUDNN.id_c(), useable_algo_fwd.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
273 | let mut workspace_size_bwd_filter = API::get_convolution_backward_filter_workspace_size(*CUDNN.id_c(), useable_algo_bwd_filter.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
274 | let mut workspace_size_bwd_data = API::get_convolution_backward_data_workspace_size(*CUDNN.id_c(), useable_algo_bwd_data.as_cudnn().unwrap(), *filter_desc.id_c(), *conv_desc.id_c(), *src_desc.id_c(), *dest_desc.id_c()).unwrap();
275 |
276 | if workspace_size_fwd == 0 {
277 | workspace_size_fwd = 8;
278 | }
279 | if workspace_size_bwd_filter == 0 {
280 | workspace_size_bwd_filter = 8;
281 | }
282 | if workspace_size_bwd_data == 0 {
283 | workspace_size_bwd_data = 8;
284 | }
285 |
286 | Ok(
287 | ::cudnn::utils::ConvolutionConfig::new(
288 | useable_algo_fwd.as_cudnn().unwrap(), workspace_size_fwd,
289 | useable_algo_bwd_filter.as_cudnn().unwrap(), workspace_size_bwd_filter,
290 | useable_algo_bwd_data.as_cudnn().unwrap(), workspace_size_bwd_data,
291 | conv_desc, filter_desc
292 | )
293 | )
294 | }
295 |
296 | impl_ops_convolution_for!($t, Backend<Cuda>);
297 | }
298 | )
299 | }
300 |
301 | impl NN<f32> for Backend<Cuda> {
302 | type CC = utils::ConvolutionConfig;
303 | type CLRN = utils::NormalizationConfig;
304 | type CPOOL = utils::PoolingConfig;
305 |
306 | fn init_nn() { let _ = CUDNN.id_c(); }
307 | fn device(&self) -> &DeviceType { self.device() }
308 | }
309 |
310 | impl_convolution_for_cuda_backend!(f32, ::cudnn::utils::DataType::Float);
311 | impl_ops_sigmoid_for!(f32, Backend<Cuda>);
312 | impl_ops_relu_for!(f32, Backend<Cuda>);
313 | impl_ops_tanh_for!(f32, Backend<Cuda>);
314 | impl_ops_softmax_for!(f32, Backend<Cuda>);
315 | impl_ops_log_softmax_for!(f32, Backend<Cuda>);
316 | impl_ops_lrn_for!(f32, Backend<Cuda>);
317 | impl_ops_pooling_for!(f32, Backend<Cuda>);
318 |
319 | impl_ops_sigmoid_pointwise_for!(f32, Backend<Cuda>);
320 | impl_ops_relu_pointwise_for!(f32, Backend<Cuda>);
321 | impl_ops_tanh_pointwise_for!(f32, Backend<Cuda>);
322 |
323 | impl NN<f64> for Backend<Cuda> {
324 | type CC = utils::ConvolutionConfig;
325 | type CLRN = utils::NormalizationConfig;
326 | type CPOOL = utils::PoolingConfig;
327 |
328 | fn init_nn() { let _ = CUDNN.id_c(); }
329 | fn device(&self) -> &DeviceType { self.device() }
330 | }
331 |
332 | impl_convolution_for_cuda_backend!(f64, ::cudnn::utils::DataType::Double);
333 | impl_ops_sigmoid_for!(f64, Backend<Cuda>);
334 | impl_ops_relu_for!(f64, Backend<Cuda>);
335 | impl_ops_tanh_for!(f64, Backend<Cuda>);
336 | impl_ops_softmax_for!(f64, Backend<Cuda>);
337 | impl_ops_log_softmax_for!(f64, Backend<Cuda>);
338 | impl_ops_lrn_for!(f64, Backend<Cuda>);
339 | impl_ops_pooling_for!(f64, Backend<Cuda>);
340 |
341 | impl_ops_sigmoid_pointwise_for!(f64, Backend<Cuda>);
342 | impl_ops_relu_pointwise_for!(f64, Backend<Cuda>);
343 | impl_ops_tanh_pointwise_for!(f64, Backend<Cuda>);
344 |
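Note: the convolution tests further below pass explicit algorithms, but the `Auto` variants handled above are resolved to concrete cuDNN algorithms by `find_cudnn_algo` inside `new_convolution_config`. A minimal sketch of that path, assuming the same `backend`, `x`, `result`, `filter` and `workspace` setup as in tests/convolution_specs.rs:

    // Let cuDNN pick the forward and backward algorithms instead of naming them.
    let conf = backend.new_convolution_config(
        &x, &result, &mut filter,
        ConvForwardAlgo::Auto,
        ConvBackwardFilterAlgo::Auto,
        ConvBackwardDataAlgo::Auto,
        &[1, 1], // stride
        &[0, 0]  // zero padding
    ).unwrap();
    backend.convolution(&mut filter, &mut x, &mut result, &mut workspace, &conf).unwrap();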
--------------------------------------------------------------------------------
/src/frameworks/mod.rs:
--------------------------------------------------------------------------------
1 | //! Provides the specific Framework implementations for the Library Operations.
2 |
3 | #[cfg(feature = "native")]
4 | pub mod native;
5 | //#[cfg(feature = "opencl")]
6 | //pub mod opencl;
7 | #[cfg(feature = "cuda")]
8 | pub mod cuda;
9 |
--------------------------------------------------------------------------------
/src/frameworks/native/mod.rs:
--------------------------------------------------------------------------------
1 | //! Provides NN for a Native backend.
2 |
3 | #![allow(unused_imports)]
4 | #![allow(unused_variables)]
5 | #![allow(unreachable_code)]
6 |
7 | use ::plugin::*;
8 | use co::prelude::*;
9 | use co::Error;
10 | use co::plugin::Error as PluginError;
11 |
12 | #[macro_use]
13 | pub mod helper;
14 |
15 | impl_oconf_for_cc!(f32, f64);
16 | impl_oconf_for_clrn!(f32, f64);
17 | impl_oconf_for_pooling!(f32, f64);
18 |
19 | impl NN<f32> for Backend<Native> {
20 | type CC = helper::ConvolutionConfig;
21 | type CLRN = helper::NormalizationConfig;
22 | type CPOOL = helper::PoolingConfig;
23 |
24 | fn init_nn() { }
25 | fn device(&self) -> &DeviceType { self.device() }
26 | }
27 |
28 | impl_ops_sigmoid_for!(f32, Backend<Native>);
29 | impl_ops_relu_for!(f32, Backend<Native>);
30 | impl_ops_tanh_for!(f32, Backend<Native>);
31 | // impl_ops_convolution_for!(f32, Backend<Native>);
32 | impl_ops_softmax_for!(f32, Backend<Native>);
33 | impl_ops_log_softmax_for!(f32, Backend<Native>);
34 | // impl_ops_lrn_for!(f32, Backend<Native>);
35 | // impl_ops_pooling_for!(f32, Backend<Native>);
36 |
37 | impl NN<f64> for Backend<Native> {
38 | type CC = helper::ConvolutionConfig;
39 | type CLRN = helper::NormalizationConfig;
40 | type CPOOL = helper::PoolingConfig;
41 |
42 | fn init_nn() { }
43 | fn device(&self) -> &DeviceType { self.device() }
44 | }
45 |
46 | impl_ops_sigmoid_for!(f64, Backend<Native>);
47 | impl_ops_relu_for!(f64, Backend<Native>);
48 | impl_ops_tanh_for!(f64, Backend<Native>);
49 | // impl_ops_convolution_for!(f64, Backend<Native>);
50 | impl_ops_softmax_for!(f64, Backend<Native>);
51 | impl_ops_log_softmax_for!(f64, Backend<Native>);
52 | // impl_ops_lrn_for!(f64, Backend<Native>);
53 | // impl_ops_pooling_for!(f64, Backend<Native>);
54 |
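Note: the Native backend above only wires up the activation and (log-)softmax operations; convolution, LRN and pooling are still commented out. A minimal sketch of driving it, reusing only calls that appear in this crate's docs and tests (shape and fill value are illustrative):

    let backend = Backend::<Native>::default().unwrap();
    let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
    // Fill `x` directly in native host memory, then run the plugin-provided sigmoid.
    if let &mut MemoryType::Native(ref mut mem) = x.get_mut(backend.device()).unwrap() {
        for v in mem.as_mut_slice::<f32>() { *v = 1.0; }
    }
    backend.sigmoid(&mut x, &mut result).unwrap();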
--------------------------------------------------------------------------------
/src/frameworks/opencl.rs:
--------------------------------------------------------------------------------
1 | //! Provides NN for an OpenCL backend.
2 |
3 | use ::operation::*;
4 | use ::binary::*;
5 | use ::plugin::*;
6 | use co::prelude::*;
7 | use co::Error;
8 |
9 | impl INnBinary<f32> for Program {
10 | type Sigmoid = Kernel;
11 |
12 | fn sigmoid(&self) -> Self::Sigmoid {
13 | unimplemented!()
14 | }
15 | }
16 |
17 | impl IOperationSigmoid<f32> for Kernel {
18 | fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
19 | unimplemented!()
20 | }
21 | }
22 |
23 | impl INn<f32> for Backend<OpenCL> {
24 | type B = Program;
25 |
26 | fn binary(&self) -> &Self::B {
27 | self.binary()
28 | }
29 |
30 | fn device(&self) -> &DeviceType {
31 | self.device()
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! Provides a [Collenchyma][collenchyma] Plugin, to extend Collenchyma with Neural Network related
2 | //! operations such as convolutions, pooling, ReLU, etc. A full list of operations provided by this Plugin
3 | //! can be found at the [provided Operations section](#operations).
4 | //!
5 | //! ## Overview
6 | //!
7 | //! This Collenchyma Plugin extends Collenchyma's Backend with NN related methods/operations. This allows
8 | //! you to run these operations (and therefore your application) on your local machine as well as on servers,
9 | //! mobiles or any other machine (as if they were written for common CPU execution), while
10 | //! receiving significant performance increases (usually one to two orders of magnitude) by
11 | //! executing the operations on special-purpose hardware such as GPUs - if they are available. Usage examples
12 | //! can be found in the next section.
13 | //!
14 | //! The architecture of a Plugin is quite simple. It defines one Plugin Trait, in this case the `NN`
15 | //! trait, which provides basic functionality for initialization, and multiple Plugin Operation Traits, which define the
16 | //! methods that become available on the Backend, as both the Plugin Trait and the Plugin Operation Traits
17 | //! are implemented for the Collenchyma Backends (CUDA, OpenCL, Native). The operations take as arguments one or many
18 | //! SharedTensors, holding the data over which the operation should happen, and none or one Operation Configuration.
19 | //!
20 | //! ## Usage
21 | //!
22 | //! An example on how to write some data into a SharedTensor and compute the result of the
23 | //! sigmoid function for each value:
24 | //!
25 | //! ```rust
26 | //! # #![allow(dead_code)]
27 | //! extern crate collenchyma as co;
28 | //! extern crate collenchyma_nn as nn;
29 | //! # #[cfg(feature = "cuda")]
30 | //! # mod cuda {
31 | //! use co::prelude::*;
32 | //! use nn::*;
33 | //!
34 | //! fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
35 | //! if let &mut MemoryType::Native(ref mut mem) = mem {
36 | //! let mut mem_buffer = mem.as_mut_slice::<T>();
37 | //! for (index, datum) in data.iter().enumerate() {
38 | //! mem_buffer[index] = *datum;
39 | //! }
40 | //! }
41 | //! }
42 | //!
43 | //! pub fn main() {
44 | //! // Initialize a CUDA Backend.
45 | //! // Usually you would not use CUDA but let Collenchyma pick what is available on the machine.
46 | //! let backend = Backend::<Cuda>::default().unwrap();
47 | //! // Initialize two SharedTensors.
48 | //! let mut x = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
49 | //! let mut result = SharedTensor::<f32>::new(backend.device(), &(1, 1, 3)).unwrap();
50 | //! // Fill `x` with some data.
51 | //! let payload: &[f32] = &::std::iter::repeat(1f32).take(x.capacity()).collect::<Vec<f32>>();
52 | //! let native = Native::new();
53 | //! let cpu = native.new_device(native.hardwares()).unwrap();
54 | //! x.add_device(&cpu).unwrap(); // Add native host memory
55 | //! x.sync(&cpu).unwrap(); // Sync to native host memory
56 | //! write_to_memory(x.get_mut(&cpu).unwrap(), payload); // Write to native host memory.
57 | //! x.sync(backend.device()).unwrap(); // Sync the data to the CUDA device.
58 | //! // Run the sigmoid operation, provided by the NN Plugin, on your CUDA enabled GPU.
59 | //! backend.sigmoid(&mut x, &mut result).unwrap();
60 | //! // See the result.
61 | //! result.add_device(&cpu).unwrap(); // Add native host memory
62 | //! result.sync(&cpu).unwrap(); // Sync the result to host memory.
63 | //! println!("{:?}", result.get(&cpu).unwrap().as_native().unwrap().as_slice::<f32>());
64 | //! }
65 | //! # }
66 | //! # #[cfg(not(feature = "cuda"))]
67 | //! # mod cuda {
68 | //! # pub fn main() {}
69 | //! # }
70 | //! #
71 | //! # fn main() {
72 | //! # if cfg!(feature = "cuda") {
73 | //! # ::cuda::main();
74 | //! # }
75 | //! # }
76 | //! ```
77 | //!
78 | //! ## Provided Operations
79 | //!
80 | //! This Plugin provides the following operations (Forward + Backward).
81 | //! A `-` means not yet implemented.
82 | //!
83 |
84 | //! | Operation | CUDA | OpenCL | Native |
85 | //! |--- |--- |--- |--- |
86 | //! | Sigmoid | { cuDNN v3, v4 } | - | Rust |
87 | //! | SigmoidPointwise | { cuDNN v3, v4 } | - | |
88 | //! | ReLU | { cuDNN v3, v4 } | - | Rust |
89 | //! | ReLUPointwise | { cuDNN v3, v4 } | - | |
90 | //! | Tanh | { cuDNN v3, v4 } | - | Rust |
91 | //! | TanhPointwise | { cuDNN v3, v4 } | - | |
92 | //! | | | | |
93 | //! | Normalization (LRN) | { cuDNN v3, v4 } | - | - |
94 | //! | | | | |
95 | //! | Convolution | { cuDNN v3, v4 } | - | - |
96 | //! | | | | |
97 | //! | Softmax | { cuDNN v3, v4 } | - | Rust |
98 | //! | LogSoftmax | { cuDNN v3, v4 } | - | Rust |
99 | //! | | | | |
100 | //! | Pooling Max | { cuDNN v3, v4 } | - | - |
101 | //! | Pooling Avg | { cuDNN v3, v4 } | - | - |
102 | //!
103 | //! [collenchyma]: https://github.com/autumnai/collenchyma
104 | //! [collenchyma-docs]: http://autumnai.github.io/collenchyma
105 | //! [blas-source]: https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms
106 | #![cfg_attr(lint, feature(plugin))]
107 | #![cfg_attr(lint, plugin(clippy))]
108 | #![allow(dead_code)]
109 | #![deny(missing_docs,
110 | missing_debug_implementations, missing_copy_implementations,
111 | trivial_casts, trivial_numeric_casts,
112 | unused_import_braces, unused_qualifications)]
113 |
114 | extern crate collenchyma as co;
115 | #[cfg(feature = "cuda")]
116 | extern crate cudnn;
117 | extern crate libc;
118 | #[macro_use]
119 | extern crate lazy_static;
120 | #[macro_use]
121 | extern crate log;
122 |
123 | pub use plugin::*;
124 |
125 | mod plugin;
126 | pub mod frameworks;
127 |
--------------------------------------------------------------------------------
/tests/convolution_specs.rs:
--------------------------------------------------------------------------------
1 | extern crate collenchyma_nn as co_nn;
2 | extern crate collenchyma as co;
3 |
4 | #[cfg(test)]
5 | #[cfg(feature = "cuda")]
6 | mod convolution_spec_cuda {
7 |
8 | use co::prelude::*;
9 | use co_nn::*;
10 | use co::plugin::numeric_helpers::{cast, Float};
11 |
12 | fn get_native_backend() -> Backend<Native> {
13 | Backend::<Native>::default().unwrap()
14 | }
15 |
16 | fn get_cuda_backend() -> Backend<Cuda> {
17 | Backend::<Cuda>::default().unwrap()
18 | }
19 |
20 | fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
21 | match mem {
22 | &mut MemoryType::Native(ref mut mem) => {
23 | let mut mem_buffer = mem.as_mut_slice::<T>();
24 | for (index, datum) in data.iter().enumerate() {
25 | mem_buffer[index] = *datum;
26 | }
27 | },
28 | #[cfg(any(feature = "opencl", feature = "cuda"))]
29 | _ => {}
30 | }
31 | }
32 |
33 | fn get_memory<T: Float>(backend: &Backend<Cuda>, native: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
34 | let val = cast::<f64, T>(1f64).unwrap();
35 | let val2 = cast::<f64, T>(2f64).unwrap();
36 | let batch = 4;
37 | let w1 = 9;
38 | let h1 = 9;
39 | let d1 = 3;
40 | let k = 6;
41 | let f = 3;
42 | let w2 = (w1 - f + 0) / 1;
43 | let h2 = (h1 - f + 0) / 1;
44 | let mut x = SharedTensor::<T>::new(backend.device(), &(batch, d1, h1, w1)).unwrap();
45 | let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x.capacity()).collect::<Vec<T>>();
46 | payload[0] = val2;
47 | x.add_device(native.device()).unwrap();
48 | x.sync(native.device()).unwrap();
49 | write_to_memory(x.get_mut(native.device()).unwrap(), payload);
50 | x.sync(backend.device()).unwrap();
51 |
52 | let mut filter = SharedTensor::<T>::new(backend.device(), &(k, d1, f, f)).unwrap();
53 | let payload: &[T] = &::std::iter::repeat(val).take(filter.capacity()).collect::<Vec<T>>();
54 | filter.add_device(native.device()).unwrap();
55 | filter.sync(native.device()).unwrap();
56 | write_to_memory(filter.get_mut(native.device()).unwrap(), payload);
57 | filter.sync(backend.device()).unwrap();
58 |
59 | let mut result = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
60 | let payload: &[T] = &::std::iter::repeat(val2).take(result.capacity()).collect::<Vec<T>>();
61 | result.add_device(native.device()).unwrap();
62 | result.sync(native.device()).unwrap();
63 | write_to_memory(result.get_mut(native.device()).unwrap(), payload);
64 | result.sync(backend.device()).unwrap();
65 |
66 | let workspace = SharedTensor::<T>::new(backend.device(), &(4)).unwrap();
67 |
68 | (x, result, filter, workspace)
69 | }
70 |
71 | #[allow(dead_code)]
72 | fn get_grad_memory<T: Float>(backend: &Backend<Cuda>, native: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
73 | let val = cast::<f64, T>(1f64).unwrap();
74 | let val2 = cast::<f64, T>(2f64).unwrap();
75 | let batch = 4;
76 | let w1 = 9;
77 | let h1 = 9;
78 | let d1 = 3;
79 | let k = 6;
80 | let f = 3;
81 | let w2 = (w1 - f + 0) / 1;
82 | let h2 = (h1 - f + 0) / 1;
83 |
84 | let mut x = SharedTensor::<T>::new(backend.device(), &(batch, d1, h1, w1)).unwrap();
85 | let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x.capacity()).collect::<Vec<T>>();
86 | payload[0] = val2;
87 | x.add_device(native.device()).unwrap();
88 | x.sync(native.device()).unwrap();
89 | write_to_memory(x.get_mut(native.device()).unwrap(), payload);
90 | x.sync(backend.device()).unwrap();
91 |
92 | let mut x_diff = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
93 | let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x_diff.capacity()).collect::<Vec<T>>();
94 | payload[0] = val2;
95 | x_diff.add_device(native.device()).unwrap();
96 | x_diff.sync(native.device()).unwrap();
97 | write_to_memory(x_diff.get_mut(native.device()).unwrap(), payload);
98 | x_diff.sync(backend.device()).unwrap();
99 |
100 | let mut filter = SharedTensor::<T>::new(backend.device(), &(k, d1, f, f)).unwrap();
101 | let payload: &[T] = &::std::iter::repeat(val).take(filter.capacity()).collect::<Vec<T>>();
102 | filter.add_device(native.device()).unwrap();
103 | filter.sync(native.device()).unwrap();
104 | write_to_memory(filter.get_mut(native.device()).unwrap(), payload);
105 | filter.sync(backend.device()).unwrap();
106 |
107 | let mut result = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
108 | let payload: &[T] = &::std::iter::repeat(val).take(result.capacity()).collect::<Vec<T>>();
109 | result.add_device(native.device()).unwrap();
110 | result.sync(native.device()).unwrap();
111 | write_to_memory(result.get_mut(native.device()).unwrap(), payload);
112 | result.sync(backend.device()).unwrap();
113 |
114 | let mut result_diff = SharedTensor::<T>::new(backend.device(), &(batch, k, h2, w2)).unwrap();
115 | result_diff.add_device(native.device()).unwrap();
116 |
117 | (x, x_diff, result, result_diff, filter)
118 | }
119 |
120 | #[test]
121 | fn it_computes_correct_convolution_on_cuda_for_f32() {
122 | let backend = get_cuda_backend();
123 | let native = get_native_backend();
124 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f32>(&backend, &native);
125 |
126 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
127 | match backend.convolution(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
128 | Ok(_) => {
129 | result.sync(native.device()).unwrap();
130 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
131 | let mut payload: &mut [f32] = &mut ::std::iter::repeat(27f32).take(result.capacity()).collect::<Vec<f32>>(); // an all-ones 3x3 filter over 3 all-ones input channels sums to 27 per output
132 | payload[0] = 28f32;
133 | assert_eq!(payload, mem.as_slice::<f32>());
134 | }
135 | },
136 | Err(err) => { println!("{:?}", err); assert!(false) }
137 | }
138 | }
139 |
140 | #[test]
141 | fn it_computes_correct_convolution_on_cuda_for_f64() {
142 | let backend = get_cuda_backend();
143 | let native = get_native_backend();
144 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f64>(&backend, &native);
145 |
146 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
147 | match backend.convolution(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
148 | Ok(_) => {
149 | result.sync(native.device()).unwrap();
150 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
151 | let mut payload: &mut [f64] = &mut ::std::iter::repeat(27f64).take(result.capacity()).collect::<Vec<f64>>();
152 | payload[0] = 28f64;
153 | assert_eq!(payload, mem.as_slice::<f64>());
154 | }
155 | },
156 | Err(err) => { println!("{:?}", err); assert!(false) }
157 | }
158 | }
159 |
160 | #[test]
161 | fn it_computes_correct_convolution_on_cuda_for_f32_plain() {
162 | let backend = get_cuda_backend();
163 | let native = get_native_backend();
164 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f32>(&backend, &native);
165 |
166 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
167 | match backend.convolution_plain(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
168 | Ok(_) => {
169 | result.sync(native.device()).unwrap();
170 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
171 | let mut payload: &mut [f32] = &mut ::std::iter::repeat(27f32).take(result.capacity()).collect::<Vec<f32>>();
172 | payload[0] = 28f32;
173 | assert_eq!(payload, mem.as_slice::<f32>());
174 | }
175 | },
176 | Err(err) => { println!("{:?}", err); assert!(false) }
177 | }
178 | }
179 |
180 | #[test]
181 | fn it_computes_correct_convolution_on_cuda_for_f64_plain() {
182 | let backend = get_cuda_backend();
183 | let native = get_native_backend();
184 | let (mut x, mut result, mut filter, mut workspace) = get_memory::<f64>(&backend, &native);
185 |
186 | let conf = backend.new_convolution_config(&x, &result, &mut filter, ConvForwardAlgo::ImplicitGEMM, ConvBackwardFilterAlgo::ImplicitGEMM, ConvBackwardDataAlgo::ImplicitGEMM, &vec!(1,1), &vec!(0,0)).unwrap();
187 | match backend.convolution_plain(&mut filter, &mut x, &mut result, &mut workspace, &conf) {
188 | Ok(_) => {
189 | result.sync(native.device()).unwrap();
190 | if let Some(mem) = result.get(native.device()).unwrap().as_native() {
191 | let mut payload: &mut [f64] = &mut ::std::iter::repeat(27f64).take(result.capacity()).collect::<Vec<f64>>();
192 | payload[0] = 28f64;
193 | assert_eq!(payload, mem.as_slice::<f64>());
194 | }
195 | },
196 | Err(err) => { println!("{:?}", err); assert!(false) }
197 | }
198 | }
199 |
200 | /*
201 | #[test]
202 | fn it_computes_correct_convolution_grad_on_cuda_for_f32() {
203 | let backend = get_cuda_backend();
204 | let native = get_native_backend();
205 | let (mut x, mut x_diff, mut result, mut result_diff, mut filter) = get_grad_memory::<f32>(&backend, &native);
206 |
207 | let conf = backend.new_convolution_config(&x, &result, &mut filter, &vec!(1,1), &vec!(0,0)).unwrap();
208 | match backend.convolution_grad(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
209 | Ok(_) => {
210 | result_diff.sync(native.device()).unwrap();
211 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
212 | assert_eq!(&[0f32, 0f32, -6f32], mem.as_slice::<f32>());
213 | }
214 | },
215 | Err(err) => { println!("{:?}", err); assert!(false) }
216 | }
217 | }
218 |
219 | #[test]
220 | fn it_computes_correct_convolution_grad_on_cuda_for_f64() {
221 | let backend = get_cuda_backend();
222 | let native = get_native_backend();
223 | let (mut x, mut x_diff, mut result, mut result_diff, filter, conv) = get_grad_memory::<f64>(&backend, &native);
224 |
225 | let conf = backend.new_convolution_config(&x, &result, &filter, &conv).unwrap();
226 | match backend.convolution_grad(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
227 | Ok(_) => {
228 | result_diff.sync(native.device()).unwrap();
229 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
230 | assert_eq!(&[0f64, 0f64, -6f64], mem.as_slice::<f64>());
231 | }
232 | },
233 | Err(err) => { println!("{:?}", err); assert!(false) }
234 | }
235 | }
236 |
237 | #[test]
238 | fn it_computes_correct_convolution_grad_on_cuda_for_f32_plain() {
239 | let backend = get_cuda_backend();
240 | let native = get_native_backend();
241 | let (mut x, mut x_diff, mut result, mut result_diff, filter, conv) = get_grad_memory::<f32>(&backend, &native);
242 |
243 | let conf = backend.new_convolution_config(&x, &result, &filter, &conv).unwrap();
244 | match backend.convolution_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
245 | Ok(_) => {
246 | result_diff.sync(native.device()).unwrap();
247 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
248 | assert_eq!(&[0f32, 0f32, -6f32], mem.as_slice::<f32>());
249 | }
250 | },
251 | Err(err) => { println!("{:?}", err); assert!(false) }
252 | }
253 | }
254 |
255 | #[test]
256 | fn it_computes_correct_convolution_grad_on_cuda_for_f64_plain() {
257 | let backend = get_cuda_backend();
258 | let native = get_native_backend();
259 | let (mut x, mut x_diff, mut result, mut result_diff, filter, conv) = get_grad_memory::<f64>(&backend, &native);
260 |
261 | let conf = backend.new_convolution_config(&x, &result, &filter, &conv).unwrap();
262 | match backend.convolution_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff, &conf) {
263 | Ok(_) => {
264 | result_diff.sync(native.device()).unwrap();
265 | if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
266 | assert_eq!(&[0f64, 0f64, -6f64], mem.as_slice::<f64>());
267 | }
268 | },
269 | Err(err) => { println!("{:?}", err); assert!(false) }
270 | }
271 | }
272 | */
273 | }
274 |
275 | #[cfg(test)]
276 | #[cfg(feature = "native")]
277 | mod convolution_spec_native {
278 |
279 | // use co::backend::{Backend, BackendConfig};
280 | // use co::framework::IFramework;
281 | // use co::frameworks::Native;
282 | // use co_nn::*;
283 | // use co::memory::MemoryType;
284 | // use co::tensor::SharedTensor;
285 | // use co::plugin::numeric_helpers::{cast, Float};
286 | //
287 | // fn get_native_backend() -> Backend<Native> {
288 | // let framework = Native::new();
289 | // let hardwares = framework.hardwares();
290 | // let backend_config = BackendConfig::new(framework, hardwares);
291 | // Backend::new(backend_config).unwrap()
292 | // }
293 | //
294 | // fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
295 | // match mem {
296 | // &mut MemoryType::Native(ref mut mem) => {
297 | // let mut mem_buffer = mem.as_mut_slice::<T>();
298 | // for (index, datum) in data.iter().enumerate() {
299 | // mem_buffer[index] = *datum;
300 | // }
301 | // },
302 | // #[cfg(any(feature = "opencl", feature = "cuda"))]
303 | // _ => {}
304 | // }
305 | // }
306 | //
307 | // fn get_memory<T: Float>(backend: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>){
308 | // let val = cast::<f64, T>(1f64).unwrap();
309 | // let val2 = cast::<f64, T>(2f64).unwrap();
310 | // let batch = 4;
311 | // let w1 = 9;
312 | // let h1 = 9;
313 | // let d1 = 3;
314 | // let k = 6;
315 | // let f = 3;
316 | // let w2 = (w1 - f + 0) / 1;
317 | // let h2 = (h1 - f + 0) / 1;
318 | // let mut x = SharedTensor::<T>::new(backend.device(), &(batch, d1, h1, w1)).unwrap();
319 | // let mut payload: &mut [T] = &mut ::std::iter::repeat(val).take(x.capacity()).collect::<Vec<T>>();
320 | // payload[0] = val2;
321 | // write_to_memory(x.get_mut(backend.device()).unwrap(), payload);
322 | //
323 | // let mut filter = SharedTensor::