├── .gitignore
├── LICENSE
├── README.md
├── c2nim
    └── cuda12_5
    │   ├── cuComplex.h
    │   ├── cublas_api.h
    │   ├── cublas_v2.h
    │   ├── cuda.h
    │   ├── cuda_occupancy.h
    │   ├── cuda_runtime_api.h
    │   ├── cudnn_v9.h
    │   ├── cufft.h
    │   ├── cufftw.h
    │   ├── curand.h
    │   ├── curand_discrete.h
    │   ├── cusolverDn.h
    │   ├── cusolverRf.h
    │   ├── cusolverSp.h
    │   ├── cusolver_common.h
    │   ├── cusparse.h
    │   ├── driver_types.h
    │   ├── library_types.h
    │   ├── nvblas.h
    │   ├── nvrtc.h
    │   ├── surface_types.h
    │   ├── texture_types.h
    │   └── vector_types.h
├── changelog.org
├── examples
    ├── cuda12_5
    │   ├── blas.nim
    │   ├── denseLinearSystem.nim
    │   ├── fft.nim
    │   ├── random.nim
    │   ├── runtimeCompilation.nim
    │   ├── sparse.nim
    │   └── sparseLinearSystem.nim
    └── cuda8_0
    │   ├── fft.nim
    │   ├── nim.cfg
    │   ├── pagerank.nim
    │   ├── random.nim
    │   └── sparse.nim
├── include
    ├── cuda12_5
    │   ├── cublas_api.h
    │   ├── cuda.h
    │   ├── cuda_occupancy.h
    │   ├── cuda_runtime.h
    │   ├── cuda_runtime_api.h
    │   ├── cusolver_common.h
    │   ├── cusparse.h
    │   ├── driver_types.h
    │   ├── nvrtc.h
    │   └── vector_types.h
    └── cuda8_0
    │   ├── cuda_occupancy.h
    │   ├── cuda_runtime_api.h
    │   └── vector_types.h
├── nimcuda.nimble
├── src
    ├── nimcuda.nim
    └── nimcuda
    │   ├── cuda12_5
    │       ├── check.nim
    │       ├── cuComplex.nim
    │       ├── cublas_api.nim
    │       ├── cublas_v2.nim
    │       ├── cuda.nim
    │       ├── cuda_occupancy.nim
    │       ├── cuda_runtime_api.nim
    │       ├── cufft.nim
    │       ├── curand.nim
    │       ├── cusolverDn.nim
    │       ├── cusolverRf.nim
    │       ├── cusolverSp.nim
    │       ├── cusolver_common.nim
    │       ├── cusparse.nim
    │       ├── driver_types.nim
    │       ├── helpers.nim
    │       ├── libpaths.nim
    │       ├── library_types.nim
    │       ├── nvblas.nim
    │       ├── nvrtc.nim
    │       ├── surface_types.nim
    │       ├── texture_types.nim
    │       └── vector_types.nim
    │   └── cuda8_0
    │       ├── check.nim
    │       ├── cuComplex.nim
    │       ├── cublas_api.nim
    │       ├── cublas_v2.nim
    │       ├── cuda_occupancy.nim
    │       ├── cuda_runtime_api.nim
    │       ├── cudnn.nim
    │       ├── cufft.nim
    │       ├── curand.nim
    │       ├── cusolverDn.nim
    │       ├── cusolverRf.nim
    │       ├── cusolverSp.nim
    │       ├── cusolver_common.nim
    │       ├── cusparse.nim
    │       ├── driver_types.nim
    │       ├── library_types.nim
    │       ├── nvblas.nim
    │       ├── nvgraph.nim
    │       ├── surface_types.nim
    │       ├── texture_types.nim
    │       └── vector_types.nim
└── utils
    ├── postprocessor.nim
    └── preprocessor.nim


/.gitignore:
--------------------------------------------------------------------------------
 1 | nimcache
 2 | 
 3 | # ignore all files except nim source files in the examples dir
 4 | examples/cuda12_5/*
 5 | !examples/cuda12_5/*.nim
 6 | 
 7 | # ignore all files except nim source files in the examples dir
 8 | examples/cuda8_0/*
 9 | !examples/cuda8_0/*.nim
10 | 
11 | #ignore docs
12 | htmldocs
13 | 
14 | .vscode
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # NimCUDA
 2 | 
 3 | Nim bindings for the [CUDA](https://developer.nvidia.com/cuda-toolkit)
 4 | libraries. The versions currently in use are
 5 | 
 6 | * CUDA 8.0 + CuDNN 5.1
 7 | * CUDA 12.5
 8 | 
 9 | ## Status
10 | 
11 | Most libraries have working bindings. Out of these:
12 | 
13 | * most bindings are generated using [c2nim](http://nim-lang.org/docs/c2nim.html)
14 |   and suitable directives (see the files inside `/c2nim`)
15 | * a preprocessor is used on the header files to help with common issues that
16 |   c2nim has during parsing.
17 | * a postprocessor is used on the nim files to help alleviate some common
18 |   output problems that c2nim has.
19 | * some header files are manually edited before being passed to c2nim.
20 | * some nim files are manually edited.
21 | 
22 | Ideally, once some improvements are available in c2nim, there should be no
23 | need to manually modify any files.
24 | 
25 | ## Usage
26 | 
27 | See a few examples under `/examples`. The examples can be run with the command
28 | `nimble EXAMPLE_NAME CUDA_VERSION`, where `EXAMPLE_NAME` is one of the examples and
29 | `CUDA_VERSION` is the version of CUDA that you want it to run on, for
30 | instance `nimble fft 12.5`.
31 | 
32 | API documentation lives under `/htmldocs`. Generate it by running `nimble docs`.
33 | 
34 | ## Name mangling
35 | 
36 | c2nim supports name mangling, which could be useful to simplify a few names
37 | (e.g. turn `CUBLAS_STATUS_ARCH_MISMATCH` into `ARCH_MISMATCH`, which can be
38 | qualified as `cublasStatus_t.ARCH_MISMATCH` in case of ambiguity).
39 | 
40 | Right now, no unnecessary mangling is performed, because the API surface is large and
41 | not always consistent, so it felt simpler to leave it as is. This may change
42 | in a future release.
43 | 
44 | ## Error handling
45 | 
46 | In each CUDA version's library there is a file called `check.nim`.
47 | In it are a few templates that turn CUDA errors
48 | into Nim exceptions. They are all under the overloaded name `check`, so that
49 | one can do, for instance
50 | 
51 | ```nim
52 | check cudaMalloc(cast[ptr pointer](addr gpuRows), sizeof(rows))
53 | ```
54 | 
55 | (here `cudaMalloc` is one of the many functions that fail by returning an
56 | error code).
57 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cuComplex.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef CU_COMPLEX_H_
 3 | 
 4 |   #mangle __GNUC__ GNUC
 5 |   #mangle __GNUC_MINOR__ GNUC_MINOR
 6 | 
 7 |   #def __host__
 8 |   #def __device__
 9 |   #def __inline__
10 | 
11 | #@
12 | from std/math import sqrt
13 | 
14 | template sqrtf(x: cfloat): cfloat = sqrt(x)
15 | 
16 | template fabsf(x: cfloat): cfloat = abs(x)
17 | 
18 | template fabs(x: float): float = abs(x)
19 | 
20 | template `div`(a: static[float64], b: cfloat): cfloat = cfloat(a) / b
21 | 
22 | template `div`(a: cfloat, b: cfloat): cfloat = a / b
23 | @#
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cublas_api.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #def CUBLASWINAPI
 3 |   #def CUBLASAPI
 4 | 
 5 |   #mangle __half half
 6 |   #mangle __half2 half2
 7 | 
 8 |   #dynlib libName
 9 |   #private libName
10 |   #cdecl
11 |   #if defined(windows)
12 |   #  define libName "cublas.dll"
13 |   #elif defined(macosx)
14 |   #  define libName "libcublas.dylib"
15 |   #else
16 |   #  define libName "libcublas.so"
17 |   #endif
18 | 
19 |   #assumendef CUBLAS_API_H_
20 | 
21 |   #include "library_types.h"
22 | #endif
23 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cublas_v2.h:
--------------------------------------------------------------------------------
1 | #ifdef C2NIM
2 |   #assumendef CUBLAS_V2_H_
3 | 
4 |   #def CUBLASAPI
5 | #endif
6 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cuda.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifdef C2NIM
 3 |   #assumendef __cuda_cuda_h__
 4 |   // NOTE(review): cuuint32_t is an unsigned type in cuda.h, yet it is mangled to the signed cint below; confirm cuint was not intended.
 5 |   #mangle cuuint32_t cint
 6 |   #mangle cuuclonglong culonglong
 7 |   #mangle default_ defaultUnderScore
 8 |   #mangle _oversize underScoreOversize
 9 |   #mangle _internal_padding underScoreInternal_padding
10 | 
11 |   #def __device_builtin__
12 | 
13 |   #def __CUDA_DEPRECATED
14 | 
15 |   #skipinclude
16 | 
17 |   #dynlib libName
18 |   #private libName
19 |   #cdecl
20 |   #if defined(windows)
21 |   #  stdcall
22 |   #  define libName "cuda.dll" // NOTE(review): unverified name; the Windows driver API DLL is normally nvcuda.dll - confirm
23 |   #elif defined(macosx)
24 |   #  define libName "libcuda.dylib"
25 |   #else
26 |   #  define libName "libcuda.so"
27 |   #endif
28 | 
29 | 
30 | 
31 | #endif
32 | 
33 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cuda_occupancy.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #mangle __cuda_occupancy_h__ cuda_occupancy_h
 3 |   #mangle __CUDA_OCC_MAJOR__ CUDA_OCC_MAJOR
 4 |   #mangle __CUDA_OCC_MINOR__ CUDA_OCC_MINOR
 5 |   #mangle __occMin occMin
 6 |   #mangle __occDivideRoundUp occDivideRoundUp
 7 |   #mangle __occRoundUp occRoundUp
 8 | 
 9 |   #assumendef __CUDACC__
10 |   #def __OCC_INLINE inline
11 | 
12 |   // typedef struct {} cudaOccResult;
13 |   // typedef struct {} cudaOccDeviceProp;
14 |   // typedef struct {} cudaOccFuncAttributes;
15 |   // typedef struct {} cudaOccDeviceState;
16 | #endif


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cuda_runtime_api.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   // c2nim directives for cuda_runtime_api.h.
 3 |   #assumendef __CUDA_RUNTIME_API_H__
 4 |   #assumendef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
 5 | 
 6 |   #assumendef _WIN32
 7 |   #assumendef __CUDA_API_VERSION_INTERNAL
 8 |   #assumedef __CUDACC_INTEGRATED__
 9 |   #assumendef CUDA_API_PER_THREAD_DEFAULT_STREAM
10 | 
11 |   // Decoration macros; each one expands to nothing.
12 |   #def CUDARTAPI
13 |   #def __host__
14 |   #def __cudart_builtin__
15 |   #def CUDART_CB
16 |   #def __dv(v)
17 | 
18 |   // Strip the double-underscore decoration from these names (one rule per name).
19 |   #mangle __CUDA_API_VER_MAJOR__ CUDA_API_VER_MAJOR
20 |   #mangle __CUDA_API_VER_MINOR__ CUDA_API_VER_MINOR
21 |   #mangle __CUDART_API_VERSION CUDART_API_VERSION
22 |   #mangle __DOXYGEN_ONLY__ DOXYGEN_ONLY
23 |   #mangle __CUDACC_RTC_MINIMAL__ CUDACC_RTC_MINIMAL
24 |   #mangle __CUDACC_RDC__ CUDACC_RDC
25 |   #mangle __CUDACC_EWP__ CUDACC_EWP
26 |   #mangle __CUDACC_RTC__ CUDACC_RTC
27 | 
28 |   // Bind dynamically to the CUDA runtime library selected per platform.
29 |   #dynlib libName
30 |   #private libName
31 |   #cdecl
32 |   #if defined(windows)
33 |   #  stdcall
34 |   #  define libName "cudart.dll"
35 |   #elif defined(macosx)
36 |   #  define libName "libcudart.dylib"
37 |   #else
38 |   #  define libName "libcudart.so"
39 |   #endif
40 | 
41 |   #include "vector_types.h"
42 |   #include "driver_types.h"
43 |   #include "surface_types.h"
44 |   #include "texture_types.h"
45 |   #skipinclude
46 | #endif
47 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cudnn_v9.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #def CUDNNWINAPI
 3 | 
 4 |   #mangle CUDNN_H_ CUDNN_H
 5 | 
 6 |   #dynlib libName
 7 |   #private libName
 8 |   #cdecl
 9 |   #if defined(windows)
10 |   #  define libName "cudnn.dll"
11 |   #elif defined(macosx)
12 |   #  define libName "libcudnn.dylib"
13 |   #else
14 |   #  define libName "libcudnn.so"
15 |   #endif
16 | 
17 |   #private cudnnTensorStruct
18 |   #private cudnnConvolutionStruct
19 |   #private cudnnPoolingStruct
20 |   #private cudnnFilterStruct
21 |   #private cudnnLRNStruct
22 |   #private cudnnActivationStruct
23 |   #private cudnnSpatialTransformerStruct
24 |   #private cudnnOpTensorStruct
25 |   #private cudnnDropoutStruct
26 | 
27 |   struct cudnnTensorStruct {};
28 |   struct cudnnConvolutionStruct {};
29 |   struct cudnnPoolingStruct {};
30 |   struct cudnnFilterStruct {};
31 |   struct cudnnLRNStruct {};
32 |   struct cudnnActivationStruct {};
33 |   struct cudnnSpatialTransformerStruct {};
34 |   struct cudnnOpTensorStruct {};
35 |   struct cudnnDropoutStruct {};
36 | 
37 | #endif


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cufft.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #def CUFFTAPI
 3 | 
 4 |   #dynlib libName
 5 |   #private libName
 6 |   #cdecl
 7 |   #if defined(windows)
 8 |   #  stdcall
 9 |   #  define libName "cufft.dll"
10 |   #elif defined(macosx)
11 |   #  define libName "libcufft.dylib"
12 |   #else
13 |   #  define libName "libcufft.so"
14 |   #endif
15 | 
16 |   #include "cuComplex.h"
17 |   #include "library_types.h"
18 |   #include "driver_types.h"
19 |   #skipinclude
20 | #endif
21 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cufftw.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef _CUFFTW_H_
 3 | 
 4 |   #def CUFFTAPI
 5 | 
 6 |   #dynlib libName
 7 |   #private libName
 8 |   #cdecl
 9 |   #if defined(windows)
10 |   #  stdcall
11 |   #  define libName "cufftw.dll"
12 |   #elif defined(macosx)
13 |   #  define libName "libcufftw.dylib"
14 |   #else
15 |   #  define libName "libcufftw.so"
16 |   #endif
17 | 
18 |   #skipinclude
19 | #endif
20 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/curand.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #def CURANDAPI
 3 | 
 4 |   #assumendef CURAND_H_
 5 | 
 6 |   #dynlib libName
 7 |   #private libName
 8 |   #cdecl
 9 |   #if defined(windows)
10 |   #  define libName "curand.dll"
11 |   #elif defined(macosx)
12 |   #  define libName "libcurand.dylib"
13 |   #else
14 |   #  define libName "libcurand.so"
15 |   #endif
16 | 
17 |   #private curandDistributionShift_st
18 |   #private curandDistributionM2Shift_st
19 |   #private curandHistogramM2_st
20 |   #private curandDiscreteDistribution_st
21 | 
22 |   struct curandDistributionShift_st {};
23 |   struct curandDistributionM2Shift_st {};
24 |   struct curandHistogramM2_st {};
25 |   struct curandDiscreteDistribution_st {};
26 | 
27 |   #include "library_types.h"
28 |   #include "driver_types.h"
29 |   #skipinclude
30 | #endif
31 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/curand_discrete.h:
--------------------------------------------------------------------------------
1 | #ifdef C2NIM
2 |   #assumendef CURANDDISCRETE_H_
3 | #endif
4 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cusolverDn.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   // c2nim directives for cusolverDn.h.
 3 |   #assumendef CUSOLVERDN_H_
 4 | 
 5 |   #def CUSOLVERAPI
 6 | 
 7 |   // NOTE(review): other directive files use #def for macros with an empty expansion; confirm #define is intended here.
 8 |   #define CUSOLVER_DEPRECATED(new_func)
 9 | 
10 |   // Bind dynamically to the cuSOLVER library selected per platform.
11 |   #dynlib libName
12 |   #private libName
13 |   #cdecl
14 |   #if defined(windows)
15 |   #  define libName "cusolver.dll"
16 |   #elif defined(macosx)
17 |   #  define libName "libcusolver.dylib"
18 |   #else
19 |   #  define libName "libcusolver.so"
20 |   #endif
21 | 
22 |   #include "cuComplex.h"
23 |   #include "cublas_api.h"
24 |   #include "cusolver_common.h"
25 |   #include "library_types.h"
26 |   #include "driver_types.h"
27 |   #skipinclude
28 | #endif
29 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cusolverRf.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef CUSOLVERRF_H_
 3 | 
 4 |   #def CUSOLVERAPI
 5 | 
 6 |   #dynlib libName
 7 |   #private libName
 8 |   #cdecl
 9 |   #if defined(windows)
10 |   #  define libName "cusolver.dll"
11 |   #elif defined(macosx)
12 |   #  define libName "libcusolver.dylib"
13 |   #else
14 |   #  define libName "libcusolver.so"
15 |   #endif
16 | 
17 |   #include "cusolver_common.h"
18 |   #skipinclude
19 | #endif
20 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cusolverSp.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef CUSOLVERSP_H_
 3 | 
 4 |   #def CUSOLVERAPI
 5 | 
 6 |   #dynlib libName
 7 |   #private libName
 8 |   #cdecl
 9 |   #if defined(windows)
10 |   #  define libName "cusolver.dll"
11 |   #elif defined(macosx)
12 |   #  define libName "libcusolver.dylib"
13 |   #else
14 |   #  define libName "libcusolver.so"
15 |   #endif
16 | 
17 |   #include "cuComplex.h"
18 |   #include "driver_types.h"
19 |   #include "cusolver_common.h"
20 |   #include "cusparse.h"
21 |   #skipinclude
22 | #endif
23 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cusolver_common.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef CUSOLVER_COMMON_H_
 3 |   #mangle __int64 int64
 4 | 
 5 |   #def CUSOLVERAPI
 6 | 
 7 |   // #assumendef _MSC_VER
 8 | 
 9 |   #dynlib libName
10 |   #private libName
11 |   #cdecl
12 |   #if defined(windows)
13 |   #  define libName "cusolver.dll"
14 |   #elif defined(macosx)
15 |   #  define libName "libcusolver.dylib"
16 |   #else
17 |   #  define libName "libcusolver.so"
18 |   #endif
19 | 
20 |   #include "library_types.h"
21 |   #skipinclude
22 | #endif
23 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/cusparse.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #def CUSPARSEAPI
 3 | 
 4 |   #assumendef CUSPARSE_H_
 5 |   #assumendef _MSC_VER
 6 | 
 7 |   // #prefix cusparse
 8 |   // #prefix cusparse_
 9 | 
10 |   #dynlib libName
11 |   #private libName
12 |   #cdecl
13 |   #if defined(windows)
14 |   #  define libName "cusparse.dll"
15 |   #elif defined(macosx)
16 |   #  define libName "libcusparse.dylib"
17 |   #else
18 |   #  define libName "libcusparse.so"
19 |   #endif
20 | 
21 |   #define DISABLE_CUSPARSE_DEPRECATED
22 | 
23 |   #def CUSPARSE_DEPRECATED_REPLACE_WITH(new_func)
24 |   #def CUSPARSE_DEPRECATED
25 |   #def CUSPARSE_DEPRECATED_TYPE
26 |   #def CUSPARSE_DEPRECATED_TYPE_MSVC
27 |   #def CUSPARSE_DEPRECATED_ENUM_REPLACE_WITH(new_enum)
28 |   #def CUSPARSE_DEPRECATED_ENUM
29 | 
30 |   #include "library_types.h"
31 |   #include "driver_types.h"
32 |   #include "cuComplex.h"
33 |   #skipinclude
34 | #endif
35 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/driver_types.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef __DRIVER_TYPES_H__
 3 |   #assumedef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
 4 |   #assumendef __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DRIVER_TYPES_H__
 5 | 
 6 | 
 7 |   #def __device_builtin__
 8 | 
 9 |   // #pp cudaDevicePropDontCare
10 | 
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/library_types.h:
--------------------------------------------------------------------------------
1 | #ifdef C2NIM
2 |   #assumendef __LIBRARY_TYPES_H__
3 | 
4 | #endif
5 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/nvblas.h:
--------------------------------------------------------------------------------
 1 | #ifdef C2NIM
 2 |   #assumendef NVBLAS_H_
 3 | 
 4 |   #dynlib libName
 5 |   #private libName
 6 |   #cdecl
 7 |   #if defined(windows)
 8 |   #  define libName "nvblas.dll"
 9 |   #elif defined(macosx)
10 |   #  define libName "libnvblas.dylib"
11 |   #else
12 |   #  define libName "libnvblas.so"
13 |   #endif
14 | 
15 |   #include "cuComplex.h"
16 |   #skipinclude
17 | #endif
18 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/nvrtc.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifdef C2NIM
 3 |   #assumendef __NVRTC_H__
 4 | 
 5 |   #def __device_builtin__
 6 | 
 7 |   #mangle _nvrtcProgram nvrtcProgramObj
 8 | 
 9 |   #skipinclude
10 | 
11 |   #dynlib libName
12 |   #private libName
13 |   #cdecl
14 |   #if defined(windows)
15 |   #  stdcall
16 |   #  define libName "nvrtc64.dll" // dont know that this is right
17 |   #elif defined(macosx)
18 |   #  define libName "libnvrtc.dylib"
19 |   #else
20 |   #  define libName "libnvrtc.so"
21 |   #endif
22 | 
23 | #@
24 | type nvrtcProgramObj {.noDecl, incompleteStruct.} = object
25 | @#
26 | 
27 | #endif
28 | 
29 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/surface_types.h:
--------------------------------------------------------------------------------
1 | #ifdef C2NIM
2 |   #assumendef __SURFACE_TYPES_H__
3 | 
4 |   #def __device_builtin__
5 | 
6 |   #skipinclude
7 | #endif
8 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/texture_types.h:
--------------------------------------------------------------------------------
1 | #ifdef C2NIM
2 |   #assumendef __TEXTURE_TYPES_H__
3 | 
4 |   #def __device_builtin__
5 | 
6 |   #skipinclude
7 | #endif
8 | 


--------------------------------------------------------------------------------
/c2nim/cuda12_5/vector_types.h:
--------------------------------------------------------------------------------
1 | #ifdef C2NIM
2 |   #assumendef __VECTOR_TYPES_H__
3 | 
4 |   #def __device_builtin__
5 | 
6 |   #header "vector_types.h"
7 | #endif
8 | 


--------------------------------------------------------------------------------
/changelog.org:
--------------------------------------------------------------------------------
 1 | * v0.2.2
 2 | - add basic CUDA installation detection (PR #28)
 3 |   - can be overridden by defining ~CudaPath~ and ~CudaIncludes~
 4 | - fix issue #27 by using ~header~ pragma for ~cuda.h~ wrapper (PR #28)
 5 | * v0.2.1
 6 | - fix nimble file for Windows, Nim <= 1.6 (up to 1.4 supported), PR #22
 7 | * v0.2.0
 8 | - add support for CUDA version 12.5. Includes a massive revamp of how
 9 |   the bindings are generated, by @lilkeet (PR #17)
10 | * v0.1.9
11 | - fix ~fileExists~ ambiguous identifier for older Nim versions
12 | - fix placement of ~{.union.}~ pragma  
13 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/blas.nim:
--------------------------------------------------------------------------------
 1 | # Link against the cuBLAS and CUDA runtime libraries
 2 | 
 3 | import
 4 |   std / [strformat],
 5 |   ../../src/nimcuda/cuda12_5/[cuda_runtime_api, cublas_api,
 6 |                                driver_types, check]
 7 | 
 8 | 
 9 | 
10 | proc main() =
11 |   var handle: cublasHandle_t
12 | 
13 |   # Initialize cuBLAS library
14 |   check cublasCreate_v2(addr handle)
15 | 
16 |   # Matrix dimensions
17 |   const m = 2  # Rows of A and C
18 |   const n = 2  # Columns of B and C
19 |   const k = 2  # Columns of A and rows of B
20 | 
21 |   # Host matrices (column-major order)
22 |   var h_A: array[0..(m*k)-1, cfloat] = [cfloat 1.0, 2.0,
23 |                                                3.0, 4.0]
24 | 
25 |   var h_B: array[0..(k*n)-1, cfloat] = [cfloat 5.0, 6.0,
26 |                                                7.0, 8.0]
27 | 
28 |   var h_C: array[0..(m*n)-1, cfloat] = [cfloat 0.0, 0.0,
29 |                                                0.0, 0.0]
30 | 
31 |   # Device pointers
32 |   var d_A, d_B, d_C: pointer
33 | 
34 |   # Allocate device memory
35 |   check cudaMalloc(addr d_A, culong(m*k*sizeof(cfloat)))
36 |   check cudaMalloc(addr d_B, culong(k*n*sizeof(cfloat)))
37 |   check cudaMalloc(addr d_C, culong(m*n*sizeof(cfloat)))
38 | 
39 |   # Copy host data to device
40 |   check cudaMemcpy(d_A, addr h_A[0], culong(m*k*sizeof(cfloat)),
41 |     cudaMemcpyHostToDevice)
42 |   check cudaMemcpy(d_B, addr h_B[0], culong(k*n*sizeof(cfloat)),
43 |     cudaMemcpyHostToDevice)
44 | 
45 |   # Scalars for the operation
46 |   var alpha: cfloat = 1.0
47 |   var beta: cfloat = 0.0
48 | 
49 |   # Perform matrix multiplication: C = alpha * A * B + beta * C
50 |   check cublasSgemm_v2(handle, CUBLAS_OP_N #[No transpose for A]#,
51 |     CUBLAS_OP_N #[No transpose for B]#, m, n, k, addr alpha,
52 |     cast[ptr cfloat](d_A), m, cast[ptr cfloat](d_B), k, addr beta,
53 |     cast[ptr cfloat](d_C), m)
54 | 
55 |   # Copy result back to host
56 |   check cudaMemcpy(addr h_C[0], d_C, culong(m*n*sizeof(cfloat)),
57 |     cudaMemcpyDeviceToHost)
58 | 
59 |   # Display the result
60 |   echo "Result matrix C:"
61 |   for i in 0..<m:
62 |     var rowStr = ""
63 |     for j in 0..<n:
64 |       rowStr.add fmt"{h_C[i + j*m]:^6.1f} "
65 |     echo rowStr
66 | 
67 |   # Clean up resources
68 |   check cudaFree(d_A)
69 |   check cudaFree(d_B)
70 |   check cudaFree(d_C)
71 |   check cublasDestroy_v2(handle)
72 | 
73 | main()
74 | 
75 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/denseLinearSystem.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | import
 3 |   std / [strformat],
 4 |   ../../src/nimcuda/cuda12_5/[driver_types, cusolver_common,
 5 |     cusolverDn, cuda_runtime_api, check, cublas_api]
 6 | 
 7 | proc main() =
 8 |   var status: cusolverStatus_t
 9 |   var handle: cusolverDnHandle_t
10 | 
11 |   # Initialize cuSOLVER library
12 |   status = cusolverDnCreate(addr handle)
13 |   if status != CUSOLVER_STATUS_SUCCESS:
14 |     echo "CUSOLVER initialization failed"
15 |     return
16 | 
17 |   # Matrix dimensions
18 |   const n = 3  # Number of equations and variables
19 | 
20 |   # Host matrix A and right-hand side vector b (column-major order)
21 |   var h_A: array[0..(n*n)-1, cfloat] = [
22 |     cfloat 3.0, 1.0, 1.0,  # First column
23 |            2.0, 2.0, 1.0,  # Second column
24 |            1.0, 1.0, 1.0  # Third column
25 |     ]
26 | 
27 |   var h_b: array[0..n-1, cfloat] = [cfloat 10, 8, 6]  # Right-hand side vector
28 | 
29 |   # Device pointers
30 |   var d_A, d_b: pointer
31 |   var devIpiv: pointer  # Pivot array
32 |   var devInfo: pointer  # Info output
33 | 
34 |   # Allocate device memory
35 |   check cudaMalloc(addr d_A, culong(n*n*sizeof(cfloat)))
36 |   check cudaMalloc(addr d_b, culong(n*sizeof(cfloat)))
37 |   check cudaMalloc(addr devIpiv, culong(n*sizeof(cint)))
38 |   check cudaMalloc(addr devInfo, culong(sizeof(cint)))
39 | 
40 |   # Copy host data to device
41 |   check cudaMemcpy(d_A, addr h_A[0], culong(n*n*sizeof(cfloat)),
42 |                    cudaMemcpyHostToDevice)
43 |   check cudaMemcpy(d_b, addr h_b[0], culong(n*sizeof(cfloat)),
44 |                    cudaMemcpyHostToDevice)
45 | 
46 |   # Get the buffer size for LU decomposition
47 |   var lwork: cint
48 |   check cusolverDnSgetrf_bufferSize(handle, n, n, cast[ptr cfloat](d_A), n,
49 |                                     addr lwork)
50 | 
51 |   # Allocate workspace
52 |   var d_Workspace: pointer
53 |   check cudaMalloc(addr d_Workspace, culong(lwork*sizeof(cfloat)))
54 | 
55 |   # Perform LU decomposition
56 |   check cusolverDnSgetrf(handle, n, n, cast[ptr cfloat](d_A), n,
57 |     cast[ptr cfloat](d_Workspace), cast[ptr cint](devIpiv),
58 |     cast[ptr cint](devInfo))
59 | 
60 |   # Check devInfo after getrf
61 |   var h_info: cint
62 |   check cudaMemcpy(addr h_info, devInfo, culong(sizeof(cint)),
63 |                    cudaMemcpyDeviceToHost)
64 |   if h_info != 0:
65 |     echo "LU decomposition failed, info = ", h_info
66 |     return
67 | 
68 |   # Solve the system A*x = b
69 |   check cusolverDnSgetrs(handle, CUBLAS_OP_N, n, 1, cast[ptr cfloat](d_A), n,
70 |     cast[ptr cint](devIpiv), cast[ptr cfloat](d_b), n, cast[ptr cint](devInfo))
71 | 
72 |   # Check devInfo after getrs
73 |   check cudaMemcpy(addr h_info, devInfo, culong(sizeof(cint)),
74 |                    cudaMemcpyDeviceToHost)
75 |   if h_info != 0:
76 |     echo "Solving the linear system failed, info = ", h_info
77 |     return
78 | 
79 |   # Copy result back to host
80 |   check cudaMemcpy(addr h_b[0], d_b, culong(n*sizeof(cfloat)),
81 |                    cudaMemcpyDeviceToHost)
82 | 
83 |   # Display the result
84 |   echo "Solution vector x:"
85 |   for i in 0..<n:
86 |     echo fmt" x[{i}] = {h_b[i]:^6.4f}"
87 | 
88 |   # Clean up resources
89 |   check cudaFree(d_A)
90 |   check cudaFree(d_b)
91 |   check cudaFree(d_Workspace)
92 |   check cudaFree(devIpiv)
93 |   check cudaFree(devInfo)
94 |   check cusolverDnDestroy(handle)
95 | 
96 | main()
97 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/fft.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import
15 |   cufft, cuda_runtime_api, driver_types, vector_types, check
16 | 
17 | proc first[T](a: var openarray[T]): ptr T {.inline.} = addr(a[0])
18 | 
19 | proc main() =
20 |   const
21 |     NX = 256
22 |     NY = 128
23 |     N = NX * NY
24 |   let size = sizeof(cufftComplex).csize_t * N.csize_t
25 |   var
26 |     plan: cufftHandle
27 |     idata: ptr cufftComplex
28 |     odata: ptr cufftComplex
29 | 
30 |   check cudaMalloc(cast[ptr pointer](addr idata), size)
31 |   check cudaMalloc(cast[ptr pointer](addr odata), size)
32 | 
33 |   var
34 |     input = newSeq[cufftComplex](N)
35 |     output = newSeq[cufftComplex](N)
36 | 
37 |   for i in 0 ..< input.len:
38 |     input[i].x = cfloat(i) / cfloat(N)
39 |     input[i].y = cfloat(N - i) / cfloat(N)
40 | 
41 |   check cudaMemcpy(idata, input.first, size, cudaMemcpyHostToDevice)
42 | 
43 |   check cufftPlan2d(addr plan, NX, NY, CUFFT_C2C)
44 |   check cufftExecC2C(plan, idata, odata, CUFFT_FORWARD)
45 |   check cufftExecC2C(plan, odata, odata, CUFFT_INVERSE)
46 | 
47 |   check cudaMemcpy(output.first, odata, size, cudaMemcpyDeviceToHost)
48 | 
49 |   check cufftDestroy(plan)
50 |   check cudaFree(idata)
51 |   check cudaFree(odata)
52 | 
53 |   echo "original : ", input[0..10]
54 |   echo "transform: ", output[0..10]
55 | 
56 | when isMainModule:
57 |   main()
58 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/random.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import cuda_runtime_api, curand, check, driver_types
15 | 
16 | proc first[T](a: var openarray[T]): ptr T {.inline.} = addr(a[0])
17 | 
18 | proc main() =
19 |   ## Generates 30 uniform random floats on the device, copies them to the
20 |   ## host and prints them; then refills the same host array with normally
21 |   ## distributed values (mean 0, stddev 1) from a host-side generator.
22 |   const n = 30
23 |   var
24 |     host: array[n, cfloat]
25 |     device: ptr cfloat
26 |     gen: curandGenerator_t
27 | 
28 |   check cudaMalloc(cast[ptr pointer](addr device), n * sizeof(cfloat))
29 |   check curandCreateGenerator(addr gen, CURAND_RNG_PSEUDO_DEFAULT)
30 |   check curandSetPseudoRandomGeneratorSeed(gen, 1234.culonglong)
31 |   check curandGenerateUniform(gen, device, n)
32 |   check cudaMemcpy(host.first, device, n * sizeof(cfloat), cudaMemcpyDeviceToHost)
33 |   check curandDestroyGenerator(gen)
34 |   check cudaFree(device)
35 | 
36 |   echo "Random numbers generated on device: ", @host
37 | 
38 |   # curand can be used to generate random numbers directly on host
39 |   var
40 |     hostGen: curandGenerator_t
41 | 
42 |   check curandCreateGeneratorHost(addr hostGen, CURAND_RNG_PSEUDO_DEFAULT)
43 |   check curandGenerateNormal(hostGen, host.first, n, mean = 0, stddev = 1)
44 |   check curandDestroyGenerator(hostGen)
45 | 
46 |   echo "Random numbers generated on host: ", @host
47 | 
48 | when isMainModule:
49 |   main()
50 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/runtimeCompilation.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | # this example is from https://docs.nvidia.com/cuda/nvrtc/index.html#basic-usage
  3 | 
  4 | import
  5 |   std / [strformat],
  6 |   ../../src/nimcuda/cuda12_5/[nvrtc, check, cuda]
  7 | 
  8 | const
  9 |   NumThreads = 128
 10 |   NumBlocks = 32
 11 | 
 12 | const Saxpy = cstring"""
 13 | extern "C" __global__
 14 | void saxpy(float a, float *x, float *y, float *out, size_t n)
 15 | {
 16 |    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
 17 |    if (tid < n) {
 18 |       out[tid] = a * x[tid] + y[tid];
 19 |    }
 20 | }
 21 | """
 22 | 
 23 | proc main =
 24 |   ## Compiles the SAXPY kernel with NVRTC, loads the generated PTX through the
 25 |   ## driver API, launches it on device 0 and prints every computed element.
 26 |   # Create an instance of nvrtcProgram with the SAXPY code string.
 27 |   var prog: nvrtcProgram
 28 |   check nvrtcCreateProgram(addr(prog), Saxpy, "saxpy.cu", 0, nil, nil)
 29 | 
 30 |   # Compile the program with fmad disabled.
 31 |   # Note: Can specify GPU target architecture explicitly with '-arch' flag.
 32 |   const
 33 |     Options = [cstring "--fmad=false"]
 34 |     NumberOfOptions = cint Options.len
 35 |   let compileResult =  nvrtcCompileProgram(prog, NumberOfOptions,
 36 |                                            cast[cstringArray](addr Options[0]))
 37 | 
 38 |   block obtainLog: # Obtain compilation log from the program.
 39 |     var logSize: csize_t
 40 |     check nvrtcGetProgramLogSize(prog, addr logSize)
 41 | 
 42 |     # Own the buffer as a Nim string so it stays alive while NVRTC writes into
 43 |     # it; the C API (and the echo below) only see a borrowed cstring view.
 44 |     var log = newString(Natural logSize)
 45 | 
 46 |     check nvrtcGetProgramLog(prog, cstring log)
 47 | 
 48 |     echo fmt"log = '{cstring(log)}'" # usually empty if no issues found by the compiler
 49 | 
 50 |   check compileResult
 51 | 
 52 | 
 53 |   # Owning string: the PTX is consumed later by cuModuleLoadDataEx, so the
 54 |   # buffer must outlive the block that fills it.
 55 |   var ptx: string
 56 |   block obtainPtx: # Obtain PTX from the program.
 57 |     var ptxSize: csize_t
 58 |     check nvrtcGetPTXSize(prog, addr ptxSize)
 59 | 
 60 |     ptx = newString(Natural ptxSize)
 61 |     check nvrtcGetPTX(prog, cstring ptx)
 62 | 
 63 |     check nvrtcDestroyProgram(addr prog) # Destroy the program.
 64 | 
 65 |   block execution:
 66 |     # Load the generated PTX and get a handle to the SAXPY kernel.
 67 |     var
 68 |       cuDevice: CUdevice
 69 |       context: CUcontext
 70 |       module: CUmodule
 71 |       kernel: CUfunction
 72 | 
 73 |     check cuInit(0)
 74 |     check cuDeviceGet(addr cuDevice, 0)
 75 |     check cuCtxCreate(addr context, 0, cuDevice)
 76 |     check cuModuleLoadDataEx(addr module, cstring ptx, 0, nil, nil)
 77 |     check cuModuleGetFunction(addr kernel, module, "saxpy")
 78 | 
 79 |     let
 80 |       n = csize_t(NumThreads * NumBlocks)
 81 |       bufferSize = n * csize_t(sizeOf cfloat)
 82 |     let a = cfloat 5.1
 83 |     var
 84 |       hX = newSeqUninit[cfloat](n)
 85 |       hY = newSeqUninit[cfloat](n)
 86 |       hOut = newSeqUninit[cfloat](n)
 87 |     for i in 0 ..< n: # Initialize host data (fill hX and hY with ur data)
 88 |       hX[i] = cfloat(i)
 89 |       hY[i] = cfloat(i * 2)
 90 | 
 91 |     var
 92 |       dX: CUdeviceptr
 93 |       dY: CUdeviceptr
 94 |       dOut: CUdeviceptr
 95 |     check cuMemAlloc(addr dX, bufferSize)
 96 |     check cuMemAlloc(addr dY, bufferSize)
 97 |     check cuMemAlloc(addr dOut, bufferSize)
 98 | 
 99 |     check cuMemcpyHtoD(dX, addr hX[0], bufferSize)
100 |     check cuMemcpyHtoD(dY, addr hY[0], bufferSize)
101 | 
102 |     # Execute SAXPY.
103 |     let args: array[5, pointer] = [pointer(addr a), addr dX, addr dY,
104 |       addr dOut, addr n]
105 |     check cuLaunchKernel(kernel,
106 |                          NumBlocks, 1, 1,  # grid dim
107 |                          NumThreads, 1, 1, # block dim
108 |                          0, nil,           # shared mem and stream
109 |                          addr args[0],      # arguments,
110 |                          nil)
111 | 
112 |     check cuCtxSynchronize()
113 |     check cuMemcpyDtoH(addr hOut[0], dOut, bufferSize)
114 | 
115 |     for i in 0 ..< n:
116 |       echo fmt"{a} * {hX[i]} + {hY[i]} = {hOut[i]}"
117 | 
118 |     check cuMemFree(dX)
119 |     check cuMemFree(dY)
120 |     check cuMemFree(dOut)
121 | 
122 |     check cuModuleUnload module
123 |     check cuCtxDestroy context
124 | 
125 | 
126 | main()
127 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/sparse.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import cuda_runtime_api, cusparse, driver_types, check
15 | 
16 | proc first[T](a: var openarray[T]): ptr T {.inline.} = addr(a[0])
17 | 
18 | proc main() =
19 |   ## Converts the COO row indices of a sparse matrix with n = 4 rows into CSR
20 |   ## row pointers on the GPU via `cusparseXcoo2csr` and prints the result.
21 |   let
22 |     n = 4.cint
23 |     nnz = 9.cint
24 |   var
25 |     rows = [0.cint, 0, 0, 1, 2, 2, 2, 3, 3]
26 |     cols = [0.cint, 2, 3, 1, 0, 2, 3, 1, 3]
27 |     vals = [1'f32, 2, 3, 4, 5, 6, 7, 8, 9]
28 |     # A CSR row-pointer array holds one entry per row plus a final end marker,
29 |     # i.e. n + 1 = 5 elements; its size also determines the device buffer.
30 |     csrRows = [0.cint, 0, 0, 0, 0]
31 | 
32 |     handle: cusparseHandle_t
33 |     gpuRows: ptr cint
34 |     gpuCols: ptr cint
35 |     gpuVals: ptr cfloat
36 |     gpuCsrRows: ptr cint
37 | 
38 |   check cudaMalloc(cast[ptr pointer](addr gpuRows), sizeof(rows).csize_t)
39 |   check cudaMalloc(cast[ptr pointer](addr gpuCols), sizeof(cols).csize_t)
40 |   check cudaMalloc(cast[ptr pointer](addr gpuVals), sizeof(vals).csize_t)
41 |   check cudaMalloc(cast[ptr pointer](addr gpuCsrRows), sizeof(csrRows).csize_t)
42 | 
43 |   check cudaMemcpy(gpuRows, rows.first, sizeof(rows).csize_t, cudaMemcpyHostToDevice)
44 |   check cudaMemcpy(gpuCols, cols.first, sizeof(cols).csize_t, cudaMemcpyHostToDevice)
45 |   check cudaMemcpy(gpuVals, vals.first, sizeof(vals).csize_t, cudaMemcpyHostToDevice)
46 | 
47 |   check cusparseCreate(addr handle)
48 |   check cusparseXcoo2csr(handle, gpuRows, nnz, n, gpuCsrRows,CUSPARSE_INDEX_BASE_ZERO)
49 | 
50 |   check cudaMemcpy(csrRows.first, gpuCsrRows, sizeof(csrRows).csize_t, cudaMemcpyDeviceToHost)
51 | 
52 |   check cusparseDestroy(handle)
53 |   check cudaFree(gpuRows)
54 |   check cudaFree(gpuCols)
55 |   check cudaFree(gpuVals)
56 |   check cudaFree(gpuCsrRows)
57 | 
58 |   echo @csrRows
59 | 
60 | when isMainModule:
61 |   main()
62 | 


--------------------------------------------------------------------------------
/examples/cuda12_5/sparseLinearSystem.nim:
--------------------------------------------------------------------------------
  1 |  
  2 | # Link against the cuSOLVER, cuSPARSE, and CUDA runtime libraries
  3 | {.passL: "-lcusolver -lcusparse -lcudart".}
  4 | 
  5 | import
  6 |   std / [strformat],
  7 |   ../../src/nimcuda/cuda12_5/[cuda_runtime_api, driver_types, cusolverSp,
  8 |     check, cusparse]
  9 | 
 10 | 
 11 | proc main() =
 12 |   ## Solves the sparse linear system A*x = b with cuSOLVER's
 13 |   ## `cusolverSpScsrlsvQr`, where A is a 3x3 matrix stored in CSR format,
 14 |   ## and prints the solution vector.
 15 |   var handle: cusolverSpHandle_t
 16 |   var descrA: cusparseMatDescr_t
 17 | 
 18 |   # Initialize cuSOLVER Sparse library
 19 |   check cusolverSpCreate(addr handle)
 20 | 
 21 |   # Create matrix descriptor
 22 |   check cusparseCreateMatDescr(addr descrA)
 23 | 
 24 |   # Matrix dimensions and number of non-zero elements
 25 |   const m = 3    # Number of rows
 26 |   const n = 3    # Number of columns
 27 |   const nnz = 5  # Number of non-zero elements
 28 | 
 29 |   # Host representation of the sparse matrix A in CSR format
 30 |   # A = [ 10  0   0
 31 |   #        3  9   0
 32 |   #        0  7   8 ]
 33 | 
 34 |   # Row pointers (m + 1 entries; the last one equals nnz)
 35 |   var h_csrRowPtrA: array[0..m, cint] = [cint 0, 1, 3, 5]
 36 | 
 37 |   # Column indices (one per non-zero, listed row by row)
 38 |   var h_csrColIndA: array[0..nnz-1, cint] = [cint 0, 0, 1, 1, 2]
 39 | 
 40 |   # Non-zero values (same order as the column indices)
 41 |   var h_csrValA: array[0..nnz-1, cfloat] = [cfloat 10.0, 3.0, 9.0, 7.0, 8.0]
 42 | 
 43 |   # Right-hand side vector b (for the A above the solution is x = [1, 2, 3])
 44 |   var h_b: array[0..m-1, cfloat] = [cfloat 10.0, 21.0, 38.0]
 45 | 
 46 |   # Solution vector x
 47 |   var h_x: array[0..m-1, cfloat] = [cfloat 0.0, 0.0, 0.0]
 48 | 
 49 |   # Device pointers
 50 |   var d_csrRowPtrA, d_csrColIndA, d_csrValA, d_b, d_x: pointer
 51 | 
 52 |   # Allocate device memory
 53 |   check cudaMalloc(addr d_csrRowPtrA, culong((m+1)*sizeof(cint)))
 54 |   check cudaMalloc(addr d_csrColIndA, culong(nnz*sizeof(cint)))
 55 |   check cudaMalloc(addr d_csrValA, culong(nnz*sizeof(cfloat)))
 56 |   check cudaMalloc(addr d_b, culong(m*sizeof(cfloat)))
 57 |   check cudaMalloc(addr d_x, culong(n*sizeof(cfloat)))
 58 | 
 59 |   # Copy host data to device
 60 |   check cudaMemcpy(d_csrRowPtrA, addr h_csrRowPtrA[0],
 61 |                    culong((m+1)*sizeof(cint)), cudaMemcpyHostToDevice)
 62 |   check cudaMemcpy(d_csrColIndA, addr h_csrColIndA[0], culong(nnz*sizeof(cint)),
 63 |                    cudaMemcpyHostToDevice)
 64 |   check cudaMemcpy(d_csrValA, addr h_csrValA[0], culong(nnz*sizeof(cfloat)),
 65 |                    cudaMemcpyHostToDevice)
 66 |   check cudaMemcpy(d_b, addr h_b[0], culong(m*sizeof(cfloat)),
 67 |                    cudaMemcpyHostToDevice)
 68 | 
 69 |   # Tolerance for the solver and reorder parameter
 70 |   const tol: cfloat = 1e-6
 71 |   const reorder: cint = 0  # No reordering
 72 | 
 73 |   # Variable to hold the position of zero pivot (if any)
 74 |   var singularity: cint
 75 | 
 76 |   # Solve the sparse linear system A*x = b
 77 |   check cusolverSpScsrlsvQr(handle, m, nnz, descrA, cast[ptr cfloat](d_csrValA),
 78 |     cast[ptr cint](d_csrRowPtrA), cast[ptr cint](d_csrColIndA),
 79 |     cast[ptr cfloat](d_b), tol, reorder, cast[ptr cfloat](d_x),
 80 |     addr singularity)
 81 | 
 82 |   if singularity >= 0:
 83 |     echo "A is singular at row ", singularity
 84 |     return
 85 | 
 86 |   # Copy result back to host
 87 |   check cudaMemcpy(addr h_x[0], d_x, culong(n*sizeof(cfloat)),
 88 |                    cudaMemcpyDeviceToHost)
 89 | 
 90 |   # Display the result
 91 |   echo "Solution vector x:"
 92 |   for i in 0..<n:
 93 |     echo fmt" x[{i}] = {h_x[i]:^6.4f}"
 94 | 
 95 |   # Clean up resources
 96 |   check cudaFree(d_csrRowPtrA)
 97 |   check cudaFree(d_csrColIndA)
 98 |   check cudaFree(d_csrValA)
 99 |   check cudaFree(d_b)
100 |   check cudaFree(d_x)
101 |   check cusparseDestroyMatDescr(descrA)
102 |   check cusolverSpDestroy(handle)
103 | 
104 | main()
105 | 


--------------------------------------------------------------------------------
/examples/cuda8_0/fft.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import nimcuda/[cufft, cuda_runtime_api, driver_types, vector_types, nimcuda]
15 | 
16 | proc first[T](a: var openarray[T]): ptr T {.inline.} = addr(a[0])
17 | 
18 | proc main() =
19 |   const
20 |     NX = 256
21 |     NY = 128
22 |     N = NX * NY
23 |   let size = sizeof(cufftComplex) * N
24 |   var
25 |     plan: cufftHandle
26 |     idata: ptr cufftComplex
27 |     odata: ptr cufftComplex
28 | 
29 |   check cudaMalloc(cast[ptr pointer](addr idata), size)
30 |   check cudaMalloc(cast[ptr pointer](addr odata), size)
31 | 
32 |   var
33 |     input = newSeq[cufftComplex](N)
34 |     output = newSeq[cufftComplex](N)
35 | 
36 |   for i in 0 ..< input.len:
37 |     input[i].x = cfloat(i) / cfloat(N)
38 |     input[i].y = cfloat(N - i) / cfloat(N)
39 | 
40 |   check cudaMemcpy(idata, input.first, size, cudaMemcpyHostToDevice)
41 | 
42 |   check cufftPlan2d(addr plan, NX, NY, CUFFT_C2C)
43 |   check cufftExecC2C(plan, idata, odata, CUFFT_FORWARD)
44 |   check cufftExecC2C(plan, odata, odata, CUFFT_INVERSE)
45 | 
46 |   check cudaMemcpy(output.first, odata, size, cudaMemcpyDeviceToHost)
47 | 
48 |   check cufftDestroy(plan)
49 |   check cudaFree(idata)
50 |   check cudaFree(odata)
51 | 
52 |   echo "original : ", input[0..10]
53 |   echo "transform: ", output[0..10]
54 | 
55 | when isMainModule:
56 |   main()


--------------------------------------------------------------------------------
/examples/cuda8_0/nim.cfg:
--------------------------------------------------------------------------------
1 | path = ".."


--------------------------------------------------------------------------------
/examples/cuda8_0/pagerank.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import sequtils
15 | import nimcuda/[nvgraph, library_types, nimcuda]
16 | 
17 | type
18 |   CArray{.unchecked.}[T] = array[1, T]
19 |   CPointer[T] = ptr CArray[T]
20 | 
21 | proc allocCPointer[T](n: Natural): CPointer[T] {.inline.} =
22 |   cast[CPointer[T]](alloc(n * sizeof(T)))
23 | 
24 | proc first[T](p: CPointer[T]): ptr T {.inline.} = addr(p[0])
25 | 
26 | proc first[T](a: var openarray[T]): ptr T {.inline.} = addr(a[0])
27 | 
28 | proc main() =
29 |   var
30 |     srcIndices = [2.cint, 0, 2, 0, 4, 5, 2, 3, 3, 4]
31 |     destOffsets = [0.cint, 1, 3, 4, 6, 8, 10]
32 |     vertexInitial = [0.cfloat, 1, 0, 0, 0, 0]
33 |     vertexFinal: array[6, cfloat]
34 |     weights = [0.333333.cfloat, 0.5, 0.333333, 0.5, 0.5, 1.0, 0.333333, 0.5, 0.5, 0.5]
35 | 
36 |   let
37 |     n = 6
38 |     nnz = 10
39 |     vert_sets = 2
40 |     edge_sets = 1
41 |   var
42 |     alpha = 0.9.cfloat
43 |     handle: nvgraphHandle_t
44 |     graph: nvgraphGraphDescr_t
45 |     edge_dimT = repeat(CUDA_R_32F, edge_sets)
46 |     vertex_dimT = repeat(CUDA_R_32F, vert_sets)
47 |     CSC_input = allocCPointer[nvgraphCSCTopology32I_st](1).first
48 | 
49 |   check(nvgraphCreate(addr handle))
50 |   check(nvgraphCreateGraphDescr(handle, addr graph))
51 | 
52 |   CSC_input.nvertices = n.cint
53 |   CSC_input.nedges = nnz.cint
54 |   CSC_input.destination_offsets = destOffsets.first
55 |   CSC_input.source_indices = srcIndices.first
56 | 
57 |   check(nvgraphSetGraphStructure(handle, graph, cast[pointer](CSC_input), NVGRAPH_CSC_32))
58 |   check(nvgraphAllocateVertexData(handle, graph, vert_sets, vertex_dimT.first))
59 |   check(nvgraphAllocateEdgeData(handle, graph, edge_sets, edge_dimT.first))
60 |   check(nvgraphSetVertexData(handle, graph, vertexInitial.first, 0))
61 |   check(nvgraphSetEdgeData(handle, graph, weights.first, 0))
62 | 
63 |   check(nvgraphPagerank(handle, graph, 0, addr alpha, 0, 0, 1, 0, 0))
64 | 
65 |   check(nvgraphGetVertexData(handle, graph, vertexFinal.first, 1))
66 | 
67 |   check(nvgraphDestroyGraphDescr(handle, graph))
68 |   check(nvgraphDestroy(handle))
69 | 
70 |   dealloc(CSC_input)
71 | 
72 |   echo "Pagerank = ", @vertexFinal
73 | 
74 | when isMainModule:
75 |   main()


--------------------------------------------------------------------------------
/examples/cuda8_0/random.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import nimcuda/[cuda_runtime_api, curand, nimcuda, driver_types]
15 | 
16 | proc first[T](a: var openarray[T]): ptr T {.inline.} = addr(a[0])
17 | 
18 | proc main() =
19 |   ## Generates 30 uniform random floats on the device, copies them to the
20 |   ## host and prints them; then refills the same host array with normally
21 |   ## distributed values (mean 0, stddev 1) from a host-side generator.
22 |   const n = 30
23 |   var
24 |     host: array[n, cfloat]
25 |     device: ptr cfloat
26 |     gen: curandGenerator_t
27 | 
28 |   check cudaMalloc(cast[ptr pointer](addr device), n * sizeof(cfloat))
29 |   check curandCreateGenerator(addr gen, CURAND_RNG_PSEUDO_DEFAULT)
30 |   check curandSetPseudoRandomGeneratorSeed(gen, 1234.culonglong)
31 |   check curandGenerateUniform(gen, device, n)
32 |   check cudaMemcpy(host.first, device, n * sizeof(cfloat), cudaMemcpyDeviceToHost)
33 |   check curandDestroyGenerator(gen)
34 |   check cudaFree(device)
35 | 
36 |   echo "Random numbers generated on device: ", @host
37 | 
38 |   # curand can be used to generate random numbers directly on host
39 |   var
40 |     hostGen: curandGenerator_t
41 | 
42 |   check curandCreateGeneratorHost(addr hostGen, CURAND_RNG_PSEUDO_DEFAULT)
43 |   check curandGenerateNormal(hostGen, host.first, n, mean = 0, stddev = 1)
44 |   check curandDestroyGenerator(hostGen)
45 | 
46 |   echo "Random numbers generated on host: ", @host
47 | 
48 | when isMainModule:
49 |   main()


--------------------------------------------------------------------------------
/examples/cuda8_0/sparse.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | import nimcuda/[cuda_runtime_api, cusparse, driver_types, nimcuda]
15 | 
16 | proc first[T](a: var openArray[T]): ptr T {.inline.} = a[0].addr # ptr to a[0]
17 | 
18 | proc main() =
19 |   ## Converts the COO row indices of a sparse matrix (4 rows, 9 stored
20 |   ## entries) to a CSR row-pointer array on the GPU with
21 |   ## `cusparseXcoo2csr`, then copies that array back and prints it.
22 |   let
23 |     n = 4.cint    # row count handed to cusparseXcoo2csr
24 |     nnz = 9.cint  # number of stored entries (length of rows/cols/vals)
25 |   var
26 |     rows = [0.cint, 0, 0, 1, 2, 2, 2, 3, 3]
27 |     cols = [0.cint, 2, 3, 1, 0, 2, 3, 1, 3]
28 |     vals = [1'f32, 2, 3, 4, 5, 6, 7, 8, 9]
29 |     # A CSR row-pointer array holds one offset per row plus a closing entry,
30 |     # i.e. n + 1 = 5 elements. cusparseXcoo2csr writes all of them, so the
31 |     # host and device buffers sized from this array must not be shorter.
32 |     csrRows = [0.cint, 0, 0, 0, 0]
33 | 
34 |     handle: cusparseHandle_t
35 |     gpuRows: ptr cint
36 |     gpuCols: ptr cint
37 |     gpuVals: ptr cfloat
38 |     gpuCsrRows: ptr cint
39 | 
40 |   check cudaMalloc(cast[ptr pointer](addr gpuRows), sizeof(rows))
41 |   check cudaMalloc(cast[ptr pointer](addr gpuCols), sizeof(cols))
42 |   check cudaMalloc(cast[ptr pointer](addr gpuVals), sizeof(vals))
43 |   check cudaMalloc(cast[ptr pointer](addr gpuCsrRows), sizeof(csrRows))
44 | 
45 |   # Upload the full COO triplet; the conversion call below is only handed
46 |   # the row indices (gpuRows) and the output buffer (gpuCsrRows).
47 |   check cudaMemcpy(gpuRows, rows.first, sizeof(rows), cudaMemcpyHostToDevice)
48 |   check cudaMemcpy(gpuCols, cols.first, sizeof(cols), cudaMemcpyHostToDevice)
49 |   check cudaMemcpy(gpuVals, vals.first, sizeof(vals), cudaMemcpyHostToDevice)
50 | 
51 |   check cusparseCreate(addr handle)
52 |   check cusparseXcoo2csr(handle, gpuRows, nnz, n, gpuCsrRows, CUSPARSE_INDEX_BASE_ZERO)
53 | 
54 |   check cudaMemcpy(csrRows.first, gpuCsrRows, sizeof(csrRows), cudaMemcpyDeviceToHost)
55 | 
56 |   check cusparseDestroy(handle)
57 |   check cudaFree(gpuRows)
58 |   check cudaFree(gpuCols)
59 |   check cudaFree(gpuVals)
60 |   check cudaFree(gpuCsrRows)
61 | 
62 |   # For the `rows` above the expected output is @[0, 3, 4, 7, 9].
63 |   echo @csrRows
64 | 
65 | when isMainModule:
66 |   main()


--------------------------------------------------------------------------------
/include/cuda12_5/cusolver_common.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 2014 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * NOTICE TO LICENSEE:
  5 |  *
  6 |  * This source code and/or documentation ("Licensed Deliverables") are
  7 |  * subject to NVIDIA intellectual property rights under U.S. and
  8 |  * international Copyright laws.
  9 |  *
 10 |  * These Licensed Deliverables contained herein is PROPRIETARY and
 11 |  * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 |  * conditions of a form of NVIDIA software license agreement by and
 13 |  * between NVIDIA and Licensee ("License Agreement") or electronically
 14 |  * accepted by Licensee.  Notwithstanding any terms or conditions to
 15 |  * the contrary in the License Agreement, reproduction or disclosure
 16 |  * of the Licensed Deliverables to any third party without the express
 17 |  * written consent of NVIDIA is prohibited.
 18 |  *
 19 |  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 |  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 |  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 |  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 |  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 |  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 |  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 |  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 |  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 |  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 |  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 |  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 |  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 |  * OF THESE LICENSED DELIVERABLES.
 33 |  *
 34 |  * U.S. Government End Users.  These Licensed Deliverables are a
 35 |  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 |  * 1995), consisting of "commercial computer software" and "commercial
 37 |  * computer software documentation" as such terms are used in 48
 38 |  * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 |  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 |  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 |  * U.S. Government End Users acquire the Licensed Deliverables with
 42 |  * only those rights set forth herein.
 43 |  *
 44 |  * Any use of the Licensed Deliverables in individual and commercial
 45 |  * software must include, in the user documentation and internal
 46 |  * comments to the code, the above Disclaimer and U.S. Government End
 47 |  * Users Notice.
 48 |  */
 49 | 
 50 | #if !defined(CUSOLVER_COMMON_H_)
 51 |   #define CUSOLVER_COMMON_H_
 52 | 
 53 |   #include "library_types.h"
 54 | 
 55 |   #ifndef CUSOLVERAPI
 56 |     #ifdef _WIN32
 57 |       #define CUSOLVERAPI __stdcall
 58 |     #else
 59 |       #define CUSOLVERAPI
 60 |     #endif
 61 |   #endif
 62 | 
 63 |   #if defined(_MSC_VER)
 64 | typedef __int64 clonglong;
 65 |   #else
 66 |     #include <inttypes.h>
 67 |   #endif
 68 | 
 69 | typedef int cusolver_int_t;
 70 |   /* Library version; CUSOLVER_VERSION evaluates to 11 * 1000 + 6 * 100 + 3 = 11603. */
 71 |   #define CUSOLVER_VER_MAJOR 11
 72 |   #define CUSOLVER_VER_MINOR 6
 73 |   #define CUSOLVER_VER_PATCH 3
 74 |   #define CUSOLVER_VER_BUILD 83
 75 |   #define CUSOLVER_VERSION                                                     \
 76 |     (CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH)
 77 | 
 78 | //------------------------------------------------------------------------------
 79 | 
 80 |   // #if !defined(_MSC_VER)
 81 |   //   #define CUSOLVER_CPP_VERSION __cplusplus
 82 |   // #elif _MSC_FULL_VER >= 190024210 // Visual Studio 2015 Update 3
 83 |   //   #define CUSOLVER_CPP_VERSION _MSVC_LANG
 84 |   // #else
 85 |   //   #define CUSOLVER_CPP_VERSION 0
 86 |   // #endif
 87 | 
 88 | //------------------------------------------------------------------------------
 89 | 
 90 |   // #if !defined(DISABLE_CUSOLVER_DEPRECATED)
 91 |   //
 92 |   //   #if CUSOLVER_CPP_VERSION >= 201402L
 93 |   //
 94 |   //     #define CUSOLVER_DEPRECATED(new_func)                                    \
 95 |   //       [[deprecated("please use " #new_func " instead")]]
 96 |   //
 97 |   //   #elif defined(_MSC_VER)
 98 |   //
 99 |   //     #define CUSOLVER_DEPRECATED(new_func)                                    \
100 |   //       __declspec(deprecated("please use " #new_func " instead"))
101 |   //
102 |   //   #elif defined(__INTEL_COMPILER) || defined(__clang__) ||                   \
103 |   //     (defined(__GNUC__) &&                                                    \
104 |   //      (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
105 |   //
106 |   //     #define CUSOLVER_DEPRECATED(new_func)                                    \
107 |   //       __attribute__((deprecated("please use " #new_func " instead")))
108 |   //
109 |   //   #elif defined(__GNUC__) || defined(__xlc__)
110 |   //
111 |   //     #define CUSOLVER_DEPRECATED(new_func) __attribute__((deprecated))
112 |   //
113 |   //   #else
114 |   //
115 |   //     #define CUSOLVER_DEPRECATED(new_func)
116 |   //
117 |   //   #endif // defined(__cplusplus) && __cplusplus >= 201402L
118 |   // //------------------------------------------------------------------------------
119 |   //
120 |   //   #if CUSOLVER_CPP_VERSION >= 201703L
121 |   //
122 |   //     #define CUSOLVER_DEPRECATED_ENUM(new_enum)                               \
123 |   //       [[deprecated("please use " #new_enum " instead")]]
124 |   //
125 |   //   #elif defined(__clang__) ||                                                \
126 |   //     (defined(__GNUC__) && __GNUC__ >= 6 && !defined(__PGI))
127 |   //
128 |   //     #define CUSOLVER_DEPRECATED_ENUM(new_enum)                               \
129 |   //       __attribute__((deprecated("please use " #new_enum " instead")))
130 |   //
131 |   //   #else
132 |   //
133 |   //     #define CUSOLVER_DEPRECATED_ENUM(new_enum)
134 |   //
135 |   //   #endif // defined(__cplusplus) && __cplusplus >= 201402L
136 |   //
137 |   // #else // defined(DISABLE_CUSOLVER_DEPRECATED)
138 |   //
139 |   //   #define CUSOLVER_DEPRECATED(new_func)
140 |   //   #define CUSOLVER_DEPRECATED_ENUM(new_enum)
141 |   //
142 |   // #endif // !defined(DISABLE_CUSOLVER_DEPRECATED)
143 | 
144 |   // #undef CUSOLVER_CPP_VERSION
145 | 
146 |   #if defined(__cplusplus)
147 | extern "C" {
148 |   #endif /* __cplusplus */
149 |   /* Status codes returned by the cuSOLVER functions below; 17-19, 24, 27-29 are unused. */
150 |   typedef enum {
151 |     CUSOLVER_STATUS_SUCCESS = 0,
152 |     CUSOLVER_STATUS_NOT_INITIALIZED = 1,
153 |     CUSOLVER_STATUS_ALLOC_FAILED = 2,
154 |     CUSOLVER_STATUS_INVALID_VALUE = 3,
155 |     CUSOLVER_STATUS_ARCH_MISMATCH = 4,
156 |     CUSOLVER_STATUS_MAPPING_ERROR = 5,
157 |     CUSOLVER_STATUS_EXECUTION_FAILED = 6,
158 |     CUSOLVER_STATUS_INTERNAL_ERROR = 7,
159 |     CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
160 |     CUSOLVER_STATUS_NOT_SUPPORTED = 9,
161 |     CUSOLVER_STATUS_ZERO_PIVOT = 10,
162 |     CUSOLVER_STATUS_INVALID_LICENSE = 11,
163 |     CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED = 12,
164 |     CUSOLVER_STATUS_IRS_PARAMS_INVALID = 13,
165 |     CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC = 14,
166 |     CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE = 15,
167 |     CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER = 16,
168 |     CUSOLVER_STATUS_IRS_INTERNAL_ERROR = 20,
169 |     CUSOLVER_STATUS_IRS_NOT_SUPPORTED = 21,
170 |     CUSOLVER_STATUS_IRS_OUT_OF_RANGE = 22,
171 |     CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES = 23,
172 |     CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED = 25,
173 |     CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED = 26,
174 |     CUSOLVER_STATUS_IRS_MATRIX_SINGULAR = 30,
175 |     CUSOLVER_STATUS_INVALID_WORKSPACE = 31
176 |   } cusolverStatus_t;
177 |   /* Eigenproblem type selector (1..3); meaning of each value: see the cuSOLVER docs. */
178 |   typedef enum {
179 |     CUSOLVER_EIG_TYPE_1 = 1,
180 |     CUSOLVER_EIG_TYPE_2 = 2,
181 |     CUSOLVER_EIG_TYPE_3 = 3
182 |   } cusolverEigType_t;
183 |   /* Eigen-solver mode; presumably whether eigenvectors are computed -- confirm. */
184 |   typedef enum {
185 |     CUSOLVER_EIG_MODE_NOVECTOR = 0,
186 |     CUSOLVER_EIG_MODE_VECTOR = 1
187 |   } cusolverEigMode_t;
188 |   /* Eigenvalue range selector (ALL, I, V); presumably all / by-index / by-value -- confirm. */
189 |   typedef enum {
190 |     CUSOLVER_EIG_RANGE_ALL = 1001,
191 |     CUSOLVER_EIG_RANGE_I = 1002,
192 |     CUSOLVER_EIG_RANGE_V = 1003,
193 |   } cusolverEigRange_t;
194 |   /* Norm selector; going by the names: infinity, max, one and Frobenius norms. */
195 |   typedef enum {
196 |     CUSOLVER_INF_NORM = 104,
197 |     CUSOLVER_MAX_NORM = 105,
198 |     CUSOLVER_ONE_NORM = 106,
199 |     CUSOLVER_FRO_NORM = 107,
200 |   } cusolverNorm_t;
201 |   /* IRS refinement selector; note the CUSOLVER_PREC_* constants live in this enum too. */
202 |   typedef enum {
203 |     CUSOLVER_IRS_REFINE_NOT_SET = 1100,
204 |     CUSOLVER_IRS_REFINE_NONE = 1101,
205 |     CUSOLVER_IRS_REFINE_CLASSICAL = 1102,
206 |     CUSOLVER_IRS_REFINE_CLASSICAL_GMRES = 1103,
207 |     CUSOLVER_IRS_REFINE_GMRES = 1104,
208 |     CUSOLVER_IRS_REFINE_GMRES_GMRES = 1105,
209 |     CUSOLVER_IRS_REFINE_GMRES_NOPCOND = 1106,
210 | 
211 |     CUSOLVER_PREC_DD = 1150,
212 |     CUSOLVER_PREC_SS = 1151,
213 |     CUSOLVER_PREC_SHT = 1152,
214 | 
215 |   } cusolverIRSRefinement_t;
216 |   /* Precision/type codes: R_* are 1201-1208, C_* are the same list offset by 10. */
217 |   typedef enum {
218 |     CUSOLVER_R_8I = 1201,
219 |     CUSOLVER_R_8U = 1202,
220 |     CUSOLVER_R_64F = 1203,
221 |     CUSOLVER_R_32F = 1204,
222 |     CUSOLVER_R_16F = 1205,
223 |     CUSOLVER_R_16BF = 1206,
224 |     CUSOLVER_R_TF32 = 1207,
225 |     CUSOLVER_R_AP = 1208,
226 |     CUSOLVER_C_8I = 1211,
227 |     CUSOLVER_C_8U = 1212,
228 |     CUSOLVER_C_64F = 1213,
229 |     CUSOLVER_C_32F = 1214,
230 |     CUSOLVER_C_16F = 1215,
231 |     CUSOLVER_C_16BF = 1216,
232 |     CUSOLVER_C_TF32 = 1217,
233 |     CUSOLVER_C_AP = 1218,
234 |   } cusolverPrecType_t;
235 |   /* Algorithm selector; CUSOLVER_ALG_0 is the default. */
236 |   typedef enum {
237 |     CUSOLVER_ALG_0 = 0, /* default algorithm */
238 |     CUSOLVER_ALG_1 = 1,
239 |     CUSOLVER_ALG_2 = 2
240 |   } cusolverAlgMode_t;
241 |   /* NOTE: enumerators use a CUBLAS_ prefix although the type is cusolverStorevMode_t. */
242 |   typedef enum {
243 |     CUBLAS_STOREV_COLUMNWISE = 0,
244 |     CUBLAS_STOREV_ROWWISE = 1
245 |   } cusolverStorevMode_t;
246 |   /* NOTE: enumerators use a CUBLAS_ prefix although the type is cusolverDirectMode_t. */
247 |   typedef enum {
248 |     CUBLAS_DIRECT_FORWARD = 0,
249 |     CUBLAS_DIRECT_BACKWARD = 1
250 |   } cusolverDirectMode_t;
251 |   /* Query functions; each returns a status, result presumably written through the int* argument. */
252 |   cusolverStatus_t CUSOLVERAPI
253 |     cusolverGetProperty(libraryPropertyType type, int *value);
254 | 
255 |   cusolverStatus_t CUSOLVERAPI cusolverGetVersion(int *version);
256 | 
257 |   #if defined(__cplusplus)
258 | }
259 |   #endif /* __cplusplus */
260 | 
261 | #endif // CUSOLVER_COMMON_H_
262 | 


--------------------------------------------------------------------------------
/include/cuda12_5/vector_types.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * NOTICE TO LICENSEE:
  5 |  *
  6 |  * This source code and/or documentation ("Licensed Deliverables") are
  7 |  * subject to NVIDIA intellectual property rights under U.S. and
  8 |  * international Copyright laws.
  9 |  *
 10 |  * These Licensed Deliverables contained herein is PROPRIETARY and
 11 |  * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 |  * conditions of a form of NVIDIA software license agreement by and
 13 |  * between NVIDIA and Licensee ("License Agreement") or electronically
 14 |  * accepted by Licensee.  Notwithstanding any terms or conditions to
 15 |  * the contrary in the License Agreement, reproduction or disclosure
 16 |  * of the Licensed Deliverables to any third party without the express
 17 |  * written consent of NVIDIA is prohibited.
 18 |  *
 19 |  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 |  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 |  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 |  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 |  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 |  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 |  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 |  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 |  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 |  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 |  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 |  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 |  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 |  * OF THESE LICENSED DELIVERABLES.
 33 |  *
 34 |  * U.S. Government End Users.  These Licensed Deliverables are a
 35 |  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 |  * 1995), consisting of "commercial computer software" and "commercial
 37 |  * computer software documentation" as such terms are used in 48
 38 |  * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 |  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 |  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 |  * U.S. Government End Users acquire the Licensed Deliverables with
 42 |  * only those rights set forth herein.
 43 |  *
 44 |  * Any use of the Licensed Deliverables in individual and commercial
 45 |  * software must include, in the user documentation and internal
 46 |  * comments to the code, the above Disclaimer and U.S. Government End
 47 |  * Users Notice.
 48 |  */
 49 | 
 50 | #if !defined(__VECTOR_TYPES_H__)
 51 | #define __VECTOR_TYPES_H__
 52 | 
 53 | struct __device_builtin__ char1
 54 | {
 55 |     signed char x;
 56 | };
 57 | 
 58 | struct __device_builtin__ uchar1
 59 | {
 60 |     unsigned char x;
 61 | };
 62 | 
 63 | 
 64 | struct __device_builtin__  char2
 65 | {
 66 |     signed char x, y;
 67 | };
 68 | 
 69 | struct __device_builtin__  uchar2
 70 | {
 71 |     unsigned char x, y;
 72 | };
 73 | 
 74 | struct __device_builtin__ char3
 75 | {
 76 |     signed char x, y, z;
 77 | };
 78 | 
 79 | struct __device_builtin__ uchar3
 80 | {
 81 |     unsigned char x, y, z;
 82 | };
 83 | 
 84 | struct __device_builtin__  char4
 85 | {
 86 |     signed char x, y, z, w;
 87 | };
 88 | 
 89 | struct __device_builtin__  uchar4
 90 | {
 91 |     unsigned char x, y, z, w;
 92 | };
 93 | 
 94 | struct __device_builtin__ short1
 95 | {
 96 |     short x;
 97 | };
 98 | 
 99 | struct __device_builtin__ ushort1
100 | {
101 |     unsigned short x;
102 | };
103 | 
104 | struct __device_builtin__  short2
105 | {
106 |     short x, y;
107 | };
108 | 
109 | struct __device_builtin__  ushort2
110 | {
111 |     unsigned short x, y;
112 | };
113 | 
114 | struct __device_builtin__ short3
115 | {
116 |     short x, y, z;
117 | };
118 | 
119 | struct __device_builtin__ ushort3
120 | {
121 |     unsigned short x, y, z;
122 | };
123 | 
124 | struct __device_builtin__ short4
125 | {
126 |     short x, y, z, w;
127 | };
128 | 
129 | struct __device_builtin__ ushort4
130 | {
131 |     unsigned short x, y, z, w;
132 | };
133 | 
134 | struct __device_builtin__ int1
135 | {
136 |     int x;
137 | };
138 | 
139 | struct __device_builtin__ uint1
140 | {
141 |     unsigned int x;
142 | };
143 | 
144 | struct __device_builtin__ int2
145 | {
146 |     int x, y;
147 | };
148 | 
149 | struct __device_builtin__ uint2
150 | {
151 |     unsigned int x, y;
152 | };
153 | 
154 | struct __device_builtin__ int3
155 | {
156 |     int x, y, z;
157 | };
158 | 
159 | struct __device_builtin__ uint3
160 | {
161 |     unsigned int x, y, z;
162 | };
163 | 
164 | struct __device_builtin__ int4
165 | {
166 |     int x, y, z, w;
167 | };
168 | 
169 | struct __device_builtin__ uint4
170 | {
171 |     unsigned int x, y, z, w;
172 | };
173 | 
174 | struct __device_builtin__ long1
175 | {
176 |     long int x;
177 | };
178 | 
179 | struct __device_builtin__ ulong1
180 | {
181 |     unsigned long x;
182 | };
183 | 
184 | struct __device_builtin__ long2
185 | {
186 |     long int x, y;
187 | };
188 | 
189 | struct __device_builtin__ ulong2
190 | {
191 |     unsigned long int x, y;
192 | };
193 | 
194 | struct __device_builtin__ long3
195 | {
196 |     long int x, y, z;
197 | };
198 | 
199 | struct __device_builtin__ ulong3
200 | {
201 |     unsigned long int x, y, z;
202 | };
203 | 
204 | struct __device_builtin__ long4
205 | {
206 |     long int x, y, z, w;
207 | };
208 | 
209 | struct __device_builtin__ ulong4
210 | {
211 |     unsigned long int x, y, z, w;
212 | };
213 | 
214 | struct __device_builtin__ float1
215 | {
216 |     float x;
217 | };
218 | 
219 | struct __device_builtin__ float2
220 | {
221 |     float x; float y;
222 | };
223 | 
224 | struct __device_builtin__ float3
225 | {
226 |     float x, y, z;
227 | };
228 | 
229 | struct __device_builtin__ float4
230 | {
231 |     float x, y, z, w;
232 | };
233 | 
234 | struct __device_builtin__ longlong1
235 | {
236 |     long long int x;
237 | };
238 | 
239 | struct __device_builtin__ ulonglong1
240 | {
241 |     unsigned long long int x;
242 | };
243 | 
244 | struct __device_builtin__ longlong2
245 | {
246 |     long long int x, y;
247 | };
248 | 
249 | struct __device_builtin__ ulonglong2
250 | {
251 |     unsigned long long int x, y;
252 | };
253 | 
254 | struct __device_builtin__ longlong3
255 | {
256 |     long long int x, y, z;
257 | };
258 | 
259 | struct __device_builtin__ ulonglong3
260 | {
261 |     unsigned long long int x, y, z;
262 | };
263 | 
264 | struct __device_builtin__ longlong4
265 | {
266 |     long long int x, y, z ,w;
267 | };
268 | 
269 | struct __device_builtin__ ulonglong4
270 | {
271 |     unsigned long long int x, y, z, w;
272 | };
273 | 
274 | struct __device_builtin__ double1
275 | {
276 |     double x;
277 | };
278 | 
279 | struct __device_builtin__ double2
280 | {
281 |     double x, y;
282 | };
283 | 
284 | struct __device_builtin__ double3
285 | {
286 |     double x, y, z;
287 | };
288 | 
289 | struct __device_builtin__ double4
290 | {
291 |     double x, y, z, w;
292 | };
293 | 
294 | 
295 | /*******************************************************************************
296 | *                                                                              *
297 | *                                                                              *
298 | *                                                                              *
299 | *******************************************************************************/
300 | 
301 | typedef __device_builtin__ struct char1 char1;
302 | typedef __device_builtin__ struct uchar1 uchar1;
303 | typedef __device_builtin__ struct char2 char2;
304 | typedef __device_builtin__ struct uchar2 uchar2;
305 | typedef __device_builtin__ struct char3 char3;
306 | typedef __device_builtin__ struct uchar3 uchar3;
307 | typedef __device_builtin__ struct char4 char4;
308 | typedef __device_builtin__ struct uchar4 uchar4;
309 | typedef __device_builtin__ struct short1 short1;
310 | typedef __device_builtin__ struct ushort1 ushort1;
311 | typedef __device_builtin__ struct short2 short2;
312 | typedef __device_builtin__ struct ushort2 ushort2;
313 | typedef __device_builtin__ struct short3 short3;
314 | typedef __device_builtin__ struct ushort3 ushort3;
315 | typedef __device_builtin__ struct short4 short4;
316 | typedef __device_builtin__ struct ushort4 ushort4;
317 | typedef __device_builtin__ struct int1 int1;
318 | typedef __device_builtin__ struct uint1 uint1;
319 | typedef __device_builtin__ struct int2 int2;
320 | typedef __device_builtin__ struct uint2 uint2;
321 | typedef __device_builtin__ struct int3 int3;
322 | typedef __device_builtin__ struct uint3 uint3;
323 | typedef __device_builtin__ struct int4 int4;
324 | typedef __device_builtin__ struct uint4 uint4;
325 | typedef __device_builtin__ struct long1 long1;
326 | typedef __device_builtin__ struct ulong1 ulong1;
327 | typedef __device_builtin__ struct long2 long2;
328 | typedef __device_builtin__ struct ulong2 ulong2;
329 | typedef __device_builtin__ struct long3 long3;
330 | typedef __device_builtin__ struct ulong3 ulong3;
331 | typedef __device_builtin__ struct long4 long4;
332 | typedef __device_builtin__ struct ulong4 ulong4;
333 | typedef __device_builtin__ struct float1 float1;
334 | typedef __device_builtin__ struct float2 float2;
335 | typedef __device_builtin__ struct float3 float3;
336 | typedef __device_builtin__ struct float4 float4;
337 | typedef __device_builtin__ struct longlong1 longlong1;
338 | typedef __device_builtin__ struct ulonglong1 ulonglong1;
339 | typedef __device_builtin__ struct longlong2 longlong2;
340 | typedef __device_builtin__ struct ulonglong2 ulonglong2;
341 | typedef __device_builtin__ struct longlong3 longlong3;
342 | typedef __device_builtin__ struct ulonglong3 ulonglong3;
343 | typedef __device_builtin__ struct longlong4 longlong4;
344 | typedef __device_builtin__ struct ulonglong4 ulonglong4;
345 | typedef __device_builtin__ struct double1 double1;
346 | typedef __device_builtin__ struct double2 double2;
347 | typedef __device_builtin__ struct double3 double3;
348 | typedef __device_builtin__ struct double4 double4;
349 | 
350 | /*******************************************************************************
351 | *                                                                              *
352 | *                                                                              *
353 | *                                                                              *
354 | *******************************************************************************/
355 | /* dim3: three unsigned int components (x, y, z); the typedef lets the struct tag be omitted. */
356 | struct __device_builtin__ dim3
357 | {
358 |     unsigned int x, y, z;
359 | };
360 | 
361 | typedef __device_builtin__ struct dim3 dim3;
362 | 
363 | #endif /* !__VECTOR_TYPES_H__ */
364 | 


--------------------------------------------------------------------------------
/include/cuda8_0/vector_types.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
  3 |  *
  4 |  * NOTICE TO LICENSEE:
  5 |  *
  6 |  * This source code and/or documentation ("Licensed Deliverables") are
  7 |  * subject to NVIDIA intellectual property rights under U.S. and
  8 |  * international Copyright laws.
  9 |  *
 10 |  * These Licensed Deliverables contained herein is PROPRIETARY and
 11 |  * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 |  * conditions of a form of NVIDIA software license agreement by and
 13 |  * between NVIDIA and Licensee ("License Agreement") or electronically
 14 |  * accepted by Licensee.  Notwithstanding any terms or conditions to
 15 |  * the contrary in the License Agreement, reproduction or disclosure
 16 |  * of the Licensed Deliverables to any third party without the express
 17 |  * written consent of NVIDIA is prohibited.
 18 |  *
 19 |  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 |  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 |  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 |  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 |  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 |  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 |  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 |  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 |  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 |  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 |  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 |  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 |  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 |  * OF THESE LICENSED DELIVERABLES.
 33 |  *
 34 |  * U.S. Government End Users.  These Licensed Deliverables are a
 35 |  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 |  * 1995), consisting of "commercial computer software" and "commercial
 37 |  * computer software documentation" as such terms are used in 48
 38 |  * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 |  * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 |  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 |  * U.S. Government End Users acquire the Licensed Deliverables with
 42 |  * only those rights set forth herein.
 43 |  *
 44 |  * Any use of the Licensed Deliverables in individual and commercial
 45 |  * software must include, in the user documentation and internal
 46 |  * comments to the code, the above Disclaimer and U.S. Government End
 47 |  * Users Notice.
 48 |  */
 49 | 
 50 | #if !defined(__VECTOR_TYPES_H__)
 51 | #define __VECTOR_TYPES_H__
 52 | 
 53 | struct __device_builtin__ char1
 54 | {
 55 |     signed char x;
 56 | };
 57 | 
 58 | struct __device_builtin__ uchar1
 59 | {
 60 |     unsigned char x;
 61 | };
 62 | 
 63 | 
 64 | struct __device_builtin__  char2
 65 | {
 66 |     signed char x, y;
 67 | };
 68 | 
 69 | struct __device_builtin__  uchar2
 70 | {
 71 |     unsigned char x, y;
 72 | };
 73 | 
 74 | struct __device_builtin__ char3
 75 | {
 76 |     signed char x, y, z;
 77 | };
 78 | 
 79 | struct __device_builtin__ uchar3
 80 | {
 81 |     unsigned char x, y, z;
 82 | };
 83 | 
 84 | struct __device_builtin__  char4
 85 | {
 86 |     signed char x, y, z, w;
 87 | };
 88 | 
 89 | struct __device_builtin__  uchar4
 90 | {
 91 |     unsigned char x, y, z, w;
 92 | };
 93 | 
 94 | struct __device_builtin__ short1
 95 | {
 96 |     short x;
 97 | };
 98 | 
 99 | struct __device_builtin__ ushort1
100 | {
101 |     unsigned short x;
102 | };
103 | 
104 | struct __device_builtin__  short2
105 | {
106 |     short x, y;
107 | };
108 | 
109 | struct __device_builtin__  ushort2
110 | {
111 |     unsigned short x, y;
112 | };
113 | 
114 | struct __device_builtin__ short3
115 | {
116 |     short x, y, z;
117 | };
118 | 
119 | struct __device_builtin__ ushort3
120 | {
121 |     unsigned short x, y, z;
122 | };
123 | 
124 | struct __device_builtin__ short4
125 | {
126 |     short x, y, z, w;
127 | };
128 | 
129 | struct __device_builtin__ ushort4
130 | {
131 |     unsigned short x, y, z, w;
132 | };
133 | 
134 | struct __device_builtin__ int1
135 | {
136 |     int x;
137 | };
138 | 
139 | struct __device_builtin__ uint1
140 | {
141 |     unsigned int x;
142 | };
143 | 
144 | struct __device_builtin__ int2
145 | {
146 |     int x, y;
147 | };
148 | 
149 | struct __device_builtin__ uint2
150 | {
151 |     unsigned int x, y;
152 | };
153 | 
154 | struct __device_builtin__ int3
155 | {
156 |     int x, y, z;
157 | };
158 | 
159 | struct __device_builtin__ uint3
160 | {
161 |     unsigned int x, y, z;
162 | };
163 | 
164 | struct __device_builtin__ int4
165 | {
166 |     int x, y, z, w;
167 | };
168 | 
169 | struct __device_builtin__ uint4
170 | {
171 |     unsigned int x, y, z, w;
172 | };
173 | 
174 | struct __device_builtin__ long1
175 | {
176 |     long int x;
177 | };
178 | 
179 | struct __device_builtin__ ulong1
180 | {
181 |     unsigned long x;
182 | };
183 | 
184 | struct __device_builtin__ long2
185 | {
186 |     long int x, y;
187 | };
188 | 
189 | struct __device_builtin__ ulong2
190 | {
191 |     unsigned long int x, y;
192 | };
193 | 
194 | struct __device_builtin__ long3
195 | {
196 |     long int x, y, z;
197 | };
198 | 
199 | struct __device_builtin__ ulong3
200 | {
201 |     unsigned long int x, y, z;
202 | };
203 | 
204 | struct __device_builtin__ long4
205 | {
206 |     long int x, y, z, w;
207 | };
208 | 
209 | struct __device_builtin__ ulong4
210 | {
211 |     unsigned long int x, y, z, w;
212 | };
213 | 
214 | struct __device_builtin__ float1
215 | {
216 |     float x;
217 | };
218 | 
219 | struct __device_builtin__ float2
220 | {
221 |     float x; float y;
222 | };
223 | 
224 | struct __device_builtin__ float3
225 | {
226 |     float x, y, z;
227 | };
228 | 
229 | struct __device_builtin__ float4
230 | {
231 |     float x, y, z, w;
232 | };
233 | 
234 | struct __device_builtin__ longlong1
235 | {
236 |     long long int x;
237 | };
238 | 
239 | struct __device_builtin__ ulonglong1
240 | {
241 |     unsigned long long int x;
242 | };
243 | 
244 | struct __device_builtin__ longlong2
245 | {
246 |     long long int x, y;
247 | };
248 | 
249 | struct __device_builtin__ ulonglong2
250 | {
251 |     unsigned long long int x, y;
252 | };
253 | 
254 | struct __device_builtin__ longlong3
255 | {
256 |     long long int x, y, z;
257 | };
258 | 
259 | struct __device_builtin__ ulonglong3
260 | {
261 |     unsigned long long int x, y, z;
262 | };
263 | 
264 | struct __device_builtin__ longlong4
265 | {
266 |     long long int x, y, z ,w;
267 | };
268 | 
269 | struct __device_builtin__ ulonglong4
270 | {
271 |     unsigned long long int x, y, z, w;
272 | };
273 | 
274 | struct __device_builtin__ double1
275 | {
276 |     double x;
277 | };
278 | 
279 | struct __device_builtin__ double2
280 | {
281 |     double x, y;
282 | };
283 | 
284 | struct __device_builtin__ double3
285 | {
286 |     double x, y, z;
287 | };
288 | 
289 | struct __device_builtin__ double4
290 | {
291 |     double x, y, z, w;
292 | };
293 | 
294 | 
295 | /*******************************************************************************
296 | *                                                                              *
297 | *                                                                              *
298 | *                                                                              *
299 | *******************************************************************************/
300 | 
301 | typedef __device_builtin__ struct char1 char1;
302 | typedef __device_builtin__ struct uchar1 uchar1;
303 | typedef __device_builtin__ struct char2 char2;
304 | typedef __device_builtin__ struct uchar2 uchar2;
305 | typedef __device_builtin__ struct char3 char3;
306 | typedef __device_builtin__ struct uchar3 uchar3;
307 | typedef __device_builtin__ struct char4 char4;
308 | typedef __device_builtin__ struct uchar4 uchar4;
309 | typedef __device_builtin__ struct short1 short1;
310 | typedef __device_builtin__ struct ushort1 ushort1;
311 | typedef __device_builtin__ struct short2 short2;
312 | typedef __device_builtin__ struct ushort2 ushort2;
313 | typedef __device_builtin__ struct short3 short3;
314 | typedef __device_builtin__ struct ushort3 ushort3;
315 | typedef __device_builtin__ struct short4 short4;
316 | typedef __device_builtin__ struct ushort4 ushort4;
317 | typedef __device_builtin__ struct int1 int1;
318 | typedef __device_builtin__ struct uint1 uint1;
319 | typedef __device_builtin__ struct int2 int2;
320 | typedef __device_builtin__ struct uint2 uint2;
321 | typedef __device_builtin__ struct int3 int3;
322 | typedef __device_builtin__ struct uint3 uint3;
323 | typedef __device_builtin__ struct int4 int4;
324 | typedef __device_builtin__ struct uint4 uint4;
325 | typedef __device_builtin__ struct long1 long1;
326 | typedef __device_builtin__ struct ulong1 ulong1;
327 | typedef __device_builtin__ struct long2 long2;
328 | typedef __device_builtin__ struct ulong2 ulong2;
329 | typedef __device_builtin__ struct long3 long3;
330 | typedef __device_builtin__ struct ulong3 ulong3;
331 | typedef __device_builtin__ struct long4 long4;
332 | typedef __device_builtin__ struct ulong4 ulong4;
333 | typedef __device_builtin__ struct float1 float1;
334 | typedef __device_builtin__ struct float2 float2;
335 | typedef __device_builtin__ struct float3 float3;
336 | typedef __device_builtin__ struct float4 float4;
337 | typedef __device_builtin__ struct longlong1 longlong1;
338 | typedef __device_builtin__ struct ulonglong1 ulonglong1;
339 | typedef __device_builtin__ struct longlong2 longlong2;
340 | typedef __device_builtin__ struct ulonglong2 ulonglong2;
341 | typedef __device_builtin__ struct longlong3 longlong3;
342 | typedef __device_builtin__ struct ulonglong3 ulonglong3;
343 | typedef __device_builtin__ struct longlong4 longlong4;
344 | typedef __device_builtin__ struct ulonglong4 ulonglong4;
345 | typedef __device_builtin__ struct double1 double1;
346 | typedef __device_builtin__ struct double2 double2;
347 | typedef __device_builtin__ struct double3 double3;
348 | typedef __device_builtin__ struct double4 double4;
349 | 
350 | /*******************************************************************************
351 | *                                                                              *
352 | *                                                                              *
353 | *                                                                              *
354 | *******************************************************************************/
355 | 
356 | struct __device_builtin__ dim3
357 | {
358 |     unsigned int x, y, z;
359 | };
360 | 
361 | typedef __device_builtin__ struct dim3 dim3;
362 | 
363 | #endif /* !__VECTOR_TYPES_H__ */
364 | 


--------------------------------------------------------------------------------
/nimcuda.nimble:
--------------------------------------------------------------------------------
  1 | # Package
  2 | 
  3 | version       = "0.2.2"
  4 | author        = "Andrea Ferretti"
  5 | description   = "Nim binding for CUDA"
  6 | license       = "Apache2"
  7 | skipDirs      = @["headers", "include", "c2nim", "examples", "htmldocs"]
  8 | srcDir        = "src"
  9 | 
 10 | # Dependencies
 11 | 
 12 | requires "nim >= 1.4.0"
 13 | 
 14 | import
 15 |   std / [strscans, strformat, os, sequtils, strutils, pegs]
 16 | 
 17 | type CudaVersion = enum
 18 |   cuda8_0, cuda12_5
 19 | 
 20 | const DefaultVersion = cuda8_0
 21 | 
 22 | const
 23 |   ModifiedHeadersDir = "include"
 24 |   NimCodeDir = "src"
 25 |   UtilitiesDir = "utils"
 26 |   DocumentationDir = "htmldocs"
 27 |   ExamplesDir = "examples"
 28 |   C2nimDirectivesDir = "c2nim"
 29 |   TemporaryHeadersDir = "headers"
 30 | 
 31 | const Libs = [
 32 |   # "library_types",
 33 |   # "vector_types",
 34 |   # "driver_types", # do not decomment - the nim file is manually adjusted
 35 |   "surface_types",
 36 |   "texture_types",
 37 |   "cuda_runtime_api",
 38 |   # "cuda_occupancy", # do not decomment - the nim file is manually adjusted
 39 |   "cuComplex",
 40 |   "cublas_api",
 41 |   # "cublas_v2",
 42 |   # "cudnn_v9",
 43 |   "cufft",
 44 |   "curand",
 45 |   "cusolver_common",
 46 |   "cusolverDn",
 47 |   "cusolverRf",
 48 |   "cusolverSp",
 49 |   "cusparse",
 50 |   "nvblas",
 51 |   #"nvgraph" <- removed in cuda 11.0, adopted into cugraph
 52 |   "nvrtc",
 53 |   "cuda",
 54 | ]
 55 | 
 56 | proc systemCudaName(v: CudaVersion): string =
 57 |   ## Returns the name used for cuda directories on linux.
 58 |   var captures: array[2, string]
 59 |   assert ($v).match(peg" 'cuda' {\d+} '_' {\d+} ", captures)
 60 |   fmt"cuda-{captures[0]}.{captures[1]}"
 61 | 
 62 | proc systemCudaInclude(version: CudaVersion): string =
 63 |   when hostOS == "windows":
 64 |     getEnv("CUDA_PATH") / "include"
 65 |   else:
 66 |     "/usr/local" / version.systemCudaName / "include"
 67 | 
 68 | proc systemCudaCLib(version: CudaVersion): string =
 69 |   when hostOS == "windows":
 70 |     getEnv("CUDA_PATH") / "lib" / "x64"
 71 |   else:
 72 |     "/usr/local" / version.systemCudaName / "lib64"
 73 | 
 74 | 
 75 | func nimcudaSourceDir(version: CudaVersion): string =
 76 |   const dirThatHoldsVersions = NimCodeDir / "nimcuda"
 77 |   result = dirThatHoldsVersions / $version
 78 | 
 79 | func nimcudaExamplesDir(version: CudaVersion): string =
 80 |   const dirThatHoldsVersions = ExamplesDir
 81 |   result = dirThatHoldsVersions / $version
 82 | 
 83 | 
 84 | proc patch(libFileName: string; version: CudaVersion): string =
 85 |   let installedLib = systemCudaInclude(version) / libFileName
 86 | 
 87 |   let
 88 |     simpleLibPath = ModifiedHeadersDir / $version / libFileName
 89 |     patchPath = C2nimDirectivesDir / $version / libFileName
 90 |     outPath = TemporaryHeadersDir / libFileName
 91 |     libContent =
 92 |       if simpleLibPath.fileExists: readFile(simpleLibPath)
 93 |       else: readFile(installedLib)
 94 |     patchContent = readFile(patchPath)
 95 | 
 96 |   writeFile(outPath, patchContent & "\n" & libContent)
 97 |   return outPath
 98 | 
 99 | 
proc preprocess(filePath: string) =
  ## Runs the `preprocessor` utility from `UtilitiesDir` on `filePath`.
  ## The utility is compiled in release mode first when its executable is
  ## not present yet.
  const
    toolExe = UtilitiesDir / "preprocessor".toExe
    toolSource = toolExe.changeFileExt("nim")

  if not fileExists(toolExe):
    exec "nim c -d:release " & toolSource

  exec toolExe & " " & filePath
109 | 
110 | 
proc postprocess(filePath: string) =
  ## Runs the `postprocessor` utility from `UtilitiesDir` on `filePath`.
  ## The utility is compiled in release mode first when its executable is
  ## not present yet.
  const
    toolExe = UtilitiesDir / "postprocessor".toExe
    toolSource = toolExe.changeFileExt("nim")

  if not fileExists(toolExe):
    exec "nim c -d:release " & toolSource

  exec toolExe & " " & filePath
120 | 
121 | 
proc process(libName: string; version: CudaVersion) =
  ## Generates the Nim binding for the library `libName`: the header is
  ## patched, preprocessed, translated with c2nim into the source directory
  ## of `version`, and the generated Nim file is postprocessed.
  const c2nimOptions =
    "--debug --strict --prefix\"_\" --prefix\"__\" --suffix\"_\" --suffix\"__\""
  let
    headerFile = libName.addFileExt("h")
    nimFile = nimcudaSourceDir(version) / headerFile.changeFileExt("nim")
    patchedHeader = patch(headerFile, version)

  preprocess patchedHeader
  exec "c2nim " & c2nimOptions & " " & patchedHeader & " -o:" & nimFile
  postprocess nimFile
131 | 
proc compile(filePath: string) =
  ## Invokes `nim c -c` on `filePath`.
  let command = "nim c -c " & filePath
  exec command
134 | 
proc compile(libName: string; version: CudaVersion) =
  ## Compiles the binding module named `libName` from the source directory
  ## of `version`.
  compile(nimcudaSourceDir(version) / libName.addFileExt("nim"))
138 | 
139 | 
proc processAll(version: CudaVersion) =
  ## Generates the bindings for every entry of `Libs`, then deletes the files
  ## left in `TemporaryHeadersDir`.
  mkDir TemporaryHeadersDir

  for libName in Libs:
    process(libName, version)

  # The file list is collected once, before anything is removed.
  for leftover in listFiles(TemporaryHeadersDir):
    rmFile leftover
149 | 
150 | 
151 | 
proc compileAll(version: CudaVersion) =
  ## Compiles every file in the source directory of `version`. For the
  ## default version the main `nimcuda` module is compiled first.
  if version == DefaultVersion:
    compile NimCodeDir / "nimcuda".addFileExt("nim")
  for sourceFile in listFiles(nimcudaSourceDir(version)):
    compile sourceFile
157 | 
158 | 
func parseCudaVersion(input: string): CudaVersion =
  ## Parses a cuda version written as an optional `cuda` prefix, the major
  ## digits, one of `.`, `-` or `_`, and the minor digits.
  ## Returns `DefaultVersion` when `input` does not have that shape or names
  ## a version other than 8.0 or 12.5.
  var
    cursor = 0
    majorDigits = ""
    minorDigits = ""
  let matched = input.scanp(cursor, ?"cuda",
    +(`Digits` -> majorDigits.add($_)),
    {'.', '-', '_'}, +(`Digits` -> minorDigits.add($_)))
  if not matched:
    return DefaultVersion

  case fmt"{majorDigits}.{minorDigits}"
  of "8.0": cuda8_0
  of "12.5": cuda12_5
  else: DefaultVersion
184 | 
185 | 
186 | const args = when NimMajor >= 2:
187 |   cmdline.commandLineParams()
188 | else:
189 |   os.commandLineParams()
190 | 
template taskWithCudaVersionArgument(name: untyped; description: string;
                                     body: untyped): untyped =
  ## Declares a nimble task accepting at most one command line argument, a
  ## cuda version. Inside `body` the parsed version is available as the
  ## injected symbol `cudaVersion`.
  task name, description:
    # Stringify the identifier `<name>Task` and cut the trailing "Task".
    const taskName = `name Task`.astToStr[0..^5]

    let
      taskIsLastArg = args[^1] == taskName
      taskIsSecondToLastArg = args[^2] == taskName

    if taskIsLastArg or taskIsSecondToLastArg:
      # When the task name itself is the last argument, parseCudaVersion
      # does not recognise it and yields `DefaultVersion`.
      let cudaVersion {.inject.} = args[^1].parseCudaVersion()
      body
    else:
      echo "Too many arguments! Please only pass the cuda version to this task."
      echo "Example: 'nimble $1 12.5'" % taskName
211 | 
template taskWithCertainVersions(name: untyped; description: string;
                                 versions: set[CudaVersion];
                                 body: untyped): untyped =
  ## Like `taskWithCudaVersionArgument`, but `body` only runs when the
  ## requested `cudaVersion` is a member of `versions`; for any other version
  ## a message listing the supported versions is printed instead.
  taskWithCudaVersionArgument name, description:
    if cudaVersion notin versions:
      echo "This task is only available for version(s) $1." % [$versions]
    else:
      body
223 | 
224 | 
225 | taskWithCudaVersionArgument headers, "generate bindings from headers":
226 |   processAll(cudaVersion)
227 | 
228 | taskWithCudaVersionArgument checkcheck,
229 |                           "check that bindings compile":
230 |   compileAll(cudaVersion)
231 | 
task docs, "generate documentation":
  # Start from an empty directory when output of an earlier run exists.
  if dirExists(DocumentationDir):
    rmDir DocumentationDir
    mkDir DocumentationDir

  for version in CudaVersion:
    let versionDocs = DocumentationDir / $version

    # Document every file of this version's bindings, with indexing on.
    for module in listFiles(nimcudaSourceDir(version)):
      exec "nim doc2 --index:on --outDir:" & versionDocs & " " & module

    # Build `theindex.html` for this version from the output directory.
    let indexPage = versionDocs / "theindex".addFileExt("html")
    exec "nim buildIndex -o:" & indexPage & " " & versionDocs
246 | 
247 | 
proc exampleConfig(version: CudaVersion) =
  ## Sets the compiler switches shared by all example tasks: hints off, line
  ## directives, stack and line traces, debug info, the binding directory of
  ## `version` on the module search path, and running the result.
  switch("hints", "off")
  switch("linedir", "on")
  switch("stacktrace", "on")
  switch("linetrace", "on")
  switch("debuginfo")
  switch("path", thisDir() / nimcudaSourceDir(version))
  switch("run")
256 | 
257 | taskWithCudaVersionArgument fft, "run fft example":
258 |   exampleConfig(cudaVersion)
259 |   setCommand "c", nimcudaExamplesDir(cudaVersion) / "fft".addFileExt("nim")
260 | 
261 | taskWithCudaVersionArgument sparse, "run sparse example":
262 |   exampleConfig(cudaVersion)
263 |   setCommand "c", nimcudaExamplesDir(cudaVersion) / "sparse".addFileExt("nim")
264 | 
265 | taskWithCudaVersionArgument random, "run random example":
266 |   exampleConfig(cudaVersion)
267 |   setCommand "c", nimcudaExamplesDir(cudaVersion) / "random".addFileExt("nim")
268 | 
269 | taskWithCertainVersions pagerank, "run pagerank example", {cuda8_0}:
270 |   # removed in cuda 11.0
271 |   exampleConfig(cudaVersion)
272 |   setCommand "c", nimcudaExamplesDir(cudaVersion) / "pagerank".addFileExt("nim")
273 | 
274 | taskWithCertainVersions blas, "run cublas example", {cuda12_5}:
275 |   # TODO: implement and test for 8.0
276 |   exampleConfig(cudaVersion)
277 |   setCommand "c", nimcudaExamplesDir(cudaVersion) / "blas".addFileExt("nim")
278 | 
279 | taskWithCertainVersions denseLinearSystem, "run cusolverDn example", {cuda12_5}:
280 |   # TODO: implement and test for 8.0
281 |   exampleConfig(cudaVersion)
282 |   setCommand "c", nimcudaExamplesDir(cudaVersion) /
283 |                                            "denseLinearSystem".addFileExt("nim")
284 | 
285 | taskWithCertainVersions sparseLinearSystem, "run cusolverSp example",
286 |     {cuda12_5}:
287 |   # TODO: implement and test for 8.0
288 |   exampleConfig(cudaVersion)
289 |   setCommand "c", nimcudaExamplesDir(cudaVersion) /
290 |                                           "sparseLinearSystem".addFileExt("nim")
291 | 
292 | taskWithCertainVersions runtimeCompilation, "run nvrtc example",
293 |     {cuda12_5}:
294 |   # TODO: implement and test for 8.0
295 |   exampleConfig(cudaVersion)
296 |   setCommand "c", nimcudaExamplesDir(cudaVersion) /
297 |                                           "runtimeCompilation".addFileExt("nim")
298 | 


--------------------------------------------------------------------------------
/src/nimcuda.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | ##[
 3 |   This main module exports a few of cuda 8.0's modules.
 4 |   If you need a different version or a module not exported here, try something
 5 |   like:
 6 | 
 7 |   .. code-block:: Nim
 8 |     import nimcuda/cuda8_0/library_name
 9 |   ]##
10 | 
11 | import
12 |   ./nimcuda/cuda8_0/[check, cuda_runtime_api, library_types, driver_types,
13 |                  vector_types]
14 | 
15 | 
16 | export check, cuda_runtime_api, library_types, driver_types, vector_types
17 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/check.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ./cublas_api
16 | # import ./cublas_v2
17 | # import ./cuComplex
18 | import ./cuda_occupancy
19 | # import ./cuda_runtime_api
20 | # import ./cudnn
21 | import ./cufft
22 | import ./curand
23 | import ./cusolver_common
24 | # import ./cusolverDn
25 | # import ./cusolverRf
26 | # import ./cusolverSp
27 | import ./cusparse
28 | import ./driver_types
29 | # import ./library_types
30 | # import ./nvblas
31 | # import ./nvgraph
32 | # import ./surface_types
33 | # import ./texture_types
34 | # import ./vector_types
35 | import ./nvrtc
36 | import ./cuda
37 | 
38 | type
39 |   CudaError* = object of IOError
40 |   CudaOccError* = object of IOError
41 |   CufftError* = object of IOError
42 |   CublasError* = object of IOError
43 |   CusparseError* = object of IOError
44 |   CusolverError* = object of IOError
45 |   CurandError* = object of IOError
46 |   # CudnnError* = object of IOError
47 |   # NVGraphError* = object of IOError
48 |   NvrtcError* = object of IOError
49 |   CudaDriverError* = object of IOError
50 | 
51 | 
func check*(a: sink cudaError_t) =
  ## Raises `CudaError` unless `a` is `cudaSuccess`; the message holds the
  ## status name followed by its integer value.
  if a == cudaSuccess:
    return
  raise newException(CudaError, $a & " " & $int(a))
55 | 
func check*(a: sink cudaOccError) =
  ## Raises `CudaOccError` unless `a` is `CUDA_OCC_SUCCESS`; the message
  ## holds the status name followed by its integer value.
  if a == CUDA_OCC_SUCCESS:
    return
  raise newException(CudaOccError, $a & " " & $int(a))
59 | 
func check*(a: sink cublasStatus_t) =
  ## Raises `CublasError` unless `a` is `CUBLAS_STATUS_SUCCESS`; the message
  ## holds the status name followed by its integer value.
  if a == CUBLAS_STATUS_SUCCESS:
    return
  raise newException(CublasError, $a & " " & $int(a))
63 | 
func check*(a: sink cufftResult) =
  ## Raises `CufftError` unless `a` is `CUFFT_SUCCESS`; the message holds
  ## the status name followed by its integer value.
  if a == CUFFT_SUCCESS:
    return
  raise newException(CufftError, $a & " " & $int(a))
67 | 
func check*(a: sink cusparseStatus_t) =
  ## Raises `CusparseError` unless `a` is `CUSPARSE_STATUS_SUCCESS`; the
  ## message holds the status name followed by its integer value.
  if a == CUSPARSE_STATUS_SUCCESS:
    return
  raise newException(CusparseError, $a & " " & $int(a))
71 | 
func check*(a: sink cusolverStatus_t) =
  ## Raises `CusolverError` unless `a` is `CUSOLVER_STATUS_SUCCESS`; the
  ## message holds the status name followed by its integer value.
  if a == CUSOLVER_STATUS_SUCCESS:
    return
  raise newException(CusolverError, $a & " " & $int(a))
75 | 
func check*(a: sink curandStatus) =
  ## Raises `CurandError` unless `a` is `CURAND_STATUS_SUCCESS`; the message
  ## holds the status name followed by its integer value.
  if a == CURAND_STATUS_SUCCESS:
    return
  raise newException(CurandError, $a & " " & $int(a))
79 | 
80 | # func check*(a: cudnnStatus_t) =
81 | #   let a = a # ensure we only evaluate once even if the expression has side effects
82 | #   if a != CUDNN_STATUS_SUCCESS:
83 | #     raise newException(CudnnError, $a & " " & $int(a))
84 | 
85 | # func check*(a: nvgraphStatus_t) =
86 | #   let a = a # ensure we only evaluate once even if the expression has side effects
87 | #   if a != NVGRAPH_STATUS_SUCCESS:
88 | #     raise newException(NVGraphError, $a & " " & $int(a))
89 | 
func check*(a: sink nvrtcResult) =
  ## Raises `NvrtcError` unless `a` is `NVRTC_SUCCESS`; the message holds
  ## the status name followed by its integer value.
  if a == NVRTC_SUCCESS:
    return
  raise newException(NvrtcError, $a & " " & $int(a))
93 | 
func check*(a: sink CUresult) =
  ## Raises `CudaDriverError` unless `a` is `CUDA_SUCCESS`; the message holds
  ## the status name followed by its integer value.
  ##
  ## `CudaDriverError` is the exception type this module declares for driver
  ## API results; `NvrtcError` is reserved for `nvrtcResult` failures.
  if a != CUDA_SUCCESS:
    raise newException(CudaDriverError, $a & " " & $int(a))
97 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/cuComplex.nim:
--------------------------------------------------------------------------------
  1 | from std/math import sqrt
  2 | 
  3 | template sqrtf(x: cfloat): cfloat = sqrt(x)
  4 | 
  5 | template fabsf(x: cfloat): cfloat = abs(x)
  6 | 
  7 | template fabs(x: float): float = abs(x)
  8 | 
  9 | template `div`(a: static[float64], b: cfloat): cfloat = cfloat(a) / b
 10 | 
 11 | template `div`(a: cfloat, b: cfloat): cfloat = a / b
 12 | ##
 13 | ##  Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
 14 | ##
 15 | ##  NOTICE TO LICENSEE:
 16 | ##
 17 | ##  This source code and/or documentation ("Licensed Deliverables") are
 18 | ##  subject to NVIDIA intellectual property rights under U.S. and
 19 | ##  international Copyright laws.
 20 | ##
 21 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 22 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 23 | ##  conditions of a form of NVIDIA software license agreement by and
 24 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 25 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 26 | ##  the contrary in the License Agreement, reproduction or disclosure
 27 | ##  of the Licensed Deliverables to any third party without the express
 28 | ##  written consent of NVIDIA is prohibited.
 29 | ##
 30 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 31 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 32 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 33 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 34 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 35 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 36 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 37 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 38 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 39 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 40 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 41 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 42 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 43 | ##  OF THESE LICENSED DELIVERABLES.
 44 | ##
 45 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 46 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 47 | ##  1995), consisting of "commercial computer software" and "commercial
 48 | ##  computer software documentation" as such terms are used in 48
 49 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 50 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 51 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 52 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 53 | ##  only those rights set forth herein.
 54 | ##
 55 | ##  Any use of the Licensed Deliverables in individual and commercial
 56 | ##  software must include, in the user documentation and internal
 57 | ##  comments to the code, the above Disclaimer and U.S. Government End
 58 | ##  Users Notice.
 59 | ##
 60 | import ./libpaths
 61 | tellCompilerToUseCuda()
 62 | 
 63 | when not defined(CUDACC_RTC):
 64 |   when defined(GNUC):
 65 |     when defined(clang) or
 66 |         (not defined(PGIC) and
 67 |         (GNUC > 4 or (GNUC == 4 and GNUC_MINOR >= 2))):
 68 |       discard
 69 | ##  When trying to include C header file in C++ Code extern "C" is required
 70 | ##  But the Standard QNX headers already have ifdef extern in them when compiling C++ Code
 71 | ##  extern "C" cannot be nested
 72 | ##  Hence keep the header out of extern "C" block
 73 | ##
 74 | 
 75 | when not defined(CUDACC):
 76 |   discard
 77 | import
 78 |   vector_types
 79 | 
 80 | type
 81 |   cuFloatComplex* = float2
 82 | 
 83 | proc cuCrealf*(x: cuFloatComplex): cfloat =
 84 |   return x.x
 85 | 
 86 | proc cuCimagf*(x: cuFloatComplex): cfloat =
 87 |   return x.y
 88 | 
 89 | proc make_cuFloatComplex*(r: cfloat; i: cfloat): cuFloatComplex =
 90 |   var res: cuFloatComplex
 91 |   res.x = r
 92 |   res.y = i
 93 |   return res
 94 | 
 95 | proc cuConjf*(x: cuFloatComplex): cuFloatComplex =
 96 |   return make_cuFloatComplex(cuCrealf(x), -cuCimagf(x))
 97 | 
 98 | proc cuCaddf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
 99 |   return make_cuFloatComplex(cuCrealf(x) + cuCrealf(y), cuCimagf(x) + cuCimagf(y))
100 | 
proc cuCsubf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Component-wise difference `x - y`.
  let
    re = cuCrealf(x) - cuCrealf(y)
    im = cuCimagf(x) - cuCimagf(y)
  result = make_cuFloatComplex(re, im)
103 | 
##  This implementation could suffer from intermediate overflow even though
##  the final result would be in range. However, various implementations do
##  not guard against this (presumably to avoid losing performance), so we
##  don't do it either to stay competitive.
##

proc cuCmulf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Single-precision complex product `x * y`, computed without any scaling.
  let
    xr = cuCrealf(x)
    xi = cuCimagf(x)
    yr = cuCrealf(y)
    yi = cuCimagf(y)
  result = make_cuFloatComplex((xr * yr) - (xi * yi), (xr * yi) + (xi * yr))
116 | 
117 | ##  This implementation guards against intermediate underflow and overflow
118 | ##  by scaling. Such guarded implementations are usually the default for
119 | ##  complex library implementations, with some also offering an unguarded,
120 | ##  faster version.
121 | ##
122 | 
proc cuCdivf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Single-precision complex quotient `x / y`.
  ##
  ## Both operands are first multiplied by the reciprocal of
  ## `|re(y)| + |im(y)|`; the quotient is then formed from the scaled parts.
  let
    scale: cfloat = 1.0f / (fabsf(cuCrealf(y)) + fabsf(cuCimagf(y)))
    ars: cfloat = cuCrealf(x) * scale
    ais: cfloat = cuCimagf(x) * scale
    brs: cfloat = cuCrealf(y) * scale
    bis: cfloat = cuCimagf(y) * scale
    # Reciprocal of the squared magnitude of the scaled divisor.
    invNorm: cfloat = 1.0f / ((brs * brs) + (bis * bis))
  result = make_cuFloatComplex(((ars * brs) + (ais * bis)) * invNorm,
                               ((ais * brs) - (ars * bis)) * invNorm)
136 | 
137 | ##
138 | ##  We would like to call hypotf(), but it's not available on all platforms.
139 | ##  This discrete implementation guards against intermediate underflow and
140 | ##  overflow by scaling. Otherwise we would lose half the exponent range.
141 | ##  There are various ways of doing guarded computation. For now chose the
142 | ##  simplest and fastest solution, however this may suffer from inaccuracies
143 | ##  if sqrt and division are not IEEE compliant.
144 | ##
145 | 
proc cuCabsf*(x: cuFloatComplex): cfloat =
  ## Magnitude of `x`, computed as `v * sqrt(1 + (w / v)^2)` where `v` is the
  ## larger and `w` the smaller of the absolute real and imaginary parts.
  ## When `v` is zero, or either value exceeds the largest finite single
  ## precision number, `v + w` is returned instead.
  let
    absRe = fabsf(cuCrealf(x))
    absIm = fabsf(cuCimagf(x))
    reIsLarger = absRe > absIm
    v: cfloat = if reIsLarger: absRe else: absIm
    w: cfloat = if reIsLarger: absIm else: absRe
  if (v == 0.0f) or (v > 3.402823466e38f) or (w > 3.402823466e38f):
    return v + w
  let ratio: cfloat = w / v
  result = v * sqrtf(1.0f + ratio * ratio)
167 | 
168 | ##  Double precision
169 | 
170 | type
171 |   cuDoubleComplex* = double2
172 | 
proc cuCreal*(x: cuDoubleComplex): cdouble =
  ## Real part of `x`, stored in its `x` field.
  x.x
175 | 
proc cuCimag*(x: cuDoubleComplex): cdouble =
  ## Imaginary part of `x`, stored in its `y` field.
  x.y
178 | 
proc make_cuDoubleComplex*(r: cdouble; i: cdouble): cuDoubleComplex =
  ## Builds a double-precision complex number with real part `r` and
  ## imaginary part `i`.
  result.x = r
  result.y = i
184 | 
proc cuConj*(x: cuDoubleComplex): cuDoubleComplex =
  ## Complex conjugate of `x`: same real part, negated imaginary part.
  let
    re = cuCreal(x)
    im = cuCimag(x)
  result = make_cuDoubleComplex(re, -im)
187 | 
proc cuCadd*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Component-wise sum `x + y`.
  let
    re = cuCreal(x) + cuCreal(y)
    im = cuCimag(x) + cuCimag(y)
  result = make_cuDoubleComplex(re, im)
190 | 
proc cuCsub*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Component-wise difference `x - y`.
  let
    re = cuCreal(x) - cuCreal(y)
    im = cuCimag(x) - cuCimag(y)
  result = make_cuDoubleComplex(re, im)
193 | 
##  This implementation could suffer from intermediate overflow even though
##  the final result would be in range. However, various implementations do
##  not guard against this (presumably to avoid losing performance), so we
##  don't do it either to stay competitive.
##

proc cuCmul*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Double-precision complex product `x * y`, computed without any scaling.
  let
    xr = cuCreal(x)
    xi = cuCimag(x)
    yr = cuCreal(y)
    yi = cuCimag(y)
  result = make_cuDoubleComplex((xr * yr) - (xi * yi), (xr * yi) + (xi * yr))
205 | 
206 | ##  This implementation guards against intermediate underflow and overflow
207 | ##  by scaling. Such guarded implementations are usually the default for
208 | ##  complex library implementations, with some also offering an unguarded,
209 | ##  faster version.
210 | ##
211 | 
proc cuCdiv*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Double-precision complex quotient `x / y`.
  ##
  ## Both operands are first multiplied by the reciprocal of
  ## `|re(y)| + |im(y)|`; the quotient is then formed from the scaled parts.
  # Divide with `/` on `cdouble` operands. The `div` helpers at the top of
  # this file only accept `cfloat`, so passing `cdouble` values to them
  # narrows the operands and yields a single-precision quotient.
  var s: cdouble = abs(cuCreal(y)) + abs(cuCimag(y))
  var oos: cdouble = 1.0 / s
  let
    ars: cdouble = cuCreal(x) * oos
    ais: cdouble = cuCimag(x) * oos
    brs: cdouble = cuCreal(y) * oos
    bis: cdouble = cuCimag(y) * oos
  # Squared magnitude of the scaled divisor and its reciprocal.
  s = (brs * brs) + (bis * bis)
  oos = 1.0 / s
  result = make_cuDoubleComplex(((ars * brs) + (ais * bis)) * oos,
                                ((ais * brs) - (ars * bis)) * oos)
225 | 
226 | ##  This implementation guards against intermediate underflow and overflow
227 | ##  by scaling. Otherwise we would lose half the exponent range. There are
228 | ##  various ways of doing guarded computation. For now chose the simplest
229 | ##  and fastest solution, however this may suffer from inaccuracies if sqrt
230 | ##  and division are not IEEE compliant.
231 | ##
232 | 
proc cuCabs*(x: cuDoubleComplex): cdouble =
  ## Magnitude of `x`, computed as `v * sqrt(1 + (w / v)^2)` where `v` is the
  ## larger and `w` the smaller of the absolute real and imaginary parts.
  ## When `v` is zero, or either value exceeds the largest finite double
  ## precision number, `v + w` is returned instead.
  let
    a: cdouble = abs(cuCreal(x))
    b: cdouble = abs(cuCimag(x))
  var
    v: cdouble
    w: cdouble
  if a > b:
    v = a
    w = b
  else:
    v = b
    w = a
  # Divide with `/` on `cdouble` operands. The `div` helpers at the top of
  # this file only accept `cfloat`, so passing `cdouble` values to them
  # narrows the operands and yields a single-precision ratio.
  var t: cdouble = w / v
  t = 1.0 + t * t
  t = v * sqrt(t)
  if (v == 0.0) or (v > 1.79769313486231570e+308) or (w > 1.79769313486231570e+308):
    t = v + w
  return t
254 | 
255 | ##  aliases
256 | 
257 | type
258 |   cuComplex* = cuFloatComplex
259 | 
proc make_cuComplex*(x: cfloat; y: cfloat): cuComplex =
  ## Builds a `cuComplex` with real part `x` and imaginary part `y` by
  ## forwarding to `make_cuFloatComplex`.
  result = make_cuFloatComplex(x, y)
262 | 
263 | ##  float-to-double promotion
264 | 
proc cuComplexFloatToDouble*(c: cuFloatComplex): cuDoubleComplex =
  ## Promotes a single-precision complex number to double precision,
  ## preserving the value of both parts.
  # A type conversion is required here. `cast` would reinterpret the bit
  # pattern of the 32-bit float as a 64-bit float instead of converting the
  # numeric value.
  return make_cuDoubleComplex(cdouble(cuCrealf(c)), cdouble(cuCimagf(c)))
268 | 
proc cuComplexDoubleToFloat*(c: cuDoubleComplex): cuFloatComplex =
  ## Converts a double-precision complex number to single precision by
  ## converting both parts to `cfloat`.
  # A type conversion is required here. `cast` would reinterpret part of the
  # bit pattern of the 64-bit float as a 32-bit float instead of converting
  # the numeric value.
  return make_cuFloatComplex(cfloat(cuCreal(c)), cfloat(cuCimag(c)))
271 | 
proc cuCfmaf*(x: cuComplex; y: cuComplex; d: cuComplex): cuComplex =
  ## Single-precision complex multiply-add: returns `x * y + d`.
  let
    xr = cuCrealf(x)
    xi = cuCimagf(x)
    yr = cuCrealf(y)
    yi = cuCimagf(y)
    # Products with the real part of `x` are accumulated onto `d` first,
    # then the products with the imaginary part of `x` are folded in.
    realPartial: cfloat = (xr * yr) + cuCrealf(d)
    imagPartial: cfloat = (xr * yi) + cuCimagf(d)
    realRes: cfloat = -(xi * yi) + realPartial
    imagRes: cfloat = (xi * yr) + imagPartial
  result = make_cuComplex(realRes, imagRes)
280 | 
proc cuCfma*(x: cuDoubleComplex; y: cuDoubleComplex; d: cuDoubleComplex): cuDoubleComplex =
  ## Double-precision complex multiply-add: returns `x * y + d`.
  let
    xr = cuCreal(x)
    xi = cuCimag(x)
    yr = cuCreal(y)
    yi = cuCimag(y)
    # Products with the real part of `x` are accumulated onto `d` first,
    # then the products with the imaginary part of `x` are folded in.
    realPartial: cdouble = (xr * yr) + cuCreal(d)
    imagPartial: cdouble = (xr * yi) + cuCimag(d)
    realRes: cdouble = -(xi * yi) + realPartial
    imagRes: cdouble = (xi * yr) + imagPartial
  result = make_cuDoubleComplex(realRes, imagRes)
289 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/cusolver_common.nim:
--------------------------------------------------------------------------------
  1 | ##  #assumendef _MSC_VER
  2 | 
  3 | when defined(windows):
  4 |   const
  5 |     libName = "cusolver.dll"
  6 | elif defined(macosx):
  7 |   const
  8 |     libName = "libcusolver.dylib"
  9 | else:
 10 |   const
 11 |     libName = "libcusolver.so"
 12 | import
 13 |   library_types
 14 | import ./libpaths
 15 | tellCompilerToUseCuda()
 16 | ##
 17 | ##  Copyright 2014 NVIDIA Corporation.  All rights reserved.
 18 | ##
 19 | ##  NOTICE TO LICENSEE:
 20 | ##
 21 | ##  This source code and/or documentation ("Licensed Deliverables") are
 22 | ##  subject to NVIDIA intellectual property rights under U.S. and
 23 | ##  international Copyright laws.
 24 | ##
 25 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 26 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 27 | ##  conditions of a form of NVIDIA software license agreement by and
 28 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 29 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 30 | ##  the contrary in the License Agreement, reproduction or disclosure
 31 | ##  of the Licensed Deliverables to any third party without the express
 32 | ##  written consent of NVIDIA is prohibited.
 33 | ##
 34 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 35 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 36 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 37 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 38 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 39 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 40 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 41 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 42 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 43 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 44 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 45 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 46 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 47 | ##  OF THESE LICENSED DELIVERABLES.
 48 | ##
 49 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 50 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 51 | ##  1995), consisting of "commercial computer software" and "commercial
 52 | ##  computer software documentation" as such terms are used in 48
 53 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 54 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 55 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 56 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 57 | ##  only those rights set forth herein.
 58 | ##
 59 | ##  Any use of the Licensed Deliverables in individual and commercial
 60 | ##  software must include, in the user documentation and internal
 61 | ##  comments to the code, the above Disclaimer and U.S. Government End
 62 | ##  Users Notice.
 63 | ##
 64 | 
 65 | when defined(MSC_VER):
 66 |   type
 67 |     clonglong* = int64
 68 | else:
 69 |   discard
 70 | type
 71 |   cusolver_int_t* = cint
 72 | 
 73 | const
 74 |   CUSOLVER_VER_MAJOR* = 11
 75 |   CUSOLVER_VER_MINOR* = 6
 76 |   CUSOLVER_VER_PATCH* = 3
 77 |   CUSOLVER_VER_BUILD* = 83
 78 |   CUSOLVER_VERSION* = (
 79 |     CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH)
 80 | 
 81 | ## ------------------------------------------------------------------------------
 82 | ##  #if !defined(MSC_VER)
 83 | ##    #define CUSOLVER_CPP_VERSION __cplusplus
 84 | ##  #elif _MSC_FULL_VER >= 190024210 // Visual Studio 2015 Update 3
 85 | ##    #define CUSOLVER_CPP_VERSION _MSVC_LANG
 86 | ##  #else
 87 | ##    #define CUSOLVER_CPP_VERSION 0
 88 | ##  #endif
 89 | ## ------------------------------------------------------------------------------
 90 | ##  #if !defined(DISABLE_CUSOLVER_DEPRECATED)
 91 | ##
 92 | ##    #if CUSOLVER_CPP_VERSION >= 201402L
 93 | ##
 94 | ##      #define CUSOLVER_DEPRECATED(new_func)                                    \
 95 | ##        [[deprecated("please use " #new_func " instead")]]
 96 | ##
 97 | ##    #elif defined(MSC_VER)
 98 | ##
 99 | ##      #define CUSOLVER_DEPRECATED(new_func)                                    \
100 | ##        __declspec(deprecated("please use " #new_func " instead"))
101 | ##
102 | ##    #elif defined(INTEL_COMPILER) || defined(clang) ||                   \
103 | ##      (defined(GNUC) &&                                                    \
104 | ##       (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
105 | ##
106 | ##      #define CUSOLVER_DEPRECATED(new_func)                                    \
107 | ##        __attribute__((deprecated("please use " #new_func " instead")))
108 | ##
109 | ##    #elif defined(GNUC) || defined(xlc)
110 | ##
111 | ##      #define CUSOLVER_DEPRECATED(new_func) __attribute__((deprecated))
112 | ##
113 | ##    #else
114 | ##
115 | ##      #define CUSOLVER_DEPRECATED(new_func)
116 | ##
117 | ##    #endif // defined(cplusplus) && __cplusplus >= 201402L
118 | ##  //------------------------------------------------------------------------------
119 | ##
120 | ##    #if CUSOLVER_CPP_VERSION >= 201703L
121 | ##
122 | ##      #define CUSOLVER_DEPRECATED_ENUM(new_enum)                               \
123 | ##        [[deprecated("please use " #new_enum " instead")]]
124 | ##
125 | ##    #elif defined(clang) ||                                                \
126 | ##      (defined(GNUC) && __GNUC__ >= 6 && !defined(PGI))
127 | ##
128 | ##      #define CUSOLVER_DEPRECATED_ENUM(new_enum)                               \
129 | ##        __attribute__((deprecated("please use " #new_enum " instead")))
130 | ##
131 | ##    #else
132 | ##
133 | ##      #define CUSOLVER_DEPRECATED_ENUM(new_enum)
134 | ##
135 | ##    #endif // defined(cplusplus) && __cplusplus >= 201402L
136 | ##
137 | ##  #else // defined(DISABLE_CUSOLVER_DEPRECATED)
138 | ##
139 | ##    #define CUSOLVER_DEPRECATED(new_func)
140 | ##    #define CUSOLVER_DEPRECATED_ENUM(new_enum)
141 | ##
142 | ##  #endif // !defined(DISABLE_CUSOLVER_DEPRECATED)
143 | ##  #undef CUSOLVER_CPP_VERSION
144 | 
type
  cusolverStatus_t* {.size: sizeof(cint).} = enum
    ## Status codes of the cuSOLVER API. Sized like a C `int`.
    CUSOLVER_STATUS_SUCCESS = 0,
    CUSOLVER_STATUS_NOT_INITIALIZED = 1,
    CUSOLVER_STATUS_ALLOC_FAILED = 2,
    CUSOLVER_STATUS_INVALID_VALUE = 3,
    CUSOLVER_STATUS_ARCH_MISMATCH = 4,
    CUSOLVER_STATUS_MAPPING_ERROR = 5,
    CUSOLVER_STATUS_EXECUTION_FAILED = 6,
    CUSOLVER_STATUS_INTERNAL_ERROR = 7,
    CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
    CUSOLVER_STATUS_NOT_SUPPORTED = 9,
    CUSOLVER_STATUS_ZERO_PIVOT = 10,
    CUSOLVER_STATUS_INVALID_LICENSE = 11,
    CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED = 12,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID = 13,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC = 14,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE = 15,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER = 16,
    CUSOLVER_STATUS_IRS_INTERNAL_ERROR = 20,
    CUSOLVER_STATUS_IRS_NOT_SUPPORTED = 21,
    CUSOLVER_STATUS_IRS_OUT_OF_RANGE = 22,
    CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES = 23,
    CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED = 25,
    CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED = 26,
    CUSOLVER_STATUS_IRS_MATRIX_SINGULAR = 30,
    CUSOLVER_STATUS_INVALID_WORKSPACE = 31

  cusolverEigType_t* {.size: sizeof(cint).} = enum
    CUSOLVER_EIG_TYPE_1 = 1,
    CUSOLVER_EIG_TYPE_2 = 2,
    CUSOLVER_EIG_TYPE_3 = 3

  cusolverEigMode_t* {.size: sizeof(cint).} = enum
    CUSOLVER_EIG_MODE_NOVECTOR = 0,
    CUSOLVER_EIG_MODE_VECTOR = 1

  cusolverEigRange_t* {.size: sizeof(cint).} = enum
    CUSOLVER_EIG_RANGE_ALL = 1001,
    CUSOLVER_EIG_RANGE_I = 1002,
    CUSOLVER_EIG_RANGE_V = 1003

  cusolverNorm_t* {.size: sizeof(cint).} = enum
    CUSOLVER_INF_NORM = 104,
    CUSOLVER_MAX_NORM = 105,
    CUSOLVER_ONE_NORM = 106,
    CUSOLVER_FRO_NORM = 107

  cusolverIRSRefinement_t* {.size: sizeof(cint).} = enum
    CUSOLVER_IRS_REFINE_NOT_SET = 1100,
    CUSOLVER_IRS_REFINE_NONE = 1101,
    CUSOLVER_IRS_REFINE_CLASSICAL = 1102,
    CUSOLVER_IRS_REFINE_CLASSICAL_GMRES = 1103,
    CUSOLVER_IRS_REFINE_GMRES = 1104,
    CUSOLVER_IRS_REFINE_GMRES_GMRES = 1105,
    CUSOLVER_IRS_REFINE_GMRES_NOPCOND = 1106,
    CUSOLVER_PREC_DD = 1150,
    CUSOLVER_PREC_SS = 1151,
    CUSOLVER_PREC_SHT = 1152

  cusolverPrecType_t* {.size: sizeof(cint).} = enum
    CUSOLVER_R_8I = 1201,
    CUSOLVER_R_8U = 1202,
    CUSOLVER_R_64F = 1203,
    CUSOLVER_R_32F = 1204,
    CUSOLVER_R_16F = 1205,
    CUSOLVER_R_16BF = 1206,
    CUSOLVER_R_TF32 = 1207,
    CUSOLVER_R_AP = 1208,
    CUSOLVER_C_8I = 1211,
    CUSOLVER_C_8U = 1212,
    CUSOLVER_C_64F = 1213,
    CUSOLVER_C_32F = 1214,
    CUSOLVER_C_16F = 1215,
    CUSOLVER_C_16BF = 1216,
    CUSOLVER_C_TF32 = 1217,
    CUSOLVER_C_AP = 1218

  cusolverAlgMode_t* {.size: sizeof(cint).} = enum
    CUSOLVER_ALG_0 = 0,         ##  default algorithm
    CUSOLVER_ALG_1 = 1,
    CUSOLVER_ALG_2 = 2

  cusolverStorevMode_t* {.size: sizeof(cint).} = enum
    CUBLAS_STOREV_COLUMNWISE = 0,
    CUBLAS_STOREV_ROWWISE = 1

  cusolverDirectMode_t* {.size: sizeof(cint).} = enum
    CUBLAS_DIRECT_FORWARD = 0,
    CUBLAS_DIRECT_BACKWARD = 1
197 | 
198 | 
199 | 
200 | 
201 | 
202 | 
203 | 
204 | 
205 | 
206 | 
207 | 
proc cusolverGetProperty*(`type`: libraryPropertyType;
                          value: ptr cint): cusolverStatus_t {.
    importc: "cusolverGetProperty", cdecl, dynlib: libName.}
  ## Binding to the C function `cusolverGetProperty`, loaded at run time from
  ## the library named by `libName`.
  ## NOTE(review): presumably stores the requested version component in
  ## `value` - confirm against the cuSOLVER documentation.
proc cusolverGetVersion*(version: ptr cint): cusolverStatus_t {.
    importc: "cusolverGetVersion", cdecl, dynlib: libName.}
  ## Binding to the C function `cusolverGetVersion`, loaded at run time from
  ## the library named by `libName`.
  ## NOTE(review): presumably stores the library version in `version` -
  ## confirm against the cuSOLVER documentation.
212 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/helpers.nim:
--------------------------------------------------------------------------------
 1 | 
 2 | ##[
 3 |   This module helps with some differences between C and Nim that C2Nim misses.
 4 |   ]##
 5 | 
 6 | 
 7 | converter toCSize_t*(self: cint): csize_t {.inline.} =
 8 |   csize_t(self)
 9 | 
converter toBool*(self: cint): bool {.inline.} =
  ## Implicitly converts a C `int` to `bool`, so C-style integer conditions
  ## can be used where Nim expects a `bool`.
  result = self.bool
12 | 
converter toBool*(self: uint): bool {.inline.} =
  ## Implicitly converts a `uint` to `bool`.
  result = self.bool
15 | 
converter toCint*(self: bool): cint {.inline.} =
  ## Implicitly converts a `bool` to a C `int` (`false` -> 0, `true` -> 1).
  result = cint(ord(self))
18 | 
19 | 
const INT_MAX* = high(cint) ## Largest value representable by a C `int`.
21 | 
22 | 
func `or`*(a: bool; b: cint): bool {.inline.} =
  ## Logical `or` of a `bool` and a C `int` that is used as a truth value.
  result = a or bool(b)
25 | 
func `or`*(a: cint; b: bool): bool {.inline.} =
  ## Logical `or` of a C `int` used as a truth value and a `bool`.
  result = bool(a) or b
28 | 
func `and`*(a: bool; b: cint): bool {.inline.} =
  ## Logical `and` of a `bool` and a C `int` that is used as a truth value.
  result = a and bool(b)
31 | 
func `and`*(a: cint; b: bool): bool {.inline.} =
  ## Logical `and` of a C `int` used as a truth value and a `bool`.
  result = bool(a) and b
34 | 
35 | 
converter toBool*[T: ptr|pointer|proc](self: T): bool {.inline.} =
  ## Implicitly converts a pointer-like value to `bool`: `true` exactly when
  ## `self` is not nil.
  result = not isNil(self)
38 | 
39 | 
converter toCUInt*[U: enum](self: U): cuint {.inline.} =
  ## Implicitly converts any enum value to a C `unsigned int` holding its
  ## ordinal value.
  result = self.cuint
42 | 
converter toCInt*[U: enum](self: U): cint {.inline.} =
  ## Implicitly converts any enum value to a C `int` holding its ordinal
  ## value.
  result = self.cint
45 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/libpaths.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | ##[This module implements some auto-detection of cuda installation locations,
  3 |    as well as communication with the c compilers about this info.
  4 | 
  5 |    If you want to manually override the autodetection, pass the nim compiler
  6 |    `-d:CudaLib="PATH_TO_CUDA_DYN_LIBS"` and/or
  7 |    `-d:CudaIncludes="PATH_TO_CUDA_HEADERS"`.
  8 | ]##
  9 | 
 10 | #[The following is a rip of std/distros, slightly modified for compile-time
 11 |   use.
 12 |   The extra specificity compared to normal `defined` tests or `hostOS`
 13 |   is needed because some linux distros install cuda in very different places
 14 |   (im looking at you, arch!)
 15 | ]#
 16 | 
 17 | 
 18 | from std/distros import Distribution
 19 | import std/[os, strutils, macros, macrocache]
 20 | when NimMajor == 2:
 21 |   import std/envvars
 22 | 
 23 | 
 24 | # we cache the result of the 'cmdRelease'
 25 | # execution for faster platform detections.
 26 | var
 27 |   unameRes {.compileTime.}: string
 28 |   osReleaseIDRes {.compileTime.}: string
 29 |   releaseRes {.compileTime.}: string
 30 |   hostnamectlRes {.compileTime.}: string
 31 | 
 32 | template cmdRelease(cmd, cache): untyped =
 33 |   if cache.len == 0:
 34 |     # cache = (when defined(nimscript): gorge(cmd) else: execProcess(cmd))
 35 |     cache = gorge(cmd)
 36 |   cache
 37 | 
 38 | template uname(): untyped = cmdRelease("uname -a", unameRes)
 39 | template osReleaseID(): untyped =
 40 |   cmdRelease("cat /etc/os-release | grep ^ID=", osReleaseIDRes)
 41 | template release(): untyped = cmdRelease("lsb_release -d", releaseRes)
 42 | template hostnamectl(): untyped = cmdRelease("hostnamectl", hostnamectlRes)
 43 | 
 44 | proc detectOsWithAllCmd(d: Distribution): bool {.compileTime.} =
 45 |   let dd = toLowerAscii($d)
 46 |   result = dd in toLowerAscii(osReleaseID()) or dd in toLowerAscii(release()) or
 47 |             dd in toLowerAscii(uname()) or ("operating system: " & dd) in
 48 |                 toLowerAscii(hostnamectl())
 49 | 
proc detectOsImpl(d: Distribution): bool {.compileTime.} =
  ## Compile-time check whether the machine running the compiler matches the
  ## OS/distribution `d`.
  ##
  ## The broad families (Windows, Posix, MacOSX, Linux, BSD) are answered from
  ## `defined(...)` symbols. Specific BSD flavours and Linux distributions are
  ## answered by searching the cached output of `uname`, `/etc/os-release`,
  ## `lsb_release` and `hostnamectl`, or by probing environment variables.
  case d
  of Distribution.Windows: result = defined(windows)
  of Distribution.Posix: result = defined(posix)
  of Distribution.MacOSX: result = defined(macosx)
  of Distribution.Linux: result = defined(linux)
  of Distribution.BSD: result = defined(bsd)
  else:
    when defined(bsd):
      case d
      of Distribution.FreeBSD, Distribution.NetBSD, Distribution.OpenBSD:
        result = $d in uname()
      else:
        result = false
    elif defined(linux):
      # Distributions that are recognised by their lower-cased name appearing
      # in the os-release ID line. `Distribution.Void` is only listed for
      # Nim >= 1.6.
      const EasyLinux = when (NimMajor, NimMinor) >= (1, 6):
          {Distribution.Elementary, Distribution.Ubuntu, Distribution.Debian,
          Distribution.Fedora, Distribution.OpenMandriva, Distribution.CentOS,
          Distribution.Alpine, Distribution.Mageia, Distribution.Zorin,
          Distribution.Void}
        else:
          {Distribution.Elementary, Distribution.Ubuntu, Distribution.Debian,
          Distribution.Fedora, Distribution.OpenMandriva, Distribution.CentOS,
          Distribution.Alpine, Distribution.Mageia, Distribution.Zorin}

      case d
      of Distribution.Gentoo:
        result = ("-" & $d & " ") in uname()
      of EasyLinux:
        result = toLowerAscii($d) in osReleaseID()
      of Distribution.RedHat:
        result = "rhel" in osReleaseID()
      of Distribution.ArchLinux:
        # NOTE(review): plain substring test, so any ID line containing
        # "arch" matches.
        result = "arch" in osReleaseID()
      # when (NimMajor, NimMinor) >= (1, 6):
      #   of Distribution.Artix:
      #     result = "artix" in osReleaseID()
      of Distribution.NixOS:
        # Check if this is a Nix build or NixOS environment
        result = existsEnv("NIX_BUILD_TOP") or
          existsEnv("__NIXOS_SET_ENVIRONMENT_DONE")
      of Distribution.OpenSUSE:
        result = "suse" in toLowerAscii(uname()) or
          "suse" in toLowerAscii(release())
      of Distribution.GoboLinux:
        result = "-Gobo " in uname()
      of Distribution.Solaris:
        let uname = toLowerAscii(uname())
        result = ("sun" in uname) or ("solaris" in uname)
      of Distribution.Haiku:
        result = defined(haiku)
      else:
        # Anything not special-cased above: search all command outputs.
        result = detectOsWithAllCmd(d)
    else:
      # Neither BSD nor Linux: no specific distribution can match.
      result = false
105 | 
template detectOs(d: untyped): bool =
  ## Distro/OS detection. `d` is a bare `Distribution` member name; the
  ## required `Distribution.` qualifier is added here for convenience, so
  ## `detectOs(Windows)` expands to `detectOsImpl(Distribution.Windows)`.
  detectOsImpl(Distribution.d)
111 | 
112 | 
113 | 
114 | # begin actual detection
# Default locations of the CUDA headers (`CudaIncludes`) and link libraries
# (`CudaLib`) per platform. Both are `strdefine` constants, so either can be
# overridden with `-d:CudaIncludes=...` / `-d:CudaLib=...`.
when detectOs(Windows):
  from std/os import getEnv, `/`
  const
    # CUDA_PATH is read from the environment of the compiling machine.
    CudaPath = getEnv("CUDA_PATH")
    CudaIncludes* {.strdefine.} = CudaPath / "include"
    # The Windows toolkit keeps its 64-bit link libraries (cuda.lib, ...) in
    # `lib\x64`; there is no `lib64` directory as on Linux.
    CudaLib* {.strdefine.} = CudaPath / "lib" / "x64"

elif detectOs(ArchLinux):
  from std/os import `/`
  const
    CudaPath = "/opt/cuda"
    CudaIncludes* {.strdefine.} = CudaPath / "include"
    CudaLib* {.strdefine.} = CudaPath / "lib64"

elif detectOs(Linux):
  # Generic linux catch-all.
  # This includes anyone following the cuda installation guide.
  const
    CudaPath = "/usr/local/cuda"
    CudaIncludes* {.strdefine.} = CudaPath / "include"
    CudaLib* {.strdefine.} = CudaPath / "lib64"

else:
  # Some wild operating system! No default is known, so the validity check
  # that follows fails unless both paths are supplied on the command line.
  const
    CudaIncludes* {.strdefine.} = "unknown"
    CudaLib* {.strdefine.} = "unknown"
142 | 
143 | 
144 | # check for validity
# Abort compilation with a hint when a configured directory does not exist.
# The include directory is checked first; the library directory is only
# checked once the include directory was found.
when dirExists(CudaIncludes):
  when not dirExists(CudaLib):
    {.error: "Could not find the cuda shared libraries! Please specify the " &
       "location of the cuda library directory by passing " &
       "`-d:CudaLib=\"YOUR_PATH\"` to the nim compiler.".}
else:
  {.error: "Could not find the cuda source headers! Please specify the " &
     "location of the cuda includes directory by passing " &
     "`-d:CudaIncludes=\"YOUR_PATH\"` to the nim compiler.".}
153 | 
154 | 
155 | 
macro tellCompilerToUseCuda*(): untyped =
  ## Tells the compiler and linker to use cuda libraries.
  ##
  ## Only the first expansion emits the `passC` / `passL` pragmas (include
  ## directory, library directory and `-lcuda`); every later expansion
  ## produces no code.
  # A macro-cache counter remembers whether the pragmas were already emitted,
  # so they are not unnecessarily repeated by every module that calls this.
  const emitCounter = CacheCounter"ToldCompilerToUseCudaCount"
  if emitCounter.value != 0:
    return
  inc emitCounter
  result = quote do:
    {.passC: "-I" & CudaIncludes.}
    {.passL: "-L" & CudaLib & " -lcuda".}
166 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/library_types.nim:
--------------------------------------------------------------------------------
 1 | ##
 2 | ##  Copyright 1993-2023 NVIDIA Corporation.  All rights reserved.
 3 | ##
 4 | ##  NOTICE TO LICENSEE:
 5 | ##
 6 | ##  This source code and/or documentation ("Licensed Deliverables") are
 7 | ##  subject to NVIDIA intellectual property rights under U.S. and
 8 | ##  international Copyright laws.
 9 | ##
10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 | ##  conditions of a form of NVIDIA software license agreement by and
13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
15 | ##  the contrary in the License Agreement, reproduction or disclosure
16 | ##  of the Licensed Deliverables to any third party without the express
17 | ##  written consent of NVIDIA is prohibited.
18 | ##
19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 | ##  OF THESE LICENSED DELIVERABLES.
33 | ##
34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 | ##  1995), consisting of "commercial computer software" and "commercial
37 | ##  computer software documentation" as such terms are used in 48
38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
42 | ##  only those rights set forth herein.
43 | ##
44 | ##  Any use of the Licensed Deliverables in individual and commercial
45 | ##  software must include, in the user documentation and internal
46 | ##  comments to the code, the above Disclaimer and U.S. Government End
47 | ##  Users Notice.
48 | ##
49 | import ./libpaths
50 | tellCompilerToUseCuda()
51 | 
type
  cudaDataType* {.size: sizeof(cint).} = enum
    ## Element data types understood by the CUDA libraries.
    ##
    ## Sized like a C `int` so that values passed to, or stored in structures
    ## shared with, the C libraries have the same layout as the C enum.
    ## Without the `size` pragma Nim would store this enum in a single byte.
    CUDA_R_32F = 0,             ##  real as a float
    CUDA_R_64F = 1,             ##  real as a double
    CUDA_R_16F = 2,             ##  real as a half
    CUDA_R_8I = 3,              ##  real as a signed 8-bit int
    CUDA_C_32F = 4,             ##  complex as a pair of float numbers
    CUDA_C_64F = 5,             ##  complex as a pair of double numbers
    CUDA_C_16F = 6,             ##  complex as a pair of half numbers
    CUDA_C_8I = 7,              ##  complex as a pair of signed 8-bit int numbers
    CUDA_R_8U = 8,              ##  real as a unsigned 8-bit int
    CUDA_C_8U = 9,              ##  complex as a pair of unsigned 8-bit int numbers
    CUDA_R_32I = 10,            ##  real as a signed 32-bit int
    CUDA_C_32I = 11,            ##  complex as a pair of signed 32-bit int numbers
    CUDA_R_32U = 12,            ##  real as a unsigned 32-bit int
    CUDA_C_32U = 13,            ##  complex as a pair of unsigned 32-bit int numbers
    CUDA_R_16BF = 14,           ##  real as a nv_bfloat16
    CUDA_C_16BF = 15,           ##  complex as a pair of nv_bfloat16 numbers
    CUDA_R_4I = 16,             ##  real as a signed 4-bit int
    CUDA_C_4I = 17,             ##  complex as a pair of signed 4-bit int numbers
    CUDA_R_4U = 18,             ##  real as a unsigned 4-bit int
    CUDA_C_4U = 19,             ##  complex as a pair of unsigned 4-bit int numbers
    CUDA_R_16I = 20,            ##  real as a signed 16-bit int
    CUDA_C_16I = 21,            ##  complex as a pair of signed 16-bit int numbers
    CUDA_R_16U = 22,            ##  real as a unsigned 16-bit int
    CUDA_C_16U = 23,            ##  complex as a pair of unsigned 16-bit int numbers
    CUDA_R_64I = 24,            ##  real as a signed 64-bit int
    CUDA_C_64I = 25,            ##  complex as a pair of signed 64-bit int numbers
    CUDA_R_64U = 26,            ##  real as a unsigned 64-bit int
    CUDA_C_64U = 27,            ##  complex as a pair of unsigned 64-bit int numbers
    CUDA_R_8F_E4M3 = 28,        ##  real as a nv_fp8_e4m3
    CUDA_R_8F_E5M2 = 29         ##  real as a nv_fp8_e5m2
  libraryPropertyType* {.size: sizeof(cint).} = enum
    ## Selects which version component a library's property query returns.
    ## Sized like a C `int` for the same ABI reason as `cudaDataType`.
    MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL

  cudaDataType_t* = cudaDataType ## C-style `_t` alias of `cudaDataType`.
90 | 
91 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/surface_types.nim:
--------------------------------------------------------------------------------
 1 | ##
 2 | ##  Copyright 1993-2023 NVIDIA Corporation.  All rights reserved.
 3 | ##
 4 | ##  NOTICE TO LICENSEE:
 5 | ##
 6 | ##  This source code and/or documentation ("Licensed Deliverables") are
 7 | ##  subject to NVIDIA intellectual property rights under U.S. and
 8 | ##  international Copyright laws.
 9 | ##
10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 | ##  conditions of a form of NVIDIA software license agreement by and
13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
15 | ##  the contrary in the License Agreement, reproduction or disclosure
16 | ##  of the Licensed Deliverables to any third party without the express
17 | ##  written consent of NVIDIA is prohibited.
18 | ##
19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 | ##  OF THESE LICENSED DELIVERABLES.
33 | ##
34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 | ##  1995), consisting of "commercial computer software" and "commercial
37 | ##  computer software documentation" as such terms are used in 48
38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
42 | ##  only those rights set forth herein.
43 | ##
44 | ##  Any use of the Licensed Deliverables in individual and commercial
45 | ##  software must include, in the user documentation and internal
46 | ##  comments to the code, the above Disclaimer and U.S. Government End
47 | ##  Users Notice.
48 | ##
49 | 
50 | ## *****************************************************************************
51 | ##                                                                               *
52 | ##                                                                               *
53 | ##                                                                               *
54 | ## *****************************************************************************
55 | import ./libpaths
56 | tellCompilerToUseCuda()
when not defined(CUDACC_RTC_MINIMAL):
  ##  Surface-related constants and types of the CUDA runtime
  ##  (CUDART_TYPES group). Skipped entirely when compiling with
  ##  `-d:CUDACC_RTC_MINIMAL`.
  const
    cudaSurfaceType1D* = 0x01
    cudaSurfaceType2D* = 0x02
    cudaSurfaceType3D* = 0x03
    cudaSurfaceTypeCubemap* = 0x0C
    cudaSurfaceType1DLayered* = 0xF1
    cudaSurfaceType2DLayered* = 0xF2
    cudaSurfaceTypeCubemapLayered* = 0xFC
  type
    cudaSurfaceBoundaryMode* {.size: sizeof(cint).} = enum
      ##  CUDA Surface boundary modes.
      ##  Sized like a C `int` so the layout matches the C enum; without the
      ##  `size` pragma Nim would store this enum in a single byte.
      cudaBoundaryModeZero = 0, ## < Zero boundary mode
      cudaBoundaryModeClamp = 1, ## < Clamp boundary mode
      cudaBoundaryModeTrap = 2  ## < Trap boundary mode
    cudaSurfaceFormatMode* {.size: sizeof(cint).} = enum
      ##  CUDA Surface format modes.
      ##  Sized like a C `int` so the layout matches the C enum.
      cudaFormatModeForced = 0, ## < Forced format mode
      cudaFormatModeAuto = 1    ## < Auto format mode
    cudaSurfaceObject_t* = culonglong
      ##  An opaque value that represents a CUDA Surface object
  ##  END CUDART_TYPES
99 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/texture_types.nim:
--------------------------------------------------------------------------------
  1 | ##
  2 | ##  Copyright 1993-2023 NVIDIA Corporation.  All rights reserved.
  3 | ##
  4 | ##  NOTICE TO LICENSEE:
  5 | ##
  6 | ##  This source code and/or documentation ("Licensed Deliverables") are
  7 | ##  subject to NVIDIA intellectual property rights under U.S. and
  8 | ##  international Copyright laws.
  9 | ##
 10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 | ##  conditions of a form of NVIDIA software license agreement by and
 13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 15 | ##  the contrary in the License Agreement, reproduction or disclosure
 16 | ##  of the Licensed Deliverables to any third party without the express
 17 | ##  written consent of NVIDIA is prohibited.
 18 | ##
 19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 | ##  OF THESE LICENSED DELIVERABLES.
 33 | ##
 34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 | ##  1995), consisting of "commercial computer software" and "commercial
 37 | ##  computer software documentation" as such terms are used in 48
 38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 42 | ##  only those rights set forth herein.
 43 | ##
 44 | ##  Any use of the Licensed Deliverables in individual and commercial
 45 | ##  software must include, in the user documentation and internal
 46 | ##  comments to the code, the above Disclaimer and U.S. Government End
 47 | ##  Users Notice.
 48 | ##
 49 | 
 50 | ## *****************************************************************************
 51 | ##                                                                               *
 52 | ##                                                                               *
 53 | ##                                                                               *
 54 | ## *****************************************************************************
 55 | import ./libpaths
 56 | tellCompilerToUseCuda()
 57 | when not defined(CUDACC_RTC_MINIMAL):
 58 |   ##
 59 |   ##  \addtogroup CUDART_TYPES
 60 |   ##
 61 |   ##  @{
 62 |   ##
 63 |   ## *****************************************************************************
 64 |   ##                                                                               *
 65 |   ##                                                                               *
 66 |   ##                                                                               *
 67 |   ## *****************************************************************************
 68 |   const
 69 |     cudaTextureType1D* = 0x01
 70 |     cudaTextureType2D* = 0x02
 71 |     cudaTextureType3D* = 0x03
 72 |     cudaTextureTypeCubemap* = 0x0C
 73 |     cudaTextureType1DLayered* = 0xF1
 74 |     cudaTextureType2DLayered* = 0xF2
 75 |     cudaTextureTypeCubemapLayered* = 0xFC
 76 |   ##
 77 |   ##  CUDA texture address modes
 78 |   ##
 79 |   type
 80 |     cudaTextureAddressMode* = enum
 81 |       cudaAddressModeWrap = 0,  ## < Wrapping address mode
 82 |       cudaAddressModeClamp = 1, ## < Clamp to edge address mode
 83 |       cudaAddressModeMirror = 2, ## < Mirror address mode
 84 |       cudaAddressModeBorder = 3 ## < Border address mode
 85 |   ##
 86 |   ##  CUDA texture filter modes
 87 |   ##
 88 |   type
 89 |     cudaTextureFilterMode* = enum
 90 |       cudaFilterModePoint = 0,  ## < Point filter mode
 91 |       cudaFilterModeLinear = 1  ## < Linear filter mode
 92 |   ##
 93 |   ##  CUDA texture read modes
 94 |   ##
 95 |   type
 96 |     cudaTextureReadMode* = enum
 97 |       cudaReadModeElementType = 0, ## < Read texture as specified element type
 98 |       cudaReadModeNormalizedFloat = 1 ## < Read texture as normalized float
 99 |   ##
100 |   ##  CUDA texture descriptor
101 |   ##
102 |   type
103 |     cudaTextureDesc* {.bycopy.} = object
104 |       ##
105 |       ##  Texture address mode for up to 3 dimensions
106 |       ##
107 |       addressMode*: array[3, cudaTextureAddressMode]
108 |       ##
109 |       ##  Texture filter mode
110 |       ##
111 |       filterMode*: cudaTextureFilterMode
112 |       ##
113 |       ##  Texture read mode
114 |       ##
115 |       readMode*: cudaTextureReadMode
116 |       ##
117 |       ##  Perform sRGB->linear conversion during texture read
118 |       ##
119 |       sRGB*: cint
120 |       ##
121 |       ##  Texture Border Color
122 |       ##
123 |       borderColor*: array[4, cfloat]
124 |       ##
125 |       ##  Indicates whether texture reads are normalized or not
126 |       ##
127 |       normalizedCoords*: cint
128 |       ##
129 |       ##  Limit to the anisotropy ratio
130 |       ##
131 |       maxAnisotropy*: cuint
132 |       ##
133 |       ##  Mipmap filter mode
134 |       ##
135 |       mipmapFilterMode*: cudaTextureFilterMode
136 |       ##
137 |       ##  Offset applied to the supplied mipmap level
138 |       ##
139 |       mipmapLevelBias*: cfloat
140 |       ##
141 |       ##  Lower end of the mipmap level range to clamp access to
142 |       ##
143 |       minMipmapLevelClamp*: cfloat
144 |       ##
145 |       ##  Upper end of the mipmap level range to clamp access to
146 |       ##
147 |       maxMipmapLevelClamp*: cfloat
148 |       ##
149 |       ##  Disable any trilinear filtering optimizations.
150 |       ##
151 |       disableTrilinearOptimization*: cint
152 |       ##
153 |       ##  Enable seamless cube map filtering.
154 |       ##
155 |       seamlessCubemap*: cint
156 | 
157 |   ##
158 |   ##  An opaque value that represents a CUDA texture object
159 |   ##
160 |   type
161 |     cudaTextureObject_t* = culonglong
162 |   ##  @}
163 |   ##  @}
164 |   ##  END CUDART_TYPES
165 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/vector_types.nim:
--------------------------------------------------------------------------------
  1 | ##
  2 | ##  Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
  3 | ##
  4 | ##  NOTICE TO LICENSEE:
  5 | ##
  6 | ##  This source code and/or documentation ("Licensed Deliverables") are
  7 | ##  subject to NVIDIA intellectual property rights under U.S. and
  8 | ##  international Copyright laws.
  9 | ##
 10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 | ##  conditions of a form of NVIDIA software license agreement by and
 13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 15 | ##  the contrary in the License Agreement, reproduction or disclosure
 16 | ##  of the Licensed Deliverables to any third party without the express
 17 | ##  written consent of NVIDIA is prohibited.
 18 | ##
 19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 | ##  OF THESE LICENSED DELIVERABLES.
 33 | ##
 34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 | ##  1995), consisting of "commercial computer software" and "commercial
 37 | ##  computer software documentation" as such terms are used in 48
 38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 42 | ##  only those rights set forth herein.
 43 | ##
 44 | ##  Any use of the Licensed Deliverables in individual and commercial
 45 | ##  software must include, in the user documentation and internal
 46 | ##  comments to the code, the above Disclaimer and U.S. Government End
 47 | ##  Users Notice.
 48 | ##
 49 | import ./libpaths
 50 | tellCompilerToUseCuda()
 51 | type
 52 |   char1* {.importc: "char1", header: "vector_types.h", bycopy.} = object
 53 |     x* {.importc: "x".}: cchar
 54 | 
 55 |   uchar1* {.importc: "uchar1", header: "vector_types.h", bycopy.} = object
 56 |     x* {.importc: "x".}: char
 57 | 
 58 |   char2* {.importc: "char2", header: "vector_types.h", bycopy.} = object
 59 |     x* {.importc: "x".}: cchar
 60 |     y* {.importc: "y".}: cchar
 61 | 
 62 |   uchar2* {.importc: "uchar2", header: "vector_types.h", bycopy.} = object
 63 |     x* {.importc: "x".}: char
 64 |     y* {.importc: "y".}: char
 65 | 
 66 |   char3* {.importc: "char3", header: "vector_types.h", bycopy.} = object
 67 |     x* {.importc: "x".}: cchar
 68 |     y* {.importc: "y".}: cchar
 69 |     z* {.importc: "z".}: cchar
 70 | 
 71 |   uchar3* {.importc: "uchar3", header: "vector_types.h", bycopy.} = object
 72 |     x* {.importc: "x".}: char
 73 |     y* {.importc: "y".}: char
 74 |     z* {.importc: "z".}: char
 75 | 
 76 |   char4* {.importc: "char4", header: "vector_types.h", bycopy.} = object
 77 |     x* {.importc: "x".}: cchar
 78 |     y* {.importc: "y".}: cchar
 79 |     z* {.importc: "z".}: cchar
 80 |     w* {.importc: "w".}: cchar
 81 | 
 82 |   uchar4* {.importc: "uchar4", header: "vector_types.h", bycopy.} = object
 83 |     x* {.importc: "x".}: char
 84 |     y* {.importc: "y".}: char
 85 |     z* {.importc: "z".}: char
 86 |     w* {.importc: "w".}: char
 87 | 
 88 |   short1* {.importc: "short1", header: "vector_types.h", bycopy.} = object
 89 |     x* {.importc: "x".}: cshort
 90 | 
 91 |   ushort1* {.importc: "ushort1", header: "vector_types.h", bycopy.} = object
 92 |     x* {.importc: "x".}: cushort
 93 | 
 94 |   short2* {.importc: "short2", header: "vector_types.h", bycopy.} = object
 95 |     x* {.importc: "x".}: cshort
 96 |     y* {.importc: "y".}: cshort
 97 | 
 98 |   ushort2* {.importc: "ushort2", header: "vector_types.h", bycopy.} = object
 99 |     x* {.importc: "x".}: cushort
100 |     y* {.importc: "y".}: cushort
101 | 
102 |   short3* {.importc: "short3", header: "vector_types.h", bycopy.} = object
103 |     x* {.importc: "x".}: cshort
104 |     y* {.importc: "y".}: cshort
105 |     z* {.importc: "z".}: cshort
106 | 
107 |   ushort3* {.importc: "ushort3", header: "vector_types.h", bycopy.} = object
108 |     x* {.importc: "x".}: cushort
109 |     y* {.importc: "y".}: cushort
110 |     z* {.importc: "z".}: cushort
111 | 
112 |   short4* {.importc: "short4", header: "vector_types.h", bycopy.} = object
113 |     x* {.importc: "x".}: cshort
114 |     y* {.importc: "y".}: cshort
115 |     z* {.importc: "z".}: cshort
116 |     w* {.importc: "w".}: cshort
117 | 
118 |   ushort4* {.importc: "ushort4", header: "vector_types.h", bycopy.} = object
119 |     x* {.importc: "x".}: cushort
120 |     y* {.importc: "y".}: cushort
121 |     z* {.importc: "z".}: cushort
122 |     w* {.importc: "w".}: cushort
123 | 
124 |   int1* {.importc: "int1", header: "vector_types.h", bycopy.} = object
125 |     x* {.importc: "x".}: cint
126 | 
127 |   uint1* {.importc: "uint1", header: "vector_types.h", bycopy.} = object
128 |     x* {.importc: "x".}: cuint
129 | 
130 |   int2* {.importc: "int2", header: "vector_types.h", bycopy.} = object
131 |     x* {.importc: "x".}: cint
132 |     y* {.importc: "y".}: cint
133 | 
134 |   uint2* {.importc: "uint2", header: "vector_types.h", bycopy.} = object
135 |     x* {.importc: "x".}: cuint
136 |     y* {.importc: "y".}: cuint
137 | 
138 |   int3* {.importc: "int3", header: "vector_types.h", bycopy.} = object
139 |     x* {.importc: "x".}: cint
140 |     y* {.importc: "y".}: cint
141 |     z* {.importc: "z".}: cint
142 | 
143 |   uint3* {.importc: "uint3", header: "vector_types.h", bycopy.} = object
144 |     x* {.importc: "x".}: cuint
145 |     y* {.importc: "y".}: cuint
146 |     z* {.importc: "z".}: cuint
147 | 
148 |   int4* {.importc: "int4", header: "vector_types.h", bycopy.} = object
149 |     x* {.importc: "x".}: cint
150 |     y* {.importc: "y".}: cint
151 |     z* {.importc: "z".}: cint
152 |     w* {.importc: "w".}: cint
153 | 
154 |   uint4* {.importc: "uint4", header: "vector_types.h", bycopy.} = object
155 |     x* {.importc: "x".}: cuint
156 |     y* {.importc: "y".}: cuint
157 |     z* {.importc: "z".}: cuint
158 |     w* {.importc: "w".}: cuint
159 | 
160 |   long1* {.importc: "long1", header: "vector_types.h", bycopy.} = object
161 |     x* {.importc: "x".}: clong
162 | 
163 |   ulong1* {.importc: "ulong1", header: "vector_types.h", bycopy.} = object
164 |     x* {.importc: "x".}: culong
165 | 
166 |   long2* {.importc: "long2", header: "vector_types.h", bycopy.} = object
167 |     x* {.importc: "x".}: clong
168 |     y* {.importc: "y".}: clong
169 | 
170 |   ulong2* {.importc: "ulong2", header: "vector_types.h", bycopy.} = object
171 |     x* {.importc: "x".}: culong
172 |     y* {.importc: "y".}: culong
173 | 
174 |   long3* {.importc: "long3", header: "vector_types.h", bycopy.} = object
175 |     x* {.importc: "x".}: clong
176 |     y* {.importc: "y".}: clong
177 |     z* {.importc: "z".}: clong
178 | 
179 |   ulong3* {.importc: "ulong3", header: "vector_types.h", bycopy.} = object
180 |     x* {.importc: "x".}: culong
181 |     y* {.importc: "y".}: culong
182 |     z* {.importc: "z".}: culong
183 | 
184 |   long4* {.importc: "long4", header: "vector_types.h", bycopy.} = object
185 |     x* {.importc: "x".}: clong
186 |     y* {.importc: "y".}: clong
187 |     z* {.importc: "z".}: clong
188 |     w* {.importc: "w".}: clong
189 | 
190 |   ulong4* {.importc: "ulong4", header: "vector_types.h", bycopy.} = object
191 |     x* {.importc: "x".}: culong
192 |     y* {.importc: "y".}: culong
193 |     z* {.importc: "z".}: culong
194 |     w* {.importc: "w".}: culong
195 | 
196 |   float1* {.importc: "float1", header: "vector_types.h", bycopy.} = object
197 |     x* {.importc: "x".}: cfloat
198 | 
199 |   float2* {.importc: "float2", header: "vector_types.h", bycopy.} = object
200 |     x* {.importc: "x".}: cfloat
201 |     y* {.importc: "y".}: cfloat
202 | 
203 |   float3* {.importc: "float3", header: "vector_types.h", bycopy.} = object
204 |     x* {.importc: "x".}: cfloat
205 |     y* {.importc: "y".}: cfloat
206 |     z* {.importc: "z".}: cfloat
207 | 
208 |   float4* {.importc: "float4", header: "vector_types.h", bycopy.} = object
209 |     x* {.importc: "x".}: cfloat
210 |     y* {.importc: "y".}: cfloat
211 |     z* {.importc: "z".}: cfloat
212 |     w* {.importc: "w".}: cfloat
213 | 
214 |   longlong1* {.importc: "longlong1", header: "vector_types.h", bycopy.} = object
215 |     x* {.importc: "x".}: clonglong
216 | 
217 |   ulonglong1* {.importc: "ulonglong1", header: "vector_types.h", bycopy.} = object
218 |     x* {.importc: "x".}: culonglong
219 | 
220 |   longlong2* {.importc: "longlong2", header: "vector_types.h", bycopy.} = object
221 |     x* {.importc: "x".}: clonglong
222 |     y* {.importc: "y".}: clonglong
223 | 
224 |   ulonglong2* {.importc: "ulonglong2", header: "vector_types.h", bycopy.} = object
225 |     x* {.importc: "x".}: culonglong
226 |     y* {.importc: "y".}: culonglong
227 | 
228 |   longlong3* {.importc: "longlong3", header: "vector_types.h", bycopy.} = object
229 |     x* {.importc: "x".}: clonglong
230 |     y* {.importc: "y".}: clonglong
231 |     z* {.importc: "z".}: clonglong
232 | 
233 |   ulonglong3* {.importc: "ulonglong3", header: "vector_types.h", bycopy.} = object
234 |     x* {.importc: "x".}: culonglong
235 |     y* {.importc: "y".}: culonglong
236 |     z* {.importc: "z".}: culonglong
237 | 
238 |   longlong4* {.importc: "longlong4", header: "vector_types.h", bycopy.} = object
239 |     x* {.importc: "x".}: clonglong
240 |     y* {.importc: "y".}: clonglong
241 |     z* {.importc: "z".}: clonglong
242 |     w* {.importc: "w".}: clonglong
243 | 
244 |   ulonglong4* {.importc: "ulonglong4", header: "vector_types.h", bycopy.} = object
245 |     x* {.importc: "x".}: culonglong
246 |     y* {.importc: "y".}: culonglong
247 |     z* {.importc: "z".}: culonglong
248 |     w* {.importc: "w".}: culonglong
249 | 
250 |   double1* {.importc: "double1", header: "vector_types.h", bycopy.} = object
251 |     x* {.importc: "x".}: cdouble
252 | 
253 |   double2* {.importc: "double2", header: "vector_types.h", bycopy.} = object
254 |     x* {.importc: "x".}: cdouble
255 |     y* {.importc: "y".}: cdouble
256 | 
257 |   double3* {.importc: "double3", header: "vector_types.h", bycopy.} = object
258 |     x* {.importc: "x".}: cdouble
259 |     y* {.importc: "y".}: cdouble
260 |     z* {.importc: "z".}: cdouble
261 | 
262 |   double4* {.importc: "double4", header: "vector_types.h", bycopy.} = object
263 |     x* {.importc: "x".}: cdouble
264 |     y* {.importc: "y".}: cdouble
265 |     z* {.importc: "z".}: cdouble
266 |     w* {.importc: "w".}: cdouble
267 | 
268 | 
269 | ## *****************************************************************************
270 | ##                                                                               *
271 | ##                                                                               *
272 | ##                                                                               *
273 | ## *****************************************************************************
274 | 
275 | 
276 | ## *****************************************************************************
277 | ##                                                                               *
278 | ##                                                                               *
279 | ##                                                                               *
280 | ## *****************************************************************************
281 | 
type
  # Three unsigned components x, y, z; the definition itself comes from
  # "vector_types.h" through `importc`.
  dim3* {.bycopy, importc: "dim3", header: "vector_types.h".} = object
    x* {.importc: "x".}, y* {.importc: "y".}, z* {.importc: "z".}: cuint
287 | 
288 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/check.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import ./cublas_api
16 | import ./cublas_v2
17 | import ./cuComplex
18 | import ./cuda_occupancy
19 | import ./cuda_runtime_api
20 | import ./cudnn
21 | import ./cufft
22 | import ./curand
23 | import ./cusolver_common
24 | import ./cusolverDn
25 | import ./cusolverRf
26 | import ./cusolverSp
27 | import ./cusparse
28 | import ./driver_types
29 | import ./library_types
30 | import ./nvblas
31 | import ./nvgraph
32 | import ./surface_types
33 | import ./texture_types
34 | import ./vector_types
35 | 
type
  # One exception type per status family handled by the `check` overloads in
  # this module. All derive from `IOError`, so `except IOError` catches any.
  CudaError* = object of IOError     ## raised for a failing `cudaError_t`
  CudaOccError* = object of IOError  ## raised for a failing `cudaOccError`
  CublasError* = object of IOError   ## raised for a failing `cublasStatus_t`
  CufftError* = object of IOError    ## raised for a failing `cufftResult`
  CusparseError* = object of IOError ## raised for a failing `cusparseStatus_t`
  CusolverError* = object of IOError ## raised for a failing `cusolverStatus_t`
  CurandError* = object of IOError   ## raised for a failing `curandStatus`
  CudnnError* = object of IOError    ## raised for a failing `cudnnStatus_t`
  NVGraphError* = object of IOError  ## raised for a failing `nvgraphStatus_t`
46 | 
template check*(a: cudaError_t) =
  ## Raises `CudaError` unless `a` equals `cudaSuccess`. The message is the
  ## status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != cudaSuccess:
    let details = $status & " " & $int(status)
    raise newException(CudaError, details)
51 | 
template check*(a: cudaOccError) =
  ## Raises `CudaOccError` unless `a` equals `CUDA_OCC_SUCCESS`. The message
  ## is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CUDA_OCC_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CudaOccError, details)
56 | 
template check*(a: cublasStatus_t) =
  ## Raises `CublasError` unless `a` equals `CUBLAS_STATUS_SUCCESS`. The
  ## message is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CUBLAS_STATUS_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CublasError, details)
61 | 
template check*(a: cufftResult) =
  ## Raises `CufftError` unless `a` equals `CUFFT_SUCCESS`. The message is
  ## the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CUFFT_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CufftError, details)
66 | 
template check*(a: cusparseStatus_t) =
  ## Raises `CusparseError` unless `a` equals `CUSPARSE_STATUS_SUCCESS`. The
  ## message is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CUSPARSE_STATUS_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CusparseError, details)
71 | 
template check*(a: cusolverStatus_t) =
  ## Raises `CusolverError` unless `a` equals `CUSOLVER_STATUS_SUCCESS`. The
  ## message is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CUSOLVER_STATUS_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CusolverError, details)
76 | 
template check*(a: curandStatus) =
  ## Raises `CurandError` unless `a` equals `CURAND_STATUS_SUCCESS`. The
  ## message is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CURAND_STATUS_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CurandError, details)
81 | 
template check*(a: cudnnStatus_t) =
  ## Raises `CudnnError` unless `a` equals `CUDNN_STATUS_SUCCESS`. The
  ## message is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != CUDNN_STATUS_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(CudnnError, details)
86 | 
template check*(a: nvgraphStatus_t) =
  ## Raises `NVGraphError` unless `a` equals `NVGRAPH_STATUS_SUCCESS`. The
  ## message is the status rendered with `$`, a space, then its integer value.
  let status = a # bound once, so an argument with side effects runs one time
  if status != NVGRAPH_STATUS_SUCCESS:
    let details = $status & " " & $int(status)
    raise newException(NVGraphError, details)
91 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/cuComplex.nim:
--------------------------------------------------------------------------------
  1 | from math import sqrt
  2 | 
  3 | template sqrtf(x: cfloat): cfloat = sqrt(x)
  4 | 
  5 | template fabsf(x: cfloat): cfloat = abs(x)
  6 | 
  7 | template fabs(x: float): float = abs(x)
  8 | 
  9 | template `div`(a: static[float64], b: cfloat): cfloat = cfloat(a) / b
 10 | 
 11 | template `div`(a: cfloat, b: cfloat): cfloat = a / b
 12 | ## 
 13 | ##  Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
 14 | ## 
 15 | ##  NOTICE TO LICENSEE:
 16 | ## 
 17 | ##  This source code and/or documentation ("Licensed Deliverables") are
 18 | ##  subject to NVIDIA intellectual property rights under U.S. and
 19 | ##  international Copyright laws.
 20 | ## 
 21 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 22 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 23 | ##  conditions of a form of NVIDIA software license agreement by and
 24 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 25 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 26 | ##  the contrary in the License Agreement, reproduction or disclosure
 27 | ##  of the Licensed Deliverables to any third party without the express
 28 | ##  written consent of NVIDIA is prohibited.
 29 | ## 
 30 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 31 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 32 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 33 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 34 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 35 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 36 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 37 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 38 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 39 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 40 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 41 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 42 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 43 | ##  OF THESE LICENSED DELIVERABLES.
 44 | ## 
 45 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 46 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 47 | ##  1995), consisting of "commercial computer software" and "commercial
 48 | ##  computer software documentation" as such terms are used in 48
 49 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 50 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 51 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 52 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 53 | ##  only those rights set forth herein.
 54 | ## 
 55 | ##  Any use of the Licensed Deliverables in individual and commercial
 56 | ##  software must include, in the user documentation and internal
 57 | ##  comments to the code, the above Disclaimer and U.S. Government End
 58 | ##  Users Notice.
 59 | ## 
 60 | 
 61 | when not defined(CU_COMPLEX_H):
 62 |   const
 63 |     CU_COMPLEX_H* = true
 64 |   ##  When trying to include C header file in C++ Code extern "C" is required
 65 |   ##  But the Standard QNX headers already have ifdef extern in them when compiling C++ Code
 66 |   ##  extern "C" cannot be nested
 67 |   ##  Hence keep the header out of extern "C" block
 68 |   ## 
 69 |   import
 70 |     vector_types
 71 | 
 72 |   type
 73 |     cuFloatComplex* = float2
 74 |   proc cuCrealf*(x: cuFloatComplex): cfloat =
 75 |     return x.x
 76 | 
 77 |   proc cuCimagf*(x: cuFloatComplex): cfloat =
 78 |     return x.y
 79 | 
 80 |   proc make_cuFloatComplex*(r: cfloat; i: cfloat): cuFloatComplex =
 81 |     var res: cuFloatComplex
 82 |     res.x = r
 83 |     res.y = i
 84 |     return res
 85 | 
 86 |   proc cuConjf*(x: cuFloatComplex): cuFloatComplex =
 87 |     return make_cuFloatComplex(cuCrealf(x), - cuCimagf(x))
 88 | 
 89 |   proc cuCaddf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
 90 |     return make_cuFloatComplex(cuCrealf(x) + cuCrealf(y), cuCimagf(x) + cuCimagf(y))
 91 | 
 92 |   proc cuCsubf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
 93 |     return make_cuFloatComplex(cuCrealf(x) - cuCrealf(y), cuCimagf(x) - cuCimagf(y))
 94 | 
 95 |   ##  This implementation could suffer from intermediate overflow even though
 96 |   ##  the final result would be in range. However, various implementations do
 97 |   ##  not guard against this (presumably to avoid losing performance), so we 
 98 |   ##  don't do it either to stay competitive.
 99 |   ## 
100 |   proc cuCmulf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
101 |     var prod: cuFloatComplex
102 |     prod = make_cuFloatComplex((cuCrealf(x) * cuCrealf(y)) -
103 |         (cuCimagf(x) * cuCimagf(y)), (cuCrealf(x) * cuCimagf(y)) +
104 |         (cuCimagf(x) * cuCrealf(y)))
105 |     return prod
106 | 
107 |   ##  This implementation guards against intermediate underflow and overflow
108 |   ##  by scaling. Such guarded implementations are usually the default for
109 |   ##  complex library implementations, with some also offering an unguarded,
110 |   ##  faster version.
111 |   ## 
112 |   proc cuCdivf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
113 |     var quot: cuFloatComplex
114 |     var s: cfloat = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y))
115 |     var oos: cfloat = 1.0 div s
116 |     var ars: cfloat = cuCrealf(x) * oos
117 |     var ais: cfloat = cuCimagf(x) * oos
118 |     var brs: cfloat = cuCrealf(y) * oos
119 |     var bis: cfloat = cuCimagf(y) * oos
120 |     s = (brs * brs) + (bis * bis)
121 |     oos = 1.0 div s
122 |     quot = make_cuFloatComplex(((ars * brs) + (ais * bis)) * oos,
123 |                              ((ais * brs) - (ars * bis)) * oos)
124 |     return quot
125 | 
126 |   ##  
127 |   ##  We would like to call hypotf(), but it's not available on all platforms.
128 |   ##  This discrete implementation guards against intermediate underflow and 
129 |   ##  overflow by scaling. Otherwise we would lose half the exponent range. 
130 |   ##  There are various ways of doing guarded computation. For now chose the 
131 |   ##  simplest and fastest solution, however this may suffer from inaccuracies 
132 |   ##  if sqrt and division are not IEEE compliant. 
133 |   ## 
134 |   proc cuCabsf*(x: cuFloatComplex): cfloat =
135 |     var a: cfloat = cuCrealf(x)
136 |     var b: cfloat = cuCimagf(x)
137 |     var
138 |       v: cfloat
139 |       w: cfloat
140 |       t: cfloat
141 |     a = fabsf(a)
142 |     b = fabsf(b)
143 |     if a > b:
144 |       v = a
145 |       w = b
146 |     else:
147 |       v = b
148 |       w = a
149 |     t = w div v
150 |     t = 1.0 + t * t
151 |     t = v * sqrtf(t)
152 |     if (v == 0.0) or (v > 3.402823466e+38) or (w > 3.402823466e+38):
153 |       t = v + w
154 |     return t
155 | 
156 |   ##  Double precision
157 |   type
158 |     cuDoubleComplex* = double2
159 |   proc cuCreal*(x: cuDoubleComplex): cdouble =
160 |     return x.x
161 | 
162 |   proc cuCimag*(x: cuDoubleComplex): cdouble =
163 |     return x.y
164 | 
165 |   proc make_cuDoubleComplex*(r: cdouble; i: cdouble): cuDoubleComplex =
166 |     var res: cuDoubleComplex
167 |     res.x = r
168 |     res.y = i
169 |     return res
170 | 
171 |   proc cuConj*(x: cuDoubleComplex): cuDoubleComplex =
172 |     return make_cuDoubleComplex(cuCreal(x), - cuCimag(x))
173 | 
174 |   proc cuCadd*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
175 |     return make_cuDoubleComplex(cuCreal(x) + cuCreal(y), cuCimag(x) + cuCimag(y))
176 | 
177 |   proc cuCsub*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
178 |     return make_cuDoubleComplex(cuCreal(x) - cuCreal(y), cuCimag(x) - cuCimag(y))
179 | 
180 |   ##  This implementation could suffer from intermediate overflow even though
181 |   ##  the final result would be in range. However, various implementations do
182 |   ##  not guard against this (presumably to avoid losing performance), so we 
183 |   ##  don't do it either to stay competitive.
184 |   ## 
185 |   proc cuCmul*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
186 |     var prod: cuDoubleComplex
187 |     prod = make_cuDoubleComplex((cuCreal(x) * cuCreal(y)) -
188 |         (cuCimag(x) * cuCimag(y)), (cuCreal(x) * cuCimag(y)) +
189 |         (cuCimag(x) * cuCreal(y)))
190 |     return prod
191 | 
192 |   ##  This implementation guards against intermediate underflow and overflow
193 |   ##  by scaling. Such guarded implementations are usually the default for
194 |   ##  complex library implementations, with some also offering an unguarded,
195 |   ##  faster version.
196 |   ## 
197 |   proc cuCdiv*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
198 |     var quot: cuDoubleComplex
199 |     var s: cdouble = (fabs(cuCreal(y))) + (fabs(cuCimag(y)))
200 |     var oos: cdouble = 1.0 div s
201 |     var ars: cdouble = cuCreal(x) * oos
202 |     var ais: cdouble = cuCimag(x) * oos
203 |     var brs: cdouble = cuCreal(y) * oos
204 |     var bis: cdouble = cuCimag(y) * oos
205 |     s = (brs * brs) + (bis * bis)
206 |     oos = 1.0 div s
207 |     quot = make_cuDoubleComplex(((ars * brs) + (ais * bis)) * oos,
208 |                               ((ais * brs) - (ars * bis)) * oos)
209 |     return quot
210 | 
211 |   ##  This implementation guards against intermediate underflow and overflow
212 |   ##  by scaling. Otherwise we would lose half the exponent range. There are
213 |   ##  various ways of doing guarded computation. For now chose the simplest
214 |   ##  and fastest solution, however this may suffer from inaccuracies if sqrt
215 |   ##  and division are not IEEE compliant.
216 |   ## 
217 |   proc cuCabs*(x: cuDoubleComplex): cdouble =
218 |     var a: cdouble = cuCreal(x)
219 |     var b: cdouble = cuCimag(x)
220 |     var
221 |       v: cdouble
222 |       w: cdouble
223 |       t: cdouble
224 |     a = fabs(a)
225 |     b = fabs(b)
226 |     if a > b:
227 |       v = a
228 |       w = b
229 |     else:
230 |       v = b
231 |       w = a
232 |     t = w div v
233 |     t = 1.0 + t * t
234 |     t = v * sqrt(t)
235 |     if (v == 0.0) or (v > 1.797693134862316e+308) or (w > 1.797693134862316e+308):
236 |       t = v + w
237 |     return t
238 | 
239 |   ##  aliases
240 |   type
241 |     cuComplex* = cuFloatComplex
242 |   proc make_cuComplex*(x: cfloat; y: cfloat): cuComplex =
243 |     return make_cuFloatComplex(x, y)
244 | 
245 |   ##  float-to-double promotion
246 |   proc cuComplexFloatToDouble*(c: cuFloatComplex): cuDoubleComplex =
247 |     return make_cuDoubleComplex(cast[cdouble](cuCrealf(c)),
248 |                                cast[cdouble](cuCimagf(c)))
249 |   ##  Double-to-float demotion: value conversion (not a bit-level `cast`).
250 |   proc cuComplexDoubleToFloat*(c: cuDoubleComplex): cuFloatComplex =
251 |     return make_cuFloatComplex(cfloat(cuCreal(c)), cfloat(cuCimag(c)))
252 | 
253 |   proc cuCfmaf*(x: cuComplex; y: cuComplex; d: cuComplex): cuComplex =
254 |     ## Single-precision complex multiply-add: returns `x * y + d`.
255 |     let (xr, xi) = (cuCrealf(x), cuCimagf(x))
256 |     let (yr, yi) = (cuCrealf(y), cuCimagf(y))
257 |     # Same accumulation order as before: the xr term plus `d`, then the xi term.
258 |     let re = - (xi * yi) + ((xr * yr) + cuCrealf(d))
259 |     let im = (xi * yr) + ((xr * yi) + cuCimagf(d))
260 |     make_cuComplex(re, im)
261 | 
262 |   proc cuCfma*(x: cuDoubleComplex; y: cuDoubleComplex; d: cuDoubleComplex): cuDoubleComplex =
263 |     ## Double-precision complex multiply-add: returns `x * y + d`.
264 |     let (xr, xi) = (cuCreal(x), cuCimag(x))
265 |     let (yr, yi) = (cuCreal(y), cuCimag(y))
266 |     # Same accumulation order as before: the xr term plus `d`, then the xi term.
267 |     let re = - (xi * yi) + ((xr * yr) + cuCreal(d))
268 |     let im = (xi * yr) + ((xr * yi) + cuCimag(d))
269 |     make_cuDoubleComplex(re, im)
270 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/cublas_v2.nim:
--------------------------------------------------------------------------------
  1 | ## 
  2 | ##  Copyright 1993-2014 NVIDIA Corporation.  All rights reserved.
  3 | ## 
  4 | ##  NOTICE TO LICENSEE:
  5 | ## 
  6 | ##  This source code and/or documentation ("Licensed Deliverables") are
  7 | ##  subject to NVIDIA intellectual property rights under U.S. and
  8 | ##  international Copyright laws.
  9 | ## 
 10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 | ##  conditions of a form of NVIDIA software license agreement by and
 13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 15 | ##  the contrary in the License Agreement, reproduction or disclosure
 16 | ##  of the Licensed Deliverables to any third party without the express
 17 | ##  written consent of NVIDIA is prohibited.
 18 | ## 
 19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 | ##  OF THESE LICENSED DELIVERABLES.
 33 | ## 
 34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 | ##  1995), consisting of "commercial computer software" and "commercial
 37 | ##  computer software documentation" as such terms are used in 48
 38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 42 | ##  only those rights set forth herein.
 43 | ## 
 44 | ##  Any use of the Licensed Deliverables in individual and commercial
 45 | ##  software must include, in the user documentation and internal
 46 | ##  comments to the code, the above Disclaimer and U.S. Government End
 47 | ##  Users Notice.
 48 | ## 
 49 | ## 
 50 | ##  This is the public header file for the new CUBLAS library API; it maps the generic
 51 | ##  cuBLAS function names to the actual _v2 implementations.
 52 | ## 
 53 | 
 54 | when not defined(CUBLAS_V2_H):  # NOTE(review): `defined` tests -d: switches, not the const below
 55 |   const
 56 |     CUBLAS_V2_H* = true
 57 |   import
 58 |     cublas_api
 59 | 
 60 |   const                         # context / version / stream / pointer-mode names -> _v2 symbols
 61 |     cublasCreate* = cublasCreate_v2
 62 |     cublasDestroy* = cublasDestroy_v2
 63 |     cublasGetVersion* = cublasGetVersion_v2
 64 |     cublasSetStream* = cublasSetStream_v2
 65 |     cublasGetStream* = cublasGetStream_v2
 66 |     cublasGetPointerMode* = cublasGetPointerMode_v2
 67 |     cublasSetPointerMode* = cublasSetPointerMode_v2
 68 |   ##  Blas1 Routines (vector-vector); each name aliases its _v2 entry point
 69 |   const
 70 |     cublasSnrm2* = cublasSnrm2_v2
 71 |     cublasDnrm2* = cublasDnrm2_v2
 72 |     cublasScnrm2* = cublasScnrm2_v2
 73 |     cublasDznrm2* = cublasDznrm2_v2
 74 |     cublasSdot* = cublasSdot_v2
 75 |     cublasDdot* = cublasDdot_v2
 76 |     cublasCdotu* = cublasCdotu_v2
 77 |     cublasCdotc* = cublasCdotc_v2
 78 |     cublasZdotu* = cublasZdotu_v2
 79 |     cublasZdotc* = cublasZdotc_v2
 80 |     cublasSscal* = cublasSscal_v2
 81 |     cublasDscal* = cublasDscal_v2
 82 |     cublasCscal* = cublasCscal_v2
 83 |     cublasCsscal* = cublasCsscal_v2
 84 |     cublasZscal* = cublasZscal_v2
 85 |     cublasZdscal* = cublasZdscal_v2
 86 |     cublasSaxpy* = cublasSaxpy_v2
 87 |     cublasDaxpy* = cublasDaxpy_v2
 88 |     cublasCaxpy* = cublasCaxpy_v2
 89 |     cublasZaxpy* = cublasZaxpy_v2
 90 |     cublasScopy* = cublasScopy_v2
 91 |     cublasDcopy* = cublasDcopy_v2
 92 |     cublasCcopy* = cublasCcopy_v2
 93 |     cublasZcopy* = cublasZcopy_v2
 94 |     cublasSswap* = cublasSswap_v2
 95 |     cublasDswap* = cublasDswap_v2
 96 |     cublasCswap* = cublasCswap_v2
 97 |     cublasZswap* = cublasZswap_v2
 98 |     cublasIsamax* = cublasIsamax_v2
 99 |     cublasIdamax* = cublasIdamax_v2
100 |     cublasIcamax* = cublasIcamax_v2
101 |     cublasIzamax* = cublasIzamax_v2
102 |     cublasIsamin* = cublasIsamin_v2
103 |     cublasIdamin* = cublasIdamin_v2
104 |     cublasIcamin* = cublasIcamin_v2
105 |     cublasIzamin* = cublasIzamin_v2
106 |     cublasSasum* = cublasSasum_v2
107 |     cublasDasum* = cublasDasum_v2
108 |     cublasScasum* = cublasScasum_v2
109 |     cublasDzasum* = cublasDzasum_v2
110 |     cublasSrot* = cublasSrot_v2
111 |     cublasDrot* = cublasDrot_v2
112 |     cublasCrot* = cublasCrot_v2
113 |     cublasCsrot* = cublasCsrot_v2
114 |     cublasZrot* = cublasZrot_v2
115 |     cublasZdrot* = cublasZdrot_v2
116 |     cublasSrotg* = cublasSrotg_v2
117 |     cublasDrotg* = cublasDrotg_v2
118 |     cublasCrotg* = cublasCrotg_v2
119 |     cublasZrotg* = cublasZrotg_v2
120 |     cublasSrotm* = cublasSrotm_v2
121 |     cublasDrotm* = cublasDrotm_v2
122 |     cublasSrotmg* = cublasSrotmg_v2
123 |     cublasDrotmg* = cublasDrotmg_v2
124 |   ##  Blas2 Routines (matrix-vector); each name aliases its _v2 entry point
125 |   const
126 |     cublasSgemv* = cublasSgemv_v2
127 |     cublasDgemv* = cublasDgemv_v2
128 |     cublasCgemv* = cublasCgemv_v2
129 |     cublasZgemv* = cublasZgemv_v2
130 |     cublasSgbmv* = cublasSgbmv_v2
131 |     cublasDgbmv* = cublasDgbmv_v2
132 |     cublasCgbmv* = cublasCgbmv_v2
133 |     cublasZgbmv* = cublasZgbmv_v2
134 |     cublasStrmv* = cublasStrmv_v2
135 |     cublasDtrmv* = cublasDtrmv_v2
136 |     cublasCtrmv* = cublasCtrmv_v2
137 |     cublasZtrmv* = cublasZtrmv_v2
138 |     cublasStbmv* = cublasStbmv_v2
139 |     cublasDtbmv* = cublasDtbmv_v2
140 |     cublasCtbmv* = cublasCtbmv_v2
141 |     cublasZtbmv* = cublasZtbmv_v2
142 |     cublasStpmv* = cublasStpmv_v2
143 |     cublasDtpmv* = cublasDtpmv_v2
144 |     cublasCtpmv* = cublasCtpmv_v2
145 |     cublasZtpmv* = cublasZtpmv_v2
146 |     cublasStrsv* = cublasStrsv_v2
147 |     cublasDtrsv* = cublasDtrsv_v2
148 |     cublasCtrsv* = cublasCtrsv_v2
149 |     cublasZtrsv* = cublasZtrsv_v2
150 |     cublasStpsv* = cublasStpsv_v2
151 |     cublasDtpsv* = cublasDtpsv_v2
152 |     cublasCtpsv* = cublasCtpsv_v2
153 |     cublasZtpsv* = cublasZtpsv_v2
154 |     cublasStbsv* = cublasStbsv_v2
155 |     cublasDtbsv* = cublasDtbsv_v2
156 |     cublasCtbsv* = cublasCtbsv_v2
157 |     cublasZtbsv* = cublasZtbsv_v2
158 |     cublasSsymv* = cublasSsymv_v2
159 |     cublasDsymv* = cublasDsymv_v2
160 |     cublasCsymv* = cublasCsymv_v2
161 |     cublasZsymv* = cublasZsymv_v2
162 |     cublasChemv* = cublasChemv_v2
163 |     cublasZhemv* = cublasZhemv_v2
164 |     cublasSsbmv* = cublasSsbmv_v2
165 |     cublasDsbmv* = cublasDsbmv_v2
166 |     cublasChbmv* = cublasChbmv_v2
167 |     cublasZhbmv* = cublasZhbmv_v2
168 |     cublasSspmv* = cublasSspmv_v2
169 |     cublasDspmv* = cublasDspmv_v2
170 |     cublasChpmv* = cublasChpmv_v2
171 |     cublasZhpmv* = cublasZhpmv_v2
172 |     cublasSger* = cublasSger_v2
173 |     cublasDger* = cublasDger_v2
174 |     cublasCgeru* = cublasCgeru_v2
175 |     cublasCgerc* = cublasCgerc_v2
176 |     cublasZgeru* = cublasZgeru_v2
177 |     cublasZgerc* = cublasZgerc_v2
178 |     cublasSsyr* = cublasSsyr_v2
179 |     cublasDsyr* = cublasDsyr_v2
180 |     cublasCsyr* = cublasCsyr_v2
181 |     cublasZsyr* = cublasZsyr_v2
182 |     cublasCher* = cublasCher_v2
183 |     cublasZher* = cublasZher_v2
184 |     cublasSspr* = cublasSspr_v2
185 |     cublasDspr* = cublasDspr_v2
186 |     cublasChpr* = cublasChpr_v2
187 |     cublasZhpr* = cublasZhpr_v2
188 |     cublasSsyr2* = cublasSsyr2_v2
189 |     cublasDsyr2* = cublasDsyr2_v2
190 |     cublasCsyr2* = cublasCsyr2_v2
191 |     cublasZsyr2* = cublasZsyr2_v2
192 |     cublasCher2* = cublasCher2_v2
193 |     cublasZher2* = cublasZher2_v2
194 |     cublasSspr2* = cublasSspr2_v2
195 |     cublasDspr2* = cublasDspr2_v2
196 |     cublasChpr2* = cublasChpr2_v2
197 |     cublasZhpr2* = cublasZhpr2_v2
198 |   ##  Blas3 Routines (matrix-matrix); each name aliases its _v2 entry point
199 |   const
200 |     cublasSgemm* = cublasSgemm_v2
201 |     cublasDgemm* = cublasDgemm_v2
202 |     cublasCgemm* = cublasCgemm_v2
203 |     cublasZgemm* = cublasZgemm_v2
204 |     cublasSsyrk* = cublasSsyrk_v2
205 |     cublasDsyrk* = cublasDsyrk_v2
206 |     cublasCsyrk* = cublasCsyrk_v2
207 |     cublasZsyrk* = cublasZsyrk_v2
208 |     cublasCherk* = cublasCherk_v2
209 |     cublasZherk* = cublasZherk_v2
210 |     cublasSsyr2k* = cublasSsyr2k_v2
211 |     cublasDsyr2k* = cublasDsyr2k_v2
212 |     cublasCsyr2k* = cublasCsyr2k_v2
213 |     cublasZsyr2k* = cublasZsyr2k_v2
214 |     cublasCher2k* = cublasCher2k_v2
215 |     cublasZher2k* = cublasZher2k_v2
216 |     cublasSsymm* = cublasSsymm_v2
217 |     cublasDsymm* = cublasDsymm_v2
218 |     cublasCsymm* = cublasCsymm_v2
219 |     cublasZsymm* = cublasZsymm_v2
220 |     cublasChemm* = cublasChemm_v2
221 |     cublasZhemm* = cublasZhemm_v2
222 |     cublasStrsm* = cublasStrsm_v2
223 |     cublasDtrsm* = cublasDtrsm_v2
224 |     cublasCtrsm* = cublasCtrsm_v2
225 |     cublasZtrsm* = cublasZtrsm_v2
226 |     cublasStrmm* = cublasStrmm_v2
227 |     cublasDtrmm* = cublasDtrmm_v2
228 |     cublasCtrmm* = cublasCtrmm_v2
229 |     cublasZtrmm* = cublasZtrmm_v2


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/cusolver_common.nim:
--------------------------------------------------------------------------------
 1 | {.deadCodeElim: on.}
 2 | when defined(windows):          # Windows: link cusolver.lib located under %CUDA_PATH%
 3 |   import os
 4 |   {.passL: "\"" & os.getEnv("CUDA_PATH") / "lib/x64" / "cusolver.lib" & "\"".}
 5 |   {.pragma: dyn.}               # no dynlib here: symbols are resolved at link time
 6 | elif defined(macosx):           # macOS: load libcusolver.dylib through dynlib
 7 |   const
 8 |     libName = "libcusolver.dylib"
 9 |   {.pragma: dyn, dynlib: libName.}
10 | else:                           # other platforms: load libcusolver.so through dynlib
11 |   const
12 |     libName = "libcusolver.so"
13 |   {.pragma: dyn, dynlib: libName.}
14 | import
15 |   library_types
16 | 
17 | ##
18 | ##  Copyright 2014 NVIDIA Corporation.  All rights reserved.
19 | ##
20 | ##  NOTICE TO LICENSEE:
21 | ##
22 | ##  This source code and/or documentation ("Licensed Deliverables") are
23 | ##  subject to NVIDIA intellectual property rights under U.S. and
24 | ##  international Copyright laws.
25 | ##
26 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
27 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
28 | ##  conditions of a form of NVIDIA software license agreement by and
29 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
30 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
31 | ##  the contrary in the License Agreement, reproduction or disclosure
32 | ##  of the Licensed Deliverables to any third party without the express
33 | ##  written consent of NVIDIA is prohibited.
34 | ##
35 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
36 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
37 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
38 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
39 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
40 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
41 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
42 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
43 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
44 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
45 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
46 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
47 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
48 | ##  OF THESE LICENSED DELIVERABLES.
49 | ##
50 | ##  U.S. Government End Users.  These Licensed Deliverables are a
51 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
52 | ##  1995), consisting of "commercial computer software" and "commercial
53 | ##  computer software documentation" as such terms are used in 48
54 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
55 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
56 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
57 | ##  U.S. Government End Users acquire the Licensed Deliverables with
58 | ##  only those rights set forth herein.
59 | ##
60 | ##  Any use of the Licensed Deliverables in individual and commercial
61 | ##  software must include, in the user documentation and internal
62 | ##  comments to the code, the above Disclaimer and U.S. Government End
63 | ##  Users Notice.
64 | ##
65 | 
66 | when not defined(CUSOLVER_COMMON_H):  # NOTE(review): `defined` tests -d: switches, not the const below
67 |   const
68 |     CUSOLVER_COMMON_H* = true
69 |   type
70 |     cusolverStatus_t* {.size: sizeof(cint).} = enum  # status codes, stored as a C int
71 |       CUSOLVER_STATUS_SUCCESS = 0, CUSOLVER_STATUS_NOT_INITIALIZED = 1,
72 |       CUSOLVER_STATUS_ALLOC_FAILED = 2, CUSOLVER_STATUS_INVALID_VALUE = 3,
73 |       CUSOLVER_STATUS_ARCH_MISMATCH = 4, CUSOLVER_STATUS_MAPPING_ERROR = 5,
74 |       CUSOLVER_STATUS_EXECUTION_FAILED = 6, CUSOLVER_STATUS_INTERNAL_ERROR = 7,
75 |       CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
76 |       CUSOLVER_STATUS_NOT_SUPPORTED = 9, CUSOLVER_STATUS_ZERO_PIVOT = 10,
77 |       CUSOLVER_STATUS_INVALID_LICENSE = 11
78 |     cusolverEigType_t* {.size: sizeof(cint).} = enum  # eigenproblem type selector, values 1..3
79 |       CUSOLVER_EIG_TYPE_1 = 1, CUSOLVER_EIG_TYPE_2 = 2, CUSOLVER_EIG_TYPE_3 = 3
80 |     cusolverEigMode_t* {.size: sizeof(cint).} = enum  # presumably: compute eigenvectors or not - confirm
81 |       CUSOLVER_EIG_MODE_NOVECTOR = 0, CUSOLVER_EIG_MODE_VECTOR = 1
82 |   proc cusolverGetProperty*(`type`: libraryPropertyType; value: ptr cint): cusolverStatus_t {.
83 |       cdecl, importc: "cusolverGetProperty", dyn.}  # binds the C symbol; presumably writes the property to `value`
84 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/library_types.nim:
--------------------------------------------------------------------------------
 1 | ## 
 2 | ##  Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 3 | ## 
 4 | ##  NOTICE TO LICENSEE:
 5 | ## 
 6 | ##  This source code and/or documentation ("Licensed Deliverables") are
 7 | ##  subject to NVIDIA intellectual property rights under U.S. and
 8 | ##  international Copyright laws.
 9 | ## 
10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 | ##  conditions of a form of NVIDIA software license agreement by and
13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
15 | ##  the contrary in the License Agreement, reproduction or disclosure
16 | ##  of the Licensed Deliverables to any third party without the express
17 | ##  written consent of NVIDIA is prohibited.
18 | ## 
19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 | ##  OF THESE LICENSED DELIVERABLES.
33 | ## 
34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 | ##  1995), consisting of "commercial computer software" and "commercial
37 | ##  computer software documentation" as such terms are used in 48
38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
42 | ##  only those rights set forth herein.
43 | ## 
44 | ##  Any use of the Licensed Deliverables in individual and commercial
45 | ##  software must include, in the user documentation and internal
46 | ##  comments to the code, the above Disclaimer and U.S. Government End
47 | ##  Users Notice.
48 | ## 
49 | 
50 | when not defined(LIBRARY_TYPES_H):  # NOTE(review): `defined` tests -d: switches, not the const below
51 |   const
52 |     LIBRARY_TYPES_H* = true
53 |   type
54 |     cudaDataType* {.size: sizeof(cint).} = enum  # C-int sized: passed to C by pointer, must match the C enum
55 |       CUDA_R_32F = 0,           ##  real as a float
56 |       CUDA_R_64F = 1,           ##  real as a double
57 |       CUDA_R_16F = 2,           ##  real as a half
58 |       CUDA_R_8I = 3,            ##  real as a signed char
59 |       CUDA_C_32F = 4,           ##  complex as a pair of float numbers
60 |       CUDA_C_64F = 5,           ##  complex as a pair of double numbers
61 |       CUDA_C_16F = 6,           ##  complex as a pair of half numbers
62 |       CUDA_C_8I = 7,            ##  complex as a pair of signed char numbers
63 |       CUDA_R_8U = 8,            ##  real as a unsigned char
64 |       CUDA_C_8U = 9,            ##  complex as a pair of unsigned char numbers
65 |       CUDA_R_32I = 10,          ##  real as a signed int
66 |       CUDA_C_32I = 11,          ##  complex as a pair of signed int numbers
67 |       CUDA_R_32U = 12,          ##  real as a unsigned int
68 |       CUDA_C_32U = 13           ##  complex as a pair of unsigned int numbers
69 |     libraryPropertyType* {.size: sizeof(cint).} = enum  # C-int sized: passed by value to *GetProperty calls
70 |       MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/nvgraph.nim:
--------------------------------------------------------------------------------
  1 | ##  #prefix nvgraph
  2 | ##  #prefix NVGRAPH_
  3 | 
  4 | {.deadCodeElim: on.}
  5 | when defined(windows):          # Windows: link nvgraph.lib located under %CUDA_PATH%
  6 |   import os
  7 |   {.passL: "\"" & os.getEnv("CUDA_PATH") / "lib/x64" / "nvgraph.lib" & "\"".}
  8 |   {.pragma: dyn.}               # no dynlib here: symbols are resolved at link time
  9 | elif defined(macosx):           # macOS: load libnvgraph.dylib through dynlib
 10 |   const
 11 |     libName = "libnvgraph.dylib"
 12 |   {.pragma: dyn, dynlib: libName.}
 13 | else:                           # other platforms: load libnvgraph.so through dynlib
 14 |   const
 15 |     libName = "libnvgraph.so"
 16 |   {.pragma: dyn, dynlib: libName.}
 17 | import
 18 |   library_types
 19 | 
 20 | ##
 21 | ##  Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
 22 | ##
 23 | ##  NVIDIA CORPORATION and its licensors retain all intellectual property
 24 | ##  and proprietary rights in and to this software, related documentation
 25 | ##  and any modifications thereto.  Any use, reproduction, disclosure or
 26 | ##  distribution of this software and related documentation without an express
 27 | ##  license agreement from NVIDIA CORPORATION is strictly prohibited.
 28 | ##
 29 | ##
 30 | 
 31 | ##  nvGRAPH status codes returned by every nvgraph* call below (stored as a C int)
 32 | 
 33 | type
 34 |   nvgraphStatus_t* {.size: sizeof(cint).} = enum
 35 |     NVGRAPH_STATUS_SUCCESS = 0, NVGRAPH_STATUS_NOT_INITIALIZED = 1,
 36 |     NVGRAPH_STATUS_ALLOC_FAILED = 2, NVGRAPH_STATUS_INVALID_VALUE = 3,
 37 |     NVGRAPH_STATUS_ARCH_MISMATCH = 4, NVGRAPH_STATUS_MAPPING_ERROR = 5,
 38 |     NVGRAPH_STATUS_EXECUTION_FAILED = 6, NVGRAPH_STATUS_INTERNAL_ERROR = 7,
 39 |     NVGRAPH_STATUS_TYPE_NOT_SUPPORTED = 8, NVGRAPH_STATUS_NOT_CONVERGED = 9
 40 | 
 41 | ##  Returns a C string for the given status code (binds nvgraphStatusGetString)
 42 | proc nvgraphStatusGetString*(status: nvgraphStatus_t): cstring {.cdecl,
 43 |     importc: "nvgraphStatusGetString", dyn.}
 44 | ##  Opaque structure holding the nvGRAPH library context; used through nvgraphHandle_t
 45 | 
 46 | type
 47 |   nvgraphContext* = object      # no fields declared on the Nim side
 48 | 
 49 |   nvgraphHandle_t* = ptr nvgraphContext
 50 | 
 51 | ##  Opaque structure holding the graph descriptor; used through nvgraphGraphDescr_t
 52 | 
 53 | type
 54 |   nvgraphGraphDescr* = object   # no fields declared on the Nim side
 55 | 
 56 |   nvgraphGraphDescr_t* = ptr nvgraphGraphDescr
 57 | 
 58 | ##  Semi-ring types (the `SR` argument of nvgraphSrSpmv); stored as a C int
 59 | 
 60 | type
 61 |   nvgraphSemiring_t* {.size: sizeof(cint).} = enum
 62 |     NVGRAPH_PLUS_TIMES_SR = 0, NVGRAPH_MIN_PLUS_SR = 1, NVGRAPH_MAX_MIN_SR = 2,
 63 |     NVGRAPH_OR_AND_SR = 3
 64 | 
 65 | 
 66 | ##  Topology types (CSR / CSC / COO layouts) and the COO sort-order tag
 67 | 
 68 | type
 69 |   nvgraphTopologyType_t* {.size: sizeof(cint).} = enum
 70 |     NVGRAPH_CSR_32 = 0, NVGRAPH_CSC_32 = 1, NVGRAPH_COO_32 = 2
 71 |   nvgraphTag_t* {.size: sizeof(cint).} = enum
 72 |     NVGRAPH_DEFAULT = 0,        ##  Default is unsorted.
 73 |     NVGRAPH_UNSORTED = 1,       ##
 74 |     NVGRAPH_SORTED_BY_SOURCE = 2, ##  CSR
 75 |     NVGRAPH_SORTED_BY_DESTINATION = 3 ##  CSC
 76 | 
 77 | 
 78 | ##  Topology descriptors for the CSR, CSC and COO layouts (C int indices)
 79 | type
 80 |   nvgraphCSRTopology32I_st* = object
 81 |     nvertices*: cint           ##  n+1
 82 |     nedges*: cint              ##  nnz
 83 |     source_offsets*: ptr cint   ##  rowPtr
 84 |     destination_indices*: ptr cint ##  colInd
 85 | 
 86 |   nvgraphCSRTopology32I_t* = ptr nvgraphCSRTopology32I_st
 87 |   nvgraphCSCTopology32I_st* = object
 88 |     nvertices*: cint           ##  n+1
 89 |     nedges*: cint              ##  nnz
 90 |     destination_offsets*: ptr cint ##  colPtr
 91 |     source_indices*: ptr cint   ##  rowInd
 92 | 
 93 |   nvgraphCSCTopology32I_t* = ptr nvgraphCSCTopology32I_st
 94 |   nvgraphCOOTopology32I_st* = object
 95 |     nvertices*: cint           ##  n+1
 96 |     nedges*: cint              ##  nnz
 97 |     source_indices*: ptr cint   ##  rowInd
 98 |     destination_indices*: ptr cint ##  colInd
 99 |     tag*: nvgraphTag_t          ##  sort order of the entries (see nvgraphTag_t)
100 | 
101 |   nvgraphCOOTopology32I_t* = ptr nvgraphCOOTopology32I_st
102 | 
103 | ##  Open the library and create the handle; `handle` is presumably an out-parameter
104 | 
105 | proc nvgraphCreate*(handle: ptr nvgraphHandle_t): nvgraphStatus_t {.cdecl,
106 |     importc: "nvgraphCreate", dyn.}
107 | ##  Close the library and destroy the handle; returns a status code
108 | 
109 | proc nvgraphDestroy*(handle: nvgraphHandle_t): nvgraphStatus_t {.cdecl,
110 |     importc: "nvgraphDestroy", dyn.}
111 | ##  Create an empty graph descriptor; `descrG` is presumably an out-parameter
112 | 
113 | proc nvgraphCreateGraphDescr*(handle: nvgraphHandle_t;
114 |                              descrG: ptr nvgraphGraphDescr_t): nvgraphStatus_t {.
115 |     cdecl, importc: "nvgraphCreateGraphDescr", dyn.}
116 | ##  Destroy a graph descriptor; returns a status code
117 | 
118 | proc nvgraphDestroyGraphDescr*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t): nvgraphStatus_t {.
119 |     cdecl, importc: "nvgraphDestroyGraphDescr", dyn.}
120 | ##  Set size and topology data in the graph descriptor; `TType` names the layout of `topologyData`
121 | 
122 | proc nvgraphSetGraphStructure*(handle: nvgraphHandle_t;
123 |                               descrG: nvgraphGraphDescr_t; topologyData: pointer;
124 |                               TType: nvgraphTopologyType_t): nvgraphStatus_t {.
125 |     cdecl, importc: "nvgraphSetGraphStructure", dyn.}
126 | ##  Query size and topology information from the graph descriptor (`TType` is a pointer here)
127 | 
128 | proc nvgraphGetGraphStructure*(handle: nvgraphHandle_t;
129 |                               descrG: nvgraphGraphDescr_t; topologyData: pointer;
130 |                               TType: ptr nvgraphTopologyType_t): nvgraphStatus_t {.
131 |     cdecl, importc: "nvgraphGetGraphStructure", dyn.}
132 | ##  Allocate numsets vectors of size V representing Vertex Data and attach them to the graph.
133 | ##  settypes[i] is the type of vector #i; currently all Vertex and Edge data should have the same type.
134 | 
135 | proc nvgraphAllocateVertexData*(handle: nvgraphHandle_t;
136 |                                descrG: nvgraphGraphDescr_t; numsets: csize_t;
137 |                                settypes: ptr cudaDataType): nvgraphStatus_t {.cdecl,
138 |     importc: "nvgraphAllocateVertexData", dyn.}
139 | ##  Allocate numsets vectors of size E representing Edge Data and attach them to the graph.
140 | ##  settypes[i] is the type of vector #i; currently all Vertex and Edge data should have the same type.
141 | 
142 | proc nvgraphAllocateEdgeData*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
143 |                              numsets: csize_t; settypes: ptr cudaDataType): nvgraphStatus_t {.
144 |     cdecl, importc: "nvgraphAllocateEdgeData", dyn.}
145 | ##  `Update the vertex set #setnum with the data in *vertexData; sets have a 0-based index.`
146 | ##  Conversions are not supported, so nvgraphTopologyType_t should match the graph structure.
147 | 
148 | proc nvgraphSetVertexData*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
149 |                           vertexData: pointer; setnum: csize_t): nvgraphStatus_t {.
150 |     cdecl, importc: "nvgraphSetVertexData", dyn.}
151 | ##  `Copy the vertex set #setnum into *vertexData; sets have a 0-based index.`
152 | ##  Conversions are not supported, so nvgraphTopologyType_t should match the graph structure.
153 | 
154 | proc nvgraphGetVertexData*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
155 |                           vertexData: pointer; setnum: csize_t): nvgraphStatus_t {.
156 |     cdecl, importc: "nvgraphGetVertexData", dyn.}
157 | ##  Convert the edge data to another topology
158 | ##  (from `srcTType`/`srcTopology`/`srcEdgeData` to `dstTType`/`dstTopology`/`dstEdgeData`)
159 | 
160 | proc nvgraphConvertTopology*(handle: nvgraphHandle_t;
161 |                             srcTType: nvgraphTopologyType_t; srcTopology: pointer;
162 |                             srcEdgeData: pointer; dataType: ptr cudaDataType;
163 |                             dstTType: nvgraphTopologyType_t; dstTopology: pointer;
164 |                             dstEdgeData: pointer): nvgraphStatus_t {.cdecl,
165 |     importc: "nvgraphConvertTopology", dyn.}
166 | ##  Convert a graph to another structure
167 | ##  (`srcDescrG` -> `dstDescrG`, with `dstTType` as the destination topology type)
168 | 
169 | proc nvgraphConvertGraph*(handle: nvgraphHandle_t; srcDescrG: nvgraphGraphDescr_t;
170 |                          dstDescrG: nvgraphGraphDescr_t;
171 |                          dstTType: nvgraphTopologyType_t): nvgraphStatus_t {.cdecl,
172 |     importc: "nvgraphConvertGraph", dyn.}
173 | ##  `Update the edge set #setnum with the data in *edgeData; sets have a 0-based index.`
174 | ##  Conversions are not supported, so nvgraphTopologyType_t should match the graph structure.
175 | 
176 | proc nvgraphSetEdgeData*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
177 |                         edgeData: pointer; setnum: csize_t): nvgraphStatus_t {.cdecl,
178 |     importc: "nvgraphSetEdgeData", dyn.}
179 | ##  `Copy the edge set #setnum into *edgeData; sets have a 0-based index.`
180 | ##  Conversions are not supported, so nvgraphTopologyType_t should match the graph structure.
181 | 
182 | proc nvgraphGetEdgeData*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
183 |                         edgeData: pointer; setnum: csize_t): nvgraphStatus_t {.cdecl,
184 |     importc: "nvgraphGetEdgeData", dyn.}
185 | ##  Create a new graph by extracting a subgraph given a list of vertices
186 | ##  (`subvertices` with `numvertices` entries; result goes to `subdescrG` - presumably, confirm)
187 | 
188 | proc nvgraphExtractSubgraphByVertex*(handle: nvgraphHandle_t;
189 |                                     descrG: nvgraphGraphDescr_t;
190 |                                     subdescrG: nvgraphGraphDescr_t;
191 |                                     subvertices: ptr cint; numvertices: csize_t): nvgraphStatus_t {.
192 |     cdecl, importc: "nvgraphExtractSubgraphByVertex", dyn.}
193 | ##  Create a new graph by extracting a subgraph given a list of edges
194 | ##  (`subedges` with `numedges` entries; result goes to `subdescrG` - presumably, confirm)
195 | 
196 | proc nvgraphExtractSubgraphByEdge*(handle: nvgraphHandle_t;
197 |                                   descrG: nvgraphGraphDescr_t;
198 |                                   subdescrG: nvgraphGraphDescr_t;
199 |                                   subedges: ptr cint; numedges: csize_t): nvgraphStatus_t {.
200 |     cdecl, importc: "nvgraphExtractSubgraphByEdge", dyn.}
201 | ##  nvGRAPH Semi-ring sparse matrix vector multiplication
202 | ##  (`SR` selects the semiring; the `*_index` arguments are set numbers - presumably, confirm)
203 | 
204 | proc nvgraphSrSpmv*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
205 |                    weight_index: csize_t; alpha: pointer; x_index: csize_t; beta: pointer;
206 |                    y_index: csize_t; SR: nvgraphSemiring_t): nvgraphStatus_t {.cdecl,
207 |     importc: "nvgraphSrSpmv", dyn.}
208 | ##  nvGRAPH Single Source Shortest Path (SSSP)
209 | ##  Calculate the shortest path distance from a single vertex (`source_vert`) to all other vertices.
210 | ##
211 | 
212 | proc nvgraphSssp*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
213 |                  weight_index: csize_t; source_vert: ptr cint; sssp_index: csize_t): nvgraphStatus_t {.
214 |     cdecl, importc: "nvgraphSssp", dyn.}
215 | ##  nvGRAPH WidestPath
216 | ##  Find the widest-path potential from the source vertex (`source_vert`) to every other vertex.
217 | ##
218 | 
219 | proc nvgraphWidestPath*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
220 |                        weight_index: csize_t; source_vert: ptr cint;
221 |                        widest_path_index: csize_t): nvgraphStatus_t {.cdecl,
222 |     importc: "nvgraphWidestPath", dyn.}
223 | ##  nvGRAPH PageRank
224 | ##  Find the PageRank of each vertex of a graph, given transition probabilities, a bookmark vector of dangling vertices, and the damping factor.
225 | ##
226 | 
227 | proc nvgraphPagerank*(handle: nvgraphHandle_t; descrG: nvgraphGraphDescr_t;
228 |                      weight_index: csize_t; alpha: pointer; bookmark_index: csize_t;
229 |                      has_guess: cint; pagerank_index: csize_t; tolerance: cfloat;
230 |                      max_iter: cint): nvgraphStatus_t {.cdecl,
231 |     importc: "nvgraphPagerank", dyn.}
232 | 


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/surface_types.nim:
--------------------------------------------------------------------------------
  1 | ## 
  2 | ##  Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
  3 | ## 
  4 | ##  NOTICE TO LICENSEE:
  5 | ## 
  6 | ##  This source code and/or documentation ("Licensed Deliverables") are
  7 | ##  subject to NVIDIA intellectual property rights under U.S. and
  8 | ##  international Copyright laws.
  9 | ## 
 10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 | ##  conditions of a form of NVIDIA software license agreement by and
 13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 15 | ##  the contrary in the License Agreement, reproduction or disclosure
 16 | ##  of the Licensed Deliverables to any third party without the express
 17 | ##  written consent of NVIDIA is prohibited.
 18 | ## 
 19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 | ##  OF THESE LICENSED DELIVERABLES.
 33 | ## 
 34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 | ##  1995), consisting of "commercial computer software" and "commercial
 37 | ##  computer software documentation" as such terms are used in 48
 38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 42 | ##  only those rights set forth herein.
 43 | ## 
 44 | ##  Any use of the Licensed Deliverables in individual and commercial
 45 | ##  software must include, in the user documentation and internal
 46 | ##  comments to the code, the above Disclaimer and U.S. Government End
 47 | ##  Users Notice.
 48 | ## 
 49 | 
 50 | when not defined(SURFACE_TYPES_H):
  const
    # Marker constant carried over from the C header's include guard.
    # NOTE(review): `defined()` in the enclosing `when` tests compiler define
    # symbols (`-d:...`), not this constant -- confirm the guard behaves as
    # intended.
    SURFACE_TYPES_H* = true
 53 |   ## ******************************************************************************
 54 |   ##                                                                               *
 55 |   ##                                                                               *
 56 |   ##                                                                               *
 57 |   ## *****************************************************************************
 58 |   import
 59 |     driver_types
 60 | 
 61 |   ## *
 62 |   ##  \addtogroup CUDART_TYPES
 63 |   ## 
 64 |   ##  @{
 65 |   ## 
 66 |   ## ******************************************************************************
 67 |   ##                                                                               *
 68 |   ##                                                                               *
 69 |   ##                                                                               *
 70 |   ## *****************************************************************************
 71 |   const
 72 |     cudaSurfaceType1D* = 0x00000001
 73 |     cudaSurfaceType2D* = 0x00000002
 74 |     cudaSurfaceType3D* = 0x00000003
 75 |     cudaSurfaceTypeCubemap* = 0x0000000C
 76 |     cudaSurfaceType1DLayered* = 0x000000F1
 77 |     cudaSurfaceType2DLayered* = 0x000000F2
 78 |     cudaSurfaceTypeCubemapLayered* = 0x000000FC
 79 |   ## *
 80 |   ##  CUDA Surface boundary modes
 81 |   ## 
 82 |   type
 83 |     cudaSurfaceBoundaryMode* = enum
 84 |       cudaBoundaryModeZero = 0, ## *< Zero boundary mode
 85 |       cudaBoundaryModeClamp = 1, ## *< Clamp boundary mode
 86 |       cudaBoundaryModeTrap = 2
 87 |   ## *
 88 |   ##  CUDA Surface format modes
 89 |   ## 
 90 |   type
 91 |     cudaSurfaceFormatMode* = enum
 92 |       cudaFormatModeForced = 0, ## *< Forced format mode
 93 |       cudaFormatModeAuto = 1
  ## CUDA Surface reference.
  type
    surfaceReference* = object
      channelDesc*: cudaChannelFormatDesc ## Channel descriptor for the surface
                                          ## reference

  ## An opaque value that represents a CUDA Surface object
  ## (declared here as an alias of `culonglong`).
  type
    cudaSurfaceObject_t* = culonglong
108 |   ## * @}
109 |   ## * @}
110 |   ##  END CUDART_TYPES


--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/texture_types.nim:
--------------------------------------------------------------------------------
  1 | ## 
  2 | ##  Copyright 1993-2012 NVIDIA Corporation.  All rights reserved.
  3 | ## 
  4 | ##  NOTICE TO LICENSEE:
  5 | ## 
  6 | ##  This source code and/or documentation ("Licensed Deliverables") are
  7 | ##  subject to NVIDIA intellectual property rights under U.S. and
  8 | ##  international Copyright laws.
  9 | ## 
 10 | ##  These Licensed Deliverables contained herein is PROPRIETARY and
 11 | ##  CONFIDENTIAL to NVIDIA and is being provided under the terms and
 12 | ##  conditions of a form of NVIDIA software license agreement by and
 13 | ##  between NVIDIA and Licensee ("License Agreement") or electronically
 14 | ##  accepted by Licensee.  Notwithstanding any terms or conditions to
 15 | ##  the contrary in the License Agreement, reproduction or disclosure
 16 | ##  of the Licensed Deliverables to any third party without the express
 17 | ##  written consent of NVIDIA is prohibited.
 18 | ## 
 19 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 20 | ##  LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 21 | ##  SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 22 | ##  PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 23 | ##  NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 24 | ##  DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 25 | ##  NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 26 | ##  NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 27 | ##  LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 28 | ##  SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 29 | ##  DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 30 | ##  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 31 | ##  ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 32 | ##  OF THESE LICENSED DELIVERABLES.
 33 | ## 
 34 | ##  U.S. Government End Users.  These Licensed Deliverables are a
 35 | ##  "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 36 | ##  1995), consisting of "commercial computer software" and "commercial
 37 | ##  computer software documentation" as such terms are used in 48
 38 | ##  C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 39 | ##  only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 40 | ##  48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 41 | ##  U.S. Government End Users acquire the Licensed Deliverables with
 42 | ##  only those rights set forth herein.
 43 | ## 
 44 | ##  Any use of the Licensed Deliverables in individual and commercial
 45 | ##  software must include, in the user documentation and internal
 46 | ##  comments to the code, the above Disclaimer and U.S. Government End
 47 | ##  Users Notice.
 48 | ## 
 49 | 
 50 | when not defined(TEXTURE_TYPES_H):
  const
    # Marker constant carried over from the C header's include guard.
    # NOTE(review): `defined()` in the enclosing `when` tests compiler define
    # symbols (`-d:...`), not this constant -- confirm the guard behaves as
    # intended.
    TEXTURE_TYPES_H* = true
 53 |   ## ******************************************************************************
 54 |   ##                                                                               *
 55 |   ##                                                                               *
 56 |   ##                                                                               *
 57 |   ## *****************************************************************************
 58 |   import
 59 |     driver_types
 60 | 
 61 |   ## *
 62 |   ##  \addtogroup CUDART_TYPES
 63 |   ## 
 64 |   ##  @{
 65 |   ## 
 66 |   ## ******************************************************************************
 67 |   ##                                                                               *
 68 |   ##                                                                               *
 69 |   ##                                                                               *
 70 |   ## *****************************************************************************
 71 |   const
 72 |     cudaTextureType1D* = 0x00000001
 73 |     cudaTextureType2D* = 0x00000002
 74 |     cudaTextureType3D* = 0x00000003
 75 |     cudaTextureTypeCubemap* = 0x0000000C
 76 |     cudaTextureType1DLayered* = 0x000000F1
 77 |     cudaTextureType2DLayered* = 0x000000F2
 78 |     cudaTextureTypeCubemapLayered* = 0x000000FC
 79 |   ## *
 80 |   ##  CUDA texture address modes
 81 |   ## 
 82 |   type
 83 |     cudaTextureAddressMode* = enum
 84 |       cudaAddressModeWrap = 0,  ## *< Wrapping address mode
 85 |       cudaAddressModeClamp = 1, ## *< Clamp to edge address mode
 86 |       cudaAddressModeMirror = 2, ## *< Mirror address mode
 87 |       cudaAddressModeBorder = 3
 88 |   ## *
 89 |   ##  CUDA texture filter modes
 90 |   ## 
 91 |   type
 92 |     cudaTextureFilterMode* = enum
 93 |       cudaFilterModePoint = 0,  ## *< Point filter mode
 94 |       cudaFilterModeLinear = 1
 95 |   ## *
 96 |   ##  CUDA texture read modes
 97 |   ## 
 98 |   type
 99 |     cudaTextureReadMode* = enum
100 |       cudaReadModeElementType = 0, ## *< Read texture as specified element type
101 |       cudaReadModeNormalizedFloat = 1
  ## CUDA texture reference.
  ##
  ## NOTE(review): this object is declared without `importc`/`header`, so its
  ## field order and field sizes have to mirror the C structure by
  ## construction -- verify against the CUDA header.
  type
    textureReference* = object
      normalized*: cint ## Indicates whether texture reads are normalized or not
      filterMode*: cudaTextureFilterMode ## Texture filter mode
      addressMode*: array[3, cudaTextureAddressMode] ## Texture address mode for
                                                   ## up to 3 dimensions
      channelDesc*: cudaChannelFormatDesc ## Channel descriptor for the texture
                                          ## reference
      sRGB*: cint ## Perform sRGB->linear conversion during texture read
      maxAnisotropy*: cuint ## Limit to the anisotropy ratio
      mipmapFilterMode*: cudaTextureFilterMode ## Mipmap filter mode
      mipmapLevelBias*: cfloat ## Offset applied to the supplied mipmap level
      minMipmapLevelClamp*: cfloat ## Lower end of the mipmap level range to
                                   ## clamp access to
      maxMipmapLevelClamp*: cfloat ## Upper end of the mipmap level range to
                                   ## clamp access to
      cudaReserved*: array[15, cint] ## NOTE(review): presumably reserved
                                     ## space mirroring the C struct -- confirm
139 | 
  ## CUDA texture descriptor.
  ##
  ## NOTE(review): this object is declared without `importc`/`header`, so its
  ## field order and field sizes have to mirror the C structure by
  ## construction -- verify against the CUDA header.
  type
    cudaTextureDesc* = object
      addressMode*: array[3, cudaTextureAddressMode] ## Texture address mode for
                                                   ## up to 3 dimensions
      filterMode*: cudaTextureFilterMode ## Texture filter mode
      readMode*: cudaTextureReadMode ## Texture read mode
      sRGB*: cint ## Perform sRGB->linear conversion during texture read
      borderColor*: array[4, cfloat] ## Texture Border Color
      normalizedCoords*: cint ## Indicates whether texture reads are normalized
                              ## or not
      maxAnisotropy*: cuint ## Limit to the anisotropy ratio
      mipmapFilterMode*: cudaTextureFilterMode ## Mipmap filter mode
      mipmapLevelBias*: cfloat ## Offset applied to the supplied mipmap level
      minMipmapLevelClamp*: cfloat ## Lower end of the mipmap level range to
                                   ## clamp access to
      maxMipmapLevelClamp*: cfloat ## Upper end of the mipmap level range to
                                   ## clamp access to
179 | 
  ## An opaque value that represents a CUDA texture object
  ## (declared here as an alias of `culonglong`).
  type
    cudaTextureObject_t* = culonglong
185 |   ## * @}
186 |   ## * @}
187 |   ##  END CUDART_TYPES


--------------------------------------------------------------------------------
/utils/postprocessor.nim:
--------------------------------------------------------------------------------
  1 | 
  2 | ##[
  3 |   This helper executable postprocesses the nim files after they get
  4 |   spit out by c2nim.
  5 |   ]##
  6 | 
  7 | import
  8 |   std / [pegs, cmdline, paths, files, strformat, strutils, sugar, sets, options]
  9 | 
 10 | 
 11 | 
 12 | func mangleDefines(code: sink string): string =
 13 |   ## C2nim struggles with mangling code that looks like this:
 14 |   ## `defined(__MY_CONST__)`. This proc mangles it to normal Nim style.
 15 |   let pegAst = peg"""definedExpr <- 'defined(' \s* middle \s* endOfDefined
 16 | 
 17 |     middle <- leading / trailing
 18 | 
 19 |     leading <- '_'+ identifer '_'*
 20 |     trailing <- '_'* identifer '_'+
 21 | 
 22 |     identifer <- { ( !(endOfIdentifier / endOfDefined) .)+ }
 23 | 
 24 |     endOfIdentifier <- '_'+ !(\a / \d)
 25 |     endOfDefined <- ')'
 26 |     """
 27 | 
 28 |   result = code.replacef(pegAst,
 29 |                          "defined($1)")
 30 |   # result = code.replacef(peg"'defined(__' { (!('__' / [)]) .)+ } '__'? [)]",
 31 |   #                        "defined($1)")
 32 | 
 33 | func handleForwardDecls(code: sink string): string =
 34 |   ## C2nim handles forward declarations by outputing the following:
 35 |   ## `discard "forward decl of {typeDesc}"`
 36 |   ## This proc replaces it with `type {typeDesc} = object`.
 37 |   result = code.replacef(peg""" 'discard "forward decl of ' {\ident} ["] """,
 38 |                          "type $1 {.nodecl.} = object")
 39 | 
 40 | 
 41 | func removeUnusedVariableSilencing(code: sink string): string =
 42 |   let matcher = peg" {\n '  '*} 'cast[nil](' \s* {\ident} \s* ')' {@\n} "
 43 | 
 44 |   result = code.replacef(matcher, "$1discard $2$3")
 45 | 
 46 | 
 47 | func fixTrailingUnderscoreProcName(name: string): string =
 48 |   let nameNoTrailing = name.strip(chars={'_'}, leading=false, trailing=true)
 49 |   result = fmt"{nameNoTrailing}UnderScore"
 50 | 
 51 | 
 52 | proc fixProcsDecls(code: sink string): string =
 53 |   ## This proc makes some procs discardable that should be.
 54 |   ## Currently this is any proc returning an error code.
 55 |   ## It also fixes trailing underscores in the name.
 56 | 
 57 |   let procDecls = peg"""procDecls <- (@procDecl)*
 58 |     procDecl <- 'proc ' procName '*(' \s* argDecls ')' (':' \s+ returnType)? (\s+ pragmas)? (\s+ '=')? @\n
 59 |     procName <- \ident
 60 |     notLastProcArg <- procArgName ': ' procArgType ';'
 61 |     lastProcArg <- procArgName ': ' procArgType !';'
 62 |     argDecls <- (notLastProcArg \s*)* lastProcArg
 63 |     returnType <- \ident
 64 |     pragmas <- '{.' \s* (notLastPragma \s+)* lastPragma \s* '.}'
 65 |     notLastPragma <- singlePragma ','
 66 |     lastPragma <- singlePragma !','
 67 | 
 68 | 
 69 |     singlePragma <- pragmaName (':' \s+ ["]? \ident ["]?)?
 70 |     pragmaName <- \ident
 71 | 
 72 |     procArgName <- \ident
 73 |     procArgType <- 'ptr '? \ident
 74 |     """
 75 | 
 76 |   type ProcToReplace = object
 77 |     startF, lengthF: int # template confusion later on requires the 'F'.
 78 |     oldName, newName: Option[string]
 79 |     makeDiscardable: bool
 80 | 
 81 |   var needsChanged: seq[ProcToReplace] = @[]
 82 | 
 83 |   const DiscardableReturnTypes = ["cudaError", "cudaOccError", "cublasStatus_t",
 84 |                                   "cufftResult", "cusparseStatus_t",
 85 |                                   "cusolverStatus_t", "curandStatus"]
 86 | 
 87 |   type ProcDeclarationParsingContext = object
 88 |     currentProc: string
 89 |     shouldBeDiscardable: bool
 90 | 
 91 |   func reset(self: var ProcDeclarationParsingContext) =
 92 |     self.currentProc = ""
 93 |     self.shouldBeDiscardable = false
 94 | 
 95 |   var context = ProcDeclarationParsingContext()
 96 | 
 97 | 
 98 |   let parseProcDecls = procDecls.eventParser:
 99 |     pkNonTerminal:
100 |       leave:
101 |         template thisMatch(): string =
102 |           code[start .. start + length - 1]
103 | 
104 |         if length > 0:
105 |           # Succesful match on a nonterminal (named) peg.
106 |           case p.nt.name
107 |           of "procName":
108 |             context.currentProc = thisMatch()
109 | 
110 |           of "returnType":
111 |             let returnType = thisMatch()
112 |             for discardableType in DiscardableReturnTypes:
113 |               if returnType.cmpIgnoreStyle(discardableType) == 0:
114 |                 context.shouldBeDiscardable = true
115 | 
116 | 
117 |           of "pragmaName":
118 |             if thisMatch().cmpIgnoreStyle("discardable") == 0:
119 |               # Proc is already discardable.
120 |               context.shouldBeDiscardable = false
121 | 
122 |           of "procDecl":
123 |             # Success parsing a proc declaration.
124 |             var found = ProcToReplace(startF: start, lengthF: length,
125 |                                    makeDiscardable: context.shouldBeDiscardable)
126 | 
127 |             if context.currentProc.endsWith('_'):
128 |               found.oldName = some context.currentProc
129 |               found.newName = some context.currentProc.fixTrailingUnderscoreProcName
130 | 
131 |             if found.makeDiscardable or found.newName.isSome:
132 |               needsChanged.add found
133 | 
134 |             reset context
135 | 
136 |           else: discard
137 | 
138 |         else:
139 |           case p.nt.name
140 |           of "procDecl":
141 |             # Failure parsing; not a proc declaration.
142 |             reset context
143 |           else: discard
144 | 
145 |   assert parseProcDecls(code) != -1
146 | 
147 |   func makeDiscardable(decl: string): string =
148 |     const
149 |       PragmaStart = "{."
150 |       NotFound = -1
151 |     let
152 |       foundPragmaStart = decl.rfind("{.")
153 |       alreadyHasPragmas = foundPragmaStart != NotFound
154 |     if alreadyHasPragmas:
155 |       let
156 |         firstPart = decl[0 .. foundPragmaStart + PragmaStart.high]
157 |         lastPart = decl[foundPragmaStart + PragmaStart.high + 1 .. ^1]
158 |       result = fmt"{firstPart}discardable, {lastPart}"
159 |     else:
160 |       assert decl[^1] == '='
161 |       result = fmt"{decl[0..^2]} {{.discardable.}} ="
162 | 
163 |   let replacePairs = collect:
164 |     for procedure in needsChanged:
165 | 
166 |       template thisMatch(): string =
167 |         code[procedure.startF .. procedure.startF + procedure.lengthF - 1]
168 | 
169 |       let original = thisMatch()
170 |       var modified = original
171 | 
172 |       if procedure.makeDiscardable:
173 |         modified = modified.makeDiscardable
174 |       if procedure.newName.isSome:
175 |         modified = modified.replace(fmt"proc {procedure.oldName.get}",
176 |                                     fmt"proc {procedure.newName.get}")
177 |       (original, modified)
178 | 
179 |   result = code.multiReplace(replacePairs)
180 | 
181 | 
182 | 
183 | 
func doSimpleSwaps(code: sink string): string =
  ## Applies plain textual substitutions for things c2nim doesn't get right.
  ##
  ## The substitutions are tried in the order listed at every position of
  ## `code`; they are not limited to whole words.
  code.multiReplace(
    ("cuchar", "char"),     # rename the `cuchar` type
    ("(int)", "(cint)"),    # fix C-style casts to int
    ("memcpy", "copyMem"))  # use Nim's memory copy routine
191 | 
192 | 
func escapeKeyWords(code: sink string): string =
  ## Renames identifiers that c2nim copied verbatim although they clash with
  ## names that are special in Nim.
  ##
  ## Currently only `result` is handled: when it follows whitespace or `(` and
  ## is not followed by another word character it becomes `resultNotKeyWord`.
  # TODO: add more keywords
  let clashingName = peg" { \s+ / '(' } {'result'} !\w"
  code.replacef(clashingName, "$1$2NotKeyWord")
199 | 
200 | 
201 | 
202 | 
proc postprocess*(code: sink string): string =
  ## Runs every postprocessing pass over `code`, each pass consuming the
  ## output of the previous one, and returns the final text.
  let
    demangled = mangleDefines(code)
    swapped = doSimpleSwaps(demangled)
    forwardDeclsFixed = handleForwardDecls(swapped)
    procsFixed = fixProcsDecls(forwardDeclsFixed)
    namesEscaped = escapeKeyWords(procsFixed)
  removeUnusedVariableSilencing(namesEscaped)
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
proc main =
  ## Postprocesses, in place, every file named on the command line.
  ##
  ## Files are handled in argument order. A path that does not exist aborts
  ## the run with an assertion failure; files handled before it have already
  ## been rewritten at that point.
  for arg in commandLineParams():
    # `doAssert` (not `assert`) so the argument check is kept even when the
    # program is built with assertions disabled.
    doAssert (Path arg).fileExists, fmt "Bad argument! '{arg}' doesn't exist."

    let
      input = readFile(arg)
      postprocessed = input.postprocess()

    writeFile arg, postprocessed
    echo fmt"Postprocessed '{arg}'"



# Only run when compiled as the main module, not when imported.
when isMainModule:
  main()
228 | 
229 | 


--------------------------------------------------------------------------------
/utils/preprocessor.nim:
--------------------------------------------------------------------------------
 1 |  
 2 | ##[
 3 |   This helper executable preprocesses the c header files before they get
 4 |   passed to c2nim.
 5 |   ]##
 6 | 
 7 | import
 8 |   std / [pegs, cmdline, paths, files, strformat, strutils]
 9 | 
func renameUint64(code: sink string): string =
  ## C2nim has trouble with the `unsigned long long int` type.
  ## Every `unsigned long long`, with or without the trailing ` int`, is
  ## replaced by `culonglong`, which it can handle.
  let unsignedLongLong = peg"'unsigned long long' ' int'?"
  code.replace(unsignedLongLong, "culonglong")
14 | 
func renameInt64(code: sink string): string =
  ## Replaces the C type `int64_t` with `clonglong`.
  ##
  ## Only whole-word occurrences are replaced, so identifiers that merely
  ## contain the text (such as `uint64_t` or `cuuint64_t`) are left intact
  ## instead of being turned into names like `uclonglong`.
  result = code.replaceWord("int64_t", "clonglong")
19 | 
func renameCuchar(code: sink string): string =
  ## `cuchar` is deprecated.
  ## Every occurrence of the text `cuchar` is replaced with `uint8`.
  code.replace("cuchar", "uint8")
24 | 
func rearrangeConstPtrTypeDefs(code: string): string =
  ## C2nim has trouble with a certain arrangement of `const*` typedefs.
  ## Typedefs written as `typedef struct A const* B;` are rearranged into
  ## `typedef const struct A* B;`, which it can handle.
  let constPtrTypedef = peg"'typedef struct ' {\ident} ' const* ' {\ident}[;]"
  code.replacef(constPtrTypedef, "typedef const struct $1* $2;")
30 | 
31 | 
32 | 
33 | 
34 | 
func preprocess*(code: sink string): string =
  ## Prepares Cuda library header code for c2nim by running every
  ## preprocessing pass in turn, each one consuming the previous output.
  let
    typedefsFixed = rearrangeConstPtrTypeDefs(code)
    unsignedRenamed = renameUint64(typedefsFixed)
    signedRenamed = renameInt64(unsignedRenamed)
  renameCuchar(signedRenamed)
40 | 
41 | 
42 | 
43 | 
proc main =
  ## Preprocesses, in place, every file named on the command line.
  ##
  ## Files are handled in argument order. A path that does not exist aborts
  ## the run with an assertion failure; files handled before it have already
  ## been rewritten at that point.
  for arg in commandLineParams():
    # `doAssert` (not `assert`) so the argument check is kept even when the
    # program is built with assertions disabled.
    doAssert (Path arg).fileExists, fmt "Bad argument! '{arg}' doesn't exist."

    let
      input = readFile(arg)
      preprocessed = input.preprocess()

    writeFile arg, preprocessed
    echo fmt"Preprocessed '{arg}'"



# Only run when compiled as the main module, not when imported.
when isMainModule:
  main()
59 | 


--------------------------------------------------------------------------------