├── .gitignore ├── LICENSE ├── README.md ├── c2nim └── cuda12_5 │ ├── cuComplex.h │ ├── cublas_api.h │ ├── cublas_v2.h │ ├── cuda.h │ ├── cuda_occupancy.h │ ├── cuda_runtime_api.h │ ├── cudnn_v9.h │ ├── cufft.h │ ├── cufftw.h │ ├── curand.h │ ├── curand_discrete.h │ ├── cusolverDn.h │ ├── cusolverRf.h │ ├── cusolverSp.h │ ├── cusolver_common.h │ ├── cusparse.h │ ├── driver_types.h │ ├── library_types.h │ ├── nvblas.h │ ├── nvrtc.h │ ├── surface_types.h │ ├── texture_types.h │ └── vector_types.h ├── changelog.org ├── examples ├── cuda12_5 │ ├── blas.nim │ ├── denseLinearSystem.nim │ ├── fft.nim │ ├── random.nim │ ├── runtimeCompilation.nim │ ├── sparse.nim │ └── sparseLinearSystem.nim └── cuda8_0 │ ├── fft.nim │ ├── nim.cfg │ ├── pagerank.nim │ ├── random.nim │ └── sparse.nim ├── include ├── cuda12_5 │ ├── cublas_api.h │ ├── cuda.h │ ├── cuda_occupancy.h │ ├── cuda_runtime.h │ ├── cuda_runtime_api.h │ ├── cusolver_common.h │ ├── cusparse.h │ ├── driver_types.h │ ├── nvrtc.h │ └── vector_types.h └── cuda8_0 │ ├── cuda_occupancy.h │ ├── cuda_runtime_api.h │ └── vector_types.h ├── nimcuda.nimble ├── src ├── nimcuda.nim └── nimcuda │ ├── cuda12_5 │ ├── check.nim │ ├── cuComplex.nim │ ├── cublas_api.nim │ ├── cublas_v2.nim │ ├── cuda.nim │ ├── cuda_occupancy.nim │ ├── cuda_runtime_api.nim │ ├── cufft.nim │ ├── curand.nim │ ├── cusolverDn.nim │ ├── cusolverRf.nim │ ├── cusolverSp.nim │ ├── cusolver_common.nim │ ├── cusparse.nim │ ├── driver_types.nim │ ├── helpers.nim │ ├── libpaths.nim │ ├── library_types.nim │ ├── nvblas.nim │ ├── nvrtc.nim │ ├── surface_types.nim │ ├── texture_types.nim │ └── vector_types.nim │ └── cuda8_0 │ ├── check.nim │ ├── cuComplex.nim │ ├── cublas_api.nim │ ├── cublas_v2.nim │ ├── cuda_occupancy.nim │ ├── cuda_runtime_api.nim │ ├── cudnn.nim │ ├── cufft.nim │ ├── curand.nim │ ├── cusolverDn.nim │ ├── cusolverRf.nim │ ├── cusolverSp.nim │ ├── cusolver_common.nim │ ├── cusparse.nim │ ├── driver_types.nim │ ├── library_types.nim │ ├── nvblas.nim │ 
├── nvgraph.nim │ ├── surface_types.nim │ ├── texture_types.nim │ └── vector_types.nim └── utils ├── postprocessor.nim └── preprocessor.nim /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache 2 | 3 | # ignore all files except nim source files in the examples dir 4 | examples/cuda12_5/* 5 | !examples/cuda12_5/*.nim 6 | 7 | # ignore all files except nim source files in the examples dir 8 | examples/cuda8_0/* 9 | !examples/cuda8_0/*.nim 10 | 11 | #ignore docs 12 | htmldocs 13 | 14 | .vscode 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NimCUDA 2 | 3 | Nim bindings for the [CUDA](https://developer.nvidia.com/cuda-toolkit) 4 | libraries. The versions currently in use are 5 | 6 | * CUDA 8.0 + CuDNN 5.1 7 | * CUDA 12.5 8 | 9 | ## Status 10 | 11 | Most libraries have working bindings. Out of these: 12 | 13 | * most bindings are generated using [c2nim](http://nim-lang.org/docs/c2nim.html) 14 | and suitable directives (see the files inside `/c2nim`) 15 | * a preprocessor is used on the header files to help with common issues that 16 | c2nim has during parsing. 17 | * a postprocessor is used on the nim files to help alleviate some common 18 | output problems that c2nim has. 19 | * some header files are manually edited before being passed to c2nim. 20 | * some nim files are manually edited. 21 | 22 | Ideally, once some improvements are available in c2nim, there should be no 23 | need to manually modify any files. 24 | 25 | ## Usage 26 | 27 | See a few examples under `/examples`. 
The examples can be run with the command 28 | `nimble EXAMPLE_NAME CUDA_VERSION`, where `EXAMPLE_NAME` is one of the examples and 29 | `CUDA_VERSION` is the version of cuda that you want it to run on - for 30 | instance `nimble fft 12.5`. 31 | 32 | API documentation lives under `/htmldocs`. Generate it by running `nimble docs`. 33 | 34 | ## Name mangling 35 | 36 | c2nim supports name mangling, which could be useful to simplify a few names 37 | (e.g. turn `CUBLAS_STATUS_ARCH_MISMATCH` into `ARCH_MISMATCH`, which can be 38 | qualified as `cublasStatus_t.ARCH_MISMATCH` in case of ambiguity). 39 | 40 | Right now, no unnecessary mangling is performed, because the API surface is large and 41 | not always consistent, so it felt simpler to leave it as is. This may change 42 | in a future release. 43 | 44 | ## Error handling 45 | 46 | In each cuda version's library there is a file called `check.nim`. 47 | In it are a few templates that turn CUDA errors 48 | into Nim exceptions. They are all under the overloaded name `check`, so that 49 | one can do, for instance 50 | 51 | ```nim 52 | check cudaMalloc(cast[ptr pointer](addr gpuRows), sizeof(rows)) 53 | ``` 54 | 55 | (here `cudaMalloc` is one of the many functions that fail by returning an 56 | error code). 
57 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cuComplex.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CU_COMPLEX_H_ 3 | 4 | #mangle __GNUC__ GNUC 5 | #mangle __GNUC_MINOR__ GNUC_MINOR 6 | 7 | #def __host__ 8 | #def __device__ 9 | #def __inline__ 10 | 11 | #@ 12 | from std/math import sqrt 13 | 14 | template sqrtf(x: cfloat): cfloat = sqrt(x) 15 | 16 | template fabsf(x: cfloat): cfloat = abs(x) 17 | 18 | template fabs(x: float): float = abs(x) 19 | 20 | template `div`(a: static[float64], b: cfloat): cfloat = cfloat(a) / b 21 | 22 | template `div`(a: cfloat, b: cfloat): cfloat = a / b 23 | @# 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cublas_api.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #def CUBLASWINAPI 3 | #def CUBLASAPI 4 | 5 | #mangle __half half 6 | #mangle __half2 half2 7 | 8 | #dynlib libName 9 | #private libName 10 | #cdecl 11 | #if defined(windows) 12 | # define libName "cublas.dll" 13 | #elif defined(macosx) 14 | # define libName "libcublas.dylib" 15 | #else 16 | # define libName "libcublas.so" 17 | #endif 18 | 19 | #assumendef CUBLAS_API_H_ 20 | 21 | #include "library_types.h" 22 | #endif 23 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cublas_v2.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CUBLAS_V2_H_ 3 | 4 | #def CUBLASAPI 5 | #endif 6 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cuda.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef C2NIM 3 | #assumendef __cuda_cuda_h__ 4 | 5 | #mangle cuuint32_t cint 6 | #mangle cuuclonglong culonglong 7 | #mangle default_ 
defaultUnderScore 8 | #mangle _oversize underScoreOversize 9 | #mangle _internal_padding underScoreInternal_padding 10 | 11 | #def __device_builtin__ 12 | 13 | #def __CUDA_DEPRECATED 14 | 15 | #skipinclude 16 | 17 | #dynlib libName 18 | #private libName 19 | #cdecl 20 | #if defined(windows) 21 | # stdcall 22 | # define libName "cuda.dll" // dont know that this is right 23 | #elif defined(macosx) 24 | # define libName "libcuda.dylib" 25 | #else 26 | # define libName "libcuda.so" 27 | #endif 28 | 29 | 30 | 31 | #endif 32 | 33 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cuda_occupancy.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #mangle __cuda_occupancy_h__ cuda_occupancy_h 3 | #mangle __CUDA_OCC_MAJOR__ CUDA_OCC_MAJOR 4 | #mangle __CUDA_OCC_MINOR__ CUDA_OCC_MINOR 5 | #mangle __occMin occMin 6 | #mangle __occDivideRoundUp occDivideRoundUp 7 | #mangle __occRoundUp occRoundUp 8 | 9 | #assumendef __CUDACC__ 10 | #def __OCC_INLINE inline 11 | 12 | // typedef struct {} cudaOccResult; 13 | // typedef struct {} cudaOccDeviceProp; 14 | // typedef struct {} cudaOccFuncAttributes; 15 | // typedef struct {} cudaOccDeviceState; 16 | #endif -------------------------------------------------------------------------------- /c2nim/cuda12_5/cuda_runtime_api.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef __CUDA_RUNTIME_API_H__ 3 | #assumendef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ 4 | 5 | #assumendef _WIN32 6 | #assumendef __CUDA_API_VERSION_INTERNAL 7 | #assumedef __CUDACC_INTEGRATED__ 8 | #assumendef CUDA_API_PER_THREAD_DEFAULT_STREAM 9 | 10 | #def CUDARTAPI 11 | #def __host__ 12 | #def __cudart_builtin__ 13 | #def CUDART_CB 14 | #def __dv(v) 15 | 16 | #mangle __CUDA_API_VER_MAJOR__ CUDA_API_VER_MAJOR 17 | #mangle __CUDA_API_VER_MINOR__ CUDA_API_VER_MINOR 18 | #mangle __CUDA_API_VER_MAJOR__ 
CUDA_API_VER_MAJOR 19 | #mangle __CUDART_API_VERSION CUDART_API_VERSION 20 | #mangle __DOXYGEN_ONLY__ DOXYGEN_ONLY 21 | #mangle __CUDACC_RTC_MINIMAL__ CUDACC_RTC_MINIMAL 22 | #mangle __CUDACC_RDC__ CUDACC_RDC 23 | #mangle __CUDACC_EWP__ CUDACC_EWP 24 | #mangle __CUDACC_RTC__ CUDACC_RTC 25 | #mangle __CUDACC_RTC_MINIMAL__ CUDACC_RTC_MINIMAL 26 | 27 | #dynlib libName 28 | #private libName 29 | #cdecl 30 | #if defined(windows) 31 | # stdcall 32 | # define libName "cudart.dll" 33 | #elif defined(macosx) 34 | # define libName "libcudart.dylib" 35 | #else 36 | # define libName "libcudart.so" 37 | #endif 38 | 39 | #include "vector_types.h" 40 | #include "driver_types.h" 41 | #include "surface_types.h" 42 | #include "texture_types.h" 43 | #skipinclude 44 | #endif 45 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cudnn_v9.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #def CUDNNWINAPI 3 | 4 | #mangle CUDNN_H_ CUDNN_H 5 | 6 | #dynlib libName 7 | #private libName 8 | #cdecl 9 | #if defined(windows) 10 | # define libName "cudnn.dll" 11 | #elif defined(macosx) 12 | # define libName "libcudnn.dylib" 13 | #else 14 | # define libName "libcudnn.so" 15 | #endif 16 | 17 | #private cudnnTensorStruct 18 | #private cudnnConvolutionStruct 19 | #private cudnnPoolingStruct 20 | #private cudnnFilterStruct 21 | #private cudnnLRNStruct 22 | #private cudnnActivationStruct 23 | #private cudnnSpatialTransformerStruct 24 | #private cudnnOpTensorStruct 25 | #private cudnnDropoutStruct 26 | 27 | struct cudnnTensorStruct {}; 28 | struct cudnnConvolutionStruct {}; 29 | struct cudnnPoolingStruct {}; 30 | struct cudnnFilterStruct {}; 31 | struct cudnnLRNStruct {}; 32 | struct cudnnActivationStruct {}; 33 | struct cudnnSpatialTransformerStruct {}; 34 | struct cudnnOpTensorStruct {}; 35 | struct cudnnDropoutStruct {}; 36 | 37 | #endif 
-------------------------------------------------------------------------------- /c2nim/cuda12_5/cufft.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #def CUFFTAPI 3 | 4 | #dynlib libName 5 | #private libName 6 | #cdecl 7 | #if defined(windows) 8 | # stdcall 9 | # define libName "cufft.dll" 10 | #elif defined(macosx) 11 | # define libName "libcufft.dylib" 12 | #else 13 | # define libName "libcufft.so" 14 | #endif 15 | 16 | #include "cuComplex.h" 17 | #include "library_types.h" 18 | #include "driver_types.h" 19 | #skipinclude 20 | #endif 21 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cufftw.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef _CUFFTW_H_ 3 | 4 | #def CUFFTAPI 5 | 6 | #dynlib libName 7 | #private libName 8 | #cdecl 9 | #if defined(windows) 10 | # stdcall 11 | # define libName "cufftw.dll" 12 | #elif defined(macosx) 13 | # define libName "libcufftw.dylib" 14 | #else 15 | # define libName "libcufftw.so" 16 | #endif 17 | 18 | #skipinclude 19 | #endif 20 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/curand.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #def CURANDAPI 3 | 4 | #assumendef CURAND_H_ 5 | 6 | #dynlib libName 7 | #private libName 8 | #cdecl 9 | #if defined(windows) 10 | # define libName "curand.dll" 11 | #elif defined(macosx) 12 | # define libName "libcurand.dylib" 13 | #else 14 | # define libName "libcurand.so" 15 | #endif 16 | 17 | #private curandDistributionShift_st 18 | #private curandDistributionM2Shift_st 19 | #private curandHistogramM2_st 20 | #private curandDiscreteDistribution_st 21 | 22 | struct curandDistributionShift_st {}; 23 | struct curandDistributionM2Shift_st {}; 24 | struct curandHistogramM2_st {}; 25 | struct curandDiscreteDistribution_st {}; 
26 | 27 | #include "library_types.h" 28 | #include "driver_types.h" 29 | #skipinclude 30 | #endif 31 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/curand_discrete.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CURANDDISCRETE_H_ 3 | #endif 4 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cusolverDn.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CUSOLVERDN_H_ 3 | 4 | #def CUSOLVERAPI 5 | 6 | #define CUSOLVER_DEPRECATED(new_func) 7 | 8 | #dynlib libName 9 | #private libName 10 | #cdecl 11 | #if defined(windows) 12 | # define libName "cusolver.dll" 13 | #elif defined(macosx) 14 | # define libName "libcusolver.dylib" 15 | #else 16 | # define libName "libcusolver.so" 17 | #endif 18 | 19 | #include "cuComplex.h" 20 | #include "cublas_api.h" 21 | #include "cusolver_common.h" 22 | #include "library_types.h" 23 | #include "driver_types.h" 24 | #skipinclude 25 | #endif 26 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cusolverRf.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CUSOLVERRF_H_ 3 | 4 | #def CUSOLVERAPI 5 | 6 | #dynlib libName 7 | #private libName 8 | #cdecl 9 | #if defined(windows) 10 | # define libName "cusolver.dll" 11 | #elif defined(macosx) 12 | # define libName "libcusolver.dylib" 13 | #else 14 | # define libName "libcusolver.so" 15 | #endif 16 | 17 | #include "cusolver_common.h" 18 | #skipinclude 19 | #endif 20 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cusolverSp.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CUSOLVERSP_H_ 3 | 4 | #def CUSOLVERAPI 5 | 6 | #dynlib libName 7 
| #private libName 8 | #cdecl 9 | #if defined(windows) 10 | # define libName "cusolver.dll" 11 | #elif defined(macosx) 12 | # define libName "libcusolver.dylib" 13 | #else 14 | # define libName "libcusolver.so" 15 | #endif 16 | 17 | #include "cuComplex.h" 18 | #include "driver_types.h" 19 | #include "cusolver_common.h" 20 | #include "cusparse.h" 21 | #skipinclude 22 | #endif 23 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cusolver_common.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef CUSOLVER_COMMON_H_ 3 | #mangle __int64 int64 4 | 5 | #def CUSOLVERAPI 6 | 7 | // #assumendef _MSC_VER 8 | 9 | #dynlib libName 10 | #private libName 11 | #cdecl 12 | #if defined(windows) 13 | # define libName "cusolver.dll" 14 | #elif defined(macosx) 15 | # define libName "libcusolver.dylib" 16 | #else 17 | # define libName "libcusolver.so" 18 | #endif 19 | 20 | #include "library_types.h" 21 | #skipinclude 22 | #endif 23 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/cusparse.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #def CUSPARSEAPI 3 | 4 | #assumendef CUSPARSE_H_ 5 | #assumendef _MSC_VER 6 | 7 | // #prefix cusparse 8 | // #prefix cusparse_ 9 | 10 | #dynlib libName 11 | #private libName 12 | #cdecl 13 | #if defined(windows) 14 | # define libName "cusparse.dll" 15 | #elif defined(macosx) 16 | # define libName "libcusparse.dylib" 17 | #else 18 | # define libName "libcusparse.so" 19 | #endif 20 | 21 | #define DISABLE_CUSPARSE_DEPRECATED 22 | 23 | #def CUSPARSE_DEPRECATED_REPLACE_WITH(new_func) 24 | #def CUSPARSE_DEPRECATED 25 | #def CUSPARSE_DEPRECATED_TYPE 26 | #def CUSPARSE_DEPRECATED_TYPE_MSVC 27 | #def CUSPARSE_DEPRECATED_ENUM_REPLACE_WITH(new_enum) 28 | #def CUSPARSE_DEPRECATED_ENUM 29 | 30 | #include "library_types.h" 31 | #include "driver_types.h" 
32 | #include "cuComplex.h" 33 | #skipinclude 34 | #endif 35 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/driver_types.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef __DRIVER_TYPES_H__ 3 | #assumedef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ 4 | #assumendef __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DRIVER_TYPES_H__ 5 | 6 | 7 | #def __device_builtin__ 8 | 9 | // #pp cudaDevicePropDontCare 10 | 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/library_types.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef __LIBRARY_TYPES_H__ 3 | 4 | #endif 5 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/nvblas.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef NVBLAS_H_ 3 | 4 | #dynlib libName 5 | #private libName 6 | #cdecl 7 | #if defined(windows) 8 | # define libName "nvblas.dll" 9 | #elif defined(macosx) 10 | # define libName "libnvblas.dylib" 11 | #else 12 | # define libName "libnvblas.so" 13 | #endif 14 | 15 | #include "cuComplex.h" 16 | #skipinclude 17 | #endif 18 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/nvrtc.h: -------------------------------------------------------------------------------- 1 | 2 | #ifdef C2NIM 3 | #assumendef __NVRTC_H__ 4 | 5 | #def __device_builtin__ 6 | 7 | #mangle _nvrtcProgram nvrtcProgramObj 8 | 9 | #skipinclude 10 | 11 | #dynlib libName 12 | #private libName 13 | #cdecl 14 | #if defined(windows) 15 | # stdcall 16 | # define libName "nvrtc64.dll" // dont know that this is right 17 | #elif defined(macosx) 18 | # define libName "libnvrtc.dylib" 19 | #else 20 | # define libName "libnvrtc.so" 21 | #endif 22 | 23 | #@ 24 
| type nvrtcProgramObj {.noDecl, incompleteStruct.} = object 25 | @# 26 | 27 | #endif 28 | 29 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/surface_types.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef __SURFACE_TYPES_H__ 3 | 4 | #def __device_builtin__ 5 | 6 | #skipinclude 7 | #endif 8 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/texture_types.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef __TEXTURE_TYPES_H__ 3 | 4 | #def __device_builtin__ 5 | 6 | #skipinclude 7 | #endif 8 | -------------------------------------------------------------------------------- /c2nim/cuda12_5/vector_types.h: -------------------------------------------------------------------------------- 1 | #ifdef C2NIM 2 | #assumendef __VECTOR_TYPES_H__ 3 | 4 | #def __device_builtin__ 5 | 6 | #header "vector_types.h" 7 | #endif 8 | -------------------------------------------------------------------------------- /changelog.org: -------------------------------------------------------------------------------- 1 | * v0.2.2 2 | - add basic CUDA installation detection (PR #28) 3 | - can be overridden by defining ~CudaPath~ and ~CudaIncludes~ 4 | - fix issue #27 by using ~header~ pragma for ~cuda.h~ wrapper (PR #28) 5 | * v0.2.1 6 | - fix nimble file for Windows, Nim <= 1.6 (up to 1.4 supported), PR #22 7 | * v0.2.0 8 | - add support for CUDA version 12.5. 
Includes a massive revamp of how 9 | the bindings are generated, by @lilkeet (PR #17) 10 | * v0.1.9 11 | - fix ~fileExists~ ambiguous identifier for older Nim versions 12 | - fix placement of ~{.union.}~ pragma 13 | -------------------------------------------------------------------------------- /examples/cuda12_5/blas.nim: -------------------------------------------------------------------------------- 1 | # Link against the cuBLAS and CUDA runtime libraries 2 | 3 | import 4 | std / [strformat], 5 | ../../src/nimcuda/cuda12_5/[cuda_runtime_api, cublas_api, 6 | driver_types, check] 7 | 8 | 9 | 10 | proc main() = 11 | var handle: cublasHandle_t 12 | 13 | # Initialize cuBLAS library 14 | check cublasCreate_v2(addr handle) 15 | 16 | # Matrix dimensions 17 | const m = 2 # Rows of A and C 18 | const n = 2 # Columns of B and C 19 | const k = 2 # Columns of A and rows of B 20 | 21 | # Host matrices (column-major order) 22 | var h_A: array[0..(m*k)-1, cfloat] = [cfloat 1.0, 2.0, 23 | 3.0, 4.0] 24 | 25 | var h_B: array[0..(k*n)-1, cfloat] = [cfloat 5.0, 6.0, 26 | 7.0, 8.0] 27 | 28 | var h_C: array[0..(m*n)-1, cfloat] = [cfloat 0.0, 0.0, 29 | 0.0, 0.0] 30 | 31 | # Device pointers 32 | var d_A, d_B, d_C: pointer 33 | 34 | # Allocate device memory 35 | check cudaMalloc(addr d_A, culong(m*k*sizeof(cfloat))) 36 | check cudaMalloc(addr d_B, culong(k*n*sizeof(cfloat))) 37 | check cudaMalloc(addr d_C, culong(m*n*sizeof(cfloat))) 38 | 39 | # Copy host data to device 40 | check cudaMemcpy(d_A, addr h_A[0], culong(m*k*sizeof(cfloat)), 41 | cudaMemcpyHostToDevice) 42 | check cudaMemcpy(d_B, addr h_B[0], culong(k*n*sizeof(cfloat)), 43 | cudaMemcpyHostToDevice) 44 | 45 | # Scalars for the operation 46 | var alpha: cfloat = 1.0 47 | var beta: cfloat = 0.0 48 | 49 | # Perform matrix multiplication: C = alpha * A * B + beta * C 50 | check cublasSgemm_v2(handle, CUBLAS_OP_N #[No transpose for A]#, 51 | CUBLAS_OP_N #[No transpose for B]#, m, n, k, addr alpha, 52 | cast[ptr cfloat](d_A), m, 
cast[ptr cfloat](d_B), k, addr beta, 53 | cast[ptr cfloat](d_C), m) 54 | 55 | # Copy result back to host 56 | check cudaMemcpy(addr h_C[0], d_C, culong(m*n*sizeof(cfloat)), 57 | cudaMemcpyDeviceToHost) 58 | 59 | # Display the result 60 | echo "Result matrix C:" 61 | for i in 0..= 0: 80 | echo "A is singular at row ", singularity 81 | return 82 | 83 | # Copy result back to host 84 | check cudaMemcpy(addr h_x[0], d_x, culong(n*sizeof(cfloat)), 85 | cudaMemcpyDeviceToHost) 86 | 87 | # Display the result 88 | echo "Solution vector x:" 89 | for i in 0.. 67 | #endif 68 | 69 | typedef int cusolver_int_t; 70 | 71 | #define CUSOLVER_VER_MAJOR 11 72 | #define CUSOLVER_VER_MINOR 6 73 | #define CUSOLVER_VER_PATCH 3 74 | #define CUSOLVER_VER_BUILD 83 75 | #define CUSOLVER_VERSION \ 76 | (CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH) 77 | 78 | //------------------------------------------------------------------------------ 79 | 80 | // #if !defined(_MSC_VER) 81 | // #define CUSOLVER_CPP_VERSION __cplusplus 82 | // #elif _MSC_FULL_VER >= 190024210 // Visual Studio 2015 Update 3 83 | // #define CUSOLVER_CPP_VERSION _MSVC_LANG 84 | // #else 85 | // #define CUSOLVER_CPP_VERSION 0 86 | // #endif 87 | 88 | //------------------------------------------------------------------------------ 89 | 90 | // #if !defined(DISABLE_CUSOLVER_DEPRECATED) 91 | // 92 | // #if CUSOLVER_CPP_VERSION >= 201402L 93 | // 94 | // #define CUSOLVER_DEPRECATED(new_func) \ 95 | // [[deprecated("please use " #new_func " instead")]] 96 | // 97 | // #elif defined(_MSC_VER) 98 | // 99 | // #define CUSOLVER_DEPRECATED(new_func) \ 100 | // __declspec(deprecated("please use " #new_func " instead")) 101 | // 102 | // #elif defined(__INTEL_COMPILER) || defined(__clang__) || \ 103 | // (defined(__GNUC__) && \ 104 | // (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) 105 | // 106 | // #define CUSOLVER_DEPRECATED(new_func) \ 107 | // __attribute__((deprecated("please use " #new_func " 
instead"))) 108 | // 109 | // #elif defined(__GNUC__) || defined(__xlc__) 110 | // 111 | // #define CUSOLVER_DEPRECATED(new_func) __attribute__((deprecated)) 112 | // 113 | // #else 114 | // 115 | // #define CUSOLVER_DEPRECATED(new_func) 116 | // 117 | // #endif // defined(__cplusplus) && __cplusplus >= 201402L 118 | // //------------------------------------------------------------------------------ 119 | // 120 | // #if CUSOLVER_CPP_VERSION >= 201703L 121 | // 122 | // #define CUSOLVER_DEPRECATED_ENUM(new_enum) \ 123 | // [[deprecated("please use " #new_enum " instead")]] 124 | // 125 | // #elif defined(__clang__) || \ 126 | // (defined(__GNUC__) && __GNUC__ >= 6 && !defined(__PGI)) 127 | // 128 | // #define CUSOLVER_DEPRECATED_ENUM(new_enum) \ 129 | // __attribute__((deprecated("please use " #new_enum " instead"))) 130 | // 131 | // #else 132 | // 133 | // #define CUSOLVER_DEPRECATED_ENUM(new_enum) 134 | // 135 | // #endif // defined(__cplusplus) && __cplusplus >= 201402L 136 | // 137 | // #else // defined(DISABLE_CUSOLVER_DEPRECATED) 138 | // 139 | // #define CUSOLVER_DEPRECATED(new_func) 140 | // #define CUSOLVER_DEPRECATED_ENUM(new_enum) 141 | // 142 | // #endif // !defined(DISABLE_CUSOLVER_DEPRECATED) 143 | 144 | // #undef CUSOLVER_CPP_VERSION 145 | 146 | #if defined(__cplusplus) 147 | extern "C" { 148 | #endif /* __cplusplus */ 149 | 150 | typedef enum { 151 | CUSOLVER_STATUS_SUCCESS = 0, 152 | CUSOLVER_STATUS_NOT_INITIALIZED = 1, 153 | CUSOLVER_STATUS_ALLOC_FAILED = 2, 154 | CUSOLVER_STATUS_INVALID_VALUE = 3, 155 | CUSOLVER_STATUS_ARCH_MISMATCH = 4, 156 | CUSOLVER_STATUS_MAPPING_ERROR = 5, 157 | CUSOLVER_STATUS_EXECUTION_FAILED = 6, 158 | CUSOLVER_STATUS_INTERNAL_ERROR = 7, 159 | CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8, 160 | CUSOLVER_STATUS_NOT_SUPPORTED = 9, 161 | CUSOLVER_STATUS_ZERO_PIVOT = 10, 162 | CUSOLVER_STATUS_INVALID_LICENSE = 11, 163 | CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED = 12, 164 | CUSOLVER_STATUS_IRS_PARAMS_INVALID = 13, 165 | 
CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC = 14, 166 | CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE = 15, 167 | CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER = 16, 168 | CUSOLVER_STATUS_IRS_INTERNAL_ERROR = 20, 169 | CUSOLVER_STATUS_IRS_NOT_SUPPORTED = 21, 170 | CUSOLVER_STATUS_IRS_OUT_OF_RANGE = 22, 171 | CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES = 23, 172 | CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED = 25, 173 | CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED = 26, 174 | CUSOLVER_STATUS_IRS_MATRIX_SINGULAR = 30, 175 | CUSOLVER_STATUS_INVALID_WORKSPACE = 31 176 | } cusolverStatus_t; 177 | 178 | typedef enum { 179 | CUSOLVER_EIG_TYPE_1 = 1, 180 | CUSOLVER_EIG_TYPE_2 = 2, 181 | CUSOLVER_EIG_TYPE_3 = 3 182 | } cusolverEigType_t; 183 | 184 | typedef enum { 185 | CUSOLVER_EIG_MODE_NOVECTOR = 0, 186 | CUSOLVER_EIG_MODE_VECTOR = 1 187 | } cusolverEigMode_t; 188 | 189 | typedef enum { 190 | CUSOLVER_EIG_RANGE_ALL = 1001, 191 | CUSOLVER_EIG_RANGE_I = 1002, 192 | CUSOLVER_EIG_RANGE_V = 1003, 193 | } cusolverEigRange_t; 194 | 195 | typedef enum { 196 | CUSOLVER_INF_NORM = 104, 197 | CUSOLVER_MAX_NORM = 105, 198 | CUSOLVER_ONE_NORM = 106, 199 | CUSOLVER_FRO_NORM = 107, 200 | } cusolverNorm_t; 201 | 202 | typedef enum { 203 | CUSOLVER_IRS_REFINE_NOT_SET = 1100, 204 | CUSOLVER_IRS_REFINE_NONE = 1101, 205 | CUSOLVER_IRS_REFINE_CLASSICAL = 1102, 206 | CUSOLVER_IRS_REFINE_CLASSICAL_GMRES = 1103, 207 | CUSOLVER_IRS_REFINE_GMRES = 1104, 208 | CUSOLVER_IRS_REFINE_GMRES_GMRES = 1105, 209 | CUSOLVER_IRS_REFINE_GMRES_NOPCOND = 1106, 210 | 211 | CUSOLVER_PREC_DD = 1150, 212 | CUSOLVER_PREC_SS = 1151, 213 | CUSOLVER_PREC_SHT = 1152, 214 | 215 | } cusolverIRSRefinement_t; 216 | 217 | typedef enum { 218 | CUSOLVER_R_8I = 1201, 219 | CUSOLVER_R_8U = 1202, 220 | CUSOLVER_R_64F = 1203, 221 | CUSOLVER_R_32F = 1204, 222 | CUSOLVER_R_16F = 1205, 223 | CUSOLVER_R_16BF = 1206, 224 | CUSOLVER_R_TF32 = 1207, 225 | CUSOLVER_R_AP = 1208, 226 | CUSOLVER_C_8I = 1211, 227 | CUSOLVER_C_8U = 1212, 228 | 
CUSOLVER_C_64F = 1213, 229 | CUSOLVER_C_32F = 1214, 230 | CUSOLVER_C_16F = 1215, 231 | CUSOLVER_C_16BF = 1216, 232 | CUSOLVER_C_TF32 = 1217, 233 | CUSOLVER_C_AP = 1218, 234 | } cusolverPrecType_t; 235 | 236 | typedef enum { 237 | CUSOLVER_ALG_0 = 0, /* default algorithm */ 238 | CUSOLVER_ALG_1 = 1, 239 | CUSOLVER_ALG_2 = 2 240 | } cusolverAlgMode_t; 241 | 242 | typedef enum { 243 | CUBLAS_STOREV_COLUMNWISE = 0, 244 | CUBLAS_STOREV_ROWWISE = 1 245 | } cusolverStorevMode_t; 246 | 247 | typedef enum { 248 | CUBLAS_DIRECT_FORWARD = 0, 249 | CUBLAS_DIRECT_BACKWARD = 1 250 | } cusolverDirectMode_t; 251 | 252 | cusolverStatus_t CUSOLVERAPI 253 | cusolverGetProperty(libraryPropertyType type, int *value); 254 | 255 | cusolverStatus_t CUSOLVERAPI cusolverGetVersion(int *version); 256 | 257 | #if defined(__cplusplus) 258 | } 259 | #endif /* __cplusplus */ 260 | 261 | #endif // CUSOLVER_COMMON_H_ 262 | -------------------------------------------------------------------------------- /include/cuda12_5/vector_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NOTICE TO LICENSEE: 5 | * 6 | * This source code and/or documentation ("Licensed Deliverables") are 7 | * subject to NVIDIA intellectual property rights under U.S. and 8 | * international Copyright laws. 9 | * 10 | * These Licensed Deliverables contained herein is PROPRIETARY and 11 | * CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | * conditions of a form of NVIDIA software license agreement by and 13 | * between NVIDIA and Licensee ("License Agreement") or electronically 14 | * accepted by Licensee. Notwithstanding any terms or conditions to 15 | * the contrary in the License Agreement, reproduction or disclosure 16 | * of the Licensed Deliverables to any third party without the express 17 | * written consent of NVIDIA is prohibited. 
18 | * 19 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | * OF THESE LICENSED DELIVERABLES. 33 | * 34 | * U.S. Government End Users. These Licensed Deliverables are a 35 | * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | * 1995), consisting of "commercial computer software" and "commercial 37 | * computer software documentation" as such terms are used in 48 38 | * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | * only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | * U.S. Government End Users acquire the Licensed Deliverables with 42 | * only those rights set forth herein. 43 | * 44 | * Any use of the Licensed Deliverables in individual and commercial 45 | * software must include, in the user documentation and internal 46 | * comments to the code, the above Disclaimer and U.S. Government End 47 | * Users Notice. 
48 | */ 49 | 50 | #if !defined(__VECTOR_TYPES_H__) 51 | #define __VECTOR_TYPES_H__ 52 | 53 | struct __device_builtin__ char1 54 | { 55 | signed char x; 56 | }; 57 | 58 | struct __device_builtin__ uchar1 59 | { 60 | unsigned char x; 61 | }; 62 | 63 | 64 | struct __device_builtin__ char2 65 | { 66 | signed char x, y; 67 | }; 68 | 69 | struct __device_builtin__ uchar2 70 | { 71 | unsigned char x, y; 72 | }; 73 | 74 | struct __device_builtin__ char3 75 | { 76 | signed char x, y, z; 77 | }; 78 | 79 | struct __device_builtin__ uchar3 80 | { 81 | unsigned char x, y, z; 82 | }; 83 | 84 | struct __device_builtin__ char4 85 | { 86 | signed char x, y, z, w; 87 | }; 88 | 89 | struct __device_builtin__ uchar4 90 | { 91 | unsigned char x, y, z, w; 92 | }; 93 | 94 | struct __device_builtin__ short1 95 | { 96 | short x; 97 | }; 98 | 99 | struct __device_builtin__ ushort1 100 | { 101 | unsigned short x; 102 | }; 103 | 104 | struct __device_builtin__ short2 105 | { 106 | short x, y; 107 | }; 108 | 109 | struct __device_builtin__ ushort2 110 | { 111 | unsigned short x, y; 112 | }; 113 | 114 | struct __device_builtin__ short3 115 | { 116 | short x, y, z; 117 | }; 118 | 119 | struct __device_builtin__ ushort3 120 | { 121 | unsigned short x, y, z; 122 | }; 123 | 124 | struct __device_builtin__ short4 125 | { 126 | short x, y, z, w; 127 | }; 128 | 129 | struct __device_builtin__ ushort4 130 | { 131 | unsigned short x, y, z, w; 132 | }; 133 | 134 | struct __device_builtin__ int1 135 | { 136 | int x; 137 | }; 138 | 139 | struct __device_builtin__ uint1 140 | { 141 | unsigned int x; 142 | }; 143 | 144 | struct __device_builtin__ int2 145 | { 146 | int x, y; 147 | }; 148 | 149 | struct __device_builtin__ uint2 150 | { 151 | unsigned int x, y; 152 | }; 153 | 154 | struct __device_builtin__ int3 155 | { 156 | int x, y, z; 157 | }; 158 | 159 | struct __device_builtin__ uint3 160 | { 161 | unsigned int x, y, z; 162 | }; 163 | 164 | struct __device_builtin__ int4 165 | { 166 | int x, y, z, w; 167 
| }; 168 | 169 | struct __device_builtin__ uint4 170 | { 171 | unsigned int x, y, z, w; 172 | }; 173 | 174 | struct __device_builtin__ long1 175 | { 176 | long int x; 177 | }; 178 | 179 | struct __device_builtin__ ulong1 180 | { 181 | unsigned long x; 182 | }; 183 | 184 | struct __device_builtin__ long2 185 | { 186 | long int x, y; 187 | }; 188 | 189 | struct __device_builtin__ ulong2 190 | { 191 | unsigned long int x, y; 192 | }; 193 | 194 | struct __device_builtin__ long3 195 | { 196 | long int x, y, z; 197 | }; 198 | 199 | struct __device_builtin__ ulong3 200 | { 201 | unsigned long int x, y, z; 202 | }; 203 | 204 | struct __device_builtin__ long4 205 | { 206 | long int x, y, z, w; 207 | }; 208 | 209 | struct __device_builtin__ ulong4 210 | { 211 | unsigned long int x, y, z, w; 212 | }; 213 | 214 | struct __device_builtin__ float1 215 | { 216 | float x; 217 | }; 218 | 219 | struct __device_builtin__ float2 220 | { 221 | float x; float y; 222 | }; 223 | 224 | struct __device_builtin__ float3 225 | { 226 | float x, y, z; 227 | }; 228 | 229 | struct __device_builtin__ float4 230 | { 231 | float x, y, z, w; 232 | }; 233 | 234 | struct __device_builtin__ longlong1 235 | { 236 | long long int x; 237 | }; 238 | 239 | struct __device_builtin__ ulonglong1 240 | { 241 | unsigned long long int x; 242 | }; 243 | 244 | struct __device_builtin__ longlong2 245 | { 246 | long long int x, y; 247 | }; 248 | 249 | struct __device_builtin__ ulonglong2 250 | { 251 | unsigned long long int x, y; 252 | }; 253 | 254 | struct __device_builtin__ longlong3 255 | { 256 | long long int x, y, z; 257 | }; 258 | 259 | struct __device_builtin__ ulonglong3 260 | { 261 | unsigned long long int x, y, z; 262 | }; 263 | 264 | struct __device_builtin__ longlong4 265 | { 266 | long long int x, y, z ,w; 267 | }; 268 | 269 | struct __device_builtin__ ulonglong4 270 | { 271 | unsigned long long int x, y, z, w; 272 | }; 273 | 274 | struct __device_builtin__ double1 275 | { 276 | double x; 277 | }; 278 | 
279 | struct __device_builtin__ double2 280 | { 281 | double x, y; 282 | }; 283 | 284 | struct __device_builtin__ double3 285 | { 286 | double x, y, z; 287 | }; 288 | 289 | struct __device_builtin__ double4 290 | { 291 | double x, y, z, w; 292 | }; 293 | 294 | 295 | /******************************************************************************* 296 | * * 297 | * * 298 | * * 299 | *******************************************************************************/ 300 | 301 | typedef __device_builtin__ struct char1 char1; 302 | typedef __device_builtin__ struct uchar1 uchar1; 303 | typedef __device_builtin__ struct char2 char2; 304 | typedef __device_builtin__ struct uchar2 uchar2; 305 | typedef __device_builtin__ struct char3 char3; 306 | typedef __device_builtin__ struct uchar3 uchar3; 307 | typedef __device_builtin__ struct char4 char4; 308 | typedef __device_builtin__ struct uchar4 uchar4; 309 | typedef __device_builtin__ struct short1 short1; 310 | typedef __device_builtin__ struct ushort1 ushort1; 311 | typedef __device_builtin__ struct short2 short2; 312 | typedef __device_builtin__ struct ushort2 ushort2; 313 | typedef __device_builtin__ struct short3 short3; 314 | typedef __device_builtin__ struct ushort3 ushort3; 315 | typedef __device_builtin__ struct short4 short4; 316 | typedef __device_builtin__ struct ushort4 ushort4; 317 | typedef __device_builtin__ struct int1 int1; 318 | typedef __device_builtin__ struct uint1 uint1; 319 | typedef __device_builtin__ struct int2 int2; 320 | typedef __device_builtin__ struct uint2 uint2; 321 | typedef __device_builtin__ struct int3 int3; 322 | typedef __device_builtin__ struct uint3 uint3; 323 | typedef __device_builtin__ struct int4 int4; 324 | typedef __device_builtin__ struct uint4 uint4; 325 | typedef __device_builtin__ struct long1 long1; 326 | typedef __device_builtin__ struct ulong1 ulong1; 327 | typedef __device_builtin__ struct long2 long2; 328 | typedef __device_builtin__ struct ulong2 ulong2; 329 | typedef 
__device_builtin__ struct long3 long3; 330 | typedef __device_builtin__ struct ulong3 ulong3; 331 | typedef __device_builtin__ struct long4 long4; 332 | typedef __device_builtin__ struct ulong4 ulong4; 333 | typedef __device_builtin__ struct float1 float1; 334 | typedef __device_builtin__ struct float2 float2; 335 | typedef __device_builtin__ struct float3 float3; 336 | typedef __device_builtin__ struct float4 float4; 337 | typedef __device_builtin__ struct longlong1 longlong1; 338 | typedef __device_builtin__ struct ulonglong1 ulonglong1; 339 | typedef __device_builtin__ struct longlong2 longlong2; 340 | typedef __device_builtin__ struct ulonglong2 ulonglong2; 341 | typedef __device_builtin__ struct longlong3 longlong3; 342 | typedef __device_builtin__ struct ulonglong3 ulonglong3; 343 | typedef __device_builtin__ struct longlong4 longlong4; 344 | typedef __device_builtin__ struct ulonglong4 ulonglong4; 345 | typedef __device_builtin__ struct double1 double1; 346 | typedef __device_builtin__ struct double2 double2; 347 | typedef __device_builtin__ struct double3 double3; 348 | typedef __device_builtin__ struct double4 double4; 349 | 350 | /******************************************************************************* 351 | * * 352 | * * 353 | * * 354 | *******************************************************************************/ 355 | 356 | struct __device_builtin__ dim3 357 | { 358 | unsigned int x, y, z; 359 | }; 360 | 361 | typedef __device_builtin__ struct dim3 dim3; 362 | 363 | #endif /* !__VECTOR_TYPES_H__ */ 364 | -------------------------------------------------------------------------------- /include/cuda8_0/vector_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | * 4 | * NOTICE TO LICENSEE: 5 | * 6 | * This source code and/or documentation ("Licensed Deliverables") are 7 | * subject to NVIDIA intellectual property rights under U.S. 
and 8 | * international Copyright laws. 9 | * 10 | * These Licensed Deliverables contained herein is PROPRIETARY and 11 | * CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | * conditions of a form of NVIDIA software license agreement by and 13 | * between NVIDIA and Licensee ("License Agreement") or electronically 14 | * accepted by Licensee. Notwithstanding any terms or conditions to 15 | * the contrary in the License Agreement, reproduction or disclosure 16 | * of the Licensed Deliverables to any third party without the express 17 | * written consent of NVIDIA is prohibited. 18 | * 19 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | * OF THESE LICENSED DELIVERABLES. 33 | * 34 | * U.S. Government End Users. These Licensed Deliverables are a 35 | * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | * 1995), consisting of "commercial computer software" and "commercial 37 | * computer software documentation" as such terms are used in 48 38 | * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | * only as a commercial end item. 
Consistent with 48 C.F.R.12.212 and 40 | * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | * U.S. Government End Users acquire the Licensed Deliverables with 42 | * only those rights set forth herein. 43 | * 44 | * Any use of the Licensed Deliverables in individual and commercial 45 | * software must include, in the user documentation and internal 46 | * comments to the code, the above Disclaimer and U.S. Government End 47 | * Users Notice. 48 | */ 49 | 50 | #if !defined(__VECTOR_TYPES_H__) 51 | #define __VECTOR_TYPES_H__ 52 | 53 | struct __device_builtin__ char1 54 | { 55 | signed char x; 56 | }; 57 | 58 | struct __device_builtin__ uchar1 59 | { 60 | unsigned char x; 61 | }; 62 | 63 | 64 | struct __device_builtin__ char2 65 | { 66 | signed char x, y; 67 | }; 68 | 69 | struct __device_builtin__ uchar2 70 | { 71 | unsigned char x, y; 72 | }; 73 | 74 | struct __device_builtin__ char3 75 | { 76 | signed char x, y, z; 77 | }; 78 | 79 | struct __device_builtin__ uchar3 80 | { 81 | unsigned char x, y, z; 82 | }; 83 | 84 | struct __device_builtin__ char4 85 | { 86 | signed char x, y, z, w; 87 | }; 88 | 89 | struct __device_builtin__ uchar4 90 | { 91 | unsigned char x, y, z, w; 92 | }; 93 | 94 | struct __device_builtin__ short1 95 | { 96 | short x; 97 | }; 98 | 99 | struct __device_builtin__ ushort1 100 | { 101 | unsigned short x; 102 | }; 103 | 104 | struct __device_builtin__ short2 105 | { 106 | short x, y; 107 | }; 108 | 109 | struct __device_builtin__ ushort2 110 | { 111 | unsigned short x, y; 112 | }; 113 | 114 | struct __device_builtin__ short3 115 | { 116 | short x, y, z; 117 | }; 118 | 119 | struct __device_builtin__ ushort3 120 | { 121 | unsigned short x, y, z; 122 | }; 123 | 124 | struct __device_builtin__ short4 125 | { 126 | short x, y, z, w; 127 | }; 128 | 129 | struct __device_builtin__ ushort4 130 | { 131 | unsigned short x, y, z, w; 132 | }; 133 | 134 | struct __device_builtin__ int1 135 | { 136 | int x; 137 | }; 138 | 139 | struct 
__device_builtin__ uint1 140 | { 141 | unsigned int x; 142 | }; 143 | 144 | struct __device_builtin__ int2 145 | { 146 | int x, y; 147 | }; 148 | 149 | struct __device_builtin__ uint2 150 | { 151 | unsigned int x, y; 152 | }; 153 | 154 | struct __device_builtin__ int3 155 | { 156 | int x, y, z; 157 | }; 158 | 159 | struct __device_builtin__ uint3 160 | { 161 | unsigned int x, y, z; 162 | }; 163 | 164 | struct __device_builtin__ int4 165 | { 166 | int x, y, z, w; 167 | }; 168 | 169 | struct __device_builtin__ uint4 170 | { 171 | unsigned int x, y, z, w; 172 | }; 173 | 174 | struct __device_builtin__ long1 175 | { 176 | long int x; 177 | }; 178 | 179 | struct __device_builtin__ ulong1 180 | { 181 | unsigned long x; 182 | }; 183 | 184 | struct __device_builtin__ long2 185 | { 186 | long int x, y; 187 | }; 188 | 189 | struct __device_builtin__ ulong2 190 | { 191 | unsigned long int x, y; 192 | }; 193 | 194 | struct __device_builtin__ long3 195 | { 196 | long int x, y, z; 197 | }; 198 | 199 | struct __device_builtin__ ulong3 200 | { 201 | unsigned long int x, y, z; 202 | }; 203 | 204 | struct __device_builtin__ long4 205 | { 206 | long int x, y, z, w; 207 | }; 208 | 209 | struct __device_builtin__ ulong4 210 | { 211 | unsigned long int x, y, z, w; 212 | }; 213 | 214 | struct __device_builtin__ float1 215 | { 216 | float x; 217 | }; 218 | 219 | struct __device_builtin__ float2 220 | { 221 | float x; float y; 222 | }; 223 | 224 | struct __device_builtin__ float3 225 | { 226 | float x, y, z; 227 | }; 228 | 229 | struct __device_builtin__ float4 230 | { 231 | float x, y, z, w; 232 | }; 233 | 234 | struct __device_builtin__ longlong1 235 | { 236 | long long int x; 237 | }; 238 | 239 | struct __device_builtin__ ulonglong1 240 | { 241 | unsigned long long int x; 242 | }; 243 | 244 | struct __device_builtin__ longlong2 245 | { 246 | long long int x, y; 247 | }; 248 | 249 | struct __device_builtin__ ulonglong2 250 | { 251 | unsigned long long int x, y; 252 | }; 253 | 254 | 
struct __device_builtin__ longlong3 255 | { 256 | long long int x, y, z; 257 | }; 258 | 259 | struct __device_builtin__ ulonglong3 260 | { 261 | unsigned long long int x, y, z; 262 | }; 263 | 264 | struct __device_builtin__ longlong4 265 | { 266 | long long int x, y, z ,w; 267 | }; 268 | 269 | struct __device_builtin__ ulonglong4 270 | { 271 | unsigned long long int x, y, z, w; 272 | }; 273 | 274 | struct __device_builtin__ double1 275 | { 276 | double x; 277 | }; 278 | 279 | struct __device_builtin__ double2 280 | { 281 | double x, y; 282 | }; 283 | 284 | struct __device_builtin__ double3 285 | { 286 | double x, y, z; 287 | }; 288 | 289 | struct __device_builtin__ double4 290 | { 291 | double x, y, z, w; 292 | }; 293 | 294 | 295 | /******************************************************************************* 296 | * * 297 | * * 298 | * * 299 | *******************************************************************************/ 300 | 301 | typedef __device_builtin__ struct char1 char1; 302 | typedef __device_builtin__ struct uchar1 uchar1; 303 | typedef __device_builtin__ struct char2 char2; 304 | typedef __device_builtin__ struct uchar2 uchar2; 305 | typedef __device_builtin__ struct char3 char3; 306 | typedef __device_builtin__ struct uchar3 uchar3; 307 | typedef __device_builtin__ struct char4 char4; 308 | typedef __device_builtin__ struct uchar4 uchar4; 309 | typedef __device_builtin__ struct short1 short1; 310 | typedef __device_builtin__ struct ushort1 ushort1; 311 | typedef __device_builtin__ struct short2 short2; 312 | typedef __device_builtin__ struct ushort2 ushort2; 313 | typedef __device_builtin__ struct short3 short3; 314 | typedef __device_builtin__ struct ushort3 ushort3; 315 | typedef __device_builtin__ struct short4 short4; 316 | typedef __device_builtin__ struct ushort4 ushort4; 317 | typedef __device_builtin__ struct int1 int1; 318 | typedef __device_builtin__ struct uint1 uint1; 319 | typedef __device_builtin__ struct int2 int2; 320 | typedef 
__device_builtin__ struct uint2 uint2; 321 | typedef __device_builtin__ struct int3 int3; 322 | typedef __device_builtin__ struct uint3 uint3; 323 | typedef __device_builtin__ struct int4 int4; 324 | typedef __device_builtin__ struct uint4 uint4; 325 | typedef __device_builtin__ struct long1 long1; 326 | typedef __device_builtin__ struct ulong1 ulong1; 327 | typedef __device_builtin__ struct long2 long2; 328 | typedef __device_builtin__ struct ulong2 ulong2; 329 | typedef __device_builtin__ struct long3 long3; 330 | typedef __device_builtin__ struct ulong3 ulong3; 331 | typedef __device_builtin__ struct long4 long4; 332 | typedef __device_builtin__ struct ulong4 ulong4; 333 | typedef __device_builtin__ struct float1 float1; 334 | typedef __device_builtin__ struct float2 float2; 335 | typedef __device_builtin__ struct float3 float3; 336 | typedef __device_builtin__ struct float4 float4; 337 | typedef __device_builtin__ struct longlong1 longlong1; 338 | typedef __device_builtin__ struct ulonglong1 ulonglong1; 339 | typedef __device_builtin__ struct longlong2 longlong2; 340 | typedef __device_builtin__ struct ulonglong2 ulonglong2; 341 | typedef __device_builtin__ struct longlong3 longlong3; 342 | typedef __device_builtin__ struct ulonglong3 ulonglong3; 343 | typedef __device_builtin__ struct longlong4 longlong4; 344 | typedef __device_builtin__ struct ulonglong4 ulonglong4; 345 | typedef __device_builtin__ struct double1 double1; 346 | typedef __device_builtin__ struct double2 double2; 347 | typedef __device_builtin__ struct double3 double3; 348 | typedef __device_builtin__ struct double4 double4; 349 | 350 | /******************************************************************************* 351 | * * 352 | * * 353 | * * 354 | *******************************************************************************/ 355 | 356 | struct __device_builtin__ dim3 357 | { 358 | unsigned int x, y, z; 359 | }; 360 | 361 | typedef __device_builtin__ struct dim3 dim3; 362 | 363 | #endif /* 
!__VECTOR_TYPES_H__ */ 364 | -------------------------------------------------------------------------------- /nimcuda.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | 3 | version = "0.2.2" 4 | author = "Andrea Ferretti" 5 | description = "Nim binding for CUDA" 6 | license = "Apache2" 7 | skipDirs = @["headers", "include", "c2nim", "examples", "htmldocs"] 8 | srcDir = "src" 9 | 10 | # Dependencies 11 | 12 | requires "nim >= 1.4.0" 13 | 14 | import 15 | std / [strscans, strformat, os, sequtils, strutils, pegs] 16 | 17 | type CudaVersion = enum 18 | cuda8_0, cuda12_5 19 | 20 | const DefaultVersion = cuda8_0 21 | 22 | const 23 | ModifiedHeadersDir = "include" 24 | NimCodeDir = "src" 25 | UtilitiesDir = "utils" 26 | DocumentationDir = "htmldocs" 27 | ExamplesDir = "examples" 28 | C2nimDirectivesDir = "c2nim" 29 | TemporaryHeadersDir = "headers" 30 | 31 | const Libs = [ 32 | # "library_types", 33 | # "vector_types", 34 | # "driver_types", # do not decomment - the nim file is manually adjusted 35 | "surface_types", 36 | "texture_types", 37 | "cuda_runtime_api", 38 | # "cuda_occupancy", # do not decomment - the nim file is manually adjusted 39 | "cuComplex", 40 | "cublas_api", 41 | # "cublas_v2", 42 | # "cudnn_v9", 43 | "cufft", 44 | "curand", 45 | "cusolver_common", 46 | "cusolverDn", 47 | "cusolverRf", 48 | "cusolverSp", 49 | "cusparse", 50 | "nvblas", 51 | #"nvgraph" <- removed in cuda 11.0, adopted into cugraph 52 | "nvrtc", 53 | "cuda", 54 | ] 55 | 56 | proc systemCudaName(v: CudaVersion): string = 57 | ## Returns the name used for cuda directories on linux. 
58 | var captures: array[2, string] 59 | doAssert ($v).match(peg" 'cuda' {\d+} '_' {\d+} ", captures) # doAssert: the match fills `captures`, so it must run even when assertions are disabled 60 | fmt"cuda-{captures[0]}.{captures[1]}" 61 | 62 | proc systemCudaInclude(version: CudaVersion): string = 63 | when hostOS == "windows": 64 | getEnv("CUDA_PATH") / "include" 65 | else: 66 | "/usr/local" / version.systemCudaName / "include" 67 | 68 | proc systemCudaCLib(version: CudaVersion): string = 69 | when hostOS == "windows": 70 | getEnv("CUDA_PATH") / "lib" / "x64" 71 | else: 72 | "/usr/local" / version.systemCudaName / "lib64" 73 | 74 | 75 | func nimcudaSourceDir(version: CudaVersion): string = 76 | const dirThatHoldsVersions = NimCodeDir / "nimcuda" 77 | result = dirThatHoldsVersions / $version 78 | 79 | func nimcudaExamplesDir(version: CudaVersion): string = 80 | const dirThatHoldsVersions = ExamplesDir 81 | result = dirThatHoldsVersions / $version 82 | 83 | 84 | proc patch(libFileName: string; version: CudaVersion): string = 85 | let installedLib = systemCudaInclude(version) / libFileName 86 | 87 | let 88 | simpleLibPath = ModifiedHeadersDir / $version / libFileName 89 | patchPath = C2nimDirectivesDir / $version / libFileName 90 | outPath = TemporaryHeadersDir / libFileName 91 | libContent = 92 | if simpleLibPath.fileExists: readFile(simpleLibPath) 93 | else: readFile(installedLib) 94 | patchContent = readFile(patchPath) 95 | 96 | writeFile(outPath, patchContent & "\n" & libContent) 97 | return outPath 98 | 99 | 100 | proc preprocess(filePath: string) = 101 | const preprocessorExe = UtilitiesDir / "preprocessor".toExe 102 | 103 | if not preprocessorExe.fileExists: 104 | # Compile preprocessor.
105 | const preprocessorSource = preprocessorExe.changeFileExt("nim") 106 | exec "nim c -d:release " & preprocessorSource 107 | 108 | exec preprocessorExe & " " & filePath 109 | 110 | 111 | proc postprocess(filePath: string) = 112 | const postprocessorExe = UtilitiesDir / "postprocessor".toExe 113 | 114 | if not postprocessorExe.fileExists: 115 | # Compile postprocessor. 116 | const postprocessorSource = postprocessorExe.changeFileExt("nim") 117 | exec "nim c -d:release " & postprocessorSource 118 | 119 | exec postprocessorExe & " " & filePath 120 | 121 | 122 | proc process(libName: string; version: CudaVersion) = 123 | let 124 | headerFileName = libName.addFileExt("h") 125 | outPath = nimcudaSourceDir(version) / headerFileName.changeFileExt("nim") 126 | headerPath = patch(headerFileName, version) 127 | preprocess headerPath 128 | exec("c2nim --debug --strict --prefix\"_\" --prefix\"__\" --suffix\"_\" " & 129 | "--suffix\"__\" " & headerPath & " -o:" & outPath) 130 | postprocess outPath 131 | 132 | proc compile(filePath: string) = 133 | exec("nim c -c " & filePath) 134 | 135 | proc compile(libName: string; version: CudaVersion) = 136 | let libPath = nimcudaSourceDir(version) / libName.addFileExt("nim") 137 | compile libPath 138 | 139 | 140 | proc processAll(version: CudaVersion) = 141 | mkDir TemporaryHeadersDir 142 | 143 | for lib in Libs: 144 | process(lib, version) 145 | 146 | let allTemporaryFiles = TemporaryHeadersDir.listFiles() 147 | for file in allTemporaryFiles: 148 | rmFile file 149 | 150 | 151 | 152 | proc compileAll(version: CudaVersion) = 153 | if version == DefaultVersion: 154 | compile NimCodeDir / "nimcuda".addFileExt("nim") 155 | for nimSourceFile in nimcudaSourceDir(version).listFiles: 156 | exec "nim c -c " & nimSourceFile 157 | 158 | 159 | func parseCudaVersion(input: string): CudaVersion = 160 | ## Parses the passed cuda version, returning `DefaultVersion` if no match 161 | ## is found.
template taskWithCudaVersionArgument(name: untyped; description: string;
                                     body: untyped): untyped =
  ## Creates a nimble task that takes one command line argument: a cuda version.
  ## This argument is accessible as the symbol `cudaVersion`.
  ##
  ## The version argument is optional. The task name is looked for in the
  ## last two entries of `args`: if it is the last entry, no version was
  ## passed; if it is the second to last, the last entry is the version.
  ## Any other layout is reported as "too many arguments" and `body` is not
  ## run.
  task name, description:
    # `task` declares a proc called `<name>Task`; stringify that identifier
    # and cut off the trailing "Task" to get the name typed on the command
    # line.
    const NameOfThisTask = `name Task`.astToStr[0..^5] #removing "Task"

    let
      noVersionArgPassed = args[^1] == NameOfThisTask
      # NOTE(review): indexing `args[^2]` assumes at least two command line
      # parameters are always present -- confirm for all supported nimble
      # versions.
      oneVersionArgPassed = args[^2] == NameOfThisTask
      tooManyArgs = not (noVersionArgPassed or oneVersionArgPassed)

    if tooManyArgs:
      echo "Too many arguments! Please only pass the cuda version to this task."
      echo "Example: 'nimble $1 12.5'" % NameOfThisTask
    else:
      # parseCudaVersion defaults to `DefaultVersion`, so if the task is the
      # last param, it returns the default.
      # The same fallback applies to a version string that is not recognised.
      let cudaVersion {.inject.} = args[^1].parseCudaVersion()
      body
proc exampleConfig(version: CudaVersion) =
  ## Applies the compiler switches shared by all example tasks: hints off,
  ## line directives, stack and line tracing and debug info on, the bindings
  ## of `version` added to the module search path, and the compiled example
  ## run right after compilation.
  switch("hints", "off")
  switch("linedir", "on")
  switch("stacktrace", "on")
  switch("linetrace", "on")
  switch("debuginfo")
  switch("path", thisDir() / nimcudaSourceDir(version))
  switch("run")
"c", nimcudaExamplesDir(cudaVersion) / "pagerank".addFileExt("nim") 273 | 274 | taskWithCertainVersions blas, "run cublas example", {cuda12_5}: 275 | # TODO: implement and test for 8.0 276 | exampleConfig(cudaVersion) 277 | setCommand "c", nimcudaExamplesDir(cudaVersion) / "blas".addFileExt("nim") 278 | 279 | taskWithCertainVersions denseLinearSystem, "run cusolverDn example", {cuda12_5}: 280 | # TODO: implement and test for 8.0 281 | exampleConfig(cudaVersion) 282 | setCommand "c", nimcudaExamplesDir(cudaVersion) / 283 | "denseLinearSystem".addFileExt("nim") 284 | 285 | taskWithCertainVersions sparseLinearSystem, "run cusolverSp example", 286 | {cuda12_5}: 287 | # TODO: implement and test for 8.0 288 | exampleConfig(cudaVersion) 289 | setCommand "c", nimcudaExamplesDir(cudaVersion) / 290 | "sparseLinearSystem".addFileExt("nim") 291 | 292 | taskWithCertainVersions runtimeCompilation, "run nvrtc example", 293 | {cuda12_5}: 294 | # TODO: implement and test for 8.0 295 | exampleConfig(cudaVersion) 296 | setCommand "c", nimcudaExamplesDir(cudaVersion) / 297 | "runtimeCompilation".addFileExt("nim") 298 | -------------------------------------------------------------------------------- /src/nimcuda.nim: -------------------------------------------------------------------------------- 1 | 2 | ##[ 3 | This main module exports a few of cuda 8.0's modules. 4 | If you need a different version or a module not exported here, try something 5 | like: 6 | 7 | .. code-block:: Nim 8 | import nimcuda/cuda8_0/library_name 9 | ]## 10 | 11 | import 12 | ./nimcuda/cuda8_0/[check, cuda_runtime_api, library_types, driver_types, 13 | vector_types] 14 | 15 | 16 | export check, cuda_runtime_api, library_types, driver_types, vector_types 17 | -------------------------------------------------------------------------------- /src/nimcuda/cuda12_5/check.nim: -------------------------------------------------------------------------------- 1 | # Copyright 2017 UniCredit S.p.A. 
# Exception types raised by the `check` overloads of this module. All of them
# derive from `IOError`, so a single `except IOError` handler catches any of
# them.
type
  CudaError* = object of IOError ## Failing `cudaError_t` (CUDA runtime).
  CudaOccError* = object of IOError ## Failing `cudaOccError` (occupancy API).
  CufftError* = object of IOError ## Failing `cufftResult`.
  CublasError* = object of IOError ## Failing `cublasStatus_t`.
  CusparseError* = object of IOError ## Failing `cusparseStatus_t`.
  CusolverError* = object of IOError ## Failing `cusolverStatus_t`.
  CurandError* = object of IOError ## Failing `curandStatus`.
  # CudnnError* = object of IOError
  # NVGraphError* = object of IOError
  NvrtcError* = object of IOError ## Failing `nvrtcResult`.
  CudaDriverError* = object of IOError ## Presumably meant for failing
                                       ## driver API (`CUresult`) calls.
func check*(a: sink CUresult) =
  ## Turns a failing CUDA driver API result into an exception.
  ##
  ## Does nothing when `a` is `CUDA_SUCCESS`. Any other value raises
  ## `CudaDriverError` whose message is the name of the enum value followed
  ## by its numeric code.
  if a != CUDA_SUCCESS:
    # Driver API failures get their own exception type; raising `NvrtcError`
    # here would make them indistinguishable from runtime-compilation errors.
    raise newException(CudaDriverError, $a & " " & $int(a))
14 | ## 15 | ## NOTICE TO LICENSEE: 16 | ## 17 | ## This source code and/or documentation ("Licensed Deliverables") are 18 | ## subject to NVIDIA intellectual property rights under U.S. and 19 | ## international Copyright laws. 20 | ## 21 | ## These Licensed Deliverables contained herein is PROPRIETARY and 22 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 23 | ## conditions of a form of NVIDIA software license agreement by and 24 | ## between NVIDIA and Licensee ("License Agreement") or electronically 25 | ## accepted by Licensee. Notwithstanding any terms or conditions to 26 | ## the contrary in the License Agreement, reproduction or disclosure 27 | ## of the Licensed Deliverables to any third party without the express 28 | ## written consent of NVIDIA is prohibited. 29 | ## 30 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 31 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 32 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 33 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 34 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 35 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 36 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 37 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 38 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 39 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 40 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 41 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 42 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 43 | ## OF THESE LICENSED DELIVERABLES. 44 | ## 45 | ## U.S. Government End Users. These Licensed Deliverables are a 46 | ## "commercial item" as that term is defined at 48 C.F.R. 
proc make_cuFloatComplex*(r: cfloat; i: cfloat): cuFloatComplex =
  ## Builds a single-precision complex number: `r` is stored in the `x`
  ## field (real part, as read by `cuCrealf`) and `i` in the `y` field
  ## (imaginary part, as read by `cuCimagf`).
  result.x = r
  result.y = i
proc cuCmulf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Single-precision complex product `x * y`.
  ##
  ## This implementation could suffer from intermediate overflow even though
  ## the final result would be in range. However, various implementations do
  ## not guard against this (presumably to avoid losing performance), so we
  ## don't do it either to stay competitive.
  let
    xRe = cuCrealf(x)
    xIm = cuCimagf(x)
    yRe = cuCrealf(y)
    yIm = cuCimagf(y)
  # (a + bi)(c + di) = (ac - bd) + (ad + bc)i
  result = make_cuFloatComplex((xRe * yRe) - (xIm * yIm),
                               (xRe * yIm) + (xIm * yRe))
proc cuCabsf*(x: cuFloatComplex): cfloat =
  ## Magnitude of a single-precision complex number.
  ##
  ## The larger of |re| and |im| is factored out before squaring, which
  ## guards against intermediate underflow and overflow. Accuracy still
  ## depends on `sqrt` and the division behaving IEEE-compliantly.
  let
    absRe = fabsf(cuCrealf(x))
    absIm = fabsf(cuCimagf(x))
    # `v` is the larger magnitude, `w` the smaller one (ties put im in `v`).
    (v, w) = if absRe > absIm: (absRe, absIm) else: (absIm, absRe)
    ratio = w div v

  result = v * sqrtf(1.0f + ratio * ratio)

  # Zero input and out-of-range components make the scaled formula
  # meaningless; fall back to the plain sum, which replaces the value
  # computed above.
  if (v == 0.0f) or (v > 3.402823466e38f) or (w > 3.402823466e38f):
    result = v + w
proc cuCdiv*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Double-precision complex quotient `x / y`.
  ##
  ## This implementation guards against intermediate underflow and overflow
  ## by scaling. Such guarded implementations are usually the default for
  ## complex library implementations, with some also offering an unguarded,
  ## faster version.
  ##
  ## All divisions use the floating point `/` operator so that they are
  ## carried out in `cdouble` precision.
  # Scale both operands by 1 / (|re(y)| + |im(y)|).
  let
    scale: cdouble = fabs(cuCreal(y)) + fabs(cuCimag(y))
    invScale: cdouble = 1.0 / scale
    ars: cdouble = cuCreal(x) * invScale
    ais: cdouble = cuCimag(x) * invScale
    brs: cdouble = cuCreal(y) * invScale
    bis: cdouble = cuCimag(y) * invScale
    # Squared magnitude of the scaled divisor and its reciprocal.
    denom: cdouble = (brs * brs) + (bis * bis)
    invDenom: cdouble = 1.0 / denom
  result = make_cuDoubleComplex(((ars * brs) + (ais * bis)) * invDenom,
                                ((ais * brs) - (ars * bis)) * invDenom)
proc cuComplexFloatToDouble*(c: cuFloatComplex): cuDoubleComplex =
  ## Promotes a single-precision complex number to double precision.
  ##
  ## Both components are converted by value. A `cast` must not be used here:
  ## it reinterprets the bit pattern of the 32-bit float as a 64-bit float
  ## instead of converting the number.
  return make_cuDoubleComplex(cdouble(cuCrealf(c)), cdouble(cuCimagf(c)))

proc cuComplexDoubleToFloat*(c: cuDoubleComplex): cuFloatComplex =
  ## Narrows a double-precision complex number to single precision.
  ##
  ## Both components are converted by value (rounded to `cfloat`), not
  ## bit-reinterpreted.
  return make_cuFloatComplex(cfloat(cuCreal(c)), cfloat(cuCimag(c)))
/src/nimcuda/cuda12_5/cusolver_common.nim: -------------------------------------------------------------------------------- 1 | ## #assumendef _MSC_VER 2 | 3 | when defined(windows): 4 | const 5 | libName = "cusolver.dll" 6 | elif defined(macosx): 7 | const 8 | libName = "libcusolver.dylib" 9 | else: 10 | const 11 | libName = "libcusolver.so" 12 | import 13 | library_types 14 | import ./libpaths 15 | tellCompilerToUseCuda() 16 | ## 17 | ## Copyright 2014 NVIDIA Corporation. All rights reserved. 18 | ## 19 | ## NOTICE TO LICENSEE: 20 | ## 21 | ## This source code and/or documentation ("Licensed Deliverables") are 22 | ## subject to NVIDIA intellectual property rights under U.S. and 23 | ## international Copyright laws. 24 | ## 25 | ## These Licensed Deliverables contained herein is PROPRIETARY and 26 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 27 | ## conditions of a form of NVIDIA software license agreement by and 28 | ## between NVIDIA and Licensee ("License Agreement") or electronically 29 | ## accepted by Licensee. Notwithstanding any terms or conditions to 30 | ## the contrary in the License Agreement, reproduction or disclosure 31 | ## of the Licensed Deliverables to any third party without the express 32 | ## written consent of NVIDIA is prohibited. 33 | ## 34 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 35 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 36 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 37 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 38 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 39 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 40 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
41 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 42 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 43 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 44 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 45 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 46 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 47 | ## OF THESE LICENSED DELIVERABLES. 48 | ## 49 | ## U.S. Government End Users. These Licensed Deliverables are a 50 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 51 | ## 1995), consisting of "commercial computer software" and "commercial 52 | ## computer software documentation" as such terms are used in 48 53 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 54 | ## only as a commercial end item. Consistent with 48 C.F.R.12.212 and 55 | ## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 56 | ## U.S. Government End Users acquire the Licensed Deliverables with 57 | ## only those rights set forth herein. 58 | ## 59 | ## Any use of the Licensed Deliverables in individual and commercial 60 | ## software must include, in the user documentation and internal 61 | ## comments to the code, the above Disclaimer and U.S. Government End 62 | ## Users Notice. 
63 | ## 64 | 65 | when defined(MSC_VER): 66 | type 67 | clonglong* = int64 68 | else: 69 | discard 70 | type 71 | cusolver_int_t* = cint 72 | 73 | const 74 | CUSOLVER_VER_MAJOR* = 11 75 | CUSOLVER_VER_MINOR* = 6 76 | CUSOLVER_VER_PATCH* = 3 77 | CUSOLVER_VER_BUILD* = 83 78 | CUSOLVER_VERSION* = ( 79 | CUSOLVER_VER_MAJOR * 1000 + CUSOLVER_VER_MINOR * 100 + CUSOLVER_VER_PATCH) 80 | 81 | ## ------------------------------------------------------------------------------ 82 | ## #if !defined(MSC_VER) 83 | ## #define CUSOLVER_CPP_VERSION __cplusplus 84 | ## #elif _MSC_FULL_VER >= 190024210 // Visual Studio 2015 Update 3 85 | ## #define CUSOLVER_CPP_VERSION _MSVC_LANG 86 | ## #else 87 | ## #define CUSOLVER_CPP_VERSION 0 88 | ## #endif 89 | ## ------------------------------------------------------------------------------ 90 | ## #if !defined(DISABLE_CUSOLVER_DEPRECATED) 91 | ## 92 | ## #if CUSOLVER_CPP_VERSION >= 201402L 93 | ## 94 | ## #define CUSOLVER_DEPRECATED(new_func) \ 95 | ## [[deprecated("please use " #new_func " instead")]] 96 | ## 97 | ## #elif defined(MSC_VER) 98 | ## 99 | ## #define CUSOLVER_DEPRECATED(new_func) \ 100 | ## __declspec(deprecated("please use " #new_func " instead")) 101 | ## 102 | ## #elif defined(INTEL_COMPILER) || defined(clang) || \ 103 | ## (defined(GNUC) && \ 104 | ## (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) 105 | ## 106 | ## #define CUSOLVER_DEPRECATED(new_func) \ 107 | ## __attribute__((deprecated("please use " #new_func " instead"))) 108 | ## 109 | ## #elif defined(GNUC) || defined(xlc) 110 | ## 111 | ## #define CUSOLVER_DEPRECATED(new_func) __attribute__((deprecated)) 112 | ## 113 | ## #else 114 | ## 115 | ## #define CUSOLVER_DEPRECATED(new_func) 116 | ## 117 | ## #endif // defined(cplusplus) && __cplusplus >= 201402L 118 | ## //------------------------------------------------------------------------------ 119 | ## 120 | ## #if CUSOLVER_CPP_VERSION >= 201703L 121 | ## 122 | ## #define CUSOLVER_DEPRECATED_ENUM(new_enum) \ 
# Enumerations of the cuSOLVER common header. Every enum is declared with
# `size: sizeof(cint)` so that it occupies the same storage as a C `int`,
# and every member carries an explicit numeric value.
type
  cusolverStatus_t* {.size: sizeof(cint).} = enum
    ## Status code returned by cuSOLVER functions (for example
    ## `cusolverGetVersion`). `CUSOLVER_STATUS_SUCCESS` is 0; the numbering
    ## has holes (16 -> 20, 23 -> 25, 26 -> 30).
    CUSOLVER_STATUS_SUCCESS = 0, CUSOLVER_STATUS_NOT_INITIALIZED = 1,
    CUSOLVER_STATUS_ALLOC_FAILED = 2, CUSOLVER_STATUS_INVALID_VALUE = 3,
    CUSOLVER_STATUS_ARCH_MISMATCH = 4, CUSOLVER_STATUS_MAPPING_ERROR = 5,
    CUSOLVER_STATUS_EXECUTION_FAILED = 6, CUSOLVER_STATUS_INTERNAL_ERROR = 7,
    CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8,
    CUSOLVER_STATUS_NOT_SUPPORTED = 9, CUSOLVER_STATUS_ZERO_PIVOT = 10,
    CUSOLVER_STATUS_INVALID_LICENSE = 11,
    CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED = 12,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID = 13,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC = 14,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE = 15,
    CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER = 16,
    CUSOLVER_STATUS_IRS_INTERNAL_ERROR = 20,
    CUSOLVER_STATUS_IRS_NOT_SUPPORTED = 21, CUSOLVER_STATUS_IRS_OUT_OF_RANGE = 22,
    CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES = 23,
    CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED = 25,
    CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED = 26,
    CUSOLVER_STATUS_IRS_MATRIX_SINGULAR = 30,
    CUSOLVER_STATUS_INVALID_WORKSPACE = 31
  cusolverEigType_t* {.size: sizeof(cint).} = enum
    ## Eigenvalue problem type selector; values start at 1, not 0.
    CUSOLVER_EIG_TYPE_1 = 1, CUSOLVER_EIG_TYPE_2 = 2, CUSOLVER_EIG_TYPE_3 = 3
  cusolverEigMode_t* {.size: sizeof(cint).} = enum
    ## Eigen-solver mode: `NOVECTOR` (0) or `VECTOR` (1).
    CUSOLVER_EIG_MODE_NOVECTOR = 0, CUSOLVER_EIG_MODE_VECTOR = 1
  cusolverEigRange_t* {.size: sizeof(cint).} = enum
    ## Eigenvalue range selector (`ALL`, `I`, `V`); values start at 1001.
    CUSOLVER_EIG_RANGE_ALL = 1001, CUSOLVER_EIG_RANGE_I = 1002,
    CUSOLVER_EIG_RANGE_V = 1003
  cusolverNorm_t* {.size: sizeof(cint).} = enum
    ## Norm selector (`INF`, `MAX`, `ONE`, `FRO`); values start at 104.
    CUSOLVER_INF_NORM = 104, CUSOLVER_MAX_NORM = 105, CUSOLVER_ONE_NORM = 106,
    CUSOLVER_FRO_NORM = 107
  cusolverIRSRefinement_t* {.size: sizeof(cint).} = enum
    ## Refinement selector of the IRS solvers. Besides the
    ## `CUSOLVER_IRS_REFINE_*` members (1100..1106) this enum also holds the
    ## three `CUSOLVER_PREC_*` members (1150..1152).
    CUSOLVER_IRS_REFINE_NOT_SET = 1100, CUSOLVER_IRS_REFINE_NONE = 1101,
    CUSOLVER_IRS_REFINE_CLASSICAL = 1102,
    CUSOLVER_IRS_REFINE_CLASSICAL_GMRES = 1103, CUSOLVER_IRS_REFINE_GMRES = 1104,
    CUSOLVER_IRS_REFINE_GMRES_GMRES = 1105,
    CUSOLVER_IRS_REFINE_GMRES_NOPCOND = 1106, CUSOLVER_PREC_DD = 1150,
    CUSOLVER_PREC_SS = 1151, CUSOLVER_PREC_SHT = 1152
  cusolverPrecType_t* {.size: sizeof(cint).} = enum
    ## Precision/data type selector. The `CUSOLVER_R_*` members use
    ## 1201..1208 and the `CUSOLVER_C_*` members 1211..1218 (presumably real
    ## and complex variants of the same element types -- confirm against the
    ## cuSOLVER documentation).
    CUSOLVER_R_8I = 1201, CUSOLVER_R_8U = 1202, CUSOLVER_R_64F = 1203,
    CUSOLVER_R_32F = 1204, CUSOLVER_R_16F = 1205, CUSOLVER_R_16BF = 1206,
    CUSOLVER_R_TF32 = 1207, CUSOLVER_R_AP = 1208, CUSOLVER_C_8I = 1211,
    CUSOLVER_C_8U = 1212, CUSOLVER_C_64F = 1213, CUSOLVER_C_32F = 1214,
    CUSOLVER_C_16F = 1215, CUSOLVER_C_16BF = 1216, CUSOLVER_C_TF32 = 1217,
    CUSOLVER_C_AP = 1218
  cusolverAlgMode_t* {.size: sizeof(cint).} = enum
    ## Algorithm selector.
    CUSOLVER_ALG_0 = 0,         ## default algorithm
    CUSOLVER_ALG_1 = 1, CUSOLVER_ALG_2 = 2
  cusolverStorevMode_t* {.size: sizeof(cint).} = enum
    ## Column-wise (0) or row-wise (1) storage selector. Note that the
    ## members carry a `CUBLAS_` prefix although the type is a cuSOLVER one.
    CUBLAS_STOREV_COLUMNWISE = 0, CUBLAS_STOREV_ROWWISE = 1
  cusolverDirectMode_t* {.size: sizeof(cint).} = enum
    ## Forward (0) or backward (1) direction selector. The members carry a
    ## `CUBLAS_` prefix as well.
    CUBLAS_DIRECT_FORWARD = 0, CUBLAS_DIRECT_BACKWARD = 1
##[
  Glue for C idioms that c2nim leaves untranslated: implicit conversions
  between C integers, booleans, pointers and enums, plus the `INT_MAX`
  constant and mixed `bool`/`cint` logical operators.
]##


converter toCSize_t*(self: cint): csize_t {.inline.} =
  ## Lets a `cint` be passed where a `csize_t` is expected.
  result = self.csize_t

converter toBool*(self: cint): bool {.inline.} =
  ## Lets a C `int` be used as a condition.
  result = self.bool

converter toBool*(self: uint): bool {.inline.} =
  ## Lets an unsigned integer be used as a condition.
  result = self.bool

converter toCint*(self: bool): cint {.inline.} =
  ## Lets a `bool` be passed where a C `int` is expected.
  result = self.cint


const INT_MAX* = cint.high
  ## Largest value of a C `int`.


func `or`*(a: bool; b: cint): bool {.inline.} =
  ## Logical "or" of a `bool` and a C `int`; `b` is only inspected when `a`
  ## is false.
  if a: true else: toBool(b)

func `or`*(a: cint; b: bool): bool {.inline.} =
  ## Logical "or" of a C `int` and a `bool`.
  if toBool(a): true else: b

func `and`*(a: bool; b: cint): bool {.inline.} =
  ## Logical "and" of a `bool` and a C `int`; `b` is only inspected when `a`
  ## is true.
  if a: toBool(b) else: false

func `and`*(a: cint; b: bool): bool {.inline.} =
  ## Logical "and" of a C `int` and a `bool`.
  if toBool(a): b else: false


converter toBool*[T: ptr|pointer|proc](self: T): bool {.inline.} =
  ## Lets pointers and procs be used as a condition: true unless nil.
  result = not isNil(self)


converter toCUInt*[U: enum](self: U): cuint {.inline.} =
  ## Lets an enum value be passed where a C `unsigned int` is expected.
  result = self.cuint

converter toCInt*[U: enum](self: U): cint {.inline.} =
  ## Lets an enum value be passed where a C `int` is expected.
  result = self.cint
]##

#[The following is a rip of std/distros, slightly modified for compile-time
use.
The extra specificity compared to normal `defined` tests or `hostOS`
is needed because some linux distros install cuda in very different places
(I'm looking at you, Arch!)
]#


from std/distros import Distribution
import std/[os, strutils, macros, macrocache]
when NimMajor == 2:
  import std/envvars


# we cache the result of the 'cmdRelease'
# execution for faster platform detections.
var
  unameRes {.compileTime.}: string
  osReleaseIDRes {.compileTime.}: string
  releaseRes {.compileTime.}: string
  hostnamectlRes {.compileTime.}: string

template cmdRelease(cmd, cache): untyped =
  ## Runs `cmd` through `gorge` the first time it is needed, stores the output
  ## in `cache`, and yields the cached text on every later use.
  if cache.len == 0:
    # cache = (when defined(nimscript): gorge(cmd) else: execProcess(cmd))
    cache = gorge(cmd)
  cache

template uname(): untyped = cmdRelease("uname -a", unameRes)
template osReleaseID(): untyped =
  cmdRelease("cat /etc/os-release | grep ^ID=", osReleaseIDRes)
template release(): untyped = cmdRelease("lsb_release -d", releaseRes)
template hostnamectl(): untyped = cmdRelease("hostnamectl", hostnamectlRes)

proc detectOsWithAllCmd(d: Distribution): bool {.compileTime.} =
  ## Fallback detection: looks for the lower-cased distribution name in the
  ## output of every probe command (os-release ID, lsb_release, uname,
  ## hostnamectl).
  let dd = toLowerAscii($d)
  result = dd in toLowerAscii(osReleaseID()) or
           dd in toLowerAscii(release()) or
           dd in toLowerAscii(uname()) or
           ("operating system: " & dd) in toLowerAscii(hostnamectl())

proc detectOsImpl(d: Distribution): bool {.compileTime.} =
  ## Compile-time check whether the build host matches distribution `d`.
  ##
  ## The broad families are answered with `defined(...)`; specific
  ## distributions are answered by inspecting the cached probe commands.
  case d
  of Distribution.Windows: result = defined(windows)
  of Distribution.Posix: result = defined(posix)
  of Distribution.MacOSX: result = defined(macosx)
  of Distribution.Linux: result = defined(linux)
  of Distribution.BSD: result = defined(bsd)
  else:
    when defined(bsd):
      case d
      of Distribution.FreeBSD, Distribution.NetBSD, Distribution.OpenBSD:
        result = $d in uname()
      else:
        result = false
    elif defined(linux):
      # Distributions whose lower-cased name appears verbatim in the os-release
      # ID line. `Distribution.Void` is only listed for Nim >= 1.6.
      when (NimMajor, NimMinor) >= (1, 6):
        const EasyLinux = {
          Distribution.Elementary, Distribution.Ubuntu, Distribution.Debian,
          Distribution.Fedora, Distribution.OpenMandriva, Distribution.CentOS,
          Distribution.Alpine, Distribution.Mageia, Distribution.Zorin,
          Distribution.Void}
      else:
        const EasyLinux = {
          Distribution.Elementary, Distribution.Ubuntu, Distribution.Debian,
          Distribution.Fedora, Distribution.OpenMandriva, Distribution.CentOS,
          Distribution.Alpine, Distribution.Mageia, Distribution.Zorin}

      case d
      of Distribution.Gentoo:
        result = ("-" & $d & " ") in uname()
      of EasyLinux:
        result = toLowerAscii($d) in osReleaseID()
      of Distribution.RedHat:
        result = "rhel" in osReleaseID()
      of Distribution.ArchLinux:
        result = "arch" in osReleaseID()
      # when (NimMajor, NimMinor) >= (1, 6):
      #   of Distribution.Artix:
      #     result = "artix" in osReleaseID()
      of Distribution.NixOS:
        # Check if this is a Nix build or NixOS environment
        result = existsEnv("NIX_BUILD_TOP") or
                 existsEnv("__NIXOS_SET_ENVIRONMENT_DONE")
      of Distribution.OpenSUSE:
        result = "suse" in toLowerAscii(uname()) or
                 "suse" in toLowerAscii(release())
      of Distribution.GoboLinux:
        result = "-Gobo " in uname()
      of Distribution.Solaris:
        let unameLower = toLowerAscii(uname())
        result = ("sun" in unameLower) or ("solaris" in unameLower)
      of Distribution.Haiku:
        result = defined(haiku)
      else:
        result = detectOsWithAllCmd(d)
    else:
      result = false

template detectOs(d: untyped): bool =
  ## Distro/OS detection. For convenience, the
  ## required `Distribution.` qualifier is added to the
  ## enum value.
  detectOsImpl(Distribution.d)



# begin actual detection
when detectOs(Windows):
  from std/os import getEnv, `/`
  const
    CudaPath = getEnv("CUDA_PATH")
    # Keep `lib64` when such a directory exists (the previous default);
    # otherwise fall back to `lib/x64`, which is where the Windows toolkit
    # installer places the import libraries.
    DefaultCudaLib =
      if dirExists(CudaPath / "lib64"): CudaPath / "lib64"
      else: CudaPath / "lib" / "x64"
    CudaIncludes* {.strdefine.} = CudaPath / "include"
    CudaLib* {.strdefine.} = DefaultCudaLib

elif detectOs(ArchLinux):
  from std/os import `/`
  const
    CudaPath = "/opt/cuda"
    CudaIncludes* {.strdefine.} = CudaPath / "include"
    CudaLib* {.strdefine.} = CudaPath / "lib64"

elif detectOs(Linux):
  # Generic linux catch-all.
  # This includes anyone following the cuda installation guide.
  const
    CudaPath = "/usr/local/cuda"
    CudaIncludes* {.strdefine.} = CudaPath / "include"
    CudaLib* {.strdefine.} = CudaPath / "lib64"

else:
  # Some wild operating system!
  const
    CudaIncludes* {.strdefine.} = "unknown"
    CudaLib* {.strdefine.} = "unknown"


# check for validity
when not dirExists(CudaIncludes):
  {.error: "Could not find the cuda source headers! Please specify the " &
    "location of the cuda includes directory by passing " &
    "`-d:CudaIncludes=\"YOUR_PATH\"` to the nim compiler.".}
elif not dirExists(CudaLib):
  {.error: "Could not find the cuda shared libraries! Please specify the " &
    "location of the cuda library directory by passing " &
    "`-d:CudaLib=\"YOUR_PATH\"` to the nim compiler.".}



macro tellCompilerToUseCuda*(): untyped =
  ## Tells the compiler and linker to use cuda libraries.
  ##
  ## The first expansion emits a `passC` pragma with the include directory and
  ## a `passL` pragma with the library directory plus `-lcuda`. Every later
  ## expansion yields an empty statement list.
  # we'll use macrocaching so that we don't unnecessarily emit a million times

  const ToldCompilerCount = CacheCounter"ToldCompilerToUseCudaCount"
  result = newStmtList()
  if ToldCompilerCount.value == 0:
    # The paths are wrapped in double quotes so that directories containing
    # spaces (e.g. the default "C:\Program Files\..." location on Windows)
    # reach the C compiler and linker as a single argument.
    result = quote do:
      {.passC: "-I\"" & CudaIncludes & "\"".}
      {.passL: "-L\"" & CudaLib & "\" -lcuda".}
    inc ToldCompilerCount

--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/library_types.nim:
--------------------------------------------------------------------------------
##
## Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
##
## NOTICE TO LICENSEE:
##
## This source code and/or documentation ("Licensed Deliverables") are
## subject to NVIDIA intellectual property rights under U.S. and
## international Copyright laws.
##
## These Licensed Deliverables contained herein is PROPRIETARY and
## CONFIDENTIAL to NVIDIA and is being provided under the terms and
## conditions of a form of NVIDIA software license agreement by and
## between NVIDIA and Licensee ("License Agreement") or electronically
## accepted by Licensee. Notwithstanding any terms or conditions to
## the contrary in the License Agreement, reproduction or disclosure
## of the Licensed Deliverables to any third party without the express
## written consent of NVIDIA is prohibited.
##
## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | ## OF THESE LICENSED DELIVERABLES. 33 | ## 34 | ## U.S. Government End Users. These Licensed Deliverables are a 35 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | ## 1995), consisting of "commercial computer software" and "commercial 37 | ## computer software documentation" as such terms are used in 48 38 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | ## only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | ## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | ## U.S. Government End Users acquire the Licensed Deliverables with 42 | ## only those rights set forth herein. 43 | ## 44 | ## Any use of the Licensed Deliverables in individual and commercial 45 | ## software must include, in the user documentation and internal 46 | ## comments to the code, the above Disclaimer and U.S. Government End 47 | ## Users Notice. 
##
import ./libpaths
tellCompilerToUseCuda()

type
  # Both enums are pinned to the width of a C `int`. Without the `size`
  # pragma Nim stores an enum with this few members in a single byte, which
  # does not match the storage the C libraries use for these values.
  cudaDataType* {.size: sizeof(cint).} = enum
    ## Element type tags: `R` = real, `C` = complex, followed by the bit
    ## width and the kind of number.
    CUDA_R_32F = 0, ## real as a float
    CUDA_R_64F = 1, ## real as a double
    CUDA_R_16F = 2, ## real as a half
    CUDA_R_8I = 3, ## real as a signed 8-bit int
    CUDA_C_32F = 4, ## complex as a pair of float numbers
    CUDA_C_64F = 5, ## complex as a pair of double numbers
    CUDA_C_16F = 6, ## complex as a pair of half numbers
    CUDA_C_8I = 7, ## complex as a pair of signed 8-bit int numbers
    CUDA_R_8U = 8, ## real as a unsigned 8-bit int
    CUDA_C_8U = 9, ## complex as a pair of unsigned 8-bit int numbers
    CUDA_R_32I = 10, ## real as a signed 32-bit int
    CUDA_C_32I = 11, ## complex as a pair of signed 32-bit int numbers
    CUDA_R_32U = 12, ## real as a unsigned 32-bit int
    CUDA_C_32U = 13, ## complex as a pair of unsigned 32-bit int numbers
    CUDA_R_16BF = 14, ## real as a nv_bfloat16
    CUDA_C_16BF = 15, ## complex as a pair of nv_bfloat16 numbers
    CUDA_R_4I = 16, ## real as a signed 4-bit int
    CUDA_C_4I = 17, ## complex as a pair of signed 4-bit int numbers
    CUDA_R_4U = 18, ## real as a unsigned 4-bit int
    CUDA_C_4U = 19, ## complex as a pair of unsigned 4-bit int numbers
    CUDA_R_16I = 20, ## real as a signed 16-bit int
    CUDA_C_16I = 21, ## complex as a pair of signed 16-bit int numbers
    CUDA_R_16U = 22, ## real as a unsigned 16-bit int
    CUDA_C_16U = 23, ## complex as a pair of unsigned 16-bit int numbers
    CUDA_R_64I = 24, ## real as a signed 64-bit int
    CUDA_C_64I = 25, ## complex as a pair of signed 64-bit int numbers
    CUDA_R_64U = 26, ## real as a unsigned 64-bit int
    CUDA_C_64U = 27, ## complex as a pair of unsigned 64-bit int numbers
    CUDA_R_8F_E4M3 = 28, ## real as a nv_fp8_e4m3
    CUDA_R_8F_E5M2 = 29 ## real as a nv_fp8_e5m2
  libraryPropertyType* {.size: sizeof(cint).} = enum
    ## Selects which component of a library version a property query returns.
    MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL



  cudaDataType_t* = cudaDataType
    ## Alias matching the C `cudaDataType_t` typedef name.

91 | -------------------------------------------------------------------------------- /src/nimcuda/cuda12_5/surface_types.nim: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright 1993-2023 NVIDIA Corporation. All rights reserved. 3 | ## 4 | ## NOTICE TO LICENSEE: 5 | ## 6 | ## This source code and/or documentation ("Licensed Deliverables") are 7 | ## subject to NVIDIA intellectual property rights under U.S. and 8 | ## international Copyright laws. 9 | ## 10 | ## These Licensed Deliverables contained herein is PROPRIETARY and 11 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | ## conditions of a form of NVIDIA software license agreement by and 13 | ## between NVIDIA and Licensee ("License Agreement") or electronically 14 | ## accepted by Licensee. Notwithstanding any terms or conditions to 15 | ## the contrary in the License Agreement, reproduction or disclosure 16 | ## of the Licensed Deliverables to any third party without the express 17 | ## written consent of NVIDIA is prohibited. 18 | ## 19 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
26 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | ## OF THESE LICENSED DELIVERABLES. 33 | ## 34 | ## U.S. Government End Users. These Licensed Deliverables are a 35 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | ## 1995), consisting of "commercial computer software" and "commercial 37 | ## computer software documentation" as such terms are used in 48 38 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | ## only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | ## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | ## U.S. Government End Users acquire the Licensed Deliverables with 42 | ## only those rights set forth herein. 43 | ## 44 | ## Any use of the Licensed Deliverables in individual and commercial 45 | ## software must include, in the user documentation and internal 46 | ## comments to the code, the above Disclaimer and U.S. Government End 47 | ## Users Notice. 
##

## *****************************************************************************
## *
## *
## *
## *****************************************************************************
import ./libpaths
tellCompilerToUseCuda()
# NOTE(review): the indentation of this file was lost in the dump this chunk
# came from; the declarations below are assumed to sit inside the `when`
# branch. Confirm against the upstream file.
when not defined(CUDACC_RTC_MINIMAL):
  ##
  ## \addtogroup CUDART_TYPES
  ##
  ## @{
  ##
  ## *****************************************************************************
  ## *
  ## *
  ## *
  ## *****************************************************************************
  const
    cudaSurfaceType1D* = 0x01
    cudaSurfaceType2D* = 0x02
    cudaSurfaceType3D* = 0x03
    cudaSurfaceTypeCubemap* = 0x0C
    cudaSurfaceType1DLayered* = 0xF1
    cudaSurfaceType2DLayered* = 0xF2
    cudaSurfaceTypeCubemapLayered* = 0xFC
  ##
  ## CUDA Surface boundary modes
  ##
  type
    # Pinned to the width of a C `int`: without the `size` pragma Nim stores
    # an enum with this few members in a single byte.
    cudaSurfaceBoundaryMode* {.size: sizeof(cint).} = enum
      cudaBoundaryModeZero = 0, ## < Zero boundary mode
      cudaBoundaryModeClamp = 1, ## < Clamp boundary mode
      cudaBoundaryModeTrap = 2 ## < Trap boundary mode
  ##
  ## CUDA Surface format modes
  ##
  type
    # Same `int`-width pinning as `cudaSurfaceBoundaryMode`.
    cudaSurfaceFormatMode* {.size: sizeof(cint).} = enum
      cudaFormatModeForced = 0, ## < Forced format mode
      cudaFormatModeAuto = 1 ## < Auto format mode
  ##
  ## An opaque value that represents a CUDA Surface object
  ##
  type
    cudaSurfaceObject_t* = culonglong
  ## @}
  ## @}
  ## END CUDART_TYPES

--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/texture_types.nim:
--------------------------------------------------------------------------------
##
## Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
##
## NOTICE TO LICENSEE:
##
## This source code and/or documentation ("Licensed Deliverables") are
## subject to NVIDIA intellectual property rights under U.S. and
## international Copyright laws.
9 | ## 10 | ## These Licensed Deliverables contained herein is PROPRIETARY and 11 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | ## conditions of a form of NVIDIA software license agreement by and 13 | ## between NVIDIA and Licensee ("License Agreement") or electronically 14 | ## accepted by Licensee. Notwithstanding any terms or conditions to 15 | ## the contrary in the License Agreement, reproduction or disclosure 16 | ## of the Licensed Deliverables to any third party without the express 17 | ## written consent of NVIDIA is prohibited. 18 | ## 19 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | ## OF THESE LICENSED DELIVERABLES. 33 | ## 34 | ## U.S. Government End Users. These Licensed Deliverables are a 35 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | ## 1995), consisting of "commercial computer software" and "commercial 37 | ## computer software documentation" as such terms are used in 48 38 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | ## only as a commercial end item. 
## Consistent with 48 C.F.R.12.212 and
## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
## U.S. Government End Users acquire the Licensed Deliverables with
## only those rights set forth herein.
##
## Any use of the Licensed Deliverables in individual and commercial
## software must include, in the user documentation and internal
## comments to the code, the above Disclaimer and U.S. Government End
## Users Notice.
##

## *****************************************************************************
## *
## *
## *
## *****************************************************************************
import ./libpaths
tellCompilerToUseCuda()
# NOTE(review): the indentation of this file was lost in the dump this chunk
# came from; the declarations below are assumed to sit inside the `when`
# branch. Confirm against the upstream file.
when not defined(CUDACC_RTC_MINIMAL):
  ##
  ## \addtogroup CUDART_TYPES
  ##
  ## @{
  ##
  ## *****************************************************************************
  ## *
  ## *
  ## *
  ## *****************************************************************************
  const
    cudaTextureType1D* = 0x01
    cudaTextureType2D* = 0x02
    cudaTextureType3D* = 0x03
    cudaTextureTypeCubemap* = 0x0C
    cudaTextureType1DLayered* = 0xF1
    cudaTextureType2DLayered* = 0xF2
    cudaTextureTypeCubemapLayered* = 0xFC
  # The three enums below are fields of `cudaTextureDesc`, whose memory layout
  # is computed by Nim (the object has no `importc`/`header`). They are pinned
  # to the width of a C `int` so each field occupies the same storage as the
  # corresponding C enum; without the `size` pragma Nim would use one byte.
  ##
  ## CUDA texture address modes
  ##
  type
    cudaTextureAddressMode* {.size: sizeof(cint).} = enum
      cudaAddressModeWrap = 0, ## < Wrapping address mode
      cudaAddressModeClamp = 1, ## < Clamp to edge address mode
      cudaAddressModeMirror = 2, ## < Mirror address mode
      cudaAddressModeBorder = 3 ## < Border address mode
  ##
  ## CUDA texture filter modes
  ##
  type
    cudaTextureFilterMode* {.size: sizeof(cint).} = enum
      cudaFilterModePoint = 0, ## < Point filter mode
      cudaFilterModeLinear = 1 ## < Linear filter mode
  ##
  ## CUDA texture read modes
  ##
  type
    cudaTextureReadMode* {.size: sizeof(cint).} = enum
      cudaReadModeElementType = 0, ## < Read texture as specified element type
      cudaReadModeNormalizedFloat = 1 ## < Read texture as normalized float
  ##
  ## CUDA texture descriptor
  ##
  type
    cudaTextureDesc* {.bycopy.} = object
      ##
      ## Texture address mode for up to 3 dimensions
      ##
      addressMode*: array[3, cudaTextureAddressMode]
      ##
      ## Texture filter mode
      ##
      filterMode*: cudaTextureFilterMode
      ##
      ## Texture read mode
      ##
      readMode*: cudaTextureReadMode
      ##
      ## Perform sRGB->linear conversion during texture read
      ##
      sRGB*: cint
      ##
      ## Texture Border Color
      ##
      borderColor*: array[4, cfloat]
      ##
      ## Indicates whether texture reads are normalized or not
      ##
      normalizedCoords*: cint
      ##
      ## Limit to the anisotropy ratio
      ##
      maxAnisotropy*: cuint
      ##
      ## Mipmap filter mode
      ##
      mipmapFilterMode*: cudaTextureFilterMode
      ##
      ## Offset applied to the supplied mipmap level
      ##
      mipmapLevelBias*: cfloat
      ##
      ## Lower end of the mipmap level range to clamp access to
      ##
      minMipmapLevelClamp*: cfloat
      ##
      ## Upper end of the mipmap level range to clamp access to
      ##
      maxMipmapLevelClamp*: cfloat
      ##
      ## Disable any trilinear filtering optimizations.
      ##
      disableTrilinearOptimization*: cint
      ##
      ## Enable seamless cube map filtering.
      ##
      seamlessCubemap*: cint

  ##
  ## An opaque value that represents a CUDA texture object
  ##
  type
    cudaTextureObject_t* = culonglong
  ## @}
  ## @}
  ## END CUDART_TYPES

--------------------------------------------------------------------------------
/src/nimcuda/cuda12_5/vector_types.nim:
--------------------------------------------------------------------------------
##
## Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
3 | ## 4 | ## NOTICE TO LICENSEE: 5 | ## 6 | ## This source code and/or documentation ("Licensed Deliverables") are 7 | ## subject to NVIDIA intellectual property rights under U.S. and 8 | ## international Copyright laws. 9 | ## 10 | ## These Licensed Deliverables contained herein is PROPRIETARY and 11 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | ## conditions of a form of NVIDIA software license agreement by and 13 | ## between NVIDIA and Licensee ("License Agreement") or electronically 14 | ## accepted by Licensee. Notwithstanding any terms or conditions to 15 | ## the contrary in the License Agreement, reproduction or disclosure 16 | ## of the Licensed Deliverables to any third party without the express 17 | ## written consent of NVIDIA is prohibited. 18 | ## 19 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | ## OF THESE LICENSED DELIVERABLES. 33 | ## 34 | ## U.S. Government End Users. These Licensed Deliverables are a 35 | ## "commercial item" as that term is defined at 48 C.F.R. 
## 2.101 (OCT
## 1995), consisting of "commercial computer software" and "commercial
## computer software documentation" as such terms are used in 48
## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
## only as a commercial end item. Consistent with 48 C.F.R.12.212 and
## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
## U.S. Government End Users acquire the Licensed Deliverables with
## only those rights set forth herein.
##
## Any use of the Licensed Deliverables in individual and commercial
## software must include, in the user documentation and internal
## comments to the code, the above Disclaimer and U.S. Government End
## Users Notice.
##
import ./libpaths
tellCompilerToUseCuda()

# Every object below is bound with `importc` to the struct of the same name in
# "vector_types.h"; the digit in the name is the number of components, exposed
# as the fields `x`, `y`, `z`, `w` (in that order).

# --- `cchar` / `char` vectors -------------------------------------------------
type
  char1* {.importc: "char1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cchar

  uchar1* {.importc: "uchar1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: char

  char2* {.importc: "char2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cchar
    y* {.importc: "y".}: cchar

  uchar2* {.importc: "uchar2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: char
    y* {.importc: "y".}: char

  char3* {.importc: "char3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cchar
    y* {.importc: "y".}: cchar
    z* {.importc: "z".}: cchar

  uchar3* {.importc: "uchar3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: char
    y* {.importc: "y".}: char
    z* {.importc: "z".}: char

  char4* {.importc: "char4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cchar
    y* {.importc: "y".}: cchar
    z* {.importc: "z".}: cchar
    w* {.importc: "w".}: cchar

  uchar4* {.importc: "uchar4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: char
    y* {.importc: "y".}: char
    z* {.importc: "z".}: char
    w* {.importc: "w".}: char

# --- `cshort` / `cushort` vectors ---------------------------------------------
type
  short1* {.importc: "short1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cshort

  ushort1* {.importc: "ushort1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cushort

  short2* {.importc: "short2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cshort
    y* {.importc: "y".}: cshort

  ushort2* {.importc: "ushort2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cushort
    y* {.importc: "y".}: cushort

  short3* {.importc: "short3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cshort
    y* {.importc: "y".}: cshort
    z* {.importc: "z".}: cshort

  ushort3* {.importc: "ushort3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cushort
    y* {.importc: "y".}: cushort
    z* {.importc: "z".}: cushort

  short4* {.importc: "short4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cshort
    y* {.importc: "y".}: cshort
    z* {.importc: "z".}: cshort
    w* {.importc: "w".}: cshort

  ushort4* {.importc: "ushort4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cushort
    y* {.importc: "y".}: cushort
    z* {.importc: "z".}: cushort
    w* {.importc: "w".}: cushort

# --- `cint` / `cuint` vectors -------------------------------------------------
type
  int1* {.importc: "int1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cint

  uint1* {.importc: "uint1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cuint

  int2* {.importc: "int2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cint
    y* {.importc: "y".}: cint

  uint2* {.importc: "uint2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cuint
    y* {.importc: "y".}: cuint

  int3* {.importc: "int3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cint
    y* {.importc: "y".}: cint
    z* {.importc: "z".}: cint

  uint3* {.importc: "uint3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cuint
    y* {.importc: "y".}: cuint
    z* {.importc: "z".}: cuint

  int4* {.importc: "int4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cint
    y* {.importc: "y".}: cint
    z* {.importc: "z".}: cint
    w* {.importc: "w".}: cint

  uint4* {.importc: "uint4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cuint
    y* {.importc: "y".}: cuint
    z* {.importc: "z".}: cuint
    w* {.importc: "w".}: cuint

# --- `clong` / `culong` vectors -----------------------------------------------
type
  long1* {.importc: "long1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clong

  ulong1* {.importc: "ulong1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culong

  long2* {.importc: "long2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clong
    y* {.importc: "y".}: clong

  ulong2* {.importc: "ulong2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culong
    y* {.importc: "y".}: culong

  long3* {.importc: "long3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clong
    y* {.importc: "y".}: clong
    z* {.importc: "z".}: clong

  ulong3* {.importc: "ulong3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culong
    y* {.importc: "y".}: culong
    z* {.importc: "z".}: culong

  long4* {.importc: "long4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clong
    y* {.importc: "y".}: clong
    z* {.importc: "z".}: clong
    w* {.importc: "w".}: clong

  ulong4* {.importc: "ulong4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culong
    y* {.importc: "y".}: culong
    z* {.importc: "z".}: culong
    w* {.importc: "w".}: culong

# --- `cfloat` vectors ---------------------------------------------------------
type
  float1* {.importc: "float1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cfloat

  float2* {.importc: "float2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cfloat
    y* {.importc: "y".}: cfloat

  float3* {.importc: "float3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cfloat
    y* {.importc: "y".}: cfloat
    z* {.importc: "z".}: cfloat

  float4* {.importc: "float4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cfloat
    y* {.importc: "y".}: cfloat
    z* {.importc: "z".}: cfloat
    w* {.importc: "w".}: cfloat

# --- `clonglong` / `culonglong` vectors ---------------------------------------
type
  longlong1* {.importc: "longlong1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clonglong

  ulonglong1* {.importc: "ulonglong1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culonglong

  longlong2* {.importc: "longlong2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clonglong
    y* {.importc: "y".}: clonglong

  ulonglong2* {.importc: "ulonglong2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culonglong
    y* {.importc: "y".}: culonglong

  longlong3* {.importc: "longlong3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clonglong
    y* {.importc: "y".}: clonglong
    z* {.importc: "z".}: clonglong

  ulonglong3* {.importc: "ulonglong3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culonglong
    y* {.importc: "y".}: culonglong
    z* {.importc: "z".}: culonglong

  longlong4* {.importc: "longlong4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: clonglong
    y* {.importc: "y".}: clonglong
    z* {.importc: "z".}: clonglong
    w* {.importc: "w".}: clonglong

  ulonglong4* {.importc: "ulonglong4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: culonglong
    y* {.importc: "y".}: culonglong
    z* {.importc: "z".}: culonglong
    w* {.importc: "w".}: culonglong

# --- `cdouble` vectors --------------------------------------------------------
type
  double1* {.importc: "double1", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cdouble

  double2* {.importc: "double2", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cdouble
    y* {.importc: "y".}: cdouble

  double3* {.importc: "double3", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cdouble
    y* {.importc: "y".}: cdouble
    z* {.importc: "z".}: cdouble

  double4* {.importc: "double4", header: "vector_types.h", bycopy.} = object
    x* {.importc: "x".}: cdouble
    y* {.importc: "y".}: cdouble
    z* {.importc: "z".}: cdouble
    w* {.importc: "w".}: cdouble


## *****************************************************************************
## *
## *
## *
## *****************************************************************************


## *****************************************************************************
## *
## *
## *
## *****************************************************************************

type
  dim3* {.importc: "dim3", header: "vector_types.h", bycopy.} = object
    ## Three unsigned components `x`, `y`, `z`, bound to the C `dim3` struct.
    x* {.importc: "x".}: cuint
    y* {.importc: "y".}: cuint
    z* {.importc: "z".}: cuint

--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/check.nim:
--------------------------------------------------------------------------------
# Copyright 2017 UniCredit S.p.A.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import ./cublas_api 16 | import ./cublas_v2 17 | import ./cuComplex 18 | import ./cuda_occupancy 19 | import ./cuda_runtime_api 20 | import ./cudnn 21 | import ./cufft 22 | import ./curand 23 | import ./cusolver_common 24 | import ./cusolverDn 25 | import ./cusolverRf 26 | import ./cusolverSp 27 | import ./cusparse 28 | import ./driver_types 29 | import ./library_types 30 | import ./nvblas 31 | import ./nvgraph 32 | import ./surface_types 33 | import ./texture_types 34 | import ./vector_types 35 | 36 | type 37 | CudaError* = object of IOError 38 | CudaOccError* = object of IOError 39 | CufftError* = object of IOError 40 | CublasError* = object of IOError 41 | CusparseError* = object of IOError 42 | CusolverError* = object of IOError 43 | CurandError* = object of IOError 44 | CudnnError* = object of IOError 45 | NVGraphError* = object of IOError 46 | 47 | template check*(a: cudaError_t) = 48 | let y = a # ensure we only evaluate once even if the expression has side effects 49 | if y != cudaSuccess: 50 | raise newException(CudaError, $y & " " & $int(y)) 51 | 52 | template check*(a: cudaOccError) = 53 | let y = a # ensure we only evaluate once even if the expression has side effects 54 | if y != CUDA_OCC_SUCCESS: 55 | raise newException(CudaOccError, $y & " " & $int(y)) 56 | 57 | template check*(a: cublasStatus_t) = 58 | let y = a # ensure we only evaluate once even if the expression has side effects 59 | if y != CUBLAS_STATUS_SUCCESS: 60 | raise newException(CublasError, $y & " " & $int(y)) 61 | 62 | template check*(a: 
cufftResult) = 63 | let y = a # ensure we only evaluate once even if the expression has side effects 64 | if y != CUFFT_SUCCESS: 65 | raise newException(CufftError, $y & " " & $int(y)) 66 | 67 | template check*(a: cusparseStatus_t) = 68 | let y = a # ensure we only evaluate once even if the expression has side effects 69 | if y != CUSPARSE_STATUS_SUCCESS: 70 | raise newException(CusparseError, $y & " " & $int(y)) 71 | 72 | template check*(a: cusolverStatus_t) = 73 | let y = a # ensure we only evaluate once even if the expression has side effects 74 | if y != CUSOLVER_STATUS_SUCCESS: 75 | raise newException(CusolverError, $y & " " & $int(y)) 76 | 77 | template check*(a: curandStatus) = 78 | let y = a # ensure we only evaluate once even if the expression has side effects 79 | if y != CURAND_STATUS_SUCCESS: 80 | raise newException(CurandError, $y & " " & $int(y)) 81 | 82 | template check*(a: cudnnStatus_t) = 83 | let y = a # ensure we only evaluate once even if the expression has side effects 84 | if y != CUDNN_STATUS_SUCCESS: 85 | raise newException(CudnnError, $y & " " & $int(y)) 86 | 87 | template check*(a: nvgraphStatus_t) = 88 | let y = a # ensure we only evaluate once even if the expression has side effects 89 | if y != NVGRAPH_STATUS_SUCCESS: 90 | raise newException(NVGraphError, $y & " " & $int(y)) 91 | -------------------------------------------------------------------------------- /src/nimcuda/cuda8_0/cuComplex.nim: -------------------------------------------------------------------------------- 1 | from math import sqrt 2 | 3 | template sqrtf(x: cfloat): cfloat = sqrt(x) 4 | 5 | template fabsf(x: cfloat): cfloat = abs(x) 6 | 7 | template fabs(x: float): float = abs(x) 8 | 9 | template `div`(a: static[float64], b: cfloat): cfloat = cfloat(a) / b 10 | 11 | template `div`(a: cfloat, b: cfloat): cfloat = a / b 12 | ## 13 | ## Copyright 1993-2012 NVIDIA Corporation. All rights reserved. 
14 | ## 15 | ## NOTICE TO LICENSEE: 16 | ## 17 | ## This source code and/or documentation ("Licensed Deliverables") are 18 | ## subject to NVIDIA intellectual property rights under U.S. and 19 | ## international Copyright laws. 20 | ## 21 | ## These Licensed Deliverables contained herein is PROPRIETARY and 22 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 23 | ## conditions of a form of NVIDIA software license agreement by and 24 | ## between NVIDIA and Licensee ("License Agreement") or electronically 25 | ## accepted by Licensee. Notwithstanding any terms or conditions to 26 | ## the contrary in the License Agreement, reproduction or disclosure 27 | ## of the Licensed Deliverables to any third party without the express 28 | ## written consent of NVIDIA is prohibited. 29 | ## 30 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 31 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 32 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 33 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 34 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 35 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 36 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 37 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 38 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 39 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 40 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 41 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 42 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 43 | ## OF THESE LICENSED DELIVERABLES. 44 | ## 45 | ## U.S. Government End Users. These Licensed Deliverables are a 46 | ## "commercial item" as that term is defined at 48 C.F.R. 
## 2.101 (OCT
## 1995), consisting of "commercial computer software" and "commercial
## computer software documentation" as such terms are used in 48
## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
## only as a commercial end item. Consistent with 48 C.F.R.12.212 and
## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
## U.S. Government End Users acquire the Licensed Deliverables with
## only those rights set forth herein.
##
## Any use of the Licensed Deliverables in individual and commercial
## software must include, in the user documentation and internal
## comments to the code, the above Disclaimer and U.S. Government End
## Users Notice.
##

when not defined(CU_COMPLEX_H):
  const
    CU_COMPLEX_H* = true
## When trying to include C header file in C++ Code extern "C" is required
## But the Standard QNX headers already have ifdef extern in them when compiling C++ Code
## extern "C" cannot be nested
## Hence keep the header out of extern "C" block
##
import
  vector_types

type
  cuFloatComplex* = float2 ## Single-precision complex value: `x` holds the real part, `y` the imaginary part.

proc cuCrealf*(x: cuFloatComplex): cfloat =
  ## Real part of `x`.
  result = x.x

proc cuCimagf*(x: cuFloatComplex): cfloat =
  ## Imaginary part of `x`.
  result = x.y

proc make_cuFloatComplex*(r: cfloat; i: cfloat): cuFloatComplex =
  ## Builds a complex value with real part `r` and imaginary part `i`.
  result.x = r
  result.y = i

proc cuConjf*(x: cuFloatComplex): cuFloatComplex =
  ## Complex conjugate: same real part, negated imaginary part.
  let
    re = cuCrealf(x)
    im = - cuCimagf(x)
  result = make_cuFloatComplex(re, im)

proc cuCaddf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Component-wise sum `x + y`.
  let
    re = cuCrealf(x) + cuCrealf(y)
    im = cuCimagf(x) + cuCimagf(y)
  result = make_cuFloatComplex(re, im)

proc cuCsubf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Component-wise difference `x - y`.
  let
    re = cuCrealf(x) - cuCrealf(y)
    im = cuCimagf(x) - cuCimagf(y)
  result = make_cuFloatComplex(re, im)

## This implementation could suffer from intermediate overflow even though
## the final result would be in range. However, various implementations do
## not guard against this (presumably to avoid losing performance), so we
## don't do it either to stay competitive.
##
proc cuCmulf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Complex product `x * y`, computed directly as
  ## `(xr*yr - xi*yi) + (xr*yi + xi*yr) i` with no overflow guarding.
  let
    xr = cuCrealf(x)
    xi = cuCimagf(x)
    yr = cuCrealf(y)
    yi = cuCimagf(y)
  result = make_cuFloatComplex((xr * yr) - (xi * yi), (xr * yi) + (xi * yr))

## This implementation guards against intermediate underflow and overflow
## by scaling. Such guarded implementations are usually the default for
## complex library implementations, with some also offering an unguarded,
## faster version.
##
proc cuCdivf*(x: cuFloatComplex; y: cuFloatComplex): cuFloatComplex =
  ## Complex quotient `x / y`.
  ##
  ## Both operands are first multiplied by `1 / (|re y| + |im y|)`, then the
  ## usual `x * conj(y) / |y|^2` formula is applied to the scaled values.
  let
    norm: cfloat = fabsf(cuCrealf(y)) + fabsf(cuCimagf(y))
    invNorm: cfloat = 1.0 div norm
    # operands after scaling by the reciprocal of the 1-norm of `y`
    numRe: cfloat = cuCrealf(x) * invNorm
    numIm: cfloat = cuCimagf(x) * invNorm
    denRe: cfloat = cuCrealf(y) * invNorm
    denIm: cfloat = cuCimagf(y) * invNorm
    # squared magnitude of the scaled denominator, and its reciprocal
    denSq: cfloat = (denRe * denRe) + (denIm * denIm)
    invDenSq: cfloat = 1.0 div denSq
  result = make_cuFloatComplex(((numRe * denRe) + (numIm * denIm)) * invDenSq,
                               ((numIm * denRe) - (numRe * denIm)) * invDenSq)

##
## We would like to call hypotf(), but it's not available on all platforms.
## This discrete implementation guards against intermediate underflow and
## overflow by scaling. Otherwise we would lose half the exponent range.
## There are various ways of doing guarded computation. For now chose the
## simplest and fastest solution, however this may suffer from inaccuracies
## if sqrt and division are not IEEE compliant.
##
proc cuCabsf*(x: cuFloatComplex): cfloat =
  ## Magnitude of `x`.
  ##
  ## With `hi`/`lo` the larger/smaller of `|re|` and `|im|`, the result is
  ## `hi * sqrt(1 + (lo/hi)^2)`. When `hi` is zero, or either value exceeds
  ## the largest finite single-precision number, `hi + lo` is returned
  ## instead.
  let
    absRe: cfloat = fabsf(cuCrealf(x))
    absIm: cfloat = fabsf(cuCimagf(x))
    reIsLarger = absRe > absIm
    hi: cfloat = if reIsLarger: absRe else: absIm
    lo: cfloat = if reIsLarger: absIm else: absRe
  result =
    if (hi == 0.0) or (hi > 3.402823466e+38) or (lo > 3.402823466e+38):
      hi + lo
    else:
      let ratio: cfloat = lo div hi
      let radicand: cfloat = 1.0 + ratio * ratio
      hi * sqrtf(radicand)

## Double precision
type
  cuDoubleComplex* = double2 ## Double-precision complex value: `x` holds the real part, `y` the imaginary part.

proc cuCreal*(x: cuDoubleComplex): cdouble =
  ## Real part of `x`.
  result = x.x

proc cuCimag*(x: cuDoubleComplex): cdouble =
  ## Imaginary part of `x`.
  result = x.y

proc make_cuDoubleComplex*(r: cdouble; i: cdouble): cuDoubleComplex =
  ## Builds a complex value with real part `r` and imaginary part `i`.
  result.x = r
  result.y = i

proc cuConj*(x: cuDoubleComplex): cuDoubleComplex =
  ## Complex conjugate: same real part, negated imaginary part.
  let
    re = cuCreal(x)
    im = - cuCimag(x)
  result = make_cuDoubleComplex(re, im)

proc cuCadd*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Component-wise sum `x + y`.
  let
    re = cuCreal(x) + cuCreal(y)
    im = cuCimag(x) + cuCimag(y)
  result = make_cuDoubleComplex(re, im)

proc cuCsub*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Component-wise difference `x - y`.
  let
    re = cuCreal(x) - cuCreal(y)
    im = cuCimag(x) - cuCimag(y)
  result = make_cuDoubleComplex(re, im)

## This implementation could suffer from intermediate overflow even though
## the final result would be in range. However, various implementations do
## not guard against this (presumably to avoid losing performance), so we
## don't do it either to stay competitive.
##
proc cuCmul*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Complex product `x * y`, computed directly as
  ## `(xr*yr - xi*yi) + (xr*yi + xi*yr) i` with no overflow guarding.
  let
    xr = cuCreal(x)
    xi = cuCimag(x)
    yr = cuCreal(y)
    yi = cuCimag(y)
  result = make_cuDoubleComplex((xr * yr) - (xi * yi), (xr * yi) + (xi * yr))

## This implementation guards against intermediate underflow and overflow
## by scaling.
## Such guarded implementations are usually the default for
## complex library implementations, with some also offering an unguarded,
## faster version.
##
proc cuCdiv*(x: cuDoubleComplex; y: cuDoubleComplex): cuDoubleComplex =
  ## Complex quotient `x / y` in double precision.
  ##
  ## Both operands are first multiplied by `1 / (|re y| + |im y|)`, then the
  ## usual `x * conj(y) / |y|^2` formula is applied to the scaled values.
  let norm: cdouble = fabs(cuCreal(y)) + fabs(cuCimag(y))
  # Use `/` rather than the file's `div` helper templates: those are declared
  # for `cfloat` only and return `cfloat`, which would narrow the reciprocal
  # to single precision.
  let invNorm: cdouble = 1.0 / norm
  let
    numRe: cdouble = cuCreal(x) * invNorm
    numIm: cdouble = cuCimag(x) * invNorm
    denRe: cdouble = cuCreal(y) * invNorm
    denIm: cdouble = cuCimag(y) * invNorm
    # squared magnitude of the scaled denominator
    denSq: cdouble = (denRe * denRe) + (denIm * denIm)
  let invDenSq: cdouble = 1.0 / denSq
  result = make_cuDoubleComplex(((numRe * denRe) + (numIm * denIm)) * invDenSq,
                                ((numIm * denRe) - (numRe * denIm)) * invDenSq)

## This implementation guards against intermediate underflow and overflow
## by scaling. Otherwise we would lose half the exponent range. There are
## various ways of doing guarded computation. For now chose the simplest
## and fastest solution, however this may suffer from inaccuracies if sqrt
## and division are not IEEE compliant.
##
proc cuCabs*(x: cuDoubleComplex): cdouble =
  ## Magnitude of `x` in double precision.
  ##
  ## With `v`/`w` the larger/smaller of `|re|` and `|im|`, the result is
  ## `v * sqrt(1 + (w/v)^2)`. When `v` is zero, or either value exceeds the
  ## largest finite double, `v + w` is returned instead.
  let
    a: cdouble = fabs(cuCreal(x))
    b: cdouble = fabs(cuCimag(x))
  var
    v: cdouble
    w: cdouble
    t: cdouble
  if a > b:
    v = a
    w = b
  else:
    v = b
    w = a
  # Use `/` rather than the file's `div` helper templates: those are declared
  # for `cfloat` only and return `cfloat`, which would narrow the ratio to
  # single precision.
  t = w / v
  t = 1.0 + t * t
  t = v * sqrt(t)
  if (v == 0.0) or (v > 1.797693134862316e+308) or (w > 1.797693134862316e+308):
    t = v + w
  return t

## aliases
type
  cuComplex* = cuFloatComplex ## Alias for the single-precision complex type.

proc make_cuComplex*(x: cfloat; y: cfloat): cuComplex =
  ## Builds a `cuComplex` with real part `x` and imaginary part `y`.
  return make_cuFloatComplex(x, y)

## float-to-double promotion
proc cuComplexFloatToDouble*(c: cuFloatComplex): cuDoubleComplex =
  ## Widens both components of `c` to double precision.
  # `cdouble(...)` converts the value. `cast[cdouble]` would reinterpret the
  # 4-byte float's bits as an 8-byte double and produce garbage.
  return make_cuDoubleComplex(cdouble(cuCrealf(c)), cdouble(cuCimagf(c)))

proc cuComplexDoubleToFloat*(c: cuDoubleComplex): cuFloatComplex =
  ## Narrows both components of `c` to single precision.
  # Value conversion, not a bit-level `cast` (see cuComplexFloatToDouble).
  return make_cuFloatComplex(cfloat(cuCreal(c)), cfloat(cuCimag(c)))

proc cuCfmaf*(x: cuComplex; y: cuComplex; d: cuComplex): cuComplex =
  ## Complex multiply-add `x * y + d` in single precision.
  ##
  ## The accumulation order below is kept exactly as in the original so the
  ## rounding of intermediate results does not change.
  var real_res: cfloat
  var imag_res: cfloat
  real_res = (cuCrealf(x) * cuCrealf(y)) + cuCrealf(d)
  imag_res = (cuCrealf(x) * cuCimagf(y)) + cuCimagf(d)
  real_res = - (cuCimagf(x) * cuCimagf(y)) + real_res
  imag_res = (cuCimagf(x) * cuCrealf(y)) + imag_res
  return make_cuComplex(real_res, imag_res)

proc cuCfma*(x: cuDoubleComplex; y: cuDoubleComplex; d: cuDoubleComplex): cuDoubleComplex =
  ## Complex multiply-add `x * y + d` in double precision.
  ##
  ## The accumulation order below is kept exactly as in the original so the
  ## rounding of intermediate results does not change.
  var real_res: cdouble
  var imag_res: cdouble
  real_res = (cuCreal(x) * cuCreal(y)) + cuCreal(d)
  imag_res = (cuCreal(x) * cuCimag(y)) + cuCimag(d)
  real_res = - (cuCimag(x) * cuCimag(y)) + real_res
  imag_res = (cuCimag(x) * cuCreal(y)) + imag_res
  return make_cuDoubleComplex(real_res, imag_res)

--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/cublas_v2.nim:
-------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright 1993-2014 NVIDIA Corporation. All rights reserved. 3 | ## 4 | ## NOTICE TO LICENSEE: 5 | ## 6 | ## This source code and/or documentation ("Licensed Deliverables") are 7 | ## subject to NVIDIA intellectual property rights under U.S. and 8 | ## international Copyright laws. 9 | ## 10 | ## These Licensed Deliverables contained herein is PROPRIETARY and 11 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | ## conditions of a form of NVIDIA software license agreement by and 13 | ## between NVIDIA and Licensee ("License Agreement") or electronically 14 | ## accepted by Licensee. Notwithstanding any terms or conditions to 15 | ## the contrary in the License Agreement, reproduction or disclosure 16 | ## of the Licensed Deliverables to any third party without the express 17 | ## written consent of NVIDIA is prohibited. 18 | ## 19 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | ## OF THESE LICENSED DELIVERABLES. 33 | ## 34 | ## U.S. Government End Users. 
These Licensed Deliverables are a 35 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | ## 1995), consisting of "commercial computer software" and "commercial 37 | ## computer software documentation" as such terms are used in 48 38 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | ## only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | ## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | ## U.S. Government End Users acquire the Licensed Deliverables with 42 | ## only those rights set forth herein. 43 | ## 44 | ## Any use of the Licensed Deliverables in individual and commercial 45 | ## software must include, in the user documentation and internal 46 | ## comments to the code, the above Disclaimer and U.S. Government End 47 | ## Users Notice. 48 | ## 49 | ## 50 | ## This is the public header file for the new CUBLAS library API, it mapped the generic 51 | ## Cublas name functions to the actual _v2 implementations. 
52 | ## 53 | 54 | when not defined(CUBLAS_V2_H): 55 | const 56 | CUBLAS_V2_H* = true 57 | import 58 | cublas_api 59 | 60 | const 61 | cublasCreate* = cublasCreate_v2 62 | cublasDestroy* = cublasDestroy_v2 63 | cublasGetVersion* = cublasGetVersion_v2 64 | cublasSetStream* = cublasSetStream_v2 65 | cublasGetStream* = cublasGetStream_v2 66 | cublasGetPointerMode* = cublasGetPointerMode_v2 67 | cublasSetPointerMode* = cublasSetPointerMode_v2 68 | ## Blas3 Routines 69 | const 70 | cublasSnrm2* = cublasSnrm2_v2 71 | cublasDnrm2* = cublasDnrm2_v2 72 | cublasScnrm2* = cublasScnrm2_v2 73 | cublasDznrm2* = cublasDznrm2_v2 74 | cublasSdot* = cublasSdot_v2 75 | cublasDdot* = cublasDdot_v2 76 | cublasCdotu* = cublasCdotu_v2 77 | cublasCdotc* = cublasCdotc_v2 78 | cublasZdotu* = cublasZdotu_v2 79 | cublasZdotc* = cublasZdotc_v2 80 | cublasSscal* = cublasSscal_v2 81 | cublasDscal* = cublasDscal_v2 82 | cublasCscal* = cublasCscal_v2 83 | cublasCsscal* = cublasCsscal_v2 84 | cublasZscal* = cublasZscal_v2 85 | cublasZdscal* = cublasZdscal_v2 86 | cublasSaxpy* = cublasSaxpy_v2 87 | cublasDaxpy* = cublasDaxpy_v2 88 | cublasCaxpy* = cublasCaxpy_v2 89 | cublasZaxpy* = cublasZaxpy_v2 90 | cublasScopy* = cublasScopy_v2 91 | cublasDcopy* = cublasDcopy_v2 92 | cublasCcopy* = cublasCcopy_v2 93 | cublasZcopy* = cublasZcopy_v2 94 | cublasSswap* = cublasSswap_v2 95 | cublasDswap* = cublasDswap_v2 96 | cublasCswap* = cublasCswap_v2 97 | cublasZswap* = cublasZswap_v2 98 | cublasIsamax* = cublasIsamax_v2 99 | cublasIdamax* = cublasIdamax_v2 100 | cublasIcamax* = cublasIcamax_v2 101 | cublasIzamax* = cublasIzamax_v2 102 | cublasIsamin* = cublasIsamin_v2 103 | cublasIdamin* = cublasIdamin_v2 104 | cublasIcamin* = cublasIcamin_v2 105 | cublasIzamin* = cublasIzamin_v2 106 | cublasSasum* = cublasSasum_v2 107 | cublasDasum* = cublasDasum_v2 108 | cublasScasum* = cublasScasum_v2 109 | cublasDzasum* = cublasDzasum_v2 110 | cublasSrot* = cublasSrot_v2 111 | cublasDrot* = cublasDrot_v2 112 | cublasCrot* = 
cublasCrot_v2 113 | cublasCsrot* = cublasCsrot_v2 114 | cublasZrot* = cublasZrot_v2 115 | cublasZdrot* = cublasZdrot_v2 116 | cublasSrotg* = cublasSrotg_v2 117 | cublasDrotg* = cublasDrotg_v2 118 | cublasCrotg* = cublasCrotg_v2 119 | cublasZrotg* = cublasZrotg_v2 120 | cublasSrotm* = cublasSrotm_v2 121 | cublasDrotm* = cublasDrotm_v2 122 | cublasSrotmg* = cublasSrotmg_v2 123 | cublasDrotmg* = cublasDrotmg_v2 124 | ## Blas2 Routines 125 | const 126 | cublasSgemv* = cublasSgemv_v2 127 | cublasDgemv* = cublasDgemv_v2 128 | cublasCgemv* = cublasCgemv_v2 129 | cublasZgemv* = cublasZgemv_v2 130 | cublasSgbmv* = cublasSgbmv_v2 131 | cublasDgbmv* = cublasDgbmv_v2 132 | cublasCgbmv* = cublasCgbmv_v2 133 | cublasZgbmv* = cublasZgbmv_v2 134 | cublasStrmv* = cublasStrmv_v2 135 | cublasDtrmv* = cublasDtrmv_v2 136 | cublasCtrmv* = cublasCtrmv_v2 137 | cublasZtrmv* = cublasZtrmv_v2 138 | cublasStbmv* = cublasStbmv_v2 139 | cublasDtbmv* = cublasDtbmv_v2 140 | cublasCtbmv* = cublasCtbmv_v2 141 | cublasZtbmv* = cublasZtbmv_v2 142 | cublasStpmv* = cublasStpmv_v2 143 | cublasDtpmv* = cublasDtpmv_v2 144 | cublasCtpmv* = cublasCtpmv_v2 145 | cublasZtpmv* = cublasZtpmv_v2 146 | cublasStrsv* = cublasStrsv_v2 147 | cublasDtrsv* = cublasDtrsv_v2 148 | cublasCtrsv* = cublasCtrsv_v2 149 | cublasZtrsv* = cublasZtrsv_v2 150 | cublasStpsv* = cublasStpsv_v2 151 | cublasDtpsv* = cublasDtpsv_v2 152 | cublasCtpsv* = cublasCtpsv_v2 153 | cublasZtpsv* = cublasZtpsv_v2 154 | cublasStbsv* = cublasStbsv_v2 155 | cublasDtbsv* = cublasDtbsv_v2 156 | cublasCtbsv* = cublasCtbsv_v2 157 | cublasZtbsv* = cublasZtbsv_v2 158 | cublasSsymv* = cublasSsymv_v2 159 | cublasDsymv* = cublasDsymv_v2 160 | cublasCsymv* = cublasCsymv_v2 161 | cublasZsymv* = cublasZsymv_v2 162 | cublasChemv* = cublasChemv_v2 163 | cublasZhemv* = cublasZhemv_v2 164 | cublasSsbmv* = cublasSsbmv_v2 165 | cublasDsbmv* = cublasDsbmv_v2 166 | cublasChbmv* = cublasChbmv_v2 167 | cublasZhbmv* = cublasZhbmv_v2 168 | cublasSspmv* = cublasSspmv_v2 169 
| cublasDspmv* = cublasDspmv_v2 170 | cublasChpmv* = cublasChpmv_v2 171 | cublasZhpmv* = cublasZhpmv_v2 172 | cublasSger* = cublasSger_v2 173 | cublasDger* = cublasDger_v2 174 | cublasCgeru* = cublasCgeru_v2 175 | cublasCgerc* = cublasCgerc_v2 176 | cublasZgeru* = cublasZgeru_v2 177 | cublasZgerc* = cublasZgerc_v2 178 | cublasSsyr* = cublasSsyr_v2 179 | cublasDsyr* = cublasDsyr_v2 180 | cublasCsyr* = cublasCsyr_v2 181 | cublasZsyr* = cublasZsyr_v2 182 | cublasCher* = cublasCher_v2 183 | cublasZher* = cublasZher_v2 184 | cublasSspr* = cublasSspr_v2 185 | cublasDspr* = cublasDspr_v2 186 | cublasChpr* = cublasChpr_v2 187 | cublasZhpr* = cublasZhpr_v2 188 | cublasSsyr2* = cublasSsyr2_v2 189 | cublasDsyr2* = cublasDsyr2_v2 190 | cublasCsyr2* = cublasCsyr2_v2 191 | cublasZsyr2* = cublasZsyr2_v2 192 | cublasCher2* = cublasCher2_v2 193 | cublasZher2* = cublasZher2_v2 194 | cublasSspr2* = cublasSspr2_v2 195 | cublasDspr2* = cublasDspr2_v2 196 | cublasChpr2* = cublasChpr2_v2 197 | cublasZhpr2* = cublasZhpr2_v2 198 | ## Blas3 Routines 199 | const 200 | cublasSgemm* = cublasSgemm_v2 201 | cublasDgemm* = cublasDgemm_v2 202 | cublasCgemm* = cublasCgemm_v2 203 | cublasZgemm* = cublasZgemm_v2 204 | cublasSsyrk* = cublasSsyrk_v2 205 | cublasDsyrk* = cublasDsyrk_v2 206 | cublasCsyrk* = cublasCsyrk_v2 207 | cublasZsyrk* = cublasZsyrk_v2 208 | cublasCherk* = cublasCherk_v2 209 | cublasZherk* = cublasZherk_v2 210 | cublasSsyr2k* = cublasSsyr2k_v2 211 | cublasDsyr2k* = cublasDsyr2k_v2 212 | cublasCsyr2k* = cublasCsyr2k_v2 213 | cublasZsyr2k* = cublasZsyr2k_v2 214 | cublasCher2k* = cublasCher2k_v2 215 | cublasZher2k* = cublasZher2k_v2 216 | cublasSsymm* = cublasSsymm_v2 217 | cublasDsymm* = cublasDsymm_v2 218 | cublasCsymm* = cublasCsymm_v2 219 | cublasZsymm* = cublasZsymm_v2 220 | cublasChemm* = cublasChemm_v2 221 | cublasZhemm* = cublasZhemm_v2 222 | cublasStrsm* = cublasStrsm_v2 223 | cublasDtrsm* = cublasDtrsm_v2 224 | cublasCtrsm* = cublasCtrsm_v2 225 | cublasZtrsm* = cublasZtrsm_v2 
226 | cublasStrmm* = cublasStrmm_v2 227 | cublasDtrmm* = cublasDtrmm_v2 228 | cublasCtrmm* = cublasCtrmm_v2 229 | cublasZtrmm* = cublasZtrmm_v2 -------------------------------------------------------------------------------- /src/nimcuda/cuda8_0/cusolver_common.nim: -------------------------------------------------------------------------------- 1 | {.deadCodeElim: on.} 2 | when defined(windows): 3 | import os 4 | {.passL: "\"" & os.getEnv("CUDA_PATH") / "lib/x64" / "cusolver.lib" & "\"".} 5 | {.pragma: dyn.} 6 | elif defined(macosx): 7 | const 8 | libName = "libcusolver.dylib" 9 | {.pragma: dyn, dynlib: libName.} 10 | else: 11 | const 12 | libName = "libcusolver.so" 13 | {.pragma: dyn, dynlib: libName.} 14 | import 15 | library_types 16 | 17 | ## 18 | ## Copyright 2014 NVIDIA Corporation. All rights reserved. 19 | ## 20 | ## NOTICE TO LICENSEE: 21 | ## 22 | ## This source code and/or documentation ("Licensed Deliverables") are 23 | ## subject to NVIDIA intellectual property rights under U.S. and 24 | ## international Copyright laws. 25 | ## 26 | ## These Licensed Deliverables contained herein is PROPRIETARY and 27 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 28 | ## conditions of a form of NVIDIA software license agreement by and 29 | ## between NVIDIA and Licensee ("License Agreement") or electronically 30 | ## accepted by Licensee. Notwithstanding any terms or conditions to 31 | ## the contrary in the License Agreement, reproduction or disclosure 32 | ## of the Licensed Deliverables to any third party without the express 33 | ## written consent of NVIDIA is prohibited. 34 | ## 35 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 36 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 37 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 38 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
39 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 40 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 41 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 42 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 43 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 44 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 45 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 46 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 47 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 48 | ## OF THESE LICENSED DELIVERABLES. 49 | ## 50 | ## U.S. Government End Users. These Licensed Deliverables are a 51 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 52 | ## 1995), consisting of "commercial computer software" and "commercial 53 | ## computer software documentation" as such terms are used in 48 54 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 55 | ## only as a commercial end item. Consistent with 48 C.F.R.12.212 and 56 | ## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 57 | ## U.S. Government End Users acquire the Licensed Deliverables with 58 | ## only those rights set forth herein. 59 | ## 60 | ## Any use of the Licensed Deliverables in individual and commercial 61 | ## software must include, in the user documentation and internal 62 | ## comments to the code, the above Disclaimer and U.S. Government End 63 | ## Users Notice. 
64 | ## 65 | 66 | when not defined(CUSOLVER_COMMON_H): 67 | const 68 | CUSOLVER_COMMON_H* = true 69 | type 70 | cusolverStatus_t* {.size: sizeof(cint).} = enum 71 | CUSOLVER_STATUS_SUCCESS = 0, CUSOLVER_STATUS_NOT_INITIALIZED = 1, 72 | CUSOLVER_STATUS_ALLOC_FAILED = 2, CUSOLVER_STATUS_INVALID_VALUE = 3, 73 | CUSOLVER_STATUS_ARCH_MISMATCH = 4, CUSOLVER_STATUS_MAPPING_ERROR = 5, 74 | CUSOLVER_STATUS_EXECUTION_FAILED = 6, CUSOLVER_STATUS_INTERNAL_ERROR = 7, 75 | CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED = 8, 76 | CUSOLVER_STATUS_NOT_SUPPORTED = 9, CUSOLVER_STATUS_ZERO_PIVOT = 10, 77 | CUSOLVER_STATUS_INVALID_LICENSE = 11 78 | cusolverEigType_t* {.size: sizeof(cint).} = enum 79 | CUSOLVER_EIG_TYPE_1 = 1, CUSOLVER_EIG_TYPE_2 = 2, CUSOLVER_EIG_TYPE_3 = 3 80 | cusolverEigMode_t* {.size: sizeof(cint).} = enum 81 | CUSOLVER_EIG_MODE_NOVECTOR = 0, CUSOLVER_EIG_MODE_VECTOR = 1 82 | proc cusolverGetProperty*(`type`: libraryPropertyType; value: ptr cint): cusolverStatus_t {. 83 | cdecl, importc: "cusolverGetProperty", dyn.} 84 | -------------------------------------------------------------------------------- /src/nimcuda/cuda8_0/library_types.nim: -------------------------------------------------------------------------------- 1 | ## 2 | ## Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 3 | ## 4 | ## NOTICE TO LICENSEE: 5 | ## 6 | ## This source code and/or documentation ("Licensed Deliverables") are 7 | ## subject to NVIDIA intellectual property rights under U.S. and 8 | ## international Copyright laws. 9 | ## 10 | ## These Licensed Deliverables contained herein is PROPRIETARY and 11 | ## CONFIDENTIAL to NVIDIA and is being provided under the terms and 12 | ## conditions of a form of NVIDIA software license agreement by and 13 | ## between NVIDIA and Licensee ("License Agreement") or electronically 14 | ## accepted by Licensee. 
Notwithstanding any terms or conditions to 15 | ## the contrary in the License Agreement, reproduction or disclosure 16 | ## of the Licensed Deliverables to any third party without the express 17 | ## written consent of NVIDIA is prohibited. 18 | ## 19 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 20 | ## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE 21 | ## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS 22 | ## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 23 | ## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED 24 | ## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 25 | ## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 26 | ## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE 27 | ## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY 28 | ## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY 29 | ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 30 | ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 31 | ## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 32 | ## OF THESE LICENSED DELIVERABLES. 33 | ## 34 | ## U.S. Government End Users. These Licensed Deliverables are a 35 | ## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT 36 | ## 1995), consisting of "commercial computer software" and "commercial 37 | ## computer software documentation" as such terms are used in 48 38 | ## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government 39 | ## only as a commercial end item. Consistent with 48 C.F.R.12.212 and 40 | ## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all 41 | ## U.S. Government End Users acquire the Licensed Deliverables with 42 | ## only those rights set forth herein. 
##
## Any use of the Licensed Deliverables in individual and commercial
## software must include, in the user documentation and internal
## comments to the code, the above Disclaimer and U.S. Government End
## Users Notice.
##

when not defined(LIBRARY_TYPES_H):
  const
    LIBRARY_TYPES_H* = true
type
  # `size: sizeof(cint)` gives these enums the storage size of a C `int`,
  # matching how the other C enum bindings in this package are declared.
  # Without it Nim picks the smallest integer type that fits the values.
  cudaDataType* {.size: sizeof(cint).} = enum
    CUDA_R_32F = 0,             ## real as a float
    CUDA_R_64F = 1,             ## real as a double
    CUDA_R_16F = 2,             ## real as a half
    CUDA_R_8I = 3,              ## real as a signed char
    CUDA_C_32F = 4,             ## complex as a pair of float numbers
    CUDA_C_64F = 5,             ## complex as a pair of double numbers
    CUDA_C_16F = 6,             ## complex as a pair of half numbers
    CUDA_C_8I = 7,              ## complex as a pair of signed char numbers
    CUDA_R_8U = 8,              ## real as a unsigned char
    CUDA_C_8U = 9,              ## complex as a pair of unsigned char numbers
    CUDA_R_32I = 10,            ## real as a signed int
    CUDA_C_32I = 11,            ## complex as a pair of signed int numbers
    CUDA_R_32U = 12,            ## real as a unsigned int
    CUDA_C_32U = 13             ## complex as a pair of unsigned int numbers
  libraryPropertyType* {.size: sizeof(cint).} = enum
    MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL
--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/nvgraph.nim:
--------------------------------------------------------------------------------
## #prefix nvgraph
## #prefix NVGRAPH_

{.deadCodeElim: on.}
when defined(windows):
  import os
  {.passL: "\"" & os.getEnv("CUDA_PATH") / "lib/x64" / "nvgraph.lib" & "\"".}
  {.pragma: dyn.}
elif defined(macosx):
  const
    libName = "libnvgraph.dylib"
  {.pragma: dyn, dynlib: libName.}
else:
  const
    libName = "libnvgraph.so"
  {.pragma: dyn, dynlib: libName.}
import
  library_types

##
## Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
##
## NVIDIA CORPORATION and its licensors retain all intellectual property
## and proprietary rights in and to this software, related documentation
## and any modifications thereto. Any use, reproduction, disclosure or
## distribution of this software and related documentation without an express
## license agreement from NVIDIA CORPORATION is strictly prohibited.
##
##

## nvGRAPH status type returns

type
  nvgraphStatus_t* {.size: sizeof(cint).} = enum
    NVGRAPH_STATUS_SUCCESS = 0,
    NVGRAPH_STATUS_NOT_INITIALIZED = 1,
    NVGRAPH_STATUS_ALLOC_FAILED = 2,
    NVGRAPH_STATUS_INVALID_VALUE = 3,
    NVGRAPH_STATUS_ARCH_MISMATCH = 4,
    NVGRAPH_STATUS_MAPPING_ERROR = 5,
    NVGRAPH_STATUS_EXECUTION_FAILED = 6,
    NVGRAPH_STATUS_INTERNAL_ERROR = 7,
    NVGRAPH_STATUS_TYPE_NOT_SUPPORTED = 8,
    NVGRAPH_STATUS_NOT_CONVERGED = 9


proc nvgraphStatusGetString*(status: nvgraphStatus_t): cstring {.
    cdecl, importc: "nvgraphStatusGetString", dyn.}

## Opaque structure holding nvGRAPH library context

type
  nvgraphContext* = object

  nvgraphHandle_t* = ptr nvgraphContext

## Opaque structure holding the graph descriptor

type
  nvgraphGraphDescr* = object

  nvgraphGraphDescr_t* = ptr nvgraphGraphDescr

## Semi-ring types

type
  nvgraphSemiring_t* {.size: sizeof(cint).} = enum
    NVGRAPH_PLUS_TIMES_SR = 0,
    NVGRAPH_MIN_PLUS_SR = 1,
    NVGRAPH_MAX_MIN_SR = 2,
    NVGRAPH_OR_AND_SR = 3


## Topology types

type
  nvgraphTopologyType_t* {.size: sizeof(cint).} = enum
    NVGRAPH_CSR_32 = 0,
    NVGRAPH_CSC_32 = 1,
    NVGRAPH_COO_32 = 2
  nvgraphTag_t* {.size: sizeof(cint).} = enum
    NVGRAPH_DEFAULT = 0,            ## Default is unsorted.
    NVGRAPH_UNSORTED = 1,           ##
    NVGRAPH_SORTED_BY_SOURCE = 2,   ## CSR
    NVGRAPH_SORTED_BY_DESTINATION = 3



type
  nvgraphCSRTopology32I_st* = object
    nvertices*: cint                ## n+1
    nedges*: cint                   ## nnz
    source_offsets*: ptr cint       ## rowPtr
    destination_indices*: ptr cint  ## colInd

  nvgraphCSRTopology32I_t* = ptr nvgraphCSRTopology32I_st
  nvgraphCSCTopology32I_st* = object
    nvertices*: cint                ## n+1
    nedges*: cint                   ## nnz
    destination_offsets*: ptr cint  ## colPtr
    source_indices*: ptr cint       ## rowInd

  nvgraphCSCTopology32I_t* = ptr nvgraphCSCTopology32I_st
  nvgraphCOOTopology32I_st* = object
    nvertices*: cint                ## n+1
    nedges*: cint                   ## nnz
    source_indices*: ptr cint       ## rowInd
    destination_indices*: ptr cint  ## colInd
    tag*: nvgraphTag_t

  nvgraphCOOTopology32I_t* = ptr nvgraphCOOTopology32I_st


proc nvgraphCreate*(handle: ptr nvgraphHandle_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphCreate", dyn.}
  ## Open the library and create the handle.

proc nvgraphDestroy*(handle: nvgraphHandle_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphDestroy", dyn.}
  ## Close the library and destroy the handle.

proc nvgraphCreateGraphDescr*(handle: nvgraphHandle_t;
                              descrG: ptr nvgraphGraphDescr_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphCreateGraphDescr", dyn.}
  ## Create an empty graph descriptor.

proc nvgraphDestroyGraphDescr*(handle: nvgraphHandle_t;
                               descrG: nvgraphGraphDescr_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphDestroyGraphDescr", dyn.}
  ## Destroy a graph descriptor.

proc nvgraphSetGraphStructure*(handle: nvgraphHandle_t;
                               descrG: nvgraphGraphDescr_t;
                               topologyData: pointer;
                               TType: nvgraphTopologyType_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphSetGraphStructure", dyn.}
  ## Set size, topology data in the graph descriptor.

proc nvgraphGetGraphStructure*(handle: nvgraphHandle_t;
                               descrG: nvgraphGraphDescr_t;
                               topologyData: pointer;
                               TType: ptr nvgraphTopologyType_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphGetGraphStructure", dyn.}
  ## Query size and topology information from the graph descriptor.

proc nvgraphAllocateVertexData*(handle: nvgraphHandle_t;
                                descrG: nvgraphGraphDescr_t;
                                numsets: csize_t;
                                settypes: ptr cudaDataType): nvgraphStatus_t {.
    cdecl, importc: "nvgraphAllocateVertexData", dyn.}
  ## Allocate `numsets` vectors of size V representing Vertex Data and attach
  ## them to the graph. `settypes[i]` is the type of vector #i; currently all
  ## Vertex and Edge data should have the same type.

proc nvgraphAllocateEdgeData*(handle: nvgraphHandle_t;
                              descrG: nvgraphGraphDescr_t;
                              numsets: csize_t;
                              settypes: ptr cudaDataType): nvgraphStatus_t {.
    cdecl, importc: "nvgraphAllocateEdgeData", dyn.}
  ## Allocate `numsets` vectors of size E representing Edge Data and attach
  ## them to the graph. `settypes[i]` is the type of vector #i; currently all
  ## Vertex and Edge data should have the same type.

proc nvgraphSetVertexData*(handle: nvgraphHandle_t;
                           descrG: nvgraphGraphDescr_t;
                           vertexData: pointer;
                           setnum: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphSetVertexData", dyn.}
  ## Update the vertex set `setnum` with the data in `vertexData`; sets have
  ## a 0-based index. Conversions are not supported, so
  ## `nvgraphTopologyType_t` should match the graph structure.

proc nvgraphGetVertexData*(handle: nvgraphHandle_t;
                           descrG: nvgraphGraphDescr_t;
                           vertexData: pointer;
                           setnum: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphGetVertexData", dyn.}
  ## Copy the vertex set `setnum` into `vertexData`; sets have a 0-based
  ## index. Conversions are not supported, so `nvgraphTopologyType_t` should
  ## match the graph structure.
  ## (The upstream header comment says "edge set" here, which looks like a
  ## copy-paste slip given the proc and parameter names.)

proc nvgraphConvertTopology*(handle: nvgraphHandle_t;
                             srcTType: nvgraphTopologyType_t;
                             srcTopology: pointer;
                             srcEdgeData: pointer;
                             dataType: ptr cudaDataType;
                             dstTType: nvgraphTopologyType_t;
                             dstTopology: pointer;
                             dstEdgeData: pointer): nvgraphStatus_t {.
    cdecl, importc: "nvgraphConvertTopology", dyn.}
  ## Convert the edge data to another topology.

proc nvgraphConvertGraph*(handle: nvgraphHandle_t;
                          srcDescrG: nvgraphGraphDescr_t;
                          dstDescrG: nvgraphGraphDescr_t;
                          dstTType: nvgraphTopologyType_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphConvertGraph", dyn.}
  ## Convert graph to another structure.

proc nvgraphSetEdgeData*(handle: nvgraphHandle_t;
                         descrG: nvgraphGraphDescr_t;
                         edgeData: pointer;
                         setnum: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphSetEdgeData", dyn.}
  ## Update the edge set `setnum` with the data in `edgeData`; sets have a
  ## 0-based index. Conversions are not supported, so
  ## `nvgraphTopologyType_t` should match the graph structure.

proc nvgraphGetEdgeData*(handle: nvgraphHandle_t;
                         descrG: nvgraphGraphDescr_t;
                         edgeData: pointer;
                         setnum: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphGetEdgeData", dyn.}
  ## Copy the edge set `setnum` into `edgeData`; sets have a 0-based index.
  ## Conversions are not supported, so `nvgraphTopologyType_t` should match
  ## the graph structure.

proc nvgraphExtractSubgraphByVertex*(handle: nvgraphHandle_t;
                                     descrG: nvgraphGraphDescr_t;
                                     subdescrG: nvgraphGraphDescr_t;
                                     subvertices: ptr cint;
                                     numvertices: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphExtractSubgraphByVertex", dyn.}
  ## Create a new graph by extracting a subgraph given a list of vertices.

proc nvgraphExtractSubgraphByEdge*(handle: nvgraphHandle_t;
                                   descrG: nvgraphGraphDescr_t;
                                   subdescrG: nvgraphGraphDescr_t;
                                   subedges: ptr cint;
                                   numedges: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphExtractSubgraphByEdge", dyn.}
  ## Create a new graph by extracting a subgraph given a list of edges.

proc nvgraphSrSpmv*(handle: nvgraphHandle_t;
                    descrG: nvgraphGraphDescr_t;
                    weight_index: csize_t;
                    alpha: pointer;
                    x_index: csize_t;
                    beta: pointer;
                    y_index: csize_t;
                    SR: nvgraphSemiring_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphSrSpmv", dyn.}
  ## nvGRAPH Semi-ring sparse matrix vector multiplication.

proc nvgraphSssp*(handle: nvgraphHandle_t;
                  descrG: nvgraphGraphDescr_t;
                  weight_index: csize_t;
                  source_vert: ptr cint;
                  sssp_index: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphSssp", dyn.}
  ## nvGRAPH Single Source Shortest Path (SSSP).
  ## Calculate the shortest path distance from a single vertex in the graph
  ## to all other vertices.

proc nvgraphWidestPath*(handle: nvgraphHandle_t;
                        descrG: nvgraphGraphDescr_t;
                        weight_index: csize_t;
                        source_vert: ptr cint;
                        widest_path_index: csize_t): nvgraphStatus_t {.
    cdecl, importc: "nvgraphWidestPath", dyn.}
  ## nvGRAPH WidestPath.
  ## Find widest path potential from source_index to every other vertices.

proc nvgraphPagerank*(handle: nvgraphHandle_t;
                      descrG: nvgraphGraphDescr_t;
                      weight_index: csize_t;
                      alpha: pointer;
                      bookmark_index: csize_t;
                      has_guess: cint;
                      pagerank_index: csize_t;
                      tolerance: cfloat;
                      max_iter: cint): nvgraphStatus_t {.
    cdecl, importc: "nvgraphPagerank", dyn.}
  ## nvGRAPH PageRank.
  ## Find PageRank for each vertex of a graph with a given transition
  ## probabilities, a bookmark vector of dangling vertices, and the damping
  ## factor.

--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/surface_types.nim:
--------------------------------------------------------------------------------
##
## Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
##
## NOTICE TO LICENSEE:
##
## This source code and/or documentation ("Licensed Deliverables") are
## subject to NVIDIA intellectual property rights under U.S. and
## international Copyright laws.
##
## These Licensed Deliverables contained herein is PROPRIETARY and
## CONFIDENTIAL to NVIDIA and is being provided under the terms and
## conditions of a form of NVIDIA software license agreement by and
## between NVIDIA and Licensee ("License Agreement") or electronically
## accepted by Licensee. Notwithstanding any terms or conditions to
## the contrary in the License Agreement, reproduction or disclosure
## of the Licensed Deliverables to any third party without the express
## written consent of NVIDIA is prohibited.
##
## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
## OF THESE LICENSED DELIVERABLES.
##
## U.S. Government End Users. These Licensed Deliverables are a
## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
## 1995), consisting of "commercial computer software" and "commercial
## computer software documentation" as such terms are used in 48
## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
## only as a commercial end item. Consistent with 48 C.F.R.12.212 and
## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
## U.S. Government End Users acquire the Licensed Deliverables with
## only those rights set forth herein.
##
## Any use of the Licensed Deliverables in individual and commercial
## software must include, in the user documentation and internal
## comments to the code, the above Disclaimer and U.S. Government End
## Users Notice.
##

when not defined(SURFACE_TYPES_H):
  const
    SURFACE_TYPES_H* = true
  import
    driver_types

  ## \addtogroup CUDART_TYPES
  ##
  ## @{
  const
    cudaSurfaceType1D* = 0x00000001
    cudaSurfaceType2D* = 0x00000002
    cudaSurfaceType3D* = 0x00000003
    cudaSurfaceTypeCubemap* = 0x0000000C
    cudaSurfaceType1DLayered* = 0x000000F1
    cudaSurfaceType2DLayered* = 0x000000F2
    cudaSurfaceTypeCubemapLayered* = 0x000000FC

  # The enums below mirror C enums, so they are pinned to the width of a C
  # `int`; without the `size` pragma Nim would store them in a single byte.
  type
    cudaSurfaceBoundaryMode* {.size: sizeof(cint).} = enum
      ## CUDA Surface boundary modes
      cudaBoundaryModeZero = 0,   ## Zero boundary mode
      cudaBoundaryModeClamp = 1,  ## Clamp boundary mode
      cudaBoundaryModeTrap = 2    ## Trap boundary mode

  type
    cudaSurfaceFormatMode* {.size: sizeof(cint).} = enum
      ## CUDA Surface format modes
      cudaFormatModeForced = 0,   ## Forced format mode
      cudaFormatModeAuto = 1      ## Auto format mode

  type
    surfaceReference* = object
      ## CUDA Surface reference
      channelDesc*: cudaChannelFormatDesc
        ## Channel descriptor for surface reference

  type
    cudaSurfaceObject_t* = culonglong
      ## An opaque value that represents a CUDA Surface object
  ## @}
  ## END CUDART_TYPES
--------------------------------------------------------------------------------
/src/nimcuda/cuda8_0/texture_types.nim:
--------------------------------------------------------------------------------
##
## Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
##
## NOTICE TO LICENSEE:
##
## This source code and/or documentation ("Licensed Deliverables") are
## subject to NVIDIA intellectual property rights under U.S. and
## international Copyright laws.
##
## These Licensed Deliverables contained herein is PROPRIETARY and
## CONFIDENTIAL to NVIDIA and is being provided under the terms and
## conditions of a form of NVIDIA software license agreement by and
## between NVIDIA and Licensee ("License Agreement") or electronically
## accepted by Licensee. Notwithstanding any terms or conditions to
## the contrary in the License Agreement, reproduction or disclosure
## of the Licensed Deliverables to any third party without the express
## written consent of NVIDIA is prohibited.
##
## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
## LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
## SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
## PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
## NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
## DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
## NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
## NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
## LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
## SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
## ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
## OF THESE LICENSED DELIVERABLES.
##
## U.S. Government End Users. These Licensed Deliverables are a
## "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
## 1995), consisting of "commercial computer software" and "commercial
## computer software documentation" as such terms are used in 48
## C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
## only as a commercial end item. Consistent with 48 C.F.R.12.212 and
## 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
## U.S. Government End Users acquire the Licensed Deliverables with
## only those rights set forth herein.
##
## Any use of the Licensed Deliverables in individual and commercial
## software must include, in the user documentation and internal
## comments to the code, the above Disclaimer and U.S. Government End
## Users Notice.
##

when not defined(TEXTURE_TYPES_H):
  const
    TEXTURE_TYPES_H* = true
  import
    driver_types

  ## \addtogroup CUDART_TYPES
  ##
  ## @{
  const
    cudaTextureType1D* = 0x00000001
    cudaTextureType2D* = 0x00000002
    cudaTextureType3D* = 0x00000003
    cudaTextureTypeCubemap* = 0x0000000C
    cudaTextureType1DLayered* = 0x000000F1
    cudaTextureType2DLayered* = 0x000000F2
    cudaTextureTypeCubemapLayered* = 0x000000FC

  # The enums below are fields of objects that mirror C structs, so they are
  # pinned to the width of a C `int`; without the `size` pragma Nim would
  # store them in a single byte and shift every following field.
  type
    cudaTextureAddressMode* {.size: sizeof(cint).} = enum
      ## CUDA texture address modes
      cudaAddressModeWrap = 0,    ## Wrapping address mode
      cudaAddressModeClamp = 1,   ## Clamp to edge address mode
      cudaAddressModeMirror = 2,  ## Mirror address mode
      cudaAddressModeBorder = 3   ## Border address mode

  type
    cudaTextureFilterMode* {.size: sizeof(cint).} = enum
      ## CUDA texture filter modes
      cudaFilterModePoint = 0,    ## Point filter mode
      cudaFilterModeLinear = 1    ## Linear filter mode

  type
    cudaTextureReadMode* {.size: sizeof(cint).} = enum
      ## CUDA texture read modes
      cudaReadModeElementType = 0,      ## Read texture as specified element type
      cudaReadModeNormalizedFloat = 1   ## Read texture as normalized float

  type
    textureReference* = object
      ## CUDA texture reference
      normalized*: cint
        ## Indicates whether texture reads are normalized or not
      filterMode*: cudaTextureFilterMode
        ## Texture filter mode
      addressMode*: array[3, cudaTextureAddressMode]
        ## Texture address mode for up to 3 dimensions
      channelDesc*: cudaChannelFormatDesc
        ## Channel descriptor for the texture reference
      sRGB*: cint
        ## Perform sRGB->linear conversion during texture read
      maxAnisotropy*: cuint
        ## Limit to the anisotropy ratio
      mipmapFilterMode*: cudaTextureFilterMode
        ## Mipmap filter mode
      mipmapLevelBias*: cfloat
        ## Offset applied to the supplied mipmap level
      minMipmapLevelClamp*: cfloat
        ## Lower end of the mipmap level range to clamp access to
      maxMipmapLevelClamp*: cfloat
        ## Upper end of the mipmap level range to clamp access to
      cudaReserved*: array[15, cint]

  type
    cudaTextureDesc* = object
      ## CUDA texture descriptor
      addressMode*: array[3, cudaTextureAddressMode]
        ## Texture address mode for up to 3 dimensions
      filterMode*: cudaTextureFilterMode
        ## Texture filter mode
      readMode*: cudaTextureReadMode
        ## Texture read mode
      sRGB*: cint
        ## Perform sRGB->linear conversion during texture read
      borderColor*: array[4, cfloat]
        ## Texture Border Color
      normalizedCoords*: cint
        ## Indicates whether texture reads are normalized or not
      maxAnisotropy*: cuint
        ## Limit to the anisotropy ratio
      mipmapFilterMode*: cudaTextureFilterMode
        ## Mipmap filter mode
      mipmapLevelBias*: cfloat
        ## Offset applied to the supplied mipmap level
      minMipmapLevelClamp*: cfloat
        ## Lower end of the mipmap level range to clamp access to
      maxMipmapLevelClamp*: cfloat
        ## Upper end of the mipmap level range to clamp access to

  type
    cudaTextureObject_t* = culonglong
      ## An opaque value that represents a CUDA texture object
  ## @}
  ## END CUDART_TYPES
--------------------------------------------------------------------------------
/utils/postprocessor.nim:
--------------------------------------------------------------------------------

##[
  This helper executable postprocesses the nim files after they get
  spit out by c2nim.
]##

import
  std / [pegs, cmdline, paths, files, strformat, strutils, sugar, sets, options]



func mangleDefines(code: sink string): string =
  ## C2nim struggles with mangling code that looks like this:
  ## `defined(__MY_CONST__)`. This func strips the leading and/or trailing
  ## underscores from the identifier inside `defined(...)`, leaving
  ## `defined(MY_CONST)`. Identifiers without such underscores are not
  ## touched.
  # Rule order matters: each rule is referenced before it is declared.
  let pegAst = peg"""definedExpr <- 'defined(' \s* middle \s* endOfDefined

  middle <- leading / trailing

  leading <- '_'+ identifer '_'*
  trailing <- '_'* identifer '_'+

  identifer <- { ( !(endOfIdentifier / endOfDefined) .)+ }

  endOfIdentifier <- '_'+ !(\a / \d)
  endOfDefined <- ')'
  """

  result = code.replacef(pegAst,
                         "defined($1)")
  # result = code.replacef(peg"'defined(__' { (!('__' / [)]) .)+ } '__'?
#                                                                     [)]",
#                        "defined($1)")
# (the two lines above are the tail of a commented-out legacy `replacef` call)

func handleForwardDecls(code: sink string): string =
  ## C2nim handles forward declarations by outputting the following:
  ## `discard "forward decl of {typeDesc}"`
  ## This func replaces it with `type {typeDesc} {.nodecl.} = object`.
  result = code.replacef(peg""" 'discard "forward decl of ' {\ident} ["] """,
                         "type $1 {.nodecl.} = object")


func removeUnusedVariableSilencing(code: sink string): string =
  ## Rewrites a line consisting of `cast[nil](name)` into `discard name`,
  ## keeping the line's leading indentation and whatever follows the closing
  ## parenthesis up to the end of the line.
  let matcher = peg" {\n ' '*} 'cast[nil](' \s* {\ident} \s* ')' {@\n} "

  result = code.replacef(matcher, "$1discard $2$3")


func fixTrailingUnderscoreProcName(name: string): string =
  ## Strips every trailing underscore from `name` and appends `UnderScore`.
  let nameNoTrailing = name.strip(chars={'_'}, leading=false, trailing=true)
  result = fmt"{nameNoTrailing}UnderScore"


proc fixProcsDecls(code: sink string): string =
  ## Rewrites the exported proc declarations found in `code`:
  ##
  ## * a proc whose return type is one of `DiscardableReturnTypes` (compared
  ##   style-insensitively) gets the `discardable` pragma, unless it already
  ##   has it;
  ## * a proc whose name ends in `_` is renamed through
  ##   `fixTrailingUnderscoreProcName`.
  ##
  ## Text that does not match the declaration grammar is returned unchanged.

  # Rule order matters: each rule is referenced before it is declared, and
  # the event handlers below dispatch on the rule names.
  let procDecls = peg"""procDecls <- (@procDecl)*
  procDecl <- 'proc ' procName '*(' \s* argDecls ')' (':' \s+ returnType)? (\s+ pragmas)? (\s+ '=')? @\n
  procName <- \ident
  notLastProcArg <- procArgName ': ' procArgType ';'
  lastProcArg <- procArgName ': ' procArgType !';'
  argDecls <- (notLastProcArg \s*)* lastProcArg
  returnType <- \ident
  pragmas <- '{.' \s* (notLastPragma \s+)* lastPragma \s* '.}'
  notLastPragma <- singlePragma ','
  lastPragma <- singlePragma !','


  singlePragma <- pragmaName (':' \s+ ["]? \ident ["]?)?
  pragmaName <- \ident

  procArgName <- \ident
  procArgType <- 'ptr '? \ident
  """

  type ProcToReplace = object
    startF, lengthF: int # template confusion later on requires the 'F'.
    oldName, newName: Option[string]
    makeDiscardable: bool

  var needsChanged: seq[ProcToReplace] = @[]

  const DiscardableReturnTypes = ["cudaError", "cudaOccError", "cublasStatus_t",
                                  "cufftResult", "cusparseStatus_t",
                                  "cusolverStatus_t", "curandStatus"]

  type ProcDeclarationParsingContext = object
    currentProc: string
    shouldBeDiscardable: bool

  func reset(self: var ProcDeclarationParsingContext) =
    self.currentProc = ""
    self.shouldBeDiscardable = false

  var context = ProcDeclarationParsingContext()


  let parseProcDecls = procDecls.eventParser:
    pkNonTerminal:
      leave:
        template thisMatch(): string =
          code[start .. start + length - 1]

        if length > 0:
          # Successful match on a nonterminal (named) peg.
          case p.nt.name
          of "procName":
            context.currentProc = thisMatch()

          of "returnType":
            let returnType = thisMatch()
            for discardableType in DiscardableReturnTypes:
              if returnType.cmpIgnoreStyle(discardableType) == 0:
                context.shouldBeDiscardable = true


          of "pragmaName":
            if thisMatch().cmpIgnoreStyle("discardable") == 0:
              # Proc is already discardable.
              context.shouldBeDiscardable = false

          of "procDecl":
            # Success parsing a proc declaration.
            var found = ProcToReplace(startF: start, lengthF: length,
                                      makeDiscardable: context.shouldBeDiscardable)

            if context.currentProc.endsWith('_'):
              found.oldName = some context.currentProc
              found.newName = some context.currentProc.fixTrailingUnderscoreProcName

            if found.makeDiscardable or found.newName.isSome:
              needsChanged.add found

            reset context

          else: discard

        else:
          case p.nt.name
          of "procDecl":
            # Failure parsing; not a proc declaration.
            reset context
          else: discard

  # The parse fills `needsChanged` as a side effect, so it must run
  # unconditionally. Wrapping the call in `assert` would drop it whenever
  # assertions are disabled (e.g. `-d:danger`).
  let parsedLength = parseProcDecls(code)
  doAssert parsedLength != -1, "proc declaration grammar failed to run"

  func makeDiscardable(decl: string): string =
    ## Returns `decl` with `discardable` added as the first entry of its last
    ## pragma list, or with a new `{.discardable.}` pragma when `decl` has
    ## no pragma list.
    const
      PragmaStart = "{."
      NotFound = -1
    let pragmaAt = decl.rfind(PragmaStart)
    if pragmaAt != NotFound:
      let insertAt = pragmaAt + PragmaStart.len
      result = decl[0 ..< insertAt] & "discardable, " & decl[insertAt .. ^1]
    else:
      # A matched declaration ends with the newline consumed by `@\n`, so set
      # the trailing whitespace aside before looking for the `=`.
      let
        head = decl.strip(leading = false, trailing = true)
        tail = decl[head.len .. ^1]
      if head.endsWith('='):
        let signature = head[0 .. ^2].strip(leading = false, trailing = true)
        result = signature & " {.discardable.} =" & tail
      else:
        # Bodiless declaration without pragmas.
        result = head & " {.discardable.}" & tail

  let replacePairs = collect:
    for procedure in needsChanged:
      let original =
        code[procedure.startF ..< procedure.startF + procedure.lengthF]
      var modified = original

      if procedure.makeDiscardable:
        modified = modified.makeDiscardable
      if procedure.newName.isSome:
        modified = modified.replace(fmt"proc {procedure.oldName.get}",
                                    fmt"proc {procedure.newName.get}")
      (original, modified)

  result = code.multiReplace(replacePairs)




func doSimpleSwaps(code: sink string): string =
  ## Corrects some types and calls that c2nim doesn't get right, by plain
  ## substring replacement: `cuchar` -> `char`, `(int)` -> `(cint)` and
  ## `memcpy` -> `copyMem`.
  result = code.multiReplace(("cuchar", "char"),
                             ("(int)", "(cint)"),
                             ("memcpy", "copyMem"))


func escapeKeyWords(code: sink string): string =
  ## C2nim outputs variables named like Nim's special identifiers as-is.
  ## This func escapes them by appending `NotKeyWord`; currently only
  ## `result` is handled, when preceded by whitespace or `(` and not
  ## followed by a word character.
  # TODO: add more keywords
  let someKeyWord = peg" { \s+ / '(' } {'result'} !\w"
  result = code.replacef(someKeyWord, "$1$2NotKeyWord")




proc postprocess*(code: sink string): string =
  ## Runs every postprocessing pass over `code`, in a fixed order, and
  ## returns the rewritten source.
  result = code.mangleDefines.doSimpleSwaps.handleForwardDecls.
      fixProcsDecls.escapeKeyWords.
      removeUnusedVariableSilencing()






proc main =
  ## Postprocesses, in place, every file named on the command line.
  for arg in commandLineParams():
    # `doAssert` so the check also runs when assertions are disabled.
    doAssert (Path arg).fileExists, fmt"Bad argument! '{arg}' doesn't exist."

    let
      input = readFile(arg)
      postprocessed = input.postprocess()

    writeFile arg, postprocessed
    echo fmt"Postprocessed '{arg}'"



when isMainModule:
  main()

--------------------------------------------------------------------------------
/utils/preprocessor.nim:
--------------------------------------------------------------------------------

##[
  This helper executable preprocesses the c header files before they get
  passed to c2nim.
]##

import
  std / [pegs, cmdline, paths, files, strformat, strutils]

func renameUint64(code: sink string): string =
  ## C2nim has trouble with the `unsigned long long int` type.
  ## This func replaces it (with or without the trailing `int`) with
  ## `culonglong`, which it can handle.
  result = code.replace(peg"'unsigned long long' ' int'?", "culonglong")

func renameInt64(code: sink string): string =
  ## Replaces every occurrence of `int64_t` with `clonglong`.
  # NOTE(review): this is a plain substring replacement, so the tail of
  # identifiers such as `uint64_t` is rewritten as well - confirm that is
  # intended.
  result = code.replace("int64_t", "clonglong")

func renameCuchar(code: sink string): string =
  ## `cuchar` is deprecated.
  ## This func replaces it with `uint8`.
  result = code.replace("cuchar", "uint8")

func rearrangeConstPtrTypeDefs(code: string): string =
  ## C2nim has trouble with a certain arrangement of `const*` typedefs.
  ## This func rewrites `typedef struct A const* B;` as
  ## `typedef const struct A* B;`, which it can handle.
  result = code.replacef(peg"'typedef struct ' {\ident} ' const* ' {\ident}[;]",
                         "typedef const struct $1* $2;")





func preprocess*(code: sink string): string =
  ## Does some nice formatting to Cuda library code before it gets passed to
  ## c2Nim.
  result = code.rearrangeConstPtrTypeDefs.renameUint64.renameInt64.
      renameCuchar()




proc main =
  ## Preprocesses, in place, every file named on the command line.
  for arg in commandLineParams():
    # `doAssert` so the check also runs when assertions are disabled.
    doAssert (Path arg).fileExists, fmt"Bad argument! '{arg}' doesn't exist."

    let
      input = readFile(arg)
      preprocessed = input.preprocess()

    writeFile arg, preprocessed
    echo fmt"Preprocessed '{arg}'"



when isMainModule:
  main()

--------------------------------------------------------------------------------