├── .gitignore ├── LICENSE ├── README.md ├── examples ├── compile.nim ├── info.nim ├── kmeans.cl ├── kmeans.nim ├── point.h ├── vadd.cl └── vadd.nim ├── nimcl.nim ├── nimcl.nimble └── points.json /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache 2 | *.ndb 3 | *.aoco 4 | *.aocx 5 | examples/compile 6 | examples/info 7 | examples/point.nim 8 | examples/kmeans 9 | examples/vadd -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nim OpenCL utilities 2 | 3 | This is an attempt at a high level wrapper over 4 | [OpenCL](https://github.com/nim-lang/opencl/). 5 | 6 | For now, things are added when needed and as such, they may not be perfectly 7 | coherent. Still, they should be enough to cover the simplest cases and get 8 | started. 
9 | 10 | Some API changes can also be expected as the library becomes more 11 | comprehensive. 12 | 13 | ## Vector add example 14 | 15 | The "hello, world!" of OpenCL: 16 | 17 | ```nim 18 | const 19 | body = staticRead("vadd.cl") 20 | size = 1_000_000 21 | var 22 | a = newSeq[float32](size) 23 | b = newSeq[float32](size) 24 | c = newSeq[float32](size) 25 | 26 | for i in 0 .. a.high: 27 | a[i] = i.float32 28 | b[i] = (i * i).float32 29 | 30 | let 31 | (device, context, queue) = singleDeviceDefaults() 32 | program = context.createAndBuild(body, device) 33 | add = program.createKernel("add_vector") 34 | gpuA = context.bufferLike(a) 35 | gpuB = context.bufferLike(b) 36 | gpuC = context.bufferLike(c) 37 | 38 | add.args(gpuA, gpuB, gpuC, size.int32) 39 | 40 | queue.write(a, gpuA) 41 | queue.write(b, gpuB) 42 | queue.run(add, size) 43 | queue.read(c, gpuC) 44 | 45 | echo c[1 .. 100] 46 | 47 | # Clean up 48 | release(queue) 49 | release(add) 50 | release(program) 51 | release(gpuA) 52 | release(gpuB) 53 | release(gpuC) 54 | release(context) 55 | ``` 56 | 57 | The kernel is just 58 | 59 | ```opencl 60 | __kernel void add_vector(__global float* a, __global float* b, __global float* c, int num_els) { 61 | int idx = get_global_id(0); 62 | if (idx < num_els) { 63 | c[idx] = a[idx] + b[idx]; 64 | } 65 | } 66 | ``` -------------------------------------------------------------------------------- /examples/compile.nim: -------------------------------------------------------------------------------- 1 | # Copyright 2016-2017 UniCredit S.p.A. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
proc main() =
  ## Tries to build the OpenCL source file named by the single command line
  ## argument on every device of the "NVIDIA CUDA" platform.
  ##
  ## Prints "Program compiled" on success. On failure prints "Build failure"
  ## followed by the text returned by `buildErrors`.
  if paramCount() != 1:
    echo "Please, use exactly one argument"
    return
  let
    fileName = paramStr(1)
    body = readFile(fileName)
    platform = getPlatformByName("NVIDIA CUDA")
    devices = platform.getDevices
    context = devices.createContext
    program = context.createProgram(body)
  try:
    program.buildOn(devices)
    echo "Program compiled"
  except:
    echo "Build failure"
    echo program.buildErrors(devices)
  finally:
    # Release the OpenCL handles on both the success and the failure path,
    # as the other examples do once they are done with them.
    release(program)
    release(context)

when isMainModule:
  main()
proc main() =
  ## Prints the version and name of the first OpenCL platform, then the name
  ## and the limits reported by each of its devices.
  let platform = firstPlatform()
  echo "Using Open CL version:"
  echo " ", platform.version
  echo "Found platform:"
  echo " ", platform.name
  let devices = platform.getDevices
  echo "Found ", devices.len, " devices:"
  for device in devices:
    echo " ", device.name
    # `maxWorkGroups` queries DEVICE_MAX_WORK_GROUP_SIZE, i.e. how many work
    # items fit in one work group, so it is labelled as such.
    echo " Max work group size: " & $(device.maxWorkGroups)
    # `maxWorkItems` queries DEVICE_MAX_WORK_ITEM_SIZES: one limit per
    # dimension.
    echo " Max work items per dimension: " & $(device.maxWorkItems)
    echo " Global memory: " & $(device.globalMemory) & " bytes"
    echo " Local memory: " & $(device.localMemory) & " bytes"

when isMainModule:
  main()
/**
 * Squared Euclidean distance between a point and a centroid.
 * No square root is taken; the caller in this file only compares the values.
 */
float dist(__global Point* p, __global Centroid* c)
{
  float dx = p->x - c->x;
  float dy = p->y - c->y;
  return dx*dx + dy*dy;
}

/**
 * One work item per point: stores in points[idx].cluster the index of the
 * centroid with the smallest squared distance to that point.
 * Work items with idx >= num_points do nothing.
 */
__kernel void group_by_cluster(__global Point* points, __global Centroid* centroids, int num_points, int num_centroids) {
  int idx = get_global_id(0);
  int i = 0;
  /* -1 marks "no centroid examined yet"; real squared distances are >= 0.
     Float literals keep the comparison in single precision. */
  float min_distance = -1.0f;

  if (idx < num_points) {
    for (i = 0; i < num_centroids; i++) {
      float d = dist(points + idx, centroids + i);

      if (min_distance > d || min_distance == -1.0f) {
        min_distance = d;
        points[idx].cluster = i;
      }
    }
  }
}

/**
 * Per work group, sums the coordinates and counts of the points belonging to
 * each cluster.
 *
 * scratch is indexed as [local id * num_centroids + cluster], so it must hold
 * get_local_size(0) * num_centroids entries. The totals of work group `wid`
 * are written to accum[wid * num_centroids + cluster].
 *
 * The halving loop below only folds in every work item when the local size
 * is a power of two.
 */
__kernel void sum_points(__global Point* points, __global Accum* accum, __local Accum* scratch, int num_points, int num_centroids) {
  int lid = get_local_id(0);
  int gid = get_global_id(0);
  int wid = get_group_id(0);
  int pos = lid * num_centroids;   /* first scratch slot owned by this work item */
  int s;
  int j;

  /* Clear this work item's slots, one per cluster. */
  for (s = pos; s < pos + num_centroids; s++) {
    scratch[s].x_sum = 0.0f;
    scratch[s].y_sum = 0.0f;
    scratch[s].num_points = 0;
  }

  /* Record this work item's own point in the slot of its cluster. */
  if (gid < num_points) {
    int cluster = points[gid].cluster;
    scratch[pos + cluster].x_sum = points[gid].x;
    scratch[pos + cluster].y_sum = points[gid].y;
    scratch[pos + cluster].num_points = 1;
  }
  barrier(CLK_LOCAL_MEM_FENCE);

  /* Pairwise reduction: at each step work item `lid` adds the slots of work
     item `lid + s`, and the barrier separates consecutive steps. */
  for(s = get_local_size(0) / 2; s > 0; s = s / 2) {
    if (lid < s) {
      for (j = 0; j < num_centroids; j++) {
        int dst = pos + j;
        int src = pos + j + s * num_centroids;
        scratch[dst].x_sum += scratch[src].x_sum;
        scratch[dst].y_sum += scratch[src].y_sum;
        scratch[dst].num_points += scratch[src].num_points;
      }
    }
    barrier(CLK_LOCAL_MEM_FENCE);
  }

  /* Work item 0 now holds the totals of the whole group; publish them. */
  if (lid == 0) {
    for (j = 0; j < num_centroids; j++) {
      int h = wid * num_centroids + j;
      accum[h].x_sum = scratch[pos + j].x_sum;
      accum[h].y_sum = scratch[pos + j].y_sum;
      accum[h].num_points = scratch[pos + j].num_points;
    }
  }
}

/**
 * One work item per centroid: adds up the partial sums that every work group
 * stored in accum and moves the centroid to the mean of its points.
 * A centroid with no points is left unchanged.
 */
__kernel void update_centroids(__global Accum* accum, __global Centroid* centroids, int work_groups, int num_centroids) {
  int gid = get_global_id(0);
  float x_sum = 0.0f;
  float y_sum = 0.0f;
  int num_points = 0;
  int j;

  if (gid < num_centroids) {
    for (j = 0; j < work_groups; j++) {
      int h = j * num_centroids + gid;
      x_sum += accum[h].x_sum;
      y_sum += accum[h].y_sum;
      num_points += accum[h].num_points;
    }
    if (num_points > 0) {
      centroids[gid].x = x_sum / num_points;
      centroids[gid].y = y_sum / num_points;
    }
  }
}
proc loadPoints(filename: string): seq[Point] =
  ## Parses the JSON file `filename` and returns one `Point` per item, using
  ## element 0 of the item as `x` and element 1 as `y`. `cluster` is set to -1.
  result = newSeq[Point]()
  for p in parseFile(filename).items:
    result.add(Point(x: p[0].fnum, y: p[1].fnum, cluster: -1))

proc main() =
  ## Runs k-means with 10 centroids over the points of "points.json" on the
  ## first OpenCL device, repeats the whole computation `iterations` times,
  ## then prints the average time per repetition and the final centroids.
  const
    body = staticRead("kmeans.cl")
    n = 10
    iterations = 100
  var
    points = loadPoints("points.json")
    centroids = newSeq[Centroid](n)
  let
    (device, context, queue) = singleDeviceDefaults()
    program = context.createProgram(body)
    # NOTE(review): `maxWorkGroups` queries DEVICE_MAX_WORK_GROUP_SIZE; the
    # value is used here as the number of work groups.
    workGroups = device.maxWorkGroups
    # The reduction in `sum_points` halves the local size at every step, so
    # the local size is rounded up to a power of two.
    workItems = (points.len div workGroups).nextPowerOfTwo

  program.buildOn(device)

  let
    groupByCluster = program.createKernel("group_by_cluster")
    sumPoints = program.createKernel("sum_points")
    updateCentroids = program.createKernel("update_centroids")
    start = cpuTime()
    gpuPoints = context.bufferLike(points)
    gpuCentroids = context.bufferLike(centroids)
    # One partial sum per (work group, centroid) pair.
    gpuAccum = buffer[Accum](context, centroids.len * workGroups)

  groupByCluster.args(gpuPoints, gpuCentroids, points.len.int32, centroids.len.int32)
  sumPoints.args(gpuPoints, gpuAccum, LocalBuffer[Accum](centroids.len * workItems), points.len.int32, centroids.len.int32)
  updateCentroids.args(gpuAccum, gpuCentroids, workGroups.int32, centroids.len.int32)

  for _ in 1 .. iterations:
    # Every repetition starts again from the first `n` points as centroids.
    for i in 0 ..< centroids.len:
      centroids[i].x = points[i].x
      centroids[i].y = points[i].y

    queue.write(points, gpuPoints)
    queue.write(centroids, gpuCentroids)

    for _ in 1 .. 15:
      queue.run(groupByCluster, points.len)
      queue.run(sumPoints, workItems * workGroups, workItems)
      queue.run(updateCentroids, centroids.len)

    queue.read(centroids, gpuCentroids)

  let time = (((cpuTime() - start) * 1000) / float(iterations)).round
  echo format("Made $1 iterations with an average of $2 milliseconds",
    iterations, time)

  for a in centroids:
    echo a

  # Clean up
  release(queue)
  release(groupByCluster)
  release(sumPoints)
  release(updateCentroids)
  release(program)
  release(gpuPoints)
  release(gpuCentroids)
  release(gpuAccum) # was missing: the accumulator buffer was never released
  release(context)

when isMainModule:
  main()
proc main() =
  ## Adds two vectors of one million `float32` values with the `add_vector`
  ## kernel on the first OpenCL device and prints elements 1 .. 100 of the
  ## result.
  const
    size = 1_000_000
    body = staticRead("vadd.cl")
    # body = staticRead("vadd.aocx")
  var
    a = newSeq[float32](size)
    b = newSeq[float32](size)
    c = newSeq[float32](size)

  # a holds i and b holds i squared.
  for i in 0 ..< size:
    a[i] = float32(i)
    b[i] = float32(i * i)

  let
    (device, context, queue) = singleDeviceDefaults()
    program = context.createAndBuild(body, device)
    # Alternative setup that loads a prebuilt binary instead:
    # platform = getPlatformByName("Intel(R) FPGA")
    # device = platform.getDevices[0]
    # context = @[device].createContext
    # queue = context.commandQueueFor(device)
    # program = context.createAndBuildBinary(body, device)
    add = program.createKernel("add_vector")
    gpuA = context.bufferLike(a)
    gpuB = context.bufferLike(b)
    gpuC = context.bufferLike(c)

  add.args(gpuA, gpuB, gpuC, size.int32)

  # Upload the inputs, run one work item per element, download the result.
  write(queue, a, gpuA)
  write(queue, b, gpuB)
  run(queue, add, size)
  read(queue, c, gpuC)

  echo c[1 .. 100]

  # Clean up
  queue.release
  add.release
  program.release
  gpuA.release
  gpuB.release
  gpuC.release
  context.release

when isMainModule:
  main()
type
  PlatformNotFound = object of Exception
  DeviceNotFound = object of Exception

proc newPlatformNotFound(): ref PlatformNotFound =
  ## Builds the exception raised when no suitable platform exists.
  new result
  result.msg = "PlatformNotFound"

proc newDeviceNotFound(): ref DeviceNotFound =
  ## Builds the exception raised when a platform reports no devices.
  new result
  result.msg = "DeviceNotFound"

proc trimTerminator(s: var string) =
  ## Drops trailing NUL characters. OpenCL counts the terminating NUL in the
  ## size it reports for info strings, so it would otherwise end up inside
  ## the Nim string.
  while s.len > 0 and s[s.high] == '\0':
    s.setLen(s.len - 1)

proc platformIds(): seq[PPlatformId] =
  ## Returns every available platform id.
  ## Raises `PlatformNotFound` when there is none, so that callers never take
  ## the address of element 0 of an empty seq.
  var numPlatforms: uint32
  check getPlatformIDs(0, nil, addr numPlatforms)
  if numPlatforms == 0:
    raise newPlatformNotFound()
  result = newSeq[PPlatformId](numPlatforms)
  check getPlatformIDs(numPlatforms, addr result[0], nil)

proc name*(id: PPlatformId): string =
  ## Name of the platform (PLATFORM_NAME), without the trailing NUL.
  var size = 0
  # First call asks only for the size, second call fetches the text.
  check getPlatformInfo(id, PLATFORM_NAME, 0, nil, addr size)
  result = newString(size)
  if size > 0:
    check getPlatformInfo(id, PLATFORM_NAME, size, addr result[0], nil)
    trimTerminator(result)

proc name*(id: PDeviceId): string =
  ## Name of the device (DEVICE_NAME), without the trailing NUL.
  var size = 0
  check getDeviceInfo(id, DEVICE_NAME, 0, nil, addr size)
  result = newString(size)
  if size > 0:
    check getDeviceInfo(id, DEVICE_NAME, size, addr result[0], nil)
    trimTerminator(result)

proc maxWorkGroups*(id: PDeviceId): int =
  ## Value of DEVICE_MAX_WORK_GROUP_SIZE for the device.
  ## Despite the name of this proc, OpenCL documents that value as the
  ## maximum number of work items in one work group.
  check getDeviceInfo(id, DEVICE_MAX_WORK_GROUP_SIZE, sizeof(result), addr result, nil)

proc localMemory*(id: PDeviceId): uint64 =
  ## Value of DEVICE_LOCAL_MEM_SIZE, in bytes.
  # The size passed must be that of the 64 bit result, not of `int`.
  check getDeviceInfo(id, DEVICE_LOCAL_MEM_SIZE, sizeof(result), addr result, nil)

proc globalMemory*(id: PDeviceId): uint64 =
  ## Value of DEVICE_GLOBAL_MEM_SIZE, in bytes.
  check getDeviceInfo(id, DEVICE_GLOBAL_MEM_SIZE, sizeof(result), addr result, nil)

proc maxWorkItems*(id: PDeviceId): seq[int] =
  ## Values of DEVICE_MAX_WORK_ITEM_SIZES: one entry per dimension reported
  ## by DEVICE_MAX_WORK_ITEM_DIMENSIONS.
  # The dimension count is a 32 bit unsigned value on the OpenCL side.
  var dims: uint32
  check getDeviceInfo(id, DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(dims), addr dims, nil)
  result = newSeq[int](dims.int)
  if dims > 0'u32:
    check getDeviceInfo(id, DEVICE_MAX_WORK_ITEM_SIZES, dims.int * sizeof(int), addr result[0], nil)

proc version*(id: PPlatformId): string =
  ## Version string of the platform (PLATFORM_VERSION), without the trailing
  ## NUL.
  var size = 0
  check getPlatformInfo(id, PLATFORM_VERSION, 0, nil, addr size)
  result = newString(size)
  if size > 0:
    check getPlatformInfo(id, PLATFORM_VERSION, size, addr result[0], nil)
    trimTerminator(result)

proc getPlatformByName*(platformName: string): PPlatformId =
  ## Returns the first platform whose name starts with `platformName`.
  ## Raises `PlatformNotFound` when there is no platform at all or none
  ## matches.
  for platform in platformIds():
    if platform.name.substr(0, platformName.high) == platformName:
      return platform

  raise newPlatformNotFound()

proc firstPlatform*(): PPlatformId =
  ## Returns the first platform reported by OpenCL.
  ## Raises `PlatformNotFound` when there is none.
  platformIds()[0]

proc getDevices*(platform: PPlatformId): seq[PDeviceId] =
  ## Returns all devices (DEVICE_TYPE_ALL) of `platform`.
  ## Raises `DeviceNotFound` when the platform reports zero devices.
  var numDevices: uint32
  check getDeviceIDs(platform, DEVICE_TYPE_ALL, 0, nil, addr numDevices)
  if numDevices == 0:
    raise newDeviceNotFound()

  result = newSeq[PDeviceId](numDevices)
  check getDeviceIDs(platform, DEVICE_TYPE_ALL, numDevices, addr result[0], nil)

proc createContext*(devices: seq[PDeviceId]): PContext =
  ## Creates a context spanning `devices`, which must not be empty.
  var status: TClResult
  # Local copy: the address of the first element of a mutable seq is needed.
  var devs = devices
  result = createContext(nil, devs.len.uint32, cast[ptr PDeviceId](addr devs[0]), nil, nil, addr status)
  check status

proc commandQueueFor*(context: PContext, device: PDeviceId): PCommandQueue =
  ## Creates a command queue for `device` in `context`, passing 0 as the
  ## queue properties.
  var status: TClResult
  result = createCommandQueue(context, device, 0, addr status)
  check status

proc openclDefaults*(): tuple[devices: seq[PDeviceId], context: PContext] =
  ## Returns all devices of the first platform together with a context that
  ## spans them.
  let
    platform = firstPlatform()
    devices = platform.getDevices
    context = devices.createContext
  return (devices, context)

proc singleDeviceDefaults*(): tuple[device: PDeviceId, context: PContext, queue: PCommandQueue] =
  ## Returns the first device of the first platform, a context holding only
  ## that device and a command queue for it.
  let
    platform = firstPlatform()
    device = platform.getDevices[0]
    context = @[device].createContext
    queue = context.commandQueueFor(device)
  return (device, context, queue)
proc createProgram*(context: PContext, body: string): PProgram =
  ## Creates (without building) a program from the OpenCL source `body`.
  var status: TClResult
  # The source is passed as a one element array of C strings.
  var lines = [cstring(body)]
  result = createProgramWithSource(context, 1, cast[cstringArray](addr lines), nil, addr status)
  check status

proc createProgramBinary*(context: PContext, device: PDeviceId, body: string): PProgram =
  ## Creates (without building) a program for `device` from the binary image
  ## held in `body`.
  var status: TClResult
  # Filled in by OpenCL; only the overall `status` is checked below.
  var binaryStatus: int32
  var dev = device
  var lines = [cstring(body)]
  var L = body.len
  result = createProgramWithBinary(context, 1, addr dev, addr L, cast[ptr ptr cuchar](addr lines), addr binaryStatus, addr status)
  check status

proc buildOn*(program: PProgram, devices: seq[PDeviceId]) =
  ## Builds `program` for `devices`, which must not be empty.
  var devs = devices
  check buildProgram(program, devs.len.uint32, cast[ptr PDeviceId](addr devs[0]), nil, nil, nil)

proc buildOn*(program: PProgram, device: PDeviceId) = program.buildOn(@[device])
  ## Builds `program` for a single device.

proc createAndBuild*(context: PContext, body: string, devices: seq[PDeviceId]): PProgram =
  ## Creates a program from the source `body` and builds it for `devices`.
  result = createProgram(context, body)
  result.buildOn(devices)

proc createAndBuild*(context: PContext, body: string, device: PDeviceId): PProgram =
  ## Creates a program from the source `body` and builds it for `device`.
  result = createProgram(context, body)
  result.buildOn(device)

proc createAndBuildBinary*(context: PContext, body: string, device: PDeviceId): PProgram =
  ## Creates a program from the binary `body` and builds it for `device`.
  result = createProgramBinary(context, device, body)
  result.buildOn(device)

proc buffer*[A](context: PContext, size: int, flags: Tmem_flags = MEM_READ_WRITE): PMem =
  ## Creates a device buffer with room for `size` elements of type `A`.
  ## No host pointer is passed, so nothing is copied into it here.
  var status: TClResult
  result = createBuffer(context, flags, size * sizeof(A), nil, addr status)
  check status

proc bufferLike*[A](context: PContext, xs: seq[A], flags: Tmem_flags = MEM_READ_WRITE): PMem =
  ## Creates a device buffer with room for as many elements as `xs` has.
  ## The contents of `xs` are not copied.
  buffer[A](context, xs.len, flags)

proc buildErrors*(program: PProgram, devices: seq[PDeviceId]): string =
  ## Returns the build log (PROGRAM_BUILD_LOG) of `program` for the first
  ## entry of `devices`, without trailing NUL characters.
  var logSize: int
  check getProgramBuildInfo(program, devices[0], PROGRAM_BUILD_LOG, 0, nil, addr logSize)
  # The reported size already includes the terminator, so no extra byte is
  # needed.
  result = newString(logSize)
  if logSize > 0:
    check getProgramBuildInfo(program, devices[0], PROGRAM_BUILD_LOG, logSize, addr result[0], nil)
    while result.len > 0 and result[result.high] == '\0':
      result.setLen(result.len - 1)

proc createKernel*(program: PProgram, name: string): PKernel =
  ## Creates the kernel called `name` from the built `program`.
  var status: TClResult
  result = createKernel(program, name, addr status)
  check status

type
  LocalBuffer*[A] = distinct int
    ## Kernel argument standing for local memory with room for that many
    ## elements of type `A`.
  anyInt = int or int32 or int64

# The `setArg` overloads below are what the `args` macro expands to, one call
# per kernel argument.

template setArg(kernel: PKernel, item: PMem, index: int) =
  # Device buffer: the handle itself is the argument value.
  var x = item
  check setKernelArg(kernel, index.uint32, sizeof(Pmem), addr x)

template setArg[A](kernel: PKernel, item: var A, index: int) =
  # Any addressable value: passed by its own size and address.
  check setKernelArg(kernel, index.uint32, sizeof(A), addr item)

template setArg[A](kernel: PKernel, item: LocalBuffer[A], index: int) =
  # Local memory: only a byte size is passed, with a nil value pointer.
  check setKernelArg(kernel, index.uint32, int(item) * sizeof(A), nil)

template setArg(kernel: PKernel, item: anyInt, index: int) =
  # Integer expression: copied into a temporary so its address can be taken.
  var x = item
  check setKernelArg(kernel, index.uint32, sizeof(type(item)), addr x)

macro args*(kernel: Pkernel, args: varargs[untyped]): untyped =
  ## Sets the arguments of `kernel` in order: expands to one
  ## `kernel.setArg(arg, position)` call per argument, starting at position 0.
  result = newStmtList()

  var i = 0 # no pairs for macro for loop
  for arg in items(args):
    let s = quote do:
      `kernel`.setArg(`arg`, `i`)
    result.add(s)
    inc i

proc run*(queue: PCommandQueue, kernel: PKernel, totalWork: int) =
  ## Enqueues `kernel` over a one dimensional range of `totalWork` work
  ## items, passing nil as the local work size.
  var globalWorkSize = [totalWork, 0, 0]
  check enqueueNDRangeKernel(queue, kernel, 1, nil, cast[ptr int](addr globalWorkSize), nil, 0, nil, nil)

proc run*(queue: PCommandQueue, kernel: PKernel, totalWork, localWork: int) =
  ## Enqueues `kernel` over a one dimensional range of `totalWork` work
  ## items, in work groups of `localWork` items.
  var
    globalWorkSize = [totalWork, 0, 0]
    localWorkSize = [localWork, 0, 0]
  check enqueueNDRangeKernel(queue, kernel, 1, nil, cast[ptr int](addr globalWorkSize), cast[ptr int](addr localWorkSize), 0, nil, nil)
globalWorkSize = [a, b, 0] 212 | check enqueueNDRangeKernel(queue, kernel, 2, nil, cast[ptr int](addr globalWorkSize), nil, 0, nil, nil) 213 | 214 | proc run2d*(queue: PCommandQueue, kernel: PKernel, totalWork, localWork: (int, int)) = 215 | let 216 | (a, b) = totalWork 217 | (c, d) = localWork 218 | var 219 | globalWorkSize = [a, b, 0] 220 | localWorkSize = [c, d, 0] 221 | check enqueueNDRangeKernel(queue, kernel, 2, nil, cast[ptr int](addr globalWorkSize), cast[ptr int](addr localWorkSize), 0, nil, nil) 222 | 223 | proc run3d*(queue: PCommandQueue, kernel: PKernel, totalWork: (int, int, int)) = 224 | let (a, b, c) = totalWork 225 | var globalWorkSize = [a, b, c] 226 | check enqueueNDRangeKernel(queue, kernel, 3, nil, cast[ptr int](addr globalWorkSize), nil, 0, nil, nil) 227 | 228 | proc run3d*(queue: PCommandQueue, kernel: PKernel, totalWork, localWork: (int, int, int)) = 229 | let 230 | (a, b, c) = totalWork 231 | (d, e, f) = localWork 232 | var 233 | globalWorkSize = [a, b, c] 234 | localWorkSize = [d, e, f] 235 | check enqueueNDRangeKernel(queue, kernel, 3, nil, cast[ptr int](addr globalWorkSize), cast[ptr int](addr localWorkSize), 0, nil, nil) 236 | 237 | proc write*(queue: PCommandQueue, src: pointer, dest: PMem, size: int) = 238 | check enqueueWriteBuffer(queue, dest, CL_FALSE, 0, size, src, 0, nil, nil) 239 | 240 | proc write*[A](queue: PCommandQueue, src: var seq[A], dest: PMem) = 241 | write(queue, addr src[0], dest, src.len * sizeof(A)) 242 | 243 | proc read*(queue: PCommandQueue, dest: pointer, src: PMem, size: int) = 244 | check enqueueReadBuffer(queue, src, CL_TRUE, 0, size, dest, 0, nil, nil) 245 | 246 | proc read*[A](queue: PCommandQueue, dest: var seq[A], src: PMem) = 247 | read(queue, addr dest[0], src, dest.len * sizeof(A)) 248 | 249 | template release*(queue: PCommandQueue) = check releaseCommandQueue(queue) 250 | template release*(kernel: PKernel) = check releaseKernel(kernel) 251 | template release*(program: PProgram) = check 
releaseProgram(program) 252 | template release*(buffer: PMem) = check releaseMemObject(buffer) 253 | template release*(context: PContext) = check releaseContext(context) 254 | -------------------------------------------------------------------------------- /nimcl.nimble: -------------------------------------------------------------------------------- 1 | version = "0.1.3" 2 | author = "Andrea Ferretti" 3 | description = "OpenCL utilities" 4 | license = "Apache2" 5 | skipDirs = @["examples"] 6 | skipFiles = @["points.json"] 7 | 8 | requires "nim >= 0.15.0", "opencl >= 1.0" 9 | 10 | 11 | template dependsOn*(task: untyped): typed = 12 | exec "nimble " & astToStr(task) 13 | 14 | proc addDefaults() = 15 | switch("cincludes", "/usr/local/cuda/targets/x86_64-linux/include") 16 | switch("clibdir", "/usr/local/cuda/targets/x86_64-linux/lib") 17 | --define: release 18 | --path: "." 19 | 20 | task info, "OpenCL info": 21 | addDefaults() 22 | --run 23 | setCommand "c", "examples/info.nim" 24 | 25 | task clcompile, "OpenCL compiler": 26 | addDefaults() 27 | setCommand "c", "examples/compile.nim" 28 | 29 | task vadd, "run vector add example": 30 | addDefaults() 31 | --run 32 | setCommand "c", "examples/vadd.nim" 33 | 34 | task headers, "compile headers with c2nim": 35 | exec "c2nim examples/point.h" 36 | 37 | task kmeans, "run kmeans example": 38 | dependsOn headers 39 | addDefaults() 40 | --run 41 | setCommand "c", "examples/kmeans.nim" --------------------------------------------------------------------------------