├── .gitignore
├── LICENSE
├── README.md
├── examples
    ├── compile.nim
    ├── info.nim
    ├── kmeans.cl
    ├── kmeans.nim
    ├── point.h
    ├── vadd.cl
    └── vadd.nim
├── nimcl.nim
├── nimcl.nimble
└── points.json


/.gitignore:
--------------------------------------------------------------------------------
1 | nimcache
2 | *.ndb
3 | *.aoco
4 | *.aocx
5 | examples/compile
6 | examples/info
7 | examples/point.nim
8 | examples/kmeans
9 | examples/vadd


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Nim OpenCL utilities
 2 | 
 3 | This is an attempt at a high-level wrapper over the Nim bindings for
 4 | [OpenCL](https://github.com/nim-lang/opencl/).
 5 | 
 6 | For now, things are added when needed and as such, they may not be perfectly
 7 | coherent. Still, they should be enough to cover the simplest cases and get
 8 | started.
 9 | 
10 | Some API changes can also be expected as the library becomes more
11 | comprehensive.
12 | 
13 | ## Vector add example
14 | 
15 | The "hello, world!" of OpenCL:
16 | 
17 | ```nim
18 | const
19 |   body = staticRead("vadd.cl")
20 |   size = 1_000_000
21 | var
22 |   a = newSeq[float32](size)
23 |   b = newSeq[float32](size)
24 |   c = newSeq[float32](size)
25 | 
26 | for i in 0 .. a.high:
27 |   a[i] = i.float32
28 |   b[i] = (i * i).float32
29 | 
30 | let
31 |   (device, context, queue) = singleDeviceDefaults()
32 |   program = context.createAndBuild(body, device)
33 |   add = program.createKernel("add_vector")
34 |   gpuA = context.bufferLike(a)
35 |   gpuB = context.bufferLike(b)
36 |   gpuC = context.bufferLike(c)
37 | 
38 | add.args(gpuA, gpuB, gpuC, size.int32)
39 | 
40 | queue.write(a, gpuA)
41 | queue.write(b, gpuB)
42 | queue.run(add, size)
43 | queue.read(c, gpuC)
44 | 
45 | echo c[1 .. 100]
46 | 
47 | # Clean up
48 | release(queue)
49 | release(add)
50 | release(program)
51 | release(gpuA)
52 | release(gpuB)
53 | release(gpuC)
54 | release(context)
55 | ```
56 | 
57 | The kernel is just
58 | 
59 | ```opencl
60 | __kernel void add_vector(__global float* a, __global float* b, __global float* c, int num_els) {
61 |   int idx = get_global_id(0);
62 |   if (idx < num_els) {
63 |     c[idx] = a[idx] + b[idx];
64 |   }
65 | }
66 | ```


--------------------------------------------------------------------------------
/examples/compile.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2016-2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os, nimcl
16 | 
17 | proc main() =
18 |   ## Builds the OpenCL source file named by the single command-line argument
19 |   ## on every device of the "NVIDIA CUDA" platform and reports the outcome.
20 |   if paramCount() != 1:
21 |     echo "Please, use exactly one argument"
22 |     return
23 |   let source = readFile(paramStr(1))
24 |   let targets = getDevices(getPlatformByName("NVIDIA CUDA"))
25 |   let ctx = createContext(targets)
26 |   let prog = createProgram(ctx, source)
27 |   try:
28 |     buildOn(prog, targets)
29 |     echo "Program compiled"
30 |   except:
31 |     # Any error raised by the build is reported with what buildErrors returns.
32 |     echo "Build failure"
33 |     echo buildErrors(prog, targets)
34 | 
35 | # Run only when this file is compiled as the main program.
36 | when isMainModule:
37 |   main()


--------------------------------------------------------------------------------
/examples/info.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2016-2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import nimcl
16 | 
17 | proc main() =
18 |   ## Prints the first platform's version and name, then a summary per device.
19 |   let first = firstPlatform()
20 |   echo "Using Open CL version:"
21 |   echo "  ", version(first)
22 |   echo "Found platform:"
23 |   echo "  ", name(first)
24 |   let found = getDevices(first)
25 |   echo "Found ", found.len, " devices:"
26 |   for dev in found:
27 |     echo "  ", name(dev)
28 |     echo "  Max work groups for device: " & $(maxWorkGroups(dev))
29 |     echo "  Max work items per group: " & $(maxWorkItems(dev))
30 |     echo "  Global memory: " & $(globalMemory(dev)) & " bytes"
31 |     echo "  Local memory: " & $(localMemory(dev)) & " bytes"
32 | 
33 | when isMainModule: main()


--------------------------------------------------------------------------------
/examples/kmeans.cl:
--------------------------------------------------------------------------------
  1 | /** Copyright 2016-2017 UniCredit S.p.A.
  2 |  *
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | #include "examples/point.h"
 17 | 
 18 | /* Squared Euclidean distance between a point and a centroid (no square root taken). */
 19 | float dist(__global Point* p, __global Centroid* c) {
 20 |     const float diff_x = p->x - c->x;
 21 |     const float diff_y = p->y - c->y;
 22 |     return diff_x * diff_x + diff_y * diff_y;
 23 | }
 24 | 
 25 | /* Stores in each point the index of the centroid closest to it. */
 26 | __kernel void group_by_cluster(__global Point* points, __global Centroid* centroids, int num_points, int num_centroids) {
 27 |   int idx = get_global_id(0);
 28 |   float best = -1.0;  /* -1 means that no centroid has been examined yet */
 29 |   int k;
 30 | 
 31 |   /* Work items past the end of the point array have nothing to do. */
 32 |   if (idx >= num_points) return;
 33 |   for (k = 0; k < num_centroids; k++) {
 34 |     float d = dist(&points[idx], &centroids[k]);
 35 |     if (best == -1.0 || best > d) {
 36 |       best = d;
 37 |       points[idx].cluster = k;
 38 |     }
 39 |   }
 40 | }
 41 | 
 42 | /* Per-work-group partial sums of point coordinates and counts, one entry per cluster. */
 43 | __kernel void sum_points(__global Point* points, __global Accum* accum, __local Accum* scratch, int num_points, int num_centroids) {
 44 |   int local_id = get_local_id(0);
 45 |   int global_id = get_global_id(0);
 46 |   int group_id = get_group_id(0);
 47 |   int base = local_id * num_centroids;  /* start of this work item's slice of scratch */
 48 |   int stride, j;
 49 |   /* Clear this work item's slice: one accumulator per centroid. */
 50 |   for (j = 0; j < num_centroids; j++) {
 51 |     scratch[base + j].x_sum = 0.0;
 52 |     scratch[base + j].y_sum = 0.0;
 53 |     scratch[base + j].num_points = 0;
 54 |   }
 55 |   /* Record this work item's own point (if it has one) under its cluster. */
 56 |   if (global_id < num_points) {
 57 |     int cluster = points[global_id].cluster;
 58 |     scratch[base + cluster].x_sum = points[global_id].x;
 59 |     scratch[base + cluster].y_sum = points[global_id].y;
 60 |     scratch[base + cluster].num_points = 1;
 61 |   }
 62 |   barrier(CLK_LOCAL_MEM_FENCE);
 63 |   /* Halving reduction: each active work item adds in the slice `stride` items ahead. */
 64 |   for (stride = get_local_size(0) / 2; stride > 0; stride = stride / 2) {
 65 |     if (local_id < stride) {
 66 |       for (j = 0; j < num_centroids; j++) {
 67 |         int dst = base + j;
 68 |         int src = dst + stride * num_centroids;
 69 |         scratch[dst].x_sum += scratch[src].x_sum;
 70 |         scratch[dst].y_sum += scratch[src].y_sum;
 71 |         scratch[dst].num_points += scratch[src].num_points;
 72 |       }
 73 |     }
 74 |     barrier(CLK_LOCAL_MEM_FENCE);
 75 |   }
 76 |   /* Work item 0 copies the group's totals into its rows of accum. */
 77 |   if (local_id == 0) {
 78 |     for (j = 0; j < num_centroids; j++) {
 79 |       int out = group_id * num_centroids + j;
 80 |       accum[out].x_sum = scratch[base + j].x_sum;
 81 |       accum[out].y_sum = scratch[base + j].y_sum;
 82 |       accum[out].num_points = scratch[base + j].num_points;
 83 |     }
 84 |   }
 85 | }
 86 | 
 87 | /* Moves each centroid to the mean of the points accumulated for it across work groups. */
 88 | __kernel void update_centroids(__global Accum* accum, __global Centroid* centroids, int work_groups, int num_centroids) {
 89 |   int centroid = get_global_id(0);
 90 |   float total_x = 0.0;
 91 |   float total_y = 0.0;
 92 |   int count = 0;
 93 |   int g;
 94 |   if (centroid >= num_centroids) return;
 95 |   for (g = 0; g < work_groups; g++) {
 96 |     int slot = g * num_centroids + centroid;
 97 |     total_x += accum[slot].x_sum;
 98 |     total_y += accum[slot].y_sum;
 99 |     count += accum[slot].num_points;
100 |   }
101 |   /* A centroid that received no points keeps its previous position. */
102 |   if (count > 0) {
103 |     centroids[centroid].x = total_x / count;
104 |     centroids[centroid].y = total_y / count;
105 |   }
106 | }


--------------------------------------------------------------------------------
/examples/kmeans.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2016-2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import times, json, os, math, strutils, opencl, nimcl, point
16 | 
17 | proc loadPoints(filename: string): seq[Point] =
18 |   result = @[]  # filled from the [x, y] entries of the JSON file; cluster starts at -1
19 |   for pair in items(parseFile(filename)):
20 |     result.add(Point(x: pair[0].fnum, y: pair[1].fnum, cluster: -1))
21 | 
22 | proc main() =
23 |   ## Clusters the points of points.json on the GPU and prints timing and centroids.
24 |   const
25 |     body = staticRead("kmeans.cl")
26 |     n = 10
27 |     iterations = 100
28 |   var
29 |     points = loadPoints("points.json")
30 |     centroids = newSeq[Centroid](n)
31 |   let
32 |     (device, context, queue) = singleDeviceDefaults()
33 |     program = context.createProgram(body)
34 |     workGroups = device.maxWorkGroups
35 |     # Ceiling division, so that workItems * workGroups covers every point.
36 |     workItems = ((points.len + workGroups - 1) div workGroups).nextPowerOfTwo
37 | 
38 |   program.buildOn(device)
39 | 
40 |   let
41 |     groupByCluster = program.createKernel("group_by_cluster")
42 |     sumPoints = program.createKernel("sum_points")
43 |     updateCentroids = program.createKernel("update_centroids")
44 |     start = cpuTime()
45 |     gpuPoints = context.bufferLike(points)
46 |     gpuCentroids = context.bufferLike(centroids)
47 |     gpuAccum = buffer[Accum](context, centroids.len * workGroups)
48 | 
49 |   groupByCluster.args(gpuPoints, gpuCentroids, points.len.int32, centroids.len.int32)
50 |   sumPoints.args(gpuPoints, gpuAccum, LocalBuffer[Accum](centroids.len * workItems), points.len.int32, centroids.len.int32)
51 |   updateCentroids.args(gpuAccum, gpuCentroids, workGroups.int32, centroids.len.int32)
52 | 
53 |   for _ in 1 .. iterations:
54 |     # Every run restarts with the first n points as the initial centroids.
55 |     for i in 0 ..< centroids.len:
56 |       centroids[i].x = points[i].x
57 |       centroids[i].y = points[i].y
58 |     queue.write(points, gpuPoints)
59 |     queue.write(centroids, gpuCentroids)
60 |     for _ in 1 .. 15:
61 |       queue.run(groupByCluster, points.len)
62 |       queue.run(sumPoints, workItems * workGroups, workItems)
63 |       queue.run(updateCentroids, centroids.len)
64 |     queue.read(centroids, gpuCentroids)
65 | 
66 |   let time = (((cpuTime() - start) * 1000) / float(iterations)).round
67 |   echo format("Made $1 iterations with an average of $2 milliseconds",
68 |               iterations, time)
69 | 
70 |   for a in centroids:
71 |     echo a
72 | 
73 |   # Clean up
74 |   release(queue)
75 |   release(groupByCluster)
76 |   release(sumPoints)
77 |   release(updateCentroids)
78 |   release(program)
79 |   release(gpuPoints)
80 |   release(gpuCentroids)
81 |   release(gpuAccum)  # the accumulator buffer was previously never released
82 |   release(context)
83 | 
84 | when isMainModule: main()


--------------------------------------------------------------------------------
/examples/point.h:
--------------------------------------------------------------------------------
 1 | /** Copyright 2016-2017 UniCredit S.p.A.
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | typedef struct {  /* a 2-D point together with the cluster assigned to it */
17 |   float x;
18 |   float y;
19 |   int cluster;    /* centroid index; written by group_by_cluster in kmeans.cl */
20 | } Point;
21 | 
22 | typedef struct {  /* position of one cluster centre */
23 |   float x;
24 |   float y;
25 | } Centroid;
26 | 
27 | typedef struct {  /* running totals for one cluster, as filled in by kmeans.cl */
28 |   float x_sum;     /* sum of the x coordinates */
29 |   float y_sum;     /* sum of the y coordinates */
30 |   int num_points;  /* number of points summed */
31 | } Accum;


--------------------------------------------------------------------------------
/examples/vadd.cl:
--------------------------------------------------------------------------------
 1 | /** Copyright 2016-2017 UniCredit S.p.A.
 2 |  *
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | /* Element-wise sum: c[i] = a[i] + b[i] for every i below num_els. */
17 | __kernel void add_vector(__global float* a, __global float* b,
18 |                          __global float* c, int num_els) {
19 |   const int i = get_global_id(0);
20 |   if (i >= num_els) return;  /* surplus work items do nothing */
21 |   c[i] = a[i] + b[i];
22 | }


--------------------------------------------------------------------------------
/examples/vadd.nim:
--------------------------------------------------------------------------------
 1 | # Copyright 2016-2017 UniCredit S.p.A.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | # http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os, math, nimcl
16 | 
17 | proc main() =
18 |   ## Adds two float32 vectors of `size` elements with the add_vector kernel.
19 |   const
20 |     body = staticRead("vadd.cl")
21 |     # body = staticRead("vadd.aocx")
22 |     size = 1_000_000
23 |   var
24 |     a = newSeq[float32](size)
25 |     b = newSeq[float32](size)
26 |     c = newSeq[float32](size)
27 |   for i in 0 ..< size:
28 |     a[i] = float32(i)
29 |     b[i] = float32(i * i)
30 | 
31 |   let
32 |     (device, context, queue) = singleDeviceDefaults()
33 |     program = createAndBuild(context, body, device)
34 |     # platform = getPlatformByName("Intel(R) FPGA")
35 |     # device = platform.getDevices[0]
36 |     # context = @[device].createContext
37 |     # queue = context.commandQueueFor(device)
38 |     # program = context.createAndBuildBinary(body, device)
39 |     addKernel = createKernel(program, "add_vector")
40 |     devA = bufferLike(context, a)
41 |     devB = bufferLike(context, b)
42 |     devC = bufferLike(context, c)
43 | 
44 |   args(addKernel, devA, devB, devC, int32(size))
45 | 
46 |   write(queue, a, devA)
47 |   write(queue, b, devB)
48 |   run(queue, addKernel, size)
49 |   read(queue, c, devC)
50 | 
51 |   echo c[1 .. 100]
52 | 
53 |   # Clean up
54 |   release(queue)
55 |   release(addKernel)
56 |   release(program)
57 |   release(devA)
58 |   release(devB)
59 |   release(devC)
60 |   release(context)
61 | 
62 | when isMainModule:
63 |   main()


--------------------------------------------------------------------------------
/nimcl.nim:
--------------------------------------------------------------------------------
  1 | # Copyright 2016-2017 UniCredit S.p.A.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | # http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | 
 15 | import opencl, macros
 16 | 
 17 | type
 18 |   PlatformNotFound* = object of Exception  # raised by getPlatformByName and firstPlatform
 19 |   DeviceNotFound* = object of Exception  # raised by getDevices; exported so callers can catch it
 20 | 
 21 | proc newPlatformNotFound(): ref PlatformNotFound =
 22 |   # Builds the exception object; its message is the type name itself.
 23 |   newException(PlatformNotFound, "PlatformNotFound")
 24 | 
 25 | proc newDeviceNotFound(): ref DeviceNotFound =
 26 |   # Builds the exception object; its message is the type name itself.
 27 |   newException(DeviceNotFound, "DeviceNotFound")
 28 | 
 29 | proc name*(id: PPlatformId): string =
 30 |   ## Returns the platform's PLATFORM_NAME string, without the C terminator.
 31 |   var size = 0
 32 |   # First call only asks how many bytes the value needs.
 33 |   check getPlatformInfo(id, PLATFORM_NAME, 0, nil, addr size)
 34 |   if size == 0: return ""  # nothing to fetch; result[0] would be out of range
 35 |   result = newString(size)
 36 |   check getPlatformInfo(id, PLATFORM_NAME, size, addr result[0], nil)
 37 |   # The reported size counts the trailing NUL; drop it so that the result
 38 |   # compares equal to ordinary Nim strings.
 39 |   if result[size - 1] == '\0': result.setLen(size - 1)
 34 | 
 35 | proc name*(id: PDeviceId): string =
 36 |   ## Returns the device's DEVICE_NAME string, without the C terminator.
 37 |   var size = 0
 38 |   # First call only asks how many bytes the value needs.
 39 |   check getDeviceInfo(id, DEVICE_NAME, 0, nil, addr size)
 40 |   if size == 0: return ""  # nothing to fetch; result[0] would be out of range
 41 |   result = newString(size)
 42 |   check getDeviceInfo(id, DEVICE_NAME, size, addr result[0], nil)
 43 |   # The reported size counts the trailing NUL; drop it so that the result
 44 |   # compares equal to ordinary Nim strings.
 45 |   if result[size - 1] == '\0': result.setLen(size - 1)
 40 | 
 41 | proc maxWorkGroups*(id: PDeviceId): int =  # value of DEVICE_MAX_WORK_GROUP_SIZE
 42 |   check getDeviceInfo(id, DEVICE_MAX_WORK_GROUP_SIZE, sizeof(result), addr(result), nil)
 43 | 
 44 | proc localMemory*(id: PDeviceId): uint64 =
 45 |   ## Returns the device's DEVICE_LOCAL_MEM_SIZE value.
 46 |   # Size the buffer from the uint64 result rather than from `int`, which is
 47 |   # only 4 bytes on 32-bit targets and too small for this 8-byte value.
 48 |   check getDeviceInfo(id, DEVICE_LOCAL_MEM_SIZE, sizeof(result), addr result, nil)
 46 | 
 47 | proc globalMemory*(id: PDeviceId): uint64 =
 48 |   ## Returns the device's DEVICE_GLOBAL_MEM_SIZE value.
 49 |   # Size the buffer from the uint64 result rather than from `int`, which is
 50 |   # only 4 bytes on 32-bit targets and too small for this 8-byte value.
 51 |   check getDeviceInfo(id, DEVICE_GLOBAL_MEM_SIZE, sizeof(result), addr result, nil)
 49 | 
 50 | proc maxWorkItems*(id: PDeviceId): seq[int] =
 51 |   ## Returns DEVICE_MAX_WORK_ITEM_SIZES: one entry per work-item dimension.
 52 |   # OpenCL reports the dimension count as a 32-bit unsigned value; reading it
 53 |   # into a wider `int` leaves the remaining bytes untouched and misplaces the
 54 |   # value on big-endian hosts.
 55 |   var dims: uint32
 56 |   check getDeviceInfo(id, DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(dims), addr dims, nil)
 57 |   result = newSeq[int](dims.int)
 58 |   if dims > 0'u32:  # result[0] does not exist for an empty seq
 59 |     check getDeviceInfo(id, DEVICE_MAX_WORK_ITEM_SIZES, dims.int * sizeof(int), addr result[0], nil)
 55 | 
 56 | proc version*(id: PPlatformId): string =
 57 |   ## Returns the platform's PLATFORM_VERSION string, without the C terminator.
 58 |   var size = 0
 59 |   # First call only asks how many bytes the value needs.
 60 |   check getPlatformInfo(id, PLATFORM_VERSION, 0, nil, addr size)
 61 |   if size == 0: return ""  # nothing to fetch; result[0] would be out of range
 62 |   result = newString(size)
 63 |   check getPlatformInfo(id, PLATFORM_VERSION, size, addr result[0], nil)
 64 |   # The reported size counts the trailing NUL; drop it so that the result
 65 |   # compares equal to ordinary Nim strings.
 66 |   if result[size - 1] == '\0': result.setLen(size - 1)
 61 | 
 62 | proc getPlatformByName*(platformName: string): PPlatformId =
 63 |   ## Returns the first platform whose name starts with `platformName`.
 64 |   ## Raises PlatformNotFound when there is no platform at all or none matches.
 65 |   var numPlatforms: uint32
 66 |   check getPlatformIDs(0, nil, addr numPlatforms)
 67 |   # Same guard as in firstPlatform: with no platforms, platforms[0] below
 68 |   # would fail with an index error rather than PlatformNotFound.
 69 |   if numPlatforms == 0:
 70 |     raise newPlatformNotFound()
 71 |   var platforms = newSeq[PPlatformId](numPlatforms)
 72 |   check getPlatformIDs(numPlatforms, addr platforms[0], nil)
 73 | 
 74 |   for platform in platforms:
 75 |     # Prefix comparison: only the first platformName.len characters are compared.
 76 |     if platform.name.substr(0, platformName.high) == platformName:
 77 |       return platform
 78 | 
 79 |   raise newPlatformNotFound()
 73 | 
 74 | proc firstPlatform*(): PPlatformId =
 75 |   ## Returns the first platform OpenCL lists; raises PlatformNotFound if there is none.
 76 |   var count: uint32
 77 |   check getPlatformIDs(0, nil, addr count)
 78 |   if count == 0: raise newPlatformNotFound()
 79 |   var ids = newSeq[PPlatformId](count)
 80 |   check getPlatformIDs(count, addr ids[0], nil)
 81 |   result = ids[0]
 82 | 
 83 | proc getDevices*(platform: PPlatformId): seq[PDeviceId] =
 84 |   ## Lists every device (DEVICE_TYPE_ALL) of `platform`; raises DeviceNotFound if none.
 85 |   var count: uint32
 86 |   check getDeviceIDs(platform, DEVICE_TYPE_ALL, 0, nil, addr count)
 87 |   if count == 0: raise newDeviceNotFound()
 88 | 
 89 |   # The second call fills in the ids now that the count is known.
 90 |   result = newSeq[PDeviceId](count)
 91 |   check getDeviceIDs(platform, DEVICE_TYPE_ALL, count, addr result[0], nil)
 92 | 
 93 | proc createContext*(devices: seq[PDeviceId]): PContext =
 94 |   var status: TClResult
 95 |   var devs = devices
 96 |   result = createContext(nil, devs.len.uint32, cast[ptr PDeviceId](addr devs[0]), nil, nil, addr status)
 97 |   check status
 98 | 
 99 | proc commandQueueFor*(context: PContext, device: PDeviceId): PCommandQueue =
100 |   var status: TClResult
101 |   result = createCommandQueue(context, device, 0, addr status)
102 |   check status
103 | 
proc openclDefaults*(): tuple[devices: seq[PDeviceId], context: PContext] =
  ## Convenience setup: takes the first platform, all of its devices, and a
  ## context spanning those devices.
  let found = getDevices(firstPlatform())
  result = (devices: found, context: createContext(found))
110 | 
proc singleDeviceDefaults*(): tuple[device: PDeviceId, context: PContext, queue: PCommandQueue] =
  ## Convenience setup for one device: the first device of the first platform,
  ## a context containing only that device, and a command queue for it.
  let found = getDevices(firstPlatform())
  result.device = found[0]
  result.context = createContext(@[result.device])
  result.queue = commandQueueFor(result.context, result.device)
118 | 
proc createProgram*(context: PContext, body: string): PProgram =
  ## Creates a program in `context` from the source text `body`, passed as a
  ## single source string with no explicit length array.
  ## The returned status is handed to `check`.
  var
    errCode: TClResult
    sources = [cstring(body)]
  let program = createProgramWithSource(context, 1, cast[cstringArray](addr sources), nil, addr errCode)
  check errCode
  program
124 | 
proc createProgramBinary*(context: PContext, device: PDeviceId, body: string): PProgram =
  ## Creates a program in `context` for `device` from the binary image held in
  ## `body`; the image length is `body.len`.
  ## Only the overall status is handed to `check`; the per-binary status is
  ## received but not inspected.
  var
    errCode: TClResult
    loadStatus: int32
    target = device            # mutable copy so its address can be taken
    image = [cstring(body)]
    imageLen = body.len
  let program = createProgramWithBinary(context, 1, addr target, addr imageLen,
    cast[ptr ptr cuchar](addr image), addr loadStatus, addr errCode)
  check errCode
  program
133 | 
proc buildOn*(program: PProgram, devices: seq[PDeviceId]) =
  ## Builds `program` for all of `devices`, with no build options and no
  ## completion callback. The build status is handed to `check`.
  var targets = devices  # mutable copy so its address can be taken
  let count = uint32(targets.len)
  check buildProgram(program, count, addr targets[0], nil, nil, nil)
137 | 
proc buildOn*(program: PProgram, device: PDeviceId) =
  ## Single-device convenience overload: builds `program` for `device` only.
  buildOn(program, @[device])
139 | 
proc createAndBuild*(context: PContext, body: string, devices: seq[PDeviceId]): PProgram =
  ## Creates a program from the source `body` and builds it for `devices`.
  let program = context.createProgram(body)
  program.buildOn(devices)
  program
143 | 
proc createAndBuild*(context: PContext, body: string, device: PDeviceId): PProgram =
  ## Creates a program from the source `body` and builds it for one `device`.
  let program = context.createProgram(body)
  program.buildOn(device)
  program
147 | 
proc createAndBuildBinary*(context: PContext, body: string, device: PDeviceId): PProgram =
  ## Creates a program from the binary image `body` for `device` and then
  ## builds it for that same device.
  let program = context.createProgramBinary(device, body)
  program.buildOn(device)
  program
151 | 
proc buffer*[A](context: PContext, size: int, flags: Tmem_flags = MEM_READ_WRITE): PMem =
  ## Creates a buffer in `context` large enough for `size` elements of type
  ## `A`, without an initial host pointer. The status is handed to `check`.
  var errCode: TClResult
  let byteCount = size * sizeof(A)
  result = createBuffer(context, flags, byteCount, nil, addr errCode)
  check errCode
156 | 
proc bufferLike*[A](context: PContext, xs: seq[A], flags: Tmem_flags = MEM_READ_WRITE): PMem =
  ## Creates a buffer sized for as many `A` elements as `xs` holds.
  ## The contents of `xs` are not copied.
  result = buffer[A](context, len(xs), flags)
159 | 
proc buildErrors*(program: PProgram, devices: seq[PDeviceId]): string =
  ## Returns the build log (`PROGRAM_BUILD_LOG`) of `program` for the first
  ## entry of `devices`. Query statuses are handed to `check`.
  ##
  ## The size reported by OpenCL includes the C string's NUL terminator; it is
  ## stripped here so the result contains only the log text.
  var logSize: int
  # First call: ask only for the required buffer size.
  check getProgramBuildInfo(program, devices[0], PROGRAM_BUILD_LOG, 0, nil, addr logSize)
  if logSize <= 0:
    # No log available; avoid indexing an empty string below.
    return ""
  result = newString(logSize)
  # Second call: fetch the actual bytes.
  check getProgramBuildInfo(program, devices[0], PROGRAM_BUILD_LOG, logSize, addr result[0], nil)
  while result.len > 0 and result[result.high] == '\0':
    result.setLen(result.len - 1)
165 | 
proc createKernel*(program: PProgram, name: string): PKernel =
  ## Looks up the kernel called `name` in `program`.
  ## The returned status is handed to `check`.
  var errCode: TClResult
  let kernel = createKernel(program, name, addr errCode)
  check errCode
  kernel
170 | 
type
  # Element count for a kernel argument that carries no host data: the
  # matching `setArg` overload multiplies it by `sizeof(A)` and passes a nil
  # pointer to `setKernelArg`.
  LocalBuffer*[A] = distinct int
  # Integer types accepted by the by-value `setArg` overload.
  anyInt = int or int32 or int64
174 | 
template setArg(kernel: PKernel, item: PMem, index: int) =
  # Binds the memory object `item` to argument slot `index` of `kernel`.
  # A local copy is made because `setKernelArg` needs an address.
  var memHandle = item
  check setKernelArg(kernel, uint32(index), sizeof(PMem), addr memHandle)
178 | 
template setArg[A](kernel: PKernel, item: var A, index: int) =
  # Binds the addressable value `item` (of any type `A`) to argument slot
  # `index` of `kernel`, passing `sizeof(A)` bytes starting at its address.
  check setKernelArg(kernel, uint32(index), sizeof(A), addr(item))
181 | 
template setArg[A](kernel: PKernel, item: LocalBuffer[A], index: int) =
  # Binds a size-only argument to slot `index` of `kernel`: the byte count is
  # `item` elements of type `A`, and the data pointer is nil.
  let byteCount = int(item) * sizeof(A)
  check setKernelArg(kernel, uint32(index), byteCount, nil)
184 | 
template setArg(kernel: PKernel, item: anyInt, index: int) =
  # Binds the integer `item` by value to argument slot `index` of `kernel`.
  # A local copy is made because `setKernelArg` needs an address.
  var value = item
  check setKernelArg(kernel, uint32(index), sizeof(value), addr value)
188 | 
macro args*(kernel: Pkernel, args: varargs[untyped]): untyped =
  ## Expands into one `kernel.setArg(arg, i)` statement per entry of `args`,
  ## where `i` is the entry's zero-based position in the argument list.
  result = newStmtList()

  var i = 0 # no pairs for macro for loop
  for arg in items(args):
    # Generate the `setArg` call for the current argument and its position.
    let s = quote do:
      `kernel`.setArg(`arg`, `i`)
    result.add(s)
    inc i
198 | 
proc run*(queue: PCommandQueue, kernel: PKernel, totalWork: int) =
  ## Enqueues `kernel` on `queue` over a 1-dimensional range with a global
  ## size of `totalWork`; no local size, offset or events are supplied.
  ## The enqueue status is handed to `check`.
  var globalSizes = [totalWork, 0, 0]
  check enqueueNDRangeKernel(queue, kernel, 1, nil, addr globalSizes[0], nil, 0, nil, nil)
202 | 
proc run*(queue: PCommandQueue, kernel: PKernel, totalWork, localWork: int) =
  ## Enqueues `kernel` on `queue` over a 1-dimensional range with a global
  ## size of `totalWork` and a local size of `localWork`.
  ## The enqueue status is handed to `check`.
  var globalSizes = [totalWork, 0, 0]
  var localSizes = [localWork, 0, 0]
  check enqueueNDRangeKernel(queue, kernel, 1, nil, addr globalSizes[0], addr localSizes[0], 0, nil, nil)
208 | 
proc run2d*(queue: PCommandQueue, kernel: PKernel, totalWork: (int, int)) =
  ## Enqueues `kernel` on `queue` over a 2-dimensional range whose global
  ## sizes are the two components of `totalWork`; no local size is supplied.
  ## The enqueue status is handed to `check`.
  var globalSizes = [totalWork[0], totalWork[1], 0]
  check enqueueNDRangeKernel(queue, kernel, 2, nil, addr globalSizes[0], nil, 0, nil, nil)
213 | 
proc run2d*(queue: PCommandQueue, kernel: PKernel, totalWork, localWork: (int, int)) =
  ## Enqueues `kernel` on `queue` over a 2-dimensional range. Global sizes
  ## come from `totalWork`, local sizes from `localWork`.
  ## The enqueue status is handed to `check`.
  var globalSizes = [totalWork[0], totalWork[1], 0]
  var localSizes = [localWork[0], localWork[1], 0]
  check enqueueNDRangeKernel(queue, kernel, 2, nil, addr globalSizes[0], addr localSizes[0], 0, nil, nil)
222 | 
proc run3d*(queue: PCommandQueue, kernel: PKernel, totalWork: (int, int, int)) =
  ## Enqueues `kernel` on `queue` over a 3-dimensional range whose global
  ## sizes are the three components of `totalWork`; no local size is supplied.
  ## The enqueue status is handed to `check`.
  var globalSizes = [totalWork[0], totalWork[1], totalWork[2]]
  check enqueueNDRangeKernel(queue, kernel, 3, nil, addr globalSizes[0], nil, 0, nil, nil)
227 | 
proc run3d*(queue: PCommandQueue, kernel: PKernel, totalWork, localWork: (int, int, int)) =
  ## Enqueues `kernel` on `queue` over a 3-dimensional range. Global sizes
  ## come from `totalWork`, local sizes from `localWork`.
  ## The enqueue status is handed to `check`.
  var globalSizes = [totalWork[0], totalWork[1], totalWork[2]]
  var localSizes = [localWork[0], localWork[1], localWork[2]]
  check enqueueNDRangeKernel(queue, kernel, 3, nil, addr globalSizes[0], addr localSizes[0], 0, nil, nil)
236 | 
proc write*(queue: PCommandQueue, src: pointer, dest: PMem, size: int) =
  ## Enqueues a copy of `size` bytes from host memory at `src` into the start
  ## (offset 0) of the buffer `dest`. `CL_FALSE` is passed as the blocking
  ## flag. The enqueue status is handed to `check`.
  let status = enqueueWriteBuffer(queue, dest, CL_FALSE, 0, size, src, 0, nil, nil)
  check status
239 | 
proc write*[A](queue: PCommandQueue, src: var seq[A], dest: PMem) =
  ## Enqueues a copy of the whole contents of `src` into the buffer `dest`,
  ## using the pointer-based `write` overload.
  let firstElem = addr src[0]
  let byteCount = src.len * sizeof(A)
  queue.write(firstElem, dest, byteCount)
242 | 
proc read*(queue: PCommandQueue, dest: pointer, src: PMem, size: int) =
  ## Enqueues a copy of `size` bytes from the start (offset 0) of the buffer
  ## `src` into host memory at `dest`. `CL_TRUE` is passed as the blocking
  ## flag. The enqueue status is handed to `check`.
  let status = enqueueReadBuffer(queue, src, CL_TRUE, 0, size, dest, 0, nil, nil)
  check status
245 | 
proc read*[A](queue: PCommandQueue, dest: var seq[A], src: PMem) =
  ## Fills the existing elements of `dest` from the buffer `src`, using the
  ## pointer-based `read` overload. `dest` must already have the wanted
  ## length, because the byte count is derived from `dest.len`.
  let firstElem = addr dest[0]
  let byteCount = dest.len * sizeof(A)
  queue.read(firstElem, src, byteCount)
248 | 
# `release` overloads: each forwards to the matching OpenCL release call for
# its handle type and passes the returned status to `check`.

# Releases a command queue.
template release*(queue: PCommandQueue) = check releaseCommandQueue(queue)
# Releases a kernel.
template release*(kernel: PKernel) = check releaseKernel(kernel)
# Releases a program.
template release*(program: PProgram) = check releaseProgram(program)
# Releases a memory object (buffer).
template release*(buffer: PMem) = check releaseMemObject(buffer)
# Releases a context.
template release*(context: PContext) = check releaseContext(context)
254 | 


--------------------------------------------------------------------------------
/nimcl.nimble:
--------------------------------------------------------------------------------
# Package metadata.
version       = "0.1.3"
author        = "Andrea Ferretti"
description   = "OpenCL utilities"
license       = "Apache2"
# The examples directory and its data file are listed as skipped.
skipDirs      = @["examples"]
skipFiles     = @["points.json"]

# Dependencies: minimum compiler version and the OpenCL wrapper package.
requires "nim >= 0.15.0", "opencl >= 1.0"
 9 | 
10 | 
template dependsOn*(task: untyped): typed =
  # Runs the named task through a separate `nimble <task>` invocation.
  exec("nimble " & astToStr(task))
13 | 
proc addDefaults() =
  # Compiler switches shared by the example tasks: C include and library
  # directories under a fixed CUDA install root, a release build, and the
  # repository root on the module search path.
  const cudaRoot = "/usr/local/cuda/targets/x86_64-linux"
  switch("cincludes", cudaRoot & "/include")
  switch("clibdir", cudaRoot & "/lib")
  --define: release
  --path: "."
19 | 
task info, "OpenCL info":
  # Compile examples/info.nim with the shared defaults and run the result.
  addDefaults()
  switch("run")
  setCommand("c", "examples/info.nim")
24 | 
task clcompile, "OpenCL compiler":
  # Compile examples/compile.nim with the shared defaults (not run).
  addDefaults()
  setCommand("c", "examples/compile.nim")
28 | 
task vadd, "run vector add example":
  # Compile examples/vadd.nim with the shared defaults and run the result.
  addDefaults()
  switch("run")
  setCommand("c", "examples/vadd.nim")
33 | 
task headers, "compile headers with c2nim":
  # Invoke the external c2nim tool on examples/point.h.
  exec("c2nim examples/point.h")
36 | 
task kmeans, "run kmeans example":
  # Run the `headers` task first, then compile examples/kmeans.nim with the
  # shared defaults and run the result.
  dependsOn(headers)
  addDefaults()
  switch("run")
  setCommand("c", "examples/kmeans.nim")


--------------------------------------------------------------------------------