├── LICENSE
├── NanoCL.cpp
└── README.md


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 minxomat
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NanoCL.cpp:
--------------------------------------------------------------------------------
  1 | // (c) 2014-2017, github.com/turbo
  2 | // MIT licensed
  3 | 
  4 | #include <windows.h>
  5 | 
  6 | #include <string>
  7 | #include <vector>
  8 | 
  9 | #include <GL/gl.h>
 10 | 
 11 | #ifndef __MINGW32__
 12 | #pragma comment(lib, "opengl32.lib") // MSVC, ICL
 13 | #pragma comment(lib, "gdi32.lib")    // ICL
 14 | #pragma comment(lib, "user32.lib")   // ICL
 15 | #endif
 16 | 
// Size, in bytes, of the buffer that receives GLSL compile/link info logs
// (see checkShaderOp).
#define NanoCL_MAX_LOG_LENGTH 10000

// GLSL vertex shader source used for every kernel program (see make()).
// It copies texture coordinate 0 into the varying `pos` and passes the vertex
// position through unchanged.
#define NanoCL_V                                                               \
  "varying vec2 pos;void "                                                     \
  "main(void){pos=vec2(gl_MultiTexCoord0);gl_Position=gl_Vertex;}"

// GLSL prelude prepended to every user kernel (see make()). It declares the
// varying `pos` plus two helpers: read(sampler) samples a texture at `pos`,
// commit(value) writes the fragment's output colour.
#define NanoCL_K                                                               \
  "varying vec2 pos;vec4 read(sampler2D m){return texture2D(m,pos);}"          \
  "void commit(vec4 d){gl_FragColor=d;}"

// defPROC(ret, name, args...) declares a __stdcall function-pointer type
// `MCL<name>` and a variable `name` of that type, initialised to nullptr.
// The pointer is filled in later by loadPROC(name).
#define defPROC(a, b, ...)                                                     \
  typedef a(__stdcall *MCL##b)(__VA_ARGS__);                                   \
  MCL##b b = nullptr

// loadPROC(name) resolves `name` through wglGetProcAddress and stores it in
// the pointer declared by defPROC. If the lookup yields nullptr it prints an
// error and terminates the process (exit code = printf's return value).
// NOTE: this expands to two statements, so do not use it as the body of an
// unbraced if/else/loop.
#define loadPROC(a)                                                            \
  a = MCL##a((wglGetProcAddress(#a)));                                         \
  if (a == nullptr)                                                            \
    ExitProcess(printf("ERROR: Couldn't load GL(Ext) function %s.\n", #a));

// kernel(...) turns its argument tokens into a string literal, so GLSL can be
// written without quotes. Unlike defPROC/loadPROC it is not #undef'd at the end
// of this file.
#define kernel(k) #k
 37 | 
 38 | namespace NanoCL {
// Function pointers for the OpenGL entry points that are resolved at run time
// by gpgpu_init() via loadPROC. Each line declares a pointer type MCL<name> and
// a pointer variable <name>, initially nullptr (see defPROC). The parameter
// lists use plain int/unsigned rather than the GL typedefs.
defPROC(const char *, glGetStringi, int, int);
defPROC(void, glActiveTexture, int);
defPROC(void, glAttachShader, unsigned, unsigned);
defPROC(void, glCompileShader, unsigned);
defPROC(void, glDeleteShader, unsigned);
defPROC(void, glGetInfoLogARB, unsigned, int, int *, char *);
defPROC(void, glGetObjectParameterivARB, unsigned, unsigned, int *);
defPROC(void, glLinkProgram, unsigned);
defPROC(void, glShaderSource, unsigned, int, const char **, const int *);
defPROC(void, glUniform1i, int, int);
defPROC(void, glUniform2fv, int, int, const float *);
defPROC(void, glUniform4fv, int, int, const float *);
defPROC(void, glUseProgram, unsigned);
defPROC(void, glBindFramebufferEXT, unsigned, unsigned);
defPROC(void, glDeleteFramebuffersEXT, int, const unsigned *);
defPROC(void, glFramebufferTexture2DEXT, unsigned, unsigned, unsigned, unsigned,
        int);
defPROC(void, glGenFramebuffersEXT, int, unsigned *);
defPROC(void, glGenerateMipmapEXT, unsigned);
defPROC(int, glGetUniformLocation, unsigned, const char *);
defPROC(int, glCreateProgram, void);
defPROC(int, glCreateShader, unsigned);
 61 | 
 62 | typedef struct NCL_vec4f { float r, g, b, a; } NCL_vec4f;
 63 | 
 64 | inline void gpgpu_fillscreen(void) {
 65 |   glBegin(6);
 66 |   glTexCoord2f(0.0f, 0.0f);
 67 |   glVertex3f(-1.0f, -1.0f, 0.0f);
 68 |   glTexCoord2f(1.0f, 0.0f);
 69 |   glVertex3f(+1.0f, -1.0f, 0.0f);
 70 |   glTexCoord2f(1.0f, 1.0f);
 71 |   glVertex3f(+1.0f, +1.0f, 0.0f);
 72 |   glTexCoord2f(0.0f, 1.0f);
 73 |   glVertex3f(-1.0f, +1.0f, 0.0f);
 74 |   glEnd();
 75 | }
 76 | 
 77 | class gpgpu_texture2D {
 78 | public:
 79 |   unsigned handle;
 80 |   int width, height;
 81 | 
 82 |   gpgpu_texture2D(int w, int h) : width(w), height(h) {
 83 |     glGenTextures(1, &handle);
 84 |     update_data(nullptr);
 85 |     bind();
 86 |     glTexParameteri(0x0DE1, 0x2802, 0x812F);
 87 |     glTexParameteri(0x0DE1, 0x2803, 0x812F);
 88 |     bind();
 89 |     glTexParameteri(0x0DE1, 0x2801, 0x2600);
 90 |     glTexParameteri(0x0DE1, 0x2800, 0x2600);
 91 |     glGenerateMipmapEXT(0x0DE1);
 92 |   }
 93 | 
 94 |   ~gpgpu_texture2D() {
 95 |     glDeleteTextures(1, &handle);
 96 |     handle = 0;
 97 |   }
 98 | 
 99 |   void draw(void) const {
100 |     bind();
101 |     glEnable(0x0DE1);
102 |     gpgpu_fillscreen();
103 |   }
104 | 
105 |   void bind(int texture_unit = 0) const {
106 |     glActiveTexture(0x84C0 + texture_unit);
107 |     glBindTexture(0x0DE1, handle);
108 |   }
109 | 
110 |   void update_data(const float *dat) {
111 |     bind();
112 |     glTexImage2D(0x0DE1, 0, 0x8814, width, height, 0, 0x1908, 0x1406, dat);
113 |   }
114 | 
115 | private:
116 |   gpgpu_texture2D(const gpgpu_texture2D &src);
117 |   void operator=(const gpgpu_texture2D &src);
118 | };
119 | 
120 | inline void gpgpu_tex_scale(unsigned program, gpgpu_texture2D *tex,
121 |                             const std::string &name) {
122 |   auto scale = glGetUniformLocation(program, (name + "Scale").c_str());
123 |   if (scale <= -1)
124 |     return;
125 |   const float argARB[] = {1.0f / tex->width, 1.0f / tex->height, 0.0f, 0.0f};
126 |   glUniform2fv(scale, 1, argARB);
127 | }
128 | 
129 | inline void gpgpu_add(unsigned program, gpgpu_texture2D *tex,
130 |                       const std::string &name, int texture_unit = 0) {
131 |   glUseProgram(program);
132 |   tex->bind(texture_unit);
133 |   glUniform1i(glGetUniformLocation(program, name.c_str()), texture_unit);
134 |   gpgpu_tex_scale(program, tex, name);
135 | }
136 | 
137 | class gpgpu_framebuffer {
138 | public:
139 |   unsigned handle;
140 |   gpgpu_texture2D *tex;
141 | 
142 |   explicit gpgpu_framebuffer(gpgpu_texture2D *tex_) {
143 |     glGenFramebuffersEXT(1, &handle);
144 |     attach(tex_);
145 |   }
146 | 
147 |   ~gpgpu_framebuffer() {
148 |     glDeleteFramebuffersEXT(1, &handle);
149 |     handle = 0;
150 |   }
151 | 
152 |   void run(unsigned prog) const {
153 |     bind();
154 |     glUseProgram(prog);
155 |     auto scale_index = glGetUniformLocation(prog, "locationScale");
156 |     if (scale_index > -1) {
157 |       const float argARB[] = {float(tex->width), float(tex->height), 0.0f,
158 |                               0.0f};
159 |       glUniform4fv(scale_index, 1, argARB);
160 |     }
161 |     gpgpu_fillscreen();
162 |   }
163 | 
164 |   void read(float *destination, int width, int height) const {
165 |     bind();
166 |     glReadPixels(0, 0, width, height, 0x1908, 0x1406, destination);
167 |   }
168 | 
169 |   void bind(void) const {
170 |     glBindFramebufferEXT(0x8D40, handle);
171 |     if (tex)
172 |       glViewport(0, 0, tex->width, tex->height);
173 |   }
174 | 
175 |   void attach(gpgpu_texture2D *tex_) {
176 |     tex = tex_;
177 |     if (!tex)
178 |       return;
179 |     glBindFramebufferEXT(0x8D40, handle);
180 |     glFramebufferTexture2DEXT(0x8D40, 0x8CE0, 0x0DE1, tex->handle, 0);
181 |   }
182 | 
183 | private:
184 |   gpgpu_framebuffer(const gpgpu_framebuffer &src);
185 |   void operator=(const gpgpu_framebuffer &src);
186 | };
187 | 
188 | inline void gpgpu_init() {
189 |   static auto gpgpu_initted = false;
190 |   if (gpgpu_initted)
191 |     return;
192 |   gpgpu_initted = true;
193 | 
194 |   const static PIXELFORMATDESCRIPTOR pfd = {
195 |       0, 0, PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER,
196 |       0, 0, 0,
197 |       0, 0, 0,
198 |       0, 0, 0,
199 |       0, 0, 0,
200 |       0, 0, 0,
201 |       0, 0, 0,
202 |       0, 0, 0,
203 |       0, 0};
204 | 
205 |   auto hDC = GetDC(CreateWindow(
206 | #if (defined(_MSC_VER) && !defined(__INTEL_COMPILER))
207 |       LPCWSTR
208 | #else
209 |       LPCSTR
210 | #endif
211 |       ("edit"),
212 |       nullptr, WS_POPUP | WS_MINIMIZE, 0, 0, 0, 0, nullptr, nullptr, nullptr,
213 |       nullptr));
214 | 
215 |   SetPixelFormat(hDC, ChoosePixelFormat(hDC, &pfd), &pfd);
216 |   wglMakeCurrent(hDC, wglCreateContext(hDC));
217 | 
218 |   int extListSize = 0;
219 |   glGetIntegerv(0x821D, &extListSize);
220 | 
221 |   if (extListSize == 0)
222 |     ExitProcess(printf("ERROR: No GPU extensions detected (OpenGL context init "
223 |                        "might have failed).\n"));
224 | 
225 | #pragma warning(disable : 4312)
226 |   loadPROC(glActiveTexture);
227 |   loadPROC(glGetUniformLocation);
228 |   loadPROC(glAttachShader);
229 |   loadPROC(glCompileShader);
230 |   loadPROC(glCreateProgram);
231 |   loadPROC(glCreateShader);
232 |   loadPROC(glDeleteShader);
233 |   loadPROC(glGetInfoLogARB);
234 |   loadPROC(glGetObjectParameterivARB);
235 |   loadPROC(glGetUniformLocation);
236 |   loadPROC(glLinkProgram);
237 |   loadPROC(glShaderSource);
238 |   loadPROC(glUniform1i);
239 |   loadPROC(glUniform2fv);
240 |   loadPROC(glUniform4fv);
241 |   loadPROC(glUseProgram);
242 |   loadPROC(glBindFramebufferEXT);
243 |   loadPROC(glDeleteFramebuffersEXT);
244 |   loadPROC(glFramebufferTexture2DEXT);
245 |   loadPROC(glGenFramebuffersEXT);
246 |   loadPROC(glGenerateMipmapEXT);
247 |   loadPROC(glGetStringi);
248 | #pragma warning(default : 4312)
249 | 
250 |   glDisable(0x0B71);
251 |   glDisable(0x0BC0);
252 |   glDisable(0x0BE2);
253 | }
254 | 
255 | class gpgpu_array;
256 | 
257 | class context {
258 | public:
259 |   explicit context() { gpgpu_init(); }
260 | 
261 |   std::vector<gpgpu_array *> tex;
262 |   std::string utils;
263 | 
264 |   void add_array(gpgpu_array *t) { tex.push_back(t); }
265 | };
266 | 
267 | class gpgpu_array : public gpgpu_texture2D, public gpgpu_framebuffer {
268 | public:
269 |   context &env;
270 |   std::string name;
271 | 
272 |   gpgpu_array(context &env_, std::string name, int w, int h)
273 |       : gpgpu_texture2D(w, h), gpgpu_framebuffer(this), env(env_), name(name) {
274 |     env.add_array(this);
275 |   }
276 | 
277 |   std::string get_tex_decls(void) const {
278 |     std::string s;
279 |     for (unsigned i = 0; i < env.tex.size(); i++)
280 |       s += "uniform sampler2D " + env.tex[i]->name + ";uniform vec2 " +
281 |            env.tex[i]->name + "Scale;";
282 |     return s + env.utils;
283 |   }
284 | 
285 |   void swap(gpgpu_array &arr) {
286 |     std::swap(gpgpu_framebuffer::handle, arr.gpgpu_framebuffer::handle);
287 |     std::swap(gpgpu_texture2D::handle, arr.gpgpu_texture2D::handle);
288 |   }
289 | };
290 | 
291 | inline unsigned gpgpu_runprep(gpgpu_array &dest, unsigned code) {
292 |   auto &env = dest.env;
293 |   unsigned texunit = 0;
294 |   for (unsigned i = 0; i < env.tex.size(); i++) {
295 |     if (env.tex[i] != &dest)
296 |       gpgpu_add(code, env.tex[i], env.tex[i]->name, texunit++);
297 |     else
298 |       gpgpu_tex_scale(code, env.tex[i], env.tex[i]->name);
299 |   }
300 |   return code;
301 | }
302 | 
303 | // Debug GLSL
304 | void checkShaderOp(unsigned obj, unsigned errType, const char *where) {
305 |   int compiled;
306 |   glGetObjectParameterivARB(obj, errType, &compiled);
307 |   if (compiled)
308 |     return;
309 |   char errorLog[NanoCL_MAX_LOG_LENGTH];
310 |   glGetInfoLogARB(obj, NanoCL_MAX_LOG_LENGTH, nullptr, errorLog);
311 | 
312 |   printf("ERROR: Could not build GLSL shader (fatal).\n\n--- CODE DUMP "
313 |          "---\n%s\n\n--- ERROR LOG ---\n%s\n\n",
314 |          where, errorLog);
315 | }
316 | 
317 | unsigned makeShaderObject(int target, const char *code) {
318 |   auto h = glCreateShader(target);
319 |   glShaderSource(h, 1, &code, nullptr);
320 |   glCompileShader(h);
321 |   checkShaderOp(h, 0x8B81, code);
322 |   return h;
323 | }
324 | 
325 | unsigned makeProgramObject(const char *vertex, const char *fragment) {
326 |   if (glUseProgram == nullptr)
327 |     printf("ERROR: glUseProgram could not be loaded.\n");
328 | 
329 |   auto p = glCreateProgram();
330 |   auto vo = makeShaderObject(0x8B31, vertex);
331 |   auto fo = makeShaderObject(0x8B30, fragment);
332 | 
333 |   glAttachShader(p, vo);
334 |   glAttachShader(p, fo);
335 |   glLinkProgram(p);
336 |   checkShaderOp(p, 0x8B82, "link");
337 |   glDeleteShader(vo);
338 |   glDeleteShader(fo);
339 | 
340 |   return p;
341 | }
342 | 
343 | struct alloc {
344 |   NCL_vec4f *data;
345 |   unsigned dataWidth;   // (CPU) [internal] texture width
346 |   unsigned dataHeight;  // (CPU) [internal] texture height
347 |   gpgpu_array *gpuData; // (GPU) float-texture
348 | 
349 |   alloc(context &gpuCtx, std::string UID, unsigned length) {
350 |     dataWidth = length;
351 | 
352 |     int x, y;
353 | 
354 |     for (x = 0; y = length / ++x | 0, x <= y;)
355 |       if (!(x * y - length))
356 |         dataHeight = y;
357 | 
358 |     dataWidth = length / dataHeight;
359 | 
360 |     data = new NCL_vec4f[length]();
361 |     gpuData = new gpgpu_array(gpuCtx, UID, dataWidth, dataHeight);
362 |   }
363 | };
364 | 
365 | void push(alloc uID) { uID.gpuData->update_data((float *)(uID.data)); }
366 | 
367 | int make(alloc uID, const char *kernel) {
368 |   return makeProgramObject(
369 |       NanoCL_V,
370 |       (NanoCL_K + (*uID.gpuData).get_tex_decls() + std::string(kernel))
371 |           .c_str());
372 | }
373 | 
374 | void run(alloc uID, const char *kernel) {
375 |   (*uID.gpuData).run(gpgpu_runprep(*uID.gpuData, make(uID, kernel)));
376 | }
377 | 
378 | void run(alloc uID, int progID) {
379 |   (*uID.gpuData).run(gpgpu_runprep(*uID.gpuData, progID));
380 | }
381 | 
382 | void pull(alloc uID) {
383 |   uID.gpuData->read(((float *)(uID.data)), uID.dataWidth, uID.dataHeight);
384 | }
385 | 
386 | void swap(alloc A, alloc B) { A.gpuData->swap(*B.gpuData); }
387 | }
388 | 
389 | #undef defPROC
390 | #undef loadPROC
391 | 
392 | /* EOF */
393 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![](http://i.imgur.com/pE9Bswx.png)]()
 2 | 
 3 | NanoCL provides a thin wrapper on top of OpenGL functions for GPGPU programming. It can be used to allocate and manage GPU memory and run GLSL kernels on standard `float[]` arrays.
 4 | 
 5 | This library is the upstream source for [turbo.js](https://github.com/turbo/js), but is vastly more capable. 
 6 | 
 7 | Though NanoCL is much more trivial than OpenCL, it can provide certain advantages:
 8 | 
 9 | - virtually no compile-time overhead
10 | - completely dependency-free
11 | - compatible with almost all GPUs and GLSL-compatible software renderers
12 | - transparent source code
13 | - allows for easier tracing using performance analysis (perf, Amp, ...) because no superfluous levels of indirection are used
14 | 
15 | NanoCL was designed for Windows. It does not require any OpenGL wrapper libraries like GLU or GLFW, just the OpenGL headers. It was tested using TDM-GCC, ICC and MSVC.
16 | 
17 | Documentation is in progress.
18 | 


--------------------------------------------------------------------------------