├── LICENSE ├── NanoCL.cpp └── README.md /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 minxomat 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NanoCL.cpp: -------------------------------------------------------------------------------- 1 | // (c) 2014-2017, github.com/turbo 2 | // MIT licensed 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #ifndef __MINGW32__ 12 | #pragma comment(lib, "opengl32.lib") // MSVC, ICL 13 | #pragma comment(lib, "gdi32.lib") // ICL 14 | #pragma comment(lib, "user32.lib") // ICL 15 | #endif 16 | 17 | #define NanoCL_MAX_LOG_LENGTH 10000 18 | 19 | #define NanoCL_V \ 20 | "varying vec2 pos;void " \ 21 | "main(void){pos=vec2(gl_MultiTexCoord0);gl_Position=gl_Vertex;}" 22 | 23 | #define NanoCL_K \ 24 | "varying vec2 pos;vec4 read(sampler2D m){return texture2D(m,pos);}" \ 25 | "void commit(vec4 d){gl_FragColor=d;}" 26 | 27 | #define defPROC(a, b, ...) \ 28 | typedef a(__stdcall *MCL##b)(__VA_ARGS__); \ 29 | MCL##b b = nullptr 30 | 31 | #define loadPROC(a) \ 32 | a = MCL##a((wglGetProcAddress(#a))); \ 33 | if (a == nullptr) \ 34 | ExitProcess(printf("ERROR: Couldn't load GL(Ext) function %s.\n", #a)); 35 | 36 | #define kernel(k) #k 37 | 38 | namespace NanoCL { 39 | defPROC(const char *, glGetStringi, int, int); 40 | defPROC(void, glActiveTexture, int); 41 | defPROC(void, glAttachShader, unsigned, unsigned); 42 | defPROC(void, glCompileShader, unsigned); 43 | defPROC(void, glDeleteShader, unsigned); 44 | defPROC(void, glGetInfoLogARB, unsigned, int, int *, char *); 45 | defPROC(void, glGetObjectParameterivARB, unsigned, unsigned, int *); 46 | defPROC(void, glLinkProgram, unsigned); 47 | defPROC(void, glShaderSource, unsigned, int, const char **, const int *); 48 | defPROC(void, glUniform1i, int, int); 49 | defPROC(void, glUniform2fv, int, int, const float *); 50 | defPROC(void, glUniform4fv, int, int, const float *); 51 | defPROC(void, glUseProgram, unsigned); 52 | defPROC(void, glBindFramebufferEXT, unsigned, unsigned); 53 | defPROC(void, glDeleteFramebuffersEXT, int, const 
unsigned *); 54 | defPROC(void, glFramebufferTexture2DEXT, unsigned, unsigned, unsigned, unsigned, 55 | int); 56 | defPROC(void, glGenFramebuffersEXT, int, unsigned *); 57 | defPROC(void, glGenerateMipmapEXT, unsigned); 58 | defPROC(int, glGetUniformLocation, unsigned, const char *); 59 | defPROC(int, glCreateProgram, void); 60 | defPROC(int, glCreateShader, unsigned); 61 | 62 | typedef struct NCL_vec4f { float r, g, b, a; } NCL_vec4f; 63 | 64 | inline void gpgpu_fillscreen(void) { 65 | glBegin(6); 66 | glTexCoord2f(0.0f, 0.0f); 67 | glVertex3f(-1.0f, -1.0f, 0.0f); 68 | glTexCoord2f(1.0f, 0.0f); 69 | glVertex3f(+1.0f, -1.0f, 0.0f); 70 | glTexCoord2f(1.0f, 1.0f); 71 | glVertex3f(+1.0f, +1.0f, 0.0f); 72 | glTexCoord2f(0.0f, 1.0f); 73 | glVertex3f(-1.0f, +1.0f, 0.0f); 74 | glEnd(); 75 | } 76 | 77 | class gpgpu_texture2D { 78 | public: 79 | unsigned handle; 80 | int width, height; 81 | 82 | gpgpu_texture2D(int w, int h) : width(w), height(h) { 83 | glGenTextures(1, &handle); 84 | update_data(nullptr); 85 | bind(); 86 | glTexParameteri(0x0DE1, 0x2802, 0x812F); 87 | glTexParameteri(0x0DE1, 0x2803, 0x812F); 88 | bind(); 89 | glTexParameteri(0x0DE1, 0x2801, 0x2600); 90 | glTexParameteri(0x0DE1, 0x2800, 0x2600); 91 | glGenerateMipmapEXT(0x0DE1); 92 | } 93 | 94 | ~gpgpu_texture2D() { 95 | glDeleteTextures(1, &handle); 96 | handle = 0; 97 | } 98 | 99 | void draw(void) const { 100 | bind(); 101 | glEnable(0x0DE1); 102 | gpgpu_fillscreen(); 103 | } 104 | 105 | void bind(int texture_unit = 0) const { 106 | glActiveTexture(0x84C0 + texture_unit); 107 | glBindTexture(0x0DE1, handle); 108 | } 109 | 110 | void update_data(const float *dat) { 111 | bind(); 112 | glTexImage2D(0x0DE1, 0, 0x8814, width, height, 0, 0x1908, 0x1406, dat); 113 | } 114 | 115 | private: 116 | gpgpu_texture2D(const gpgpu_texture2D &src); 117 | void operator=(const gpgpu_texture2D &src); 118 | }; 119 | 120 | inline void gpgpu_tex_scale(unsigned program, gpgpu_texture2D *tex, 121 | const std::string &name) { 122 
| auto scale = glGetUniformLocation(program, (name + "Scale").c_str()); 123 | if (scale <= -1) 124 | return; 125 | const float argARB[] = {1.0f / tex->width, 1.0f / tex->height, 0.0f, 0.0f}; 126 | glUniform2fv(scale, 1, argARB); 127 | } 128 | 129 | inline void gpgpu_add(unsigned program, gpgpu_texture2D *tex, 130 | const std::string &name, int texture_unit = 0) { 131 | glUseProgram(program); 132 | tex->bind(texture_unit); 133 | glUniform1i(glGetUniformLocation(program, name.c_str()), texture_unit); 134 | gpgpu_tex_scale(program, tex, name); 135 | } 136 | 137 | class gpgpu_framebuffer { 138 | public: 139 | unsigned handle; 140 | gpgpu_texture2D *tex; 141 | 142 | explicit gpgpu_framebuffer(gpgpu_texture2D *tex_) { 143 | glGenFramebuffersEXT(1, &handle); 144 | attach(tex_); 145 | } 146 | 147 | ~gpgpu_framebuffer() { 148 | glDeleteFramebuffersEXT(1, &handle); 149 | handle = 0; 150 | } 151 | 152 | void run(unsigned prog) const { 153 | bind(); 154 | glUseProgram(prog); 155 | auto scale_index = glGetUniformLocation(prog, "locationScale"); 156 | if (scale_index > -1) { 157 | const float argARB[] = {float(tex->width), float(tex->height), 0.0f, 158 | 0.0f}; 159 | glUniform4fv(scale_index, 1, argARB); 160 | } 161 | gpgpu_fillscreen(); 162 | } 163 | 164 | void read(float *destination, int width, int height) const { 165 | bind(); 166 | glReadPixels(0, 0, width, height, 0x1908, 0x1406, destination); 167 | } 168 | 169 | void bind(void) const { 170 | glBindFramebufferEXT(0x8D40, handle); 171 | if (tex) 172 | glViewport(0, 0, tex->width, tex->height); 173 | } 174 | 175 | void attach(gpgpu_texture2D *tex_) { 176 | tex = tex_; 177 | if (!tex) 178 | return; 179 | glBindFramebufferEXT(0x8D40, handle); 180 | glFramebufferTexture2DEXT(0x8D40, 0x8CE0, 0x0DE1, tex->handle, 0); 181 | } 182 | 183 | private: 184 | gpgpu_framebuffer(const gpgpu_framebuffer &src); 185 | void operator=(const gpgpu_framebuffer &src); 186 | }; 187 | 188 | inline void gpgpu_init() { 189 | static auto gpgpu_initted 
= false; 190 | if (gpgpu_initted) 191 | return; 192 | gpgpu_initted = true; 193 | 194 | const static PIXELFORMATDESCRIPTOR pfd = { 195 | 0, 0, PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, 196 | 0, 0, 0, 197 | 0, 0, 0, 198 | 0, 0, 0, 199 | 0, 0, 0, 200 | 0, 0, 0, 201 | 0, 0, 0, 202 | 0, 0, 0, 203 | 0, 0}; 204 | 205 | auto hDC = GetDC(CreateWindow( 206 | #if (defined(_MSC_VER) && !defined(__INTEL_COMPILER)) 207 | LPCWSTR 208 | #else 209 | LPCSTR 210 | #endif 211 | ("edit"), 212 | nullptr, WS_POPUP | WS_MINIMIZE, 0, 0, 0, 0, nullptr, nullptr, nullptr, 213 | nullptr)); 214 | 215 | SetPixelFormat(hDC, ChoosePixelFormat(hDC, &pfd), &pfd); 216 | wglMakeCurrent(hDC, wglCreateContext(hDC)); 217 | 218 | int extListSize = 0; 219 | glGetIntegerv(0x821D, &extListSize); 220 | 221 | if (extListSize == 0) 222 | ExitProcess(printf("ERROR: No GPU extensions detected (OpenGL context init " 223 | "might have failed).\n")); 224 | 225 | #pragma warning(disable : 4312) 226 | loadPROC(glActiveTexture); 227 | loadPROC(glGetUniformLocation); 228 | loadPROC(glAttachShader); 229 | loadPROC(glCompileShader); 230 | loadPROC(glCreateProgram); 231 | loadPROC(glCreateShader); 232 | loadPROC(glDeleteShader); 233 | loadPROC(glGetInfoLogARB); 234 | loadPROC(glGetObjectParameterivARB); 235 | loadPROC(glGetUniformLocation); 236 | loadPROC(glLinkProgram); 237 | loadPROC(glShaderSource); 238 | loadPROC(glUniform1i); 239 | loadPROC(glUniform2fv); 240 | loadPROC(glUniform4fv); 241 | loadPROC(glUseProgram); 242 | loadPROC(glBindFramebufferEXT); 243 | loadPROC(glDeleteFramebuffersEXT); 244 | loadPROC(glFramebufferTexture2DEXT); 245 | loadPROC(glGenFramebuffersEXT); 246 | loadPROC(glGenerateMipmapEXT); 247 | loadPROC(glGetStringi); 248 | #pragma warning(default : 4312) 249 | 250 | glDisable(0x0B71); 251 | glDisable(0x0BC0); 252 | glDisable(0x0BE2); 253 | } 254 | 255 | class gpgpu_array; 256 | 257 | class context { 258 | public: 259 | explicit context() { gpgpu_init(); } 260 | 261 | std::vector tex; 262 | 
std::string utils; 263 | 264 | void add_array(gpgpu_array *t) { tex.push_back(t); } 265 | }; 266 | 267 | class gpgpu_array : public gpgpu_texture2D, public gpgpu_framebuffer { 268 | public: 269 | context &env; 270 | std::string name; 271 | 272 | gpgpu_array(context &env_, std::string name, int w, int h) 273 | : gpgpu_texture2D(w, h), gpgpu_framebuffer(this), env(env_), name(name) { 274 | env.add_array(this); 275 | } 276 | 277 | std::string get_tex_decls(void) const { 278 | std::string s; 279 | for (unsigned i = 0; i < env.tex.size(); i++) 280 | s += "uniform sampler2D " + env.tex[i]->name + ";uniform vec2 " + 281 | env.tex[i]->name + "Scale;"; 282 | return s + env.utils; 283 | } 284 | 285 | void swap(gpgpu_array &arr) { 286 | std::swap(gpgpu_framebuffer::handle, arr.gpgpu_framebuffer::handle); 287 | std::swap(gpgpu_texture2D::handle, arr.gpgpu_texture2D::handle); 288 | } 289 | }; 290 | 291 | inline unsigned gpgpu_runprep(gpgpu_array &dest, unsigned code) { 292 | auto &env = dest.env; 293 | unsigned texunit = 0; 294 | for (unsigned i = 0; i < env.tex.size(); i++) { 295 | if (env.tex[i] != &dest) 296 | gpgpu_add(code, env.tex[i], env.tex[i]->name, texunit++); 297 | else 298 | gpgpu_tex_scale(code, env.tex[i], env.tex[i]->name); 299 | } 300 | return code; 301 | } 302 | 303 | // Debug GLSL 304 | void checkShaderOp(unsigned obj, unsigned errType, const char *where) { 305 | int compiled; 306 | glGetObjectParameterivARB(obj, errType, &compiled); 307 | if (compiled) 308 | return; 309 | char errorLog[NanoCL_MAX_LOG_LENGTH]; 310 | glGetInfoLogARB(obj, NanoCL_MAX_LOG_LENGTH, nullptr, errorLog); 311 | 312 | printf("ERROR: Could not build GLSL shader (fatal).\n\n--- CODE DUMP " 313 | "---\n%s\n\n--- ERROR LOG ---\n%s\n\n", 314 | where, errorLog); 315 | } 316 | 317 | unsigned makeShaderObject(int target, const char *code) { 318 | auto h = glCreateShader(target); 319 | glShaderSource(h, 1, &code, nullptr); 320 | glCompileShader(h); 321 | checkShaderOp(h, 0x8B81, code); 322 | 
return h; 323 | } 324 | 325 | unsigned makeProgramObject(const char *vertex, const char *fragment) { 326 | if (glUseProgram == nullptr) 327 | printf("ERROR: glUseProgram could not be loaded.\n"); 328 | 329 | auto p = glCreateProgram(); 330 | auto vo = makeShaderObject(0x8B31, vertex); 331 | auto fo = makeShaderObject(0x8B30, fragment); 332 | 333 | glAttachShader(p, vo); 334 | glAttachShader(p, fo); 335 | glLinkProgram(p); 336 | checkShaderOp(p, 0x8B82, "link"); 337 | glDeleteShader(vo); 338 | glDeleteShader(fo); 339 | 340 | return p; 341 | } 342 | 343 | struct alloc { 344 | NCL_vec4f *data; 345 | unsigned dataWidth; // (CPU) [internal] texture width 346 | unsigned dataHeight; // (CPU) [internal] texture height 347 | gpgpu_array *gpuData; // (GPU) float-texture 348 | 349 | alloc(context &gpuCtx, std::string UID, unsigned length) { 350 | dataWidth = length; 351 | 352 | int x, y; 353 | 354 | for (x = 0; y = length / ++x | 0, x <= y;) 355 | if (!(x * y - length)) 356 | dataHeight = y; 357 | 358 | dataWidth = length / dataHeight; 359 | 360 | data = new NCL_vec4f[length](); 361 | gpuData = new gpgpu_array(gpuCtx, UID, dataWidth, dataHeight); 362 | } 363 | }; 364 | 365 | void push(alloc uID) { uID.gpuData->update_data((float *)(uID.data)); } 366 | 367 | int make(alloc uID, const char *kernel) { 368 | return makeProgramObject( 369 | NanoCL_V, 370 | (NanoCL_K + (*uID.gpuData).get_tex_decls() + std::string(kernel)) 371 | .c_str()); 372 | } 373 | 374 | void run(alloc uID, const char *kernel) { 375 | (*uID.gpuData).run(gpgpu_runprep(*uID.gpuData, make(uID, kernel))); 376 | } 377 | 378 | void run(alloc uID, int progID) { 379 | (*uID.gpuData).run(gpgpu_runprep(*uID.gpuData, progID)); 380 | } 381 | 382 | void pull(alloc uID) { 383 | uID.gpuData->read(((float *)(uID.data)), uID.dataWidth, uID.dataHeight); 384 | } 385 | 386 | void swap(alloc A, alloc B) { A.gpuData->swap(*B.gpuData); } 387 | } 388 | 389 | #undef defPROC 390 | #undef loadPROC 391 | 392 | /* EOF */ 393 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![](http://i.imgur.com/pE9Bswx.png)]() 2 | 3 | NanoCL provides a thin wrapper on top of OpenGL functions for GPGPU programming. It can be used to allocate and manage GPU memory and run GLSL kernels on standard `float[]` arrays. 4 | 5 | This library is the upstream source for [turbo.js](https://github.com/turbo/js), but is vastly more capable. 6 | 7 | Though NanoCL is much simpler than OpenCL, it can provide certain advantages: 8 | 9 | - virtually no compile-time overhead 10 | - completely dependency-free 11 | - compatible with almost all GPUs and GLSL-compatible software renderers 12 | - transparent source code 13 | - allows for easier tracing using performance analysis (perf, Amp, ...) because no superfluous levels of indirection are used 14 | 15 | NanoCL was designed for Windows. It does not require any OpenGL wrapper libraries like GLU or GLFW, just the OpenGL headers. It was tested using TDM-GCC, ICC and MSVC. 16 | 17 | Documentation is in progress. 18 | --------------------------------------------------------------------------------