├── CMakeLists.txt ├── apigen ├── .idea │ ├── apigen.iml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── apigen.py ├── gen.c └── glfuncs.txt ├── docs └── screenshots │ ├── doom3_1.png │ ├── doom3_2.png │ ├── hl_of_demo.png │ ├── quake2.png │ ├── quake3_demo.png │ ├── ut2003.png │ └── ut99_goty.png ├── opengl32.def ├── rast_fasterizer ├── CMakeLists.txt ├── clip.h └── fasterizer.cpp ├── rast_opencl ├── CMakeLists.txt └── rast_opencl.cpp ├── rast_reference ├── CMakeLists.txt ├── kernel.h ├── kernels │ ├── rast_tex_dst_src.cpp │ ├── rast_tex_dst_zero.cpp │ ├── rast_tex_one_msa.cpp │ ├── rast_tex_one_msc.cpp │ ├── rast_tex_one_one.cpp │ ├── rast_tex_one_zero.cpp │ ├── rast_tex_sa_msa.cpp │ └── template.h └── rast_reference.cpp ├── rast_wireframe ├── CMakeLists.txt ├── rast_wireframe.cpp ├── surface.cpp └── surface.h ├── readme.md ├── softgl.cfg └── source ├── GL.h ├── buffer.cpp ├── buffer.h ├── common.cpp ├── common.h ├── config.cpp ├── config.h ├── context.cpp ├── context.h ├── exports.h ├── forward.h ├── game_id.cpp ├── game_id.h ├── gdi_hook.cpp ├── gdi_hook.h ├── log.cpp ├── log.h ├── main.cpp ├── math.h ├── matrix.h ├── opengl.cpp ├── primative.cpp ├── primative.h ├── profile.cpp ├── profile.h ├── raster.cpp ├── raster.h ├── state.h ├── texture.cpp ├── texture.h ├── wgl.cpp ├── wgl.h ├── window.h └── windows.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | project(opengl32) 3 | 4 | add_definitions(-D_CRT_SECURE_NO_WARNINGS) 5 | 6 | add_subdirectory(rast_fasterizer) 7 | add_subdirectory(rast_wireframe) 8 | add_subdirectory(rast_reference) 9 | add_subdirectory(rast_opencl) 10 | 11 | file(GLOB C_FILES source/*.cpp) 12 | file(GLOB H_FILES source/*.h) 13 | 14 | add_library(opengl32 SHARED ${C_FILES} ${H_FILES} opengl32.def) 15 | -------------------------------------------------------------------------------- /apigen/.idea/apigen.iml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /apigen/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /apigen/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /apigen/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 12 | 13 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | void 33 | 34 | 35 | 36 | 41 | 42 | 43 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 70 | 71 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 102 | 103 | 119 | 120 | 131 | 132 | 150 | 151 | 169 | 170 | 190 | 191 | 212 | 213 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 248 | 249 | 250 | 251 | 1504959552633 252 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 284 | 285 | 286 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | -------------------------------------------------------------------------------- /apigen/apigen.py: -------------------------------------------------------------------------------- 1 | def print_function(proto, args, name): 2 | typedef = '{0}_t'.format(name) 3 | ret_type = ' '.join(proto) 4 | arg_proto = list() 5 | arg_name = list() 6 | if len(args) > 0: 7 | for arg in args: 8 | arg_tokens = arg.split() 9 | arg_proto += [' '.join(arg_tokens[:-1])] 10 | arg_name += list() 
if len(arg_tokens) == 1 else [arg_tokens[-1]] 11 | ret = '' if ret_type == 'void' else 'return ' 12 | print 'ATTRIB' 13 | print '{0} APIENTRY {2}({1}) {{'.format(' '.join(proto), ', '.join(args), name) 14 | # print ' typedef {0} (WINAPI * {1})({2});'.format(ret_type, typedef, ', '.join(arg_proto)) 15 | # print ' {0} thunk = ({0})gl_func_table[e_{1}];'.format(typedef, name) 16 | # print ' assert(thunk);' 17 | # print ' {0}thunk({1});'.format(ret, ', '.join(arg_name)) 18 | print '}' 19 | print '' 20 | 21 | 22 | g_func_names = [] 23 | 24 | 25 | 26 | def parse(line): 27 | global g_func_names 28 | lparen = line.find('(') 29 | rparen = line.find(')') 30 | arg = line[lparen+1:rparen] 31 | arg_tokens = arg.split(',') 32 | arg_tokens = list(x.strip() for x in arg_tokens) 33 | proto = line[0:lparen].strip() 34 | proto_tokens = proto.split() 35 | proto_tokens = list(x.strip() for x in proto_tokens) 36 | func_name = proto_tokens[-1] 37 | print_function(proto_tokens[:-1], arg_tokens, func_name) 38 | g_func_names += [func_name] 39 | 40 | 41 | def main(): 42 | with open('glfuncs.txt', 'rb') as f: 43 | for line in f.readlines(): 44 | parse(line.strip('\r\n')) 45 | 46 | print 'const char *gl_func_names[e_gl_func_count__] = {' 47 | for x in g_func_names: 48 | print ' "{0}",'.format(x) 49 | print '};' 50 | print '' 51 | print 'enum gl_func_enum_t {' 52 | for x in g_func_names: 53 | print ' e_{0},'.format(x) 54 | print ' e_gl_func_count__' 55 | print '};' 56 | 57 | if __name__ == '__main__': 58 | main() 59 | -------------------------------------------------------------------------------- /docs/screenshots/doom3_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/doom3_1.png -------------------------------------------------------------------------------- /docs/screenshots/doom3_2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/doom3_2.png -------------------------------------------------------------------------------- /docs/screenshots/hl_of_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/hl_of_demo.png -------------------------------------------------------------------------------- /docs/screenshots/quake2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/quake2.png -------------------------------------------------------------------------------- /docs/screenshots/quake3_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/quake3_demo.png -------------------------------------------------------------------------------- /docs/screenshots/ut2003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/ut2003.png -------------------------------------------------------------------------------- /docs/screenshots/ut99_goty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/ut99_goty.png -------------------------------------------------------------------------------- /opengl32.def: -------------------------------------------------------------------------------- 1 | LIBRARY OPENGL32 2 | EXPORTS 3 | 
glAccum 4 | glActiveTexture 5 | glActiveTextureARB 6 | glAlphaFunc 7 | glAreTexturesResident 8 | glArrayElement 9 | glBegin 10 | glBindTexture 11 | glBitmap 12 | glBlendFunc 13 | glCallList 14 | glCallLists 15 | glClear 16 | glClearAccum 17 | glClearColor 18 | glClearDepth 19 | glClearIndex 20 | glClearStencil 21 | glClientActiveTextureARB 22 | glClipPlane 23 | glColor3b 24 | glColor3bv 25 | glColor3d 26 | glColor3dv 27 | glColor3f 28 | glColor3fv 29 | glColor3i 30 | glColor3iv 31 | glColor3s 32 | glColor3sv 33 | glColor3ub 34 | glColor3ubv 35 | glColor3ui 36 | glColor3uiv 37 | glColor3us 38 | glColor3usv 39 | glColor4b 40 | glColor4bv 41 | glColor4d 42 | glColor4dv 43 | glColor4f 44 | glColor4fv 45 | glColor4i 46 | glColor4iv 47 | glColor4s 48 | glColor4sv 49 | glColor4ub 50 | glColor4ubv 51 | glColor4ui 52 | glColor4uiv 53 | glColor4us 54 | glColor4usv 55 | glColorMask 56 | glColorMaterial 57 | glColorPointer 58 | glCompressedTexSubImage2DARB 59 | glCompressedTexImage2DARB 60 | glCopyPixels 61 | glCopyTexImage1D 62 | glCopyTexImage2D 63 | glCopyTexSubImage1D 64 | glCopyTexSubImage2D 65 | glCullFace 66 | glDebugEntry 67 | glDeleteLists 68 | glDeleteTextures 69 | glDepthFunc 70 | glDepthMask 71 | glDepthRange 72 | glDisable 73 | glDisableClientState 74 | glDrawArrays 75 | glDrawBuffer 76 | glDrawElements 77 | glDrawRangeElements 78 | glDrawPixels 79 | glEdgeFlag 80 | glEdgeFlagPointer 81 | glEdgeFlagv 82 | glEnable 83 | glEnableClientState 84 | glEnd 85 | glEndList 86 | glEvalCoord1d 87 | glEvalCoord1dv 88 | glEvalCoord1f 89 | glEvalCoord1fv 90 | glEvalCoord2d 91 | glEvalCoord2dv 92 | glEvalCoord2f 93 | glEvalCoord2fv 94 | glEvalMesh1 95 | glEvalMesh2 96 | glEvalPoint1 97 | glEvalPoint2 98 | glFeedbackBuffer 99 | glFinish 100 | glFlush 101 | glFogf 102 | glFogfv 103 | glFogi 104 | glFogiv 105 | glFrontFace 106 | glFrustum 107 | glGenLists 108 | glGenTextures 109 | glGetBooleanv 110 | glGetClipPlane 111 | glGetDoublev 112 | glGetError 113 | glGetFloatv 114 | 
glGetIntegerv 115 | glGetLightfv 116 | glGetLightiv 117 | glGetMapdv 118 | glGetMapfv 119 | glGetMapiv 120 | glGetMaterialfv 121 | glGetMaterialiv 122 | glGetPixelMapfv 123 | glGetPixelMapuiv 124 | glGetPixelMapusv 125 | glGetPointerv 126 | glGetPolygonStipple 127 | glGetString 128 | glGetTexEnvfv 129 | glGetTexEnviv 130 | glGetTexGendv 131 | glGetTexGenfv 132 | glGetTexGeniv 133 | glGetTexImage 134 | glGetTexLevelParameterfv 135 | glGetTexLevelParameteriv 136 | glGetTexParameterfv 137 | glGetTexParameteriv 138 | glHint 139 | glIndexMask 140 | glIndexPointer 141 | glIndexd 142 | glIndexdv 143 | glIndexf 144 | glIndexfv 145 | glIndexi 146 | glIndexiv 147 | glIndexs 148 | glIndexsv 149 | glIndexub 150 | glIndexubv 151 | glInitNames 152 | glInterleavedArrays 153 | glIsEnabled 154 | glIsList 155 | glIsTexture 156 | glLightModelf 157 | glLightModelfv 158 | glLightModeli 159 | glLightModeliv 160 | glLightf 161 | glLightfv 162 | glLighti 163 | glLightiv 164 | glLineStipple 165 | glLineWidth 166 | glListBase 167 | glLoadIdentity 168 | glLoadMatrixd 169 | glLoadMatrixf 170 | glLoadName 171 | glLogicOp 172 | glMap1d 173 | glMap1f 174 | glMap2d 175 | glMap2f 176 | glMapGrid1d 177 | glMapGrid1f 178 | glMapGrid2d 179 | glMapGrid2f 180 | glMaterialf 181 | glMaterialfv 182 | glMateriali 183 | glMaterialiv 184 | glMatrixMode 185 | glMultiTexCoord1dARB 186 | glMultiTexCoord1fARB 187 | glMultiTexCoord1iARB 188 | glMultiTexCoord1sARB 189 | glMultiTexCoord2dARB 190 | glMultiTexCoord2fARB 191 | glMultiTexCoord2iARB 192 | glMultiTexCoord2sARB 193 | glMultiTexCoord3dARB 194 | glMultiTexCoord3fARB 195 | glMultiTexCoord3iARB 196 | glMultiTexCoord3sARB 197 | glMultiTexCoord4dARB 198 | glMultiTexCoord4fARB 199 | glMultiTexCoord4iARB 200 | glMultiTexCoord4sARB 201 | glMultMatrixd 202 | glMultMatrixf 203 | glNewList 204 | glNormal3b 205 | glNormal3bv 206 | glNormal3d 207 | glNormal3dv 208 | glNormal3f 209 | glNormal3fv 210 | glNormal3i 211 | glNormal3iv 212 | glNormal3s 213 | glNormal3sv 214 | 
glNormalPointer 215 | glOrtho 216 | glPassThrough 217 | glPixelMapfv 218 | glPixelMapuiv 219 | glPixelMapusv 220 | glPixelStoref 221 | glPixelStorei 222 | glPixelTransferf 223 | glPixelTransferi 224 | glPixelZoom 225 | glPointSize 226 | glPolygonMode 227 | glPolygonOffset 228 | glPolygonStipple 229 | glPopAttrib 230 | glPopClientAttrib 231 | glPopMatrix 232 | glPopName 233 | glPrioritizeTextures 234 | glPushAttrib 235 | glPushClientAttrib 236 | glPushMatrix 237 | glPushName 238 | glRasterPos2d 239 | glRasterPos2dv 240 | glRasterPos2f 241 | glRasterPos2fv 242 | glRasterPos2i 243 | glRasterPos2iv 244 | glRasterPos2s 245 | glRasterPos2sv 246 | glRasterPos3d 247 | glRasterPos3dv 248 | glRasterPos3f 249 | glRasterPos3fv 250 | glRasterPos3i 251 | glRasterPos3iv 252 | glRasterPos3s 253 | glRasterPos3sv 254 | glRasterPos4d 255 | glRasterPos4dv 256 | glRasterPos4f 257 | glRasterPos4fv 258 | glRasterPos4i 259 | glRasterPos4iv 260 | glRasterPos4s 261 | glRasterPos4sv 262 | glReadBuffer 263 | glReadPixels 264 | glRectd 265 | glRectdv 266 | glRectf 267 | glRectfv 268 | glRecti 269 | glRectiv 270 | glRects 271 | glRectsv 272 | glRenderMode 273 | glRotated 274 | glRotatef 275 | glScaled 276 | glScalef 277 | glScissor 278 | glSelectBuffer 279 | glShadeModel 280 | glStencilFunc 281 | glStencilMask 282 | glStencilOp 283 | glTexCoord1d 284 | glTexCoord1dv 285 | glTexCoord1f 286 | glTexCoord1fv 287 | glTexCoord1i 288 | glTexCoord1iv 289 | glTexCoord1s 290 | glTexCoord1sv 291 | glTexCoord2d 292 | glTexCoord2dv 293 | glTexCoord2f 294 | glTexCoord2fv 295 | glTexCoord2i 296 | glTexCoord2iv 297 | glTexCoord2s 298 | glTexCoord2sv 299 | glTexCoord3d 300 | glTexCoord3dv 301 | glTexCoord3f 302 | glTexCoord3fv 303 | glTexCoord3i 304 | glTexCoord3iv 305 | glTexCoord3s 306 | glTexCoord3sv 307 | glTexCoord4d 308 | glTexCoord4dv 309 | glTexCoord4f 310 | glTexCoord4fv 311 | glTexCoord4i 312 | glTexCoord4iv 313 | glTexCoord4s 314 | glTexCoord4sv 315 | glTexCoordPointer 316 | glTexEnvf 317 | 
glTexEnvfv 318 | glTexEnvi 319 | glTexEnviv 320 | glTexGend 321 | glTexGendv 322 | glTexGenf 323 | glTexGenfv 324 | glTexGeni 325 | glTexGeniv 326 | glTexImage1D 327 | glTexImage2D 328 | glTexParameterf 329 | glTexParameterfv 330 | glTexParameteri 331 | glTexParameteriv 332 | glTexSubImage1D 333 | glTexSubImage2D 334 | glTranslated 335 | glTranslatef 336 | glVertex2d 337 | glVertex2dv 338 | glVertex2f 339 | glVertex2fv 340 | glVertex2i 341 | glVertex2iv 342 | glVertex2s 343 | glVertex2sv 344 | glVertex3d 345 | glVertex3dv 346 | glVertex3f 347 | glVertex3fv 348 | glVertex3i 349 | glVertex3iv 350 | glVertex3s 351 | glVertex3sv 352 | glVertex4d 353 | glVertex4dv 354 | glVertex4f 355 | glVertex4fv 356 | glVertex4i 357 | glVertex4iv 358 | glVertex4s 359 | glVertex4sv 360 | glVertexPointer 361 | glViewport 362 | 363 | wglCopyContext=wglCopyContext_imp 364 | wglCreateContext=wglCreateContext_imp 365 | wglCreateLayerContext=wglCreateLayerContext_imp 366 | wglDeleteContext=wglDeleteContext_imp 367 | wglDescribeLayerPlane=wglDescribeLayerPlane_imp 368 | wglGetCurrentContext=wglGetCurrentContext_imp 369 | wglGetCurrentDC=wglGetCurrentDC_imp 370 | wglGetLayerPaletteEntries=wglGetLayerPaletteEntries_imp 371 | wglGetProcAddress=wglGetProcAddress_imp 372 | wglMakeCurrent=wglMakeCurrent_imp 373 | wglRealizeLayerPalette=wglRealizeLayerPalette_imp 374 | wglSetLayerPaletteEntries=wglSetLayerPaletteEntries_imp 375 | wglShareLists=wglShareLists_imp 376 | wglSwapBuffers=wglSwapBuffers_imp 377 | wglSwapLayerBuffers=wglSwapLayerBuffers_imp 378 | wglSwapMultipleBuffers=wglSwapMultipleBuffers_imp 379 | wglUseFontBitmapsA=wglUseFontBitmapsA_imp 380 | wglUseFontBitmapsW=wglUseFontBitmapsW_imp 381 | wglUseFontOutlinesA=wglUseFontOutlinesA_imp 382 | wglUseFontOutlinesW=wglUseFontOutlinesW_imp 383 | wglChoosePixelFormat=wglChoosePixelFormat_imp 384 | wglSetPixelFormat=wglSetPixelFormat_imp 385 | wglGetExtensionsStringARB=wglGetExtensionsStringARB_imp 386 | 
wglDescribePixelFormat=wglDescribePixelFormat_imp 387 | -------------------------------------------------------------------------------- /rast_fasterizer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB C_FILES *.cpp) 2 | file(GLOB H_FILES *.h) 3 | 4 | add_library( 5 | softgl_rast_fasterizer 6 | SHARED 7 | ${C_FILES} ${H_FILES}) 8 | -------------------------------------------------------------------------------- /rast_fasterizer/clip.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../source/math.h" 3 | #include <array> 4 | struct edge_t { 5 | // a 2D triangle edge stored as a normal _n and an offset _d = dot(a, _n); points with dot(p, _n) >= _d count as 'in' 6 | // note: normals face 'inwards' for triangles 7 | // positive may be inside triangle, negative is always outside 8 | 9 | // return normal {-dy, dx} * s, where s is chosen so the normal extends to reach p 10 | static const float2 scale_normal(const float2 &a, const float2 &b, const float2 &p) { 11 | // find edges 12 | // note: (a - b) as we want an inward facing normal 13 | const float2 edge = float2(a) - float2(b); 14 | // edge normal {-dy, dx} 15 | const float2 normal = float2::cross(edge); 16 | // distance between edge and opposite vertex 17 | // note: dist should always be > 0.f for a forward facing triangle 18 | const float dist = float2::dot(normal, float2(p - a)); 19 | // normalize to get interpolants 20 | return normal / dist; 21 | } 22 | // build an edge through a and b with an unscaled normal 23 | edge_t(const float2 &a, const float2 &b) 24 | : _n(float2::cross(a - b)) // {-dy, dx} 25 | , _d(float2::dot(a, _n)) { // distance of edge to origin 26 | // note: normal cant be used for attribute interpolation 27 | } 28 | // build an edge through a and b whose normal is scaled by scale_normal() using the opposite vertex p 29 | edge_t(const float2 &a, const float2 &b, const float2 &p) 30 | : _n(scale_normal(a, b, p)) // {-dy, dx} * s 31 | , _d(float2::dot(a, _n)) // distance of edge to origin 32 | { 33 | } 34 | 35 | // evaluate the edge equation at a given point 36 | float eval(const float2 & p) const { 37 | // NOTE(review): returns dot(p, _n) without subtracting _d, unlike test_in/test_out - confirm callers expect the raw projection 38 | return float2::dot(p, _n); 39 | } 
40 | 41 | // test if a point falls on negative side of edge 42 | // point on negative side is 'out' 43 | bool test_out(const float2 &p) const { 44 | return (p.x * _n.x + p.y * _n.y) < _d; 45 | } 46 | 47 | // test if a point falls on positive side of edge 48 | // point on positive side is 'in' 49 | bool test_in(const float2 &p) const { 50 | return (p.x * _n.x + p.y * _n.y) >= _d; 51 | } 52 | 53 | // return normal sign code in 0..3: bit 0 is set when _n.x > 0, bit 1 when _n.y > 0 54 | int normal_quadrant() const { 55 | return (_n.x > 0.f) | ((_n.y > 0.f) << 1); 56 | } 57 | 58 | // trivial rejection based on edge normal and closest box vertex 59 | bool trivial_out(const rectf_t &r) const { 60 | //note: remember edge normals face inward for triangles 61 | switch (normal_quadrant()) { 62 | case 3: return test_out(float2{r.x1, r.y1}); // (+,+) -> box (-, -) 63 | case 2: return test_out(float2{r.x0, r.y1}); // (-,+) -> box (+, -) 64 | case 1: return test_out(float2{r.x1, r.y0}); // (+,-) -> box (-, +) 65 | case 0: return test_out(float2{r.x0, r.y0}); // (-,-) -> box (+, +) 66 | default: __assume(false); // not reached: normal_quadrant() only yields 0..3 (__assume is an MSVC intrinsic) 67 | } 68 | } 69 | 70 | // trivial inclusion based on edge normal and closest box vertex 71 | bool trivial_in(const rectf_t &r) const { 72 | //note: remember edge normals face inward for triangles 73 | switch (normal_quadrant()) { 74 | case 3: return test_in(float2{r.x0, r.y0}); // (+,+) -> box (+, +) 75 | case 2: return test_in(float2{r.x1, r.y0}); // (-,+) -> box (-, +) 76 | case 1: return test_in(float2{r.x0, r.y1}); // (+,-) -> box (+, -) 77 | case 0: return test_in(float2{r.x1, r.y1}); // (-,-) -> box (-, -) 78 | default: __assume(false); // not reached: normal_quadrant() only yields 0..3 79 | } 80 | } 81 | // read-only access to the stored (possibly scaled) edge normal 82 | const float2 &normal() const { 83 | return _n; 84 | } 85 | 86 | protected: 87 | float2 _n; 88 | float _d; 89 | }; 90 | 91 | 92 | struct tri_setup_t { 93 | // each edge is built from two vertices plus the opposite vertex, which edge_t uses to scale its normal 94 | // constructor 95 | tri_setup_t(const float2 &v0, const float2 &v1, const float2 &v2) 96 | : _e{edge_t(v0, v1, v2), // e01 97 | edge_t(v1, v2, v0), // e12 98 | edge_t(v2, v0, v1)} // e20 99 | {} 100 | 101 | 
// test if 'r' is outside triangle via edge test 102 | bool trivial_out(const rectf_t &r) const { 103 | return _e[0].trivial_out(r) || 104 | _e[1].trivial_out(r) || 105 | _e[2].trivial_out(r); 106 | } 107 | 108 | // test if 'r' is inside triangle via edge test 109 | bool trivial_in(const rectf_t &r) const { 110 | return _e[0].trivial_in(r) && 111 | _e[1].trivial_in(r) && 112 | _e[2].trivial_in(r); 113 | } 114 | 115 | protected: 116 | // edges {e01, e12, e20} 117 | std::array<edge_t, 3> _e; 118 | 119 | }; // tri_setup_t 120 | -------------------------------------------------------------------------------- /rast_fasterizer/fasterizer.cpp: -------------------------------------------------------------------------------- 1 | #include <algorithm> 2 | #include <array> 3 | #include <cassert> 4 | #include <vector> 5 | #include "../source/context.h" 6 | #include "../source/math.h" 7 | #include "../source/raster.h" 8 | 9 | #include "clip.h" 10 | 11 | namespace { 12 | // edge length, in pixels, of one square screen block 13 | static const int32_t BLOCK_SIZE = 16; 14 | // per-triangle plane equations: each slot is sampled as vx * x + vy * y - v 15 | struct triangle_setup_t { 16 | 17 | enum { 18 | slot_w0, // triangle weight 0 19 | slot_w1, // triangle weight 1 20 | slot_iw, // inverse w 21 | slot_r, // r / w 22 | slot_g, // g / w 23 | slot_b, // b / w 24 | slot_u, // u / w 25 | slot_v, // v / w 26 | _slot_count_ 27 | }; 28 | 29 | std::array<float, _slot_count_> v; 30 | std::array<float, _slot_count_> vx; 31 | std::array<float, _slot_count_> vy; 32 | 33 | float minz, maxz; 34 | uint32_t mip_level; 35 | }; 36 | // cached framebuffer pointers and dimensions, plus the block grid size 37 | struct frame_t { 38 | uint32_t *_pixels; 39 | float *_depth; 40 | int32_t _width; 41 | int32_t _height; 42 | int _blocks_in_x; 43 | int _blocks_in_y; 44 | }; 45 | // one BLOCK_SIZE x BLOCK_SIZE screen tile and the setup indices binned into it (bit 31 flags the no-edge-test path) 46 | struct block_t { 47 | std::vector<uint32_t> _triangle_setup; 48 | 49 | // block bounding rect 50 | float2 _min, _max; 51 | 52 | // frame buffer pointers 53 | uint32_t *_fb_color; 54 | float *_fb_depth; 55 | }; 56 | // integer bounding box of three vertices (coordinates are truncated towards zero) 57 | void bounds(const float4 &v0, 58 | const float4 &v1, 59 | const float4 &v2, 60 | recti_t &out) { 61 | // Compute triangle bounding box 62 | out.x0 = std::min({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)}); 63 | out.y0 = 
std::min({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)}); 64 | out.x1 = std::max({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)}); 65 | out.y1 = std::max({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)}); 66 | } 67 | 68 | } // namespace 69 | 70 | struct rast_fasterizer_t : public raster_t { 71 | // clear the requested buffers; only the depth buffer is handled by this rasterizer 72 | void framebuffer_clear( 73 | bool color, 74 | bool depth, 75 | bool stencil, 76 | uint32_t clear_color, 77 | float clear_depth, 78 | uint32_t clear_stencil) override { 79 | // fix: gate on the 'depth' request flag rather than on the clear value (a clear_depth of 0.f used to skip the clear, and a colour-only clear used to wipe depth) 80 | if (depth) { 81 | float *row = _frame._depth; 82 | for (int y = 0; y < _frame._height; ++y) { 83 | for (int x = 0; x < _frame._width; ++x) { 84 | row[x] = 1000.f; // the buffer is compared against w = 1 / iw when drawing, so it is reset to a large 'far' w, not to clear_depth 85 | } 86 | row += _frame._width; 87 | } 88 | } 89 | }; 90 | // drop the block list built for the current framebuffer 91 | void framebuffer_release() override { 92 | _blocks.clear(); 93 | } 94 | // cache the framebuffer pointers and size, then rebuild the grid of BLOCK_SIZE blocks; partial blocks at the right/bottom edges are not created 95 | void framebuffer_aquire() override { 96 | _blocks.clear(); 97 | _frame._pixels = _cxt->buffer.pixels(); 98 | _frame._depth = _cxt->buffer.depth(); 99 | _frame._width = _cxt->buffer.width(); 100 | _frame._height = _cxt->buffer.height(); 101 | 102 | _frame._blocks_in_x = _frame._width / BLOCK_SIZE; 103 | _frame._blocks_in_y = _frame._height / BLOCK_SIZE; 104 | 105 | for (int y = 0; y < _frame._blocks_in_y; ++y) { 106 | for (int x = 0; x < _frame._blocks_in_x; ++x) { 107 | 108 | const int32_t ix = x * BLOCK_SIZE; 109 | const int32_t iy = y * BLOCK_SIZE; 110 | 111 | const int offset = ix + iy * _frame._width; 112 | 113 | block_t b; 114 | b._min = float2{float(ix), float(iy)}; 115 | b._max = float2{float(ix + BLOCK_SIZE), float(iy + BLOCK_SIZE)}; 116 | b._fb_color = _frame._pixels + offset; 117 | b._fb_depth = _frame._depth + offset; 118 | _blocks.push_back(b); 119 | } 120 | } 121 | 122 | assert(_blocks.size() == size_t(_frame._blocks_in_x) * size_t(_frame._blocks_in_y)); 123 | } 124 | // remember the context whose buffer framebuffer_aquire() reads 125 | bool start(gl_context_t &cxt) override { 126 | _cxt = &cxt; 127 | return true; 128 | } 129 | 130 | void stop() override { 131 | } 132 | 133 | void draw_block_fast(const block_t &block, const triangle_setup_t &s) { 134 | 
135 | float iw = (s.vx[triangle_setup_t::slot_iw] * (block._min.x) + 136 | s.vy[triangle_setup_t::slot_iw] * (block._min.y)) - s.v[triangle_setup_t::slot_iw]; 137 | // fix: 'u' previously read slot 3, which is slot_r; use the named u / w slot 138 | float u = (s.vx[triangle_setup_t::slot_u] * (block._min.x) + 139 | s.vy[triangle_setup_t::slot_u] * (block._min.y)) - s.v[triangle_setup_t::slot_u]; 140 | 141 | uint32_t *dst = block._fb_color; 142 | float *depth = block._fb_depth; 143 | // no edge test in this path: it is selected for entries flagged as entirely inside the triangle 144 | for (int y = 0; y < BLOCK_SIZE; ++y) { 145 | 146 | float iw_ = iw; 147 | // u at both ends of the row, stepped linearly in between; fix: the right end now divides by the right-end 1/w instead of the left-end one 148 | const float u0 = (u) / iw; 149 | const float u1 = (u + s.vx[triangle_setup_t::slot_u] * BLOCK_SIZE) / (iw + s.vx[triangle_setup_t::slot_iw] * BLOCK_SIZE); 150 | const float u_dx = (u1 - u0) / BLOCK_SIZE; 151 | float u_ = u0; 152 | 153 | for (int x = 0; x < BLOCK_SIZE; ++x) { 154 | 155 | const float w = 1.f / iw_; 156 | 157 | // depth test 158 | if (w <= depth[x]) { 159 | depth[x] = w; 160 | 161 | const uint8_t r = uint8_t(128 + w * 0.1f); 162 | const uint8_t g = uint8_t(128 + w * 0.1f); 163 | const uint8_t b = uint8_t(128 + w * 0.1f); 164 | 165 | dst[x] = (r << 16) | (g << 8) | b; 166 | } 167 | // note: u_ is stepped but not yet consumed; pixels are shaded from w only 168 | // x step interpolants 169 | iw_ += s.vx[triangle_setup_t::slot_iw]; 170 | u_ += u_dx; 171 | } 172 | 173 | // y step interpolants 174 | iw += s.vy[triangle_setup_t::slot_iw]; 175 | u += s.vy[triangle_setup_t::slot_u]; 176 | 177 | dst += _frame._width; 178 | depth += _frame._width; 179 | } 180 | } 181 | 182 | void draw_block(const block_t &block, const triangle_setup_t &s) { 183 | 184 | const float vx0 = s.vx[0]; 185 | const float vy0 = s.vy[0]; 186 | const float vx1 = s.vx[1]; 187 | const float vy1 = s.vy[1]; 188 | 189 | float v0 = (vx0 * block._min.x + vy0 * block._min.y) - s.v[0]; 190 | float v1 = (vx1 * block._min.x + vy1 * block._min.y) - s.v[1]; 191 | 192 | float iw = (s.vx[2] * (block._min.x) + 193 | s.vy[2] * (block._min.y)) - s.v[2]; 194 | 195 | uint32_t *dst = block._fb_color; 196 | float *depth = block._fb_depth; 197 | 198 | for (int y = 0; y < BLOCK_SIZE; ++y) { 199 | 200 | float v0_ = v0; 201 | float v1_ = v1; 202 | 203 | float iw_ = iw; 204 | 205 | for (int x = 0; x < BLOCK_SIZE; ++x) { 206 | 207 | const float v2_ = 1.f - (v0_ + v1_); 208 | 209 | const float w = 1.f / iw_; 210 | 211 | // triangle edge test 212 | if (v0_ > 0.f && v1_ > 0.f && v2_ > 
0.f) { 213 | 214 | // depth test 215 | if (w <= depth[x]) { 216 | depth[x] = w; 217 | 218 | const uint8_t r = uint8_t(128 + w * 0.1f); 219 | const uint8_t g = uint8_t(128 + w * 0.1f); 220 | const uint8_t b = uint8_t(128 + w * 0.1f); 221 | 222 | dst[x] = (r << 16) | (g << 8) | b; 223 | } 224 | 225 | } 226 | 227 | v0_ += vx0; 228 | v1_ += vx1; 229 | iw_ += s.vx[2]; 230 | } 231 | 232 | v0 += vy0; 233 | v1 += vy1; 234 | iw += s.vy[2]; 235 | 236 | dst += _frame._width; 237 | depth += _frame._width; 238 | } 239 | } 240 | // draw every triangle binned into this block; bit 31 of an entry selects the no-edge-test path 241 | void draw_block(const block_t &block) { 242 | for (uint32_t setup_index : block._triangle_setup) { 243 | 244 | bool trivial_in = 0 != (setup_index & 0x80000000); 245 | 246 | setup_index &= 0x7fffffff; 247 | 248 | const triangle_setup_t &s = _setup[setup_index]; 249 | 250 | if (trivial_in) { 251 | draw_block_fast(block, s); 252 | } 253 | else { 254 | draw_block(block, s); 255 | } 256 | } 257 | } 258 | // bin a triangle: append its setup index to every block its bounding box touches, skipping blocks that lie wholly outside one edge 259 | void insert_triangle(const triangle_t &t, 260 | const triangle_setup_t &s, 261 | uint32_t setup_index) { 262 | 263 | // find bounding rectangle of the triangle 264 | recti_t rect; 265 | bounds(t.vert[0].coord, 266 | t.vert[1].coord, 267 | t.vert[2].coord, 268 | rect); 269 | 270 | // clip the triangle if fully out of the frame (x == width / y == height is already off screen) 271 | if (rect.x1 < 0) return; 272 | if (rect.y1 < 0) return; 273 | if (rect.x0 >= _frame._width) return; 274 | if (rect.y0 >= _frame._height) return; 275 | 276 | // clamp in block space 277 | const int32_t ix0 = std::max<int32_t>(rect.x0 / BLOCK_SIZE, 0); 278 | const int32_t iy0 = std::max<int32_t>(rect.y0 / BLOCK_SIZE, 0); 279 | const int32_t ix1 = std::min<int32_t>(rect.x1 / BLOCK_SIZE, _frame._blocks_in_x-1); 280 | const int32_t iy1 = std::min<int32_t>(rect.y1 / BLOCK_SIZE, _frame._blocks_in_y-1); 281 | 282 | // isolate 2d coordinates 283 | const float2 v0{t.vert[0].coord.x, 284 | t.vert[0].coord.y}; 285 | const float2 v1{t.vert[1].coord.x, 286 | t.vert[1].coord.y}; 287 | const float2 v2{t.vert[2].coord.x, 288 | t.vert[2].coord.y}; 289 | 290 | tri_setup_t 
clip{v0, v1, v2}; 291 | 292 | // insert into blocks 293 | for (int y = iy0; y <= iy1; ++y) { 294 | for (int x = ix0; x <= ix1; ++x) { 295 | 296 | //XXX: only do these tests if the area is large 297 | // heuristic for a large triangle: 298 | // area > (2 * (BLOCK_SIZE * BLOCK_SIZE)); 299 | // explicit conversions: a non-constant int -> float inside braces is a narrowing conversion 300 | const rectf_t r{ 301 | float((x + 0) * BLOCK_SIZE), 302 | float((y + 0) * BLOCK_SIZE), 303 | float((x + 1) * BLOCK_SIZE), 304 | float((y + 1) * BLOCK_SIZE)}; 305 | 306 | if (clip.trivial_out(r)) { 307 | continue; 308 | } 309 | 310 | // apply trivial in mask 311 | //XXX: trivial in seems broken 312 | const uint32_t mask = 0; // clip.trivial_in(r) ? 0x80000000 : 0x00000000; 313 | 314 | // offset into the block list 315 | const uint32_t bo = x + y * _frame._blocks_in_x; 316 | // bo is unsigned, so only the upper bound can fail 317 | assert(bo < _blocks.size()); 318 | _blocks[bo]._triangle_setup.push_back(setup_index | mask); 319 | } 320 | } 321 | } 322 | 323 | float triangle_area(const float2 &v0, 324 | const float2 &v1, 325 | const float2 &v2) { 326 | 327 | // area is found using part of the vector product 328 | 329 | // x = a2 * b3 - a3 * b2 330 | // y = a1 * b3 - a3 * b1 331 | // z = a1 * b2 - a2 * b1 332 | 333 | // where a = v0 -> v1 334 | // where b = v0 -> v2 335 | 336 | // we only care about the z component, which is the signed area of the 337 | // parallelogram formed (twice the triangle area). no 0.5f is applied: 338 | // setup_triangle only uses the value to normalize the edge normals. 
339 | 340 | return (v1.x - v0.x) * (v2.y - v0.y) - 341 | (v2.x - v0.x) * (v1.y - v0.y); 342 | } 343 | 344 | // evaluate the gradient field at a point on the edge: 345 | // normal: the normal for that edge 346 | // poe: a point on the edge 347 | // returns dot(normal, poe), the offset stored in the 'v' array of the setup 348 | float evaluate(const float2 &normal, 349 | const float2 &poe) { 350 | return normal.x * poe.x + normal.y * poe.y; 351 | } 352 | // fill 's' with plane equations for two edge weights, 1/w and the attributes divided by w; returns false when the triangle cannot be set up 353 | bool setup_triangle(const triangle_t &t, triangle_setup_t &s) { 354 | 355 | // isolate 2d coordinates 356 | const float2 v0{t.vert[0].coord.x, t.vert[0].coord.y}; 357 | const float2 v1{t.vert[1].coord.x, t.vert[1].coord.y}; 358 | const float2 v2{t.vert[2].coord.x, t.vert[2].coord.y}; 359 | 360 | // find the area of the triangle 361 | const float area = triangle_area(v0, v1, v2); 362 | if (area == 0.f) { return false; } // degenerate (zero-area) triangle: dividing by area below would give inf/NaN gradients; the caller discards the setup on false 363 | // find edge vectors 364 | const float2 d01 = v1 - v0; 365 | const float2 d12 = v2 - v1; 366 | const float2 d20 = v0 - v2; 367 | 368 | // cross product gives us normals from the edges 369 | // which we 'normalize' to the area of the triangle 370 | const float2 n0 = float2::cross(d12) / area; 371 | const float2 n1 = float2::cross(d20) / area; 372 | const float2 n2 = float2::cross(d01) / area; 373 | 374 | // evaluate the starting position for each interpolant 375 | const float s0 = evaluate(n0, v1); 376 | const float s1 = evaluate(n1, v2); 377 | const float s2 = evaluate(n2, v0); 378 | 379 | // edge function interpolants 380 | { 381 | s. v[triangle_setup_t::slot_w0] = s0; 382 | s.vx[triangle_setup_t::slot_w0] = n0.x; 383 | s.vy[triangle_setup_t::slot_w0] = n0.y; 384 | 385 | s. 
v[triangle_setup_t::slot_w1] = s1; 386 | s.vx[triangle_setup_t::slot_w1] = n1.x; 387 | s.vy[triangle_setup_t::slot_w1] = n1.y; 388 | } 389 | // c[0..2] offsets, c[3..5] x gradients, c[6..8] y gradients, each pre-multiplied by that vertex's 1/w 390 | // XXX: make this float3 and use dot products 391 | std::array<float, 9> c; 392 | 393 | // 1/w interpolation 394 | { 395 | const float iw0 = 1.f / t.vert[0].coord.w; 396 | const float iw1 = 1.f / t.vert[1].coord.w; 397 | const float iw2 = 1.f / t.vert[2].coord.w; 398 | 399 | c[0] = s0 * iw0; c[1] = s1 * iw1; c[2] = s2 * iw2; 400 | c[3] = n0.x * iw0; c[4] = n1.x * iw1; c[5] = n2.x * iw2; 401 | c[6] = n0.y * iw0; c[7] = n1.y * iw1; c[8] = n2.y * iw2; 402 | 403 | const uint32_t slot = triangle_setup_t::slot_iw; 404 | s. v[slot] = c[0] + c[1] + c[2]; 405 | s.vx[slot] = c[3] + c[4] + c[5]; 406 | s.vy[slot] = c[6] + c[7] + c[8]; 407 | } 408 | 409 | // r 410 | { 411 | const uint32_t slot = triangle_setup_t::slot_r; 412 | const float a0 = t.vert[0].rgba.x; 413 | const float a1 = t.vert[1].rgba.x; 414 | const float a2 = t.vert[2].rgba.x; 415 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 416 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 417 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 418 | } 419 | 420 | // g 421 | { 422 | const uint32_t slot = triangle_setup_t::slot_g; 423 | const float a0 = t.vert[0].rgba.y; 424 | const float a1 = t.vert[1].rgba.y; 425 | const float a2 = t.vert[2].rgba.y; 426 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 427 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 428 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 429 | } 430 | 431 | // b 432 | { 433 | const uint32_t slot = triangle_setup_t::slot_b; 434 | const float a0 = t.vert[0].rgba.z; 435 | const float a1 = t.vert[1].rgba.z; 436 | const float a2 = t.vert[2].rgba.z; 437 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 438 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 439 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 440 | } 441 | 442 | // u 443 | { 444 | const uint32_t slot = triangle_setup_t::slot_u; 445 | const float a0 = 
t.vert[0].tex.x; 446 | const float a1 = t.vert[1].tex.x; 447 | const float a2 = t.vert[2].tex.x; 448 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 449 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 450 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 451 | } 452 | 453 | // v 454 | { 455 | const uint32_t slot = triangle_setup_t::slot_v; 456 | const float a0 = t.vert[0].tex.y; 457 | const float a1 = t.vert[1].tex.y; 458 | const float a2 = t.vert[2].tex.y; 459 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 460 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 461 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 462 | } 463 | 464 | return true; 465 | } 466 | 467 | void push_triangles(const std::vector &triangles, 468 | const texture_t *tex, 469 | const state_manager_t &state) override { 470 | 471 | // insert all triangles into blocks 472 | for (const auto &t : triangles) { 473 | if (t.vert[0].coord.w == 0.f) { 474 | // signals fully clipped so discard 475 | continue; 476 | } 477 | 478 | _setup.emplace_back(); 479 | triangle_setup_t &s = _setup.back(); 480 | if (!setup_triangle(t, s)) { 481 | _setup.pop_back(); 482 | continue; 483 | } 484 | 485 | const uint32_t setup_index = (uint32_t)_setup.size() - 1u; 486 | 487 | insert_triangle(t, s, setup_index); 488 | } 489 | 490 | // draw all blocks 491 | for (block_t &b : _blocks) { 492 | draw_block(b); 493 | b._triangle_setup.clear(); 494 | } 495 | 496 | // clear all of our triangle setup 497 | _setup.clear(); 498 | } 499 | 500 | void flush() override {} 501 | 502 | void present() override { 503 | } 504 | 505 | protected: 506 | 507 | std::vector _setup; 508 | 509 | frame_t _frame; 510 | std::vector _blocks; 511 | gl_context_t *_cxt; 512 | }; 513 | 514 | extern "C" { 515 | __declspec(dllexport) raster_t *raster_create() { 516 | return new rast_fasterizer_t; 517 | } 518 | 519 | __declspec(dllexport) void raster_release(raster_t *r) { 520 | assert(r); 521 | delete r; 522 | } 523 | }; 524 | 
-------------------------------------------------------------------------------- /rast_opencl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB C_FILES *.cpp) 2 | file(GLOB H_FILES *.h) 3 | 4 | find_package(OPENCL) 5 | 6 | if (${OpenCL_FOUND}) 7 | 8 | add_library( 9 | softgl_rast_opencl 10 | SHARED 11 | ${C_FILES} ${H_FILES}) 12 | 13 | target_include_directories( 14 | softgl_rast_opencl 15 | PUBLIC 16 | ${OpenCL_INCLUDE_DIRS}) 17 | 18 | target_link_libraries( 19 | softgl_rast_opencl 20 | ${OpenCL_LIBRARIES}) 21 | 22 | else() 23 | 24 | endif() 25 | -------------------------------------------------------------------------------- /rast_opencl/rast_opencl.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include "../source/context.h" 12 | #include "../source/math.h" 13 | #include "../source/raster.h" 14 | #include "../source/texture.h" 15 | 16 | namespace { 17 | 18 | struct frame_t { 19 | uint32_t w, h; 20 | uint32_t num_w, num_h; 21 | }; 22 | 23 | const char *cl_kernel_raster = R"( 24 | struct __attribute__ ((packed)) vertex_t { 25 | float4 coord; 26 | float4 rgba; 27 | float2 tex; 28 | }; 29 | 30 | struct __attribute__ ((packed)) triangle_t { 31 | struct vertex_t vert[3]; 32 | }; 33 | 34 | float triangle_area(const float2 v0, 35 | const float2 v1, 36 | const float2 v2) { 37 | 38 | // area is found using part of the vector product 39 | 40 | // x = a2 * b3 - a3 * b2 41 | // y = a1 * b3 - a3 * b1 42 | // z = a1 * b2 - a2 * b1 43 | 44 | // where a = v0 -> v1 45 | // where b = v0 -> v2 46 | 47 | // we only care about the z componant which contains the area of the 48 | // parallelogram formed. i'm unsure why we dont need to multiply by 49 | // 0.5f however when returning the result. 
50 | 51 | return (v1.x - v0.x) * (v2.y - v0.y) - (v2.x - v0.x) * (v1.y - v0.y); 52 | } 53 | 54 | // evaluate the gradient field given the following: 55 | // normal: the normal for that edge 56 | // poe: a point on the edge 57 | // point: the location where to sample it 58 | float evaluate(const float2 normal, 59 | const float2 poe, 60 | const float2 point) { 61 | return dot(normal, point) - dot(normal, poe); 62 | } 63 | 64 | float2 cross2d(const float2 a) { 65 | return (float2)(-a.y, a.x ); 66 | } 67 | 68 | kernel void raster(__global const struct triangle_t *tri, 69 | const uint num_tri, 70 | __global uint *fb_color, 71 | __global float *fb_depth, 72 | uint fb_pitch) { 73 | 74 | // chunk bounding rect 75 | const float chunk_min_x = get_group_id(0) * CHUNK_SIZE; 76 | const float chunk_min_y = get_group_id(1) * CHUNK_SIZE; 77 | const float chunk_max_x = chunk_min_x + CHUNK_SIZE; 78 | const float chunk_max_y = chunk_min_y + CHUNK_SIZE; 79 | 80 | // offset the framebuffer 81 | const uint fb_offs = get_group_id(0) * CHUNK_SIZE + 82 | get_group_id(1) * CHUNK_SIZE * fb_pitch; 83 | fb_color += fb_offs; 84 | fb_depth += fb_offs; 85 | 86 | // chunk staring coordinate 87 | const float2 start = (float2)(chunk_min_x, chunk_min_y); 88 | 89 | // for each triangle 90 | for (uint t=0; t < num_tri; ++t) { 91 | 92 | // get the vertices 93 | const float2 v0 = tri[t].vert[0].coord.xy; 94 | const float2 v1 = tri[t].vert[2].coord.xy; 95 | const float2 v2 = tri[t].vert[1].coord.xy; 96 | 97 | // find the area of the triangle 98 | const float area = triangle_area(v0, v1, v2); 99 | 100 | // find edge vectors 101 | const float2 d12 = v2 - v1; 102 | const float2 d20 = v0 - v2; 103 | 104 | // cross product gives us normals from the edges 105 | // which we 'normalize' to the area of the triangle 106 | const float2 n0 = cross2d(d12) / area; 107 | const float2 n1 = cross2d(d20) / area; 108 | 109 | // edge functions are 110 | // s0 = dot(n0, point) - d0 111 | // s1 = dot(n1, point) - d1 112 | // 
s2 = dot(n2, point) - d2 113 | 114 | // evaluate the starting position for each interpolant 115 | float s0 = evaluate(n0, v1, start); 116 | float s1 = evaluate(n1, v2, start); 117 | 118 | for (int y = 0; y < CHUNK_SIZE; y += 1) { 119 | 120 | float s0_ = s0; 121 | float s1_ = s1; 122 | 123 | for (int x = 0; x < CHUNK_SIZE; x += 1) { 124 | 125 | // s2_ can be derived from s0_ and s1_ since we know the 126 | // sum of them all should add up to 1.0f (the full area) 127 | const float s2_ = 1.f - (s0_ + s1_); 128 | 129 | if (s0_ > 0 && s1_ > 0 && s2_ > 0) { 130 | const uchar r = (uchar)(s0_ * 256); 131 | const uchar g = (uchar)(s1_ * 256); 132 | const uchar b = (uchar)(s2_ * 256); 133 | const uint rgb = (r << 16) | (g << 8) | b; 134 | 135 | fb_color[ x + y * fb_pitch ] = rgb; 136 | } 137 | 138 | s0_ += n0.x; 139 | s1_ += n1.x; 140 | } 141 | 142 | s0 += n0.y; 143 | s1 += n1.y; 144 | } 145 | 146 | } 147 | } 148 | )"; 149 | 150 | } // namespace 151 | 152 | struct rast_opencl_t : public raster_t { 153 | 154 | static const uint32_t chunk_size = 32; 155 | 156 | rast_opencl_t() 157 | : _cl_context(nullptr) 158 | , _cl_device(nullptr) 159 | , _cl_queue(nullptr) 160 | , _cl_program(nullptr) 161 | , _cl_kernel(nullptr) 162 | , _cl_fb_color(nullptr) 163 | , _cl_fb_depth(nullptr) 164 | , _cl_triangles(nullptr) 165 | , _cl_triangle_setup(nullptr) 166 | , _cxt(nullptr) { 167 | } 168 | 169 | void framebuffer_release() override { 170 | if (_cl_fb_color) { 171 | clReleaseMemObject(_cl_fb_color); 172 | _cl_fb_color = nullptr; 173 | } 174 | if (_cl_fb_depth) { 175 | clReleaseMemObject(_cl_fb_depth); 176 | _cl_fb_depth = nullptr; 177 | } 178 | } 179 | 180 | void framebuffer_aquire() override { 181 | 182 | if (!_cl_context || !_cxt) { 183 | return; 184 | } 185 | 186 | // release the old framebuffer 187 | framebuffer_release(); 188 | 189 | // start with framebuffer size 190 | uint32_t w = _cxt->buffer.width(); 191 | uint32_t h = _cxt->buffer.height(); 192 | 193 | // round to nearest chunk 
size 194 | const uint32_t chunk_mask = chunk_size - 1; 195 | w = (w + chunk_mask) & ~chunk_mask; 196 | h = (h + chunk_mask) & ~chunk_mask; 197 | 198 | _frame.w = w; 199 | _frame.h = h; 200 | _frame.num_w = w / chunk_size; 201 | _frame.num_h = h / chunk_size; 202 | 203 | const size_t fb_area = w * h; 204 | 205 | // create the color buffer 206 | _cl_fb_color = clCreateBuffer( 207 | _cl_context, 208 | CL_MEM_READ_WRITE, 209 | fb_area * sizeof(uint32_t), 210 | nullptr, 211 | nullptr); 212 | // create the depth buffer 213 | _cl_fb_depth = clCreateBuffer( 214 | _cl_context, 215 | CL_MEM_READ_WRITE, 216 | fb_area * sizeof(float), 217 | nullptr, 218 | nullptr); 219 | 220 | } 221 | 222 | bool start(gl_context_t &cxt) override { 223 | _cxt = &cxt; 224 | 225 | // access an OpenCL device 226 | if (!init_cl()) { 227 | return false; 228 | } 229 | 230 | // triangle list buffer 231 | _cl_triangles = clCreateBuffer( 232 | _cl_context, 233 | CL_MEM_READ_WRITE, 234 | 1024 * 1024 * 16, // 16meg 235 | nullptr, 236 | nullptr); 237 | if (!_cl_triangles) { 238 | return false; 239 | } 240 | 241 | // triangle setup buffer 242 | _cl_triangle_setup = clCreateBuffer( 243 | _cl_context, 244 | CL_MEM_READ_WRITE, 245 | 1024 * 1024 * 16, // 16meg 246 | nullptr, 247 | nullptr); 248 | if (!_cl_triangle_setup) { 249 | return false; 250 | } 251 | 252 | if (!compile_kernel()) { 253 | return false; 254 | } 255 | 256 | return true; 257 | } 258 | 259 | void stop() override { 260 | if (_cl_triangles) { 261 | clReleaseMemObject(_cl_triangles); 262 | _cl_triangles = nullptr; 263 | } 264 | if (_cl_triangle_setup) { 265 | clReleaseMemObject(_cl_triangle_setup); 266 | _cl_triangle_setup = nullptr; 267 | } 268 | if (_cl_fb_color) { 269 | clReleaseMemObject(_cl_fb_color); 270 | _cl_fb_color = nullptr; 271 | } 272 | if (_cl_fb_depth) { 273 | clReleaseMemObject(_cl_fb_depth); 274 | _cl_fb_depth = nullptr; 275 | } 276 | if (_cl_kernel) { 277 | clReleaseKernel(_cl_kernel); 278 | _cl_kernel = nullptr; 279 | } 280 | if 
(_cl_program) { 281 | clReleaseProgram(_cl_program); 282 | _cl_program = nullptr; 283 | } 284 | if (_cl_queue) { 285 | clReleaseCommandQueue(_cl_queue); 286 | _cl_queue = nullptr; 287 | } 288 | if (_cl_context) { 289 | clReleaseContext(_cl_context); 290 | _cl_context = nullptr; 291 | } 292 | _cxt = nullptr; 293 | } 294 | 295 | void push_triangles(const std::vector &triangles, 296 | const texture_t *tex, 297 | const state_manager_t &state) override { 298 | 299 | if (!_cl_triangles || !_cl_queue) { 300 | return; 301 | } 302 | 303 | cl_int ret = CL_SUCCESS; 304 | 305 | // write triangles into the triangle list buffer 306 | ret = clEnqueueWriteBuffer( 307 | _cl_queue, 308 | _cl_triangles, 309 | false, 310 | 0, 311 | triangles.size() * sizeof(triangle_t), 312 | triangles.data(), 313 | 0, 314 | 0, 315 | nullptr 316 | ); 317 | if (ret != CL_SUCCESS) { 318 | DEBUG_BREAK; 319 | return; 320 | } 321 | 322 | // triangle setup kernel 323 | // ... 324 | 325 | // set kernel parameters 326 | { 327 | const cl_uint num_triangles = triangles.size(); 328 | const cl_uint fp_pitch = _frame.w; 329 | clSetKernelArg(_cl_kernel, 0, sizeof(_cl_triangles), &_cl_triangles); 330 | clSetKernelArg(_cl_kernel, 1, sizeof(cl_uint), &num_triangles); 331 | clSetKernelArg(_cl_kernel, 2, sizeof(_cl_fb_color), &_cl_fb_color); 332 | clSetKernelArg(_cl_kernel, 3, sizeof(_cl_fb_depth), &_cl_fb_depth); 333 | clSetKernelArg(_cl_kernel, 4, sizeof(cl_uint), &fp_pitch); 334 | } 335 | 336 | // triangle raster kernel 337 | size_t gwgs[3] = { _frame.num_w, _frame.num_h, 1 }; 338 | size_t lwgs[3] = { 1, 1, 1 }; 339 | ret = clEnqueueNDRangeKernel( 340 | _cl_queue, 341 | _cl_kernel, 342 | 2, 343 | nullptr, 344 | gwgs, 345 | lwgs, 346 | 0, 347 | 0, 348 | nullptr); 349 | if (ret != CL_SUCCESS) { 350 | DEBUG_BREAK; 351 | return; 352 | } 353 | 354 | clFinish(_cl_queue); 355 | } 356 | 357 | void flush() override {} 358 | 359 | void present() override { 360 | 361 | if (!_cl_fb_color || !_cl_queue || !_cxt) { 362 | return; 
363 | } 364 | 365 | // TODO: use clEnqueueReadBufferRect since our rendertarget 366 | // and the actual screen buffer might be different sizes 367 | // deppending on the chunk size. 368 | 369 | const size_t nbytes = _cxt->buffer.width() * _cxt->buffer.height() * sizeof(uint32_t); 370 | cl_int ret = clEnqueueReadBuffer( 371 | _cl_queue, 372 | _cl_fb_color, 373 | true, 374 | 0, 375 | nbytes, 376 | _cxt->buffer.pixels(), 377 | 0, 378 | 0, 379 | nullptr); 380 | 381 | if (ret != CL_SUCCESS) { 382 | DEBUG_BREAK; 383 | } 384 | 385 | cl_uint color = 0x10101010; 386 | clEnqueueFillBuffer(_cl_queue, _cl_fb_color, &color, 4, 0, nbytes, 0, 0, nullptr); 387 | } 388 | 389 | protected: 390 | 391 | bool init_cl(); 392 | bool compile_kernel(); 393 | void log_compile_error(); 394 | 395 | // opencl handles 396 | cl_context _cl_context; 397 | cl_device_id _cl_device; 398 | cl_command_queue _cl_queue; 399 | 400 | // kernels 401 | cl_program _cl_program; 402 | cl_kernel _cl_kernel; 403 | 404 | // framebuffers 405 | cl_mem _cl_fb_color; 406 | cl_mem _cl_fb_depth; 407 | 408 | // triangle list and triangle setup 409 | cl_mem _cl_triangles; 410 | cl_mem _cl_triangle_setup; 411 | 412 | gl_context_t *_cxt; 413 | frame_t _frame; 414 | 415 | // opencl platforms 416 | std::array _cl_platforms; 417 | }; 418 | 419 | bool rast_opencl_t::init_cl() { 420 | 421 | cl_int ret = CL_SUCCESS; 422 | 423 | cl_uint num_platforms = 0; 424 | ret = clGetPlatformIDs( 425 | _cl_platforms.size(), 426 | _cl_platforms.data(), 427 | &num_platforms); 428 | if (CL_SUCCESS != ret) 429 | return false; 430 | 431 | ret = clGetDeviceIDs( 432 | _cl_platforms[0], // platform selection 433 | CL_DEVICE_TYPE_CPU, 434 | 1, // number of devices 435 | &_cl_device, // device pointer 436 | nullptr); 437 | if (CL_SUCCESS != ret) 438 | return false; 439 | 440 | _cl_context = clCreateContext( 441 | nullptr, 442 | 1, 443 | &_cl_device, 444 | nullptr, 445 | nullptr, 446 | nullptr); 447 | if (!_cl_context) { 448 | return false; 449 | } 450 
| 451 | _cl_queue = clCreateCommandQueue( 452 | _cl_context, 453 | _cl_device, 454 | cl_command_queue_properties(0), 455 | nullptr); 456 | if (!_cl_queue) { 457 | return false; 458 | } 459 | 460 | return true; 461 | } 462 | 463 | bool rast_opencl_t::compile_kernel() { 464 | 465 | cl_int ret = CL_SUCCESS; 466 | 467 | _cl_program = clCreateProgramWithSource( 468 | _cl_context, 469 | 1, // number of source buffers 470 | &cl_kernel_raster, // source code 471 | nullptr, // source length 472 | nullptr); // error code ret 473 | if (!_cl_program) { 474 | return false; 475 | } 476 | 477 | ret = clBuildProgram( 478 | _cl_program, 479 | 0, // num devices 480 | nullptr, // device list 481 | "-DCHUNK_SIZE=32", // options 482 | nullptr, // notify callback 483 | nullptr); // user data 484 | 485 | _cl_kernel = clCreateKernel( 486 | _cl_program, 487 | "raster", // kernel name 488 | nullptr); 489 | if (!_cl_kernel) { 490 | log_compile_error(); 491 | return false; 492 | } 493 | 494 | return true; 495 | } 496 | 497 | void rast_opencl_t::log_compile_error() { 498 | std::array error; 499 | error[0] = '\0'; 500 | size_t written = 0; 501 | cl_uint ret = clGetProgramBuildInfo( 502 | _cl_program, 503 | _cl_device, 504 | CL_PROGRAM_BUILD_LOG, 505 | error.size(), 506 | error.data(), 507 | &written); 508 | printf("%s\n\n", error.data()); 509 | } 510 | 511 | extern "C" { 512 | __declspec(dllexport) raster_t *raster_create() { 513 | return new rast_opencl_t; 514 | } 515 | 516 | __declspec(dllexport) void raster_release(raster_t *r) { 517 | delete r; 518 | } 519 | }; 520 | -------------------------------------------------------------------------------- /rast_reference/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB KERNELS 2 | kernels/*.cpp 3 | kernels/*.h) 4 | add_library( 5 | softgl_rast_reference_kernels 6 | ${KERNELS}) 7 | 8 | file(GLOB SOURCE *.cpp *.h) 9 | add_library( 10 | softgl_rast_reference 11 | SHARED 12 | ${SOURCE}) 13 | 
14 | target_link_libraries( 15 | softgl_rast_reference 16 | softgl_rast_reference_kernels) 17 | 18 | target_compile_definitions( 19 | softgl_rast_reference_kernels 20 | PUBLIC 21 | -DUSE_APROX_TEMPLATE) 22 | -------------------------------------------------------------------------------- /rast_reference/kernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "../source/context.h" 9 | #include "../source/math.h" 10 | #include "../source/raster.h" 11 | #include "../source/texture.h" 12 | 13 | 14 | static const int32_t BLOCK_SIZE = 16; 15 | static const int32_t BLOCK_MASK = ~(BLOCK_SIZE - 1); 16 | 17 | struct triangle_setup_t { 18 | 19 | enum { 20 | slot_w0, // triangle weight 0 21 | slot_w1, // triangle weight 1 22 | slot_iw, // inverse w 23 | slot_z, // z / w 24 | 25 | slot_a, // a / w 26 | slot_r, // r / w 27 | slot_g, // g / w 28 | slot_b, // b / w 29 | 30 | slot_u, // u / w 31 | slot_v, // v / w 32 | _slot_count_ 33 | }; 34 | // per-slot interpolant terms, indexed with the slot_* values above 35 | std::array v; 36 | std::array vx; 37 | std::array vy; 38 | 39 | std::array edge; 40 | 41 | bool affine; 42 | recti_t bound; 43 | uint32_t mip_level; 44 | }; 45 | // colour and depth pointers together with a width and a height 46 | struct frame_t { 47 | uint32_t *_pixels; 48 | float *_depth; 49 | int32_t _width; 50 | int32_t _height; 51 | }; 52 | 53 | extern const std::array mip_log_table; 54 | // picks a mip level by indexing mip_log_table with |uv_area| / |tri_area|, truncated to an integer and clamped to the last table entry. NOTE(review): tri_area == 0 divides by zero before the integer cast - confirm callers never pass a zero area 55 | static inline uint32_t get_mip_level(float tri_area, 56 | float uv_area) { 57 | const float factor = fabsf(uv_area) / fabsf(tri_area); 58 | uint32_t ifactor = uint32_t(factor); 59 | uint32_t max = mip_log_table.size() - 1; 60 | uint32_t index = std::min(ifactor, max); 61 | return mip_log_table[index]; 62 | } 63 | 64 | static inline float triangle_area(const float2 &v0, 65 | const float2 &v1, 66 | const float2 &v2) { 67 | 68 | // area is found using part of the vector product 69 | 70 | // x = a2 * b3 - a3 * b2 71 | // y = a1 * b3 - a3 * b1 72 | // z = a1 * b2 - a2 * b1 73 | 74 | //
where a = v0 -> v1 75 | // where b = v0 -> v2 76 | 77 | // we only care about the z component, which is the signed area of the 78 | // parallelogram spanned by a and b (twice the triangle's area). no 0.5f 79 | // is applied, so the doubled, signed value is what gets returned. 80 | 81 | return (v1.x - v0.x) * (v2.y - v0.y) - 82 | (v2.x - v0.x) * (v1.y - v0.y); 83 | } 84 | 85 | // returns dot(normal, poe), the constant term of an edge's gradient field: 86 | // normal: the normal for that edge 87 | // poe: a point on the edge 88 | // (no sample point is taken here; normal and poe are the only parameters) 89 | static inline float evaluate(const float2 &normal, 90 | const float2 &poe) { 91 | return normal.x * poe.x + normal.y * poe.y; 92 | } 93 | // builds the four values v + vx * {0, 1, 2, 3}; _mm_set_ps takes the highest element first, so element 0 holds v itself 94 | static inline __m128 step_x(float v, float vx) { 95 | return _mm_set_ps(v + vx * 3.f, 96 | v + vx * 2.f, 97 | v + vx * 1.f, 98 | v + vx * 0.f); 99 | } 100 | 101 | static inline bool affine_heuristic(const recti_t &r) { 102 | // heuristic decides if this is a small triangle using 103 | // the square of the max edge to avoid slim triangles being 104 | // flagged 105 | const int32_t max = std::max(r.dx(), r.dy()); 106 | return (max * max) < (64 * 64); 107 | } 108 | // packs the signs of the edge normal into two bits: bit 0 = (x > 0), bit 1 = (y > 0) 109 | static inline int32_t normal_quadrant(const float3 &edge) { 110 | return (edge.x > 0.f) | ((edge.y > 0.f) << 1); 111 | } 112 | // true when dot(p, edge.xy) is strictly below edge.z 113 | static inline bool test_out(const float3 &edge, 114 | const float2 &p) { 115 | return (p.x * edge.x + p.y * edge.y) < edge.z; 116 | } 117 | // true when dot(p, edge.xy) is strictly above edge.z 118 | static inline bool test_in(const float3 &edge, 119 | const float2 &p) { 120 | return (p.x * edge.x + p.y * edge.y) > edge.z; 121 | } 122 | // runs test_out on the single box corner chosen by the quadrant of the edge normal 123 | static inline bool trivial_out(const float3 &e, 124 | const float2 &min, 125 | const float2 & max) { 126 | switch (normal_quadrant( e )) { 127 | case 3: return test_out(e, float2{max.x, max.y}); // (+,+) -> box (-, -) 128 | case 2: return test_out(e, float2{min.x, max.y}); // (-,+) -> box (+, -) 129 | case 1: return test_out(e, float2{max.x, min.y}); // (+,-) -> box (-, +) 130 | case 0: return test_out(e, float2{min.x, min.y});
// (-,-) -> box (+, +) 131 | default: __assume(false); 132 | } 133 | return false; 134 | } 135 | // runs test_in on the single box corner chosen by the quadrant of the edge normal 136 | static inline bool trivial_in(const float3 &e, 137 | const float2 &min, 138 | const float2 & max) { 139 | switch (normal_quadrant( e )) { 140 | case 3: return test_in(e, float2{min.x, min.y}); // (+,+) -> box (+, +) 141 | case 2: return test_in(e, float2{max.x, min.y}); // (-,+) -> box (-, +) 142 | case 1: return test_in(e, float2{min.x, max.y}); // (+,-) -> box (+, -) 143 | case 0: return test_in(e, float2{max.x, max.y}); // (-,-) -> box (-, -) 144 | default: __assume(false); 145 | } 146 | return false; 147 | } 148 | // returns the per-byte blend factor selected by the GL blend enum `mode`; the alpha cases replicate one byte of each pixel across all four bytes 149 | template 150 | static __forceinline __m128i blend_factor(__m128i src, __m128i dst) { 151 | 152 | // Input layout: 153 | // ARGB colour layout 154 | // NOTE(review): every *_ALPHA case below masks 0x000000ff, the lowest byte of each 32 bit pixel - confirm that byte really carries alpha under the layout stated above 155 | switch (mode) { 156 | case GL_ZERO: 157 | return _mm_setzero_si128(); 158 | case GL_ONE: 159 | return _mm_set1_epi8((char)0xff); 160 | case GL_DST_COLOR: 161 | return dst; 162 | case GL_SRC_COLOR: 163 | return src; 164 | case GL_ONE_MINUS_DST_COLOR: 165 | return _mm_sub_epi8(_mm_set1_epi8((char)0xff), dst); 166 | case GL_ONE_MINUS_SRC_COLOR: 167 | return _mm_sub_epi8(_mm_set1_epi8((char)0xff), src); 168 | case GL_SRC_ALPHA: { 169 | const __m128i a = _mm_and_si128(src, _mm_set1_epi32(0x000000ff)); 170 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16)); 171 | return _mm_or_si128(b, _mm_slli_epi32(b, 8)); 172 | } 173 | case GL_ONE_MINUS_SRC_ALPHA: { 174 | const __m128i a = _mm_and_si128(src, _mm_set1_epi32(0x000000ff)); 175 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16)); 176 | const __m128i c = _mm_or_si128(b, _mm_slli_epi32(b, 8)); 177 | return _mm_sub_epi8(_mm_set1_epi32(0xffffffff), c); 178 | } 179 | case GL_DST_ALPHA: { 180 | const __m128i a = _mm_and_si128(dst, _mm_set1_epi32(0x000000ff)); 181 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16)); 182 | return _mm_or_si128(b, _mm_slli_epi32(b, 8)); 183 | } 184 | case GL_ONE_MINUS_DST_ALPHA: { 185 | const __m128i a =
_mm_and_si128(dst, _mm_set1_epi32(0x000000ff)); 186 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16)); 187 | const __m128i c = _mm_or_si128(b, _mm_slli_epi32(b, 8)); 188 | return _mm_sub_epi8(_mm_set1_epi32(0xffffffff), c); 189 | } 190 | case GL_SRC_ALPHA_SATURATE: 191 | return _mm_setzero_si128(); // TODO: not implemented, currently yields a zero factor 192 | default: 193 | __assume(0); 194 | } 195 | } 196 | 197 | // multiply packed 8 bit unsigned bytes, keeping (x * y) >> 8 of each 16 bit product (the #else branch is the one compiled) 198 | static __forceinline __m128i _mm_mulhi_epi8( __m128i x, __m128i y ) { 199 | #if 0 200 | // unpack and multiply 201 | __m128i dst_even = _mm_mullo_epi16(x, y); 202 | __m128i dst_odd = _mm_mullo_epi16(_mm_srli_epi16(x, 8), 203 | _mm_srli_epi16(y, 8)); 204 | // repack 205 | return _mm_or_si128(_mm_slli_epi16(dst_odd, 8), 206 | _mm_srli_epi16(_mm_slli_epi16(dst_even,8), 8)); 207 | #else 208 | const __m128i zero = _mm_setzero_si128(); 209 | 210 | // unpack 8 bits into 16 bits 211 | __m128i xlo = _mm_unpacklo_epi8( x, zero ); 212 | __m128i ylo = _mm_unpacklo_epi8( y, zero ); 213 | __m128i xhi = _mm_unpackhi_epi8( x, zero ); 214 | __m128i yhi = _mm_unpackhi_epi8( y, zero ); 215 | 216 | // x * y 217 | xlo = _mm_mullo_epi16( xlo, ylo ); 218 | xhi = _mm_mullo_epi16( xhi, yhi ); 219 | 220 | // >> 8 221 | xlo = _mm_srli_epi16( xlo, 8 ); 222 | xhi = _mm_srli_epi16( xhi, 8 ); 223 | 224 | // saturate and repack into 8 bits 225 | return _mm_packus_epi16( xlo, xhi ); 226 | #endif 227 | } 228 | 229 | // saturate(src * src_factor + dst * dst_factor); each product goes through _mm_mulhi_epi8, so a factor byte of 0xff scales by 255/256 230 | template 231 | static __forceinline __m128i blend_sum(__m128i src, __m128i dst) { 232 | const __m128i sf = blend_factor(src, dst); 233 | const __m128i df = blend_factor(src, dst); 234 | return _mm_adds_epu8(_mm_mulhi_epi8(src, sf), 235 | _mm_mulhi_epi8(dst, df)); 236 | } 237 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_dst_src.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 |
#define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_DST_COLOR 8 | #define DST_BLEND GL_SRC_COLOR 9 | #define KERNEL_NAME rast_tex_dst_src 10 | 11 | #if USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_dst_zero.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 | #define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_DST_COLOR 8 | #define DST_BLEND GL_ZERO 9 | #define KERNEL_NAME rast_tex_dst_zero 10 | 11 | #if USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_one_msa.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 | #define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_ONE 8 | #define DST_BLEND GL_ONE_MINUS_SRC_ALPHA 9 | #define KERNEL_NAME rast_tex_one_msa 10 | 11 | #if USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_one_msc.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 | #define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_ONE 8 | #define DST_BLEND GL_ONE_MINUS_SRC_COLOR 9 | #define KERNEL_NAME rast_tex_one_msc 10 | 11 | #if 
USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_one_one.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 | #define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_ONE 8 | #define DST_BLEND GL_ONE 9 | #define KERNEL_NAME rast_tex_one_one 10 | 11 | #if USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_one_zero.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 | #define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_ONE 8 | #define DST_BLEND GL_ZERO 9 | #define KERNEL_NAME rast_tex_one_zero 10 | 11 | #if USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/rast_tex_sa_msa.cpp: -------------------------------------------------------------------------------- 1 | #include "../kernel.h" 2 | 3 | #define DEPTH_TEST 1 4 | #define DEPTH_WRITE 1 5 | #define COLOR_WRITE 1 6 | #define DEPTH_CMP _mm_cmple_ps 7 | #define SRC_BLEND GL_SRC_ALPHA 8 | #define DST_BLEND GL_ONE_MINUS_SRC_ALPHA 9 | #define KERNEL_NAME rast_tex_sa_msa 10 | 11 | #if USE_APROX_TEMPLATE 12 | #include "template_aprox.h" 13 | #else 14 | #include "template.h" 15 | #endif 16 | -------------------------------------------------------------------------------- /rast_reference/kernels/template.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | static inline void stamp_affine( 4 | const triangle_setup_t &s, 5 | const texture_t &tex, 6 | const float2 origin, 7 | uint32_t *color, 8 | float *depth, 9 | uint32_t pitch) { 10 | 11 | const float v0 = (s.vx[s.slot_w0] * origin.x + s.vy[s.slot_w0] * origin.y) - s.v[s.slot_w0]; 12 | const float v1 = (s.vx[s.slot_w1] * origin.x + s.vy[s.slot_w1] * origin.y) - s.v[s.slot_w1]; 13 | const float iw = (s.vx[s.slot_iw] * origin.x + s.vy[s.slot_iw] * origin.y) - s.v[s.slot_iw]; 14 | const float z = (s.vx[s.slot_z ] * origin.x + s.vy[s.slot_z ] * origin.y) - s.v[s.slot_z ]; 15 | const float u = (s.vx[s.slot_u ] * origin.x + s.vy[s.slot_u ] * origin.y) - s.v[s.slot_u ]; 16 | const float v = (s.vx[s.slot_v ] * origin.x + s.vy[s.slot_v ] * origin.y) - s.v[s.slot_v ]; 17 | 18 | __m128 Sv0x = _mm_set1_ps(s.vx[s.slot_w0] * 4.f); 19 | __m128 Sv0y = _mm_set1_ps(s.vy[s.slot_w0]); 20 | __m128 Sv0 = step_x(v0, s.vx[s.slot_w0]); 21 | 22 | __m128 Sv1x = _mm_set1_ps(s.vx[s.slot_w1] * 4.f); 23 | __m128 Sv1y = _mm_set1_ps(s.vy[s.slot_w1]); 24 | __m128 Sv1 = step_x(v1, s.vx[s.slot_w1]); 25 | 26 | __m128 Szx = _mm_set1_ps(s.vx[s.slot_z] * 4.f); 27 | __m128 Szy = _mm_set1_ps(s.vy[s.slot_z]); 28 | __m128 Sz = step_x(z, s.vx[s.slot_z]); 29 | 30 | const int32_t tw = tex._width >> s.mip_level; 31 | const int32_t twm = tw - 1; 32 | __m128 Sux = _mm_set1_ps(s.vx[s.slot_u] * 4.f * tw); 33 | __m128 Suy = _mm_set1_ps(s.vy[s.slot_u] * tw); 34 | __m128 Su = step_x(u * tw, s.vx[s.slot_u] * tw); 35 | __m128i Stwm = _mm_set_epi32(twm, twm, twm, twm); 36 | 37 | const int32_t th = tex._height >> s.mip_level; 38 | const int32_t thm = th - 1; 39 | __m128 Svx = _mm_set1_ps(s.vx[s.slot_v] * 4.f * th); 40 | __m128 Svy = _mm_set1_ps(s.vy[s.slot_v] * th); 41 | __m128 Sv = step_x(v * th, s.vx[s.slot_v] * th); 42 | __m128i Sthm = _mm_set_epi32(thm, thm, thm, thm); 43 | 44 | const int32_t wshift = std::max(0, 
int32_t(tex._wshift) - int32_t(s.mip_level)); 45 | const uint32_t *texel = tex._pixels[s.mip_level]; 46 | 47 | for (int y = 0; y < BLOCK_SIZE; ++y) { 48 | 49 | __m128 Sv0_ = Sv0; 50 | __m128 Sv1_ = Sv1; 51 | __m128 Sz_ = Sz; 52 | __m128 Su_ = Su; 53 | __m128 Sv_ = Sv; 54 | 55 | for (int x = 0; x < BLOCK_SIZE; x += 4) { 56 | 57 | // third edge coefficient 58 | // const float v2_ = 1.f - (v0_ + v1_); 59 | __m128 Sv2_ = _mm_sub_ps(_mm_set_ps1(1.f), _mm_add_ps(Sv0_, Sv1_)); 60 | 61 | // load depth values 62 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER) 63 | __m128 zbuf = _mm_load_ps(depth + x); 64 | #endif 65 | 66 | // triangle edge test 67 | // if (v0_ > 0.f && v1_ > 0.f && v2_ > 0.f) { 68 | __m128 m0 = _mm_cmpge_ps(Sv0_, _mm_setzero_ps()); 69 | __m128 m1 = _mm_cmpge_ps(Sv1_, _mm_setzero_ps()); 70 | __m128 m2 = _mm_cmpge_ps(Sv2_, _mm_setzero_ps()); 71 | 72 | // triangle edge test and depth (together) 73 | // if (zed <= depth[x]) { 74 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER) 75 | __m128 keep = _mm_and_ps(_mm_and_ps(m0, DEPTH_CMP(Sz_, zbuf)), 76 | _mm_and_ps(m1, m2)); 77 | #else 78 | __m128 keep = _mm_and_ps(m0, _mm_and_ps(m1, m2)); 79 | #endif 80 | 81 | #if DEPTH_WRITE 82 | // depth write 83 | // depth[x] = zed; 84 | _mm_maskstore_ps(depth + x, _mm_castps_si128(keep), Sz_); 85 | #endif 86 | 87 | #if COLOR_WRITE 88 | // u / (1/w), v / (1/w) 89 | // ((int32_t(u/iw)&twm) + 90 | // (int32_t(u/iw)&twm) << tex._wshift) 91 | const __m128i tu = _mm_and_si128(_mm_cvtps_epi32(Su_), Stwm); 92 | const __m128i tv = _mm_and_si128(_mm_cvtps_epi32(Sv_), Sthm); 93 | const __m128i ti = _mm_add_epi32(tu, _mm_slli_epi32(tv, wshift)); 94 | 95 | // extract texture indices 96 | const uint32_t ti0 = _mm_extract_epi32(ti, 0); 97 | const uint32_t ti1 = _mm_extract_epi32(ti, 1); 98 | const uint32_t ti2 = _mm_extract_epi32(ti, 2); 99 | const uint32_t ti3 = _mm_extract_epi32(ti, 3); 100 | 101 | // load from the texture 102 | const uint32_t 
// Perspective-correct, edge-tested stamp of one BLOCK_SIZE x BLOCK_SIZE
// framebuffer block, processing four horizontally adjacent pixels per
// iteration.
//
//   s      - per-triangle interpolant planes (s.v / s.vx / s.vy per slot),
//            plus the mip level to sample
//   tex    - texture to sample
//   origin - framebuffer coordinate of the block's top-left pixel
//   color  - pointer to the block's first colour pixel
//   depth  - pointer to the block's first depth value
//   pitch  - elements per framebuffer row (used for both colour and depth)
//
// Behaviour is selected at compile time by the DEPTH_TEST, DEPTH_CMP,
// DEPTH_WRITE, COLOR_WRITE, SRC_BLEND and DST_BLEND macros supplied by the
// including kernel.
static inline void stamp(
    const triangle_setup_t &s,
    const texture_t &tex,
    const float2 origin,
    uint32_t *color,
    float *depth,
    uint32_t pitch) {

  // evaluate every interpolant plane at the block origin
  const float v0 = (s.vx[s.slot_w0] * origin.x + s.vy[s.slot_w0] * origin.y) - s.v[s.slot_w0];
  const float v1 = (s.vx[s.slot_w1] * origin.x + s.vy[s.slot_w1] * origin.y) - s.v[s.slot_w1];
  const float iw = (s.vx[s.slot_iw] * origin.x + s.vy[s.slot_iw] * origin.y) - s.v[s.slot_iw];
  const float z  = (s.vx[s.slot_z ] * origin.x + s.vy[s.slot_z ] * origin.y) - s.v[s.slot_z ];
  const float u  = (s.vx[s.slot_u ] * origin.x + s.vy[s.slot_u ] * origin.y) - s.v[s.slot_u ];
  const float v  = (s.vx[s.slot_v ] * origin.x + s.vy[s.slot_v ] * origin.y) - s.v[s.slot_v ];

  // For each interpolant: S?x is the step for four pixels in x, S?y the step
  // for one row in y, and S? the starting vector for the current row.
  // NOTE(review): step_x is defined outside this view; presumably it returns
  // the four lane values {a, a+dx, a+2dx, a+3dx} - confirm.
  __m128 Sv0x = _mm_set1_ps(s.vx[s.slot_w0] * 4.f);
  __m128 Sv0y = _mm_set1_ps(s.vy[s.slot_w0]);
  __m128 Sv0  = step_x(v0, s.vx[s.slot_w0]);

  __m128 Sv1x = _mm_set1_ps(s.vx[s.slot_w1] * 4.f);
  __m128 Sv1y = _mm_set1_ps(s.vy[s.slot_w1]);
  __m128 Sv1  = step_x(v1, s.vx[s.slot_w1]);

  __m128 Siwx = _mm_set1_ps(s.vx[s.slot_iw] * 4.f);
  __m128 Siwy = _mm_set1_ps(s.vy[s.slot_iw]);
  __m128 Siw  = step_x(iw, s.vx[s.slot_iw]);

  __m128 Szx = _mm_set1_ps(s.vx[s.slot_z] * 4.f);
  __m128 Szy = _mm_set1_ps(s.vy[s.slot_z]);
  __m128 Sz  = step_x(z, s.vx[s.slot_z]);

  // u is pre-scaled by the width of the selected mip level; twm is the
  // coordinate mask applied after the perspective divide
  // NOTE(review): masking with (size - 1) only wraps correctly for
  // power-of-two texture sizes - confirm textures are always pow2.
  const int32_t tw  = tex._width >> s.mip_level;
  const int32_t twm = tw - 1;
  __m128  Sux  = _mm_set1_ps(s.vx[s.slot_u] * 4.f * tw);
  __m128  Suy  = _mm_set1_ps(s.vy[s.slot_u] * tw);
  __m128  Su   = step_x(u * tw, s.vx[s.slot_u] * tw);
  __m128i Stwm = _mm_set_epi32(twm, twm, twm, twm);

  // v is pre-scaled by the height of the selected mip level
  const int32_t th  = tex._height >> s.mip_level;
  const int32_t thm = th - 1;
  __m128  Svx  = _mm_set1_ps(s.vx[s.slot_v] * 4.f * th);
  __m128  Svy  = _mm_set1_ps(s.vy[s.slot_v] * th);
  __m128  Sv   = step_x(v * th, s.vx[s.slot_v] * th);
  __m128i Sthm = _mm_set_epi32(thm, thm, thm, thm);

  // row shift for the selected mip level (clamped so it never goes negative)
  const int32_t wshift = std::max(0, int32_t(tex._wshift) - int32_t(s.mip_level));
  const uint32_t *texel = tex._pixels[s.mip_level];

  for (int y = 0; y < BLOCK_SIZE; ++y) {

    // per-row working copies that are stepped along x
    __m128 Sv0_ = Sv0;
    __m128 Sv1_ = Sv1;
    __m128 Siw_ = Siw;
    __m128 Sz_  = Sz;
    __m128 Su_  = Su;
    __m128 Sv_  = Sv;

    for (int x = 0; x < BLOCK_SIZE; x += 4) {

      // third edge coefficient
      // const float v2_ = 1.f - (v0_ + v1_);
      const __m128 Sv2_ = _mm_sub_ps(_mm_set_ps1(1.f), _mm_add_ps(Sv0_, Sv1_));

      // load depth values (aligned load: depth + x must be 16 byte aligned)
#if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
      const __m128 zbuf = _mm_load_ps(depth + x);
#endif

      // triangle edge test
      // if (v0_ >= 0.f && v1_ >= 0.f && v2_ >= 0.f) {
      const __m128 m0 = _mm_cmpge_ps(Sv0_, _mm_setzero_ps());
      const __m128 m1 = _mm_cmpge_ps(Sv1_, _mm_setzero_ps());
      const __m128 m2 = _mm_cmpge_ps(Sv2_, _mm_setzero_ps());

      // triangle edge test and depth (together); `keep` is the per-lane
      // write mask used by both the depth and the colour store
      // if (DEPTH_CMP(zed, depth[x])) {
#if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
      const __m128 keep = _mm_and_ps(_mm_and_ps(m0, DEPTH_CMP(Sz_, zbuf)),
                                     _mm_and_ps(m1, m2));
#else
      const __m128 keep = _mm_and_ps(m0, _mm_and_ps(m1, m2));
#endif

#if DEPTH_WRITE
      // depth write
      // depth[x] = zed;
      _mm_maskstore_ps(depth + x, _mm_castps_si128(keep), Sz_);
#endif

#if COLOR_WRITE
      // find 1 / (1/w) (approximate reciprocal)
      const __m128 rw = _mm_rcp_ps(Siw_);

      // u / (1/w), v / (1/w)
      //  ((int32_t(u/iw) & twm) +
      //  ((int32_t(v/iw) & thm) << wshift))
      const __m128i tu = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Su_, rw)), Stwm);
      const __m128i tv = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Sv_, rw)), Sthm);
      const __m128i ti = _mm_add_epi32(tu, _mm_slli_epi32(tv, wshift));

      // extract texture indices
      const uint32_t ti0 = _mm_extract_epi32(ti, 0);
      const uint32_t ti1 = _mm_extract_epi32(ti, 1);
      const uint32_t ti2 = _mm_extract_epi32(ti, 2);
      const uint32_t ti3 = _mm_extract_epi32(ti, 3);

      // load from the texture; all four lanes are fetched regardless of
      // `keep`, rejected lanes are simply not written below
      const uint32_t tc0 = texel[ti0];
      const uint32_t tc1 = texel[ti1];
      const uint32_t tc2 = texel[ti2];
      const uint32_t tc3 = texel[ti3];

      // blend equation
#if (SRC_BLEND == GL_ONE) && (DST_BLEND == GL_ZERO)
      // special case for one and zero since out is directly our source
      const __m128i out = _mm_set_epi32(tc3, tc2, tc1, tc0);
#else
      // generic case where we have an arbitrary blending equation
      // (aligned load: color + x must be 16 byte aligned)
      __m128i src = _mm_set_epi32(tc3, tc2, tc1, tc0);
      __m128i dst = _mm_load_si128((__m128i*)(color + x));
      __m128i out = blend_sum(src, dst);
#endif

      // color write (only lanes that passed the tests)
      _mm_maskstore_epi32((int*)color + x, _mm_castps_si128(keep), out);
#endif

      // x-axis step (four pixels)
      Sv0_ = _mm_add_ps(Sv0_, Sv0x);
      Sv1_ = _mm_add_ps(Sv1_, Sv1x);
      Siw_ = _mm_add_ps(Siw_, Siwx);
      Sz_  = _mm_add_ps(Sz_ , Szx );
      Su_  = _mm_add_ps(Su_ , Sux );
      Sv_  = _mm_add_ps(Sv_ , Svx );
    }

    // y-axis step (one row)
    Sv0 = _mm_add_ps(Sv0, Sv0y);
    Sv1 = _mm_add_ps(Sv1, Sv1y);
    Siw = _mm_add_ps(Siw, Siwy);
    Sz  = _mm_add_ps(Sz , Szy );
    Su  = _mm_add_ps(Su , Suy );
    Sv  = _mm_add_ps(Sv , Svy );

    // framebuffer step
    color += pitch;
    depth += pitch;
  }
}
* tw); 319 | __m128 Su = step_x(u * tw, s.vx[s.slot_u] * tw); 320 | __m128i Stwm = _mm_set_epi32(twm, twm, twm, twm); 321 | 322 | const int32_t th = tex._height >> s.mip_level; 323 | const int32_t thm = th - 1; 324 | __m128 Svx = _mm_set1_ps(s.vx[s.slot_v] * 4.f * th); 325 | __m128 Svy = _mm_set1_ps(s.vy[s.slot_v] * th); 326 | __m128 Sv = step_x(v * th, s.vx[s.slot_v] * th); 327 | __m128i Sthm = _mm_set_epi32(thm, thm, thm, thm); 328 | 329 | const int32_t wshift = std::max(0, int32_t(tex._wshift) - int32_t(s.mip_level)); 330 | const uint32_t *texel = tex._pixels[s.mip_level]; 331 | 332 | for (int y = 0; y < BLOCK_SIZE; ++y) { 333 | 334 | __m128 Siw_ = Siw; 335 | __m128 Sz_ = Sz; 336 | __m128 Su_ = Su; 337 | __m128 Sv_ = Sv; 338 | 339 | for (int x = 0; x < BLOCK_SIZE; x += 4) { 340 | 341 | // load depth values 342 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER) 343 | const __m128 zbuf = _mm_load_ps(depth + x); 344 | #endif 345 | 346 | // triangle edge test and depth (together) 347 | // if (zed <= depth[x]) { 348 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER) 349 | const __m128 keep = DEPTH_CMP(Sz_, zbuf); 350 | #else 351 | const __m128 keep = _mm_castsi128_ps (_mm_set1_epi32(0xffffffff)); 352 | #endif 353 | 354 | #if DEPTH_WRITE 355 | // depth write 356 | // depth[x] = zed; 357 | _mm_maskstore_ps(depth + x, _mm_castps_si128(keep), Sz_); 358 | #endif 359 | 360 | #if COLOR_WRITE 361 | // find 1 / (1/w) 362 | const __m128 rw = _mm_rcp_ps(Siw_); 363 | 364 | // u / (1/w), v / (1/w) 365 | // ((int32_t(u/iw)&twm) + 366 | // (int32_t(u/iw)&twm) << tex._wshift) 367 | const __m128i tu = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Su_, rw)), Stwm); 368 | const __m128i tv = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Sv_, rw)), Sthm); 369 | const __m128i ti = _mm_add_epi32(tu, _mm_slli_epi32(tv, wshift)); 370 | 371 | // extract texture indices 372 | const uint32_t ti0 = _mm_extract_epi32(ti, 0); 373 | const uint32_t ti1 = 
_mm_extract_epi32(ti, 1); 374 | const uint32_t ti2 = _mm_extract_epi32(ti, 2); 375 | const uint32_t ti3 = _mm_extract_epi32(ti, 3); 376 | 377 | // load from the texture 378 | const uint32_t tc0 = texel[ti0]; 379 | const uint32_t tc1 = texel[ti1]; 380 | const uint32_t tc2 = texel[ti2]; 381 | const uint32_t tc3 = texel[ti3]; 382 | 383 | // blend equation 384 | #if (SRC_BLEND == GL_ONE) && (DST_BLEND == GL_ZERO) 385 | // special case for one and zero since out is directly our source 386 | const __m128i out = _mm_set_epi32(tc3, tc2, tc1, tc0); 387 | #else 388 | // generic case where we have an arbitary blending equation 389 | const __m128i src = _mm_set_epi32(tc3, tc2, tc1, tc0); 390 | const __m128i dst = _mm_load_si128((__m128i*)(color + x)); 391 | const __m128i out = blend_sum(src, dst); 392 | #endif 393 | 394 | // color write 395 | _mm_maskstore_epi32((int*)color + x, _mm_castps_si128(keep), out); 396 | #endif 397 | 398 | // x-axis step 399 | Siw_ = _mm_add_ps(Siw_, Siwx); 400 | Sz_ = _mm_add_ps(Sz_ , Szx ); 401 | Su_ = _mm_add_ps(Su_ , Sux ); 402 | Sv_ = _mm_add_ps(Sv_ , Svx ); 403 | } 404 | 405 | // y-axis step 406 | Siw = _mm_add_ps(Siw, Siwy); 407 | Sz = _mm_add_ps(Sz , Szy ); 408 | Su = _mm_add_ps(Su , Suy ); 409 | Sv = _mm_add_ps(Sv , Svy ); 410 | 411 | // framebuffer step 412 | color += pitch; 413 | depth += pitch; 414 | } 415 | } 416 | 417 | void KERNEL_NAME ( 418 | const frame_t &f, 419 | const triangle_setup_t &s, 420 | const texture_t &tex) 421 | { 422 | const recti_t rect = { s.bound.x0 & BLOCK_MASK, 423 | s.bound.y0 & BLOCK_MASK, 424 | (s.bound.x1 + BLOCK_SIZE - 1) & BLOCK_MASK, 425 | (s.bound.y1 + BLOCK_SIZE - 1) & BLOCK_MASK}; 426 | const uint32_t pitch = f._width; 427 | uint32_t *color = f._pixels + (rect.y0 * pitch); 428 | float *depth = f._depth + (rect.y0 * pitch); 429 | 430 | // for small triangles we can render them affine with out much visible 431 | // distortion giving us a speed boost. 
432 | if (s.affine) { 433 | for (int32_t y = rect.y0; y < rect.y1; y += BLOCK_SIZE) { 434 | for (int32_t x = rect.x0; x < rect.x1; x += BLOCK_SIZE) { 435 | stamp_affine(s, tex, float2{float(x), float(y)}, color + x, depth + x, pitch); 436 | } 437 | // step the framebuffer 438 | color += pitch * BLOCK_SIZE; 439 | depth += pitch * BLOCK_SIZE; 440 | } 441 | } 442 | else { 443 | const auto &e0 = s.edge[0]; 444 | const auto &e1 = s.edge[1]; 445 | const auto &e2 = s.edge[2]; 446 | for (int32_t y = rect.y0; y < rect.y1; y += BLOCK_SIZE) { 447 | for (int32_t x = rect.x0; x < rect.x1; x += BLOCK_SIZE) { 448 | const float2 min{float(x), float(y)}; 449 | const float2 max{float(x + BLOCK_SIZE), float(y + BLOCK_SIZE)}; 450 | 451 | // trivial out cases 452 | if (trivial_out(e0, min, max)) continue; 453 | if (trivial_out(e1, min, max)) continue; 454 | if (trivial_out(e2, min, max)) continue; 455 | 456 | // if we are fully inside the triangle we dont need to do any edge 457 | // tests so we special case the stamp 458 | if (trivial_in(e0, min, max) && 459 | trivial_in(e1, min, max) && 460 | trivial_in(e2, min, max)) { 461 | stamp_ti(s, tex, min, color + x, depth + x, pitch); 462 | } else { 463 | stamp(s, tex, min, color + x, depth + x, pitch); 464 | } 465 | } 466 | // step the framebuffer 467 | color += pitch * BLOCK_SIZE; 468 | depth += pitch * BLOCK_SIZE; 469 | } 470 | } 471 | } 472 | -------------------------------------------------------------------------------- /rast_reference/rast_reference.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "../source/context.h" 9 | #include "../source/math.h" 10 | #include "../source/raster.h" 11 | #include "../source/texture.h" 12 | 13 | #include "kernel.h" 14 | 15 | typedef void (draw_func_t)( 16 | const frame_t &f, 17 | const triangle_setup_t &s, 18 | const texture_t &tex); 19 | 20 | draw_func_t rast_tex_one_zero; 
// Lookup table of mip levels (values 0 to 5, non-decreasing with the index).
// NOTE(review): presumably indexed by the texel-to-pixel area ratio inside
// get_mip_level, which is not visible here - confirm the index range.
// ~log3.75 (should be log4 but this looks nice)
const std::array mip_log_table = {
    0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5,
};
0xa : 0x0; 51 | } 52 | 53 | static inline constexpr uint32_t blend_code(GLenum src, GLenum dst) { 54 | return (blend_code(src) << 4) | blend_code(dst); 55 | } 56 | 57 | struct rast_reference_t : public raster_t { 58 | 59 | rast_reference_t() { 60 | _cxt = nullptr; 61 | _frame._pixels = nullptr; 62 | _tex = nullptr; 63 | _draw_func = nullptr; 64 | } 65 | 66 | void framebuffer_clear( 67 | bool color, 68 | bool depth, 69 | bool stencil, 70 | uint32_t clear_color, 71 | float clear_depth, 72 | uint32_t clear_stencil) override { 73 | if (_cxt) { 74 | if (color) { 75 | _cxt->buffer.clear_colour(0x202020); 76 | } 77 | if (depth) { 78 | _cxt->buffer.clear_depth(clear_depth); 79 | } 80 | } 81 | } 82 | 83 | void framebuffer_release() override { 84 | _frame._pixels = nullptr; 85 | } 86 | 87 | void framebuffer_aquire() override { 88 | _frame._pixels = _cxt->buffer.pixels(); 89 | _frame._depth = _cxt->buffer.depth(); 90 | _frame._width = _cxt->buffer.width(); 91 | _frame._height = _cxt->buffer.height(); 92 | } 93 | 94 | bool start(gl_context_t &cxt) override { 95 | _cxt = &cxt; 96 | return true; 97 | } 98 | 99 | void stop() override { 100 | _cxt = nullptr; 101 | } 102 | 103 | void push_triangles(const std::vector &triangles, 104 | const texture_t *tex, 105 | const state_manager_t &state) override; 106 | 107 | void flush() override {} 108 | 109 | void present() override {} 110 | 111 | protected: 112 | 113 | draw_func_t *find_draw_func(const state_manager_t &state); 114 | 115 | draw_func_t *_draw_func; 116 | 117 | bool setup_triangle(const triangle_t &t, 118 | triangle_setup_t &s); 119 | 120 | const texture_t *_tex; 121 | gl_context_t *_cxt; 122 | frame_t _frame; 123 | }; 124 | 125 | draw_func_t *rast_reference_t::find_draw_func(const state_manager_t &state) { 126 | if (state.blendFrag) { 127 | switch (blend_code(state.blendFuncSrc, state.blendFuncDst)) { 128 | case blend_code(GL_ONE, GL_ZERO): 129 | return rast_tex_one_zero; 130 | case blend_code(GL_ONE, GL_ONE): 131 | return 
rast_tex_one_zero; 132 | case blend_code(GL_DST_COLOR, GL_SRC_COLOR): 133 | return rast_tex_dst_src; 134 | case blend_code(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA): 135 | return rast_tex_sa_msa; 136 | case blend_code(GL_DST_COLOR, GL_ZERO): 137 | return rast_tex_dst_zero; 138 | case blend_code(GL_ONE, GL_ONE_MINUS_SRC_COLOR): 139 | return rast_tex_one_msc; 140 | case blend_code(GL_ONE, GL_ONE_MINUS_SRC_ALPHA): 141 | return rast_tex_one_msa; 142 | default: 143 | // DEBUG_BREAK; 144 | break; 145 | } 146 | } 147 | return rast_tex_one_zero; 148 | } 149 | 150 | bool rast_reference_t::setup_triangle(const triangle_t &t, 151 | triangle_setup_t &s) { 152 | 153 | // isolate 2d coordinates 154 | const float2 v0{t.vert[0].coord.x, t.vert[0].coord.y}; 155 | const float2 v1{t.vert[1].coord.x, t.vert[1].coord.y}; 156 | const float2 v2{t.vert[2].coord.x, t.vert[2].coord.y}; 157 | 158 | // compute triangle bounding box 159 | s.bound.x0 = std::min({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)}); 160 | s.bound.y0 = std::min({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)}); 161 | s.bound.x1 = std::max({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)}); 162 | s.bound.y1 = std::max({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)}); 163 | 164 | // reject if off screen 165 | if (s.bound.x1 < 0) return false; 166 | if (s.bound.x0 > _frame._width) return false; 167 | if (s.bound.y1 < 0) return false; 168 | if (s.bound.y0 > _frame._height) return false; 169 | 170 | // clip against screen bounds 171 | s.bound.x0 = std::max(s.bound.x0, 0); 172 | s.bound.y0 = std::max(s.bound.y0, 0); 173 | s.bound.x1 = std::min(s.bound.x1, _frame._width - 1); 174 | s.bound.y1 = std::min(s.bound.y1, _frame._height - 1); 175 | 176 | // find the area of the triangle 177 | const float area = triangle_area(v0, v1, v2); 178 | 179 | // heuristic for if we should use affine mapping 180 | const bool affine = affine_heuristic(s.bound); 181 | s.affine = affine; 182 | 183 | // the signed area of the UVs (texel space) 184 | if (_tex) { 
185 | const float2 &t0 = t.vert[0].tex; 186 | const float2 &t1 = t.vert[1].tex; 187 | const float2 &t2 = t.vert[2].tex; 188 | 189 | const float texture_area = _tex->_width * _tex->_height; 190 | const float uv_area = 191 | ((t1.x - t0.x) * (t2.y - t0.y) - (t2.x - t0.x) * (t1.y - t0.y)); 192 | s.mip_level = get_mip_level(area, uv_area * texture_area); 193 | } 194 | 195 | // find edge vectors 196 | const float2 d01 = v1 - v0; 197 | const float2 d12 = v2 - v1; 198 | const float2 d20 = v0 - v2; 199 | 200 | // cross product gives us normals from the edges 201 | // which we 'normalize' to the area of the triangle 202 | const float2 n0 = float2::cross(d12) / area; 203 | const float2 n1 = float2::cross(d20) / area; 204 | const float2 n2 = float2::cross(d01) / area; 205 | 206 | // evaluate the starting position for each interpolant 207 | const float s0 = evaluate(n0, v1); 208 | const float s1 = evaluate(n1, v2); 209 | const float s2 = evaluate(n2, v0); 210 | 211 | // store edges 212 | s.edge[0] = float3{n0.x, n0.y, s0}; 213 | s.edge[1] = float3{n1.x, n1.y, s1}; 214 | s.edge[2] = float3{n2.x, n2.y, s2}; 215 | 216 | // edge function interpolants 217 | { 218 | s. v[triangle_setup_t::slot_w0] = s0; 219 | s.vx[triangle_setup_t::slot_w0] = n0.x; 220 | s.vy[triangle_setup_t::slot_w0] = n0.y; 221 | 222 | s. v[triangle_setup_t::slot_w1] = s1; 223 | s.vx[triangle_setup_t::slot_w1] = n1.x; 224 | s.vy[triangle_setup_t::slot_w1] = n1.y; 225 | } 226 | 227 | // XXX: make this float3 and use dot products 228 | std::array c; 229 | 230 | const auto & state = _cxt->state; 231 | 232 | // 1/w interpolation 233 | { 234 | const float iw0 = affine ? 1.f : t.vert[0].coord.w; 235 | const float iw1 = affine ? 1.f : t.vert[1].coord.w; 236 | const float iw2 = affine ? 
1.f : t.vert[2].coord.w; 237 | 238 | c[0] = s0 * iw0; c[1] = s1 * iw1; c[2] = s2 * iw2; 239 | c[3] = n0.x * iw0; c[4] = n1.x * iw1; c[5] = n2.x * iw2; 240 | c[6] = n0.y * iw0; c[7] = n1.y * iw1; c[8] = n2.y * iw2; 241 | 242 | const uint32_t slot = triangle_setup_t::slot_iw; 243 | s.v [slot] = c[0] + c[1] + c[2]; 244 | s.vx[slot] = c[3] + c[4] + c[5]; 245 | s.vy[slot] = c[6] + c[7] + c[8]; 246 | } 247 | 248 | // z 249 | { 250 | const uint32_t slot = triangle_setup_t::slot_z; 251 | const float a0 = t.vert[0].coord.z; 252 | const float a1 = t.vert[1].coord.z; 253 | const float a2 = t.vert[2].coord.z; 254 | s.v [slot] = s0 * a0 + s1 * a1 + s2 * a2; 255 | s.vx[slot] = n0.x * a0 + n1.x * a1 + n2.x * a2; 256 | s.vy[slot] = n0.y * a0 + n1.y * a1 + n2.y * a2; 257 | } 258 | 259 | // a 260 | { 261 | const uint32_t slot = triangle_setup_t::slot_a; 262 | const float a0 = t.vert[0].rgba.x; 263 | const float a1 = t.vert[1].rgba.x; 264 | const float a2 = t.vert[2].rgba.x; 265 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 266 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 267 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 268 | } 269 | 270 | // r 271 | { 272 | const uint32_t slot = triangle_setup_t::slot_r; 273 | const float a0 = t.vert[0].rgba.w; 274 | const float a1 = t.vert[1].rgba.w; 275 | const float a2 = t.vert[2].rgba.w; 276 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 277 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 278 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 279 | } 280 | 281 | // g 282 | { 283 | const uint32_t slot = triangle_setup_t::slot_g; 284 | const float a0 = t.vert[0].rgba.y; 285 | const float a1 = t.vert[1].rgba.y; 286 | const float a2 = t.vert[2].rgba.y; 287 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 288 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 289 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 290 | } 291 | 292 | // b 293 | { 294 | const uint32_t slot = triangle_setup_t::slot_b; 295 | const float a0 = 
t.vert[0].rgba.z; 296 | const float a1 = t.vert[1].rgba.z; 297 | const float a2 = t.vert[2].rgba.z; 298 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 299 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 300 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 301 | } 302 | 303 | // u 304 | { 305 | const uint32_t slot = triangle_setup_t::slot_u; 306 | const float a0 = t.vert[0].tex.x; 307 | const float a1 = t.vert[1].tex.x; 308 | const float a2 = t.vert[2].tex.x; 309 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 310 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 311 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 312 | } 313 | 314 | // v 315 | { 316 | const uint32_t slot = triangle_setup_t::slot_v; 317 | const float a0 = t.vert[0].tex.y; 318 | const float a1 = t.vert[1].tex.y; 319 | const float a2 = t.vert[2].tex.y; 320 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2; 321 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2; 322 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2; 323 | } 324 | 325 | return true; 326 | } 327 | 328 | void rast_reference_t::push_triangles(const std::vector &triangles, 329 | const texture_t *tex, 330 | const state_manager_t &state) { 331 | 332 | _draw_func = find_draw_func(state); 333 | 334 | if (!_cxt || !_frame._pixels || !_draw_func) { 335 | return; 336 | } 337 | 338 | _tex = tex; 339 | 340 | for (const auto &t : triangles) { 341 | if (t.vert[0].coord.w == 0.f) { 342 | // signals fully clipped so discard 343 | continue; 344 | } 345 | 346 | triangle_setup_t setup; 347 | if (!setup_triangle(t, setup)) { 348 | continue; 349 | } 350 | 351 | if (state.texture2D || true) { 352 | if (tex && tex->_pixels[0]) { 353 | _draw_func(_frame, setup, *tex); 354 | } else { 355 | // TODO 356 | } 357 | } 358 | } 359 | } 360 | 361 | extern "C" { 362 | __declspec(dllexport) raster_t *raster_create() { 363 | return new rast_reference_t; 364 | } 365 | 366 | __declspec(dllexport) void raster_release(raster_t *r) { 367 | delete r; 368 | } 369 | }; 370 | 
// A 2D line segment described by its two end points.
struct line_t {
  float2 a, b;

  // construct a segment running from i to j
  line_t(float2 i, float2 j) : a(i), b(j) {}
};
namespace {

//
// faster packed 4 x 8bit fixed point multiply
//
// Blends two packed 0xRRGGBB pixels per channel:
//   result = (a * (255 - i) + b * i) >> 8
// so i == 0 yields (almost) a and i == 255 yields (almost) b.
//
uint32_t alpha(uint32_t a, uint32_t b, uint8_t i) {
  // Widen the weights before use. `~i` on a uint8_t is promoted to int and
  // yields 0xFFFFFFxx rather than (255 - i), which corrupted the product.
  const uint32_t wb = i;
  const uint32_t wa = 0xffu - wb;
  // pixel a (red/blue and green are scaled separately so that the products
  // of neighbouring channels cannot run into each other)
  const uint32_t a0 = ((a & 0xff00ff) * wa) >> 8;
  const uint32_t a1 = ((a & 0x00ff00) * wa) >> 8;
  // pixel b
  const uint32_t b0 = ((b & 0xff00ff) * wb) >> 8;
  const uint32_t b1 = ((b & 0x00ff00) * wb) >> 8;
  // mix results; wa + wb == 255 so a channel sum can never carry over
  return ((a0 & 0xff00ff) | (a1 & 0xff00)) +
         ((b0 & 0xff00ff) | (b1 & 0xff00));
}

}  // namespace {}
- 2; 49 | const float max_y = float(_height) - 2; 50 | 51 | static const auto classify_x = [=](const float2 &p) -> int { 52 | return (p.x < min_x ? CLIP_X_LO : 0) | (p.x > max_x ? CLIP_X_HI : 0); 53 | }; 54 | 55 | static const auto classify_y = [=](const float2 &p) -> int { 56 | return (p.y < min_y ? CLIP_Y_LO : 0) | (p.y > max_y ? CLIP_Y_HI : 0); 57 | }; 58 | 59 | static const auto classify = [=](const float2 &p) -> int { 60 | return classify_x(p) | classify_y(p); 61 | }; 62 | 63 | static const auto clip_y_lo = [=](int cl, float2 &va, const float2 &vb) { 64 | if (cl & CLIP_Y_LO) { 65 | const float dx = (vb.x - va.x) / (vb.y - va.y); 66 | va.x += dx * (min_y - va.y); 67 | va.y = min_y; 68 | } 69 | }; 70 | 71 | static const auto clip_y_hi = [=](int cl, float2 &va, const float2 &vb) { 72 | if (cl & CLIP_Y_HI) { 73 | const float dx = (vb.x - va.x) / (vb.y - va.y); 74 | va.x -= dx * (va.y - max_y); 75 | va.y = max_y; 76 | } 77 | }; 78 | 79 | static const auto clip_x_lo = [=](int cl, float2 &va, const float2 &vb) { 80 | if (cl & CLIP_X_LO) { 81 | const float dy = (vb.y - va.y) / (vb.x - va.x); 82 | va.y += dy * (min_x - va.x); 83 | va.x = min_x; 84 | } 85 | }; 86 | 87 | static const auto clip_x_hi = [=](int cl, float2 &va, const float2 &vb) { 88 | if (cl & CLIP_X_HI) { 89 | const float dy = (vb.y - va.y) / (vb.x - va.x); 90 | va.y -= dy * (va.x - max_x); 91 | va.x = max_x; 92 | } 93 | }; 94 | 95 | for (uint32_t i = 0;; ++i) { 96 | 97 | if ((fabs(b.x - a.x) + fabs(b.y - a.y)) < 1.f) { 98 | // remove ultra tiny points 99 | return true; 100 | } 101 | 102 | const int ca = classify(a); 103 | const int cb = classify(b); 104 | 105 | if (0 == (ca | cb)) { 106 | // all in center, no clipping 107 | return false; 108 | } 109 | 110 | const int code = ca & cb; 111 | if ((code & CLIP_X_LO) || (code & CLIP_X_HI) || 112 | (code & CLIP_Y_LO) || (code & CLIP_Y_HI)) { 113 | // all outside one plane 114 | return true; 115 | } 116 | 117 | // todo: clip longest axis first? 
118 | 119 | switch (i) { 120 | case 0: 121 | clip_x_lo(ca, a, b); 122 | clip_x_hi(ca, a, b); 123 | clip_x_lo(cb, b, a); 124 | clip_x_hi(cb, b, a); 125 | break; 126 | case 1: 127 | clip_y_lo(ca, a, b); 128 | clip_y_hi(ca, a, b); 129 | clip_y_lo(cb, b, a); 130 | clip_y_hi(cb, b, a); 131 | break; 132 | default: 133 | assert(!"bad clip"); 134 | } 135 | } 136 | 137 | return false; 138 | } 139 | 140 | // fast fixed point line drawing 141 | void surface_t::line(float2 a, float2 b, uint32_t rgb) { 142 | 143 | // clip line to screen 144 | if (line_clip(a, b)) { 145 | // fully clipped 146 | return; 147 | } 148 | 149 | const float dx = b.x - a.x, dy = b.y - a.y; 150 | const float adx = fabsf(dx), ady = fabs(dy); 151 | 152 | static const float fract = float(1u << 16); 153 | 154 | // select the longest axis 155 | if (fabsf(dx) > fabsf(dy)) { 156 | 157 | // sort vertices in y axis 158 | if (b.x < a.x) 159 | std::swap(a, b); 160 | // compute dy/dx 161 | const float ndy = (b.y - a.y) / adx; 162 | // convert y itterator to fixed point 163 | const int32_t iy = int32_t(ndy * fract); 164 | int32_t y = int32_t(a.y * fract); 165 | 166 | // quantize start and end locations 167 | const int32_t iax = int32_t(a.x); 168 | const int32_t ibx = int32_t(b.x); 169 | // raster loop 170 | { 171 | for (int32_t x = iax; x < ibx; ++x, y += iy) { 172 | _pixels[x + (y >> 16) * _pitch] = rgb; 173 | } 174 | } 175 | } else { 176 | // sort vertices in y axis 177 | if (b.y < a.y) 178 | std::swap(a, b); 179 | // compute dx/dy 180 | const float ndx = (b.x - a.x) / ady; 181 | // convert x itterator to fixed point 182 | const int32_t ix = int32_t(ndx * fract); 183 | int32_t x = int32_t(a.x * fract); 184 | 185 | // quantize start and end locations 186 | const int32_t iay = int32_t(a.y); 187 | const int32_t iby = int32_t(b.y); 188 | // raster loop 189 | { 190 | uint32_t *pix = _pixels + (x >> 16) + iay * _pitch; 191 | int32_t xstep = ix > 0 ? 
1 : -1; 192 | for (int32_t y = iay; y < iby; ++y, x += ix) { 193 | _pixels[(x >> 16) + y * _pitch] = rgb; 194 | } 195 | } 196 | } 197 | } 198 | 199 | // fast fixed point anti-aliased line drawing 200 | void surface_t::wuline(float2 a, float2 b, uint32_t rgb) { 201 | 202 | // clip line to screen 203 | if (line_clip(a, b)) { 204 | // fully clipped 205 | return; 206 | } 207 | 208 | if (std::isnan(a.x) || std::isnan(a.y) || 209 | std::isnan(b.x) || std::isnan(b.y)) { 210 | return; 211 | } 212 | 213 | const float dx = b.x - a.x, dy = b.y - a.y; 214 | const float adx = fabsf(dx), ady = fabs(dy); 215 | 216 | static const float point = float(1u << 16); 217 | 218 | // select the longest axis 219 | if (fabsf(dx) > fabsf(dy)) { 220 | // sort vertices in x axis 221 | if (b.x < a.x) 222 | std::swap(a, b); 223 | // compute dy/dx 224 | const float ndy = (b.y - a.y) / adx; 225 | // convert y itterator to fixed point 226 | const int32_t iy = int32_t(ndy * point); 227 | int32_t y = int32_t(a.y * point); 228 | // quantize start and end locations 229 | const int32_t iax = int32_t(a.x); 230 | const int32_t ibx = int32_t(b.x); 231 | // raster loop 232 | { 233 | for (int32_t x = iax; x < ibx; ++x, y += iy) { 234 | const uint32_t fract = (y & 0xffff) >> 8; 235 | uint32_t &p0 = _pixels[x + (y >> 16) * _pitch]; 236 | uint32_t &p1 = _pixels[x + (y >> 16) * _pitch + _pitch]; 237 | p0 = alpha(rgb, p0, fract); 238 | p1 = alpha(rgb, p1, fract ^ 0xff); 239 | } 240 | } 241 | } else { 242 | // sort vertices in y axis 243 | if (b.y < a.y) 244 | std::swap(a, b); 245 | // compute dx/dy 246 | const float ndx = (b.x - a.x) / ady; 247 | // convert x itterator to fixed point 248 | const int32_t ix = int32_t(ndx * point); 249 | int32_t x = int32_t(a.x * point); 250 | // quantize start and end locations 251 | const int32_t iay = int32_t(a.y); 252 | const int32_t iby = int32_t(b.y); 253 | // raster loop 254 | { 255 | uint32_t *pix = _pixels + (x >> 16) + iay * _pitch; 256 | int32_t xstep = ix > 0 ? 
1 : -1; 257 | for (int32_t y = iay; y < iby; ++y, x += ix) { 258 | const uint32_t fract = (x & 0xffff) >> 8; 259 | uint32_t &p0 = _pixels[(x >> 16) + y * _pitch]; 260 | uint32_t &p1 = _pixels[(x >> 16) + y * _pitch + 1]; 261 | p0 = alpha(rgb, p0, fract); 262 | p1 = alpha(rgb, p1, fract ^ 0xff); 263 | } 264 | } 265 | } 266 | } 267 | 268 | void surface_t::plot(const float2 &p, uint32_t rgb) { 269 | const int32_t x = int32_t(floorf(p.x)); 270 | const int32_t y = int32_t(floorf(p.y)); 271 | if (x < 0 || y < 0 || x >= int32_t(_width) || y >= int32_t(_height)) { 272 | return; 273 | } 274 | _pixels[x + y * _pitch] = rgb; 275 | } 276 | 277 | void surface_t::wuplot(const float2 &p, uint32_t rgb) { 278 | 279 | if (p.x < 0 || p.x >= int32_t(_width -1) || 280 | p.y < 0 || p.y >= int32_t(_height-1)) { 281 | return; 282 | } 283 | 284 | const float ix = floorf(p.x); 285 | const float iy = floorf(p.y); 286 | 287 | const float fx = p.x - ix; 288 | const float fy = p.y - iy; 289 | 290 | const uint32_t bx = uint32_t(fx * 0xff); 291 | const uint32_t by = uint32_t(fy * 0xff); 292 | const uint32_t jx = bx ^ 0xff; 293 | const uint32_t jy = by ^ 0xff; 294 | 295 | uint32_t &p00 = _pixels[int32_t(ix + 0) + int32_t(iy + 0) * _pitch]; 296 | uint32_t &p10 = _pixels[int32_t(ix + 1) + int32_t(iy + 0) * _pitch]; 297 | uint32_t &p01 = _pixels[int32_t(ix + 0) + int32_t(iy + 1) * _pitch]; 298 | uint32_t &p11 = _pixels[int32_t(ix + 1) + int32_t(iy + 1) * _pitch]; 299 | 300 | p00 = alpha(rgb, p00, 0xff ^ ((jx * jy) >> 8)); 301 | p10 = alpha(rgb, p10, 0xff ^ ((bx * jy) >> 8)); 302 | p01 = alpha(rgb, p01, 0xff ^ ((jx * by) >> 8)); 303 | p11 = alpha(rgb, p11, 0xff ^ ((bx * by) >> 8)); 304 | } 305 | 306 | void surface_t::fill(uint32_t rgb) { 307 | const uint32_t len = _pitch * _height; 308 | for (uint32_t i = 0; i < len; ++i) { 309 | _pixels[i] = rgb; 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /rast_wireframe/surface.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "../source/math.h" 6 | 7 | 8 | struct surface_t { 9 | 10 | surface_t() = default; 11 | 12 | surface_t(uint32_t *pixels, uint32_t width, uint32_t height, uint32_t pitch) 13 | : _pixels(pixels) 14 | , _width(width) 15 | , _height(height) 16 | , _pitch(pitch) 17 | { 18 | assert(pixels && width && height && pitch); 19 | } 20 | 21 | uint32_t width() const { 22 | return _width; 23 | } 24 | 25 | uint32_t height() const { 26 | return _height; 27 | } 28 | 29 | uint32_t pitch() const { 30 | return _pitch; 31 | } 32 | 33 | uint32_t *pixels() { 34 | return _pixels; 35 | } 36 | 37 | const uint32_t *pixels() const { 38 | return _pixels; 39 | } 40 | 41 | void plot(const float2 &a, uint32_t rgb); 42 | void wuplot(const float2 &a, uint32_t rgb); 43 | 44 | void line(float2 a, float2 b, uint32_t rgb); 45 | void wuline(float2 a, float2 b, uint32_t rgb); 46 | 47 | void fill(uint32_t rgb); 48 | 49 | protected: 50 | bool line_clip(float2 &a, float2 &b); 51 | 52 | uint32_t *_pixels; 53 | uint32_t _pitch; 54 | uint32_t _width; 55 | uint32_t _height; 56 | }; 57 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # SoftGL 2 | 3 | An OpenGL implementation using software rendering on the CPU. 4 | 5 | 6 | ## Q&A 7 | 8 | > Why would I make this? 9 | > 10 | > Because it's fun to learn OpenGL from the wrong end. 11 | 12 | > What version of OpenGL does this support? 13 | > 14 | > Somewhere around OpenGL 1.4 15 | 16 | > Is it fast? 17 | > 18 | > While I'm trying to make it fast, it's not even close to something like a GeForce 256 graphics card. 19 | 20 | > How do I use this? 21 | > 22 | > You don't, and you shouldn't. It's really just an experiment. 23 | 24 | > But really how do I use this? 25 | > 26 | > 1. Compile it. 27 | > 2. 
Copy OpenGL32.dll, softgl_rast_reference.dll and softgl.cfg into your game folder. 28 | > 3. Launch your game (which will most likely crash). 29 | 30 | > What's your goal? 31 | > 32 | > When something like Doom 3 is playable. 33 | 34 | > Is it conformant? 35 | > 36 | > No. 37 | 38 | ## Features 39 | 40 | - Per triangle Mip-Mapping 41 | - Perspective correct texture mapping 42 | - Simple backend architecture 43 | - Experimental OpenCL backend 44 | - Not much else at the moment 45 | 46 | 47 | ## Optimizations 48 | 49 | - Affine mapping for small triangles 50 | - Vectorizable edge based rendering 51 | - 16x16 stamp rendering with fast culling 52 | - Stamps coded in tight SSE 53 | 54 | 55 | ## The future 56 | 57 | - Support more old OpenGL games 58 | - Fixed point UV interpolation 59 | - More blend modes 60 | - Use AVX stamps 61 | - Multithread the rasterizer 62 | - JIT the raster stamps 63 | - Improve the OpenCL backend 64 | - Z-buffer at the stamp level for fast culling 65 | - More optimizations 66 | - Support more features 67 | - Alpha Test 68 | - Scissor 69 | - Texenv 70 | - Stencil Test 71 | - ... 72 | 73 | 74 | ## Statistics 75 | 76 | Stats from the Quake 3 timedemo (default settings) invoked via `quake3.exe +timedemo 1 +demo four`. 77 | 78 | > 1260 frames 51.0 seconds 24.7 fps 15.0/40.5/78.0/11.2 ms 79 | 80 | 81 | ## Screenshots 82 | 83 | These screenshots show the current state of progress on 15/04/2020. 
84 | 85 | ![](docs/screenshots/hl_of_demo.png?raw=true "Halflife opposing force demo") 86 | 87 | ![](docs/screenshots/quake3_demo.png?raw=true "Quake 3 demo") 88 | 89 | ![](docs/screenshots/ut99_goty.png?raw=true "Unreal Tournament GOTY") 90 | 91 | ![](docs/screenshots/quake2.png?raw=true "Quake 2") 92 | 93 | ![](docs/screenshots/doom3_1.png?raw=true "Doom 3") 94 | 95 | ![](docs/screenshots/doom3_2.png?raw=true "Doom 3") 96 | 97 | ![](docs/screenshots/ut2003.png?raw=true "Unreal Tournament 2003") 98 | -------------------------------------------------------------------------------- /softgl.cfg: -------------------------------------------------------------------------------- 1 | raster_dll softgl_rast_reference.dll 2 | profile true 3 | open_stdio true 4 | -------------------------------------------------------------------------------- /source/buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "buffer.h" 2 | #include "context.h" 3 | 4 | buffer_manager_t::buffer_manager_t() 5 | : _width(0), _height(0), _pixels(nullptr), _depth(nullptr) {} 6 | 7 | void buffer_manager_t::_release() { 8 | if (_pixels) { 9 | _aligned_free(_pixels); 10 | _pixels = nullptr; 11 | } 12 | if (_depth) { 13 | _aligned_free(_depth); 14 | _depth = nullptr; 15 | } 16 | } 17 | 18 | void buffer_manager_t::resize(int32_t w, int32_t h) { 19 | // release framebuffer 20 | if (Context && Context->raster.inst) { 21 | Context->raster.inst->framebuffer_release(); 22 | } 23 | 24 | _width = w; 25 | _height = h; 26 | 27 | if (_pixels || _depth) { 28 | _release(); 29 | } 30 | // allocate aligned buffer planes 31 | // XXX: note offsets so pixels and depth dont alias in cache 32 | _pixels = (uint32_t *)_aligned_malloc(w * h * sizeof(uint32_t), 16); 33 | _depth = (float *)_aligned_malloc(w * h * sizeof(float), 16); 34 | // notify context of resize 35 | if (Context) { 36 | Context->on_resize(); 37 | } 38 | // aquire framebuffer 39 | if (Context && 
Context->raster.inst) { 40 | Context->raster.inst->framebuffer_aquire(); 41 | } 42 | } 43 | 44 | void buffer_manager_t::save_bmp() { 45 | // save a screenshot 46 | ::save_bmp(pixels(), width(), height(), "screenshot.bmp"); 47 | } 48 | -------------------------------------------------------------------------------- /source/buffer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | 8 | struct buffer_manager_t { 9 | 10 | buffer_manager_t(); 11 | 12 | ~buffer_manager_t() { _release(); } 13 | 14 | void resize(int32_t w, int32_t h); 15 | 16 | uint32_t width() const { return _width; } 17 | 18 | uint32_t height() const { return _height; } 19 | 20 | uint32_t *pixels() const { return _pixels; } 21 | 22 | float *depth() const { 23 | assert(_depth); 24 | return _depth; 25 | } 26 | 27 | void clear_colour(const uint32_t rgb) { 28 | const int32_t area = _width * _height; 29 | const uint32_t *end = _pixels + area; 30 | uint32_t *ptr = _pixels; 31 | for (; ptr != end; ++ptr) { 32 | *ptr = rgb; 33 | } 34 | } 35 | 36 | void clear_depth(const float value) { 37 | const int32_t area = _width * _height; 38 | const float *end = _depth + area; 39 | float *ptr = _depth; 40 | for (; ptr != end; ++ptr) { 41 | *ptr = value; 42 | } 43 | } 44 | 45 | void save_bmp(); 46 | 47 | protected: 48 | void _release(); 49 | 50 | uint32_t _width, _height; 51 | uint32_t *_pixels; 52 | float *_depth; 53 | }; 54 | -------------------------------------------------------------------------------- /source/common.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | void save_bmp(const uint32_t *pixels, uint32_t w, uint32_t h, const char *path) { 4 | // 5 | } 6 | -------------------------------------------------------------------------------- /source/common.h: -------------------------------------------------------------------------------- 1 | #pragma 
#pragma once
// -----------------------------------------------------------------------------
// /source/common.h (remainder of the header)
// -----------------------------------------------------------------------------
#include <cstdint> // uint32_t
#include <cstdio>  // printf (DEBUG_BREAK), FILE / fopen / fgets (config_t)

// A simple break-once macro: in debug builds each call site traps into the
// debugger the first time it is reached and then prints the function name.
#if _DEBUG
#define DEBUG_BREAK                 \
  {                                 \
    static bool once = true;        \
    if (once) {                     \
      __debugbreak();               \
      once = false;                 \
      printf("%s\n", __FUNCTION__); \
    }                               \
  }
#else
#define DEBUG_BREAK // __assume(0)
#endif

#if 0
struct rectf_t {
  float x0, y0, x1, y1;

  float dx() const {
    return x1 - x0;
  }

  float dy() const {
    return y1 - y0;
  }
};
#endif

void save_bmp(const uint32_t *pixels, uint32_t w, uint32_t h, const char *path);

// -----------------------------------------------------------------------------
// /source/config.h
// (placed ahead of config.cpp so that this restored span is self-contained)
// -----------------------------------------------------------------------------
#include <string>
#include <unordered_map>

// A flat string -> string key/value store backed by a whitespace separated
// text file ("key value" per line, '#' starts a comment line).
struct config_t {

  config_t() = default;

  // Merge the entries of the file at `path` into the store.
  // Returns false when `path` is null or the file cannot be opened.
  bool load(const char *path);
  // Write every entry to `path`. Returns false on open or write failure.
  bool save(const char *path);

  // Print every entry to stdout.
  bool dump();

  // Remove every entry.
  bool clear();

  // Typed lookups: return true and write `*out` only when `key` exists and
  // its text can be converted to the requested type.
  bool get(const std::string &key, std::string *out);
  bool get(const std::string &key, int32_t *out);
  bool get(const std::string &key, bool *out);
  bool get(const std::string &key, float *out);

  // Typed stores: the value is kept as text.
  bool set(const std::string &key, const std::string &value);
  // Without this overload a string literal binds to the `bool` overload
  // (pointer -> bool is a standard conversion and beats std::string).
  bool set(const std::string &key, const char *value);
  bool set(const std::string &key, const int32_t &value);
  bool set(const std::string &key, const bool &value);
  bool set(const std::string &key, const float &value);

protected:
  std::unordered_map<std::string, std::string> map_;
};

// -----------------------------------------------------------------------------
// /source/config.cpp
// -----------------------------------------------------------------------------

// Separator characters recognised between the key and the value.
static bool is_blank(char c) {
  return c == ' ' || c == '\t' || c == '\r';
}

// True for any byte that can be part of an unquoted token. The comparison is
// done on unsigned char so bytes >= 0x80 (e.g. UTF-8) do not end the token.
static bool is_token(char c) {
  return static_cast<unsigned char>(c) > ' ';
}

// Split one line of a config file into `key` and `value`.
//
// Leading blanks are skipped, a line whose first non-blank character is '#'
// is a comment, the key is the first run of non-blank characters and the
// value is either the next such run or, when it starts with '"', everything
// up to the closing quote (so quoted values may contain spaces and tabs).
// Returns true only when both a key and a value were found.
static bool extract(const char *line, std::string &key, std::string &value) {
  key.clear();
  value.clear();

  const char *p = line;
  while (is_blank(*p))
    ++p;
  // comment line
  if (*p == '#') {
    return false;
  }

  // key
  const char *const key_begin = p;
  while (is_token(*p))
    ++p;
  key.assign(key_begin, p);

  // value
  while (is_blank(*p))
    ++p;
  const char *value_begin = p;
  if (*p == '"') {
    value_begin = ++p;
    // stop at the closing quote or at any control character except tab
    while (*p != '"' && (static_cast<unsigned char>(*p) >= ' ' || *p == '\t'))
      ++p;
  } else {
    while (is_token(*p))
      ++p;
  }
  value.assign(value_begin, p);

  // should have got something for both
  return !key.empty() && !value.empty();
}

bool config_t::load(const char *path) {
  if (!path)
    return false;
  FILE *fp = fopen(path, "r");
  if (!fp)
    return false;
  char line[1024];
  std::string key, value;
  // fgets always terminates the buffer and returns null at EOF or on error,
  // so no separate feof() test is needed.
  while (fgets(line, sizeof(line), fp)) {
    if (extract(line, key, value)) {
      map_[key] = value;
    }
  }
  fclose(fp);
  return true;
}

bool config_t::save(const char *path) {
  if (!path)
    return false;
  FILE *fp = fopen(path, "w");
  if (!fp)
    return false;
  bool ok = true;
  for (const auto &entry : map_) {
    const std::string &value = entry.second;
    // values with embedded blanks must be quoted or load() would truncate
    // them at the first space
    const bool quote = value.find_first_of(" \t") != std::string::npos;
    const int written =
        quote ? fprintf(fp, "%s \"%s\"\n", entry.first.c_str(), value.c_str())
              : fprintf(fp, "%s %s\n", entry.first.c_str(), value.c_str());
    if (written < 0)
      ok = false;
  }
  if (fclose(fp) != 0)
    ok = false;
  return ok;
}

bool config_t::clear() {
  map_.clear();
  // NOTE(review): every other mutator returns true on success; `false` is
  // kept here because callers are not visible from this file.
  return false;
}

bool config_t::get(const std::string &key, std::string *out) {
  const auto itt = map_.find(key);
  if (!out || itt == map_.end())
    return false;
  *out = itt->second;
  return true;
}

bool config_t::get(const std::string &key, int32_t *out) {
  const auto itt = map_.find(key);
  if (!out || itt == map_.end())
    return false;
  int parsed = 0;
  if (sscanf(itt->second.c_str(), "%d", &parsed) != 1)
    return false;
  *out = int32_t(parsed);
  return true;
}

bool config_t::get(const std::string &key, bool *out) {
  const auto itt = map_.find(key);
  if (!out || itt == map_.end())
    return false;
  const std::string &val = itt->second;
  // "1"/"0" are accepted as well: that is what older builds stored for bools
  if (val == "True" || val == "true" || val == "1") {
    *out = true;
    return true;
  }
  if (val == "False" || val == "false" || val == "0") {
    *out = false;
    return true;
  }
  return false;
}

bool config_t::get(const std::string &key, float *out) {
  const auto itt = map_.find(key);
  if (!out || itt == map_.end())
    return false;
  return sscanf(itt->second.c_str(), "%f", out) == 1;
}

bool config_t::set(const std::string &key, const std::string &value) {
  map_[key] = value;
  return true;
}

bool config_t::set(const std::string &key, const char *value) {
  if (!value)
    return false;
  map_[key] = value;
  return true;
}

bool config_t::set(const std::string &key, const int32_t &value) {
  map_[key] = std::to_string(value);
  return true;
}

bool config_t::set(const std::string &key, const bool &value) {
  // std::to_string(bool) would yield "1"/"0"; use the spelling the config
  // file itself uses ("profile true")
  map_[key] = value ? "true" : "false";
  return true;
}

bool config_t::set(const std::string &key, const float &value) {
  map_[key] = std::to_string(value);
  return true;
}

bool config_t::dump() {
  for (const auto &entry : map_) {
    printf("%s %s\n", entry.first.c_str(), entry.second.c_str());
  }
  return true;
}
-------------------------------------------------------------------------------- /source/context.cpp: -------------------------------------------------------------------------------- 1 | #include "context.h" 2 | #include "gdi_hook.h" 3 | #include "log.h" 4 | #include "matrix.h" 5 | 6 | gl_context_t::gl_context_t(HWND hwnd, HDC hdc) 7 | : window(hwnd, hdc) 8 | , primative(*this) 9 | { 10 | } 11 | 12 | bool gl_context_t::on_create() { 13 | // load the softgl config 14 | if (!config.load("softgl.cfg")) { 15 | // XXX: we need some defaults or something 16 | } 17 | // create a profiler 18 | profile.reset(profile_create()); 19 | // create a framebuffer 20 | buffer.resize(window.width(), window.height()); 21 | // initalize the raster device 22 | if (!raster_load(raster, *this)) { 23 | // cant load a rasterizer 24 | return false; 25 | } 26 | return raster.inst->start(*this); 27 | } 28 | 29 | void gl_context_t::on_destroy() { 30 | if (raster.inst) { 31 | raster.inst->stop(); 32 | } 33 | } 34 | 35 | void gl_context_t::on_flush() { 36 | if (!primative.triangles().empty()) { 37 | primative.clip_triangles(); 38 | primative.convert_to_dc(); 39 | primative.cull_triangles(); 40 | if (profile) { 41 | profile->on_triangles(primative.triangles()); 42 | } 43 | if (raster.inst) { 44 | const texture_t *tex = texture.boundTexture2d(); 45 | raster.inst->push_triangles(primative.triangles(), tex, state); 46 | } 47 | primative.clear_triangles(); 48 | } 49 | raster.inst->flush(); 50 | } 51 | 52 | void gl_context_t::on_resize() { 53 | // set default viewport 54 | state.viewport = rectf_t{0, 0, float(buffer.width()), float(buffer.height())}; 55 | // set default scissor 56 | state.scissor = rectf_t{0, 0, float(buffer.width()), float(buffer.height())}; 57 | } 58 | 59 | void gl_context_t::on_make_current() { 60 | GdiHook.hook(*this); 61 | } 62 | -------------------------------------------------------------------------------- /source/context.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "GL.h" 6 | #include "windows.h" 7 | 8 | #include "buffer.h" 9 | #include "config.h" 10 | #include "matrix.h" 11 | #include "primative.h" 12 | #include "raster.h" 13 | #include "state.h" 14 | #include "texture.h" 15 | #include "window.h" 16 | #include "profile.h" 17 | 18 | 19 | struct gl_context_t { 20 | 21 | void *operator new(size_t request) { 22 | // note: operator required for aligned alloc 23 | assert(request); 24 | size_t alignment = alignof(gl_context_t); 25 | return _aligned_malloc(request, alignment); 26 | } 27 | 28 | void operator delete(void *ptr) { 29 | // note: operator required for aligned alloc 30 | assert(ptr); 31 | _aligned_free(ptr); 32 | } 33 | 34 | gl_context_t(HWND hwnd, HDC hdc); 35 | 36 | state_manager_t state; 37 | raster_module_t raster; 38 | buffer_manager_t buffer; 39 | window_manager_t window; 40 | matrix_manager_t matrix; 41 | texture_manager_t texture; 42 | primative_manager_t primative; 43 | config_t config; 44 | std::unique_ptr profile; 45 | 46 | bool on_create(); 47 | void on_destroy(); 48 | void on_flush(); 49 | void on_resize(); 50 | void on_make_current(); 51 | 52 | struct user_cmds_t { 53 | user_cmds_t() 54 | : screenshot(false) 55 | , dmp_obj(false) 56 | , dmp_textures(false) { 57 | } 58 | bool screenshot; 59 | bool dmp_obj; 60 | bool dmp_textures; 61 | } user_cmds; 62 | 63 | protected: 64 | 65 | gl_context_t(const gl_context_t &) = delete; 66 | }; 67 | 68 | // context accessor 69 | gl_context_t *getContext(); 70 | #define Context getContext() 71 | -------------------------------------------------------------------------------- /source/forward.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct gl_context_t; 4 | struct buffer_manager_t; 5 | struct window_manager_t; 6 | struct matrix_manager_t; 7 | struct texture_manager_t; 8 | 
struct gdi_hook_t; 9 | struct log_t; 10 | struct matrix_t; 11 | struct matrix_stack_t; 12 | struct vertex_t; 13 | struct triangle_t; 14 | struct primative_manager_t; 15 | struct raster_t; 16 | struct raster_module_t; 17 | struct state_manager_t; 18 | struct texture_t; 19 | struct profile_t; 20 | 21 | enum game_id_t; 22 | enum matrix_mode_t; -------------------------------------------------------------------------------- /source/game_id.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "game_id.h" 10 | #include "log.h" 11 | 12 | 13 | static uint64_t pearsonHash(const uint8_t *x, size_t len) 14 | { 15 | static const std::array T = { 16 | // 0-255 shuffled in any (random) order suffices 17 | 98, 6, 85,150, 36, 23,112,164,135,207,169, 5, 26, 64,165,219, 18 | 61, 20, 68, 89,130, 63, 52,102, 24,229,132,245, 80,216,195,115, 19 | 90,168,156,203,177,120, 2,190,188, 7,100,185,174,243,162, 10, 20 | 237, 18,253,225, 8,208,172,244,255,126,101, 79,145,235,228,121, 21 | 123,251, 67,250,161, 0,107, 97,241,111,181, 82,249, 33, 69, 55, 22 | 59,153, 29, 9,213,167, 84, 93, 30, 46, 94, 75,151,114, 73,222, 23 | 197, 96,210, 45, 16,227,248,202, 51,152,252,125, 81,206,215,186, 24 | 39,158,178,187,131,136, 1, 49, 50, 17,141, 91, 47,129, 60, 99, 25 | 154, 35, 86,171,105, 34, 38,200,147, 58, 77,118,173,246, 76,254, 26 | 133,232,196,144,198,124, 53, 4,108, 74,223,234,134,230,157,139, 27 | 189,205,199,128,176, 19,211,236,127,192,231, 70,233, 88,146, 44, 28 | 183,201, 22, 83, 13,214,116,109,159, 32, 95,226,140,220, 57, 12, 29 | 221, 31,209,182,143, 92,149,184,148, 62,113, 65, 37, 27,106,166, 30 | 3, 14,204, 72, 21, 41, 56, 66, 28,193, 40,217, 25, 54,179,117, 31 | 238, 87,240,155,180,170,242,212,191,163, 78,218,137,194,175,110, 32 | 43,119,224, 71,122,142, 42,160,104, 48,247,103, 15, 11,138,239 33 | }; 34 | uint64_t out = 0; 35 | for (size_t j = 0; j < 
sizeof(out); ++j) { 36 | uint8_t h = T[(x[0] + j) % 256]; 37 | for (size_t i = 1; i < len; ++i) 38 | h = T[h ^ x[i]]; 39 | out = (out << 8) | h; 40 | } 41 | return out; 42 | } 43 | 44 | static uint64_t fileHash(const char *path) 45 | { 46 | FILE *fd = nullptr; 47 | if (fopen_s(&fd, path, "rb")) { 48 | return 0; 49 | } 50 | std::unique_ptr temp; 51 | fseek(fd, 0, SEEK_END); 52 | size_t size = ftell(fd); 53 | fseek(fd, 0, SEEK_SET); 54 | temp = std::make_unique(size); 55 | fread(temp.get(), 1, 1, fd); 56 | fclose(fd); 57 | return pearsonHash(temp.get(), size); 58 | } 59 | 60 | game_id_t getGameId() 61 | { 62 | static std::atomic_bool evaluated = false; 63 | static game_id_t game_id = e_unknown; 64 | // early exit if we have already evaluated the game id 65 | if (evaluated) 66 | return game_id; 67 | // we are about to have evaluated 68 | evaluated = true; 69 | // space for executable name 70 | std::array name; 71 | // get executable filename 72 | const DWORD written = GetModuleFileNameA(nullptr, name.data(), name.size()); 73 | if (written == 0 || written >= name.size()) { 74 | return game_id; 75 | } 76 | // force trailing zero 77 | name[name.size() - 1] = '\0'; 78 | // query list of known hashes 79 | const uint64_t hash = fileHash(name.data()); 80 | 81 | log_t::printf("executable name: %s\n", name.data()); 82 | log_t::printf("executable hash: 0x%llx\n", hash); 83 | 84 | switch (hash) { 85 | case 0xf45771674923d6a5: // yquake2 (quake2-20200324-9acb99e.zip) 86 | game_id = e_quake_2; 87 | break; 88 | case 0x2365c561bb63848e: // steam version 89 | game_id = e_ut99_goty; 90 | break; 91 | case 0x58908a8fe82c5ad4: // quake 3 demo 92 | game_id = e_quake_3; 93 | break; 94 | case 0xc388218ae8925ad4: 95 | game_id = e_ut2003_demo; 96 | break; 97 | case 0x3b86fbcbf56b79bf: 98 | game_id = e_half_life_of_demo; 99 | break; 100 | case 0x81326047f57f79bf: 101 | game_id = e_unreal_gold; 102 | break; 103 | default: 104 | game_id = e_unknown; 105 | } 106 | // return known game-id 107 | 
return game_id; 108 | } 109 | -------------------------------------------------------------------------------- /source/game_id.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | enum game_id_t { 4 | e_unknown, 5 | e_nehe_lesson_01, 6 | e_quake_2, 7 | e_quake_3, 8 | e_half_life, 9 | e_half_life_of_demo, 10 | e_ut99_goty, 11 | e_ut2003_demo, 12 | e_unreal_gold 13 | }; 14 | 15 | game_id_t getGameId(); 16 | -------------------------------------------------------------------------------- /source/gdi_hook.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "context.h" 5 | #include "gdi_hook.h" 6 | #include "log.h" 7 | 8 | 9 | namespace { 10 | LRESULT CALLBACK trampoline(HWND hwnd, uint32_t msg, WPARAM w, LPARAM l) { 11 | return GdiHook.dispatch(hwnd, msg, w, l); 12 | } 13 | } // namespace 14 | 15 | bool gdi_hook_t::unhook(gl_context_t &cxt) { 16 | log_t::printf("Unhooking context %p\n", &cxt); 17 | { 18 | auto itt = procMap.find(cxt.window.getHwnd()); 19 | if (itt != procMap.end()) { 20 | WNDPROC proc = procMap.at(cxt.window.getHwnd()); 21 | SetWindowLongPtrA(cxt.window.getHwnd(), GWL_WNDPROC, (LONG)proc); 22 | procMap.erase(itt); 23 | log_t::printf("Context was unhooked %p\n", &cxt); 24 | } 25 | } 26 | { 27 | for (auto itt = context.begin(); itt != context.end();) { 28 | if (itt->second == &cxt) { 29 | itt = context.erase(itt); 30 | } 31 | else { 32 | ++itt; 33 | } 34 | } 35 | } 36 | return true; 37 | } 38 | 39 | bool gdi_hook_t::hook(gl_context_t &cxt) { 40 | 41 | log_t::printf("Hooking context %p\n", &cxt); 42 | 43 | // check if context already hooked 44 | for (const auto itt : context) { 45 | if (&cxt == itt.second) { 46 | // context already hooked 47 | return true; 48 | } 49 | } 50 | 51 | // save the context 52 | HWND hwnd = cxt.window.getHwnd(); 53 | context.emplace(hwnd, &cxt); 54 | 55 | // save window handle 56 | 
hwndMap.insert(hwnd); 57 | // create screen buffer 58 | screenPrepare(cxt); 59 | // get all the orig window data 60 | WNDPROC proc = (WNDPROC)GetWindowLongPtrA(hwnd, GWL_WNDPROC); 61 | if (!proc) 62 | return false; 63 | // insert into the proc map 64 | procMap.emplace(hwnd, proc); 65 | // hook the window proc 66 | SetWindowLongPtrA(hwnd, GWL_WNDPROC, (LONG)trampoline); 67 | return true; 68 | } 69 | 70 | bool gdi_hook_t::invalidate(HWND hwnd) { 71 | // invalidate entire window 72 | InvalidateRect(hwnd, NULL, FALSE); 73 | return UpdateWindow(hwnd) == TRUE; 74 | } 75 | 76 | // dispatch window messages to the right wndproc 77 | LRESULT CALLBACK gdi_hook_t::dispatch(HWND hwnd, 78 | uint32_t msg, 79 | WPARAM w, 80 | LPARAM l) { 81 | 82 | auto itt = procMap.find(hwnd); 83 | if (itt == procMap.end()) 84 | return DefWindowProcA(hwnd, msg, w, l); 85 | 86 | gl_context_t *cxt = context.at(hwnd); 87 | 88 | auto proc = itt->second; 89 | 90 | switch (msg) { 91 | case WM_PAINT: 92 | return redraw(*cxt); 93 | case WM_KEYDOWN: 94 | // check for special keys 95 | switch (w) { 96 | case VK_F12: // screenshot 97 | cxt->user_cmds.screenshot = true; 98 | break; 99 | case VK_F11: // dump to obj 100 | cxt->user_cmds.dmp_obj = true; 101 | break; 102 | case VK_F10: // dump textures 103 | cxt->user_cmds.dmp_textures = true; 104 | break; 105 | } 106 | return CallWindowProcA(proc, hwnd, msg, w, l); 107 | default: { 108 | // note: Some window procs are not valid pointers but rather handles 109 | // and must be called via a proxy function. This is the case 110 | // for unreal tournament 99. 
111 | return CallWindowProcA(proc, hwnd, msg, w, l); 112 | } 113 | } 114 | } 115 | 116 | // repaint the current window (WM_PAINT) 117 | LRESULT gdi_hook_t::redraw(gl_context_t &cxt) { 118 | // std::lock_guard guard(mutex); 119 | 120 | HWND hwnd = cxt.window.getHwnd(); 121 | 122 | // blit buffer to screen 123 | HDC dc = GetDC(hwnd); 124 | if (dc == NULL) 125 | return 0; 126 | 127 | // get the frame from the context 128 | const auto &buffer = cxt.buffer; 129 | 130 | auto bmpItt = bmpInfoMap.find(hwnd); 131 | if (bmpItt == bmpInfoMap.end()) 132 | return 0; 133 | 134 | // grab bitmap info header 135 | BITMAPINFOHEADER &bih = bmpItt->second.bmiHeader; 136 | bih.biWidth = buffer.width(); 137 | bih.biHeight = buffer.height(); 138 | 139 | // do the bit blit 140 | const int r = 141 | StretchDIBits(dc, 142 | // src 143 | 0, 0, int(buffer.width()), int(buffer.height()), 144 | // dst 145 | 0, 0, bih.biWidth, bih.biHeight, 146 | // pixels 147 | buffer.pixels(), 148 | &(bmpItt->second), 149 | DIB_RGB_COLORS, SRCCOPY); 150 | if (r == 0) 151 | return 0; 152 | 153 | // finished WM_PAINT 154 | ReleaseDC(hwnd, dc); 155 | ValidateRect(hwnd, NULL); 156 | return 0; 157 | } 158 | 159 | // create a back buffer of a specific size 160 | bool gdi_hook_t::screenPrepare(gl_context_t &cxt) { 161 | // std::lock_guard guard(mutex); 162 | 163 | HWND hwnd = cxt.window.getHwnd(); 164 | 165 | // get the screen size 166 | RECT rect = {0}; 167 | if (!GetClientRect(hwnd, &rect)) { 168 | DEBUG_BREAK; 169 | } 170 | const uint32_t w = rect.right; 171 | const uint32_t h = rect.bottom; 172 | 173 | bmpInfoMap.emplace(hwnd, BITMAPINFO{}); 174 | BITMAPINFO &bmpInfo = bmpInfoMap.at(hwnd); 175 | 176 | // create preliminary bitmap info 177 | ZeroMemory(&bmpInfo, sizeof(BITMAPINFO)); 178 | BITMAPINFOHEADER &b = bmpInfo.bmiHeader; 179 | b.biSize = sizeof(BITMAPINFOHEADER); 180 | b.biBitCount = 32; 181 | b.biWidth = w; 182 | b.biHeight = h; 183 | b.biPlanes = 1; 184 | b.biCompression = BI_RGB; 185 | 186 | // tell the 
context to resize 187 | cxt.buffer.resize(w, h); 188 | 189 | // success 190 | return true; 191 | } 192 | -------------------------------------------------------------------------------- /source/gdi_hook.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define WIN32_LEAN_AND_MEAN 10 | #include 11 | 12 | #include "context.h" 13 | 14 | #define GdiHook gdi_hook_t::inst() 15 | 16 | struct gdi_hook_t { 17 | 18 | bool hook(gl_context_t &cxt); 19 | bool unhook(gl_context_t &cxt); 20 | 21 | bool invalidate(HWND hwnd); 22 | 23 | LRESULT CALLBACK dispatch(HWND hwnd, uint32_t msg, WPARAM w, LPARAM l); 24 | 25 | static gdi_hook_t &inst() { 26 | static gdi_hook_t self; 27 | return self; 28 | } 29 | 30 | protected: 31 | // context map 32 | std::map context; 33 | 34 | // screen buffer info 35 | std::map bmpInfoMap; 36 | std::map procMap; 37 | std::set hwndMap; 38 | 39 | // std::mutex mutex; 40 | 41 | // repaint the current window (WM_PAINT) 42 | LRESULT redraw(gl_context_t &cxt); 43 | 44 | // prepare a back buffer of a specific size 45 | bool screenPrepare(gl_context_t &cxt); 46 | }; 47 | -------------------------------------------------------------------------------- /source/log.cpp: -------------------------------------------------------------------------------- 1 | #include "log.h" 2 | #include "context.h" 3 | 4 | FILE *log_t::_fd; 5 | 6 | void log_t::_open_fd() { 7 | if (!_fd) { 8 | _fd = fopen("softgl.log", "w"); 9 | } 10 | } 11 | 12 | void log_t::_reopen_stdio() { 13 | if (Context) { 14 | bool enable = false; 15 | Context->config.get("open_stdio", &enable); 16 | if (enable) { 17 | AllocConsole(); 18 | freopen("CONOUT$", "wt", stdout); 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /source/log.h: -------------------------------------------------------------------------------- 1 | #pragma 
once 2 | #include 3 | #include 4 | #include 5 | 6 | #include "windows.h" 7 | 8 | struct log_t { 9 | 10 | static FILE *_fd; 11 | 12 | static void _open_fd(); 13 | 14 | static void _reopen_stdio(); 15 | 16 | static void printf(const char *fmt, ...) { 17 | va_list ap; 18 | { 19 | va_start(ap, fmt); 20 | _open_fd(); 21 | if (_fd) { 22 | vfprintf(_fd, fmt, ap); 23 | fflush(_fd); 24 | } 25 | va_end(ap); 26 | } 27 | { 28 | va_start(ap, fmt); 29 | vprintf(fmt, ap); 30 | va_end(ap); 31 | } 32 | } 33 | }; 34 | -------------------------------------------------------------------------------- /source/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | BOOL WINAPI DllMain(_In_ HINSTANCE hinstDLL, _In_ DWORD fdwReason, 5 | _In_ LPVOID lpvReserved) { 6 | return TRUE; 7 | } 8 | -------------------------------------------------------------------------------- /source/math.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace { 5 | const float pi1 = 3.14159265359f; 6 | const float pi2 = 3.14159265359f * 2.f; 7 | } 8 | 9 | #pragma pack( push, 1 ) 10 | template 11 | struct vec4_t { 12 | 13 | type_t x, y, z, w; 14 | 15 | vec4_t operator + (const vec4_t &v) const { 16 | return vec4_t { 17 | this->x + v.x, 18 | this->y + v.y, 19 | this->z + v.z, 20 | this->w + v.w, 21 | }; 22 | } 23 | 24 | vec4_t operator + (const type_t &v) const { 25 | return vec4_t { 26 | this->x + v, 27 | this->y + v, 28 | this->z + v, 29 | this->w + v, 30 | }; 31 | } 32 | 33 | vec4_t operator - (const vec4_t &v) const { 34 | return vec4_t { 35 | this->x - v.x, 36 | this->y - v.y, 37 | this->z - v.z, 38 | this->w - v.w, 39 | }; 40 | } 41 | 42 | vec4_t operator - (const type_t &v) const { 43 | return vec4_t { 44 | this->x - v, 45 | this->y - v, 46 | this->z - v, 47 | this->w - v, 48 | }; 49 | } 50 | 51 | vec4_t operator * (const vec4_t &v) const { 52 | return vec4_t { 53 | this->x 
* v.x, 54 | this->y * v.y, 55 | this->z * v.z, 56 | this->w * v.w, 57 | }; 58 | } 59 | 60 | vec4_t operator * (const type_t &v) const { 61 | return vec4_t { 62 | this->x * v, 63 | this->y * v, 64 | this->z * v, 65 | this->w * v, 66 | }; 67 | } 68 | 69 | vec4_t operator / (const vec4_t &v) const { 70 | return vec4_t { 71 | this->x / v.x, 72 | this->y / v.y, 73 | this->z / v.z, 74 | this->w / v.w, 75 | }; 76 | } 77 | 78 | vec4_t operator / (const type_t &v) const { 79 | return vec4_t { 80 | this->x / v, 81 | this->y / v, 82 | this->z / v, 83 | this->w / v, 84 | }; 85 | } 86 | 87 | void operator += (const vec4_t &v) { 88 | this->x += v.x; 89 | this->y += v.y; 90 | this->z += v.z; 91 | this->w += v.w; 92 | } 93 | 94 | void operator += (const type_t &v) { 95 | this->x += v; 96 | this->y += v; 97 | this->z += v; 98 | this->w += v; 99 | } 100 | 101 | void operator -= (const vec4_t &v) { 102 | this->x -= v.x; 103 | this->y -= v.y; 104 | this->z -= v.z; 105 | this->w -= v.w; 106 | } 107 | 108 | void operator -= (const type_t &v) { 109 | this->x -= v; 110 | this->y -= v; 111 | this->z -= v; 112 | this->w -= v; 113 | } 114 | 115 | void operator *= (const vec4_t &v) { 116 | this->x *= v.x; 117 | this->y *= v.y; 118 | this->z *= v.z; 119 | this->w *= v.w; 120 | } 121 | 122 | void operator *= (const type_t &v) { 123 | this->x *= v; 124 | this->y *= v; 125 | this->z *= v; 126 | this->w *= v; 127 | } 128 | 129 | void operator /= (const vec4_t &v) { 130 | this->x /= v.x; 131 | this->y /= v.y; 132 | this->z /= v.z; 133 | this->w /= v.w; 134 | } 135 | 136 | void operator /= (const type_t &v) { 137 | this->x /= v; 138 | this->y /= v; 139 | this->z /= v; 140 | this->w /= v; 141 | } 142 | 143 | static type_t dot(const vec4_t &a, const vec4_t &b) { 144 | return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; 145 | } 146 | 147 | static vec4_t set(const type_t &v) { 148 | return vec4_t { v, v, v, v }; 149 | } 150 | 151 | static vec4_t set(const type_t &x, const type_t &y, const type_t &z, 
const type_t &w) { 152 | return vec4_t { x, y, z, w }; 153 | } 154 | 155 | static vec4_t lerp(const vec4_t &a, const vec4_t &b, float t) { 156 | return vec4_t{ 157 | a.x + (b.x - a.x) * t, 158 | a.y + (b.y - a.y) * t, 159 | a.z + (b.z - a.z) * t, 160 | a.w + (b.w - a.w) * t}; 161 | } 162 | 163 | }; // vec4_t 164 | #pragma pack(pop) 165 | 166 | namespace { 167 | 168 | template 169 | inline vec4_t operator * (const float lhs, const vec4_t & rhs) { 170 | return vec4_t{ lhs * rhs.x, lhs * rhs.y, lhs * rhs.z, lhs * rhs.w }; 171 | } 172 | 173 | template 174 | inline vec4_t operator / (const float lhs, const vec4_t & rhs) { 175 | return vec4_t{ lhs / rhs.x, lhs / rhs.y, lhs / rhs.z, lhs / rhs.w }; 176 | } 177 | 178 | } // namespace {} 179 | 180 | template 181 | struct vec3_t { 182 | 183 | type_t x, y, z; 184 | 185 | vec3_t operator + (const vec3_t &v) const { 186 | return vec3_t { 187 | this->x + v.x, 188 | this->y + v.y, 189 | this->z + v.z, 190 | }; 191 | } 192 | 193 | vec3_t operator + (const type_t &v) const { 194 | return vec3_t { 195 | this->x + v, 196 | this->y + v, 197 | this->z + v, 198 | }; 199 | } 200 | 201 | vec3_t operator - (const vec3_t &v) const { 202 | return vec3_t { 203 | this->x - v.x, 204 | this->y - v.y, 205 | this->z - v.z, 206 | }; 207 | } 208 | 209 | vec3_t operator - (const type_t &v) const { 210 | return vec3_t { 211 | this->x - v, 212 | this->y - v, 213 | this->z - v, 214 | }; 215 | } 216 | 217 | vec3_t operator * (const vec3_t &v) const { 218 | return vec3_t { 219 | this->x * v.x, 220 | this->y * v.y, 221 | this->z * v.z, 222 | }; 223 | } 224 | 225 | vec3_t operator * (const type_t &v) const { 226 | return vec3_t { 227 | this->x * v, 228 | this->y * v, 229 | this->z * v, 230 | }; 231 | } 232 | 233 | vec3_t operator / (const vec3_t &v) const { 234 | return vec3_t { 235 | this->x / v.x, 236 | this->y / v.y, 237 | this->z / v.z, 238 | }; 239 | } 240 | 241 | vec3_t operator / (const type_t &v) const { 242 | return vec3_t { 243 | this->x / v, 244 | 
this->y / v, 245 | this->z / v, 246 | }; 247 | } 248 | 249 | void operator += (const vec3_t &v) { 250 | this->x += v.x; 251 | this->y += v.y; 252 | this->z += v.z; 253 | } 254 | 255 | void operator += (const type_t &v) { 256 | this->x += v; 257 | this->y += v; 258 | this->z += v; 259 | } 260 | 261 | void operator -= (const vec3_t &v) { 262 | this->x -= v.x; 263 | this->y -= v.y; 264 | this->z -= v.z; 265 | } 266 | 267 | void operator -= (const type_t &v) { 268 | this->x -= v; 269 | this->y -= v; 270 | this->z -= v; 271 | } 272 | 273 | void operator *= (const vec3_t &v) { 274 | this->x *= v.x; 275 | this->y *= v.y; 276 | this->z *= v.z; 277 | } 278 | 279 | void operator *= (const type_t &v) { 280 | this->x *= v; 281 | this->y *= v; 282 | this->z *= v; 283 | } 284 | 285 | void operator /= (const vec3_t &v) { 286 | this->x /= v.x; 287 | this->y /= v.y; 288 | this->z /= v.z; 289 | } 290 | 291 | void operator /= (const type_t &v) { 292 | this->x /= v; 293 | this->y /= v; 294 | this->z /= v; 295 | } 296 | 297 | static type_t dot(const vec3_t &a, const vec3_t &b) { 298 | return a.x * b.x + a.y * b.y + a.z * b.z; 299 | } 300 | 301 | static vec3_t set(const type_t &v) { 302 | return vec3_t { v, v, v }; 303 | } 304 | 305 | static vec3_t set(const type_t &x, const type_t &y, const type_t &z) { 306 | return vec3_t { x, y, z }; 307 | } 308 | 309 | }; // vec3_t 310 | 311 | 312 | template 313 | struct vec2_t { 314 | 315 | type_t x, y; 316 | 317 | vec2_t operator + (const vec2_t &v) const { 318 | return vec2_t { 319 | this->x + v.x, 320 | this->y + v.y, 321 | }; 322 | } 323 | 324 | vec2_t operator + (const type_t &v) const { 325 | return vec2_t { 326 | this->x + v, 327 | this->y + v, 328 | }; 329 | } 330 | 331 | vec2_t operator - (const vec2_t &v) const { 332 | return vec2_t { 333 | this->x - v.x, 334 | this->y - v.y, 335 | }; 336 | } 337 | 338 | vec2_t operator - (const type_t &v) const { 339 | return vec2_t { 340 | this->x - v, 341 | this->y - v, 342 | }; 343 | } 344 | 345 | 
vec2_t operator * (const vec2_t &v) const { 346 | return vec2_t { 347 | this->x * v.x, 348 | this->y * v.y, 349 | }; 350 | } 351 | 352 | vec2_t operator * (const type_t &v) const { 353 | return vec2_t { 354 | this->x * v, 355 | this->y * v, 356 | }; 357 | } 358 | 359 | vec2_t operator / (const vec2_t &v) const { 360 | return vec2_t { 361 | this->x / v.x, 362 | this->y / v.y, 363 | }; 364 | } 365 | 366 | vec2_t operator / (const type_t &v) const { 367 | return vec2_t { 368 | this->x / v, 369 | this->y / v, 370 | }; 371 | } 372 | 373 | void operator += (const vec2_t &v) { 374 | this->x += v.x; 375 | this->y += v.y; 376 | } 377 | 378 | void operator += (const type_t &v) { 379 | this->x += v; 380 | this->y += v; 381 | } 382 | 383 | void operator -= (const vec2_t &v) { 384 | this->x -= v.x; 385 | this->y -= v.y; 386 | } 387 | 388 | void operator -= (const type_t &v) { 389 | this->x -= v; 390 | this->y -= v; 391 | } 392 | 393 | void operator *= (const vec2_t &v) { 394 | this->x *= v.x; 395 | this->y *= v.y; 396 | } 397 | 398 | void operator *= (const type_t &v) { 399 | this->x *= v; 400 | this->y *= v; 401 | } 402 | 403 | void operator /= (const vec2_t &v) { 404 | this->x /= v.x; 405 | this->y /= v.y; 406 | } 407 | 408 | void operator /= (const type_t &v) { 409 | this->x /= v; 410 | this->y /= v; 411 | } 412 | 413 | static type_t dot(const vec2_t &a, const vec2_t &b) { 414 | return a.x * b.x + a.y * b.y; 415 | } 416 | 417 | static vec2_t set(const type_t &v) { 418 | return vec2_t { v, v }; 419 | } 420 | 421 | static vec2_t set(const type_t &x, const type_t &y) { 422 | return vec2_t { x, y }; 423 | } 424 | 425 | static vec2_t cross(const vec2_t &v) { 426 | return vec2_t { -v.y, v.x }; 427 | } 428 | 429 | static vec2_t lerp(const vec2_t &a, const vec2_t &b, float t) { 430 | return vec2_t{a.x + (b.x - a.x) * t, 431 | a.y + (b.y - a.y) * t}; 432 | } 433 | 434 | }; // vec2_t 435 | 436 | template 437 | struct rect_t { 438 | type_t x0, y0; 439 | type_t x1, y1; 440 | 441 | 
type_t dx() const { 442 | return x1 - x0; 443 | } 444 | 445 | type_t dy() const { 446 | return y1 - y0; 447 | } 448 | 449 | type_t area() const { 450 | return dx() * dy(); 451 | } 452 | }; 453 | 454 | using int2 = vec2_t; 455 | using int3 = vec3_t; 456 | using int4 = vec4_t; 457 | using float2 = vec2_t; 458 | using float3 = vec3_t; 459 | using float4 = vec4_t; 460 | 461 | using recti_t = rect_t; 462 | using rectf_t = rect_t; 463 | 464 | template 465 | static inline type_t dot(const vec4_t &a, const vec3_t &b) { 466 | return a.x * b.x + a.y * b.y + a.z * b.z; 467 | } 468 | 469 | template 470 | static inline type_t dot(const vec4_t &a, const vec2_t &b) { 471 | return a.x * b.x + a.y * b.y; 472 | } 473 | 474 | template 475 | static inline vec2_t vec2(const vec4_t &v) { 476 | return vec2_t{v.x, v.y}; 477 | } 478 | 479 | namespace { 480 | 481 | template 482 | inline vec2_t operator * (const float lhs, const vec2_t & rhs) { 483 | return vec2_t{ lhs * rhs.x, lhs * rhs.y }; 484 | } 485 | 486 | template 487 | inline vec2_t operator / (const float lhs, const vec2_t & rhs) { 488 | return vec2_t{ lhs / rhs.x, lhs / rhs.y }; 489 | } 490 | 491 | } // namespace 492 | 493 | namespace { 494 | 495 | template 496 | inline type_t clamp(const type_t min, const type_t val, const type_t max) { 497 | return (val < min) ? min : ((val > max) ? 
max : val); 498 | } 499 | 500 | } // namespace {} 501 | -------------------------------------------------------------------------------- /source/matrix.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "common.h" 8 | #include "GL.h" 9 | #include "math.h" 10 | 11 | enum matrix_mode_t { 12 | GL_MATRIXMODE_MODELVIEW, 13 | GL_MATRIXMODE_PROJECTION, 14 | GL_MATRIXMODE_TEXTURE, 15 | }; 16 | 17 | struct matrix_t { 18 | 19 | // array index layout: 20 | // 0 4 8 c 21 | // 1 5 9 d 22 | // 2 6 a e 23 | // 3 7 b f 24 | 25 | // c = x, d = y, e = z 26 | 27 | __m128 row_get(const int32_t y) const { 28 | return _mm_loadu_ps(e.data() + y * 4); 29 | } 30 | 31 | void row_set(const int32_t y, __m128 v) { 32 | _mm_storeu_ps(e.data() + y * 4, v); 33 | } 34 | 35 | const float & operator () (int32_t x, int32_t y) const { 36 | return e[x * 4 + y]; 37 | } 38 | 39 | float & operator () (int32_t x, int32_t y) { 40 | return e[x * 4 + y]; 41 | } 42 | 43 | const float & operator [] (int32_t i) const { 44 | return e[i]; 45 | } 46 | 47 | float & operator [] (int32_t i) { 48 | return e[i]; 49 | } 50 | 51 | float *data() { 52 | return e.data(); 53 | } 54 | 55 | const float *data() const { 56 | return e.data(); 57 | } 58 | 59 | void identity() { 60 | e.fill(0.f); 61 | operator()(0, 0) = 1.f; 62 | operator()(1, 1) = 1.f; 63 | operator()(2, 2) = 1.f; 64 | operator()(3, 3) = 1.f; 65 | } 66 | 67 | void _multiply_fpu(const matrix_t &b) { 68 | matrix_t &a = *this; 69 | matrix_t r; 70 | for (uint32_t i = 0; i < 16; i+=4) { 71 | r[i + 0] = b[i + 0] * a[0x0] + b[i + 1] * a[0x4] + b[i + 2] * a[0x8] + b[i + 3] * a[0xc]; 72 | r[i + 1] = b[i + 0] * a[0x1] + b[i + 1] * a[0x5] + b[i + 2] * a[0x9] + b[i + 3] * a[0xd]; 73 | r[i + 2] = b[i + 0] * a[0x2] + b[i + 1] * a[0x6] + b[i + 2] * a[0xa] + b[i + 3] * a[0xe]; 74 | r[i + 3] = b[i + 0] * a[0x3] + b[i + 1] * a[0x7] + b[i + 2] * a[0xb] + b[i + 3] * 
a[0xf]; 75 | } 76 | e = r.e; 77 | } 78 | 79 | static inline __m128 _lincomb_sse(const __m128 &a, const matrix_t &b) 80 | { 81 | // linear combination: 82 | // a[0] * b.row[0] + a[1] * b.row[1] + a[2] * b.row[2] + a[3] * b.row[3] 83 | __m128 result; 84 | result = _mm_mul_ps( _mm_shuffle_ps(a, a, 0x00), b.row_get(0)); 85 | result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(a, a, 0x55), b.row_get(1))); 86 | result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(a, a, 0xaa), b.row_get(2))); 87 | result = _mm_add_ps(result, _mm_mul_ps(_mm_shuffle_ps(a, a, 0xff), b.row_get(3))); 88 | return result; 89 | } 90 | 91 | void _multiply_sse(const matrix_t &b) { 92 | const __m128 out0x = _lincomb_sse(b.row_get(0), *this); 93 | const __m128 out1x = _lincomb_sse(b.row_get(1), *this); 94 | const __m128 out2x = _lincomb_sse(b.row_get(2), *this); 95 | const __m128 out3x = _lincomb_sse(b.row_get(3), *this); 96 | row_set(0, out0x); 97 | row_set(1, out1x); 98 | row_set(2, out2x); 99 | row_set(3, out3x); 100 | } 101 | 102 | template 103 | void multiply(const matrix_t &b) { 104 | use_sse ? 
_multiply_sse(b) : _multiply_fpu(b); 105 | } 106 | 107 | __declspec(align(16)) std::array e; 108 | }; 109 | 110 | 111 | struct matrix_stack_t { 112 | 113 | matrix_stack_t(matrix_mode_t mode): _mode(mode), _head(0) { 114 | top().identity(); 115 | } 116 | 117 | void glPushMatrix() { 118 | assert(can_push()); 119 | ++_head; 120 | _stack[_head] = _stack[_head - 1]; 121 | } 122 | 123 | void glPopMatrix() { 124 | assert(_head > 0); 125 | --_head; 126 | } 127 | 128 | void glOrtho(float l, 129 | float r, 130 | float b, 131 | float t, 132 | float n, 133 | float f) { 134 | matrix_t m; 135 | 136 | m(0, 0) = 2.f / (r - l); 137 | m(1, 0) = 0.f; 138 | m(2, 0) = 0.f; 139 | m(3, 0) = -(r + l) / (r - l); 140 | 141 | m(0, 1) = 0.f; 142 | m(1, 1) = 2.f / (t - b); 143 | m(2, 1) = 0.f; 144 | m(3, 1) = -(t + b) / (t - b); 145 | 146 | m(0, 2) = 0.f; 147 | m(1, 2) = 0.f; 148 | m(2, 2) = -2.f / (f - n); 149 | m(3, 2) = -(f + n) / (f - n); 150 | 151 | m(0, 3) = 0.f; 152 | m(1, 3) = 0.f; 153 | m(2, 3) = 0.f; 154 | m(3, 3) = 1.f; 155 | 156 | top().multiply(m); 157 | } 158 | 159 | void glRotatef(float angle, float x, float y, float z) { 160 | const float RAD = 3.14159265359f / 180.f; 161 | 162 | const float l = 1.f / sqrtf( x*x + y*y + z*z ); 163 | x *= l; y *= l; z *= l; 164 | 165 | const float r = angle * RAD; 166 | const float c = cosf( r ); 167 | const float s = sinf( r ); 168 | const float t = 1-c; 169 | 170 | matrix_t m; 171 | m(0, 0) = x*x*t+c; 172 | m(1, 0) = x*y*t-z*s; 173 | m(2, 0) = x*z*t+y*s; 174 | m(3, 0) = 0.f; 175 | 176 | m(0, 1) = y*x*t+z*s; 177 | m(1, 1) = y*y*t+c; 178 | m(2, 1) = y*z*t-x*s; 179 | m(3, 1) = 0.f; 180 | 181 | m(0, 2) = x*z*t-y*s; 182 | m(1, 2) = y*z*t+x*s; 183 | m(2, 2) = z*z*t+c; 184 | m(3, 2) = 0.f; 185 | 186 | m(0, 3) = 0.f; 187 | m(1, 3) = 0.f; 188 | m(2, 3) = 0.f; 189 | m(3, 3) = 1.f; 190 | 191 | top().multiply(m); 192 | } 193 | 194 | void glScalef(float x, float y, float z) { 195 | 196 | matrix_t m = { 197 | x, 0, 0, 0, 198 | 0, y, 0, 0, 199 | 0, 0, z, 0, 
200 | 0, 0, 0, 1 201 | }; 202 | 203 | top().multiply(m); 204 | } 205 | 206 | void glTranslatef(float x, float y, float z) { 207 | 208 | matrix_t m = { 209 | 1, 0, 0, 0, 210 | 0, 1, 0, 0, 211 | 0, 0, 1, 0, 212 | x, y, z, 1 213 | }; 214 | 215 | top().multiply(m); 216 | } 217 | 218 | void glFrustum(const float l, 219 | const float r, 220 | const float b, 221 | const float t, 222 | const float n, 223 | const float f) { 224 | matrix_t m; 225 | 226 | if (n < 0.f || f < 0.f) 227 | return; 228 | 229 | const float m00 = 2.f * n / (r - l); 230 | const float m11 = 2.f * n / (t - b); 231 | const float m02 = (r + l) / (r - l); 232 | const float m12 = (t + b) / (t - b); 233 | const float m22 =-(f + n) / (f - n); 234 | const float m23 =-(2.f * f * n) / (f - n); 235 | 236 | m(0, 0) = m00; m(1, 0) = 0; m(2, 0) = m02; m(3, 0) = 0; 237 | m(0, 1) = 0; m(1, 1) = m11; m(2, 1) = m12; m(3, 1) = 0; 238 | m(0, 2) = 0; m(1, 2) = 0; m(2, 2) = m22; m(3, 2) = m23; 239 | m(0, 3) = 0; m(1, 3) = 0; m(2, 3) = -1.f; m(3, 3) = 0; 240 | 241 | top().multiply(m); 242 | } 243 | 244 | // used by gluPerspective() 245 | void glMultMatrixd(const double *v) { 246 | matrix_t m; 247 | for (uint32_t i = 0; i < 16; ++i) 248 | m[i] = float(v[i]); 249 | top().multiply(m); 250 | } 251 | 252 | void glMultMatrixf(const float *v) { 253 | matrix_t m; 254 | for (uint32_t i = 0; i < 16; ++i) 255 | m[i] = v[i]; 256 | top().multiply(m); 257 | } 258 | 259 | matrix_t & top() { 260 | return _stack[_head]; 261 | } 262 | 263 | const matrix_t & top() const { 264 | return _stack[_head]; 265 | } 266 | 267 | void glLoadIdentity() { 268 | top().identity(); 269 | } 270 | 271 | bool can_pop() const { 272 | return _head > 0; 273 | } 274 | 275 | bool can_push() const { 276 | return (_head + 1) < _stack.size(); 277 | } 278 | 279 | void clear() { 280 | _head = 0; 281 | top().identity(); 282 | } 283 | 284 | protected: 285 | const matrix_mode_t _mode; 286 | uint32_t _head; 287 | std::array _stack; 288 | }; 289 | 290 | 291 | struct 
matrix_manager_t {

    // Creates the modelview, projection and texture stacks and makes the
    // modelview stack the current one.
    matrix_manager_t()
        : _modelview(GL_MATRIXMODE_MODELVIEW)
        , _projection(GL_MATRIXMODE_PROJECTION)
        , _texture(GL_MATRIXMODE_TEXTURE)
        , _current(&_modelview)
        // _mode was previously left uninitialised; start it in step with
        // _current, which points at the modelview stack.
        , _mode(GL_MODELVIEW)
        , _dirty(true)
    {
        _modelview.glLoadIdentity();
        _projection.glLoadIdentity();
    }

    // Selects the stack that subsequent matrix calls operate on.
    // An unrecognised mode hits DEBUG_BREAK and leaves _current unchanged;
    // note that _mode still records the requested value in that case.
    void glMatrixMode(GLenum mode) {
        _mode = mode;
        switch (mode) {
        case GL_MODELVIEW:
            _current = &_modelview;
            break;
        case GL_PROJECTION:
            _current = &_projection;
            break;
        case GL_TEXTURE: // UT2003
            _current = &_texture;
            break;
        default:
            DEBUG_BREAK;
        }
    }

    // Reports the GL enum of the currently selected stack, derived from
    // _current rather than from the stored _mode.
    // Returns GL_INVALID_ENUM (after DEBUG_BREAK) if _current matches none
    // of the three stacks.
    GLenum getMatrixMode() {
        if (_current == &_modelview) {
            return GL_MODELVIEW;
        }
        if (_current == &_projection) {
            return GL_PROJECTION;
        }
        if (_current == &_texture) {
            return GL_TEXTURE;
        }
        DEBUG_BREAK;
        return GL_INVALID_ENUM;
    }

    // The forwarding calls below all follow the same pattern: set _dirty so
    // that transform() rebuilds its memoised projection * modelview product,
    // then apply the operation to the top of the current stack.

    void glOrtho(float l,
                 float r,
                 float b,
                 float t,
                 float n,
                 float f) {
        _dirty = true;
        _current->glOrtho(l, r, b, t, n, f);
    }

    void glRotatef(float angle, float x, float y, float z) {
        _dirty = true;
        _current->glRotatef(angle, x, y, z);
    }

    void glScalef(float x, float y, float z) {
        _dirty = true;
        _current->glScalef(x, y, z);
    }

    void glTranslatef(float x, float y, float z) {
        _dirty = true;
        _current->glTranslatef(x, y, z);
    }

    void glFrustum(const float l,
                   const float r,
                   const float b,
                   const float t,
                   const float n,
                   const float f) {
        _dirty = true;
        _current->glFrustum(l, r, b, t, n, f);
    }

    // v points at 16 doubles; they are narrowed to float by the stack.
    void glMultMatrixd(const double *v) {
        _dirty = true;
        _current->glMultMatrixd(v);
    }

    // v points at 16 floats.
    void glMultMatrixf(const float *v) {
        _dirty = true;
        _current->glMultMatrixf(v);
    }

380 | void glLoadMatrixf(const float *v) { 381 | _dirty = true; 382 | matrix_t &m = _current->top(); 383 | for (uint32_t i = 0; i < 16; ++i) { 384 | m.e[i] = v[i]; 385 | } 386 | } 387 | 388 | void glLoadIdentity() { 389 | _dirty = true; 390 | _current->glLoadIdentity(); 391 | } 392 | 393 | void glPushMatrix() { 394 | _dirty = true; 395 | _current->glPushMatrix(); 396 | } 397 | 398 | void glPopMatrix() { 399 | _dirty = true; 400 | _current->glPopMatrix(); 401 | } 402 | 403 | matrix_t &modelview() { 404 | return _modelview.top(); 405 | } 406 | 407 | matrix_t &projection() { 408 | return _projection.top(); 409 | } 410 | 411 | bool can_pop() const { 412 | return _current->can_pop(); 413 | } 414 | 415 | bool can_push() const { 416 | return _current->can_push(); 417 | } 418 | 419 | const matrix_t &top() const { 420 | return _current->top(); 421 | } 422 | 423 | float4 transform(float4 v) { 424 | if (_dirty) { 425 | _memo = _projection.top(); 426 | _memo.multiply(_modelview.top()); 427 | _dirty = false; 428 | } 429 | const matrix_t &m = _memo; 430 | return float4{ 431 | m(0, 0) * v.x + m(1, 0) * v.y + m(2, 0) * v.z + m(3, 0) * v.w, 432 | m(0, 1) * v.x + m(1, 1) * v.y + m(2, 1) * v.z + m(3, 1) * v.w, 433 | m(0, 2) * v.x + m(1, 2) * v.y + m(2, 2) * v.z + m(3, 2) * v.w, 434 | m(0, 3) * v.x + m(1, 3) * v.y + m(2, 3) * v.z + m(3, 3) * v.w 435 | }; 436 | } 437 | 438 | protected: 439 | matrix_stack_t _modelview, _projection, _texture; 440 | matrix_stack_t *_current; 441 | GLenum _mode; 442 | 443 | bool _dirty; 444 | matrix_t _memo; 445 | }; 446 | -------------------------------------------------------------------------------- /source/primative.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | #include "context.h" 3 | #include "primative.h" 4 | #include "game_id.h" 5 | 6 | 7 | namespace { 8 | 9 | size_t getGLTypeSize(GLenum type) { 10 | switch (type) { 11 | case GL_FLOAT: 12 | case GL_INT: 13 | case GL_UNSIGNED_INT: 
return 4; 14 | case GL_SHORT: 15 | case GL_UNSIGNED_SHORT: return 2; 16 | case GL_BYTE: 17 | case GL_UNSIGNED_BYTE: return 1; 18 | default: 19 | DEBUG_BREAK; 20 | return 0; 21 | } 22 | } 23 | 24 | } // namespace {} 25 | 26 | void primative_manager_t::_push_vertex(const vertex_t &v) { 27 | _vertex.emplace_back(v); 28 | _vertex.back().coord = Context->matrix.transform(v.coord); 29 | } 30 | 31 | void primative_manager_t::glBegin(GLenum mode) { 32 | if (++_begin_count != 1) { 33 | DEBUG_BREAK; 34 | } 35 | _mode = mode; 36 | _vertex.clear(); 37 | } 38 | 39 | void primative_manager_t::glEnd() { 40 | 41 | // Node: default OpenGL front face is counter clockwise winding 42 | 43 | switch (_mode) { 44 | case GL_TRIANGLES: 45 | _asm_triangles(); 46 | break; 47 | case GL_TRIANGLE_FAN: 48 | _asm_triangle_fan(); 49 | break; 50 | case GL_TRIANGLE_STRIP: 51 | // note: used almost exclusively by quake3 52 | _asm_triangle_strip(); 53 | break; 54 | case GL_QUADS: 55 | _asm_quads(); 56 | break; 57 | case GL_QUAD_STRIP: 58 | _asm_quad_strip(); 59 | break; 60 | case GL_POLYGON: 61 | _asm_polygon(); 62 | break; 63 | case GL_LINE_STRIP: // untested 64 | break; 65 | default: 66 | DEBUG_BREAK; 67 | break; 68 | } 69 | if (--_begin_count != 0) { 70 | DEBUG_BREAK; 71 | } 72 | _vertex.clear(); 73 | } 74 | 75 | void primative_manager_t::add_vertex(const float4 v) { 76 | _push_vertex(vertex_t{v, _latch_argb, _latch_uv}); 77 | } 78 | 79 | void primative_manager_t::_asm_triangles() { 80 | for (size_t i = 2; i < _vertex.size(); i += 3) { 81 | const triangle_t tri = { 82 | _vertex[i - 2], 83 | _vertex[i - 0], 84 | _vertex[i - 1]}; 85 | _triangles.push_back(tri); 86 | } 87 | _vertex.clear(); 88 | } 89 | 90 | void primative_manager_t::_asm_triangle_strip() { 91 | /* 92 | 3---1 93 | / \ / \ 94 | 4---2---0 95 | 96 | {2, 1, 0} 97 | {3, 1, 2} 98 | {4, 3, 2} 99 | */ 100 | for (size_t i = 2; i < _vertex.size(); i += 1) { 101 | const int32_t b = (i & 1); // if odd 102 | const triangle_t tri = { 103 | // 2 3 4 
5 6 104 | _vertex[i - 2], // 0 1 2 ... 105 | _vertex[i - (0 ^ b)], // 2 3 4 ... 106 | _vertex[i - (1 ^ b)]}; // 1 2 3 ... 107 | _triangles.push_back(tri); 108 | } 109 | _vertex.clear(); 110 | } 111 | 112 | void primative_manager_t::_asm_triangle_fan() { 113 | for (size_t i = 2; i < _vertex.size(); i += 1) { 114 | const triangle_t tri = { 115 | _vertex[0], 116 | _vertex[i - 0], 117 | _vertex[i - 1]}; 118 | _triangles.push_back(tri); 119 | } 120 | _vertex.clear(); 121 | } 122 | 123 | void primative_manager_t::_asm_quads() { 124 | /* 125 | 0----3 126 | | | 127 | 1----2 128 | */ 129 | for (size_t i = 3; i < _vertex.size(); i += 4) { 130 | const triangle_t t1 = { 131 | _vertex[i - 3], 132 | _vertex[i - 0], 133 | _vertex[i - 1] 134 | }; 135 | _triangles.push_back(t1); 136 | const triangle_t t2 = { 137 | _vertex[i - 3], 138 | _vertex[i - 1], 139 | _vertex[i - 2] 140 | }; 141 | _triangles.push_back(t2); 142 | } 143 | _vertex.clear(); 144 | } 145 | 146 | void primative_manager_t::_asm_quad_strip() { 147 | } 148 | 149 | void primative_manager_t::_asm_polygon() { 150 | for (size_t i = 2; i < _vertex.size(); i += 1) { 151 | const triangle_t tri = { 152 | _vertex[i - 0], 153 | _vertex[i - 1], 154 | _vertex[0] 155 | }; 156 | _triangles.push_back(tri); 157 | } 158 | _vertex.clear(); 159 | } 160 | 161 | static bool _is_backfacing(const float4 & a, const float4 & b, const float4 & c) { 162 | const float2 v0 = float2{b.x - a.x, b.y - a.y}; 163 | const float2 v1 = float2{c.x - a.x, c.y - a.y}; 164 | const float2 c0 = float2{-v0.y, v0.x}; 165 | return float2::dot(c0, v1) > 0.f; 166 | } 167 | 168 | void primative_manager_t::clip_triangles() { 169 | 170 | if (_triangles.empty()) { 171 | return; 172 | } 173 | 174 | std::array vert; 175 | memset(&vert, 0, sizeof(vert)); 176 | uint32_t head = 0; 177 | 178 | const auto clip_edge = [&vert, &head]( 179 | const vertex_t &v0, 180 | const vertex_t &v1, 181 | const uint32_t c0, 182 | const uint32_t c1) { 183 | 184 | // if v0 is inside frustum 185 
| if (c0 == 0) { 186 | vert[head++] = vertex_t{v0}; 187 | } 188 | 189 | const float4 p0 = v0.coord, p1 = v1.coord; 190 | const float2 t0 = v0.tex, t1 = v1.tex; 191 | const float4 r0 = v0.rgba, r1 = v1.rgba; 192 | 193 | // the equality that we are trying to solve: 194 | // (z0 + (z1 - z0) * t) == (-w0 + (-w1 - -w0) * t) 195 | // to find z == -w intersection point 196 | // 197 | // or when our 3d coordinate is: 198 | // z / w = -1 199 | // 200 | // t = (z0 + w0) / ((w0 - w1) - (z1 - z0)) 201 | // 202 | const float nom = (p0.z + p0.w); 203 | const float denom = (p0.w - p1.w) - (p1.z - p0.z); 204 | const float t = nom / denom; 205 | if (denom == 0.f) { 206 | DEBUG_BREAK; 207 | vert[head++] = vertex_t{v1}; 208 | return; 209 | } 210 | 211 | if (t < 0.f || t > 1.f) { 212 | DEBUG_BREAK; 213 | } 214 | 215 | const float4 midPos = float4::lerp(p0, p1, t); 216 | const float2 midTex = float2::lerp(t0, t1, t); 217 | const float4 midCol = float4::lerp(r0, r1, t); 218 | vert[head++] = vertex_t{midPos, midCol, midTex}; 219 | }; 220 | 221 | uint32_t cutoff = _triangles.size(); 222 | while (cutoff) { 223 | 224 | auto &t = _triangles[--cutoff]; 225 | const auto &v0 = t.vert[0]; 226 | const auto &v1 = t.vert[1]; 227 | const auto &v2 = t.vert[2]; 228 | 229 | // bit positive when behind near plane 230 | const int32_t c0 = (v0.coord.z < -v0.coord.w) << 0; // 1 231 | const int32_t c1 = (v1.coord.z < -v1.coord.w) << 1; // 2 232 | const int32_t c2 = (v2.coord.z < -v2.coord.w) << 2; // 4 233 | const uint32_t cc = c0 | c1 | c2; 234 | 235 | if (cc == 0) { 236 | // keep, fully in front of near plane 237 | continue; 238 | } 239 | 240 | if (cc == 7) { 241 | // discard, fully behind near plane 242 | memset(&t, 0, sizeof(t)); 243 | continue; 244 | } 245 | 246 | head = 0; 247 | 248 | // edge v0 -> v1 249 | if ((c0 | c1) == 0) // fully in 250 | vert[head++] = v0; 251 | else if ((c0 | c1) != 3) // spanning 252 | clip_edge(v0, v1, c0, c1); 253 | 254 | // edge v1 -> v2 255 | if ((c1 | c2) == 0) // fully 
in 256 | vert[head++] = v1; 257 | else if ((c1 | c2) != 6) // spanning 258 | clip_edge(v1, v2, c1, c2); 259 | 260 | // edge v2 -> v0 261 | if ((c2 | c0) == 0) // fully in 262 | vert[head++] = v2; 263 | else if ((c2 | c0) != 5) // spanning 264 | clip_edge(v2, v0, c2, c0); 265 | 266 | assert(head <= 4); 267 | 268 | // re-assemble primative 269 | if (head >= 3) 270 | // replace current triangle 271 | t = triangle_t{vert[0], vert[1], vert[2]}; 272 | if (head >= 4) 273 | // append extra triangle 274 | _triangles.push_back(triangle_t{vert[0], vert[2], vert[3]}); 275 | } 276 | } 277 | 278 | void primative_manager_t::cull_triangles() { 279 | auto &state = _cxt.state; 280 | 281 | bool is_ut99 = getGameId() == e_ut99_goty || getGameId() == e_unreal_gold; 282 | 283 | if (state.cullFace) { 284 | for (triangle_t &t : _triangles) { 285 | 286 | const auto &v0 = t.vert[0]; 287 | const auto &v1 = t.vert[1]; 288 | const auto &v2 = t.vert[2]; 289 | 290 | // backface culling 291 | bool backface = _is_backfacing(v0.coord, v1.coord, v2.coord); 292 | 293 | if (state.frontFace == GL_CW || is_ut99) { 294 | backface = !backface; 295 | } 296 | 297 | bool discard = false; 298 | discard |= (state.cullMode == GL_FRONT) && !backface; 299 | discard |= (state.cullMode == GL_BACK) && backface; 300 | discard |= (state.cullMode == GL_FRONT_AND_BACK); 301 | 302 | if (discard) { 303 | // discard backfacing triangle 304 | memset(&t, 0, sizeof(t)); 305 | continue; 306 | } 307 | } 308 | } 309 | } 310 | 311 | void primative_manager_t::convert_to_dc() { 312 | 313 | const auto &state = Context->state; 314 | 315 | auto &viewport = state.viewport; 316 | // get viewport center offset 317 | const float vx = viewport.x0; 318 | const float vy = viewport.y0; 319 | const float vw = viewport.dx() * .5f; 320 | const float vh = viewport.dy() * .5f; 321 | 322 | auto transform = [vx, vy, vw, vh, &state](float4 &v) { 323 | // homogenous perspective divide 324 | v.x /= v.w; 325 | v.y /= v.w; 326 | v.z /= v.w; 327 | v.w = 
1.f / v.w; 328 | // ndc -> dc coordinate 329 | v.x = vx + (v.x * vw + vw); 330 | v.y = vy + (v.y * vh + vh); 331 | // depth range 332 | v.z = state.depthRangeNear + (state.depthRangeFar - state.depthRangeNear) * v.z; 333 | }; 334 | 335 | for (auto &t : _triangles) { 336 | transform(t.vert[0].coord); 337 | transform(t.vert[1].coord); 338 | transform(t.vert[2].coord); 339 | } 340 | } 341 | 342 | void primative_manager_t::glVertexPointer(GLint size, GLenum type, 343 | GLsizei stride, 344 | const GLvoid *pointer) { 345 | _array_vertex._size = size; 346 | _array_vertex._type = type; 347 | _array_vertex._stride = stride ? stride : (getGLTypeSize(type) * size); 348 | _array_vertex._pointer = (const uint8_t *)pointer; 349 | } 350 | 351 | void primative_manager_t::glColorPointer(GLint size, GLenum type, 352 | GLsizei stride, 353 | const GLvoid *pointer) { 354 | _array_color._size = size; 355 | _array_color._type = type; 356 | _array_color._stride = stride ? stride : (getGLTypeSize(type) * size); 357 | _array_color._pointer = (const uint8_t *)pointer; 358 | } 359 | 360 | void primative_manager_t::glTexCoordPointer(GLint size, GLenum type, 361 | GLsizei stride, 362 | const GLvoid *pointer) { 363 | _array_tex_coord._size = size; 364 | _array_tex_coord._type = type; 365 | _array_tex_coord._stride = stride ? 
stride : (getGLTypeSize(type) * size); 366 | _array_tex_coord._pointer = (const uint8_t *)pointer; 367 | } 368 | 369 | void primative_manager_t::glArrayElement(GLint i) { 370 | 371 | const auto &state = _cxt.state; 372 | if (!state.array_vertex) { 373 | return; 374 | } 375 | if (!_array_vertex._pointer) { 376 | return; 377 | } 378 | 379 | // vertex coordinates 380 | float2 uv = {0.f, 0.f}; 381 | if (state.array_tex_coord) { 382 | if (_array_tex_coord._pointer) { 383 | if (_array_tex_coord._type != GL_FLOAT) 384 | DEBUG_BREAK; 385 | // find tex coord 386 | const uint8_t * t = (const uint8_t *)_array_tex_coord._pointer; 387 | t += i * _array_tex_coord._stride; 388 | const float *b = (const float *)t; 389 | uv = float2{b[0], b[1]}; 390 | } 391 | } 392 | 393 | // color 394 | float4 argb = {1.f, 1.f, 1.f, 1.f}; 395 | if (state.array_color) { 396 | if (_array_color._pointer) { 397 | switch (_array_color._type) { 398 | case GL_UNSIGNED_BYTE: 399 | { 400 | const uint8_t *c = (_array_color._pointer + i * _array_color._stride); 401 | argb = {_array_color._size > 3 ? float(c[3] / 256.f) : 1.f, 402 | _array_color._size > 0 ? float(c[0] / 256.f) : 1.f, 403 | _array_color._size > 1 ? float(c[1] / 256.f) : 1.f, 404 | _array_color._size > 2 ? float(c[2] / 256.f) : 1.f}; 405 | } 406 | break; 407 | case GL_FLOAT: 408 | { 409 | const float *c = (const float *)(_array_color._pointer + i * _array_color._stride); 410 | argb = {_array_color._size > 3 ? c[3] : 1.f, 411 | _array_color._size > 0 ? c[0] : 1.f, 412 | _array_color._size > 1 ? c[1] : 1.f, 413 | _array_color._size > 2 ? 
c[2] : 1.f}; 414 | } 415 | break; 416 | default: 417 | DEBUG_BREAK; 418 | } 419 | } 420 | } 421 | 422 | // support gl float vertex element type 423 | if (_array_vertex._type != GL_FLOAT) 424 | DEBUG_BREAK; 425 | // find vertex 426 | const uint8_t *f = (const uint8_t *)_array_vertex._pointer; 427 | f += i * _array_vertex._stride; 428 | const float *a = (const float *)f; 429 | 430 | // form vertices 431 | const float4 v = { 432 | _array_vertex._size > 0 ? a[0] : 0.f, 433 | _array_vertex._size > 1 ? a[1] : 0.f, 434 | _array_vertex._size > 2 ? a[2] : 0.f, 435 | _array_vertex._size > 3 ? a[3] : 1.f 436 | }; 437 | 438 | // push vertex 439 | _push_vertex(vertex_t{v, argb, uv}); 440 | } 441 | 442 | void primative_manager_t::glDrawElements(GLenum mode, 443 | GLsizei count, 444 | GLenum type, 445 | const GLvoid *indices) { 446 | switch (type) { 447 | case GL_UNSIGNED_SHORT: 448 | { 449 | glBegin(mode); 450 | const uint16_t *ind = (const uint16_t *)indices; 451 | for (int i = 0; i < count; ++i) { 452 | const uint16_t index = ind[i]; 453 | glArrayElement(index); 454 | } 455 | glEnd(); 456 | } 457 | break; 458 | case GL_UNSIGNED_INT: 459 | { 460 | glBegin(mode); 461 | const uint32_t *ind = (const uint32_t *)indices; 462 | for (int i = 0; i < count; ++i) { 463 | const uint16_t index = ind[i]; 464 | glArrayElement(index); 465 | } 466 | glEnd(); 467 | } 468 | break; 469 | default: 470 | DEBUG_BREAK; 471 | } 472 | } 473 | 474 | 475 | // mode 476 | // Specifies what kind of primitives to render. Symbolic constants GL_POINTS, 477 | // GL_LINE_STRIP, GL_LINE_LOOP, GL_LINES, GL_TRIANGLE_STRIP, GL_TRIANGLE_FAN, 478 | // GL_TRIANGLES, GL_QUAD_STRIP, GL_QUADS, and GL_POLYGON are accepted. 479 | // first 480 | // Specifies the starting index in the enabled arrays. 481 | // count 482 | // Specifies the number of indices to be rendered. 
483 | void primative_manager_t::glDrawArrays(GLenum mode, GLint first, GLsizei count) { 484 | glBegin(mode); 485 | for (int i = 0; i < count; ++i) { 486 | glArrayElement(first + i); 487 | } 488 | glEnd(); 489 | } 490 | 491 | void primative_manager_t::glDrawRangeElements(GLenum mode, GLuint start, 492 | GLuint end, GLsizei count, 493 | GLenum type, 494 | const void *indices) { 495 | switch (type) { 496 | case GL_UNSIGNED_SHORT: 497 | { 498 | glBegin(mode); 499 | const uint16_t *ind = (const uint16_t *)indices; 500 | for (int i = 0; i < count; ++i) { 501 | const uint16_t index = ind[i]; 502 | assert(index >= start && index <= end); 503 | glArrayElement(index); 504 | } 505 | glEnd(); 506 | } 507 | break; 508 | default: 509 | DEBUG_BREAK; 510 | } 511 | } 512 | -------------------------------------------------------------------------------- /source/primative.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "GL.h" 6 | #include "math.h" 7 | 8 | struct gl_context_t; 9 | 10 | #pragma pack( push, 1 ) 11 | struct vertex_t { 12 | float4 coord; 13 | float4 rgba; 14 | float2 tex; 15 | }; 16 | 17 | #pragma pack( push, 1 ) 18 | struct triangle_t { 19 | vertex_t vert[3]; 20 | }; 21 | 22 | struct primative_manager_t { 23 | 24 | primative_manager_t(gl_context_t &cxt) 25 | : _cxt(cxt) 26 | , _mode(GL_TRIANGLES) 27 | , _begin_count(0) {} 28 | 29 | void glBegin(GLenum mode); 30 | 31 | void glEnd(); 32 | 33 | void add_vertex(const float4 v); 34 | 35 | void clear_triangles() { _triangles.clear(); } 36 | 37 | const std::vector &triangles() const { return _triangles; } 38 | 39 | void clip_triangles(); 40 | void convert_to_dc(); 41 | void cull_triangles(); 42 | 43 | void latch_uv(float2 t) { _latch_uv = t; } 44 | 45 | // int4 intead? 
46 | void latch_argb(float4 c) { _latch_argb = c; } 47 | 48 | void glVertexPointer(GLint size, GLenum type, GLsizei stride, 49 | const GLvoid *pointer); 50 | 51 | void glColorPointer(GLint size, GLenum type, GLsizei stride, 52 | const GLvoid *pointer); 53 | 54 | void glTexCoordPointer(GLint size, GLenum type, GLsizei stride, 55 | const GLvoid *pointer); 56 | 57 | void glDrawElements(GLenum mode, GLsizei count, GLenum type, 58 | const GLvoid *indices); 59 | 60 | void glArrayElement(GLint i); 61 | 62 | void glDrawArrays(GLenum mode, GLint first, GLsizei count); 63 | 64 | void glDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, 65 | GLenum type, const void *indices); 66 | 67 | protected: 68 | void _push_vertex(const vertex_t &v); 69 | 70 | void _asm_quads(); 71 | void _asm_triangles(); 72 | void _asm_triangle_strip(); 73 | void _asm_triangle_fan(); 74 | void _asm_quad_strip(); 75 | void _asm_polygon(); 76 | 77 | struct array_t { 78 | 79 | array_t() 80 | : _type(0) 81 | , _stride(0) 82 | , _size(0) 83 | , _pointer(nullptr) 84 | { 85 | } 86 | 87 | GLenum _type; 88 | GLsizei _stride; 89 | GLint _size; 90 | const uint8_t *_pointer; 91 | }; 92 | 93 | gl_context_t &_cxt; 94 | 95 | array_t _array_vertex; 96 | array_t _array_color; 97 | array_t _array_tex_coord; 98 | 99 | GLenum _mode; 100 | int32_t _begin_count; 101 | float2 _latch_uv; 102 | float4 _latch_argb; 103 | 104 | std::vector _vertex; 105 | std::vector _triangles; 106 | }; 107 | -------------------------------------------------------------------------------- /source/profile.cpp: -------------------------------------------------------------------------------- 1 | #include "profile.h" 2 | #include "context.h" 3 | 4 | 5 | struct profile_imp_t : public profile_t { 6 | 7 | profile_imp_t() 8 | { 9 | reset(); 10 | } 11 | 12 | void on_end_frame() override { 13 | reset(); 14 | }; 15 | 16 | void on_triangles(const std::vector &t) override { 17 | _state.triangles += t.size(); 18 | }; 19 | 20 | protected: 
21 | 22 | float _area(const triangle_t &t) const { 23 | return 1.f; 24 | } 25 | 26 | void _inspect(const triangle_t &t) { 27 | // todo: calculate triangle area 28 | unsigned long area = (unsigned long)_area(t); 29 | if (area <= 0) { 30 | return; 31 | } 32 | unsigned long index = 0; 33 | _BitScanReverse(&index, area); 34 | if (index < 32) { 35 | _state.bin[index] += 1; 36 | } 37 | } 38 | 39 | void reset() { 40 | memset(&_state, 0, sizeof(_state)); 41 | _state.bin.fill(0); 42 | } 43 | 44 | struct state_t { 45 | uint32_t triangles; 46 | std::array bin; 47 | }; 48 | state_t _state; 49 | }; 50 | 51 | profile_t * profile_create() { 52 | if (Context) { 53 | auto &cfg = Context->config; 54 | bool enable = false; 55 | if (cfg.get("profile", &enable)) { 56 | if (enable) { 57 | return new profile_imp_t; 58 | } 59 | } 60 | } 61 | return new profile_t; 62 | } 63 | -------------------------------------------------------------------------------- /source/profile.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #include "forward.h" 5 | 6 | struct profile_t { 7 | 8 | virtual void on_end_frame() {}; 9 | 10 | virtual void on_triangles(const std::vector &t) {}; 11 | }; 12 | 13 | profile_t *profile_create(); 14 | -------------------------------------------------------------------------------- /source/raster.cpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "raster.h" 6 | #include "math.h" 7 | #include "context.h" 8 | 9 | 10 | bool raster_load(raster_module_t &dll, gl_context_t &cxt) { 11 | 12 | const char * dll_name[] = { 13 | "softgl_rast_wireframe.dll", 14 | "softgl_rast_reference.dll" 15 | }; 16 | 17 | std::string name; 18 | if (!cxt.config.get("raster_dll", &name)) { 19 | name = dll_name[0]; 20 | } 21 | 22 | dll.handle = LoadLibraryA(name.c_str()); 23 | if (!dll.handle) { 24 | return false; 25 | } 26 | 27 | 
dll.create = (rast_create_t)GetProcAddress(dll.handle, "raster_create"); 28 | dll.release = (rast_release_t)GetProcAddress(dll.handle, "raster_release"); 29 | 30 | if (dll.create && dll.release) { 31 | dll.inst = dll.create(); 32 | return nullptr != dll.inst; 33 | } 34 | 35 | return false; 36 | } 37 | -------------------------------------------------------------------------------- /source/raster.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "windows.h" 3 | #include "buffer.h" 4 | #include "primative.h" 5 | #include "texture.h" 6 | #include "state.h" 7 | 8 | struct gl_context_t; 9 | 10 | struct raster_t { 11 | 12 | // register a texture 13 | virtual void texture_add(const texture_t *tex) {} 14 | 15 | // unregister a texture 16 | virtual void texture_release(const texture_t *tex) {} 17 | 18 | // a texture has been updated 19 | virtual void texture_update(const texture_t *tex) {} 20 | 21 | // bind a texture to this pipeline 22 | virtual void texture_bind(const texture_t *tex) {} 23 | 24 | // stop any use of old framebuffer 25 | virtual void framebuffer_release() = 0; 26 | 27 | // attach new framebuffer 28 | virtual void framebuffer_aquire() = 0; 29 | 30 | // clear the framebuffer 31 | virtual void framebuffer_clear( 32 | bool color, 33 | bool depth, 34 | bool stencil, 35 | uint32_t clear_color, 36 | float clear_depth, 37 | uint32_t clear_stencil) {}; 38 | 39 | // spin up the rasterizer 40 | virtual bool start(gl_context_t &cxt) = 0; 41 | 42 | // kill the rasterizer 43 | virtual void stop() = 0; 44 | 45 | // push triangles to the rasterizer 46 | virtual void push_triangles(const std::vector &triangles, 47 | const texture_t *tex, 48 | const state_manager_t &state) = 0; 49 | 50 | // pipeline is being flushed 51 | virtual void flush() = 0; 52 | 53 | // present screen buffer 54 | virtual void present() = 0; 55 | }; 56 | 57 | typedef raster_t *(*rast_create_t)(); 58 | typedef void (*rast_release_t)(raster_t 
*); 59 | 60 | struct raster_module_t { 61 | 62 | raster_module_t() 63 | : handle(nullptr), create(nullptr), release(nullptr), inst(nullptr) {} 64 | 65 | HMODULE handle; 66 | rast_create_t create; 67 | rast_release_t release; 68 | raster_t *inst; 69 | }; 70 | 71 | // load a rasterization dll 72 | bool raster_load(raster_module_t &dll, gl_context_t &cxt); 73 | -------------------------------------------------------------------------------- /source/state.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | #include "GL.h" 6 | #include "common.h" 7 | 8 | struct state_manager_t { 9 | 10 | state_manager_t() { 11 | memset(this, 0, sizeof(*this)); 12 | clearColor = 0x101010; 13 | 14 | array_vertex = true; 15 | cullMode = GL_BACK; 16 | cullFace = true; 17 | 18 | frontFace = GL_CCW; 19 | 20 | depthRangeNear = 0.f; 21 | depthRangeFar = 1.f; 22 | } 23 | 24 | // set by glDepthRange 25 | // this used by quake3 when drawing the sky 26 | GLfloat depthRangeNear; 27 | GLfloat depthRangeFar; 28 | 29 | GLenum beginMode; 30 | uint32_t clearColor; 31 | GLenum cullMode; 32 | rectf_t viewport; 33 | GLfloat clearDepth; 34 | rectf_t scissor; 35 | 36 | GLenum blendFuncSrc; 37 | GLenum blendFuncDst; 38 | GLenum depthFunc; 39 | 40 | GLenum frontFace; // defaults to GL_CCW 41 | 42 | bool testAlpha; // GL_ALPHA_TEST 43 | bool testDepth; // GL_DEPTH_TEST 44 | bool cullFace; // GL_CULL_FACE 45 | bool blendFrag; // GL_BLEND 46 | bool testScissor; // GL_SCISSOR_TEST 47 | bool testStencil; // GL_STENCIL_TEST 48 | 49 | bool texture1D; // GL_TEXTURE_1D 50 | bool texture2D; // GL_TEXTURE_2D 51 | 52 | bool array_color; 53 | bool array_vertex; 54 | bool array_tex_coord; 55 | }; 56 | -------------------------------------------------------------------------------- /source/texture.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "common.h" 5 | 
#include "texture.h" 6 | #include "log.h" 7 | #include "context.h" 8 | 9 | namespace { 10 | 11 | bool isPowerOfTwo(uint32_t x) { 12 | return (x & (x - 1)) == 0; 13 | } 14 | 15 | static uint32_t targetToIndex(GLenum e) { 16 | switch (e) { 17 | case GL_TEXTURE_1D: return 0; 18 | case GL_TEXTURE_2D: return 1; 19 | default: return 2; 20 | } 21 | } 22 | 23 | // required number of texels for root image and mipchain where levels is the 24 | // number of mip levels 25 | uint32_t texture_space_req(const uint32_t width, const uint32_t height, uint32_t levels) { 26 | uint32_t space = 0; 27 | for (uint32_t i = 0; i < levels; ++i) { 28 | const uint32_t w = std::max(1u, width >> i); 29 | const uint32_t h = std::max(1u, height >> i); 30 | space += w * h; 31 | } 32 | return space; 33 | } 34 | 35 | } // namespace 36 | 37 | void texture_manager_t::glTexImage1D(GLenum target, GLint level, 38 | GLint internalformat, GLsizei width, 39 | GLint border, GLenum format, GLenum type, 40 | const GLvoid *pixels) { 41 | DEBUG_BREAK; 42 | } 43 | 44 | void texture_manager_t::glTexImage2D(GLenum target, GLint level, 45 | GLint internalFormat, GLsizei width, 46 | GLsizei height, GLint border, 47 | GLenum format, GLenum type, 48 | const GLvoid *pixels) { 49 | 50 | if (!isPowerOfTwo(width) || !isPowerOfTwo(height)) { 51 | DEBUG_BREAK; 52 | } 53 | if (std::max(width, height) >= texture_t::max_size) { 54 | DEBUG_BREAK; 55 | } 56 | if (level >= texture_t::mip_levels) { 57 | DEBUG_BREAK; 58 | } 59 | if (level != 0) { 60 | // XXX: disable mip upload for now 61 | return; 62 | } 63 | 64 | texture_t *t = boundTexture2d(); 65 | if (!t) { 66 | return; 67 | } 68 | 69 | // if this is the root level then allocate the image 70 | if (level == 0) { 71 | 72 | if (t->_pixels[0]) { 73 | t->release(); 74 | t->_pixels.fill(nullptr); 75 | } 76 | 77 | t->_width = width; 78 | t->_height = height; 79 | t->_wshift = int32_t(log2(width)); 80 | 81 | size_t mem_size = 82 | texture_space_req(width, height, texture_t::mip_levels) 
* sizeof(uint32_t); 83 | t->_pixels[0] = (uint32_t *)_aligned_malloc(mem_size, 16); 84 | memset(t->_pixels[0], 0xFF, mem_size); 85 | 86 | // alloc all of the other mip levels 87 | for (int i = 0; i < texture_t::mip_levels - 1; ++i) { 88 | uint32_t advance = std::max(1u, (width >> i) * (height >> i)); 89 | t->_pixels[i + 1] = t->_pixels[i] + advance; 90 | } 91 | } 92 | else { 93 | // check our mip level is a submultiple 94 | if (width != std::max(1u, t->_width >> level) || 95 | height != std::max(1u, t->_height >> level)) { 96 | DEBUG_BREAK; 97 | } 98 | } 99 | 100 | // load texture from the source 101 | t->load(level, format, type, pixels); 102 | 103 | // XXX: lets fudge in some mip levels for now 104 | if (level == 0) { 105 | t->generateMipLevels(); 106 | } 107 | } 108 | 109 | void texture_manager_t::glCopyTexImage1D(GLenum target, GLint level, 110 | GLenum internalFormat, GLint x, 111 | GLint y, GLsizei width, GLint border) { 112 | auto itt = _tex_map.find(_bound[targetToIndex(target)]); 113 | if (itt != _tex_map.end()) { 114 | auto *t = itt->second; 115 | assert(t); 116 | DEBUG_BREAK; 117 | } 118 | } 119 | 120 | void texture_manager_t::glCopyTexImage2D(GLenum target, GLint level, 121 | GLenum internalFormat, GLint x, 122 | GLint y, GLsizei width, GLsizei height, 123 | GLint border) { 124 | auto itt = _tex_map.find(_bound[targetToIndex(target)]); 125 | if (itt != _tex_map.end()) { 126 | auto *t = itt->second; 127 | assert(t); 128 | DEBUG_BREAK; 129 | } 130 | DEBUG_BREAK; 131 | } 132 | 133 | void texture_manager_t::glTexSubImage1D(GLenum target, GLint level, 134 | GLint xoffset, GLsizei width, 135 | GLenum format, GLenum type, 136 | const GLvoid *pixels) { 137 | auto itt = _tex_map.find(_bound[targetToIndex(target)]); 138 | if (itt != _tex_map.end()) { 139 | auto *t = itt->second; 140 | assert(t); 141 | } 142 | } 143 | 144 | void texture_manager_t::glTexSubImage2D(GLenum target, GLint level, 145 | GLint xoffset, GLint yoffset, 146 | GLsizei width, GLsizei height, 
147 | GLenum format, GLenum type, 148 | const GLvoid *pixels) { 149 | auto itt = _tex_map.find(_bound[targetToIndex(target)]); 150 | if (itt != _tex_map.end()) { 151 | auto *t = itt->second; 152 | assert(t); 153 | } 154 | } 155 | 156 | void texture_manager_t::glCopyTexSubImage1D(GLenum target, GLint level, 157 | GLint xoffset, GLint x, GLint y, 158 | GLsizei width) { 159 | DEBUG_BREAK; 160 | } 161 | 162 | void texture_manager_t::glCopyTexSubImage2D(GLenum target, GLint level, 163 | GLint xoffset, GLint yoffset, 164 | GLint x, GLint y, GLsizei width, 165 | GLsizei height) { 166 | auto itt = _tex_map.find(_bound[targetToIndex(target)]); 167 | if (itt != _tex_map.end()) { 168 | auto *t = itt->second; 169 | assert(t); 170 | } 171 | } 172 | 173 | void texture_manager_t::glBindTexture(GLenum target, GLuint texture) { 174 | 175 | #define GL_TEXTURE_CUBE_MAP_ARB 0x8513 176 | 177 | if (target != GL_TEXTURE_2D) { 178 | // DEBUG_BREAK; 179 | return; 180 | } 181 | 182 | texture_t *tex = getOrCreateTexture(texture); 183 | assert(tex); 184 | (void)tex; 185 | _bound[targetToIndex(target)] = texture; 186 | } 187 | 188 | GLboolean texture_manager_t::glAreTexturesResident(GLsizei n, 189 | const GLuint *textures, 190 | GLboolean *residences) { 191 | // determine if textures are loaded in texture memory 192 | DEBUG_BREAK; 193 | return false; 194 | } 195 | 196 | void texture_manager_t::glDeleteTextures(GLsizei n, const GLuint *textures) { 197 | 198 | // flush the pipeline 199 | // XXX: only do if it was the bound texture 200 | Context->on_flush(); 201 | 202 | for (GLsizei i = 0; i < n; ++i) { 203 | // locate texture 204 | auto itt = _tex_map.find(textures[i]); 205 | if (itt == _tex_map.end()) { 206 | continue; 207 | } 208 | // delete contents 209 | itt->second->release(); 210 | // delete structure 211 | delete itt->second; 212 | // remove from texmap 213 | _tex_map.erase(itt); 214 | } 215 | } 216 | 217 | texture_t *texture_manager_t::getTexture(uint32_t x) { 218 | // return texture if 
it exists 219 | auto itt = _tex_map.find(x); 220 | if (itt != _tex_map.end()) { 221 | return itt->second; 222 | } 223 | return nullptr; 224 | } 225 | 226 | texture_t *texture_manager_t::getOrCreateTexture(uint32_t x) { 227 | // return texture if it exists 228 | if (texture_t *t = getTexture(x)) { 229 | return t; 230 | } 231 | // create new texture 232 | auto tex = std::make_unique(); 233 | // insert into the tex map 234 | texture_t *t = tex.release(); 235 | _tex_map[x] = t; 236 | return t; 237 | } 238 | 239 | void texture_manager_t::glGenTextures(GLsizei n, GLuint *textures) { 240 | for (GLsizei i = 0; i < n; ++i) { 241 | // check texture is not already registered 242 | uint32_t x = ++_uuid; 243 | auto itt = _tex_map.find(x); 244 | if (itt != _tex_map.end()) { 245 | continue; 246 | } 247 | texture_t *tex = getOrCreateTexture(x); 248 | (void)tex; 249 | // return texture id to caller 250 | textures[i] = x; 251 | } 252 | } 253 | 254 | GLboolean texture_manager_t::glIsTexture(GLuint texture) { 255 | DEBUG_BREAK; 256 | return GL_FALSE; 257 | } 258 | 259 | texture_t *texture_manager_t::boundTexture2d() { 260 | const uint32_t index = _bound[targetToIndex(GL_TEXTURE_2D)]; 261 | auto itt = _tex_map.find(index); 262 | if (itt != _tex_map.end()) { 263 | return itt->second; 264 | } 265 | return nullptr; 266 | } 267 | 268 | texture_t::texture_t() 269 | : _format(e_argb) 270 | , _width(0) 271 | , _height(0) 272 | { 273 | _pixels.fill(nullptr); 274 | } 275 | 276 | void texture_t::generateMipLevels() { 277 | for (int i = 1; i < mip_levels; ++i) { 278 | 279 | const uint32_t *src = _pixels[i - 1]; 280 | uint32_t *dst = _pixels[i - 0]; 281 | 282 | // destination with and height 283 | const uint32_t dw = std::max(1u, _width >> i ); 284 | const uint32_t dh = std::max(1u, _height >> i ); 285 | 286 | // source width height 287 | const uint32_t sw = std::max(1u, _width >> (i-1)); 288 | const uint32_t sh = std::max(1u, _height >> (i-1)); 289 | 290 | for (uint32_t y = 0; y < dh; ++y) { 291 
// Pack four 8-bit channels into one 32-bit word, first argument in the most
// significant byte and last argument in the least significant byte.
// The texture loaders in this file pass (alpha, red, green, blue), so the
// parameter names do not match the channel that ends up in each byte.
static inline uint32_t packRGBA(uint8_t r, uint8_t g, uint8_t b, uint8_t a) {
  // widen before shifting: a uint8_t operand is promoted to signed int, and
  // shifting a value >= 0x80 left by 24 would run into the sign bit
  return (uint32_t(r) << 24) |
         (uint32_t(g) << 16) |
         (uint32_t(b) <<  8) |
          uint32_t(a);
}
dsty; 360 | for (uint32_t x = 0; x < swidth; ++x) { 361 | *dstx = packRGBA( 362 | 0, 363 | (x * 255) / swidth, 364 | (y * 255) / sheight, 365 | 0); 366 | *dstx = packRGBA( 367 | srcx[3], 368 | srcx[0], 369 | srcx[1], 370 | srcx[2]); 371 | srcx += 4; 372 | dstx += 1; 373 | } 374 | srcy += swidth * 4; 375 | dsty += swidth; 376 | } 377 | } 378 | 379 | void texture_t::load_bgra_8(uint32_t level, const void *src) { 380 | 381 | const uint32_t swidth = std::max(1u, _width >> level); 382 | const uint32_t sheight = std::max(1u, _height >> level); 383 | 384 | const uint8_t *srcy = (const uint8_t *)src; 385 | uint32_t *dsty = _pixels[level]; 386 | assert(dsty && srcy); 387 | 388 | for (uint32_t y = 0; y < sheight; ++y) { 389 | const uint8_t *srcx = srcy; 390 | uint32_t *dstx = dsty; 391 | for (uint32_t x = 0; x < swidth; ++x) { 392 | *dstx = packRGBA( 393 | 0, 394 | (x * 255) / swidth, 395 | (y * 255) / sheight, 396 | 0); 397 | *dstx = packRGBA( 398 | srcx[3], 399 | srcx[0], 400 | srcx[1], 401 | srcx[2]); 402 | srcx += 4; 403 | dstx += 1; 404 | } 405 | srcy += swidth * 4; 406 | dsty += swidth; 407 | } 408 | } 409 | 410 | void texture_t::load_bgr_8(uint32_t level, const void *src) { 411 | 412 | const uint32_t swidth = std::max(1u, _width >> level); 413 | const uint32_t sheight = std::max(1u, _height >> level); 414 | 415 | const uint8_t *srcy = (const uint8_t *)src; 416 | uint32_t *dsty = _pixels[level]; 417 | assert(dsty && srcy); 418 | 419 | for (uint32_t y = 0; y < sheight; ++y) { 420 | const uint8_t *srcx = srcy; 421 | uint32_t *dstx = dsty; 422 | for (uint32_t x = 0; x < swidth; ++x) { 423 | *dstx = packRGBA(0, srcx[2], srcx[1], srcx[0]); 424 | srcx += 3; 425 | dstx += 1; 426 | } 427 | srcy += swidth * 3; 428 | dsty += swidth; 429 | } 430 | } 431 | 432 | void texture_t::load_rgb_8(uint32_t level, const void *src) { 433 | 434 | const uint32_t swidth = std::max(1u, _width >> level); 435 | const uint32_t sheight = std::max(1u, _height >> level); 436 | 437 | const uint8_t 
*srcy = (const uint8_t *)src; 438 | uint32_t *dsty = _pixels[level]; 439 | assert(dsty && srcy); 440 | 441 | for (uint32_t y = 0; y < sheight; ++y) { 442 | const uint8_t *srcx = srcy; 443 | uint32_t *dstx = dsty; 444 | for (uint32_t x = 0; x < swidth; ++x) { 445 | *dstx = packRGBA(0, srcx[0], srcx[1], srcx[2]); 446 | srcx += 3; 447 | dstx += 1; 448 | } 449 | srcy += swidth * 3; 450 | dsty += swidth; 451 | } 452 | } 453 | -------------------------------------------------------------------------------- /source/texture.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "GL.h" 3 | 4 | #include 5 | #include 6 | 7 | 8 | struct texture_t { 9 | 10 | enum { 11 | max_size = 1024, 12 | mip_levels = 10 // log2(max_size) 13 | }; 14 | 15 | enum format_t { 16 | e_argb 17 | }; 18 | 19 | texture_t(); 20 | 21 | format_t _format; 22 | 23 | uint32_t _width; 24 | uint32_t _wshift; // log2(_width) 25 | 26 | uint32_t _height; 27 | std::array _pixels; 28 | 29 | void load(uint32_t level, GLenum format, GLenum type, const void *src); 30 | void load_rgba_8(uint32_t level, const void *src); 31 | void load_bgr_8(uint32_t level, const void *src); 32 | void load_rgb_8(uint32_t level, const void *src); 33 | void load_bgra_8(uint32_t level, const void *src); 34 | 35 | void generateMipLevels(); 36 | 37 | void release(); 38 | }; 39 | 40 | 41 | struct texture_manager_t { 42 | 43 | texture_manager_t() : _uuid(0) { 44 | _bound.fill(0); 45 | } 46 | 47 | // 48 | 49 | void glTexImage1D(GLenum target, GLint level, GLint internalformat, 50 | GLsizei width, GLint border, GLenum format, GLenum type, 51 | const GLvoid *pixels); 52 | 53 | void glTexImage2D(GLenum target, GLint level, GLint internalformat, 54 | GLsizei width, GLsizei height, GLint border, GLenum format, 55 | GLenum type, const GLvoid *pixels); 56 | 57 | // 58 | 59 | void glTexSubImage1D(GLenum target, GLint level, GLint xoffset, GLsizei width, 60 | GLenum format, GLenum type, const 
GLvoid *pixels); 61 | 62 | void glTexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, 63 | GLsizei width, GLsizei height, GLenum format, 64 | GLenum type, const GLvoid *pixels); 65 | 66 | // 67 | 68 | void glCopyTexImage1D(GLenum target, GLint level, GLenum internalFormat, 69 | GLint x, GLint y, GLsizei width, GLint border); 70 | 71 | void glCopyTexImage2D(GLenum target, GLint level, GLenum internalFormat, 72 | GLint x, GLint y, GLsizei width, GLsizei height, 73 | GLint border); 74 | 75 | // 76 | 77 | void glCopyTexSubImage1D(GLenum target, GLint level, GLint xoffset, GLint x, 78 | GLint y, GLsizei width); 79 | 80 | void glCopyTexSubImage2D(GLenum target, GLint level, GLint xoffset, 81 | GLint yoffset, GLint x, GLint y, GLsizei width, 82 | GLsizei height); 83 | 84 | void glBindTexture(GLenum target, GLuint texture); 85 | 86 | GLboolean glAreTexturesResident(GLsizei n, const GLuint *textures, 87 | GLboolean *residences); 88 | 89 | void glDeleteTextures(GLsizei n, const GLuint *textures); 90 | 91 | void glGenTextures(GLsizei n, GLuint *textures); 92 | 93 | GLboolean glIsTexture(GLuint texture); 94 | 95 | texture_t *boundTexture2d(); 96 | 97 | protected: 98 | 99 | // get or create a texture object 100 | texture_t *getOrCreateTexture(uint32_t index); 101 | 102 | // get or create a texture object 103 | texture_t *getTexture(uint32_t index); 104 | 105 | std::array _bound; 106 | 107 | uint32_t _uuid; 108 | std::unordered_map _tex_map; 109 | }; 110 | -------------------------------------------------------------------------------- /source/wgl.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "windows.h" 10 | 11 | #include "common.h" 12 | #include "wgl.h" 13 | #include "game_id.h" 14 | #include "gdi_hook.h" 15 | #include "log.h" 16 | #include "game_id.h" 17 | 18 | 19 | struct hdc_info_t { 20 | 
tagPIXELFORMATDESCRIPTOR pfd; 21 | }; 22 | 23 | struct wgl_state_t { 24 | game_id_t gameId; 25 | std::vector pixelFormats; 26 | std::map hdcMap; 27 | std::set contexts; 28 | } wgl; 29 | 30 | static thread_local gl_context_t *gl_context = nullptr; 31 | 32 | gl_context_t *getContext() { 33 | return gl_context; 34 | } 35 | 36 | BOOL __stdcall wglSwapBuffers_imp(HDC a) { 37 | #if 0 38 | log_t::printf("%s(%p)\n", __func__, (void*)a); 39 | #endif 40 | 41 | if (!gl_context) 42 | return FALSE; 43 | Context->on_flush(); 44 | if (Context->profile) { 45 | Context->profile->on_end_frame(); 46 | } 47 | if (Context->raster.inst) { 48 | Context->raster.inst->present(); 49 | } 50 | 51 | if (Context->user_cmds.screenshot) { 52 | Context->buffer.save_bmp(); 53 | Context->user_cmds.screenshot = false; 54 | } 55 | 56 | GdiHook.invalidate(gl_context->window.getHwnd()); 57 | 58 | switch (getGameId()) { 59 | case e_ut2003_demo: 60 | case e_ut99_goty: 61 | case e_half_life_of_demo: 62 | case e_unreal_gold: 63 | if (Context->raster.inst) { 64 | Context->raster.inst->framebuffer_clear(false, true, false, 0x202020, 10000.f, 0); 65 | } 66 | } 67 | 68 | #ifndef NDEBUG 69 | Sleep(5); 70 | #endif 71 | 72 | return TRUE; 73 | } 74 | 75 | HGLRC __stdcall wglCreateContext_imp(HDC hdc) { 76 | log_t::printf("%s(%p)\n", __func__, (void*)hdc); 77 | 78 | // lookup the hdc to get the pixel format 79 | auto itt = wgl.hdcMap.find(hdc); 80 | if (itt == wgl.hdcMap.end()) 81 | return nullptr; 82 | 83 | const hdc_info_t &info = itt->second; 84 | HWND hwnd = WindowFromDC(hdc); 85 | // create a new context 86 | gl_context_t *cxt = new gl_context_t(hwnd, hdc); 87 | if (!cxt->on_create()) { 88 | //XXX: bad news 89 | return nullptr; 90 | } 91 | log_t::printf("new context created -> %p\n", (void*)cxt);; 92 | // insert into the context map 93 | wgl.contexts.insert(cxt); 94 | return HGLRC(cxt); 95 | } 96 | 97 | BOOL __stdcall wglDeleteContext_imp(HGLRC a) { 98 | log_t::printf("%s(%p)\n", __func__, (void *)a); 99 | 100 
| if (!a) { 101 | return false; 102 | } 103 | gl_context_t *cxt = (gl_context_t *)a; 104 | 105 | // erase the context 106 | auto itt = wgl.contexts.find(cxt); 107 | if (itt != wgl.contexts.end()) { 108 | 109 | if (gl_context == cxt) { 110 | // this is the current context 111 | gl_context = nullptr; 112 | } 113 | cxt->on_destroy(); 114 | // unhook this window 115 | GdiHook.unhook(*cxt); 116 | 117 | log_t::printf("context deleted -> %p\n", (void *)*itt); 118 | delete *itt; 119 | wgl.contexts.erase(itt); 120 | } 121 | return TRUE; 122 | } 123 | 124 | BOOL __stdcall wglMakeCurrent_imp(HDC a, HGLRC b) { 125 | log_t::printf("%s(%p, %p)\n", __func__, (void*)a, (void*)b); 126 | 127 | gl_context_t *cxt = (gl_context_t*)b; 128 | if (b != nullptr) { 129 | // make the global context 130 | gl_context = cxt; 131 | if (cxt->window.getHdc() != a) { 132 | __debugbreak(); 133 | } 134 | gl_context->on_make_current(); 135 | } 136 | return TRUE; 137 | } 138 | 139 | BOOL __stdcall wglSetPixelFormat_imp(HDC hdc, 140 | int iPixelFormat, 141 | const struct PIXELFORMATDESCRIPTOR *ppfd) 142 | { 143 | log_t::printf("%s(%p, %d, %p)\n", __func__, (void*)hdc, iPixelFormat, (void*)ppfd); 144 | 145 | if (iPixelFormat <= 0 || size_t(iPixelFormat) > wgl.pixelFormats.size()) 146 | return FALSE; 147 | // lookup the requested format 148 | const tagPIXELFORMATDESCRIPTOR &pfmt = wgl.pixelFormats[iPixelFormat - 1]; 149 | 150 | // have we seen this hdc before 151 | hdc_info_t *info = nullptr; 152 | auto itt = wgl.hdcMap.find(hdc); 153 | if (itt == wgl.hdcMap.end()) { 154 | wgl.hdcMap[hdc] = hdc_info_t{}; 155 | info = &wgl.hdcMap.at(hdc); 156 | } 157 | else { 158 | info = &itt->second; 159 | } 160 | assert(info); 161 | 162 | // map pixel format to this hdc 163 | info->pfd = pfmt; 164 | 165 | // pretend we set the pixel format 166 | return TRUE; 167 | } 168 | 169 | void dump_pixel_format(const PPIXELFORMATDESCRIPTOR ppfd) { 170 | log_t::printf(" nSize : %d\n", (int) ppfd->nSize ); 171 | log_t::printf(" 
nVersion : %d\n", (int) ppfd->nVersion ); 172 | log_t::printf(" dwFlags : %d\n", (int) ppfd->dwFlags ); 173 | log_t::printf(" iPixelType : %d\n", (int) ppfd->iPixelType ); 174 | log_t::printf(" cColorBits : %d\n", (int) ppfd->cColorBits ); 175 | log_t::printf(" cRedBits : %d\n", (int) ppfd->cRedBits ); 176 | log_t::printf(" cRedShift : %d\n", (int) ppfd->cRedShift ); 177 | log_t::printf(" cGreenBits : %d\n", (int) ppfd->cGreenBits ); 178 | log_t::printf(" cGreenShift : %d\n", (int) ppfd->cGreenShift ); 179 | log_t::printf(" cBlueBits : %d\n", (int) ppfd->cBlueBits ); 180 | log_t::printf(" cBlueShift : %d\n", (int) ppfd->cBlueShift ); 181 | log_t::printf(" cAlphaBits : %d\n", (int) ppfd->cAlphaBits ); 182 | log_t::printf(" cAlphaShift : %d\n", (int) ppfd->cAlphaShift ); 183 | log_t::printf(" cAccumBits : %d\n", (int) ppfd->cAccumBits ); 184 | log_t::printf(" cAccumRedBits : %d\n", (int) ppfd->cAccumRedBits ); 185 | log_t::printf(" cAccumGreenBits: %d\n", (int) ppfd->cAccumGreenBits ); 186 | log_t::printf(" cAccumBlueBits : %d\n", (int) ppfd->cAccumBlueBits ); 187 | log_t::printf(" cAccumAlphaBits: %d\n", (int) ppfd->cAccumAlphaBits ); 188 | log_t::printf(" cDepthBits : %d\n", (int) ppfd->cDepthBits ); 189 | log_t::printf(" cStencilBits : %d\n", (int) ppfd->cStencilBits ); 190 | log_t::printf(" cAuxBuffers : %d\n", (int) ppfd->cAuxBuffers ); 191 | log_t::printf(" iLayerType : %d\n", (int) ppfd->iLayerType ); 192 | log_t::printf(" bReserved : %d\n", (int) ppfd->bReserved ); 193 | log_t::printf(" dwLayerMask : %d\n", (int) ppfd->dwLayerMask ); 194 | log_t::printf(" dwVisibleMask : %d\n", (int) ppfd->dwVisibleMask ); 195 | log_t::printf(" dwDamageMask : %d\n", (int) ppfd->dwDamageMask ); 196 | } 197 | 198 | int __stdcall wglChoosePixelFormat_imp(HDC hdc, const PPIXELFORMATDESCRIPTOR ppfd) 199 | { 200 | log_t::printf("%s(%p)\n", __func__, (void*)ppfd); 201 | if (!ppfd) 202 | return 0; 203 | dump_pixel_format(ppfd); 204 | // add pixel format to our list of requested 
formats 205 | wgl.pixelFormats.push_back(*ppfd); 206 | return wgl.pixelFormats.size(); 207 | } 208 | 209 | int __stdcall wglDescribePixelFormat_imp(HDC hdc, 210 | int iPixelFormat, 211 | UINT nBytes, 212 | LPPIXELFORMATDESCRIPTOR ppfd) { 213 | log_t::printf("%s(%p, %d, %d, %p)\n", __func__, (void*)hdc, iPixelFormat, (int)nBytes, (void*)ppfd); 214 | 215 | if (iPixelFormat <= 0 || iPixelFormat > int(wgl.pixelFormats.size())) { 216 | return 0; 217 | } 218 | if (ppfd) { 219 | const auto &fmt = wgl.pixelFormats.at(iPixelFormat-1); 220 | nBytes = std::min(nBytes, sizeof(tagPIXELFORMATDESCRIPTOR)); 221 | memcpy(ppfd, &fmt, nBytes); 222 | ppfd->nSize = sizeof(*ppfd); 223 | ppfd->nVersion = 1; 224 | if (!ppfd->dwFlags) 225 | ppfd->dwFlags = PFD_SUPPORT_OPENGL | PFD_DRAW_TO_WINDOW | PFD_DOUBLEBUFFER; 226 | ppfd->iPixelType = PFD_TYPE_RGBA; 227 | #if 1 228 | if (!ppfd->cColorBits) 229 | ppfd->cColorBits = 32; // some games may want 24 here 230 | if (!ppfd->cDepthBits) 231 | ppfd->cDepthBits = 24; 232 | #endif 233 | ppfd->cAccumBits = 0; 234 | ppfd->cAccumAlphaBits = 8; 235 | ppfd->cStencilBits = 8; 236 | ppfd->cRedBits = 8; 237 | ppfd->cRedShift = 0; 238 | ppfd->cGreenBits = 8; 239 | ppfd->cGreenShift = 8; 240 | ppfd->cBlueBits = 8; 241 | ppfd->cBlueShift = 16; 242 | ppfd->cAlphaBits = 8; 243 | ppfd->cAlphaShift = 24; 244 | if (!ppfd->cStencilBits) 245 | ppfd->cStencilBits = 8; 246 | if (!ppfd->cAuxBuffers) 247 | ppfd->cAuxBuffers = 4; 248 | if (!ppfd->iPixelType) 249 | ppfd->iPixelType = PFD_TYPE_RGBA; 250 | } 251 | return wgl.pixelFormats.size(); 252 | } 253 | 254 | HDC __stdcall wglGetCurrentDC_imp(VOID) { 255 | log_t::printf("%s()\n", __func__); 256 | return gl_context ? 
gl_context->window.getHdc() : nullptr; 257 | } 258 | 259 | PROC __stdcall wglGetProcAddress_imp(LPCSTR a) { 260 | log_t::printf("%s(%s)\n", __func__, (char*)a); 261 | PROC proc = (PROC)GetProcAddress(GetModuleHandleA("opengl32.dll"), a); 262 | if (!proc) { 263 | log_t::printf(" not found!"); 264 | } 265 | return proc; 266 | } 267 | 268 | HGLRC __stdcall wglGetCurrentContext_imp(VOID) { 269 | log_t::printf("%s()\n", __func__); 270 | DEBUG_BREAK; 271 | return (HGLRC)gl_context; 272 | } 273 | 274 | // ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- 275 | 276 | BOOL __stdcall wglCopyContext_imp(HGLRC a, HGLRC b, UINT c) { 277 | DEBUG_BREAK; 278 | return FALSE; 279 | } 280 | 281 | BOOL __stdcall wglShareLists_imp(HGLRC a, HGLRC b) { 282 | log_t::printf("%s()\n"); 283 | // note: UnrealGold expects this to return true 284 | return TRUE; 285 | } 286 | 287 | HGLRC __stdcall wglCreateLayerContext_imp(HDC a, int b) { 288 | DEBUG_BREAK; 289 | return 0; 290 | } 291 | 292 | BOOL __stdcall wglUseFontBitmapsA_imp(HDC a, DWORD b, DWORD c, DWORD d) { 293 | DEBUG_BREAK; 294 | return FALSE; 295 | } 296 | 297 | BOOL __stdcall wglUseFontBitmapsW_imp(HDC a, DWORD b, DWORD c, DWORD d) { 298 | DEBUG_BREAK; 299 | return FALSE; 300 | } 301 | 302 | BOOL __stdcall wglUseFontOutlinesA_imp(HDC a, DWORD b, DWORD c, DWORD d, FLOAT e, 303 | FLOAT f, int g, LPGLYPHMETRICSFLOAT h) { 304 | DEBUG_BREAK; 305 | return FALSE; 306 | } 307 | 308 | BOOL __stdcall wglUseFontOutlinesW_imp(HDC a, DWORD b, DWORD c, DWORD d, FLOAT e, 309 | FLOAT f, int g, LPGLYPHMETRICSFLOAT h) { 310 | DEBUG_BREAK; 311 | return FALSE; 312 | } 313 | 314 | BOOL __stdcall wglDescribeLayerPlane_imp(HDC a, int b, int c, UINT d, 315 | LPLAYERPLANEDESCRIPTOR e) { 316 | DEBUG_BREAK; 317 | return FALSE; 318 | } 319 | 320 | int __stdcall wglSetLayerPaletteEntries_imp(HDC a, int b, int c, int d, 321 | CONST COLORREF *e) { 322 | DEBUG_BREAK; 323 | return 0; 324 | } 325 | 326 | int __stdcall 
wglGetLayerPaletteEntries_imp(HDC a, int b, int c, int d, 327 | COLORREF *e) { 328 | DEBUG_BREAK; 329 | return 0; 330 | } 331 | 332 | BOOL __stdcall wglRealizeLayerPalette_imp(HDC a, int b, BOOL c) { 333 | DEBUG_BREAK; 334 | return FALSE; 335 | } 336 | 337 | BOOL __stdcall wglSwapLayerBuffers_imp(HDC a, UINT b) { 338 | DEBUG_BREAK; 339 | return FALSE; 340 | } 341 | 342 | DWORD __stdcall wglSwapMultipleBuffers_imp(UINT a, CONST WGLSWAP *b) { 343 | DEBUG_BREAK; 344 | return 0; 345 | } 346 | 347 | const char * __stdcall wglGetExtensionsStringARB_imp(HDC hdc) { 348 | log_t::printf("%s(%p)\n", __func__, hdc); 349 | return ""; 350 | } 351 | -------------------------------------------------------------------------------- /source/wgl.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "forward.h" 4 | 5 | gl_context_t *getContext(); 6 | -------------------------------------------------------------------------------- /source/window.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "windows.h" 3 | 4 | 5 | struct window_manager_t { 6 | 7 | window_manager_t(HWND hwnd, HDC hdc) 8 | : _hwnd(hwnd) 9 | , _hdc(hdc) 10 | { 11 | } 12 | 13 | uint32_t width() const { 14 | RECT r; 15 | GetClientRect(_hwnd, &r); 16 | return r.right; 17 | } 18 | 19 | uint32_t height() const { 20 | RECT r; 21 | GetClientRect(_hwnd, &r); 22 | return r.bottom; 23 | } 24 | 25 | // get window data 26 | HWND getHwnd() const { return _hwnd; } 27 | HDC getHdc() const { return _hdc; } 28 | 29 | protected: 30 | // window data info 31 | const HWND _hwnd; 32 | const HDC _hdc; 33 | }; 34 | -------------------------------------------------------------------------------- /source/windows.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define WIN32_LEAN_AND_MEAN 4 | #include 5 | 6 | #undef near 7 | #undef far 8 | #undef min 9 | #undef 
max 10 | --------------------------------------------------------------------------------