├── CMakeLists.txt
├── apigen
│   ├── .idea
│   │   ├── apigen.iml
│   │   ├── misc.xml
│   │   ├── modules.xml
│   │   └── workspace.xml
│   ├── apigen.py
│   ├── gen.c
│   └── glfuncs.txt
├── docs
│   └── screenshots
│       ├── doom3_1.png
│       ├── doom3_2.png
│       ├── hl_of_demo.png
│       ├── quake2.png
│       ├── quake3_demo.png
│       ├── ut2003.png
│       └── ut99_goty.png
├── opengl32.def
├── rast_fasterizer
│   ├── CMakeLists.txt
│   ├── clip.h
│   └── fasterizer.cpp
├── rast_opencl
│   ├── CMakeLists.txt
│   └── rast_opencl.cpp
├── rast_reference
│   ├── CMakeLists.txt
│   ├── kernel.h
│   ├── kernels
│   │   ├── rast_tex_dst_src.cpp
│   │   ├── rast_tex_dst_zero.cpp
│   │   ├── rast_tex_one_msa.cpp
│   │   ├── rast_tex_one_msc.cpp
│   │   ├── rast_tex_one_one.cpp
│   │   ├── rast_tex_one_zero.cpp
│   │   ├── rast_tex_sa_msa.cpp
│   │   └── template.h
│   └── rast_reference.cpp
├── rast_wireframe
│   ├── CMakeLists.txt
│   ├── rast_wireframe.cpp
│   ├── surface.cpp
│   └── surface.h
├── readme.md
├── softgl.cfg
└── source
    ├── GL.h
    ├── buffer.cpp
    ├── buffer.h
    ├── common.cpp
    ├── common.h
    ├── config.cpp
    ├── config.h
    ├── context.cpp
    ├── context.h
    ├── exports.h
    ├── forward.h
    ├── game_id.cpp
    ├── game_id.h
    ├── gdi_hook.cpp
    ├── gdi_hook.h
    ├── log.cpp
    ├── log.h
    ├── main.cpp
    ├── math.h
    ├── matrix.h
    ├── opengl.cpp
    ├── primative.cpp
    ├── primative.h
    ├── profile.cpp
    ├── profile.h
    ├── raster.cpp
    ├── raster.h
    ├── state.h
    ├── texture.cpp
    ├── texture.h
    ├── wgl.cpp
    ├── wgl.h
    ├── window.h
    └── windows.h
/CMakeLists.txt:
--------------------------------------------------------------------------------
# Top-level build script: builds the opengl32 shared library from source/
# and pulls in the rast_* sub-projects.
#
# 3.5 is the oldest policy level that CMake 4.x still accepts; requesting
# 3.0 makes current CMake releases stop with an error at configure time.
cmake_minimum_required(VERSION 3.5)
project(opengl32)

# MSVC CRT switch that turns off the "unsafe function" deprecation warnings.
add_definitions(-D_CRT_SECURE_NO_WARNINGS)

# Rasterizer sub-projects, each with its own CMakeLists.txt.
add_subdirectory(rast_fasterizer)
add_subdirectory(rast_wireframe)
add_subdirectory(rast_reference)
add_subdirectory(rast_opencl)

# Every source and header directly under source/ goes into the library.
# NOTE: globs are evaluated at configure time; re-run CMake after adding files.
file(GLOB C_FILES source/*.cpp)
file(GLOB H_FILES source/*.h)

# opengl32.def supplies the export table of the resulting library.
add_library(opengl32 SHARED ${C_FILES} ${H_FILES} opengl32.def)
15 |
--------------------------------------------------------------------------------
/apigen/.idea/apigen.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/apigen/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/apigen/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/apigen/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 | void
33 |
34 |
35 |
36 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 | 1504959552633
252 |
253 |
254 | 1504959552633
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
--------------------------------------------------------------------------------
/apigen/apigen.py:
--------------------------------------------------------------------------------
def print_function(proto, args, name):
    """Print an empty C stub for one API entry point to stdout.

    proto -- list of tokens forming the return type, e.g. ['const', 'GLubyte', '*']
    args  -- list of parameter strings, e.g. ['GLenum mode']; [''] for "()"
    name  -- the function name

    The stub is emitted as an 'ATTRIB' line, the signature line
    '<ret> APIENTRY <name>(<args>) {', a closing brace and a blank line.
    Uses single-argument print(...) calls so the script runs unchanged on
    both Python 2 and Python 3.
    """
    # typedef / arg_proto / arg_name / ret only feed the disabled thunk body
    # kept in the comments below.
    typedef = '{0}_t'.format(name)
    ret_type = ' '.join(proto)
    arg_proto = []
    arg_name = []
    for arg in args:
        arg_tokens = arg.split()
        if not arg_tokens:
            # empty parameter list such as "glEnd()": previously an IndexError
            continue
        arg_proto.append(' '.join(arg_tokens[:-1]))
        # a lone token (e.g. 'void') is a type without a parameter name
        if len(arg_tokens) > 1:
            arg_name.append(arg_tokens[-1])
    ret = '' if ret_type == 'void' else 'return '
    print('ATTRIB')
    print('{0} APIENTRY {2}({1}) {{'.format(ret_type, ', '.join(args), name))
    # print ' typedef {0} (WINAPI * {1})({2});'.format(ret_type, typedef, ', '.join(arg_proto))
    # print ' {0} thunk = ({0})gl_func_table[e_{1}];'.format(typedef, name)
    # print ' assert(thunk);'
    # print ' {0}thunk({1});'.format(ret, ', '.join(arg_name))
    print('}')
    print('')
20 |
21 |
# Names of every function parsed so far, in input order; filled by parse()
# and read by main() when it prints the name table and the enum.
g_func_names = list()
23 |
24 |
25 |
def parse(line):
    """Parse one C prototype line, print its stub and record its name.

    The text before the first '(' is split on whitespace: the last token is
    the function name, the tokens before it are the return type.  The text
    between the parentheses is split on ',' into parameter strings.

    Blank lines are ignored.  A non-blank line without a parenthesised
    parameter list, or without a name in front of it, raises ValueError
    instead of silently emitting a malformed stub.
    """
    if not line.strip():
        return

    lparen = line.find('(')
    rparen = line.find(')', lparen + 1)
    if lparen < 0 or rparen < 0:
        raise ValueError('malformed prototype: {0!r}'.format(line))

    arg_tokens = [x.strip() for x in line[lparen + 1:rparen].split(',')]
    proto_tokens = line[:lparen].split()
    if not proto_tokens:
        raise ValueError('prototype has no function name: {0!r}'.format(line))

    func_name = proto_tokens[-1]
    print_function(proto_tokens[:-1], arg_tokens, func_name)
    g_func_names.append(func_name)
39 |
40 |
def main():
    """Generate the API stubs and lookup tables from 'glfuncs.txt'.

    Every line of the input file is handed to parse(), which prints one
    stub per prototype.  Afterwards the enum of function ids and the table
    of function names are printed.  The enum is emitted first because the
    table is sized by 'e_gl_func_count__', which must already be declared
    when the generated C is compiled.

    The file is opened in text mode and single-argument print(...) calls
    are used so the script runs on both Python 2 and Python 3.
    """
    with open('glfuncs.txt', 'r') as f:
        for line in f:
            parse(line.strip('\r\n'))

    print('enum gl_func_enum_t {')
    for x in g_func_names:
        print(' e_{0},'.format(x))
    print(' e_gl_func_count__')
    print('};')
    print('')
    print('const char *gl_func_names[e_gl_func_count__] = {')
    for x in g_func_names:
        print(' "{0}",'.format(x))
    print('};')
56 |
57 | if __name__ == '__main__':
58 | main()
59 |
--------------------------------------------------------------------------------
/docs/screenshots/doom3_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/doom3_1.png
--------------------------------------------------------------------------------
/docs/screenshots/doom3_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/doom3_2.png
--------------------------------------------------------------------------------
/docs/screenshots/hl_of_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/hl_of_demo.png
--------------------------------------------------------------------------------
/docs/screenshots/quake2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/quake2.png
--------------------------------------------------------------------------------
/docs/screenshots/quake3_demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/quake3_demo.png
--------------------------------------------------------------------------------
/docs/screenshots/ut2003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/ut2003.png
--------------------------------------------------------------------------------
/docs/screenshots/ut99_goty.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bit-hack/softgl/565f59c5529582da94acd49e621c1e2138191e83/docs/screenshots/ut99_goty.png
--------------------------------------------------------------------------------
/opengl32.def:
--------------------------------------------------------------------------------
1 | LIBRARY OPENGL32
2 | EXPORTS
3 | glAccum
4 | glActiveTexture
5 | glActiveTextureARB
6 | glAlphaFunc
7 | glAreTexturesResident
8 | glArrayElement
9 | glBegin
10 | glBindTexture
11 | glBitmap
12 | glBlendFunc
13 | glCallList
14 | glCallLists
15 | glClear
16 | glClearAccum
17 | glClearColor
18 | glClearDepth
19 | glClearIndex
20 | glClearStencil
21 | glClientActiveTextureARB
22 | glClipPlane
23 | glColor3b
24 | glColor3bv
25 | glColor3d
26 | glColor3dv
27 | glColor3f
28 | glColor3fv
29 | glColor3i
30 | glColor3iv
31 | glColor3s
32 | glColor3sv
33 | glColor3ub
34 | glColor3ubv
35 | glColor3ui
36 | glColor3uiv
37 | glColor3us
38 | glColor3usv
39 | glColor4b
40 | glColor4bv
41 | glColor4d
42 | glColor4dv
43 | glColor4f
44 | glColor4fv
45 | glColor4i
46 | glColor4iv
47 | glColor4s
48 | glColor4sv
49 | glColor4ub
50 | glColor4ubv
51 | glColor4ui
52 | glColor4uiv
53 | glColor4us
54 | glColor4usv
55 | glColorMask
56 | glColorMaterial
57 | glColorPointer
58 | glCompressedTexSubImage2DARB
59 | glCompressedTexImage2DARB
60 | glCopyPixels
61 | glCopyTexImage1D
62 | glCopyTexImage2D
63 | glCopyTexSubImage1D
64 | glCopyTexSubImage2D
65 | glCullFace
66 | glDebugEntry
67 | glDeleteLists
68 | glDeleteTextures
69 | glDepthFunc
70 | glDepthMask
71 | glDepthRange
72 | glDisable
73 | glDisableClientState
74 | glDrawArrays
75 | glDrawBuffer
76 | glDrawElements
77 | glDrawRangeElements
78 | glDrawPixels
79 | glEdgeFlag
80 | glEdgeFlagPointer
81 | glEdgeFlagv
82 | glEnable
83 | glEnableClientState
84 | glEnd
85 | glEndList
86 | glEvalCoord1d
87 | glEvalCoord1dv
88 | glEvalCoord1f
89 | glEvalCoord1fv
90 | glEvalCoord2d
91 | glEvalCoord2dv
92 | glEvalCoord2f
93 | glEvalCoord2fv
94 | glEvalMesh1
95 | glEvalMesh2
96 | glEvalPoint1
97 | glEvalPoint2
98 | glFeedbackBuffer
99 | glFinish
100 | glFlush
101 | glFogf
102 | glFogfv
103 | glFogi
104 | glFogiv
105 | glFrontFace
106 | glFrustum
107 | glGenLists
108 | glGenTextures
109 | glGetBooleanv
110 | glGetClipPlane
111 | glGetDoublev
112 | glGetError
113 | glGetFloatv
114 | glGetIntegerv
115 | glGetLightfv
116 | glGetLightiv
117 | glGetMapdv
118 | glGetMapfv
119 | glGetMapiv
120 | glGetMaterialfv
121 | glGetMaterialiv
122 | glGetPixelMapfv
123 | glGetPixelMapuiv
124 | glGetPixelMapusv
125 | glGetPointerv
126 | glGetPolygonStipple
127 | glGetString
128 | glGetTexEnvfv
129 | glGetTexEnviv
130 | glGetTexGendv
131 | glGetTexGenfv
132 | glGetTexGeniv
133 | glGetTexImage
134 | glGetTexLevelParameterfv
135 | glGetTexLevelParameteriv
136 | glGetTexParameterfv
137 | glGetTexParameteriv
138 | glHint
139 | glIndexMask
140 | glIndexPointer
141 | glIndexd
142 | glIndexdv
143 | glIndexf
144 | glIndexfv
145 | glIndexi
146 | glIndexiv
147 | glIndexs
148 | glIndexsv
149 | glIndexub
150 | glIndexubv
151 | glInitNames
152 | glInterleavedArrays
153 | glIsEnabled
154 | glIsList
155 | glIsTexture
156 | glLightModelf
157 | glLightModelfv
158 | glLightModeli
159 | glLightModeliv
160 | glLightf
161 | glLightfv
162 | glLighti
163 | glLightiv
164 | glLineStipple
165 | glLineWidth
166 | glListBase
167 | glLoadIdentity
168 | glLoadMatrixd
169 | glLoadMatrixf
170 | glLoadName
171 | glLogicOp
172 | glMap1d
173 | glMap1f
174 | glMap2d
175 | glMap2f
176 | glMapGrid1d
177 | glMapGrid1f
178 | glMapGrid2d
179 | glMapGrid2f
180 | glMaterialf
181 | glMaterialfv
182 | glMateriali
183 | glMaterialiv
184 | glMatrixMode
185 | glMultiTexCoord1dARB
186 | glMultiTexCoord1fARB
187 | glMultiTexCoord1iARB
188 | glMultiTexCoord1sARB
189 | glMultiTexCoord2dARB
190 | glMultiTexCoord2fARB
191 | glMultiTexCoord2iARB
192 | glMultiTexCoord2sARB
193 | glMultiTexCoord3dARB
194 | glMultiTexCoord3fARB
195 | glMultiTexCoord3iARB
196 | glMultiTexCoord3sARB
197 | glMultiTexCoord4dARB
198 | glMultiTexCoord4fARB
199 | glMultiTexCoord4iARB
200 | glMultiTexCoord4sARB
201 | glMultMatrixd
202 | glMultMatrixf
203 | glNewList
204 | glNormal3b
205 | glNormal3bv
206 | glNormal3d
207 | glNormal3dv
208 | glNormal3f
209 | glNormal3fv
210 | glNormal3i
211 | glNormal3iv
212 | glNormal3s
213 | glNormal3sv
214 | glNormalPointer
215 | glOrtho
216 | glPassThrough
217 | glPixelMapfv
218 | glPixelMapuiv
219 | glPixelMapusv
220 | glPixelStoref
221 | glPixelStorei
222 | glPixelTransferf
223 | glPixelTransferi
224 | glPixelZoom
225 | glPointSize
226 | glPolygonMode
227 | glPolygonOffset
228 | glPolygonStipple
229 | glPopAttrib
230 | glPopClientAttrib
231 | glPopMatrix
232 | glPopName
233 | glPrioritizeTextures
234 | glPushAttrib
235 | glPushClientAttrib
236 | glPushMatrix
237 | glPushName
238 | glRasterPos2d
239 | glRasterPos2dv
240 | glRasterPos2f
241 | glRasterPos2fv
242 | glRasterPos2i
243 | glRasterPos2iv
244 | glRasterPos2s
245 | glRasterPos2sv
246 | glRasterPos3d
247 | glRasterPos3dv
248 | glRasterPos3f
249 | glRasterPos3fv
250 | glRasterPos3i
251 | glRasterPos3iv
252 | glRasterPos3s
253 | glRasterPos3sv
254 | glRasterPos4d
255 | glRasterPos4dv
256 | glRasterPos4f
257 | glRasterPos4fv
258 | glRasterPos4i
259 | glRasterPos4iv
260 | glRasterPos4s
261 | glRasterPos4sv
262 | glReadBuffer
263 | glReadPixels
264 | glRectd
265 | glRectdv
266 | glRectf
267 | glRectfv
268 | glRecti
269 | glRectiv
270 | glRects
271 | glRectsv
272 | glRenderMode
273 | glRotated
274 | glRotatef
275 | glScaled
276 | glScalef
277 | glScissor
278 | glSelectBuffer
279 | glShadeModel
280 | glStencilFunc
281 | glStencilMask
282 | glStencilOp
283 | glTexCoord1d
284 | glTexCoord1dv
285 | glTexCoord1f
286 | glTexCoord1fv
287 | glTexCoord1i
288 | glTexCoord1iv
289 | glTexCoord1s
290 | glTexCoord1sv
291 | glTexCoord2d
292 | glTexCoord2dv
293 | glTexCoord2f
294 | glTexCoord2fv
295 | glTexCoord2i
296 | glTexCoord2iv
297 | glTexCoord2s
298 | glTexCoord2sv
299 | glTexCoord3d
300 | glTexCoord3dv
301 | glTexCoord3f
302 | glTexCoord3fv
303 | glTexCoord3i
304 | glTexCoord3iv
305 | glTexCoord3s
306 | glTexCoord3sv
307 | glTexCoord4d
308 | glTexCoord4dv
309 | glTexCoord4f
310 | glTexCoord4fv
311 | glTexCoord4i
312 | glTexCoord4iv
313 | glTexCoord4s
314 | glTexCoord4sv
315 | glTexCoordPointer
316 | glTexEnvf
317 | glTexEnvfv
318 | glTexEnvi
319 | glTexEnviv
320 | glTexGend
321 | glTexGendv
322 | glTexGenf
323 | glTexGenfv
324 | glTexGeni
325 | glTexGeniv
326 | glTexImage1D
327 | glTexImage2D
328 | glTexParameterf
329 | glTexParameterfv
330 | glTexParameteri
331 | glTexParameteriv
332 | glTexSubImage1D
333 | glTexSubImage2D
334 | glTranslated
335 | glTranslatef
336 | glVertex2d
337 | glVertex2dv
338 | glVertex2f
339 | glVertex2fv
340 | glVertex2i
341 | glVertex2iv
342 | glVertex2s
343 | glVertex2sv
344 | glVertex3d
345 | glVertex3dv
346 | glVertex3f
347 | glVertex3fv
348 | glVertex3i
349 | glVertex3iv
350 | glVertex3s
351 | glVertex3sv
352 | glVertex4d
353 | glVertex4dv
354 | glVertex4f
355 | glVertex4fv
356 | glVertex4i
357 | glVertex4iv
358 | glVertex4s
359 | glVertex4sv
360 | glVertexPointer
361 | glViewport
362 |
363 | wglCopyContext=wglCopyContext_imp
364 | wglCreateContext=wglCreateContext_imp
365 | wglCreateLayerContext=wglCreateLayerContext_imp
366 | wglDeleteContext=wglDeleteContext_imp
367 | wglDescribeLayerPlane=wglDescribeLayerPlane_imp
368 | wglGetCurrentContext=wglGetCurrentContext_imp
369 | wglGetCurrentDC=wglGetCurrentDC_imp
370 | wglGetLayerPaletteEntries=wglGetLayerPaletteEntries_imp
371 | wglGetProcAddress=wglGetProcAddress_imp
372 | wglMakeCurrent=wglMakeCurrent_imp
373 | wglRealizeLayerPalette=wglRealizeLayerPalette_imp
374 | wglSetLayerPaletteEntries=wglSetLayerPaletteEntries_imp
375 | wglShareLists=wglShareLists_imp
376 | wglSwapBuffers=wglSwapBuffers_imp
377 | wglSwapLayerBuffers=wglSwapLayerBuffers_imp
378 | wglSwapMultipleBuffers=wglSwapMultipleBuffers_imp
379 | wglUseFontBitmapsA=wglUseFontBitmapsA_imp
380 | wglUseFontBitmapsW=wglUseFontBitmapsW_imp
381 | wglUseFontOutlinesA=wglUseFontOutlinesA_imp
382 | wglUseFontOutlinesW=wglUseFontOutlinesW_imp
383 | wglChoosePixelFormat=wglChoosePixelFormat_imp
384 | wglSetPixelFormat=wglSetPixelFormat_imp
385 | wglGetExtensionsStringARB=wglGetExtensionsStringARB_imp
386 | wglDescribePixelFormat=wglDescribePixelFormat_imp
387 |
--------------------------------------------------------------------------------
/rast_fasterizer/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build softgl_rast_fasterizer as a shared library from every .cpp and .h
# file found directly in this directory.
file(GLOB FASTERIZER_SOURCES *.cpp)
file(GLOB FASTERIZER_HEADERS *.h)

add_library(softgl_rast_fasterizer SHARED
  ${FASTERIZER_SOURCES}
  ${FASTERIZER_HEADERS})
8 |
--------------------------------------------------------------------------------
/rast_fasterizer/clip.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include "../source/math.h"
3 |
4 | struct edge_t {
5 |
6 | // note: normals face 'inwards' for triangles
7 | // positive may be inside triangle, negative is always outside
8 |
9 | // return normal {-dy, dx} * s, where s is chosen so the normal extends to reach p
10 | static const float2 scale_normal(const float2 &a, const float2 &b, const float2 &p) {
11 | // find edges
12 | // note: (a - b) as we want an inward facing normal
13 | const float2 edge = float2(a) - float2(b);
14 | // edge normal {-dy, dx}
15 | const float2 normal = float2::cross(edge);
16 | // distance between edge and opposite vertex
17 | // note: dist should always be > 0.f for a forward facing triangle
18 | const float dist = float2::dot(normal, float2(p - a));
19 | // normalize to get interpolants
20 | return normal / dist;
21 | }
22 |
23 | edge_t(const float2 &a, const float2 &b)
24 | : _n(float2::cross(a - b)) // {-dy, dx}
25 | , _d(float2::dot(a, _n)) { // distance of edge to origin
26 | // note: normal cant be used for attribute interpolation
27 | }
28 |
29 | edge_t(const float2 &a, const float2 &b, const float2 &p)
30 | : _n(scale_normal(a, b, p)) // {-dy, dx} * s
31 | , _d(float2::dot(a, _n)) // distance of edge to origin
32 | {
33 | }
34 |
35 | // evaluate the edge equation at a given point
36 | float eval(const float2 & p) const {
37 | // distance from edge
38 | return float2::dot(p, _n);
39 | }
40 |
41 | // test if a point falls on negative side of edge
42 | // point on negative side is 'out'
43 | bool test_out(const float2 &p) const {
44 | return (p.x * _n.x + p.y * _n.y) < _d;
45 | }
46 |
47 | // test if a point falls on positive side of edge
48 | // point on positive side is 'in'
49 | bool test_in(const float2 &p) const {
50 | return (p.x * _n.x + p.y * _n.y) >= _d;
51 | }
52 |
53 | // return normal sign code
54 | int normal_quadrant() const {
55 | return (_n.x > 0.f) | ((_n.y > 0.f) << 1);
56 | }
57 |
58 | // trivial rejection based on edge normal and closest box vertex
59 | bool trivial_out(const rectf_t &r) const {
60 | //note: remember edge normals face inward for triangles
61 | switch (normal_quadrant()) {
62 | case 3: return test_out(float2{r.x1, r.y1}); // (+,+) -> box (-, -)
63 | case 2: return test_out(float2{r.x0, r.y1}); // (-,+) -> box (+, -)
64 | case 1: return test_out(float2{r.x1, r.y0}); // (+,-) -> box (-, +)
65 | case 0: return test_out(float2{r.x0, r.y0}); // (-,-) -> box (+, +)
66 | default: __assume(false);
67 | }
68 | }
69 |
70 | // trivial inclusion based on edge normal and closest box vertex
71 | bool trivial_in(const rectf_t &r) const {
72 | //note: remember edge normals face inward for triangles
73 | switch (normal_quadrant()) {
74 | case 3: return test_in(float2{r.x0, r.y0}); // (+,+) -> box (+, +)
75 | case 2: return test_in(float2{r.x1, r.y0}); // (-,+) -> box (-, +)
76 | case 1: return test_in(float2{r.x0, r.y1}); // (+,-) -> box (+, -)
77 | case 0: return test_in(float2{r.x1, r.y1}); // (-,-) -> box (-, -)
78 | default: __assume(false);
79 | }
80 | }
81 |
82 | const float2 &normal() const {
83 | return _n;
84 | }
85 |
86 | protected:
87 | float2 _n;
88 | float _d;
89 | };
90 |
91 |
92 | struct tri_setup_t {
93 |
94 | // constructor
95 | tri_setup_t(const float2 &v0, const float2 &v1, const float2 &v2)
96 | : _e{edge_t(v0, v1, v2), // e01
97 | edge_t(v1, v2, v0), // e12
98 | edge_t(v2, v0, v1)} // e20
99 | {}
100 |
101 | // test if 'r' is outside triangle via edge test
102 | bool trivial_out(const rectf_t &r) const {
103 | return _e[0].trivial_out(r) ||
104 | _e[1].trivial_out(r) ||
105 | _e[2].trivial_out(r);
106 | }
107 |
108 | // test if 'r' is inside triangle via edge test
109 | bool trivial_in(const rectf_t &r) const {
110 | return _e[0].trivial_in(r) &&
111 | _e[1].trivial_in(r) &&
112 | _e[2].trivial_in(r);
113 | }
114 |
115 | protected:
116 | // edges {e01, e12, e20}
117 | std::array _e;
118 |
119 | }; // tri_setup_t
120 |
--------------------------------------------------------------------------------
/rast_fasterizer/fasterizer.cpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | #include "../source/context.h"
6 | #include "../source/math.h"
7 | #include "../source/raster.h"
8 |
9 | #include "clip.h"
10 |
11 | namespace {
12 |
// Edge length, in pixels, of the square screen blocks the frame is split into.
constexpr int32_t BLOCK_SIZE = 16;
14 |
// Per-triangle plane equations, one per interpolant 'slot'.
//
// For a slot i the interpolant at screen position (x, y) is evaluated as
//   vx[i] * x + vy[i] * y - v[i]
// so 'vx'/'vy' are the per-pixel steps in x and y and 'v' is the offset.
struct triangle_setup_t {

  enum {
    slot_w0,      // triangle weight 0
    slot_w1,      // triangle weight 1
    slot_iw,      // inverse w
    slot_r,       // r / w
    slot_g,       // g / w
    slot_b,       // b / w
    slot_u,       // u / w
    slot_v,       // v / w
    _slot_count_  // number of slots, keep last
  };

  std::array<float, _slot_count_> v;   // offset of each interpolant
  std::array<float, _slot_count_> vx;  // step per pixel in x
  std::array<float, _slot_count_> vy;  // step per pixel in y

  float minz, maxz;
  uint32_t mip_level;
};
36 |
// Description of the frame buffer currently being rendered to.
// Every member starts out null / zero so that a frame which has not been
// filled in yet describes an empty buffer instead of holding garbage.
struct frame_t {
  uint32_t *_pixels = nullptr;  // colour buffer, '_width' pixels per row
  float *_depth = nullptr;      // depth buffer, '_width' values per row
  int32_t _width = 0;           // buffer width in pixels
  int32_t _height = 0;          // buffer height in pixels
  int _blocks_in_x = 0;         // number of whole blocks per row
  int _blocks_in_y = 0;         // number of whole blocks per column
};
45 |
46 | struct block_t {
47 | std::vector _triangle_setup;
48 |
49 | // block bounding rect
50 | float2 _min, _max;
51 |
52 | // frame buffer pointers
53 | uint32_t *_fb_color;
54 | float *_fb_depth;
55 | };
56 |
57 | void bounds(const float4 &v0,
58 | const float4 &v1,
59 | const float4 &v2,
60 | recti_t &out) {
61 | // Compute triangle bounding box
62 | out.x0 = std::min({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)});
63 | out.y0 = std::min({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)});
64 | out.x1 = std::max({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)});
65 | out.y1 = std::max({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)});
66 | }
67 |
68 | } // namespace
69 |
70 | struct rast_fasterizer_t : public raster_t {
71 |
72 | void framebuffer_clear(
73 | bool color,
74 | bool depth,
75 | bool stencil,
76 | uint32_t clear_color,
77 | float clear_depth,
78 | uint32_t clear_stencil) override {
79 |
80 | if (clear_depth) {
81 | float *depth = _frame._depth;
82 | for (int y = 0; y < _frame._height; ++y) {
83 | for (int x = 0; x < _frame._width; ++x) {
84 | depth[x] = 1000.f;
85 | }
86 | depth += _frame._width;
87 | }
88 | }
89 | };
90 |
91 | void framebuffer_release() override {
92 | _blocks.clear();
93 | }
94 |
95 | void framebuffer_aquire() override {
96 | _blocks.clear();
97 | _frame._pixels = _cxt->buffer.pixels();
98 | _frame._depth = _cxt->buffer.depth();
99 | _frame._width = _cxt->buffer.width();
100 | _frame._height = _cxt->buffer.height();
101 |
102 | _frame._blocks_in_x = _frame._width / BLOCK_SIZE;
103 | _frame._blocks_in_y = _frame._height / BLOCK_SIZE;
104 |
105 | for (int y = 0; y < _frame._blocks_in_y; ++y) {
106 | for (int x = 0; x < _frame._blocks_in_x; ++x) {
107 |
108 | const int32_t ix = x * BLOCK_SIZE;
109 | const int32_t iy = y * BLOCK_SIZE;
110 |
111 | const int offset = ix + iy * _frame._width;
112 |
113 | block_t b;
114 | b._min = float2{float(ix), float(iy)};
115 | b._max = float2{float(ix + BLOCK_SIZE), float(iy + BLOCK_SIZE)};
116 | b._fb_color = _frame._pixels + offset;
117 | b._fb_depth = _frame._depth + offset;
118 | _blocks.push_back(b);
119 | }
120 | }
121 |
122 | assert(_blocks.size() == _frame._blocks_in_x * _frame._blocks_in_y);
123 | }
124 |
125 | bool start(gl_context_t &cxt) override {
126 | _cxt = &cxt;
127 | return true;
128 | }
129 |
130 | void stop() override {
131 | }
132 |
133 | void draw_block_fast(const block_t &block, const triangle_setup_t &s) {
134 |
135 | float iw = (s.vx[2] * (block._min.x) +
136 | s.vy[2] * (block._min.y)) - s.v[2];
137 |
138 | float u = (s.vx[3] * (block._min.x) +
139 | s.vy[3] * (block._min.y)) - s.v[3];
140 |
141 | uint32_t *dst = block._fb_color;
142 | float *depth = block._fb_depth;
143 |
144 | for (int y = 0; y < BLOCK_SIZE; ++y) {
145 |
146 | float iw_ = iw;
147 |
148 | const float u0 = (u) / iw;
149 | const float u1 = (u + s.vx[3] * BLOCK_SIZE) / iw;
150 | const float u_dx = (u1 - u0) / BLOCK_SIZE;
151 | float u_ = u0;
152 |
153 | for (int x = 0; x < BLOCK_SIZE; ++x) {
154 |
155 | const float w = 1.f / iw_;
156 |
157 | // depth test
158 | if (w <= depth[x]) {
159 | depth[x] = w;
160 |
161 | const uint8_t r = uint8_t(128 + w * 0.1f);
162 | const uint8_t g = uint8_t(128 + w * 0.1f);
163 | const uint8_t b = uint8_t(128 + w * 0.1f);
164 |
165 | dst[x] = (r << 16) | (g << 8) | b;
166 | }
167 |
168 | // x step interpolants
169 | iw_ += s.vx[2];
170 | u_ += u_dx;
171 | }
172 |
173 | // y step interpolants
174 | iw += s.vy[2];
175 | u += s.vy[3];
176 |
177 | dst += _frame._width;
178 | depth += _frame._width;
179 | }
180 | }
181 |
182 | void draw_block(const block_t &block, const triangle_setup_t &s) {
183 |
184 | const float vx0 = s.vx[0];
185 | const float vy0 = s.vy[0];
186 | const float vx1 = s.vx[1];
187 | const float vy1 = s.vy[1];
188 |
189 | float v0 = (vx0 * block._min.x + vy0 * block._min.y) - s.v[0];
190 | float v1 = (vx1 * block._min.x + vy1 * block._min.y) - s.v[1];
191 |
192 | float iw = (s.vx[2] * (block._min.x) +
193 | s.vy[2] * (block._min.y)) - s.v[2];
194 |
195 | uint32_t *dst = block._fb_color;
196 | float *depth = block._fb_depth;
197 |
198 | for (int y = 0; y < BLOCK_SIZE; ++y) {
199 |
200 | float v0_ = v0;
201 | float v1_ = v1;
202 |
203 | float iw_ = iw;
204 |
205 | for (int x = 0; x < BLOCK_SIZE; ++x) {
206 |
207 | const float v2_ = 1.f - (v0_ + v1_);
208 |
209 | const float w = 1.f / iw_;
210 |
211 | // triangle edge test
212 | if (v0_ > 0.f && v1_ > 0.f && v2_ > 0.f) {
213 |
214 | // depth test
215 | if (w <= depth[x]) {
216 | depth[x] = w;
217 |
218 | const uint8_t r = uint8_t(128 + w * 0.1f);
219 | const uint8_t g = uint8_t(128 + w * 0.1f);
220 | const uint8_t b = uint8_t(128 + w * 0.1f);
221 |
222 | dst[x] = (r << 16) | (g << 8) | b;
223 | }
224 |
225 | }
226 |
227 | v0_ += vx0;
228 | v1_ += vx1;
229 | iw_ += s.vx[2];
230 | }
231 |
232 | v0 += vy0;
233 | v1 += vy1;
234 | iw += s.vy[2];
235 |
236 | dst += _frame._width;
237 | depth += _frame._width;
238 | }
239 | }
240 |
241 | void draw_block(const block_t &block) {
242 | for (uint32_t setup_index : block._triangle_setup) {
243 |
244 | bool trivial_in = 0 != (setup_index & 0x80000000);
245 |
246 | setup_index &= 0x7fffffff;
247 |
248 | const triangle_setup_t &s = _setup[setup_index];
249 |
250 | if (trivial_in) {
251 | draw_block_fast(block, s);
252 | }
253 | else {
254 | draw_block(block, s);
255 | }
256 | }
257 | }
258 |
259 | void insert_triangle(const triangle_t &t,
260 | const triangle_setup_t &s,
261 | uint32_t setup_index) {
262 |
263 | // find bounding rectangle of the triangle
264 | recti_t rect;
265 | bounds(t.vert[0].coord,
266 | t.vert[1].coord,
267 | t.vert[2].coord,
268 | rect);
269 |
270 | // clip the triangle if fully out of the frame
271 | if (rect.x1 < 0) return;
272 | if (rect.y1 < 0) return;
273 | if (rect.x0 > _frame._width) return;
274 | if (rect.y0 > _frame._height) return;
275 |
276 | // clamp in block space
277 | const int32_t ix0 = std::max(rect.x0 / BLOCK_SIZE, 0);
278 | const int32_t iy0 = std::max(rect.y0 / BLOCK_SIZE, 0);
279 | const int32_t ix1 = std::min(rect.x1 / BLOCK_SIZE, _frame._blocks_in_x-1);
280 | const int32_t iy1 = std::min(rect.y1 / BLOCK_SIZE, _frame._blocks_in_y-1);
281 |
282 | // isolate 2d coordinates
283 | const float2 v0{t.vert[0].coord.x,
284 | t.vert[0].coord.y};
285 | const float2 v1{t.vert[1].coord.x,
286 | t.vert[1].coord.y};
287 | const float2 v2{t.vert[2].coord.x,
288 | t.vert[2].coord.y};
289 |
290 | tri_setup_t clip{v0, v1, v2};
291 |
292 | // insert into blocks
293 | for (int y = iy0; y <= iy1; ++y) {
294 | for (int x = ix0; x <= ix1; ++x) {
295 |
296 | //XXX: only do these tests if the area is large
297 | // heuristic for a large triangle:
298 | // area > (2 * (BLOCK_SIZE * BLOCK_SIZE));
299 |
300 | const rectf_t r{
301 | (x + 0) * BLOCK_SIZE,
302 | (y + 0) * BLOCK_SIZE,
303 | (x + 1) * BLOCK_SIZE,
304 | (y + 1) * BLOCK_SIZE};
305 |
306 | if (clip.trivial_out(r)) {
307 | continue;
308 | }
309 |
310 | // apply trivial in mask
311 | //XXX: trivial in seems broken
312 | const uint32_t mask = 0; // clip.trivial_in(r) ? 0x80000000 : 0x00000000;
313 |
314 | // offset into the block list
315 | const uint32_t bo = x + y * _frame._blocks_in_x;
316 |
317 | assert(bo >= 0 && bo < _blocks.size());
318 | _blocks[bo]._triangle_setup.push_back(setup_index | mask);
319 | }
320 | }
321 | }
322 |
323 | float triangle_area(const float2 &v0,
324 | const float2 &v1,
325 | const float2 &v2) {
326 |
327 | // area is found using part of the vector product
328 |
329 | // x = a2 * b3 - a3 * b2
330 | // y = a1 * b3 - a3 * b1
331 | // z = a1 * b2 - a2 * b1
332 |
333 | // where a = v0 -> v1
334 | // where b = v0 -> v2
335 |
336 | // we only care about the z componant which contains the area of the
337 | // parallelogram formed. i'm unsure why we dont need to multiply by
338 | // 0.5f however when returning the result.
339 |
340 | return (v1.x - v0.x) * (v2.y - v0.y) -
341 | (v2.x - v0.x) * (v1.y - v0.y);
342 | }
343 |
344 | // evaluate the gradient field given the following:
345 | // normal: the normal for that edge
346 | // poe: a point on the edge
347 | // point: the location where to sample it
348 | float evaluate(const float2 &normal,
349 | const float2 &poe) {
350 | return normal.x * poe.x + normal.y * poe.y;
351 | }
352 |
353 | bool setup_triangle(const triangle_t &t, triangle_setup_t &s) {
354 |   // fills s.v / s.vx / s.vy for each slot; returns false if t must be skipped
355 |   // isolate 2d coordinates
356 |   const float2 v0{t.vert[0].coord.x, t.vert[0].coord.y};
357 |   const float2 v1{t.vert[1].coord.x, t.vert[1].coord.y};
358 |   const float2 v2{t.vert[2].coord.x, t.vert[2].coord.y};
359 |
360 |   // find the area of the triangle
361 |   const float area = triangle_area(v0, v1, v2);
362 |   if (area == 0.f) { return false; } // degenerate: would divide by zero below
363 |   // find edge vectors
364 |   const float2 d01 = v1 - v0;
365 |   const float2 d12 = v2 - v1;
366 |   const float2 d20 = v0 - v2;
367 |
368 |   // cross product gives us normals from the edges
369 |   // which we 'normalize' to the area of the triangle
370 |   const float2 n0 = float2::cross(d12) / area;
371 |   const float2 n1 = float2::cross(d20) / area;
372 |   const float2 n2 = float2::cross(d01) / area;
373 |
374 |   // evaluate the starting position for each interpolant
375 |   const float s0 = evaluate(n0, v1);
376 |   const float s1 = evaluate(n1, v2);
377 |   const float s2 = evaluate(n2, v0);
378 |
379 |   // edge function interpolants
380 |   {
381 |     s. v[triangle_setup_t::slot_w0] = s0;
382 |     s.vx[triangle_setup_t::slot_w0] = n0.x;
383 |     s.vy[triangle_setup_t::slot_w0] = n0.y;
384 |
385 |     s. v[triangle_setup_t::slot_w1] = s1;
386 |     s.vx[triangle_setup_t::slot_w1] = n1.x;
387 |     s.vy[triangle_setup_t::slot_w1] = n1.y;
388 |   }
389 |
390 |   // per vertex terms scaled by 1/w: [0..2] offsets, [3..5] x steps, [6..8] y steps
391 |   std::array<float, 9> c;
392 |
393 |   // 1/w interpolation
394 |   {
395 |     const float iw0 = 1.f / t.vert[0].coord.w;
396 |     const float iw1 = 1.f / t.vert[1].coord.w;
397 |     const float iw2 = 1.f / t.vert[2].coord.w;
398 |
399 |     c[0] = s0 * iw0; c[1] = s1 * iw1; c[2] = s2 * iw2;
400 |     c[3] = n0.x * iw0; c[4] = n1.x * iw1; c[5] = n2.x * iw2;
401 |     c[6] = n0.y * iw0; c[7] = n1.y * iw1; c[8] = n2.y * iw2;
402 |
403 |     const uint32_t slot = triangle_setup_t::slot_iw;
404 |     s. v[slot] = c[0] + c[1] + c[2];
405 |     s.vx[slot] = c[3] + c[4] + c[5];
406 |     s.vy[slot] = c[6] + c[7] + c[8];
407 |   }
408 |
409 |   // r
410 |   {
411 |     const uint32_t slot = triangle_setup_t::slot_r;
412 |     const float a0 = t.vert[0].rgba.x;
413 |     const float a1 = t.vert[1].rgba.x;
414 |     const float a2 = t.vert[2].rgba.x;
415 |     s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
416 |     s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
417 |     s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
418 |   }
419 |
420 |   // g
421 |   {
422 |     const uint32_t slot = triangle_setup_t::slot_g;
423 |     const float a0 = t.vert[0].rgba.y;
424 |     const float a1 = t.vert[1].rgba.y;
425 |     const float a2 = t.vert[2].rgba.y;
426 |     s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
427 |     s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
428 |     s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
429 |   }
430 |
431 |   // b
432 |   {
433 |     const uint32_t slot = triangle_setup_t::slot_b;
434 |     const float a0 = t.vert[0].rgba.z;
435 |     const float a1 = t.vert[1].rgba.z;
436 |     const float a2 = t.vert[2].rgba.z;
437 |     s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
438 |     s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
439 |     s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
440 |   }
441 |
442 |   // u
443 |   {
444 |     const uint32_t slot = triangle_setup_t::slot_u;
445 |     const float a0 = t.vert[0].tex.x;
446 |     const float a1 = t.vert[1].tex.x;
447 |     const float a2 = t.vert[2].tex.x;
448 |     s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
449 |     s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
450 |     s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
451 |   }
452 |
453 |   // v
454 |   {
455 |     const uint32_t slot = triangle_setup_t::slot_v;
456 |     const float a0 = t.vert[0].tex.y;
457 |     const float a1 = t.vert[1].tex.y;
458 |     const float a2 = t.vert[2].tex.y;
459 |     s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
460 |     s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
461 |     s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
462 |   }
463 |
464 |   return true;
465 | }
466 |
467 | void push_triangles(const std::vector<triangle_t> &triangles,
468 |                     const texture_t *tex,
469 |                     const state_manager_t &state) override {
470 |   // bins every usable triangle into blocks, draws the blocks, then drops the
471 |   // setup data gathered for this call (tex and state are not read here)
472 |   for (const auto &t : triangles) {
473 |     if (t.vert[0].coord.w == 0.f) {
474 |       // signals fully clipped so discard
475 |       continue;
476 |     }
477 |
478 |     // reserve a setup slot and fill it in place
479 |     _setup.emplace_back();
480 |     triangle_setup_t &s = _setup.back();
481 |     if (setup_triangle(t, s)) {
482 |       const uint32_t setup_index = static_cast<uint32_t>(_setup.size()) - 1u;
483 |       insert_triangle(t, s, setup_index);
484 |     } else {
485 |       // setup rejected the triangle, so give the unused slot back
486 |       _setup.pop_back();
487 |     }
488 |   }
489 |
490 |   // draw all blocks
491 |   for (block_t &b : _blocks) {
492 |     draw_block(b);
493 |     b._triangle_setup.clear();
494 |   }
495 |
496 |   // clear all of our triangle setup
497 |   _setup.clear();
498 | }
499 |
500 | void flush() override {}
501 |
502 | void present() override {
503 | }
504 |
505 | protected:
506 |
507 | std::vector _setup;
508 |
509 | frame_t _frame;
510 | std::vector _blocks;
511 | gl_context_t *_cxt;
512 | };
513 |
514 | extern "C" {
515 | __declspec(dllexport) raster_t *raster_create() {
516 | return new rast_fasterizer_t;
517 | }
518 |
519 | __declspec(dllexport) void raster_release(raster_t *r) {
520 | assert(r);
521 | delete r;
522 | }
523 | };
524 |
--------------------------------------------------------------------------------
/rast_opencl/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | file(GLOB C_FILES *.cpp)
2 | file(GLOB H_FILES *.h)
3 | # the find module is FindOpenCL.cmake, so the package name must match its case
4 | find_package(OpenCL)
5 |
6 | if (OpenCL_FOUND)
7 |
8 | add_library(
9 | softgl_rast_opencl
10 | SHARED
11 | ${C_FILES} ${H_FILES})
12 |
13 | target_include_directories(
14 | softgl_rast_opencl
15 | PUBLIC
16 | ${OpenCL_INCLUDE_DIRS})
17 |
18 | target_link_libraries(
19 | softgl_rast_opencl
20 | ${OpenCL_LIBRARIES})
21 |
22 | else()
23 | message(STATUS "OpenCL not found, skipping softgl_rast_opencl")
24 | endif()
25 |
--------------------------------------------------------------------------------
/rast_opencl/rast_opencl.cpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
3 | #include
4 |
5 | #include
6 | #include
7 | #include
8 |
9 | #include
10 |
11 | #include "../source/context.h"
12 | #include "../source/math.h"
13 | #include "../source/raster.h"
14 | #include "../source/texture.h"
15 |
16 | namespace {
17 |
18 | struct frame_t {
19 | uint32_t w, h;
20 | uint32_t num_w, num_h;
21 | };
22 |
23 | const char *cl_kernel_raster = R"(
24 | struct __attribute__ ((packed)) vertex_t {
25 | float4 coord;
26 | float4 rgba;
27 | float2 tex;
28 | };
29 |
30 | struct __attribute__ ((packed)) triangle_t {
31 | struct vertex_t vert[3];
32 | };
33 |
34 | float triangle_area(const float2 v0,
35 | const float2 v1,
36 | const float2 v2) {
37 |
38 | // area is found using part of the vector product
39 |
40 | // x = a2 * b3 - a3 * b2
41 | // y = a1 * b3 - a3 * b1
42 | // z = a1 * b2 - a2 * b1
43 |
44 | // where a = v0 -> v1
45 | // where b = v0 -> v2
46 |
47 | // we only care about the z componant which contains the area of the
48 | // parallelogram formed. i'm unsure why we dont need to multiply by
49 | // 0.5f however when returning the result.
50 |
51 | return (v1.x - v0.x) * (v2.y - v0.y) - (v2.x - v0.x) * (v1.y - v0.y);
52 | }
53 |
54 | // evaluate the gradient field given the following:
55 | // normal: the normal for that edge
56 | // poe: a point on the edge
57 | // point: the location where to sample it
58 | float evaluate(const float2 normal,
59 | const float2 poe,
60 | const float2 point) {
61 | return dot(normal, point) - dot(normal, poe);
62 | }
63 |
64 | float2 cross2d(const float2 a) {
65 | return (float2)(-a.y, a.x );
66 | }
67 |
68 | kernel void raster(__global const struct triangle_t *tri,
69 | const uint num_tri,
70 | __global uint *fb_color,
71 | __global float *fb_depth,
72 | uint fb_pitch) {
73 |
74 | // chunk bounding rect
75 | const float chunk_min_x = get_group_id(0) * CHUNK_SIZE;
76 | const float chunk_min_y = get_group_id(1) * CHUNK_SIZE;
77 | const float chunk_max_x = chunk_min_x + CHUNK_SIZE;
78 | const float chunk_max_y = chunk_min_y + CHUNK_SIZE;
79 |
80 | // offset the framebuffer
81 | const uint fb_offs = get_group_id(0) * CHUNK_SIZE +
82 | get_group_id(1) * CHUNK_SIZE * fb_pitch;
83 | fb_color += fb_offs;
84 | fb_depth += fb_offs;
85 |
86 | // chunk staring coordinate
87 | const float2 start = (float2)(chunk_min_x, chunk_min_y);
88 |
89 | // for each triangle
90 | for (uint t=0; t < num_tri; ++t) {
91 |
92 | // get the vertices
93 | const float2 v0 = tri[t].vert[0].coord.xy;
94 | const float2 v1 = tri[t].vert[2].coord.xy;
95 | const float2 v2 = tri[t].vert[1].coord.xy;
96 |
97 | // find the area of the triangle
98 | const float area = triangle_area(v0, v1, v2);
99 |
100 | // find edge vectors
101 | const float2 d12 = v2 - v1;
102 | const float2 d20 = v0 - v2;
103 |
104 | // cross product gives us normals from the edges
105 | // which we 'normalize' to the area of the triangle
106 | const float2 n0 = cross2d(d12) / area;
107 | const float2 n1 = cross2d(d20) / area;
108 |
109 | // edge functions are
110 | // s0 = dot(n0, point) - d0
111 | // s1 = dot(n1, point) - d1
112 | // s2 = dot(n2, point) - d2
113 |
114 | // evaluate the starting position for each interpolant
115 | float s0 = evaluate(n0, v1, start);
116 | float s1 = evaluate(n1, v2, start);
117 |
118 | for (int y = 0; y < CHUNK_SIZE; y += 1) {
119 |
120 | float s0_ = s0;
121 | float s1_ = s1;
122 |
123 | for (int x = 0; x < CHUNK_SIZE; x += 1) {
124 |
125 | // s2_ can be derived from s0_ and s1_ since we know the
126 | // sum of them all should add up to 1.0f (the full area)
127 | const float s2_ = 1.f - (s0_ + s1_);
128 |
129 | if (s0_ > 0 && s1_ > 0 && s2_ > 0) {
130 | const uchar r = (uchar)(s0_ * 256);
131 | const uchar g = (uchar)(s1_ * 256);
132 | const uchar b = (uchar)(s2_ * 256);
133 | const uint rgb = (r << 16) | (g << 8) | b;
134 |
135 | fb_color[ x + y * fb_pitch ] = rgb;
136 | }
137 |
138 | s0_ += n0.x;
139 | s1_ += n1.x;
140 | }
141 |
142 | s0 += n0.y;
143 | s1 += n1.y;
144 | }
145 |
146 | }
147 | }
148 | )";
149 |
150 | } // namespace
151 |
152 | struct rast_opencl_t : public raster_t {
153 |
154 | static const uint32_t chunk_size = 32;
155 |
156 | rast_opencl_t()
157 | : _cl_context(nullptr)
158 | , _cl_device(nullptr)
159 | , _cl_queue(nullptr)
160 | , _cl_program(nullptr)
161 | , _cl_kernel(nullptr)
162 | , _cl_fb_color(nullptr)
163 | , _cl_fb_depth(nullptr)
164 | , _cl_triangles(nullptr)
165 | , _cl_triangle_setup(nullptr)
166 | , _cxt(nullptr) {
167 | }
168 |
169 | void framebuffer_release() override {
170 |   // drop both render targets; a handle that is already null is left alone
171 |   cl_mem *const targets[] = { &_cl_fb_color, &_cl_fb_depth };
172 |   for (cl_mem *target : targets) {
173 |     if (*target) {
174 |       clReleaseMemObject(*target);
175 |       *target = nullptr;
176 |     }
177 |   }
178 | }
179 |
180 | void framebuffer_aquire() override {
181 | // (re)creates the color and depth targets, padded to whole chunks
182 | if (!_cl_context || !_cxt) {
183 | return;
184 | }
185 |
186 | // release the old framebuffer
187 | framebuffer_release();
188 |
189 | // start with framebuffer size
190 | uint32_t w = _cxt->buffer.width();
191 | uint32_t h = _cxt->buffer.height();
192 |
193 | // round up to a whole number of chunks (needs chunk_size to be a power of two)
194 | const uint32_t chunk_mask = chunk_size - 1;
195 | w = (w + chunk_mask) & ~chunk_mask;
196 | h = (h + chunk_mask) & ~chunk_mask;
197 |
198 | _frame.w = w;
199 | _frame.h = h;
200 | _frame.num_w = w / chunk_size;
201 | _frame.num_h = h / chunk_size;
202 |
203 | const size_t fb_area = w * h;
204 | // NOTE: no error code is requested below, so a failed allocation is silent
205 | // create the color buffer
206 | _cl_fb_color = clCreateBuffer(
207 | _cl_context,
208 | CL_MEM_READ_WRITE,
209 | fb_area * sizeof(uint32_t),
210 | nullptr,
211 | nullptr);
212 | // create the depth buffer
213 | _cl_fb_depth = clCreateBuffer(
214 | _cl_context,
215 | CL_MEM_READ_WRITE,
216 | fb_area * sizeof(float),
217 | nullptr,
218 | nullptr);
219 |
220 | }
221 |
222 | bool start(gl_context_t &cxt) override {
223 | _cxt = &cxt;
224 | // NOTE: a failed step returns false without releasing what was already made
225 | // access an OpenCL device
226 | if (!init_cl()) {
227 | return false;
228 | }
229 |
230 | // triangle list buffer
231 | _cl_triangles = clCreateBuffer(
232 | _cl_context,
233 | CL_MEM_READ_WRITE,
234 | 1024 * 1024 * 16, // 16meg
235 | nullptr,
236 | nullptr);
237 | if (!_cl_triangles) {
238 | return false;
239 | }
240 |
241 | // triangle setup buffer
242 | _cl_triangle_setup = clCreateBuffer(
243 | _cl_context,
244 | CL_MEM_READ_WRITE,
245 | 1024 * 1024 * 16, // 16meg
246 | nullptr,
247 | nullptr);
248 | if (!_cl_triangle_setup) {
249 | return false;
250 | }
251 |
252 | if (!compile_kernel()) {
253 | return false;
254 | }
255 |
256 | return true;
257 | }
258 |
259 | void stop() override {
260 |   // release order: buffers, kernel, program, queue, context. every handle is
261 |   // nulled after release, so handles that are already null are skipped
262 |   cl_mem *const buffers[] = {
263 |       &_cl_triangles, &_cl_triangle_setup, &_cl_fb_color, &_cl_fb_depth };
264 |   for (cl_mem *buffer : buffers) {
265 |     if (*buffer) {
266 |       clReleaseMemObject(*buffer);
267 |       *buffer = nullptr;
268 |     }
269 |   }
270 |
271 |   if (_cl_kernel) {
272 |     clReleaseKernel(_cl_kernel);
273 |     _cl_kernel = nullptr;
274 |   }
275 |
276 |   if (_cl_program) {
277 |     clReleaseProgram(_cl_program);
278 |     _cl_program = nullptr;
279 |   }
280 |
281 |   if (_cl_queue) {
282 |     clReleaseCommandQueue(_cl_queue);
283 |     _cl_queue = nullptr;
284 |   }
285 |
286 |   if (_cl_context) {
287 |     clReleaseContext(_cl_context);
288 |     _cl_context = nullptr;
289 |   }
290 |
291 |   // finally forget the GL context we were started with
292 |   _cxt = nullptr;
293 | }
294 |
295 | void push_triangles(const std::vector<triangle_t> &triangles,
296 |                     const texture_t *tex,
297 |                     const state_manager_t &state) override {
298 |   // uploads the triangle list and runs the raster kernel once per chunk
299 |   if (!_cl_triangles || !_cl_queue || !_cl_kernel || !_cl_fb_color) {
300 |     return;
301 |   }
302 |   if (triangles.empty()) { return; } // nothing to draw; avoids a zero sized write
303 |   cl_int ret = CL_SUCCESS;
304 |   // write triangles into the triangle list buffer (blocking, so the caller's
305 |   // vector is no longer being read if we bail out before clFinish below)
306 |   ret = clEnqueueWriteBuffer(
307 |       _cl_queue,
308 |       _cl_triangles,
309 |       CL_TRUE,
310 |       0,
311 |       triangles.size() * sizeof(triangle_t),
312 |       triangles.data(),
313 |       0,
314 |       nullptr,
315 |       nullptr
316 |   );
317 |   if (ret != CL_SUCCESS) {
318 |     DEBUG_BREAK;
319 |     return;
320 |   }
321 |
322 |   // triangle setup kernel
323 |   // ...
324 |
325 |   // set kernel parameters
326 |   {
327 |     const cl_uint num_triangles = static_cast<cl_uint>(triangles.size());
328 |     const cl_uint fp_pitch = _frame.w;
329 |     clSetKernelArg(_cl_kernel, 0, sizeof(_cl_triangles), &_cl_triangles);
330 |     clSetKernelArg(_cl_kernel, 1, sizeof(cl_uint), &num_triangles);
331 |     clSetKernelArg(_cl_kernel, 2, sizeof(_cl_fb_color), &_cl_fb_color);
332 |     clSetKernelArg(_cl_kernel, 3, sizeof(_cl_fb_depth), &_cl_fb_depth);
333 |     clSetKernelArg(_cl_kernel, 4, sizeof(cl_uint), &fp_pitch);
334 |   }
335 |
336 |   // triangle raster kernel: one single-item work group per chunk
337 |   size_t gwgs[3] = { _frame.num_w, _frame.num_h, 1 };
338 |   size_t lwgs[3] = { 1, 1, 1 };
339 |   ret = clEnqueueNDRangeKernel(
340 |       _cl_queue,
341 |       _cl_kernel,
342 |       2,
343 |       nullptr,
344 |       gwgs,
345 |       lwgs,
346 |       0,
347 |       nullptr,
348 |       nullptr);
349 |   if (ret != CL_SUCCESS) {
350 |     DEBUG_BREAK;
351 |     return;
352 |   }
353 |
354 |   clFinish(_cl_queue);
355 | }
356 |
357 | void flush() override {}
358 |
359 | void present() override {
360 |   // copies the rendered image into the context buffer, then clears the target
361 |   if (!_cl_fb_color || !_cl_queue || !_cxt) {
362 |     return;
363 |   }
364 |   const size_t w = _cxt->buffer.width(), h = _cxt->buffer.height();
365 |   if (w == 0 || h == 0 || w > _frame.w || h > _frame.h) {
366 |     return; // empty screen, or a target that no longer fits it
367 |   }
368 |   // the target is padded to whole chunks, so its rows can be wider than the
369 |   // screen's; a rect read copies only the visible part of every row
370 |   const size_t origin[3] = { 0, 0, 0 };
371 |   const size_t region[3] = { w * sizeof(uint32_t), h, 1 };
372 |   const cl_int ret = clEnqueueReadBufferRect(
373 |       _cl_queue, _cl_fb_color, CL_TRUE,
374 |       origin, origin, region,
375 |       _frame.w * sizeof(uint32_t), 0, // device row pitch, slice pitch
376 |       w * sizeof(uint32_t), 0, // host pitches; assumes tightly packed pixels()
377 |       _cxt->buffer.pixels(),
378 |       0, nullptr, nullptr);
379 |   if (ret != CL_SUCCESS) {
380 |     DEBUG_BREAK;
381 |   }
382 |
383 |   // clear the whole padded target, not only the visible part
384 |   const cl_uint color = 0x10101010;
385 |   const size_t fb_bytes = size_t(_frame.w) * _frame.h * sizeof(uint32_t);
386 |   clEnqueueFillBuffer(_cl_queue, _cl_fb_color, &color, sizeof(color), 0, fb_bytes, 0, nullptr, nullptr);
387 | }
388 |
389 | protected:
390 |
391 | bool init_cl();
392 | bool compile_kernel();
393 | void log_compile_error();
394 |
395 | // opencl handles
396 | cl_context _cl_context;
397 | cl_device_id _cl_device;
398 | cl_command_queue _cl_queue;
399 |
400 | // kernels
401 | cl_program _cl_program;
402 | cl_kernel _cl_kernel;
403 |
404 | // framebuffers
405 | cl_mem _cl_fb_color;
406 | cl_mem _cl_fb_depth;
407 |
408 | // triangle list and triangle setup
409 | cl_mem _cl_triangles;
410 | cl_mem _cl_triangle_setup;
411 |
412 | gl_context_t *_cxt;
413 | frame_t _frame;
414 |
415 | // opencl platforms
416 | std::array _cl_platforms;
417 | };
418 |
419 | bool rast_opencl_t::init_cl() {
420 |   // selects a CPU device, then creates the context and command queue for it;
421 |   // returns false as soon as any step fails
422 |   const cl_uint capacity = static_cast<cl_uint>(_cl_platforms.size());
423 |   cl_uint num_platforms = 0;
424 |   cl_int ret = clGetPlatformIDs(
425 |       capacity,
426 |       _cl_platforms.data(),
427 |       &num_platforms);
428 |   if (CL_SUCCESS != ret)
429 |     return false;
430 |   // num_platforms counts every available platform, so clamp it to our array
431 |   const cl_uint usable = (num_platforms < capacity) ? num_platforms : capacity;
432 |   // try each platform in turn; with no platforms this stays a failure
433 |   ret = CL_DEVICE_NOT_FOUND;
434 |   for (cl_uint i = 0; i < usable && CL_SUCCESS != ret; ++i) {
435 |     ret = clGetDeviceIDs(_cl_platforms[i], CL_DEVICE_TYPE_CPU, 1, &_cl_device, nullptr);
436 |   }
437 |   if (CL_SUCCESS != ret)
438 |     return false;
439 |
440 |   _cl_context = clCreateContext(
441 |       nullptr,
442 |       1,
443 |       &_cl_device,
444 |       nullptr,
445 |       nullptr,
446 |       nullptr);
447 |   if (!_cl_context) {
448 |     return false;
449 |   }
450 |
451 |   _cl_queue = clCreateCommandQueue(
452 |       _cl_context,
453 |       _cl_device,
454 |       cl_command_queue_properties(0),
455 |       nullptr);
456 |   if (!_cl_queue) {
457 |     return false;
458 |   }
459 |
460 |   return true;
461 | }
462 |
463 | bool rast_opencl_t::compile_kernel() {
464 |   // builds cl_kernel_raster and looks up its "raster" entry point
465 |   cl_int ret = CL_SUCCESS;
466 |
467 |   _cl_program = clCreateProgramWithSource(
468 |       _cl_context,
469 |       1, // number of source buffers
470 |       &cl_kernel_raster, // source code
471 |       nullptr, // source length
472 |       nullptr); // error code ret
473 |   if (!_cl_program) {
474 |     return false;
475 |   }
476 |   // CHUNK_SIZE below has to stay in step with rast_opencl_t::chunk_size
477 |   ret = clBuildProgram(
478 |       _cl_program,
479 |       0, // num devices
480 |       nullptr, // device list
481 |       "-DCHUNK_SIZE=32", // options
482 |       nullptr, // notify callback
483 |       nullptr); // user data
484 |   if (ret != CL_SUCCESS) { log_compile_error(); return false; } // report build errors
485 |   _cl_kernel = clCreateKernel(
486 |       _cl_program,
487 |       "raster", // kernel name
488 |       nullptr);
489 |   if (!_cl_kernel) {
490 |     log_compile_error();
491 |     return false;
492 |   }
493 |
494 |   return true;
495 | }
496 |
497 | void rast_opencl_t::log_compile_error() {
498 |   // ask for the log length first: with a fixed buffer that is too small the
499 |   // query fails and the whole build log is lost
500 |   size_t length = 0;
501 |   clGetProgramBuildInfo(_cl_program, _cl_device, CL_PROGRAM_BUILD_LOG,
502 |                         0, nullptr, &length);
503 |   // one spare zeroed byte keeps the text terminated even if a query fails
504 |   std::vector<char> error(length + 1, '\0');
505 |   clGetProgramBuildInfo(_cl_program, _cl_device, CL_PROGRAM_BUILD_LOG,
506 |                         length, error.data(), nullptr);
507 |   // the build log goes to stdout
508 |   printf("%s\n\n", error.data());
509 | }
510 |
511 | extern "C" {
512 | __declspec(dllexport) raster_t *raster_create() {
513 | return new rast_opencl_t;
514 | }
515 |
516 | __declspec(dllexport) void raster_release(raster_t *r) {
517 | delete r;
518 | }
519 | };
520 |
--------------------------------------------------------------------------------
/rast_reference/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | file(GLOB KERNELS
2 | kernels/*.cpp
3 | kernels/*.h)
4 | add_library(
5 | softgl_rast_reference_kernels
6 | ${KERNELS})
7 |
8 | file(GLOB SOURCE *.cpp *.h)
9 | add_library(
10 | softgl_rast_reference
11 | SHARED
12 | ${SOURCE})
13 |
14 | target_link_libraries(
15 | softgl_rast_reference
16 | softgl_rast_reference_kernels)
17 |
18 | target_compile_definitions(
19 | softgl_rast_reference_kernels
20 | PUBLIC
21 | -DUSE_APROX_TEMPLATE)
22 |
--------------------------------------------------------------------------------
/rast_reference/kernel.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 |
6 | #include
7 |
8 | #include "../source/context.h"
9 | #include "../source/math.h"
10 | #include "../source/raster.h"
11 | #include "../source/texture.h"
12 |
13 |
14 | static const int32_t BLOCK_SIZE = 16;
15 | static const int32_t BLOCK_MASK = ~(BLOCK_SIZE - 1);
16 |
17 | struct triangle_setup_t {
18 |
19 | enum {
20 | slot_w0, // triangle weight 0
21 | slot_w1, // triangle weight 1
22 | slot_iw, // inverse w
23 | slot_z, // z / w
24 |
25 | slot_a, // a / w
26 | slot_r, // r / w
27 | slot_g, // g / w
28 | slot_b, // b / w
29 |
30 | slot_u, // u / w
31 | slot_v, // v / w
32 | _slot_count_
33 | };
34 |
35 | std::array v;
36 | std::array vx;
37 | std::array vy;
38 |
39 | std::array edge;
40 |
41 | bool affine;
42 | recti_t bound;
43 | uint32_t mip_level;
44 | };
45 |
46 | struct frame_t {
47 | uint32_t *_pixels;
48 | float *_depth;
49 | int32_t _width;
50 | int32_t _height;
51 | };
52 |
53 | extern const std::array mip_log_table;
54 |
55 | static inline uint32_t get_mip_level(float tri_area,
56 |                                      float uv_area) {
57 |   // the ratio of the two areas, clamped to the table, indexes mip_log_table
58 |   const float factor = fabsf(uv_area) / fabsf(tri_area);
59 |   const uint32_t max = uint32_t(mip_log_table.size()) - 1u;
60 |   // clamp as a float: inf/NaN from a zero tri_area also land on the last entry
61 |   return mip_log_table[(factor < float(max)) ? uint32_t(factor) : max];
62 | }
63 |
64 | static inline float triangle_area(const float2 &v0,
65 | const float2 &v1,
66 | const float2 &v2) {
67 |
68 | // area is found using part of the vector product
69 |
70 | // x = a2 * b3 - a3 * b2
71 | // y = a3 * b1 - a1 * b3
72 | // z = a1 * b2 - a2 * b1
73 |
74 | // where a = v0 -> v1
75 | // where b = v0 -> v2
76 |
77 | // we only care about the z component, which is the signed area of the
78 | // parallelogram spanned by a and b, i.e. twice the signed area of the
79 | // triangle. it is returned as is, without the 0.5f factor.
80 |
81 | return (v1.x - v0.x) * (v2.y - v0.y) -
82 | (v2.x - v0.x) * (v1.y - v0.y);
83 | }
84 |
85 | // evaluate the offset term of an edge's gradient field, which is the dot
86 | // product of the edge normal with a point lying on that edge:
87 | // normal: the normal for that edge
88 | // poe: a point on the edge
89 | static inline float evaluate(const float2 &normal,
90 | const float2 &poe) {
91 | return normal.x * poe.x + normal.y * poe.y;
92 | }
93 |
94 | static inline __m128 step_x(float v, float vx) {
95 | return _mm_set_ps(v + vx * 3.f,
96 | v + vx * 2.f,
97 | v + vx * 1.f,
98 | v + vx * 0.f);
99 | }
100 |
101 | static inline bool affine_heuristic(const recti_t &r) {
102 | // heuristic decides if this is a small triangle using
103 | // the square of the max edge to avoid slim triangles being
104 | // flagged
105 | const int32_t max = std::max(r.dx(), r.dy());
106 | return (max * max) < (64 * 64);
107 | }
108 |
109 | static inline int32_t normal_quadrant(const float3 &edge) {
110 | return (edge.x > 0.f) | ((edge.y > 0.f) << 1);
111 | }
112 |
113 | static inline bool test_out(const float3 &edge,
114 | const float2 &p) {
115 | return (p.x * edge.x + p.y * edge.y) < edge.z;
116 | }
117 |
118 | static inline bool test_in(const float3 &edge,
119 | const float2 &p) {
120 | return (p.x * edge.x + p.y * edge.y) > edge.z;
121 | }
122 |
123 | static inline bool trivial_out(const float3 &e,
124 | const float2 &min,
125 | const float2 & max) {
126 | switch (normal_quadrant( e )) {
127 | case 3: return test_out(e, float2{max.x, max.y}); // (+,+) -> box (-, -)
128 | case 2: return test_out(e, float2{min.x, max.y}); // (-,+) -> box (+, -)
129 | case 1: return test_out(e, float2{max.x, min.y}); // (+,-) -> box (-, +)
130 | case 0: return test_out(e, float2{min.x, min.y}); // (-,-) -> box (+, +)
131 | default: __assume(false);
132 | }
133 | return false;
134 | }
135 |
136 | static inline bool trivial_in(const float3 &e,
137 | const float2 &min,
138 | const float2 & max) {
139 | switch (normal_quadrant( e )) {
140 | case 3: return test_in(e, float2{min.x, min.y}); // (+,+) -> box (+, +)
141 | case 2: return test_in(e, float2{max.x, min.y}); // (-,+) -> box (-, +)
142 | case 1: return test_in(e, float2{min.x, max.y}); // (+,-) -> box (+, -)
143 | case 0: return test_in(e, float2{max.x, max.y}); // (-,-) -> box (-, -)
144 | default: __assume(false);
145 | }
146 | return false;
147 | }
148 |
149 | template
150 | static __forceinline __m128i blend_factor(__m128i src, __m128i dst) {
151 |
152 | // Input layout:
153 | // ARGB colour layout
154 |
155 | switch (mode) {
156 | case GL_ZERO:
157 | return _mm_setzero_si128();
158 | case GL_ONE:
159 | return _mm_set1_epi8((char)0xff);
160 | case GL_DST_COLOR:
161 | return dst;
162 | case GL_SRC_COLOR:
163 | return src;
164 | case GL_ONE_MINUS_DST_COLOR:
165 | return _mm_sub_epi8(_mm_set1_epi8((char)0xff), dst);
166 | case GL_ONE_MINUS_SRC_COLOR:
167 | return _mm_sub_epi8(_mm_set1_epi8((char)0xff), src);
168 | case GL_SRC_ALPHA: {
169 | const __m128i a = _mm_and_si128(src, _mm_set1_epi32(0x000000ff));
170 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16));
171 | return _mm_or_si128(b, _mm_slli_epi32(b, 8));
172 | }
173 | case GL_ONE_MINUS_SRC_ALPHA: {
174 | const __m128i a = _mm_and_si128(src, _mm_set1_epi32(0x000000ff));
175 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16));
176 | const __m128i c = _mm_or_si128(b, _mm_slli_epi32(b, 8));
177 | return _mm_sub_epi8(_mm_set1_epi32(0xffffffff), c);
178 | }
179 | case GL_DST_ALPHA: {
180 | const __m128i a = _mm_and_si128(dst, _mm_set1_epi32(0x000000ff));
181 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16));
182 | return _mm_or_si128(b, _mm_slli_epi32(b, 8));
183 | }
184 | case GL_ONE_MINUS_DST_ALPHA: {
185 | const __m128i a = _mm_and_si128(dst, _mm_set1_epi32(0x000000ff));
186 | const __m128i b = _mm_or_si128(a, _mm_slli_epi32(a, 16));
187 | const __m128i c = _mm_or_si128(b, _mm_slli_epi32(b, 8));
188 | return _mm_sub_epi8(_mm_set1_epi32(0xffffffff), c);
189 | }
190 | case GL_SRC_ALPHA_SATURATE:
191 | return _mm_setzero_si128(); // TODO
192 | default:
193 | __assume(0);
194 | }
195 | }
196 |
197 | // multiply packed 8 bit unsigned bytes
198 | static __forceinline __m128i _mm_mulhi_epi8( __m128i x, __m128i y ) {
199 | #if 0
200 | // unpack and multiply
201 | __m128i dst_even = _mm_mullo_epi16(x, y);
202 | __m128i dst_odd = _mm_mullo_epi16(_mm_srli_epi16(x, 8),
203 | _mm_srli_epi16(y, 8));
204 | // repack
205 | return _mm_or_si128(_mm_slli_epi16(dst_odd, 8),
206 | _mm_srli_epi16(_mm_slli_epi16(dst_even,8), 8));
207 | #else
208 | const __m128i zero = _mm_setzero_si128();
209 |
210 | // unpack 8 bits into 16 bits
211 | __m128i xlo = _mm_unpacklo_epi8( x, zero );
212 | __m128i ylo = _mm_unpacklo_epi8( y, zero );
213 | __m128i xhi = _mm_unpackhi_epi8( x, zero );
214 | __m128i yhi = _mm_unpackhi_epi8( y, zero );
215 |
216 | // x * y
217 | xlo = _mm_mullo_epi16( xlo, ylo );
218 | xhi = _mm_mullo_epi16( xhi, yhi );
219 |
220 | // >> 8
221 | xlo = _mm_srli_epi16( xlo, 8 );
222 | xhi = _mm_srli_epi16( xhi, 8 );
223 |
224 | // saturate and repack into 8 bits
225 | return _mm_packus_epi16( xlo, xhi );
226 | #endif
227 | }
228 |
229 | // saturate(src * src_factor + dst * dst_factor)
230 | template
231 | static __forceinline __m128i blend_sum(__m128i src, __m128i dst) {
232 | const __m128i sf = blend_factor(src, dst);
233 | const __m128i df = blend_factor(src, dst);
234 | return _mm_adds_epu8(_mm_mulhi_epi8(src, sf),
235 | _mm_mulhi_epi8(dst, df));
236 | }
237 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_dst_src.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_DST_COLOR
8 | #define DST_BLEND GL_SRC_COLOR
9 | #define KERNEL_NAME rast_tex_dst_src
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_dst_zero.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_DST_COLOR
8 | #define DST_BLEND GL_ZERO
9 | #define KERNEL_NAME rast_tex_dst_zero
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_one_msa.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_ONE
8 | #define DST_BLEND GL_ONE_MINUS_SRC_ALPHA
9 | #define KERNEL_NAME rast_tex_one_msa
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_one_msc.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_ONE
8 | #define DST_BLEND GL_ONE_MINUS_SRC_COLOR
9 | #define KERNEL_NAME rast_tex_one_msc
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_one_one.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_ONE
8 | #define DST_BLEND GL_ONE
9 | #define KERNEL_NAME rast_tex_one_one
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_one_zero.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_ONE
8 | #define DST_BLEND GL_ZERO
9 | #define KERNEL_NAME rast_tex_one_zero
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/rast_tex_sa_msa.cpp:
--------------------------------------------------------------------------------
1 | #include "../kernel.h"
2 |
3 | #define DEPTH_TEST 1
4 | #define DEPTH_WRITE 1
5 | #define COLOR_WRITE 1
6 | #define DEPTH_CMP _mm_cmple_ps
7 | #define SRC_BLEND GL_SRC_ALPHA
8 | #define DST_BLEND GL_ONE_MINUS_SRC_ALPHA
9 | #define KERNEL_NAME rast_tex_sa_msa
10 |
11 | #if USE_APROX_TEMPLATE
12 | #include "template_aprox.h"
13 | #else
14 | #include "template.h"
15 | #endif
16 |
--------------------------------------------------------------------------------
/rast_reference/kernels/template.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | static inline void stamp_affine(
4 | const triangle_setup_t &s,
5 | const texture_t &tex,
6 | const float2 origin,
7 | uint32_t *color,
8 | float *depth,
9 | uint32_t pitch) {
10 |
11 | const float v0 = (s.vx[s.slot_w0] * origin.x + s.vy[s.slot_w0] * origin.y) - s.v[s.slot_w0];
12 | const float v1 = (s.vx[s.slot_w1] * origin.x + s.vy[s.slot_w1] * origin.y) - s.v[s.slot_w1];
13 | const float iw = (s.vx[s.slot_iw] * origin.x + s.vy[s.slot_iw] * origin.y) - s.v[s.slot_iw];
14 | const float z = (s.vx[s.slot_z ] * origin.x + s.vy[s.slot_z ] * origin.y) - s.v[s.slot_z ];
15 | const float u = (s.vx[s.slot_u ] * origin.x + s.vy[s.slot_u ] * origin.y) - s.v[s.slot_u ];
16 | const float v = (s.vx[s.slot_v ] * origin.x + s.vy[s.slot_v ] * origin.y) - s.v[s.slot_v ];
17 |
18 | __m128 Sv0x = _mm_set1_ps(s.vx[s.slot_w0] * 4.f);
19 | __m128 Sv0y = _mm_set1_ps(s.vy[s.slot_w0]);
20 | __m128 Sv0 = step_x(v0, s.vx[s.slot_w0]);
21 |
22 | __m128 Sv1x = _mm_set1_ps(s.vx[s.slot_w1] * 4.f);
23 | __m128 Sv1y = _mm_set1_ps(s.vy[s.slot_w1]);
24 | __m128 Sv1 = step_x(v1, s.vx[s.slot_w1]);
25 |
26 | __m128 Szx = _mm_set1_ps(s.vx[s.slot_z] * 4.f);
27 | __m128 Szy = _mm_set1_ps(s.vy[s.slot_z]);
28 | __m128 Sz = step_x(z, s.vx[s.slot_z]);
29 |
30 | const int32_t tw = tex._width >> s.mip_level;
31 | const int32_t twm = tw - 1;
32 | __m128 Sux = _mm_set1_ps(s.vx[s.slot_u] * 4.f * tw);
33 | __m128 Suy = _mm_set1_ps(s.vy[s.slot_u] * tw);
34 | __m128 Su = step_x(u * tw, s.vx[s.slot_u] * tw);
35 | __m128i Stwm = _mm_set_epi32(twm, twm, twm, twm);
36 |
37 | const int32_t th = tex._height >> s.mip_level;
38 | const int32_t thm = th - 1;
39 | __m128 Svx = _mm_set1_ps(s.vx[s.slot_v] * 4.f * th);
40 | __m128 Svy = _mm_set1_ps(s.vy[s.slot_v] * th);
41 | __m128 Sv = step_x(v * th, s.vx[s.slot_v] * th);
42 | __m128i Sthm = _mm_set_epi32(thm, thm, thm, thm);
43 |
44 | const int32_t wshift = std::max(0, int32_t(tex._wshift) - int32_t(s.mip_level));
45 | const uint32_t *texel = tex._pixels[s.mip_level];
46 |
47 | for (int y = 0; y < BLOCK_SIZE; ++y) {
48 |
49 | __m128 Sv0_ = Sv0;
50 | __m128 Sv1_ = Sv1;
51 | __m128 Sz_ = Sz;
52 | __m128 Su_ = Su;
53 | __m128 Sv_ = Sv;
54 |
55 | for (int x = 0; x < BLOCK_SIZE; x += 4) {
56 |
57 | // third edge coefficient
58 | // const float v2_ = 1.f - (v0_ + v1_);
59 | __m128 Sv2_ = _mm_sub_ps(_mm_set_ps1(1.f), _mm_add_ps(Sv0_, Sv1_));
60 |
61 | // load depth values
62 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
63 | __m128 zbuf = _mm_load_ps(depth + x);
64 | #endif
65 |
66 | // triangle edge test
67 | // if (v0_ > 0.f && v1_ > 0.f && v2_ > 0.f) {
68 | __m128 m0 = _mm_cmpge_ps(Sv0_, _mm_setzero_ps());
69 | __m128 m1 = _mm_cmpge_ps(Sv1_, _mm_setzero_ps());
70 | __m128 m2 = _mm_cmpge_ps(Sv2_, _mm_setzero_ps());
71 |
72 | // triangle edge test and depth (together)
73 | // if (zed <= depth[x]) {
74 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
75 | __m128 keep = _mm_and_ps(_mm_and_ps(m0, DEPTH_CMP(Sz_, zbuf)),
76 | _mm_and_ps(m1, m2));
77 | #else
78 | __m128 keep = _mm_and_ps(m0, _mm_and_ps(m1, m2));
79 | #endif
80 |
81 | #if DEPTH_WRITE
82 | // depth write
83 | // depth[x] = zed;
84 | _mm_maskstore_ps(depth + x, _mm_castps_si128(keep), Sz_);
85 | #endif
86 |
87 | #if COLOR_WRITE
88 | // u / (1/w), v / (1/w)
89 | // ((int32_t(u/iw)&twm) +
90 | // (int32_t(u/iw)&twm) << tex._wshift)
91 | const __m128i tu = _mm_and_si128(_mm_cvtps_epi32(Su_), Stwm);
92 | const __m128i tv = _mm_and_si128(_mm_cvtps_epi32(Sv_), Sthm);
93 | const __m128i ti = _mm_add_epi32(tu, _mm_slli_epi32(tv, wshift));
94 |
95 | // extract texture indices
96 | const uint32_t ti0 = _mm_extract_epi32(ti, 0);
97 | const uint32_t ti1 = _mm_extract_epi32(ti, 1);
98 | const uint32_t ti2 = _mm_extract_epi32(ti, 2);
99 | const uint32_t ti3 = _mm_extract_epi32(ti, 3);
100 |
101 | // load from the texture
102 | const uint32_t tc0 = texel[ti0];
103 | const uint32_t tc1 = texel[ti1];
104 | const uint32_t tc2 = texel[ti2];
105 | const uint32_t tc3 = texel[ti3];
106 |
107 | // blend equation
108 | #if (SRC_BLEND == GL_ONE) && (DST_BLEND == GL_ZERO)
109 | // special case for one and zero since out is directly our source
110 | const __m128i out = _mm_set_epi32(tc3, tc2, tc1, tc0);
111 | #else
112 | // generic case where we have an arbitary blending equation
113 | const __m128i src = _mm_set_epi32(tc3, tc2, tc1, tc0);
114 | const __m128i dst = _mm_load_si128((__m128i*)(color + x));
115 | const __m128i out = blend_sum(src, dst);
116 | #endif
117 |
118 | // color write
119 | _mm_maskstore_epi32((int*)color + x, _mm_castps_si128(keep), out);
120 | #endif
121 |
122 | // x-axis step
123 | Sv0_ = _mm_add_ps(Sv0_, Sv0x);
124 | Sv1_ = _mm_add_ps(Sv1_, Sv1x);
125 | Sz_ = _mm_add_ps(Sz_ , Szx );
126 | Su_ = _mm_add_ps(Su_ , Sux );
127 | Sv_ = _mm_add_ps(Sv_ , Svx );
128 | }
129 |
130 | // y-axis step
131 | Sv0 = _mm_add_ps(Sv0, Sv0y);
132 | Sv1 = _mm_add_ps(Sv1, Sv1y);
133 | Sz = _mm_add_ps(Sz , Szy );
134 | Su = _mm_add_ps(Su , Suy );
135 | Sv = _mm_add_ps(Sv , Svy );
136 |
137 | // framebuffer step
138 | color += pitch;
139 | depth += pitch;
140 | }
141 | }
142 |
143 | static inline void stamp(
144 | const triangle_setup_t &s,
145 | const texture_t &tex,
146 | const float2 origin,
147 | uint32_t *color, // first pixel of the tile in the colour buffer
148 | float *depth, // first pixel of the tile in the depth buffer
149 | uint32_t pitch) { // row stride of both buffers, in elements
150 | // Stamp one BLOCK_SIZE x BLOCK_SIZE tile starting at 'origin', four pixels per step, with a per-pixel edge test; u and v are multiplied by the reciprocal of the interpolated 1/w before the texel lookup.
151 | const float v0 = (s.vx[s.slot_w0] * origin.x + s.vy[s.slot_w0] * origin.y) - s.v[s.slot_w0];
152 | const float v1 = (s.vx[s.slot_w1] * origin.x + s.vy[s.slot_w1] * origin.y) - s.v[s.slot_w1];
153 | const float iw = (s.vx[s.slot_iw] * origin.x + s.vy[s.slot_iw] * origin.y) - s.v[s.slot_iw];
154 | const float z = (s.vx[s.slot_z ] * origin.x + s.vy[s.slot_z ] * origin.y) - s.v[s.slot_z ];
155 | const float u = (s.vx[s.slot_u ] * origin.x + s.vy[s.slot_u ] * origin.y) - s.v[s.slot_u ];
156 | const float v = (s.vx[s.slot_v ] * origin.x + s.vy[s.slot_v ] * origin.y) - s.v[s.slot_v ];
157 | // per interpolant: S*x = x step for a group of four pixels, S*y = y step for one row, S* = values at the tile origin (presumably step_x yields {v, v+dx, v+2dx, v+3dx} - confirm against its definition)
158 | __m128 Sv0x = _mm_set1_ps(s.vx[s.slot_w0] * 4.f);
159 | __m128 Sv0y = _mm_set1_ps(s.vy[s.slot_w0]);
160 | __m128 Sv0 = step_x(v0, s.vx[s.slot_w0]);
161 |
162 | __m128 Sv1x = _mm_set1_ps(s.vx[s.slot_w1] * 4.f);
163 | __m128 Sv1y = _mm_set1_ps(s.vy[s.slot_w1]);
164 | __m128 Sv1 = step_x(v1, s.vx[s.slot_w1]);
165 |
166 | __m128 Siwx = _mm_set1_ps(s.vx[s.slot_iw] * 4.f);
167 | __m128 Siwy = _mm_set1_ps(s.vy[s.slot_iw]);
168 | __m128 Siw = step_x(iw, s.vx[s.slot_iw]);
169 |
170 | __m128 Szx = _mm_set1_ps(s.vx[s.slot_z] * 4.f);
171 | __m128 Szy = _mm_set1_ps(s.vy[s.slot_z]);
172 | __m128 Sz = step_x(z, s.vx[s.slot_z]);
173 | // texture width at the selected mip level; u is pre-scaled by it and wrapped with (width - 1), which only wraps correctly for power-of-two widths
174 | const int32_t tw = tex._width >> s.mip_level;
175 | const int32_t twm = tw - 1;
176 | __m128 Sux = _mm_set1_ps(s.vx[s.slot_u] * 4.f * tw);
177 | __m128 Suy = _mm_set1_ps(s.vy[s.slot_u] * tw);
178 | __m128 Su = step_x(u * tw, s.vx[s.slot_u] * tw);
179 | __m128i Stwm = _mm_set_epi32(twm, twm, twm, twm);
180 | // same again for the texture height and v
181 | const int32_t th = tex._height >> s.mip_level;
182 | const int32_t thm = th - 1;
183 | __m128 Svx = _mm_set1_ps(s.vx[s.slot_v] * 4.f * th);
184 | __m128 Svy = _mm_set1_ps(s.vy[s.slot_v] * th);
185 | __m128 Sv = step_x(v * th, s.vx[s.slot_v] * th);
186 | __m128i Sthm = _mm_set_epi32(thm, thm, thm, thm);
187 | // row shift used to turn (tu, tv) into a texel index at this mip level, clamped so it never goes negative
188 | const int32_t wshift = std::max(0, int32_t(tex._wshift) - int32_t(s.mip_level));
189 | const uint32_t *texel = tex._pixels[s.mip_level];
190 |
191 | for (int y = 0; y < BLOCK_SIZE; ++y) {
192 | // per-row working copies; the trailing underscore marks the values stepped along x
193 | __m128 Sv0_ = Sv0;
194 | __m128 Sv1_ = Sv1;
195 | __m128 Siw_ = Siw;
196 | __m128 Sz_ = Sz;
197 | __m128 Su_ = Su;
198 | __m128 Sv_ = Sv;
199 |
200 | for (int x = 0; x < BLOCK_SIZE; x += 4) {
201 |
202 | // third edge coefficient
203 | // const float v2_ = 1.f - (v0_ + v1_);
204 | const __m128 Sv2_ = _mm_sub_ps(_mm_set_ps1(1.f), _mm_add_ps(Sv0_, Sv1_));
205 |
206 | // load depth values (aligned load: depth + x must be 16-byte aligned)
207 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
208 | const __m128 zbuf = _mm_load_ps(depth + x);
209 | #endif
210 |
211 | // triangle edge test
212 | // if (v0_ >= 0.f && v1_ >= 0.f && v2_ >= 0.f) {
213 | const __m128 m0 = _mm_cmpge_ps(Sv0_, _mm_setzero_ps());
214 | const __m128 m1 = _mm_cmpge_ps(Sv1_, _mm_setzero_ps());
215 | const __m128 m2 = _mm_cmpge_ps(Sv2_, _mm_setzero_ps());
216 |
217 | // triangle edge test and depth (together)
218 | // if (DEPTH_CMP(zed, depth[x])) {
219 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
220 | const __m128 keep = _mm_and_ps(_mm_and_ps(m0, DEPTH_CMP(Sz_, zbuf)),
221 | _mm_and_ps(m1, m2));
222 | #else
223 | const __m128 keep = _mm_and_ps(m0, _mm_and_ps(m1, m2));
224 | #endif
225 |
226 | #if DEPTH_WRITE
227 | // depth write, only for the lanes that passed
228 | // depth[x] = zed;
229 | _mm_maskstore_ps(depth + x, _mm_castps_si128(keep), Sz_);
230 | #endif
231 |
232 | #if COLOR_WRITE
233 | // find 1 / (1/w) (_mm_rcp_ps is an approximate reciprocal)
234 | const __m128 rw = _mm_rcp_ps(Siw_);
235 |
236 | // u / (1/w), v / (1/w)
237 | // ((int32_t(u/iw)&twm) +
238 | // ((int32_t(v/iw)&thm) << wshift)) - note _mm_cvtps_epi32 rounds with the current rounding mode instead of truncating
239 | const __m128i tu = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Su_, rw)), Stwm);
240 | const __m128i tv = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Sv_, rw)), Sthm);
241 | const __m128i ti = _mm_add_epi32(tu, _mm_slli_epi32(tv, wshift));
242 |
243 | // extract texture indices
244 | const uint32_t ti0 = _mm_extract_epi32(ti, 0);
245 | const uint32_t ti1 = _mm_extract_epi32(ti, 1);
246 | const uint32_t ti2 = _mm_extract_epi32(ti, 2);
247 | const uint32_t ti3 = _mm_extract_epi32(ti, 3);
248 |
249 | // load from the texture (all four texels are fetched, including lanes that 'keep' will reject)
250 | const uint32_t tc0 = texel[ti0];
251 | const uint32_t tc1 = texel[ti1];
252 | const uint32_t tc2 = texel[ti2];
253 | const uint32_t tc3 = texel[ti3];
254 |
255 | // blend equation
256 | #if (SRC_BLEND == GL_ONE) && (DST_BLEND == GL_ZERO)
257 | // special case for one and zero since out is directly our source
258 | const __m128i out = _mm_set_epi32(tc3, tc2, tc1, tc0);
259 | #else
260 | // generic case where we have an arbitrary blending equation
261 | __m128i src = _mm_set_epi32(tc3, tc2, tc1, tc0);
262 | __m128i dst = _mm_load_si128((__m128i*)(color + x));
263 | __m128i out = blend_sum(src, dst);
264 | #endif
265 |
266 | // color write, only for the lanes that passed
267 | _mm_maskstore_epi32((int*)color + x, _mm_castps_si128(keep), out);
268 | #endif
269 |
270 | // x-axis step
271 | Sv0_ = _mm_add_ps(Sv0_, Sv0x);
272 | Sv1_ = _mm_add_ps(Sv1_, Sv1x);
273 | Siw_ = _mm_add_ps(Siw_, Siwx);
274 | Sz_ = _mm_add_ps(Sz_ , Szx );
275 | Su_ = _mm_add_ps(Su_ , Sux );
276 | Sv_ = _mm_add_ps(Sv_ , Svx );
277 | }
278 |
279 | // y-axis step
280 | Sv0 = _mm_add_ps(Sv0, Sv0y);
281 | Sv1 = _mm_add_ps(Sv1, Sv1y);
282 | Siw = _mm_add_ps(Siw, Siwy);
283 | Sz = _mm_add_ps(Sz , Szy );
284 | Su = _mm_add_ps(Su , Suy );
285 | Sv = _mm_add_ps(Sv , Svy );
286 |
287 | // framebuffer step
288 | color += pitch;
289 | depth += pitch;
290 | }
291 | }
292 |
293 | // trivial in case: the whole tile lies inside the triangle, so no per-pixel edge test is done
294 | static inline void stamp_ti(
295 | const triangle_setup_t &s,
296 | const texture_t &tex,
297 | const float2 origin,
298 | uint32_t *color, // first pixel of the tile in the colour buffer
299 | float *depth, // first pixel of the tile in the depth buffer
300 | uint32_t pitch) { // row stride of both buffers, in elements
301 | // evaluate each interpolant at the tile origin
302 | const float iw = (s.vx[s.slot_iw] * origin.x + s.vy[s.slot_iw] * origin.y) - s.v[s.slot_iw];
303 | const float z = (s.vx[s.slot_z ] * origin.x + s.vy[s.slot_z ] * origin.y) - s.v[s.slot_z ];
304 | const float u = (s.vx[s.slot_u ] * origin.x + s.vy[s.slot_u ] * origin.y) - s.v[s.slot_u ];
305 | const float v = (s.vx[s.slot_v ] * origin.x + s.vy[s.slot_v ] * origin.y) - s.v[s.slot_v ];
306 | // per interpolant: S*x = x step for a group of four pixels, S*y = y step for one row, S* = values at the tile origin (presumably step_x yields {v, v+dx, v+2dx, v+3dx} - confirm against its definition)
307 | __m128 Siwx = _mm_set1_ps(s.vx[s.slot_iw] * 4.f);
308 | __m128 Siwy = _mm_set1_ps(s.vy[s.slot_iw]);
309 | __m128 Siw = step_x(iw, s.vx[s.slot_iw]);
310 |
311 | __m128 Szx = _mm_set1_ps(s.vx[s.slot_z] * 4.f);
312 | __m128 Szy = _mm_set1_ps(s.vy[s.slot_z]);
313 | __m128 Sz = step_x(z, s.vx[s.slot_z]);
314 | // texture width at the selected mip level; u is pre-scaled by it and wrapped with (width - 1), which only wraps correctly for power-of-two widths
315 | const int32_t tw = tex._width >> s.mip_level;
316 | const int32_t twm = tw - 1;
317 | __m128 Sux = _mm_set1_ps(s.vx[s.slot_u] * 4.f * tw);
318 | __m128 Suy = _mm_set1_ps(s.vy[s.slot_u] * tw);
319 | __m128 Su = step_x(u * tw, s.vx[s.slot_u] * tw);
320 | __m128i Stwm = _mm_set_epi32(twm, twm, twm, twm);
321 | // same again for the texture height and v
322 | const int32_t th = tex._height >> s.mip_level;
323 | const int32_t thm = th - 1;
324 | __m128 Svx = _mm_set1_ps(s.vx[s.slot_v] * 4.f * th);
325 | __m128 Svy = _mm_set1_ps(s.vy[s.slot_v] * th);
326 | __m128 Sv = step_x(v * th, s.vx[s.slot_v] * th);
327 | __m128i Sthm = _mm_set_epi32(thm, thm, thm, thm);
328 | // row shift used to turn (tu, tv) into a texel index at this mip level, clamped so it never goes negative
329 | const int32_t wshift = std::max(0, int32_t(tex._wshift) - int32_t(s.mip_level));
330 | const uint32_t *texel = tex._pixels[s.mip_level];
331 |
332 | for (int y = 0; y < BLOCK_SIZE; ++y) {
333 | // per-row working copies; the trailing underscore marks the values stepped along x
334 | __m128 Siw_ = Siw;
335 | __m128 Sz_ = Sz;
336 | __m128 Su_ = Su;
337 | __m128 Sv_ = Sv;
338 |
339 | for (int x = 0; x < BLOCK_SIZE; x += 4) {
340 |
341 | // load depth values (aligned load: depth + x must be 16-byte aligned)
342 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
343 | const __m128 zbuf = _mm_load_ps(depth + x);
344 | #endif
345 |
346 | // depth test only; without one every lane is kept
347 | // if (DEPTH_CMP(zed, depth[x])) {
348 | #if DEPTH_TEST && (DEPTH_CMP != GL_ALWAYS) && (DEPTH_CMP != GL_NEVER)
349 | const __m128 keep = DEPTH_CMP(Sz_, zbuf);
350 | #else
351 | const __m128 keep = _mm_castsi128_ps (_mm_set1_epi32(0xffffffff));
352 | #endif
353 |
354 | #if DEPTH_WRITE
355 | // depth write, only for the lanes that passed
356 | // depth[x] = zed;
357 | _mm_maskstore_ps(depth + x, _mm_castps_si128(keep), Sz_);
358 | #endif
359 |
360 | #if COLOR_WRITE
361 | // find 1 / (1/w) (_mm_rcp_ps is an approximate reciprocal)
362 | const __m128 rw = _mm_rcp_ps(Siw_);
363 |
364 | // u / (1/w), v / (1/w)
365 | // ((int32_t(u/iw)&twm) +
366 | // ((int32_t(v/iw)&thm) << wshift)) - note _mm_cvtps_epi32 rounds with the current rounding mode instead of truncating
367 | const __m128i tu = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Su_, rw)), Stwm);
368 | const __m128i tv = _mm_and_si128(_mm_cvtps_epi32(_mm_mul_ps(Sv_, rw)), Sthm);
369 | const __m128i ti = _mm_add_epi32(tu, _mm_slli_epi32(tv, wshift));
370 |
371 | // extract texture indices
372 | const uint32_t ti0 = _mm_extract_epi32(ti, 0);
373 | const uint32_t ti1 = _mm_extract_epi32(ti, 1);
374 | const uint32_t ti2 = _mm_extract_epi32(ti, 2);
375 | const uint32_t ti3 = _mm_extract_epi32(ti, 3);
376 |
377 | // load from the texture (all four texels are fetched, including lanes that 'keep' will reject)
378 | const uint32_t tc0 = texel[ti0];
379 | const uint32_t tc1 = texel[ti1];
380 | const uint32_t tc2 = texel[ti2];
381 | const uint32_t tc3 = texel[ti3];
382 |
383 | // blend equation
384 | #if (SRC_BLEND == GL_ONE) && (DST_BLEND == GL_ZERO)
385 | // special case for one and zero since out is directly our source
386 | const __m128i out = _mm_set_epi32(tc3, tc2, tc1, tc0);
387 | #else
388 | // generic case where we have an arbitrary blending equation
389 | const __m128i src = _mm_set_epi32(tc3, tc2, tc1, tc0);
390 | const __m128i dst = _mm_load_si128((__m128i*)(color + x));
391 | const __m128i out = blend_sum(src, dst);
392 | #endif
393 |
394 | // color write, only for the lanes that passed
395 | _mm_maskstore_epi32((int*)color + x, _mm_castps_si128(keep), out);
396 | #endif
397 |
398 | // x-axis step
399 | Siw_ = _mm_add_ps(Siw_, Siwx);
400 | Sz_ = _mm_add_ps(Sz_ , Szx );
401 | Su_ = _mm_add_ps(Su_ , Sux );
402 | Sv_ = _mm_add_ps(Sv_ , Svx );
403 | }
404 |
405 | // y-axis step
406 | Siw = _mm_add_ps(Siw, Siwy);
407 | Sz = _mm_add_ps(Sz , Szy );
408 | Su = _mm_add_ps(Su , Suy );
409 | Sv = _mm_add_ps(Sv , Svy );
410 |
411 | // framebuffer step
412 | color += pitch;
413 | depth += pitch;
414 | }
415 | }
416 |
417 | void KERNEL_NAME (
418 | const frame_t &f,
419 | const triangle_setup_t &s,
420 | const texture_t &tex)
421 | {
422 | const uint32_t stride = f._width;
423 | const recti_t tiles = { s.bound.x0 & BLOCK_MASK,
424 | s.bound.y0 & BLOCK_MASK,
425 | (s.bound.x1 + BLOCK_SIZE - 1) & BLOCK_MASK,
426 | (s.bound.y1 + BLOCK_SIZE - 1) & BLOCK_MASK};
427 | uint32_t *color_row = f._pixels + (tiles.y0 * stride);
428 | float *depth_row = f._depth + (tiles.y0 * stride);
429 |
430 | // small triangles are drawn with affine texturing: the distortion is
431 | // hardly visible and the cheaper stamp is faster.
432 | if (s.affine) {
433 | for (int32_t ty = tiles.y0; ty < tiles.y1; ty += BLOCK_SIZE) {
434 | for (int32_t tx = tiles.x0; tx < tiles.x1; tx += BLOCK_SIZE) {
435 | const float2 corner{float(tx), float(ty)};
436 | stamp_affine(s, tex, corner, color_row + tx, depth_row + tx, stride);
437 | }
438 | // advance both buffers by one row of tiles
439 | color_row += stride * BLOCK_SIZE; depth_row += stride * BLOCK_SIZE;
440 | }
441 | return;
442 | }
443 |
444 | const auto &edge_a = s.edge[0];
445 | const auto &edge_b = s.edge[1];
446 | const auto &edge_c = s.edge[2];
447 | for (int32_t ty = tiles.y0; ty < tiles.y1; ty += BLOCK_SIZE) {
448 | for (int32_t tx = tiles.x0; tx < tiles.x1; tx += BLOCK_SIZE) {
449 | const float2 lo{float(tx), float(ty)};
450 | const float2 hi{float(tx + BLOCK_SIZE), float(ty + BLOCK_SIZE)};
451 |
452 | // skip tiles that lie wholly outside any one edge
453 | if (trivial_out(edge_a, lo, hi) ||
454 | trivial_out(edge_b, lo, hi) ||
455 | trivial_out(edge_c, lo, hi)) {
456 | continue;
457 | }
458 |
459 | // a tile wholly inside all three edges needs no per-pixel edge test
460 | const bool covered = trivial_in(edge_a, lo, hi) &&
461 | trivial_in(edge_b, lo, hi) &&
462 | trivial_in(edge_c, lo, hi);
463 | if (covered) {
464 | stamp_ti(s, tex, lo, color_row + tx, depth_row + tx, stride);
465 | } else {
466 | stamp(s, tex, lo, color_row + tx, depth_row + tx, stride);
467 | }
468 | }
469 | color_row += stride * BLOCK_SIZE; depth_row += stride * BLOCK_SIZE; // next row of tiles
470 | }
471 | }
472 |
--------------------------------------------------------------------------------
/rast_reference/rast_reference.cpp:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 | #include
5 |
6 | #include
7 |
8 | #include "../source/context.h"
9 | #include "../source/math.h"
10 | #include "../source/raster.h"
11 | #include "../source/texture.h"
12 |
13 | #include "kernel.h"
14 |
15 | // signature shared by every rasterizer kernel
16 | using draw_func_t = void(const frame_t &f,
17 | const triangle_setup_t &s,
18 | const texture_t &tex);
19 | // one kernel per (source, destination) blend factor pair, defined in the kernels/ sources
20 | draw_func_t rast_tex_one_zero;
21 | draw_func_t rast_tex_one_one;
22 | draw_func_t rast_tex_dst_src;
23 | draw_func_t rast_tex_sa_msa;
24 | draw_func_t rast_tex_dst_zero;
25 | draw_func_t rast_tex_one_msc;
26 | draw_func_t rast_tex_one_msa;
27 | // 128-entry lookup table whose values are mip levels 0..5 (presumably indexed by an area ratio in get_mip_level - confirm)
28 | // ~log3.75 (should be log4 but this looks nice)
29 | const std::array mip_log_table = {
30 | 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
31 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
32 | 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
33 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
34 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
35 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5,
36 | };
37 | // Map a GL blend factor to a 4-bit code (0x0..0xa); an unrecognised factor yields 0x0, the same code as GL_ZERO.
38 | static inline constexpr uint32_t blend_code(GLenum mode) {
39 | return
40 | (mode == GL_ZERO) ? 0x0 :
41 | (mode == GL_ONE) ? 0x1 :
42 | (mode == GL_DST_COLOR) ? 0x2 :
43 | (mode == GL_SRC_COLOR) ? 0x3 :
44 | (mode == GL_ONE_MINUS_DST_COLOR) ? 0x4 :
45 | (mode == GL_ONE_MINUS_SRC_COLOR) ? 0x5 :
46 | (mode == GL_SRC_ALPHA) ? 0x6 :
47 | (mode == GL_ONE_MINUS_SRC_ALPHA) ? 0x7 :
48 | (mode == GL_DST_ALPHA) ? 0x8 :
49 | (mode == GL_ONE_MINUS_DST_ALPHA) ? 0x9 :
50 | (mode == GL_SRC_ALPHA_SATURATE) ? 0xa : 0x0; // single return expression keeps this usable in case labels
51 | }
52 | // Pack a blend factor pair into one byte: source code in the high nibble, destination code in the low nibble.
53 | static inline constexpr uint32_t blend_code(GLenum src, GLenum dst) {
54 | return blend_code(dst) | (blend_code(src) << 4);
55 | }
56 |
57 | struct rast_reference_t : public raster_t {
58 | // Software rasterizer backend: picks a blend-specific kernel per batch and draws textured triangles into the context's colour and depth buffers.
59 | rast_reference_t() {
60 | _cxt = nullptr;
61 | _frame._pixels = nullptr;
62 | _tex = nullptr;
63 | _draw_func = nullptr;
64 | }
65 | // Clear the colour and/or depth buffer of the attached context; does nothing before start(). The stencil arguments are unused.
66 | void framebuffer_clear(
67 | bool color,
68 | bool depth,
69 | bool stencil,
70 | uint32_t clear_color,
71 | float clear_depth,
72 | uint32_t clear_stencil) override {
73 | if (_cxt) {
74 | if (color) {
75 | _cxt->buffer.clear_colour(0x202020); // NOTE(review): clear_color is ignored in favour of a fixed dark grey - confirm this is intended
76 | }
77 | if (depth) {
78 | _cxt->buffer.clear_depth(clear_depth);
79 | }
80 | }
81 | }
82 | // Drop the cached colour pointer; push_triangles() draws nothing until framebuffer_aquire() is called again.
83 | void framebuffer_release() override {
84 | _frame._pixels = nullptr;
85 | }
86 | // Cache the context's buffer pointers and dimensions. Dereferences _cxt, so it must only be called between start() and stop().
87 | void framebuffer_aquire() override {
88 | _frame._pixels = _cxt->buffer.pixels();
89 | _frame._depth = _cxt->buffer.depth();
90 | _frame._width = _cxt->buffer.width();
91 | _frame._height = _cxt->buffer.height();
92 | }
93 | // Attach to a GL context; always succeeds.
94 | bool start(gl_context_t &cxt) override {
95 | _cxt = &cxt;
96 | return true;
97 | }
98 | // Detach from the GL context.
99 | void stop() override {
100 | _cxt = nullptr;
101 | }
102 | // Rasterize a batch of triangles with the given texture and render state (element type restored; it was lost from the listing).
103 | void push_triangles(const std::vector<triangle_t> &triangles,
104 | const texture_t *tex,
105 | const state_manager_t &state) override;
106 | // drawing is immediate, so there is nothing to flush
107 | void flush() override {}
108 | // presentation is not handled by this backend
109 | void present() override {}
110 |
111 | protected:
112 | // choose the kernel matching the current blend state
113 | draw_func_t *find_draw_func(const state_manager_t &state);
114 | // kernel selected for the batch being drawn
115 | draw_func_t *_draw_func;
116 | // compute bounds, edges and interpolants for one triangle; false means skip it
117 | bool setup_triangle(const triangle_t &t,
118 | triangle_setup_t &s);
119 | // texture of the batch being drawn (not owned); context (not owned); cached framebuffer view
120 | const texture_t *_tex;
121 | gl_context_t *_cxt;
122 | frame_t _frame;
123 | };
124 | // Select the kernel for the current blend state. With blending disabled, or for a factor pair that has no dedicated kernel, the plain overwrite kernel (GL_ONE, GL_ZERO) is used.
125 | draw_func_t *rast_reference_t::find_draw_func(const state_manager_t &state) {
126 | if (state.blendFrag) {
127 | switch (blend_code(state.blendFuncSrc, state.blendFuncDst)) {
128 | case blend_code(GL_ONE, GL_ZERO):
129 | return rast_tex_one_zero;
130 | case blend_code(GL_ONE, GL_ONE):
131 | return rast_tex_one_one; // additive blending: previously dispatched to the overwrite kernel by mistake
132 | case blend_code(GL_DST_COLOR, GL_SRC_COLOR):
133 | return rast_tex_dst_src;
134 | case blend_code(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA):
135 | return rast_tex_sa_msa;
136 | case blend_code(GL_DST_COLOR, GL_ZERO):
137 | return rast_tex_dst_zero;
138 | case blend_code(GL_ONE, GL_ONE_MINUS_SRC_COLOR):
139 | return rast_tex_one_msc;
140 | case blend_code(GL_ONE, GL_ONE_MINUS_SRC_ALPHA):
141 | return rast_tex_one_msa;
142 | default:
143 | // DEBUG_BREAK;
144 | break;
145 | }
146 | }
147 | return rast_tex_one_zero;
148 | }
149 |
150 | bool rast_reference_t::setup_triangle(const triangle_t &t,
151 | triangle_setup_t &s) {
152 | // Fill 's' with the clipped bounding box, edge equations and interpolant planes of 't'. Returns false when the triangle is off screen or has zero area and must be skipped.
153 | // isolate 2d coordinates
154 | const float2 v0{t.vert[0].coord.x, t.vert[0].coord.y};
155 | const float2 v1{t.vert[1].coord.x, t.vert[1].coord.y};
156 | const float2 v2{t.vert[2].coord.x, t.vert[2].coord.y};
157 |
158 | // compute triangle bounding box
159 | s.bound.x0 = std::min({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)});
160 | s.bound.y0 = std::min({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)});
161 | s.bound.x1 = std::max({int32_t(v0.x), int32_t(v1.x), int32_t(v2.x)});
162 | s.bound.y1 = std::max({int32_t(v0.y), int32_t(v1.y), int32_t(v2.y)});
163 |
164 | // reject if off screen (valid pixels are 0 .. size - 1, so a minimum equal to the size is already outside)
165 | if (s.bound.x1 < 0) return false;
166 | if (s.bound.x0 >= _frame._width) return false;
167 | if (s.bound.y1 < 0) return false;
168 | if (s.bound.y0 >= _frame._height) return false;
169 |
170 | // clip against screen bounds
171 | s.bound.x0 = std::max(s.bound.x0, 0);
172 | s.bound.y0 = std::max(s.bound.y0, 0);
173 | s.bound.x1 = std::min(s.bound.x1, _frame._width - 1);
174 | s.bound.y1 = std::min(s.bound.y1, _frame._height - 1);
175 |
176 | // find the area of the triangle
177 | const float area = triangle_area(v0, v1, v2);
178 | if (area == 0.f) return false; // degenerate triangle: covers no pixels and would divide by zero below
179 | // heuristic for if we should use affine mapping
180 | const bool affine = affine_heuristic(s.bound);
181 | s.affine = affine;
182 |
183 | // the signed area of the UVs (texel space)
184 | if (_tex) {
185 | const float2 &t0 = t.vert[0].tex;
186 | const float2 &t1 = t.vert[1].tex;
187 | const float2 &t2 = t.vert[2].tex;
188 |
189 | const float texture_area = _tex->_width * _tex->_height;
190 | const float uv_area =
191 | ((t1.x - t0.x) * (t2.y - t0.y) - (t2.x - t0.x) * (t1.y - t0.y));
192 | s.mip_level = get_mip_level(area, uv_area * texture_area);
193 | } else { s.mip_level = 0; } // never leave the mip level uninitialised when no texture is bound
194 |
195 | // find edge vectors
196 | const float2 d01 = v1 - v0;
197 | const float2 d12 = v2 - v1;
198 | const float2 d20 = v0 - v2;
199 |
200 | // cross product gives us normals from the edges
201 | // which we 'normalize' to the area of the triangle
202 | const float2 n0 = float2::cross(d12) / area;
203 | const float2 n1 = float2::cross(d20) / area;
204 | const float2 n2 = float2::cross(d01) / area;
205 |
206 | // evaluate the starting position for each interpolant
207 | const float s0 = evaluate(n0, v1);
208 | const float s1 = evaluate(n1, v2);
209 | const float s2 = evaluate(n2, v0);
210 |
211 | // store edges
212 | s.edge[0] = float3{n0.x, n0.y, s0};
213 | s.edge[1] = float3{n1.x, n1.y, s1};
214 | s.edge[2] = float3{n2.x, n2.y, s2};
215 |
216 | // edge function interpolants
217 | {
218 | s. v[triangle_setup_t::slot_w0] = s0;
219 | s.vx[triangle_setup_t::slot_w0] = n0.x;
220 | s.vy[triangle_setup_t::slot_w0] = n0.y;
221 |
222 | s. v[triangle_setup_t::slot_w1] = s1;
223 | s.vx[triangle_setup_t::slot_w1] = n1.x;
224 | s.vy[triangle_setup_t::slot_w1] = n1.y;
225 | }
226 |
227 | // XXX: make this float3 and use dot products
228 | std::array<float, 9> c;
229 |
230 | // c[0..2]: start values, c[3..5]: x gradients, c[6..8]: y gradients, each scaled by that vertex's coord.w (or by 1 when affine)
231 |
232 | // 1/w interpolation
233 | {
234 | const float iw0 = affine ? 1.f : t.vert[0].coord.w;
235 | const float iw1 = affine ? 1.f : t.vert[1].coord.w;
236 | const float iw2 = affine ? 1.f : t.vert[2].coord.w;
237 |
238 | c[0] = s0 * iw0; c[1] = s1 * iw1; c[2] = s2 * iw2;
239 | c[3] = n0.x * iw0; c[4] = n1.x * iw1; c[5] = n2.x * iw2;
240 | c[6] = n0.y * iw0; c[7] = n1.y * iw1; c[8] = n2.y * iw2;
241 |
242 | const uint32_t slot = triangle_setup_t::slot_iw;
243 | s.v [slot] = c[0] + c[1] + c[2];
244 | s.vx[slot] = c[3] + c[4] + c[5];
245 | s.vy[slot] = c[6] + c[7] + c[8];
246 | }
247 |
248 | // z (uses the unscaled edge terms, unlike the attributes below)
249 | {
250 | const uint32_t slot = triangle_setup_t::slot_z;
251 | const float a0 = t.vert[0].coord.z;
252 | const float a1 = t.vert[1].coord.z;
253 | const float a2 = t.vert[2].coord.z;
254 | s.v [slot] = s0 * a0 + s1 * a1 + s2 * a2;
255 | s.vx[slot] = n0.x * a0 + n1.x * a1 + n2.x * a2;
256 | s.vy[slot] = n0.y * a0 + n1.y * a1 + n2.y * a2;
257 | }
258 |
259 | // a (NOTE(review): alpha reads rgba.x and red reads rgba.w - confirm the component layout of rgba)
260 | {
261 | const uint32_t slot = triangle_setup_t::slot_a;
262 | const float a0 = t.vert[0].rgba.x;
263 | const float a1 = t.vert[1].rgba.x;
264 | const float a2 = t.vert[2].rgba.x;
265 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
266 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
267 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
268 | }
269 |
270 | // r
271 | {
272 | const uint32_t slot = triangle_setup_t::slot_r;
273 | const float a0 = t.vert[0].rgba.w;
274 | const float a1 = t.vert[1].rgba.w;
275 | const float a2 = t.vert[2].rgba.w;
276 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
277 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
278 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
279 | }
280 |
281 | // g
282 | {
283 | const uint32_t slot = triangle_setup_t::slot_g;
284 | const float a0 = t.vert[0].rgba.y;
285 | const float a1 = t.vert[1].rgba.y;
286 | const float a2 = t.vert[2].rgba.y;
287 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
288 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
289 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
290 | }
291 |
292 | // b
293 | {
294 | const uint32_t slot = triangle_setup_t::slot_b;
295 | const float a0 = t.vert[0].rgba.z;
296 | const float a1 = t.vert[1].rgba.z;
297 | const float a2 = t.vert[2].rgba.z;
298 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
299 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
300 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
301 | }
302 |
303 | // u
304 | {
305 | const uint32_t slot = triangle_setup_t::slot_u;
306 | const float a0 = t.vert[0].tex.x;
307 | const float a1 = t.vert[1].tex.x;
308 | const float a2 = t.vert[2].tex.x;
309 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
310 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
311 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
312 | }
313 |
314 | // v
315 | {
316 | const uint32_t slot = triangle_setup_t::slot_v;
317 | const float a0 = t.vert[0].tex.y;
318 | const float a1 = t.vert[1].tex.y;
319 | const float a2 = t.vert[2].tex.y;
320 | s.v [slot] = c[0] * a0 + c[1] * a1 + c[2] * a2;
321 | s.vx[slot] = c[3] * a0 + c[4] * a1 + c[5] * a2;
322 | s.vy[slot] = c[6] * a0 + c[7] * a1 + c[8] * a2;
323 | }
324 |
325 | return true;
326 | }
327 |
328 | void rast_reference_t::push_triangles(const std::vector<triangle_t> &triangles,
329 | const texture_t *tex,
330 | const state_manager_t &state) {
331 | // Draw a batch of triangles with the kernel matching the current blend state; triangles without a usable texture are currently skipped.
332 | _draw_func = find_draw_func(state);
333 |
334 | if (!_cxt || !_frame._pixels || !_draw_func) {
335 | return;
336 | }
337 | // remembered so that setup_triangle() can pick a mip level
338 | _tex = tex;
339 |
340 | for (const auto &t : triangles) {
341 | if (t.vert[0].coord.w == 0.f) {
342 | // signals fully clipped so discard
343 | continue;
344 | }
345 |
346 | triangle_setup_t setup;
347 | if (!setup_triangle(t, setup)) {
348 | continue;
349 | }
350 |
351 | if (state.texture2D || true) { // NOTE(review): '|| true' makes the texture2D state irrelevant - confirm this is intended
352 | if (tex && tex->_pixels[0]) {
353 | _draw_func(_frame, setup, *tex);
354 | } else {
355 | // TODO
356 | }
357 | }
358 | }
359 | }
360 |
361 | extern "C" { // C linkage for the entry points exported from this library
362 | __declspec(dllexport) raster_t *raster_create() {
363 | return new rast_reference_t; // hand the result back to raster_release() to free it
364 | }
365 | // NOTE(review): deleting through raster_t* relies on raster_t having a virtual destructor - confirm
366 | __declspec(dllexport) void raster_release(raster_t *r) {
367 | delete r;
368 | }
369 | };
370 |
--------------------------------------------------------------------------------
/rast_wireframe/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | file(GLOB C_FILES *.cpp) # every .cpp file in this directory
2 | file(GLOB H_FILES *.h) # every .h file in this directory
3 | # build them into the shared library softgl_rast_wireframe
4 | add_library(
5 | softgl_rast_wireframe
6 | SHARED
7 | ${C_FILES} ${H_FILES})
8 |
--------------------------------------------------------------------------------
/rast_wireframe/rast_wireframe.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "../source/raster.h"
4 | #include "../source/context.h"
5 | #include "surface.h"
6 |
7 |
8 | struct line_t { // a 2D line segment; not referenced by the code visible in this file
9 | float2 a, b; // the two end points
10 |
11 | line_t(float2 i, float2 j) : a(i), b(j) {}
12 | };
13 |
14 |
15 | struct raster_wire_t : public raster_t {
16 | // Wireframe backend: outlines every triangle with three anti-aliased lines and ignores textures and render state.
17 | raster_wire_t()
18 | : _cxt(nullptr)
19 | {}
20 | // the context's buffer is used directly, so there is nothing to release
21 | void framebuffer_release() override {}
22 | // ... and nothing to acquire
23 | void framebuffer_aquire() override {}
24 | // Clear the colour buffer to a fixed dark grey on every call (the 'color' flag and clear_color are not consulted) and, if requested, the depth buffer to 0.
25 | void framebuffer_clear(
26 | bool color,
27 | bool depth,
28 | bool stencil,
29 | uint32_t clear_color,
30 | float clear_depth,
31 | uint32_t clear_stencil) override {
32 | if (_cxt) {
33 | _cxt->buffer.clear_colour(0x202020);
34 | if (depth) {
35 | _cxt->buffer.clear_depth(0.f);
36 | }
37 | }
38 | }
39 | // Attach to a GL context; always succeeds.
40 | bool start(gl_context_t &cxt) override {
41 | _cxt = &cxt;
42 | return true;
43 | }
44 | // NOTE(review): unlike rast_reference_t::stop(), this keeps the context pointer - confirm the context outlives later calls
45 | void stop() override {}
46 | // Draw the outline of each triangle in white (element type restored; it was lost from the listing).
47 | void push_triangles(const std::vector<triangle_t> &triangles,
48 | const texture_t *tex,
49 | const state_manager_t &state) override {
50 |
51 | assert(_cxt);
52 | // view of the context's colour buffer; the pitch equals the width
53 | surface_t surf(_cxt->buffer.pixels(), _cxt->buffer.width(), _cxt->buffer.height(), _cxt->buffer.width());
54 |
55 | for (const auto &t : triangles) {
56 | if (t.vert[0].coord.w == 0.f) {
57 | // signals fully clipped vertex so discard
58 | continue;
59 | }
60 |
61 | // XXX: insert edge into edge list to avoid redraw
62 |
63 | const std::array<float2, 3> c{
64 | float2{t.vert[0].coord.x, t.vert[0].coord.y},
65 | float2{t.vert[1].coord.x, t.vert[1].coord.y},
66 | float2{t.vert[2].coord.x, t.vert[2].coord.y},
67 | };
68 | // the three edges of the triangle
69 | surf.wuline(c[0], c[1], 0xffffff);
70 | surf.wuline(c[1], c[2], 0xffffff);
71 | surf.wuline(c[2], c[0], 0xffffff);
72 | }
73 | }
74 | // drawing is immediate, so there is nothing to flush
75 | void flush() override {}
76 | // presentation is not handled by this backend
77 | void present() override {}
78 |
79 | protected:
80 | gl_context_t *_cxt; // attached context, not owned; null until start()
81 | };
82 |
83 |
84 | extern "C" { // C linkage for the entry points exported from this library
85 |
86 | __declspec(dllexport)
87 | raster_t *raster_create() {
88 | return new raster_wire_t; // hand the result back to raster_release() to free it
89 | }
90 | // NOTE(review): deleting through raster_t* relies on raster_t having a virtual destructor - confirm
91 | __declspec(dllexport)
92 | void raster_release(raster_t *r) {
93 | delete r;
94 | }
95 | };
96 |
--------------------------------------------------------------------------------
/rast_wireframe/surface.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include
4 | #include
5 |
6 | #include "surface.h"
7 | #include "math.h"
8 |
9 |
10 | namespace {
11 |
12 | // Blend two packed pixels in 8.8 fixed point: 'a' is weighted by (255 - i) and 'b' by i, each product shifted right by 8.
13 | // Only the low three 8-bit channels are processed; bytes 0 and 2 share one multiply and byte 1 gets its own, so a product never spills into a neighbouring channel.
14 | // The weight for 'a' must be the 8-bit complement of i: plain ~i promotes i to int and yields a negative value, which turned the multiplier into 0xFFFFFFxx.
15 | uint32_t alpha(uint32_t a, uint32_t b, uint8_t i) {
16 | #if 0
17 | const uint32_t t0 = ((a & 0xff00ff) + ((b & 0xff00ff) - (a & 0xff00ff)) * i) >> 8;
18 | const uint32_t t1 = ((a & 0x00ff00) + ((b & 0x00ff00) - (a & 0x00ff00)) * i) >> 8;
19 | return (t0 & 0xff00ff) | (t1 & 0x00ff00);
20 | #else
21 | // pixel a, weight 255 - i
22 | const uint32_t a0 = ((a & 0xff00ff) * (0xffu - i)) >> 8;
23 | const uint32_t a1 = ((a & 0x00ff00) * (0xffu - i)) >> 8;
24 | // pixel b, weight i
25 | const uint32_t b0 = ((b & 0xff00ff) * i) >> 8;
26 | const uint32_t b1 = ((b & 0x00ff00) * i) >> 8;
27 | // mix results (the weights sum to 255, so a channel sum cannot overflow 8 bits)
28 | return ((a0 & 0xff00ff) | (a1 & 0xff00)) +
29 | ((b0 & 0xff00ff) | (b1 & 0xff00));
30 | #endif
31 | }
32 |
33 | } // namespace {}
34 |
35 | // Clip segment a-b in place against the surface, inset by one pixel on every side.
36 | // return true - all out (or shorter than one pixel), false - inside or partial in (a and b now lie inside the clip window)
37 | bool surface_t::line_clip(float2 &a, float2 &b) {
38 |
39 | enum {
40 | CLIP_X_LO = 1,
41 | CLIP_X_HI = 2,
42 | CLIP_Y_LO = 4,
43 | CLIP_Y_HI = 8,
44 | };
45 | // clip window: [1, _width - 2] x [1, _height - 2]
46 | const float min_x = 1.f;
47 | const float min_y = 1.f;
48 | const float max_x = float(_width) - 2;
49 | const float max_y = float(_height) - 2;
50 | // The helpers capture the clip window by value, so they must be rebuilt on every call: as 'static' closures they kept the bounds of the first surface ever clipped.
51 | const auto classify_x = [=](const float2 &p) -> int {
52 | return (p.x < min_x ? CLIP_X_LO : 0) | (p.x > max_x ? CLIP_X_HI : 0);
53 | };
54 |
55 | const auto classify_y = [=](const float2 &p) -> int {
56 | return (p.y < min_y ? CLIP_Y_LO : 0) | (p.y > max_y ? CLIP_Y_HI : 0);
57 | };
58 | // outcode of a point: the set of window sides it lies beyond
59 | const auto classify = [=](const float2 &p) -> int {
60 | return classify_x(p) | classify_y(p);
61 | };
62 | // each clip_* helper moves 'va' along the segment onto one window side when its outcode 'cl' has that side set
63 | const auto clip_y_lo = [=](int cl, float2 &va, const float2 &vb) {
64 | if (cl & CLIP_Y_LO) {
65 | const float dx = (vb.x - va.x) / (vb.y - va.y);
66 | va.x += dx * (min_y - va.y);
67 | va.y = min_y;
68 | }
69 | };
70 |
71 | const auto clip_y_hi = [=](int cl, float2 &va, const float2 &vb) {
72 | if (cl & CLIP_Y_HI) {
73 | const float dx = (vb.x - va.x) / (vb.y - va.y);
74 | va.x -= dx * (va.y - max_y);
75 | va.y = max_y;
76 | }
77 | };
78 |
79 | const auto clip_x_lo = [=](int cl, float2 &va, const float2 &vb) {
80 | if (cl & CLIP_X_LO) {
81 | const float dy = (vb.y - va.y) / (vb.x - va.x);
82 | va.y += dy * (min_x - va.x);
83 | va.x = min_x;
84 | }
85 | };
86 |
87 | const auto clip_x_hi = [=](int cl, float2 &va, const float2 &vb) {
88 | if (cl & CLIP_X_HI) {
89 | const float dy = (vb.y - va.y) / (vb.x - va.x);
90 | va.y -= dy * (va.x - max_x);
91 | va.x = max_x;
92 | }
93 | };
94 | // pass 0 clips in x, pass 1 clips in y; the segment is re-classified before every pass
95 | for (uint32_t i = 0;; ++i) {
96 |
97 | if ((fabs(b.x - a.x) + fabs(b.y - a.y)) < 1.f) {
98 | // remove ultra tiny points
99 | return true;
100 | }
101 |
102 | const int ca = classify(a);
103 | const int cb = classify(b);
104 |
105 | if (0 == (ca | cb)) {
106 | // all in center, no clipping
107 | return false;
108 | }
109 |
110 | const int code = ca & cb;
111 | if ((code & CLIP_X_LO) || (code & CLIP_X_HI) ||
112 | (code & CLIP_Y_LO) || (code & CLIP_Y_HI)) {
113 | // all outside one plane
114 | return true;
115 | }
116 |
117 | // todo: clip longest axis first?
118 |
119 | switch (i) {
120 | case 0:
121 | clip_x_lo(ca, a, b);
122 | clip_x_hi(ca, a, b);
123 | clip_x_lo(cb, b, a);
124 | clip_x_hi(cb, b, a);
125 | break;
126 | case 1:
127 | clip_y_lo(ca, a, b);
128 | clip_y_hi(ca, a, b);
129 | clip_y_lo(cb, b, a);
130 | clip_y_hi(cb, b, a);
131 | break;
132 | default:
133 | assert(!"bad clip"); return true; // with asserts compiled out, drop the line instead of looping forever
134 | }
135 | }
136 |
137 | return false;
138 | }
139 |
140 | // fast fixed point line drawing
141 | void surface_t::line(float2 a, float2 b, uint32_t rgb) {
142 |
143 | // clip line to screen
144 | if (line_clip(a, b)) {
145 | // fully clipped
146 | return;
147 | }
148 |
149 | const float dx = b.x - a.x, dy = b.y - a.y;
150 | const float adx = fabsf(dx), ady = fabs(dy);
151 |
152 | static const float fract = float(1u << 16);
153 |
154 | // select the longest axis
155 | if (fabsf(dx) > fabsf(dy)) {
156 |
157 | // sort vertices in y axis
158 | if (b.x < a.x)
159 | std::swap(a, b);
160 | // compute dy/dx
161 | const float ndy = (b.y - a.y) / adx;
162 | // convert y itterator to fixed point
163 | const int32_t iy = int32_t(ndy * fract);
164 | int32_t y = int32_t(a.y * fract);
165 |
166 | // quantize start and end locations
167 | const int32_t iax = int32_t(a.x);
168 | const int32_t ibx = int32_t(b.x);
169 | // raster loop
170 | {
171 | for (int32_t x = iax; x < ibx; ++x, y += iy) {
172 | _pixels[x + (y >> 16) * _pitch] = rgb;
173 | }
174 | }
175 | } else {
176 | // sort vertices in y axis
177 | if (b.y < a.y)
178 | std::swap(a, b);
179 | // compute dx/dy
180 | const float ndx = (b.x - a.x) / ady;
181 | // convert x itterator to fixed point
182 | const int32_t ix = int32_t(ndx * fract);
183 | int32_t x = int32_t(a.x * fract);
184 |
185 | // quantize start and end locations
186 | const int32_t iay = int32_t(a.y);
187 | const int32_t iby = int32_t(b.y);
188 | // raster loop
189 | {
190 | uint32_t *pix = _pixels + (x >> 16) + iay * _pitch;
191 | int32_t xstep = ix > 0 ? 1 : -1;
192 | for (int32_t y = iay; y < iby; ++y, x += ix) {
193 | _pixels[(x >> 16) + y * _pitch] = rgb;
194 | }
195 | }
196 | }
197 | }
198 |
199 | // fast fixed point anti-aliased line drawing
200 | void surface_t::wuline(float2 a, float2 b, uint32_t rgb) {
201 |
202 | // clip line to screen
203 | if (line_clip(a, b)) {
204 | // fully clipped
205 | return;
206 | }
207 |
208 | if (std::isnan(a.x) || std::isnan(a.y) ||
209 | std::isnan(b.x) || std::isnan(b.y)) {
210 | return;
211 | }
212 |
213 | const float dx = b.x - a.x, dy = b.y - a.y;
214 | const float adx = fabsf(dx), ady = fabs(dy);
215 |
216 | static const float point = float(1u << 16);
217 |
218 | // select the longest axis
219 | if (fabsf(dx) > fabsf(dy)) {
220 | // sort vertices in x axis
221 | if (b.x < a.x)
222 | std::swap(a, b);
223 | // compute dy/dx
224 | const float ndy = (b.y - a.y) / adx;
225 | // convert y itterator to fixed point
226 | const int32_t iy = int32_t(ndy * point);
227 | int32_t y = int32_t(a.y * point);
228 | // quantize start and end locations
229 | const int32_t iax = int32_t(a.x);
230 | const int32_t ibx = int32_t(b.x);
231 | // raster loop
232 | {
233 | for (int32_t x = iax; x < ibx; ++x, y += iy) {
234 | const uint32_t fract = (y & 0xffff) >> 8;
235 | uint32_t &p0 = _pixels[x + (y >> 16) * _pitch];
236 | uint32_t &p1 = _pixels[x + (y >> 16) * _pitch + _pitch];
237 | p0 = alpha(rgb, p0, fract);
238 | p1 = alpha(rgb, p1, fract ^ 0xff);
239 | }
240 | }
241 | } else {
242 | // sort vertices in y axis
243 | if (b.y < a.y)
244 | std::swap(a, b);
245 | // compute dx/dy
246 | const float ndx = (b.x - a.x) / ady;
247 | // convert x itterator to fixed point
248 | const int32_t ix = int32_t(ndx * point);
249 | int32_t x = int32_t(a.x * point);
250 | // quantize start and end locations
251 | const int32_t iay = int32_t(a.y);
252 | const int32_t iby = int32_t(b.y);
253 | // raster loop
254 | {
255 | uint32_t *pix = _pixels + (x >> 16) + iay * _pitch;
256 | int32_t xstep = ix > 0 ? 1 : -1;
257 | for (int32_t y = iay; y < iby; ++y, x += ix) {
258 | const uint32_t fract = (x & 0xffff) >> 8;
259 | uint32_t &p0 = _pixels[(x >> 16) + y * _pitch];
260 | uint32_t &p1 = _pixels[(x >> 16) + y * _pitch + 1];
261 | p0 = alpha(rgb, p0, fract);
262 | p1 = alpha(rgb, p1, fract ^ 0xff);
263 | }
264 | }
265 | }
266 | }
267 |
268 | void surface_t::plot(const float2 &p, uint32_t rgb) {
269 | const int32_t x = int32_t(floorf(p.x));
270 | const int32_t y = int32_t(floorf(p.y));
271 | if (x < 0 || y < 0 || x >= int32_t(_width) || y >= int32_t(_height)) {
272 | return;
273 | }
274 | _pixels[x + y * _pitch] = rgb;
275 | }
276 |
277 | void surface_t::wuplot(const float2 &p, uint32_t rgb) {
278 |
279 | if (p.x < 0 || p.x >= int32_t(_width -1) ||
280 | p.y < 0 || p.y >= int32_t(_height-1)) {
281 | return;
282 | }
283 |
284 | const float ix = floorf(p.x);
285 | const float iy = floorf(p.y);
286 |
287 | const float fx = p.x - ix;
288 | const float fy = p.y - iy;
289 |
290 | const uint32_t bx = uint32_t(fx * 0xff);
291 | const uint32_t by = uint32_t(fy * 0xff);
292 | const uint32_t jx = bx ^ 0xff;
293 | const uint32_t jy = by ^ 0xff;
294 |
295 | uint32_t &p00 = _pixels[int32_t(ix + 0) + int32_t(iy + 0) * _pitch];
296 | uint32_t &p10 = _pixels[int32_t(ix + 1) + int32_t(iy + 0) * _pitch];
297 | uint32_t &p01 = _pixels[int32_t(ix + 0) + int32_t(iy + 1) * _pitch];
298 | uint32_t &p11 = _pixels[int32_t(ix + 1) + int32_t(iy + 1) * _pitch];
299 |
300 | p00 = alpha(rgb, p00, 0xff ^ ((jx * jy) >> 8));
301 | p10 = alpha(rgb, p10, 0xff ^ ((bx * jy) >> 8));
302 | p01 = alpha(rgb, p01, 0xff ^ ((jx * by) >> 8));
303 | p11 = alpha(rgb, p11, 0xff ^ ((bx * by) >> 8));
304 | }
305 |
306 | void surface_t::fill(uint32_t rgb) {
307 | const uint32_t len = _pitch * _height;
308 | for (uint32_t i = 0; i < len; ++i) {
309 | _pixels[i] = rgb;
310 | }
311 | }
312 |
--------------------------------------------------------------------------------
/rast_wireframe/surface.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | #include "../source/math.h"
6 |
7 |
8 | struct surface_t {
9 |
10 | surface_t() = default;
11 |
12 | surface_t(uint32_t *pixels, uint32_t width, uint32_t height, uint32_t pitch)
13 | : _pixels(pixels)
14 | , _width(width)
15 | , _height(height)
16 | , _pitch(pitch)
17 | {
18 | assert(pixels && width && height && pitch);
19 | }
20 |
21 | uint32_t width() const {
22 | return _width;
23 | }
24 |
25 | uint32_t height() const {
26 | return _height;
27 | }
28 |
29 | uint32_t pitch() const {
30 | return _pitch;
31 | }
32 |
33 | uint32_t *pixels() {
34 | return _pixels;
35 | }
36 |
37 | const uint32_t *pixels() const {
38 | return _pixels;
39 | }
40 |
41 | void plot(const float2 &a, uint32_t rgb);
42 | void wuplot(const float2 &a, uint32_t rgb);
43 |
44 | void line(float2 a, float2 b, uint32_t rgb);
45 | void wuline(float2 a, float2 b, uint32_t rgb);
46 |
47 | void fill(uint32_t rgb);
48 |
49 | protected:
50 | bool line_clip(float2 &a, float2 &b);
51 |
52 | uint32_t *_pixels;
53 | uint32_t _pitch;
54 | uint32_t _width;
55 | uint32_t _height;
56 | };
57 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # SoftGL
2 |
3 | An OpenGL implementation using software rendering on the CPU.
4 |
5 |
6 | ## Q&A
7 |
8 | > Why would I make this?
9 | >
10 | > Because it's fun to learn OpenGL from the wrong end.
11 |
12 | > What version of OpenGL does this support?
13 | >
14 | > Somewhere around OpenGL 1.4.
15 |
16 | > Is it fast?
17 | >
18 | > While I'm trying to make it fast, it's not even close to something like a GeForce 256 graphics card.
19 |
20 | > How do I use this?
21 | >
22 | > You don't, and you shouldn't. It's really just an experiment.
23 |
24 | > But really how do I use this?
25 | >
26 | > 1. Compile it.
27 | > 2. Copy OpenGL32.dll, softgl_rast_reference.dll and softgl.cfg into your game folder.
28 | > 3. Launch your game (which will most likely crash.)
29 |
30 | > What's your goal?
31 | >
32 | > When something like Doom3 is playable.
33 |
34 | > Is it conformant?
35 | >
36 | > No.
37 |
38 | ## Features
39 |
40 | - Per triangle Mip-Mapping
41 | - Perspective correct texture mapping
42 | - Simple backend architecture
43 | - Experimental OpenCL backend
44 | - Not much else at the moment
45 |
46 |
47 | ## Optimizations
48 |
49 | - Affine mapping for small triangles
50 | - Vectorizable edge based rendering
51 | - 16x16 stamp rendering with fast culling
52 | - Stamps coded in tight SSE
53 |
54 |
55 | ## The future
56 |
57 | - Support more old OpenGL games
58 | - Fixed point UV interpolation
59 | - More blend modes
60 | - Use AVX stamps
61 | - Multithread the rasterizer
62 | - JIT the raster stamps
63 | - Improve the OpenCL backend
64 | - Z-buffer at the stamp level for fast culling
65 | - More optimizations
66 | - Support more features
67 | - Alpha Test
68 | - Scissor
69 | - Texenv
70 | - Stencil Test
71 | - ...
72 |
73 |
74 | ## Statistics
75 |
76 | Stats from the quake 3 timedemo (default settings) invoked via `quake3.exe +timedemo 1 +demo four`.
77 |
78 | > 1260 frames 51.0 seconds 24.7 fps 15.0/40.5/78.0/11.2 ms
79 |
80 |
81 | ## Screenshots
82 |
83 | These screenshots show the current state of progress on 15/04/2020.
84 |
85 | 
86 |
87 | 
88 |
89 | 
90 |
91 | 
92 |
93 | 
94 |
95 | 
96 |
97 | 
98 |
--------------------------------------------------------------------------------
/softgl.cfg:
--------------------------------------------------------------------------------
1 | raster_dll softgl_rast_reference.dll
2 | profile true
3 | open_stdio true
4 |
--------------------------------------------------------------------------------
/source/buffer.cpp:
--------------------------------------------------------------------------------
1 | #include "buffer.h"
2 | #include "context.h"
3 |
4 | buffer_manager_t::buffer_manager_t()
5 | : _width(0), _height(0), _pixels(nullptr), _depth(nullptr) {}
6 |
7 | void buffer_manager_t::_release() {
8 | if (_pixels) {
9 | _aligned_free(_pixels);
10 | _pixels = nullptr;
11 | }
12 | if (_depth) {
13 | _aligned_free(_depth);
14 | _depth = nullptr;
15 | }
16 | }
17 |
18 | void buffer_manager_t::resize(int32_t w, int32_t h) {
19 | // release framebuffer
20 | if (Context && Context->raster.inst) {
21 | Context->raster.inst->framebuffer_release();
22 | }
23 |
24 | _width = w;
25 | _height = h;
26 |
27 | if (_pixels || _depth) {
28 | _release();
29 | }
30 | // allocate aligned buffer planes
31 | // XXX: note offsets so pixels and depth dont alias in cache
32 | _pixels = (uint32_t *)_aligned_malloc(w * h * sizeof(uint32_t), 16);
33 | _depth = (float *)_aligned_malloc(w * h * sizeof(float), 16);
34 | // notify context of resize
35 | if (Context) {
36 | Context->on_resize();
37 | }
38 | // aquire framebuffer
39 | if (Context && Context->raster.inst) {
40 | Context->raster.inst->framebuffer_aquire();
41 | }
42 | }
43 |
44 | void buffer_manager_t::save_bmp() {
45 | // save a screenshot
46 | ::save_bmp(pixels(), width(), height(), "screenshot.bmp");
47 | }
48 |
--------------------------------------------------------------------------------
/source/buffer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | #include
6 |
7 |
// Owns the framebuffer: one 32 bit colour plane and one float depth plane,
// both width * height elements.
struct buffer_manager_t {

  buffer_manager_t();

  ~buffer_manager_t() { _release(); }

  // the destructor frees both planes, so a copy would end up freeing the
  // same memory twice
  buffer_manager_t(const buffer_manager_t &) = delete;
  buffer_manager_t &operator=(const buffer_manager_t &) = delete;

  // throw away the current planes and allocate new ones of w x h
  void resize(int32_t w, int32_t h);

  uint32_t width() const { return _width; }

  uint32_t height() const { return _height; }

  // colour plane (null until the first resize)
  uint32_t *pixels() const { return _pixels; }

  // depth plane; asserts that it has been allocated
  float *depth() const {
    assert(_depth);
    return _depth;
  }

  // set every colour value to `rgb`
  void clear_colour(const uint32_t rgb) {
    const int32_t area = _width * _height;
    const uint32_t *end = _pixels + area;
    for (uint32_t *ptr = _pixels; ptr != end; ++ptr) {
      *ptr = rgb;
    }
  }

  // set every depth value to `value`
  void clear_depth(const float value) {
    const int32_t area = _width * _height;
    const float *end = _depth + area;
    for (float *ptr = _depth; ptr != end; ++ptr) {
      *ptr = value;
    }
  }

  // write the colour plane out as a screenshot
  void save_bmp();

protected:
  // free both planes
  void _release();

  uint32_t _width, _height;
  uint32_t *_pixels;
  float *_depth;
};
54 |
--------------------------------------------------------------------------------
/source/common.cpp:
--------------------------------------------------------------------------------
1 | #include "common.h"
2 |
// Placeholder with an empty body: nothing is written and all arguments are
// ignored. Presumably meant to store `pixels` (w x h) as a bitmap at `path`.
void save_bmp(const uint32_t *pixels, uint32_t w, uint32_t h, const char *path) {
  (void)pixels;
  (void)w;
  (void)h;
  (void)path;
}
6 |
--------------------------------------------------------------------------------
/source/common.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 |
4 |
5 | // a simple break once macro
6 | #if _DEBUG
7 | #define DEBUG_BREAK \
8 | { \
9 | static bool once=true; \
10 | if (once) { \
11 | __debugbreak(); \
12 | once = false; \
13 | printf("%s\n", __FUNCTION__); \
14 | } \
15 | }
16 | #else
17 | #define DEBUG_BREAK // __assume(0)
18 | #endif
19 |
20 | #if 0
21 | struct rectf_t {
22 | float x0, y0, x1, y1;
23 |
24 | float dx() const {
25 | return x1 - x0;
26 | }
27 |
28 | float dy() const {
29 | return y1 - y0;
30 | }
31 | };
32 | #endif
33 |
34 | void save_bmp(const uint32_t *pixels, uint32_t w, uint32_t h, const char *path);
35 |
--------------------------------------------------------------------------------
/source/config.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "config.h"
4 |
5 |
// Split one config line into a key and a value.
//
//   key value
//   key "value with spaces"
//
// Leading blanks are skipped and a line whose first non-blank character is
// '#' is a comment. The key and an unquoted value end at the first space or
// control character; a quoted value ends at the closing quote or at the
// first control character other than tab.
//
// line  - zero terminated text of the line
// key   - receives the key (cleared first)
// value - receives the value (cleared first)
//
// Returns true only when both a key and a value were found.
static bool extract(const char *line, std::string &key, std::string &value) {
  // clear output strings
  key.clear();
  value.clear();

  const auto is_blank = [](char c) {
    return c == ' ' || c == '\t' || c == '\r';
  };
  // note: compare as unsigned so that bytes >= 0x80 (UTF-8, extended
  //       characters) count as part of a token even where char is signed
  const auto is_token = [](char c) {
    return static_cast<unsigned char>(c) > ' ';
  };

  const char *p = line;
  // skip whitespace
  while (is_blank(*p))
    ++p;
  // check for comments
  if (*p == '#') {
    return false;
  }
  // key
  const char *key_begin = p;
  while (is_token(*p))
    ++p;
  key.assign(key_begin, p);
  // value
  while (is_blank(*p))
    ++p;
  const char *value_begin = p;
  if (*p == '"') {
    // quoted: step over the opening quote and read up to the closing one
    ++p;
    ++value_begin;
    while (*p != '"' && (static_cast<unsigned char>(*p) >= ' ' || *p == '\t'))
      ++p;
  } else {
    while (is_token(*p))
      ++p;
  }
  value.assign(value_begin, p);
  // should have got something for both
  return !key.empty() && !value.empty();
}
41 |
42 | bool config_t::load(const char *path) {
43 | if (!path)
44 | return false;
45 | FILE *fp = fopen(path, "r");
46 | if (!fp)
47 | return false;
48 | char line[1024];
49 | std::string key, value;
50 | while (!feof(fp)) {
51 | if (!fgets(line, sizeof(line), fp)) {
52 | break;
53 | }
54 | line[sizeof(line) - 1] = '\0';
55 | if (extract(line, key, value)) {
56 | map_[key] = value;
57 | }
58 | }
59 | fclose(fp);
60 | return true;
61 | }
62 |
63 | bool config_t::save(const char *path) {
64 | if (!path)
65 | return false;
66 | FILE *fp = fopen(path, "w");
67 | if (!fp)
68 | return false;
69 | for (const auto &a : map_) {
70 | fprintf(fp, "%s %s\n", a.first.c_str(), a.second.c_str());
71 | }
72 | fclose(fp);
73 | return true;
74 | }
75 |
76 | bool config_t::clear() {
77 | map_.clear();
78 | return false;
79 | }
80 |
81 | bool config_t::get(const std::string &key, std::string *out) {
82 | auto itt = map_.find(key);
83 | if (itt != map_.end()) {
84 | *out = itt->second;
85 | return true;
86 | }
87 | return false;
88 | }
89 |
90 | bool config_t::get(const std::string &key, int32_t *out) {
91 | auto itt = map_.find(key);
92 | if (itt != map_.end()) {
93 | const std::string &val = itt->second;
94 | if (sscanf(val.c_str(), "%d", out) == 1)
95 | return true;
96 | }
97 | return false;
98 | }
99 |
100 | bool config_t::get(const std::string &key, bool *out) {
101 | auto itt = map_.find(key);
102 | bool ret = false;
103 | if (itt != map_.end()) {
104 | const std::string &val = itt->second;
105 | if (val == "True" || val == "true")
106 | *out = true, ret = true;
107 | if (val == "False" || val == "false")
108 | *out = false, ret = true;
109 | }
110 | return ret;
111 | }
112 |
113 | bool config_t::get(const std::string &key, float *out) {
114 | auto itt = map_.find(key);
115 | if (itt != map_.end()) {
116 | const std::string &val = itt->second;
117 | if (sscanf(val.c_str(), "%f", out) == 1)
118 | return true;
119 | }
120 | return false;
121 | }
122 |
123 | bool config_t::set(const std::string &key, const std::string &value) {
124 | map_[key] = value;
125 | return true;
126 | }
127 |
128 | bool config_t::set(const std::string &key, const int32_t &value) {
129 | map_[key] = std::to_string(value);
130 | return true;
131 | }
132 |
133 | bool config_t::set(const std::string &key, const bool &value) {
134 | map_[key] = std::to_string(value);
135 | return true;
136 | }
137 |
138 | bool config_t::set(const std::string &key, const float &value) {
139 | map_[key] = std::to_string(value);
140 | return true;
141 | }
142 |
143 | bool config_t::dump() {
144 | for (auto itt : map_) {
145 | printf("%s %s\n", itt.first.c_str(), itt.second.c_str());
146 | }
147 | return true;
148 | }
149 |
--------------------------------------------------------------------------------
/source/config.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
// A flat key/value store for settings, kept as text and converted on access.
struct config_t {

  config_t() = default;

  // read / write the store as a text file of "key value" lines;
  // false when the file cannot be opened
  bool load(const char *path);
  bool save(const char *path);

  // print all pairs to stdout
  bool dump();

  // remove all pairs
  bool clear();

  // look a key up and convert it; false when the key is missing or the
  // stored text cannot be converted to the requested type
  bool get(const std::string &key, std::string *out);
  bool get(const std::string &key, int32_t *out);
  bool get(const std::string &key, bool *out);
  bool get(const std::string &key, float *out);

  // store a value under a key, replacing any previous value
  bool set(const std::string &key, const std::string &value);
  bool set(const std::string &key, const int32_t &value);
  bool set(const std::string &key, const bool &value);
  bool set(const std::string &key, const float &value);

protected:
  // key -> value, both as text
  std::unordered_map<std::string, std::string> map_;
};
29 |
--------------------------------------------------------------------------------
/source/context.cpp:
--------------------------------------------------------------------------------
1 | #include "context.h"
2 | #include "gdi_hook.h"
3 | #include "log.h"
4 | #include "matrix.h"
5 |
6 | gl_context_t::gl_context_t(HWND hwnd, HDC hdc)
7 | : window(hwnd, hdc)
8 | , primative(*this)
9 | {
10 | }
11 |
12 | bool gl_context_t::on_create() {
13 | // load the softgl config
14 | if (!config.load("softgl.cfg")) {
15 | // XXX: we need some defaults or something
16 | }
17 | // create a profiler
18 | profile.reset(profile_create());
19 | // create a framebuffer
20 | buffer.resize(window.width(), window.height());
21 | // initalize the raster device
22 | if (!raster_load(raster, *this)) {
23 | // cant load a rasterizer
24 | return false;
25 | }
26 | return raster.inst->start(*this);
27 | }
28 |
29 | void gl_context_t::on_destroy() {
30 | if (raster.inst) {
31 | raster.inst->stop();
32 | }
33 | }
34 |
35 | void gl_context_t::on_flush() {
36 | if (!primative.triangles().empty()) {
37 | primative.clip_triangles();
38 | primative.convert_to_dc();
39 | primative.cull_triangles();
40 | if (profile) {
41 | profile->on_triangles(primative.triangles());
42 | }
43 | if (raster.inst) {
44 | const texture_t *tex = texture.boundTexture2d();
45 | raster.inst->push_triangles(primative.triangles(), tex, state);
46 | }
47 | primative.clear_triangles();
48 | }
49 | raster.inst->flush();
50 | }
51 |
52 | void gl_context_t::on_resize() {
53 | // set default viewport
54 | state.viewport = rectf_t{0, 0, float(buffer.width()), float(buffer.height())};
55 | // set default scissor
56 | state.scissor = rectf_t{0, 0, float(buffer.width()), float(buffer.height())};
57 | }
58 |
59 | void gl_context_t::on_make_current() {
60 | GdiHook.hook(*this);
61 | }
62 |
--------------------------------------------------------------------------------
/source/context.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include
3 | #include
4 |
5 | #include "GL.h"
6 | #include "windows.h"
7 |
8 | #include "buffer.h"
9 | #include "config.h"
10 | #include "matrix.h"
11 | #include "primative.h"
12 | #include "raster.h"
13 | #include "state.h"
14 | #include "texture.h"
15 | #include "window.h"
16 | #include "profile.h"
17 |
18 |
19 | struct gl_context_t {
20 |
21 | void *operator new(size_t request) {
22 | // note: operator required for aligned alloc
23 | assert(request);
24 | size_t alignment = alignof(gl_context_t);
25 | return _aligned_malloc(request, alignment);
26 | }
27 |
28 | void operator delete(void *ptr) {
29 | // note: operator required for aligned alloc
30 | assert(ptr);
31 | _aligned_free(ptr);
32 | }
33 |
34 | gl_context_t(HWND hwnd, HDC hdc);
35 |
36 | state_manager_t state;
37 | raster_module_t raster;
38 | buffer_manager_t buffer;
39 | window_manager_t window;
40 | matrix_manager_t matrix;
41 | texture_manager_t texture;
42 | primative_manager_t primative;
43 | config_t config;
44 | std::unique_ptr profile;
45 |
46 | bool on_create();
47 | void on_destroy();
48 | void on_flush();
49 | void on_resize();
50 | void on_make_current();
51 |
52 | struct user_cmds_t {
53 | user_cmds_t()
54 | : screenshot(false)
55 | , dmp_obj(false)
56 | , dmp_textures(false) {
57 | }
58 | bool screenshot;
59 | bool dmp_obj;
60 | bool dmp_textures;
61 | } user_cmds;
62 |
63 | protected:
64 |
65 | gl_context_t(const gl_context_t &) = delete;
66 | };
67 |
68 | // context accessor
69 | gl_context_t *getContext();
70 | #define Context getContext()
71 |
--------------------------------------------------------------------------------
/source/forward.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | struct gl_context_t;
4 | struct buffer_manager_t;
5 | struct window_manager_t;
6 | struct matrix_manager_t;
7 | struct texture_manager_t;
8 | struct gdi_hook_t;
9 | struct log_t;
10 | struct matrix_t;
11 | struct matrix_stack_t;
12 | struct vertex_t;
13 | struct triangle_t;
14 | struct primative_manager_t;
15 | struct raster_t;
16 | struct raster_module_t;
17 | struct state_manager_t;
18 | struct texture_t;
19 | struct profile_t;
20 |
21 | enum game_id_t;
22 | enum matrix_mode_t;
--------------------------------------------------------------------------------
/source/game_id.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 | #include
8 |
9 | #include "game_id.h"
10 | #include "log.h"
11 |
12 |
// 64 bit Pearson hash of `len` bytes at `x`.
//
// Eight 8 bit Pearson hashes are computed, each seeded with the first byte
// offset by its index j, and concatenated with j = 0 as the most significant
// byte.
//
// Returns 0 for an empty input, so that x[0] is never read from a buffer
// that holds no data.
static uint64_t pearsonHash(const uint8_t *x, size_t len)
{
  static const std::array<uint8_t, 256> T = {
    // 0-255 shuffled in any (random) order suffices
     98,   6,  85, 150,  36,  23, 112, 164, 135, 207, 169,   5,  26,  64, 165, 219,
     61,  20,  68,  89, 130,  63,  52, 102,  24, 229, 132, 245,  80, 216, 195, 115,
     90, 168, 156, 203, 177, 120,   2, 190, 188,   7, 100, 185, 174, 243, 162,  10,
    237,  18, 253, 225,   8, 208, 172, 244, 255, 126, 101,  79, 145, 235, 228, 121,
    123, 251,  67, 250, 161,   0, 107,  97, 241, 111, 181,  82, 249,  33,  69,  55,
     59, 153,  29,   9, 213, 167,  84,  93,  30,  46,  94,  75, 151, 114,  73, 222,
    197,  96, 210,  45,  16, 227, 248, 202,  51, 152, 252, 125,  81, 206, 215, 186,
     39, 158, 178, 187, 131, 136,   1,  49,  50,  17, 141,  91,  47, 129,  60,  99,
    154,  35,  86, 171, 105,  34,  38, 200, 147,  58,  77, 118, 173, 246,  76, 254,
    133, 232, 196, 144, 198, 124,  53,   4, 108,  74, 223, 234, 134, 230, 157, 139,
    189, 205, 199, 128, 176,  19, 211, 236, 127, 192, 231,  70, 233,  88, 146,  44,
    183, 201,  22,  83,  13, 214, 116, 109, 159,  32,  95, 226, 140, 220,  57,  12,
    221,  31, 209, 182, 143,  92, 149, 184, 148,  62, 113,  65,  37,  27, 106, 166,
      3,  14, 204,  72,  21,  41,  56,  66,  28, 193,  40, 217,  25,  54, 179, 117,
    238,  87, 240, 155, 180, 170, 242, 212, 191, 163,  78, 218, 137, 194, 175, 110,
     43, 119, 224,  71, 122, 142,  42, 160, 104,  48, 247, 103,  15,  11, 138, 239
  };
  if (x == nullptr || len == 0) {
    return 0;
  }
  uint64_t out = 0;
  for (size_t j = 0; j < sizeof(out); ++j) {
    // seed lane j with the first byte, then fold in the rest of the input
    uint8_t h = T[(x[0] + j) % 256];
    for (size_t i = 1; i < len; ++i)
      h = T[h ^ x[i]];
    out = (out << 8) | h;
  }
  return out;
}
43 |
44 | static uint64_t fileHash(const char *path)
45 | {
46 | FILE *fd = nullptr;
47 | if (fopen_s(&fd, path, "rb")) {
48 | return 0;
49 | }
50 | std::unique_ptr temp;
51 | fseek(fd, 0, SEEK_END);
52 | size_t size = ftell(fd);
53 | fseek(fd, 0, SEEK_SET);
54 | temp = std::make_unique(size);
55 | fread(temp.get(), 1, 1, fd);
56 | fclose(fd);
57 | return pearsonHash(temp.get(), size);
58 | }
59 |
60 | game_id_t getGameId()
61 | {
62 | static std::atomic_bool evaluated = false;
63 | static game_id_t game_id = e_unknown;
64 | // early exit if we have already evaluated the game id
65 | if (evaluated)
66 | return game_id;
67 | // we are about to have evaluated
68 | evaluated = true;
69 | // space for executable name
70 | std::array name;
71 | // get executable filename
72 | const DWORD written = GetModuleFileNameA(nullptr, name.data(), name.size());
73 | if (written == 0 || written >= name.size()) {
74 | return game_id;
75 | }
76 | // force trailing zero
77 | name[name.size() - 1] = '\0';
78 | // query list of known hashes
79 | const uint64_t hash = fileHash(name.data());
80 |
81 | log_t::printf("executable name: %s\n", name.data());
82 | log_t::printf("executable hash: 0x%llx\n", hash);
83 |
84 | switch (hash) {
85 | case 0xf45771674923d6a5: // yquake2 (quake2-20200324-9acb99e.zip)
86 | game_id = e_quake_2;
87 | break;
88 | case 0x2365c561bb63848e: // steam version
89 | game_id = e_ut99_goty;
90 | break;
91 | case 0x58908a8fe82c5ad4: // quake 3 demo
92 | game_id = e_quake_3;
93 | break;
94 | case 0xc388218ae8925ad4:
95 | game_id = e_ut2003_demo;
96 | break;
97 | case 0x3b86fbcbf56b79bf:
98 | game_id = e_half_life_of_demo;
99 | break;
100 | case 0x81326047f57f79bf:
101 | game_id = e_unreal_gold;
102 | break;
103 | default:
104 | game_id = e_unknown;
105 | }
106 | // return known game-id
107 | return game_id;
108 | }
109 |
--------------------------------------------------------------------------------
/source/game_id.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
// Games that can be recognised from their executable.
enum game_id_t {
  e_unknown = 0,           // not recognised
  e_nehe_lesson_01 = 1,
  e_quake_2 = 2,
  e_quake_3 = 3,
  e_half_life = 4,
  e_half_life_of_demo = 5,
  e_ut99_goty = 6,
  e_ut2003_demo = 7,
  e_unreal_gold = 8
};
14 |
15 | game_id_t getGameId();
16 |
--------------------------------------------------------------------------------
/source/gdi_hook.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include