├── .gitignore
├── momentous.sln
├── util.h
├── momentous.vcxproj.filters
├── README.md
├── d3du.h
├── util.cpp
├── shaders.hlsl
├── momentous.vcxproj
├── math.h
├── main.cpp
└── d3du.cpp
/.gitignore:
--------------------------------------------------------------------------------
1 | Debug
2 | Release
3 | *.sdf
4 | *.opensdf
5 | *.suo
--------------------------------------------------------------------------------
/momentous.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Express 2012 for Windows Desktop
4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "momentous", "momentous.vcxproj", "{AD11938C-C989-43EE-B618-36CD6118C035}"
5 | EndProject
6 | Global
7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
8 | Debug|Win32 = Debug|Win32
9 | Release|Win32 = Release|Win32
10 | EndGlobalSection
11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
12 | {AD11938C-C989-43EE-B618-36CD6118C035}.Debug|Win32.ActiveCfg = Debug|Win32
13 | {AD11938C-C989-43EE-B618-36CD6118C035}.Debug|Win32.Build.0 = Debug|Win32
14 | {AD11938C-C989-43EE-B618-36CD6118C035}.Release|Win32.ActiveCfg = Release|Win32
15 | {AD11938C-C989-43EE-B618-36CD6118C035}.Release|Win32.Build.0 = Release|Win32
16 | EndGlobalSection
17 | GlobalSection(SolutionProperties) = preSolution
18 | HideSolutionNode = FALSE
19 | EndGlobalSection
20 | EndGlobal
21 |
--------------------------------------------------------------------------------
/util.h:
--------------------------------------------------------------------------------
1 | #ifndef __UTIL_H__
2 | #define __UTIL_H__
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | // General utility functions
9 |
10 | void panic( char const * fmt, ... );
11 | char * read_file( char const * filename ); // mallocs result, you need to free()
12 | void dump_dwords( unsigned int const * vals, unsigned int num_dwords );
13 |
14 | // pixel compare that returns position of first mismatch
15 | // pos_x / pos_y may both be NULL.
16 | int pixel_compare_pos( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h, int * pos_x, int * pos_y );
17 |
18 | // pixel compare without position reporting
19 | int pixel_compare( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h );
20 | void print_pixels( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h );
21 |
22 | typedef struct run_stats run_stats;
23 |
24 | run_stats * run_stats_create( void );
25 | void run_stats_destroy( run_stats * stats );
26 | void run_stats_clear( run_stats * stats ); // reset all measurements
27 | void run_stats_record( run_stats * stats, float value ); // record a measurement
28 | void run_stats_report( run_stats * stats, char const * desc ); // print a report
29 |
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 |
34 | #endif
35 |
--------------------------------------------------------------------------------
/momentous.vcxproj.filters:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
7 |
8 |
9 | {93995380-89BD-4b04-88EB-625FBE52EBFB}
10 | h;hpp;hxx;hm;inl;inc;xsd
11 |
12 |
13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
15 |
16 |
17 |
18 |
19 | Header Files
20 |
21 |
22 | Header Files
23 |
24 |
25 | Header Files
26 |
27 |
28 |
29 |
30 | Source Files
31 |
32 |
33 | Source Files
34 |
35 |
36 | Source Files
37 |
38 |
39 |
40 |
41 | Source Files
42 |
43 |
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Momentous
2 |
3 | This is a reimplementation of the particle system from "fr-059: momentum",
4 | using D3D10 GPU hardware.
5 |
6 | I was thinking about this a few days back; the original version was using several
7 | hacks to get good performance on shader level 2 hardware, and the actual particle
8 | system ran on the CPU.
9 |
10 | Well, the particle system is actually really easy to run on the GPU provided there
11 | is support for floating point render targets and filtering of floating point volume
12 | textures. That used to be a problem in 2007 but not today.
13 |
14 | Also, having actual HW instancing support instead of having to get by with shader
15 | instancing sure makes things easier.
16 |
17 | Tricks from the original implementation that made it into this version:
18 |
19 | * Hard-edged cubes specified using 8 vertices only (to reduce VS load). The original
20 | version determined the (faceted) normal in the pixel shader using a cube map lookup
21 | from an interpolated object-space position (passed as an attribute). This was the
22 | easiest way to do this on shader level 2 hardware, but it did tend to cause some
23 | sparkling near the edges (exactly which cube map face a near-edge sample landed on
24 | depended on interpolation rounding, which was a bit dicey).
25 |
26 | This version passes the world-space position as an attribute and then determines the
27 | face normal as the cross product of the position's derivatives. This is simpler, has
28 | no texture lookup, and does not have any sparkling. (But it does require derivative
29 | instructions).
30 | * Instance all the things!
31 | * Trilinear interpolation using smoothstepped weights: much cheaper than spline
32 | interpolation and looks almost as good.
33 |
34 | Bonus: this version also renders each cube as an indexed triangle strip (14 verts plus
35 | primitive restart) - again, the original version was written before primitive restart
36 | was a feature you could rely on. This should not make any significant difference
37 | compared to explicit quads, but it *feels* nicer. :)
38 |
39 | This uses D3D10 because I'm writing this on a laptop with Intel integrated graphics and
40 | drivers that haven't been updated for over 2 years; was I feeling lucky enough to try
41 | GL 3? Evidently not.
42 |
43 | Oh and I really need to add some camera control and shadow mapping, like the original
44 | version had.
45 |
46 | -Fabian 'ryg' Giesen,
47 | December 2013
--------------------------------------------------------------------------------
/d3du.h:
--------------------------------------------------------------------------------
1 | #ifndef D3DU_H
2 | #define D3DU_H
3 |
4 | // you need to include windows.h and d3d11.h first.
5 |
6 | struct d3du_context { // bundle of the window handle and the D3D11 objects the d3du_* helpers work with
7 | HWND hwnd; // window handle
8 | ID3D11Device * dev; // D3D11 device (used for resource creation by the d3du_make_* / d3du_simple_* helpers)
9 | ID3D11DeviceContext * ctx; // D3D11 device context
10 | IDXGISwapChain * swap; // DXGI swap chain
11 |
12 | ID3D11Texture2D * backbuf; // NOTE(review): presumably the swap chain's back buffer - confirm in d3du.cpp
13 | ID3D11Texture2D * depthbuf; // NOTE(review): presumably the depth buffer matching backbuf - confirm in d3du.cpp
14 |
15 | ID3D11RenderTargetView * backbuf_rtv; // render target view (by name, of backbuf)
16 | ID3D11DepthStencilView * depthbuf_dsv; // depth-stencil view (by name, of depthbuf)
17 |
18 | D3D11_VIEWPORT default_vp; // NOTE(review): presumably a viewport covering the whole back buffer - confirm
19 | };
20 |
21 | // Creates a D3DU context and opens a window with given title and width/height
22 | d3du_context * d3du_init( char const * title, int w, int h, D3D_FEATURE_LEVEL feature_level );
23 |
24 | // Shuts down a D3DU context and frees it.
25 | void d3du_shutdown( d3du_context * ctx );
26 |
27 | // Processes window events. Returns 1 if OK, 0 if user requested exit.
28 | int d3du_handle_events( d3du_context * ctx );
29 |
30 | // Swap buffers.
31 | void d3du_swap_buffers( d3du_context * ctx, bool vsync );
32 |
33 | // Creates a D3D11_VIEWPORT covering the full extent of the given 2D texture.
34 | D3D11_VIEWPORT d3du_full_tex2d_viewport( ID3D11Texture2D * tex );
35 |
36 | // Creates a buffer
37 | ID3D11Buffer * d3du_make_buffer( ID3D11Device * dev, UINT size, D3D11_USAGE use, UINT bind_flags, const void * initial );
38 |
39 | // Reads back the contents of a buffer and returns them as an unsigned char array.
40 | // size_in_bytes, when non-NULL, will receive the buffer size.
41 | //
42 | // Intended for debugging only.
43 | unsigned char * d3du_get_buffer( d3du_context * ctx, ID3D11Buffer * buf, int * size_in_bytes );
44 |
45 | // Reads back the contents of the given mip level of a texture SRV.
46 | //
47 | // Intended for debugging only.
48 | unsigned char * d3du_read_texture_level( d3du_context * ctx, ID3D11ShaderResourceView * srv, int level );
49 |
50 | // Creates a simple rasterizer state
51 | ID3D11RasterizerState * d3du_simple_raster( ID3D11Device * dev, D3D11_CULL_MODE cull, bool front_ccw, bool scissor_enable );
52 |
53 | // Creates a simple blend state.
54 | ID3D11BlendState * d3du_simple_blend( ID3D11Device * dev, D3D11_BLEND src_blend, D3D11_BLEND dest_blend );
55 |
56 | // Creates a simplified sampler state.
57 | ID3D11SamplerState * d3du_simple_sampler( ID3D11Device * dev, D3D11_FILTER filter, D3D11_TEXTURE_ADDRESS_MODE addr );
58 |
59 | // Compiles the given shader or dies trying!
60 | ID3DBlob * d3du_compile_source_or_die( char const * source, char const * profile, char const * entrypt );
61 |
62 | // *All* the shaders.
63 | union d3du_shader { // a single shader pointer, readable through whichever interface type matches how it was created
64 | ID3D11DeviceChild * generic; // type-agnostic view of the same pointer
65 | ID3D11PixelShader * ps; // valid when the shader is a pixel shader
66 | ID3D11VertexShader * vs; // valid when the shader is a vertex shader
67 | ID3D11ComputeShader * cs; // valid when the shader is a compute shader
68 | };
69 |
70 | // Compile and create a shader with the given profile on the given device
71 | d3du_shader d3du_compile_and_create_shader( ID3D11Device * dev, char const * source, char const * profile, char const * entrypt );
72 |
73 | // Texture helper
74 | struct d3du_tex { // a texture resource together with optional shader-resource and render-target views
75 | union { // three typed views of the one underlying resource pointer
76 | ID3D11Resource * resrc; // generic resource view
77 | ID3D11Texture2D * tex2d; // use for textures created by make2d
78 | ID3D11Texture3D * tex3d; // use for textures created by make3d (volume textures have their own interface type)
79 | };
80 | ID3D11ShaderResourceView * srv; // NOTE(review): presumably NULL when not created with a shader-resource bind flag - confirm in d3du.cpp
81 | ID3D11RenderTargetView * rtv; // NOTE(review): presumably NULL when not created with a render-target bind flag - confirm in d3du.cpp
82 |
83 | ~d3du_tex();
84 |
85 | static d3du_tex * make2d( ID3D11Device * dev, UINT w, UINT h, UINT num_mips,
86 | DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT initial_pitch );
87 |
88 | static d3du_tex * make3d( ID3D11Device * dev, UINT w, UINT h, UINT d, UINT num_mips,
89 | DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT init_row_pitch, UINT init_depth_pitch );
90 |
91 | private:
92 | d3du_tex( ID3D11Resource * resrc, ID3D11ShaderResourceView * srv, ID3D11RenderTargetView * rtv ); // private: instances come from make2d / make3d
93 | };
94 |
95 | // D3DU timer measures how long D3D calls take on the GPU side
96 | // Create, call bracket begin/end around area you want to capture, then "report" at the end.
97 | typedef struct d3du_timer d3du_timer;
98 |
99 | d3du_timer * d3du_timer_create( d3du_context * ctx, size_t warmup_frames ); // warmup_frames = no. of initial measurements to throw away
100 | void d3du_timer_destroy( d3du_timer * timer );
101 | void d3du_timer_bracket_begin( d3du_context * ctx, d3du_timer * timer );
102 | void d3du_timer_bracket_end( d3du_context * ctx, d3du_timer * timer );
103 | void d3du_timer_report( d3du_context * ctx, d3du_timer * timer, char const * label );
104 |
105 | #endif
106 |
107 |
--------------------------------------------------------------------------------
/util.cpp:
--------------------------------------------------------------------------------
1 | #define _CRT_SECURE_NO_WARNINGS
2 | #include "util.h"
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #include
10 | #include
11 |
12 | void panic(char const * fmt, ...) // printf-style fatal error: writes "Error: " plus the formatted message to stderr, then exits
13 | {
14 | va_list arg;
15 | va_start( arg, fmt );
16 | fputs( "Error: ", stderr );
17 | vfprintf( stderr, fmt, arg ); // fmt is used verbatim; no newline is appended
18 | va_end( arg );
19 | exit( 1 ); // terminates the process with status 1, so this function never returns
20 | }
21 |
22 | char * read_file(char const * filename) // returns the file contents as a malloc'd, NUL-terminated buffer (caller frees), or NULL on any failure
23 | {
24 | FILE *f = fopen( filename, "rb" );
25 | if ( !f )
26 | return 0;
27 |
28 | // ftell reports failure as -1; treat that (or a failed seek) as an error instead of converting it to a huge size
29 | long len = ( fseek( f, 0, SEEK_END ) == 0 ) ? ftell( f ) : -1;
30 | char * buffer = 0;
31 |
32 | if ( len >= 0 && fseek( f, 0, SEEK_SET ) == 0 )
33 | buffer = (char *)malloc( (size_t)len + 1 );
34 | if (buffer) {
35 | buffer[ len ] = 0; // terminator so the result can be used as a C string
36 | if (fread( buffer, 1, (size_t)len, f ) != (size_t)len) { // element size 1: an empty file reads 0 of 0 bytes and succeeds
37 | free( buffer );
38 | buffer = 0;
39 | }
40 | }
41 | fclose( f );
42 | return buffer;
43 | }
44 |
45 | void dump_dwords( unsigned int const * vals, unsigned int num_dwords ) // hex dump of vals[0..num_dwords) to stdout, 8 values per row
46 | {
47 | for ( unsigned int row = 0 ; row < num_dwords ; row += 8 )
48 | {
49 | printf( "[%04x]", row ); // row label: index of the first value on this row
50 | for ( unsigned int i = 0 ; i < 8 && row + i < num_dwords ; i++) // the last row may hold fewer than 8 values
51 | printf( " %08x", vals[ row + i ] );
52 | printf( "\n" );
53 | }
54 | }
55 |
56 | int pixel_compare_pos( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h, int * pos_x, int * pos_y )
57 | { // compares h rows of w bytes each; returns 0 if all rows match, else the memcmp result of the first differing row
58 | for ( int y = 0 ; y < h ; y++ )
59 | {
60 | unsigned char const * pa = a + y * stride_a; // start of row y in image a (strides are in bytes)
61 | unsigned char const * pb = b + y * stride_b; // start of row y in image b
62 | int d = memcmp( pa, pb, w );
63 | if ( d != 0 )
64 | {
65 | if ( pos_y ) // both output pointers are optional
66 | *pos_y = y;
67 |
68 | if ( pos_x )
69 | {
70 | // we know there's a mismatch in this line
71 | // find its x position
72 | int x = 0;
73 | while ( pa[x] == pb[x] ) // terminates before w because memcmp found a difference in the first w bytes
74 | x++;
75 |
76 | *pos_x = x; // byte offset of the first mismatch within the row
77 | }
78 |
79 | return d;
80 | }
81 | }
82 |
83 | return 0; // no mismatch; *pos_x / *pos_y are left untouched
84 | }
85 |
86 | int pixel_compare( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h ) // same result as pixel_compare_pos, without reporting where the mismatch is
87 | {
88 | return pixel_compare_pos( a, stride_a, b, stride_b, w, h, NULL, NULL );
89 | }
90 |
91 | void print_pixels( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h ) // prints both images side by side as hex bytes, one row per output line
92 | {
93 | for ( int y = 0 ; y < h ; y++ )
94 | {
95 | for ( int x = 0 ; x < w ; x++ ) // left half: row y of image a
96 | printf( "%02x ", a[x] );
97 |
98 | printf(" - ");
99 |
100 | for ( int x = 0 ; x < w ; x++ ) // right half: row y of image b
101 | printf( " %02x", b[x] );
102 |
103 | printf("\n");
104 | a += stride_a; // advance both row pointers by their own byte strides
105 | b += stride_b;
106 | }
107 | }
108 |
109 | struct run_stats // definition of the opaque type that util.h only forward-declares
110 | {
111 | std::vector<float> values; // every measurement passed to run_stats_record since creation or the last run_stats_clear
112 | };
113 |
114 | run_stats * run_stats_create( ) // allocates an empty collector with new; release it with run_stats_destroy
115 | {
116 | return new run_stats;
117 | }
118 |
119 | void run_stats_destroy( run_stats * stats ) // deletes a collector obtained from run_stats_create
120 | {
121 | delete stats;
122 | }
123 |
124 | void run_stats_clear( run_stats * stats ) // discards all recorded measurements; the collector stays usable
125 | {
126 | stats->values.clear();
127 | }
128 |
129 | void run_stats_record( run_stats * stats, float val ) // appends one measurement to the collector
130 | {
131 | stats->values.push_back( val );
132 | }
133 |
134 | void run_stats_report( run_stats * stats, char const * desc ) // prints one CSV-style summary line for the recorded values to stdout
135 | {
136 | size_t count = stats->values.size();
137 | if (count < 2) // the sample standard deviation below divides by count - 1
138 | return;
139 |
140 | // Print min/max and different percentiles
141 | std::sort(stats->values.begin(), stats->values.end()); // note: sorts the stored values in place
142 |
143 | // desc, min,25th,med,75th,max, mean,sdev
144 | // Written straight to stdout rather than assembled in a fixed-size
145 | // buffer, so an arbitrarily long desc cannot overflow anything.
146 |
147 | printf( "%s, ", desc );
148 |
149 | for (int i=0; i < 5; i++) // i/4 quantiles: min, 25th, median, 75th, max
150 | printf( "%.3f,", stats->values[i * (count - 1) / 4] );
151 |
152 | // Mean and standard deviation
153 | double mean = 0.0;
154 | for (std::vector<float>::const_iterator it = stats->values.begin(); it != stats->values.end(); ++it)
155 | mean += *it;
156 | mean /= count;
157 |
158 | double varsum = 0.0;
159 | for (std::vector<float>::const_iterator it = stats->values.begin(); it != stats->values.end(); ++it)
160 | varsum += (*it - mean) * (*it - mean);
161 | double sdev = sqrt(varsum / (count - 1.0));
162 |
163 | printf( " %.3f,", mean );
164 | printf( "%.3f\n", sdev );
165 | }
166 |
--------------------------------------------------------------------------------
/shaders.hlsl:
--------------------------------------------------------------------------------
1 | #define TEX_WIDTH_LOG2 10
2 |
3 | struct CubeVert { // output of RenderCubeVertexShader, input of RenderCubePixelShader
4 | float4 clip_pos : SV_Position; // clip-space position (clip_from_world * world position)
5 | float3 world_pos : WorldPos; // .xyz = world space position
6 | };
7 |
8 | cbuffer CubeConsts : register(b0) { // constants for the cube rendering shaders; mirrored by CubeConstBuf in main.cpp
9 | float4x4 clip_from_world; // transforms world-space positions to clip space
10 | float3 world_down_vector; // reference direction used to build each cube's local axes
11 | float time_offs; // not read by any shader in this file
12 |
13 | // diffuse trilight plus ambient
14 | float3 light_color_ambient; // added unconditionally
15 | float3 light_color_key; // weighted by saturate(NdotL)
16 | float3 light_color_fill; // weighted by 1 - |NdotL|
17 | float3 light_color_back; // weighted by saturate(-NdotL)
18 | float3 light_dir; // dotted directly with the normal, so presumably expected to be unit length - confirm on the CPU side
19 | };
20 |
21 | cbuffer UpdateConsts : register(b1) { // constants for the particle update shaders; mirrored by UpdateConstBuf in main.cpp
22 | float3 field_scale; // particle position -> force-field grid position: pos * field_scale + field_offs
23 | float damping; // factor applied to the previous step's displacement in the Verlet update
24 | float3 field_offs; // see field_scale
25 | float accel; // factor applied to the sampled force
26 | float3 field_sample_scale; // grid position -> texture coordinate for the force texture
27 | float vel_scale; // not read by any shader in this file
28 | };
29 |
30 | float4 UpdateVertShader( // generates a screen-covering triangle from the vertex id alone (no vertex buffer input)
31 | uint vertex_id : SV_VertexID
32 | ) : SV_Position
33 | {
34 | return float4(float(vertex_id >> 1) * 4.0 - 1.0, 1.0 - float(vertex_id & 1) * 4.0, 0.5, 1.0); // ids 0,1,2 -> (-1,1), (-1,-3), (3,1): one oversized triangle containing the whole [-1,1] square
35 | }
36 |
37 | float4 UpdatePosShader( // computes one particle's next position from its two previous positions and the force field
38 | float4 pos : SV_Position, // pixel position; selects which texel (particle) is being updated
39 | SamplerState force_smp : register(s0),
40 | Texture2D tex_older_pos : register(t0), // positions two steps back
41 | Texture2D tex_newer_pos : register(t1), // positions one step back
42 | Texture3D tex_force : register(t2) // force field volume
43 | ) : SV_Target
44 | {
45 | int3 coord_pos = int3(int2(pos.xy), 0); // texel coordinate, mip level 0
46 | float4 older_pos = tex_older_pos.Load(coord_pos);
47 | float4 newer_pos = tex_newer_pos.Load(coord_pos);
48 |
49 | // determine force field sample pos
50 | float3 force_pos = newer_pos.xyz * field_scale + field_offs;
51 | float3 force_frac = frac(force_pos);
52 | float3 force_smooth = force_frac * force_frac * (3.0 - 2.0 * force_frac); // smoothstep curve applied to the fractional part
53 | force_pos = (force_pos - force_frac) + force_smooth; // integer part + smoothed fraction, so the sampler's interpolation uses smoothstepped weights
54 |
55 | // sample force from texture
56 | float3 force = tex_force.Sample(force_smp, force_pos * field_sample_scale).xyz;
57 |
58 | // verlet integration
59 | float3 new_pos = newer_pos.xyz + damping * (newer_pos.xyz - older_pos.xyz);
60 | new_pos += accel * force;
61 |
62 | float4 output = float4(new_pos, newer_pos.w); // .w is carried over unchanged unless the particle is nuked below
63 |
64 | // nuke particles if they get too far from the origin
65 | if (dot(new_pos, new_pos) > 16.0) // squared distance > 16, i.e. more than 4 units away
66 | output.w = 0.0; // w == 0 is what RenderCubeVertexShader treats as "cube is off"
67 |
68 | return output;
69 | }
70 |
71 | float4 UpdateVelShader( // writes the per-particle displacement between the two given position textures
72 | float4 pos : SV_Position, // pixel position; selects which texel (particle) is being processed
73 | Texture2D tex_older_pos : register(t0),
74 | Texture2D tex_newer_pos : register(t1)
75 | ) : SV_Target
76 | {
77 | int3 coord_pos = int3(int2(pos.xy), 0); // texel coordinate, mip level 0
78 | float4 older_pos = tex_older_pos.Load(coord_pos);
79 | float4 newer_pos = tex_newer_pos.Load(coord_pos);
80 |
81 | return newer_pos - older_pos; // all four components are differenced, including .w; vel_scale is not applied here
82 | }
83 |
84 | CubeVert RenderCubeVertexShader( // expands one (cube, corner) pair into a world- and clip-space vertex
85 | uint vertex_id : SV_VertexID, // low 3 bits select the cube corner, the remaining bits select the texel column
86 | uint instance_id : SV_InstanceID, // selects the texel row
87 | Texture2D tex_pos : register(t0), // per-cube position in .xyz, size / on-off flag in .w
88 | Texture2D tex_fwd : register(t1) // per-cube forward vector in .xyz
89 | )
90 | {
91 | CubeVert v;
92 |
93 | // fetch cube position and velocity from textures
94 | int3 fetch_coord = int3(vertex_id >> 3, instance_id, 0); // 8 vertex ids per cube, mip level 0
95 | float4 cube_pos = tex_pos.Load(fetch_coord);
96 | float4 cube_fwd = tex_fwd.Load(fetch_coord);
97 |
98 | // early-out if cube is off
99 | if (cube_pos.w == 0.0) {
100 | v.clip_pos = 0; // all corners collapse to the same zero position
101 | v.world_pos = 0;
102 | return v;
103 | }
104 |
105 | // determine local coordinate system
106 | float3 x_axis = cube_fwd.xyz; // used as loaded (not normalized), so its length sets the extent along this axis
107 | float3 z_axis = normalize(cross(x_axis, world_down_vector));
108 | float3 y_axis = normalize(cross(z_axis, x_axis));
109 |
110 | // generate cube vertex
111 | float3 world_pos = cube_pos.xyz;
112 | float across_size = cube_pos.w; // half-extent along the two unit-length axes
113 |
114 | world_pos += (((vertex_id & 1) != 0) ? 1.0 : -1.0) * x_axis; // bit 0: +/- along x_axis
115 | world_pos += (((vertex_id & 2) != 0) ? across_size : -across_size) * y_axis; // bit 1: +/- along y_axis
116 | world_pos += (((vertex_id & 4) != 0) ? across_size : -across_size) * z_axis; // bit 2: +/- along z_axis
117 |
118 | // generate output vertex
119 | v.clip_pos = mul(clip_from_world, float4(world_pos, 1.0));
120 | v.world_pos = world_pos;
121 | return v;
122 | }
123 |
124 | float4 RenderCubePixelShader( // flat-shades a cube face, deriving the face normal from screen-space derivatives of the world position
125 | CubeVert v
126 | ) : SV_Target
127 | {
128 | // determine triangle plane from derivatives
129 | float3 dPos_dx = ddx(v.world_pos.xyz);
130 | float3 dPos_dy = ddy(v.world_pos.xyz);
131 |
132 | // world-space normal from tangents
133 | float3 world_normal = cross(dPos_dy, dPos_dx); // not normalized here; the length is divided out on the next statement
134 |
135 | // lighting model (trilight)
136 | float NdotL = dot(world_normal, light_dir) * rsqrt(dot(world_normal, world_normal)); // dot product scaled by 1/|normal|
137 |
138 | float3 diffuse_lit = light_color_ambient
139 | + saturate(NdotL) * light_color_key // surfaces facing the light
140 | + (1.0 - abs(NdotL)) * light_color_fill // strongest where the surface is side-on to the light
141 | + saturate(-NdotL) * light_color_back; // surfaces facing away from the light
142 |
143 | return float4(diffuse_lit, 1.0); // alpha is always 1
144 | }
--------------------------------------------------------------------------------
/momentous.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 |
14 | {AD11938C-C989-43EE-B618-36CD6118C035}
15 | Win32Proj
16 | momentous
17 |
18 |
19 |
20 | Application
21 | true
22 | v110
23 | Unicode
24 |
25 |
26 | Application
27 | false
28 | v110
29 | true
30 | Unicode
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 | true
44 |
45 |
46 | false
47 |
48 |
49 |
50 |
51 |
52 | Level3
53 | Disabled
54 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
55 | true
56 |
57 |
58 | Console
59 | true
60 |
61 |
62 |
63 |
64 | Level3
65 |
66 |
67 | MaxSpeed
68 | true
69 | true
70 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
71 | true
72 | MultiThreaded
73 |
74 |
75 | Console
76 | true
77 | true
78 | true
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 | true
94 | true
95 |
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/math.h:
--------------------------------------------------------------------------------
1 | #ifndef MATH_H_INCLUDED
2 | #define MATH_H_INCLUDED
3 |
4 | #include
5 |
6 | // Matrices are column-major.
7 | //
8 | // Coordinate system conventions:
9 | // +x = right
10 | // +y = down
11 | // +z = into screen
12 | // this is a bit unorthodox but right-handed and convenient.
13 | // Member compound-assignment generators: apply `op` to each of the 2/3/4 named members (x, y, z, w) and return *this. Require a this_type typedef in the enclosing class.
14 | #define IMPL_COMPONENT_OP2(op) \
15 | this_type& operator op(const this_type& b) { x op b.x; y op b.y; return *this; }
16 |
17 | #define IMPL_COMPONENT_OP3(op) \
18 | this_type& operator op(const this_type& b) { x op b.x; y op b.y; z op b.z; return *this; }
19 |
20 | #define IMPL_COMPONENT_OP4(op) \
21 | this_type& operator op(const this_type& b) { x op b.x; y op b.y; z op b.z; w op b.w; return *this; }
22 | // Free operators for a class template `type`: unary -, a + b, a - b, a * s, a * b, s * a. Each copies an operand and applies the type's own compound assignment.
23 | #define IMPL_LINEAR_OPS(type) \
24 | template<typename T> type<T> operator -(const type<T>& v) { type<T> x = v; x *= T(-1); return x; } \
25 | template<typename T> type<T> operator +(const type<T>& a, const type<T>& b) { type<T> x = a; x += b; return x; } \
26 | template<typename T> type<T> operator -(const type<T>& a, const type<T>& b) { type<T> x = a; x -= b; return x; } \
27 | template<typename T> type<T> operator *(const type<T>& a, T s) { type<T> x = a; x *= s; return x; } \
28 | template<typename T> type<T> operator *(const type<T>& a, const type<T>& b) { type<T> x = a; x *= b; return x; } \
29 | template<typename T> type<T> operator *(T s, const type<T>& b) { type<T> x = b; x *= s; return x; }
30 | // Vector helpers on top of IMPL_LINEAR_OPS: dot (from the supplied expression over a and b), len_sq, len, normalize. normalize divides by the length without a zero check.
31 | #define IMPL_VECTOR_OPS(type, dot_expr) \
32 | IMPL_LINEAR_OPS(type) \
33 | template<typename T> T dot(const type<T>& a, const type<T>& b) { return dot_expr; } \
34 | template<typename T> T len_sq(const type<T>& a) { return dot(a, a); } \
35 | template<typename T> T len(const type<T>& a) { return std::sqrt(len_sq(a)); } \
36 | template<typename T> type<T> normalize(const type<T>& a) { return rsqrt(len_sq(a)) * a; }
37 | // Matrix helpers on top of IMPL_LINEAR_OPS: adds matrix * vector, computed by the supplied expression over m (matrix) and v (vector).
38 | #define IMPL_MATRIX_OPS(mat_type, vec_type, mul_expr) \
39 | IMPL_LINEAR_OPS(mat_type) \
40 | template<typename T> vec_type<T> operator *(const mat_type<T>& m, const vec_type<T>& v) { return mul_expr; }
41 |
42 | namespace math {
43 | template<typename T>
44 | T rsqrt(T x) // reciprocal square root, 1 / sqrt(x); no special handling of x == 0
45 | {
46 | return T(1) / std::sqrt(x);
47 | }
48 |
49 | template<typename T>
50 | struct vec2T { // 2-component vector; components reachable by name (x, y) or by index (v[i])
51 | typedef vec2T<T> this_type;
52 |
53 | union {
54 | struct {
55 | T x, y;
56 | };
57 | T v[2];
58 | };
59 |
60 | vec2T() {} // leaves the components uninitialized
61 | explicit vec2T(T s) : x(s), y(s) {} // broadcast one scalar to both components
62 | vec2T(T x, T y) : x(x), y(y) {}
63 |
64 | T operator[](int i) const { return v[i]; } // no bounds check
65 | T& operator[](int i) { return v[i]; }
66 |
67 | IMPL_COMPONENT_OP2(+=)
68 | IMPL_COMPONENT_OP2(-=)
69 | IMPL_COMPONENT_OP2(*=)
70 | this_type& operator *=(T s) { x *= s; y *= s; return *this; } // uniform scale
71 | };
72 |
73 | IMPL_VECTOR_OPS(vec2T, a.x*b.x + a.y*b.y)
74 |
75 | template<typename T>
76 | struct vec3T { // 3-component vector; components reachable by name (x, y, z) or by index (v[i])
77 | typedef vec3T<T> this_type;
78 |
79 | union {
80 | struct {
81 | T x, y, z;
82 | };
83 | T v[3];
84 | };
85 |
86 | vec3T() {} // leaves the components uninitialized
87 | explicit vec3T(T s) : x(s), y(s), z(s) {} // broadcast one scalar to all components
88 | vec3T(T x, T y, T z) : x(x), y(y), z(z) {}
89 |
90 | T operator[](int i) const { return v[i]; } // no bounds check
91 | T& operator[](int i) { return v[i]; }
92 |
93 | IMPL_COMPONENT_OP3(+=)
94 | IMPL_COMPONENT_OP3(-=)
95 | IMPL_COMPONENT_OP3(*=)
96 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; return *this; } // uniform scale
97 | };
98 |
99 | IMPL_VECTOR_OPS(vec3T, a.x*b.x + a.y*b.y + a.z*b.z)
100 | template<typename T>
101 | vec3T<T> cross(const vec3T<T>& a, const vec3T<T>& b) // 3D cross product a x b
102 | {
103 | return vec3T<T>(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
104 | }
105 |
106 | template<typename T>
107 | struct vec4T { // 4-component vector; components reachable by name (x, y, z, w) or by index (v[i])
108 | typedef vec4T<T> this_type;
109 |
110 | union {
111 | struct {
112 | T x, y, z, w;
113 | };
114 | T v[4];
115 | };
116 |
117 | vec4T() {} // leaves the components uninitialized
118 | explicit vec4T(T s) : x(s), y(s), z(s), w(s) {} // broadcast one scalar to all components
119 | vec4T(const vec3T<T>& v, T w) : x(v.x), y(v.y), z(v.z), w(w) {} // extend a 3-vector with an explicit w
120 | vec4T(T x, T y, T z, T w) : x(x), y(y), z(z), w(w) {}
121 |
122 | T operator[](int i) const { return v[i]; } // no bounds check
123 | T& operator[](int i) { return v[i]; }
124 |
125 | IMPL_COMPONENT_OP4(+=)
126 | IMPL_COMPONENT_OP4(-=)
127 | IMPL_COMPONENT_OP4(*=)
128 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; w *= s; return *this; } // uniform scale
129 | };
130 |
131 | IMPL_VECTOR_OPS(vec4T, a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w)
132 |
133 | template<typename T>
134 | struct mat33T { // 3x3 matrix stored as three column vectors
135 | typedef vec3T<T> vec_type;
136 | typedef mat33T<T> this_type;
137 |
138 | vec_type x, y, z; // columns
139 |
140 | mat33T() {} // leaves the elements uninitialized
141 | mat33T(const vec_type& colX, const vec_type& colY, const vec_type& colZ) : x(colX), y(colY), z(colZ) {}
142 | mat33T( // element-wise constructor; arguments are listed row by row (_rc = row r, column c)
143 | T _00, T _01, T _02,
144 | T _10, T _11, T _12,
145 | T _20, T _21, T _22
146 | ) : x(_00, _10, _20), y(_01, _11, _21), z(_02, _12, _22) {}
147 |
148 | T operator()(int i, int j) const { return (&x)[j][i]; } // element at row i, column j; relies on x, y, z being laid out contiguously
149 | T& operator()(int i, int j) { return (&x)[j][i]; }
150 |
151 | const vec_type& get_col(int i) const { return (&x)[i]; }
152 | void set_col(int i, const vec_type& v) { (&x)[i] = v; }
153 | const vec_type get_row(int i) const { return vec_type((&x)[0][i], (&x)[1][i], (&x)[2][i]); } // rows are gathered across the columns, hence returned by value
154 | void set_row(int i, const vec_type& v) { (&x)[0][i] = v.x; (&x)[1][i] = v.y; (&x)[2][i] = v.z; }
155 |
156 | IMPL_COMPONENT_OP3(+=)
157 | IMPL_COMPONENT_OP3(-=)
158 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; return *this; } // scale every element
159 | this_type& operator *=(const this_type& b); // matrix product, *this = *this * b; defined after the struct
160 |
161 | static this_type diag(T x, T y, T z) { return mat33T(x, T(0), T(0), T(0), y, T(0), T(0), T(0), z); }
162 | static this_type identity() { return diag(T(1), T(1), T(1)); }
163 | static this_type uniform_scale(T s) { return diag(s, s, s); }
164 |
165 | static this_type rotation(const vec_type& axis, T angle) // axis is used as given (not normalized here); angle goes straight to std::cos / std::sin
166 | {
167 | // Rodrigues rotation formula
168 | T cosv = std::cos(angle);
169 | vec_type sa = std::sin(angle) * axis;
170 | vec_type omca = (T(1) - cosv) * axis; // (1 - cos) * axis
171 |
172 | return mat33T(
173 | omca.x*axis.x + cosv, omca.x*axis.y - sa.z, omca.x*axis.z + sa.y,
174 | omca.y*axis.x + sa.z, omca.y*axis.y + cosv, omca.y*axis.z - sa.x,
175 | omca.z*axis.x - sa.y, omca.z*axis.y + sa.x, omca.z*axis.z + cosv
176 | );
177 | }
178 | };
179 |
180 | IMPL_MATRIX_OPS(mat33T, vec3T, v.x*m.x + v.y*m.y + v.z*m.z)
181 |
182 | template<typename T>
183 | mat33T<T>& mat33T<T>::operator *=(const mat33T<T>& b) // matrix product in place: *this = *this * b
184 | {
185 | // Work from a copy, since the columns of *this are overwritten one by one.
186 | // A 3x3 matrix has exactly three columns (x, y, z); there is no w column.
187 | const mat33T<T> M = *this;
188 | x = M * b.x;
189 | y = M * b.y;
190 | z = M * b.z;
191 | return *this; }
192 |
193 | template<typename T>
194 | mat33T<T> transpose(const mat33T<T>& m) // builds the transpose by passing m's rows as the new columns
195 | {
196 | return mat33T<T>(m.get_row(0), m.get_row(1), m.get_row(2));
197 | }
198 |
199 | template<typename T>
200 | struct mat44T { // 4x4 matrix stored as four column vectors
201 | typedef vec4T<T> vec_type;
202 | typedef vec3T<T> vec3_type;
203 | typedef mat44T<T> this_type;
204 |
205 | vec_type x, y, z, w; // columns
206 |
207 | mat44T() {} // leaves the elements uninitialized
208 | mat44T(const vec_type& colX, const vec_type& colY, const vec_type& colZ, const vec_type& colW) : x(colX), y(colY), z(colZ), w(colW) {}
209 | mat44T(const mat33T<T>& mat3x3, const vec3_type& translate) : x(mat3x3.x, T(0)), y(mat3x3.y, T(0)), z(mat3x3.z, T(0)), w(translate, T(1)) {} // affine transform: 3x3 part plus translation column, bottom row (0,0,0,1)
210 | mat44T( // element-wise constructor; arguments are listed row by row (_rc = row r, column c)
211 | T _00, T _01, T _02, T _03,
212 | T _10, T _11, T _12, T _13,
213 | T _20, T _21, T _22, T _23,
214 | T _30, T _31, T _32, T _33
215 | ) : x(_00, _10, _20, _30), y(_01, _11, _21, _31), z(_02, _12, _22, _32), w(_03, _13, _23, _33) {}
216 |
217 | T operator()(int i, int j) const { return (&x)[j][i]; } // element at row i, column j; relies on x, y, z, w being laid out contiguously
218 | T& operator()(int i, int j) { return (&x)[j][i]; }
219 |
220 | const vec_type& get_col(int i) const { return (&x)[i]; }
221 | void set_col(int i, const vec_type& v) { (&x)[i] = v; }
222 | const vec_type get_row(int i) const { return vec_type((&x)[0][i], (&x)[1][i], (&x)[2][i], (&x)[3][i]); }
223 | void set_row(int i, const vec_type& v) { (&x)[0][i] = v.x; (&x)[1][i] = v.y; (&x)[2][i] = v.z; (&x)[3][i] = v.w; }
224 |
225 | IMPL_COMPONENT_OP4(+=)
226 | IMPL_COMPONENT_OP4(-=)
227 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; w *= s; return *this; } // scale every element
228 | this_type& operator *=(const this_type& b); // matrix product, *this = *this * b; defined after the struct
229 |
230 | static this_type diag(T x, T y, T z, T w) { return mat44T(x, T(0), T(0), T(0), T(0), y, T(0), T(0), T(0), T(0), z, T(0), T(0), T(0), T(0), w); }
231 | static this_type identity() { return diag(T(1), T(1), T(1), T(1)); }
232 |
233 | static this_type look_at(const vec3_type& pos, const vec3_type& look_at, const vec3_type& down) // view matrix for a camera at pos looking toward look_at
234 | {
235 | mat33T<T> M;
236 | vec3_type z_axis = normalize(look_at - pos); // +z = into screen
237 | vec3_type x_axis = normalize(cross(down, z_axis));
238 | vec3_type y_axis = cross(z_axis, x_axis);
239 |
240 | M.set_row(0, x_axis); // camera axes as rows: M maps world directions into camera space
241 | M.set_row(1, y_axis);
242 | M.set_row(2, z_axis);
243 | return this_type(M, M * -pos); // translation chosen so that pos maps to the origin
244 | }
245 |
246 | static this_type orthoD3D(T lft, T rgt, T top, T bot, T nearv, T farv) // orthographic projection: x,y to [-1,1], z from [nearv,farv] to [0,1]
247 | {
248 | vec3_type mid((lft + rgt) / T(2), (bot + top) / T(2), (nearv + farv) / T(2));
249 | T sx = T(2) / (rgt - lft);
250 | T sy = T(2) / (top - bot);
251 | T sz = T(1) / (farv - nearv);
252 |
253 | return this_type(mat33T<T>::diag(sx, sy, sz), vec3T<T>(-mid.x * sx, -mid.y * sy, T(0.5) - mid.z * sz));
254 | }
255 |
256 | // NOTE: this takes lft/rgt/bot/top at z=1 plane, not near plane!
257 | static this_type frustumD3D(T lft, T rgt, T top, T bot, T nearv, T farv)
258 | {
259 | T Q = farv / (farv - nearv);
260 | // clip.w = +z here, so the off-centre terms are negated: x/z == rgt maps to +1, x/z == lft to -1 (same for top/bot).
261 | return this_type(
262 | T(2) / (rgt - lft), T(0), -(rgt + lft) / (rgt - lft), T(0),
263 | T(0), T(2) / (top - bot), -(top + bot) / (top - bot), T(0),
264 | T(0), T(0), Q, -nearv * Q,
265 | T(0), T(0), T(1), T(0)
266 | );
267 | }
268 |
269 | // w/h at z=1 plane, not near plane!
270 | static this_type perspectiveD3D(T w, T h, T nearv, T farv) // symmetric frustum of width w and height h centred on the view axis
271 | {
272 | T wh = w / T(2);
273 | T hh = h / T(2);
274 | return frustumD3D(-wh, wh, -hh, hh, nearv, farv);
275 | }
276 | };
277 |
278 | IMPL_MATRIX_OPS(mat44T, vec4T, v.x*m.x + v.y*m.y + v.z*m.z + v.w*m.w)
279 |
280 | template<typename T>
281 | mat44T<T>& mat44T<T>::operator *=(const mat44T<T>& b) // matrix product in place: *this = *this * b
282 | {
283 | const mat44T<T> M = *this; // copy first: the columns of *this are overwritten one by one below
284 | x = M * b.x;
285 | y = M * b.y;
286 | z = M * b.z;
287 | w = M * b.w;
288 | return *this;
289 | }
290 |
291 | template<typename T>
292 | mat44T<T> transpose(const mat44T<T>& m) // builds the transpose by passing m's rows as the new columns
293 | {
294 | return mat44T<T>(m.get_row(0), m.get_row(1), m.get_row(2), m.get_row(3));
295 | }
296 |
297 | typedef vec2T<int> vec2i; // NOTE(review): element types were lost from this listing; int for the *i names and float otherwise is a reconstruction - confirm
298 | typedef vec2T<float> vec2;
299 |
300 | typedef vec3T<int> vec3i;
301 | typedef vec3T<float> vec3; // main.cpp places math::vec3 in structs mirroring HLSL float3 constants
302 |
303 | typedef vec4T<int> vec4i;
304 | typedef vec4T<float> vec4;
305 |
306 | typedef mat33T<float> mat33;
307 |
308 | typedef mat44T<float> mat44; // main.cpp places math::mat44 where the HLSL side declares float4x4
309 | }
310 |
311 | #undef IMPL_COMPONENT_OP2
312 | #undef IMPL_COMPONENT_OP3
313 | #undef IMPL_COMPONENT_OP4
314 |
315 | #undef IMPL_LINEAR_OPS
316 | #undef IMPL_VECTOR_OPS
317 | #undef IMPL_MATRIX_OPS
318 | #endif // MATH_H_INCLUDED
--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
1 | #define WIN32_LEAN_AND_MEAN
2 | #define NOMINMAX
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #include "d3du.h"
10 | #include "util.h"
11 | #include "math.h"
12 |
13 | static union { // 16 pointer slots viewable as three D3D pointer types; static storage, so they start out null
14 | ID3D11Buffer* buffers[16];
15 | ID3D11ShaderResourceView* srvs[16];
16 | ID3D11RenderTargetView* rtvs[16];
17 | } s_no; // NOTE(review): presumably passed to the D3D *Set* calls to unbind slots ("no buffers" / "no SRVs") - confirm at the call sites
18 |
19 | struct CubeConstBuf { // CPU-side mirror of cbuffer CubeConsts in shaders.hlsl (same member names and order)
20 | math::mat44 clip_from_world;
21 | math::vec3 world_down_vector;
22 | float time_offs; // shares a 4-float slot with world_down_vector, as in the HLSL declaration
23 |
24 | math::vec3 light_color_ambient;
25 | float pad1; // pad1..pad5 have no HLSL counterpart; presumably they round each vec3 up to a 16-byte register - confirm against HLSL packing rules
26 | math::vec3 light_color_key;
27 | float pad2;
28 | math::vec3 light_color_fill;
29 | float pad3;
30 | math::vec3 light_color_back;
31 | float pad4;
32 | math::vec3 light_dir;
33 | float pad5;
34 | };
35 |
// CPU-side layout of the constant buffer that main() binds to pixel shader
// slot 1 for the particle update passes. Each vec3 is followed by a scalar;
// presumably this packs every pair into one 16-byte HLSL register -- confirm
// against shaders.hlsl.
struct UpdateConstBuf {
    math::vec3 field_scale;
    float damping;
    math::vec3 field_offs;
    float accel;
    math::vec3 field_sample_scale;
    float vel_scale;
};
44 |
// Converts one sRGB-encoded channel value to linear light.
// Values below the threshold use the linear segment of the sRGB curve,
// everything else goes through the 2.4 power segment.
static float srgb2lin(float x)
{
    static const float lin_thresh = 0.04045f;
    return (x < lin_thresh)
        ? x * (1.0f / 12.92f)
        : std::pow((x + 0.055f) / 1.055f, 2.4f);
}
53 |
54 | static math::vec3 srgb_color(int col)
55 | {
56 | return math::vec3(
57 | srgb2lin(((col >> 16) & 0xff) / 255.0f),
58 | srgb2lin(((col >> 8) & 0xff) / 255.0f),
59 | srgb2lin(((col >> 0) & 0xff) / 255.0f)
60 | );
61 | }
62 |
63 | static void* map_cbuf_typeless(d3du_context* ctx, ID3D11Buffer* buf)
64 | {
65 | D3D11_MAPPED_SUBRESOURCE mapped;
66 | HRESULT hr = ctx->ctx->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped);
67 | if (FAILED(hr))
68 | panic("D3D buffer map failed!\n");
69 | return mapped.pData;
70 | }
71 |
72 | template
73 | static T* map_cbuf(d3du_context* ctx, ID3D11Buffer* buf)
74 | {
75 | return (T*) map_cbuf_typeless(ctx, buf);
76 | }
77 |
// Counterpart of map_cbuf()/map_cbuf_typeless(): unmaps subresource 0 of buf.
static void unmap_cbuf(d3du_context* ctx, ID3D11Buffer* buf)
{
    ctx->ctx->Unmap(buf, 0);
}
82 |
83 | static ID3D11Buffer* make_cube_inds(ID3D11Device* dev, int num_cubes)
84 | {
85 | static const USHORT cube_inds[] = {
86 | 0, 2, 1, 3, 7, 2, 6, 0, 4, 1, 5, 7, 4, 6,
87 | };
88 |
89 | assert(num_cubes * 8 < 65535); // 65535 = prim restart
90 | USHORT * ind_data = new USHORT[num_cubes * 15];
91 | for (int i=0; i < num_cubes; i++)
92 | {
93 | USHORT * out_ind = ind_data + i*15;
94 | for (UINT j=0; j < 14; j++)
95 | out_ind[j] = cube_inds[j] + i*8;
96 | out_ind[14] = 0xffff;
97 | }
98 |
99 | ID3D11Buffer* ind_buf = d3du_make_buffer(dev, num_cubes * 15 * sizeof(USHORT),
100 | D3D11_USAGE_IMMUTABLE, D3D11_BIND_INDEX_BUFFER, ind_data);
101 | delete[] ind_data;
102 |
103 | return ind_buf;
104 | }
105 |
// Returns true iff x is a positive power of two (1, 2, 4, ...).
// Zero and negative values are rejected up front; this also keeps x - 1 from
// overflowing for INT_MIN, which previously was reported as a power of two.
static bool is_pow2(int x)
{
    return x > 0 && (x & (x - 1)) == 0;
}
110 |
// Returns a pseudo-random float in [0, 1], derived from rand().
static float randf()
{
    const float sample = 1.0f * rand();
    return sample / RAND_MAX;
}
115 |
116 | static math::vec3 rand_vec3_unit_sphere(float* len_sq_out = nullptr)
117 | {
118 | math::vec3 v;
119 | float l;
120 |
121 | do
122 | {
123 | v.x = 2.0f * randf() - 1.0f;
124 | v.y = 2.0f * randf() - 1.0f;
125 | v.z = 2.0f * randf() - 1.0f;
126 | l = math::len_sq(v);
127 | } while (l > 1.0f);
128 |
129 | if (len_sq_out)
130 | *len_sq_out = l;
131 | return v;
132 | }
133 |
134 | static math::vec3 rand_unit_vec3()
135 | {
136 | math::vec3 v;
137 | float l;
138 |
139 | do v = rand_vec3_unit_sphere(&l); while (l == 0.0f);
140 | return math::rsqrt(l) * v;
141 | }
142 |
// Advances the bit field of base selected by mask by step, wrapping around
// within that field; all bits outside mask are kept unchanged.
// Used to step along one axis of a flattened power-of-two 3D grid with
// wrap-around.
static int step_idx(int base, int step, int mask)
{
    const int untouched = base & ~mask;
    const int wrapped = (base + step) & mask;
    return untouched | wrapped;
}
147 |
148 | static d3du_tex* make_force_tex(ID3D11Device* dev, int size, float strength, float post_scale)
149 | {
150 | using namespace math;
151 | assert(is_pow2(size));
152 |
153 | int stepx = 1, maskx = size - 1;
154 | int stepy = size, masky = (size - 1) * size;
155 | int stepz = size*size, maskz = (size - 1) * size * size;
156 | int nelem = size * size * size;
157 | vec4* forces = new vec4[nelem];
158 |
159 | // create a random vector field
160 | for (int zo = 0; zo <= maskz; zo += stepz) {
161 | for (int yo = 0; yo <= masky; yo += stepy) {
162 | for (int xo = 0; xo <= maskx; xo += stepx) {
163 | forces[xo + yo + zo] = math::vec4(strength * rand_unit_vec3(), 0.0f);
164 | }
165 | }
166 | }
167 |
168 | // calc divergences
169 | float* div = new float[nelem];
170 | float* high = new float[nelem];
171 |
172 | float div_scale = -0.5f / (float)size;
173 |
174 | for (int zo = 0; zo <= maskz; zo += stepz) {
175 | for (int yo = 0; yo <= masky; yo += stepy) {
176 | for (int xo = 0; xo <= maskx; xo += stepx) {
177 | int o = xo + yo + zo;
178 |
179 | div[o] = div_scale *
180 | (
181 | forces[step_idx(o, stepx, maskx)].x - forces[step_idx(o, -stepx, maskx)].x +
182 | forces[step_idx(o, stepy, masky)].y - forces[step_idx(o, -stepy, masky)].y +
183 | forces[step_idx(o, stepz, maskz)].z - forces[step_idx(o, -stepz, maskz)].z
184 | );
185 | high[o] = 0.0f;
186 | }
187 | }
188 | }
189 |
190 | // gauss-seidel iteration to calc density field
191 | for (int step = 0; step < 40; step++) {
192 | for (int zo = 0; zo <= maskz; zo += stepz) {
193 | for (int yo = 0; yo <= masky; yo += stepy) {
194 | for (int xo = 0; xo <= maskx; xo += stepx) {
195 | int o = xo + yo + zo;
196 | high[o] =
197 | (
198 | high[step_idx(o, -stepx, maskx)] + high[step_idx(o, stepx, maskx)] +
199 | high[step_idx(o, -stepy, masky)] + high[step_idx(o, stepy, masky)] +
200 | high[step_idx(o, -stepz, maskz)] + high[step_idx(o, stepz, maskz)]
201 | ) * (1.0f / 6.0f) - div[o];
202 | }
203 | }
204 | }
205 | }
206 |
207 | // remove gradients from vector field
208 | float grad_scale = 0.5f * (float)size;
209 | for (int zo = 0; zo <= maskz; zo += stepz) {
210 | for (int yo = 0; yo <= masky; yo += stepy) {
211 | for (int xo = 0; xo <= maskx; xo += stepx) {
212 | int o = xo + yo + zo;
213 | vec4* f = forces + o;
214 |
215 | f->x = (f->x - grad_scale * (high[step_idx(o, stepx, maskx)] - high[step_idx(o, -stepx, maskx)])) * post_scale;
216 | f->y = (f->y - grad_scale * (high[step_idx(o, stepy, masky)] - high[step_idx(o, -stepy, masky)])) * post_scale;
217 | f->z = (f->z - grad_scale * (high[step_idx(o, stepz, maskz)] - high[step_idx(o, -stepz, maskz)])) * post_scale;
218 | }
219 | }
220 | }
221 |
222 | d3du_tex* tex = d3du_tex::make3d(dev, size, size, size, 1, DXGI_FORMAT_R32G32B32A32_FLOAT,
223 | D3D11_USAGE_IMMUTABLE, D3D11_BIND_SHADER_RESOURCE, forces, stepy * sizeof(*forces), stepz * sizeof(*forces));
224 |
225 | delete[] div;
226 | delete[] high;
227 | delete[] forces;
228 | return tex;
229 | }
230 |
231 | int main()
232 | {
233 | d3du_context* d3d = d3du_init("Momentous", 1280, 720, D3D_FEATURE_LEVEL_10_0);
234 |
235 | char* shader_source = read_file("shaders.hlsl");
236 |
237 | ID3D11VertexShader *update_vs = d3du_compile_and_create_shader(d3d->dev, shader_source,
238 | "vs_4_0", "UpdateVertShader").vs;
239 | ID3D11PixelShader *update_pos_ps = d3du_compile_and_create_shader(d3d->dev, shader_source,
240 | "ps_4_0", "UpdatePosShader").ps;
241 | ID3D11PixelShader *update_vel_ps = d3du_compile_and_create_shader(d3d->dev, shader_source,
242 | "ps_4_0", "UpdateVelShader").ps;
243 |
244 | ID3D11VertexShader *cube_vs = d3du_compile_and_create_shader(d3d->dev, shader_source,
245 | "vs_4_0", "RenderCubeVertexShader").vs;
246 | ID3D11PixelShader *cube_ps = d3du_compile_and_create_shader(d3d->dev, shader_source,
247 | "ps_4_0", "RenderCubePixelShader").ps;
248 |
249 | free(shader_source);
250 |
251 | static const UINT kChunkSize = 1024;
252 | static const UINT kNumCubes = 48 * 1024;
253 | static const UINT kTexHeight = (kNumCubes + kChunkSize - 1) / kChunkSize;
254 |
255 | ID3D11Buffer* update_const_buf = d3du_make_buffer(d3d->dev, sizeof(UpdateConstBuf),
256 | D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, NULL);
257 |
258 | ID3D11Buffer* cube_const_buf = d3du_make_buffer(d3d->dev, sizeof(CubeConstBuf),
259 | D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, NULL);
260 |
261 | ID3D11Buffer* cube_index_buf = make_cube_inds(d3d->dev, kChunkSize);
262 |
263 | ID3D11RasterizerState* raster_state = d3du_simple_raster(d3d->dev, D3D11_CULL_BACK, true, false);
264 | ID3D11SamplerState* force_sampler = d3du_simple_sampler(d3d->dev, D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT, D3D11_TEXTURE_ADDRESS_WRAP);
265 |
266 | // triple-buffer for position, plus velocity
267 | d3du_tex* part_tex[4];
268 | for (int i=0; i < 4; i++)
269 | part_tex[i] = d3du_tex::make2d(d3d->dev, kChunkSize, kTexHeight, 1, DXGI_FORMAT_R32G32B32A32_FLOAT,
270 | D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, NULL, 0);
271 |
272 | d3du_tex* force_tex = make_force_tex(d3d->dev, 32, 1.0f, 0.001f);
273 |
274 | D3D11_VIEWPORT part_vp = d3du_full_tex2d_viewport(part_tex[0]->tex2d);
275 |
276 | int frame = 0;
277 | unsigned int cur_part = 0;
278 | int num_cubes = kNumCubes;
279 | unsigned int spawn_counter = 0;
280 |
281 | while (d3du_handle_events(d3d)) {
282 | using namespace math;
283 |
284 | static const float part_size = 0.001f;
285 |
286 | vec3 emit_pos(0.0f);
287 | emit_pos.x = 0.7f * sin(frame * 0.001f);
288 |
289 | // spawn new particles
290 | {
291 | static const int kSpawnCount = 256;
292 | vec4 pos_old[kSpawnCount];
293 | vec4 pos_new[kSpawnCount];
294 |
295 | for (int i = 0; i < kSpawnCount; i++) {
296 | vec3 pos = emit_pos + rand_vec3_unit_sphere() * 0.002f;
297 | vec3 vel = rand_vec3_unit_sphere() * 0.003f;
298 |
299 | pos_old[i] = vec4(pos - vel, part_size);
300 | pos_new[i] = vec4(pos, part_size);
301 | }
302 |
303 | // upload
304 | D3D11_BOX box = { };
305 | box.left = spawn_counter % kChunkSize;
306 | box.right = box.left + kSpawnCount;
307 | box.top = spawn_counter / kChunkSize;
308 | box.bottom = box.top + 1;
309 | box.front = 0;
310 | box.back = 1;
311 | d3d->ctx->UpdateSubresource(part_tex[(cur_part + 2) % 3]->tex2d, 0, &box, pos_old, 0, 0);
312 | d3d->ctx->UpdateSubresource(part_tex[cur_part]->tex2d, 0, &box, pos_new, 0, 0);
313 |
314 | spawn_counter = (spawn_counter + kSpawnCount) % num_cubes;
315 | }
316 |
317 | // set up update constant buffer
318 | auto update_consts = map_cbuf(d3d, update_const_buf);
319 | update_consts->field_scale = math::vec3(32.0f);
320 | update_consts->damping = 0.99f;
321 | update_consts->field_offs = math::vec3(0.0f);
322 | update_consts->accel = 0.75f;
323 | update_consts->field_sample_scale = math::vec3(1.0f / 32.0f);
324 | update_consts->vel_scale = part_size * 6.0f;
325 | unmap_cbuf(d3d, update_const_buf);
326 |
327 | // update position (potentially several time steps)
328 | d3d->ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
329 |
330 | d3d->ctx->VSSetShader(update_vs, NULL, 0);
331 | d3d->ctx->RSSetViewports(1, &part_vp);
332 |
333 | d3d->ctx->PSSetShader(update_pos_ps, NULL, 0);
334 | d3d->ctx->PSSetSamplers(0, 1, &force_sampler);
335 | d3d->ctx->PSSetConstantBuffers(1, 1, &update_const_buf);
336 | d3d->ctx->PSSetShaderResources(2, 1, &force_tex->srv);
337 | for (int step=0; step < 1; step++) {
338 | cur_part = (cur_part + 1) % 3;
339 |
340 | ID3D11ShaderResourceView* srvs[2];
341 | for (int i=0; i < 2; i++)
342 | srvs[i] = part_tex[(cur_part + 1 + i) % 3]->srv;
343 |
344 | d3d->ctx->PSSetShaderResources(0, 2, srvs);
345 | d3d->ctx->OMSetRenderTargets(1, &part_tex[cur_part]->rtv, NULL);
346 | d3d->ctx->Draw(3, 0);
347 | d3d->ctx->PSSetShaderResources(0, 2, s_no.srvs);
348 | d3d->ctx->OMSetRenderTargets(1, s_no.rtvs, NULL);
349 | }
350 |
351 | // update velocities
352 | {
353 | ID3D11ShaderResourceView* srvs[2];
354 | for (int i=0; i < 2; i++)
355 | srvs[i] = part_tex[(cur_part + 2 + i) % 3]->srv;
356 |
357 | d3d->ctx->PSSetShader(update_vel_ps, NULL, 0);
358 | d3d->ctx->PSSetShaderResources(0, 2, srvs);
359 | d3d->ctx->OMSetRenderTargets(1, &part_tex[3]->rtv, NULL);
360 | d3d->ctx->Draw(3, 0);
361 | d3d->ctx->PSSetShaderResources(0, 2, s_no.srvs);
362 | d3d->ctx->OMSetRenderTargets(1, s_no.rtvs, NULL);
363 | }
364 |
365 | static const float clear_color[4] = { 0.2f, 0.4f, 0.6f, 1.0f };
366 | d3d->ctx->ClearDepthStencilView(d3d->depthbuf_dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
367 | d3d->ctx->ClearRenderTargetView(d3d->backbuf_rtv, clear_color);
368 |
369 | // back to main render target and viewport
370 | d3d->ctx->OMSetRenderTargets(1, &d3d->backbuf_rtv, d3d->depthbuf_dsv);
371 | d3d->ctx->RSSetViewports(1, &d3d->default_vp);
372 |
373 | // set up camera
374 | vec3 world_cam_pos(0.0f, 0.0f, -0.9f);
375 | vec3 world_cam_target = emit_pos;
376 | mat44 view_from_world = mat44::look_at(world_cam_pos, world_cam_target, vec3(0,1,0));
377 |
378 | // projection
379 | mat44 clip_from_view = mat44::perspectiveD3D(1280.0f / 720.0f, 1.0f, 0.01f, 50.0f);
380 | mat44 clip_from_world = clip_from_view * view_from_world;
381 |
382 | auto cube_consts = map_cbuf(d3d, cube_const_buf);
383 | cube_consts->clip_from_world = clip_from_world;
384 | cube_consts->world_down_vector = math::vec3(0.0f, 1.0f, 0.0f);
385 | cube_consts->time_offs = frame * 0.0001f;
386 | cube_consts->light_color_ambient = srgb_color(0x202020);
387 | cube_consts->light_color_key = srgb_color(0xc0c0c0);
388 | cube_consts->light_color_back = srgb_color(0x101040);
389 | cube_consts->light_color_fill = srgb_color(0x602020);
390 | cube_consts->light_dir = normalize(vec3(0.0f, -0.7f, -0.3f));
391 | unmap_cbuf(d3d, cube_const_buf);
392 |
393 | // render cubes
394 | ID3D11ShaderResourceView* part_pos_srvs[2];
395 | part_pos_srvs[0] = part_tex[cur_part]->srv;
396 | part_pos_srvs[1] = part_tex[3]->srv;
397 |
398 | d3d->ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
399 | d3d->ctx->IASetIndexBuffer(cube_index_buf, DXGI_FORMAT_R16_UINT, 0);
400 |
401 | d3d->ctx->VSSetShader(cube_vs, NULL, 0);
402 | d3d->ctx->VSSetShaderResources(0, 2, part_pos_srvs);
403 | d3d->ctx->VSSetConstantBuffers(0, 1, &cube_const_buf);
404 |
405 | d3d->ctx->RSSetState(raster_state);
406 |
407 | d3d->ctx->PSSetShader(cube_ps, NULL, 0);
408 | d3d->ctx->PSSetConstantBuffers(0, 1, &cube_const_buf);
409 |
410 | d3d->ctx->DrawIndexedInstanced(kChunkSize * 15, (num_cubes + kChunkSize - 1) / kChunkSize, 0, 0, 0);
411 |
412 | d3d->ctx->VSSetShaderResources(0, 2, s_no.srvs);
413 |
414 | d3du_swap_buffers(d3d, true);
415 | frame++;
416 | }
417 |
418 | for (int i=0; i < 4; i++)
419 | delete part_tex[i];
420 | delete force_tex;
421 |
422 | update_const_buf->Release();
423 | cube_const_buf->Release();
424 | cube_index_buf->Release();
425 | cube_ps->Release();
426 | cube_vs->Release();
427 | update_vs->Release();
428 | update_pos_ps->Release();
429 | update_vel_ps->Release();
430 | raster_state->Release();
431 | force_sampler->Release();
432 |
433 | d3du_shutdown(d3d);
434 | return 0;
435 | }
--------------------------------------------------------------------------------
/d3du.cpp:
--------------------------------------------------------------------------------
1 | #define WIN32_LEAN_AND_MEAN
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include "d3du.h"
7 | #include "util.h"
8 |
9 | #pragma comment(lib, "d3d11.lib")
10 | #pragma comment(lib, "d3dcompiler.lib")
11 |
12 | static LRESULT CALLBACK window_proc( HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam )
13 | {
14 | //d3du_context * ctx = (glx_context *) GetWindowLongPtrA( hwnd, GWLP_USERDATA );
15 |
16 | switch ( msg )
17 | {
18 | case WM_CREATE:
19 | {
20 | CREATESTRUCTA * cs = (CREATESTRUCTA *) lparam;
21 | SetWindowLongPtrA( hwnd, GWLP_USERDATA, (LONG_PTR)cs->lpCreateParams );
22 | }
23 | break;
24 |
25 | case WM_ERASEBKGND:
26 | return 1;
27 |
28 | case WM_PAINT:
29 | ValidateRect( hwnd, NULL );
30 | return 0;
31 |
32 | case WM_CHAR:
33 | if ( wparam == 27 ) // escape
34 | PostMessage( hwnd, WM_CLOSE, 0, 0 );
35 | return 0;
36 |
37 | case WM_DESTROY:
38 | PostQuitMessage( 0 );
39 | break;
40 | }
41 |
42 | return DefWindowProcA( hwnd, msg, wparam, lparam );
43 | }
44 |
45 | template
46 | static void safe_release( T * * p )
47 | {
48 | if ( *p )
49 | {
50 | (*p)->Release();
51 | *p = NULL;
52 | }
53 | }
54 |
55 | static d3du_context * d3du_init_fail( d3du_context * ctx )
56 | {
57 | safe_release( &ctx->backbuf );
58 | safe_release( &ctx->depthbuf );
59 | safe_release( &ctx->backbuf_rtv );
60 | safe_release( &ctx->depthbuf_dsv );
61 | safe_release( &ctx->swap );
62 | safe_release( &ctx->ctx );
63 | safe_release( &ctx->dev );
64 | if ( ctx->hwnd ) DestroyWindow( ctx->hwnd );
65 | delete ctx;
66 | return NULL;
67 | }
68 |
69 | d3du_context * d3du_init( char const * title, int w, int h, D3D_FEATURE_LEVEL feature_level )
70 | {
71 | d3du_context * ctx = new d3du_context;
72 | memset( ctx, 0, sizeof( *ctx ) );
73 |
74 | HINSTANCE hinst = GetModuleHandleA( NULL );
75 |
76 | WNDCLASSA wc = { 0 };
77 | wc.hbrBackground = (HBRUSH) GetStockObject( BLACK_BRUSH );
78 | wc.hCursor = LoadCursor( 0, IDC_ARROW );
79 | wc.hInstance = hinst;
80 | wc.lpfnWndProc = window_proc;
81 | wc.lpszClassName = "rad.d3du";
82 | RegisterClassA( &wc );
83 |
84 | DWORD style = WS_OVERLAPPEDWINDOW;
85 |
86 | RECT rc = { 0, 0, w, h };
87 | AdjustWindowRect( &rc, style, FALSE );
88 |
89 | ctx->hwnd = CreateWindowExA( 0, "rad.d3du", title, style | WS_VISIBLE, CW_USEDEFAULT, CW_USEDEFAULT, rc.right - rc.left, rc.bottom - rc.top, NULL, NULL, hinst, ctx );
90 | if ( !ctx->hwnd )
91 | return d3du_init_fail( ctx );
92 |
93 | DXGI_SWAP_CHAIN_DESC swap_desc = { 0 };
94 | swap_desc.BufferDesc.Width = w;
95 | swap_desc.BufferDesc.Height = h;
96 | swap_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
97 | swap_desc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
98 | swap_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
99 | swap_desc.SampleDesc.Count = 1;
100 | swap_desc.SampleDesc.Quality = 0;
101 | swap_desc.BufferUsage = DXGI_USAGE_BACK_BUFFER | DXGI_USAGE_RENDER_TARGET_OUTPUT;
102 | swap_desc.BufferCount = 1;
103 | swap_desc.OutputWindow = ctx->hwnd;
104 | swap_desc.Windowed = TRUE;
105 | swap_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
106 | swap_desc.Flags = 0;
107 |
108 | D3D_FEATURE_LEVEL out_level;
109 | UINT flags = 0;
110 | #ifdef _DEBUG
111 | flags |= D3D11_CREATE_DEVICE_DEBUG;
112 | #endif
113 | HRESULT hr = D3D11CreateDeviceAndSwapChain( NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags,
114 | &feature_level, 1, D3D11_SDK_VERSION, &swap_desc, &ctx->swap, &ctx->dev, &out_level, &ctx->ctx );
115 | if ( FAILED( hr ) )
116 | return d3du_init_fail( ctx );
117 |
118 | // render target and rtv
119 | hr = ctx->swap->GetBuffer( 0, __uuidof(ID3D11Texture2D), (void **)&ctx->backbuf );
120 | if ( FAILED( hr ) )
121 | return d3du_init_fail( ctx );
122 |
123 | hr = ctx->dev->CreateRenderTargetView( ctx->backbuf, NULL, &ctx->backbuf_rtv );
124 | if ( FAILED( hr ) )
125 | return d3du_init_fail( ctx );
126 |
127 | // depth/stencil surface and dsv
128 | D3D11_TEXTURE2D_DESC desc =
129 | {
130 | w, h, 1, 1, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, { 1, 0 },
131 | D3D11_USAGE_DEFAULT, D3D11_BIND_DEPTH_STENCIL, 0, 0
132 | };
133 | hr = ctx->dev->CreateTexture2D( &desc, NULL, &ctx->depthbuf );
134 | if ( FAILED( hr ) )
135 | return d3du_init_fail( ctx );
136 |
137 | hr = ctx->dev->CreateDepthStencilView( ctx->depthbuf, NULL, &ctx->depthbuf_dsv );
138 | if ( FAILED( hr ) )
139 | return d3du_init_fail( ctx );
140 |
141 | ctx->default_vp.TopLeftX = 0.0f;
142 | ctx->default_vp.TopLeftY = 0.0f;
143 | ctx->default_vp.Width = (float)w;
144 | ctx->default_vp.Height = (float)h;
145 | ctx->default_vp.MinDepth = 0.0f;
146 | ctx->default_vp.MaxDepth = 1.0f;
147 |
148 | // bind default RT, DSV and viewport for convenience.
149 | ctx->ctx->OMSetRenderTargets( 1, &ctx->backbuf_rtv, ctx->depthbuf_dsv );
150 | ctx->ctx->RSSetViewports( 1, &ctx->default_vp );
151 |
152 | return ctx;
153 | }
154 |
155 | void d3du_shutdown( d3du_context * ctx )
156 | {
157 | if ( ctx->ctx )
158 | ctx->ctx->ClearState();
159 |
160 | safe_release( &ctx->backbuf );
161 | safe_release( &ctx->depthbuf );
162 | safe_release( &ctx->backbuf_rtv );
163 | safe_release( &ctx->depthbuf_dsv );
164 | safe_release( &ctx->swap );
165 | safe_release( &ctx->ctx );
166 |
167 | #if 0 && defined(_DEBUG) // use to trace leaks
168 | if ( ctx->dev )
169 | {
170 | ID3D11Debug * dbg;
171 | ctx->dev->QueryInterface( __uuidof(ID3D11Debug), (void**)&dbg );
172 | dbg->ReportLiveDeviceObjects( D3D11_RLDO_DETAIL );
173 | dbg->Release();
174 | }
175 | #endif
176 |
177 | safe_release( &ctx->dev );
178 | DestroyWindow( ctx->hwnd );
179 | delete ctx;
180 | }
181 |
182 | int d3du_handle_events( d3du_context * ctx )
183 | {
184 | MSG msg;
185 | int ok = 1;
186 |
187 | while ( PeekMessage( &msg, 0, 0, 0, PM_REMOVE ) )
188 | {
189 | if ( msg.message == WM_QUIT )
190 | ok = 0;
191 | TranslateMessage( &msg );
192 | DispatchMessage( &msg );
193 | }
194 |
195 | return ok;
196 | }
197 |
198 | void d3du_swap_buffers( d3du_context * ctx, bool vsync )
199 | {
200 | ctx->swap->Present( vsync ? 1 : 0, 0 );
201 | }
202 |
203 | D3D11_VIEWPORT d3du_full_tex2d_viewport( ID3D11Texture2D * tex )
204 | {
205 | D3D11_TEXTURE2D_DESC desc;
206 | tex->GetDesc( &desc );
207 |
208 | D3D11_VIEWPORT vp;
209 | vp.TopLeftX = 0.0f;
210 | vp.TopLeftY = 0.0f;
211 | vp.Width = (float)desc.Width;
212 | vp.Height = (float)desc.Height;
213 | vp.MinDepth = 0.0f;
214 | vp.MaxDepth = 1.0f;
215 |
216 | return vp;
217 | }
218 |
219 | ID3D11Buffer * d3du_make_buffer( ID3D11Device * dev, UINT size, D3D11_USAGE use, UINT bind_flags, const void * initial )
220 | {
221 | D3D11_BUFFER_DESC desc;
222 | desc.ByteWidth = size;
223 | desc.Usage = use;
224 | desc.BindFlags = bind_flags;
225 | switch ( use )
226 | {
227 | case D3D11_USAGE_DYNAMIC:
228 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
229 | break;
230 |
231 | case D3D11_USAGE_STAGING:
232 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
233 | break;
234 |
235 | default:
236 | desc.CPUAccessFlags = 0;
237 | break;
238 | }
239 | desc.MiscFlags = 0;
240 | desc.StructureByteStride = 0;
241 |
242 | D3D11_SUBRESOURCE_DATA initial_data;
243 | initial_data.pSysMem = initial;
244 | initial_data.SysMemPitch = 0;
245 | initial_data.SysMemSlicePitch = 0;
246 |
247 | ID3D11Buffer * buf;
248 | HRESULT hr = dev->CreateBuffer( &desc, initial ? &initial_data : NULL, &buf );
249 | if ( FAILED( hr ) )
250 | panic( "D3D CreateBuffer failed: 0x%08x\n", hr );
251 |
252 | return buf;
253 | }
254 |
255 | unsigned char * d3du_get_buffer( d3du_context * ctx, ID3D11Buffer * buf, int * size_in_bytes )
256 | {
257 | D3D11_BUFFER_DESC desc;
258 | buf->GetDesc( &desc );
259 | desc.Usage = D3D11_USAGE_STAGING;
260 | desc.BindFlags = 0;
261 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
262 | desc.MiscFlags = 0;
263 |
264 | ID3D11Buffer * temp_buf;
265 | HRESULT hr = ctx->dev->CreateBuffer( &desc, NULL, &temp_buf );
266 | if ( FAILED( hr ) )
267 | return NULL;
268 |
269 | ctx->ctx->CopyResource( temp_buf, buf );
270 |
271 | D3D11_MAPPED_SUBRESOURCE mapped;
272 | hr = ctx->ctx->Map( temp_buf, 0, D3D11_MAP_READ, 0, &mapped );
273 | if ( FAILED( hr ) )
274 | panic( "d3du_get_buffer map failed\n" );
275 |
276 | unsigned char * result = new unsigned char[desc.ByteWidth];
277 | memcpy( result, mapped.pData, desc.ByteWidth );
278 |
279 | ctx->ctx->Unmap( temp_buf, 0 );
280 | temp_buf->Release();
281 |
282 | if ( size_in_bytes )
283 | *size_in_bytes = desc.ByteWidth;
284 |
285 | return result;
286 | }
287 |
288 | static unsigned int get_bpp( DXGI_FORMAT fmt )
289 | {
290 | unsigned int bpp = 0;
291 |
292 | switch ( fmt )
293 | {
294 | case DXGI_FORMAT_R8_TYPELESS:
295 | case DXGI_FORMAT_R8_UNORM:
296 | case DXGI_FORMAT_R8_UINT:
297 | case DXGI_FORMAT_R8_SNORM:
298 | case DXGI_FORMAT_R8_SINT:
299 | bpp = 1;
300 | break;
301 |
302 | case DXGI_FORMAT_R8G8_TYPELESS:
303 | case DXGI_FORMAT_R8G8_UNORM:
304 | case DXGI_FORMAT_R8G8_UINT:
305 | case DXGI_FORMAT_R8G8_SNORM:
306 | case DXGI_FORMAT_R8G8_SINT:
307 | bpp = 2;
308 | break;
309 |
310 | default:
311 | panic( "unsupported DXGI format %d\n", fmt );
312 | }
313 |
314 | return bpp;
315 | }
316 |
317 | unsigned char * d3du_read_texture_level( d3du_context * ctx, ID3D11ShaderResourceView * srv, int srv_level )
318 | {
319 | D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc;
320 | srv->GetDesc( &srv_desc );
321 |
322 | if ( srv_desc.ViewDimension != D3D11_SRV_DIMENSION_TEXTURE2D )
323 | panic( "d3du_read_texture_level only supports 2D textures right now" );
324 |
325 | unsigned int bpp = get_bpp( srv_desc.Format );
326 | int res_level = srv_level + srv_desc.Texture2D.MostDetailedMip;
327 |
328 | D3D11_TEXTURE2D_DESC tex_desc;
329 | ID3D11Texture2D * tex2d;
330 | srv->GetResource( (ID3D11Resource **)&tex2d );
331 | tex2d->GetDesc( &tex_desc );
332 |
333 | tex_desc.Usage = D3D11_USAGE_STAGING;
334 | tex_desc.BindFlags = 0;
335 | tex_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
336 | tex_desc.MiscFlags = 0;
337 |
338 | ID3D11Texture2D * temp_tex;
339 | HRESULT hr = ctx->dev->CreateTexture2D( &tex_desc, NULL, &temp_tex );
340 | if ( FAILED( hr ) )
341 | {
342 | tex2d->Release();
343 | return NULL;
344 | }
345 |
346 | ctx->ctx->CopyResource( temp_tex, tex2d );
347 |
348 | D3D11_MAPPED_SUBRESOURCE mapped;
349 | hr = ctx->ctx->Map( temp_tex, res_level, D3D11_MAP_READ, 0, &mapped );
350 | if ( FAILED( hr ) )
351 | panic( "d3du_read_texture_level map failed\n" );
352 |
353 | unsigned int out_width = tex_desc.Width >> res_level;
354 | unsigned int out_height = tex_desc.Height >> res_level;
355 |
356 | if ( !out_width ) out_width = 1;
357 | if ( !out_height ) out_height = 1;
358 |
359 | unsigned int out_pitch = out_width * bpp;
360 |
361 | unsigned char * result = new unsigned char[out_pitch * out_height];
362 | for ( unsigned int y = 0 ; y < out_height ; y++ )
363 | memcpy( result + y*out_pitch, (unsigned char *)mapped.pData + y*mapped.RowPitch, out_pitch );
364 |
365 | ctx->ctx->Unmap( temp_tex, res_level );
366 | temp_tex->Release();
367 | tex2d->Release();
368 |
369 | return result;
370 | }
371 |
372 | ID3D11RasterizerState * d3du_simple_raster( ID3D11Device * dev, D3D11_CULL_MODE cull, bool front_ccw, bool scissor_enable )
373 | {
374 | D3D11_RASTERIZER_DESC raster_desc = { D3D11_FILL_SOLID };
375 | raster_desc.CullMode = cull;
376 | raster_desc.FrontCounterClockwise = front_ccw;
377 | raster_desc.DepthClipEnable = TRUE;
378 | raster_desc.ScissorEnable = scissor_enable;
379 |
380 | ID3D11RasterizerState * raster_state = NULL;
381 | HRESULT hr = dev->CreateRasterizerState( &raster_desc, &raster_state );
382 | if ( FAILED( hr ) )
383 | panic( "CreateRasterizerState failed\n" );
384 |
385 | return raster_state;
386 | }
387 |
388 | ID3D11BlendState * d3du_simple_blend( ID3D11Device * dev, D3D11_BLEND src_blend, D3D11_BLEND dest_blend )
389 | {
390 | D3D11_BLEND_DESC blend_desc = { FALSE, FALSE };
391 | blend_desc.RenderTarget[0].BlendEnable = ( src_blend != D3D11_BLEND_ONE || dest_blend != D3D11_BLEND_ZERO );
392 | blend_desc.RenderTarget[0].SrcBlend = src_blend;
393 | blend_desc.RenderTarget[0].DestBlend = dest_blend;
394 | blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
395 | blend_desc.RenderTarget[0].SrcBlendAlpha = src_blend;
396 | blend_desc.RenderTarget[0].DestBlendAlpha = dest_blend;
397 | blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
398 | blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
399 |
400 | ID3D11BlendState * blend_state = NULL;
401 | HRESULT hr = dev->CreateBlendState( &blend_desc, &blend_state );
402 | if ( FAILED( hr ) )
403 | panic( "CreateBlendState failed\n" );
404 |
405 | return blend_state;
406 | }
407 |
408 | ID3D11SamplerState * d3du_simple_sampler( ID3D11Device * dev, D3D11_FILTER filter, D3D11_TEXTURE_ADDRESS_MODE addr )
409 | {
410 | HRESULT hr;
411 | ID3D11SamplerState * sampler = NULL;
412 |
413 | D3D11_SAMPLER_DESC desc;
414 | desc.Filter = filter;
415 | desc.AddressU = addr;
416 | desc.AddressV = addr;
417 | desc.AddressW = addr;
418 | desc.MipLODBias = 0.0f;
419 | desc.MaxAnisotropy = 8;
420 | desc.ComparisonFunc = D3D11_COMPARISON_NEVER;
421 | desc.BorderColor[0] = 1.0f;
422 | desc.BorderColor[1] = 1.0f;
423 | desc.BorderColor[2] = 1.0f;
424 | desc.BorderColor[3] = 1.0f;
425 | desc.MinLOD = -1e+20f;
426 | desc.MaxLOD = 1e+20f;
427 |
428 | hr = dev->CreateSamplerState( &desc, &sampler );
429 | if ( FAILED( hr ) )
430 | panic( "CreateSamplerState failed\n" );
431 |
432 | return sampler;
433 | }
434 |
435 | ID3DBlob * d3du_compile_source_or_die( char const * source, char const * profile, char const * entrypt )
436 | {
437 | ID3DBlob * code;
438 | ID3DBlob * errors;
439 | HRESULT hr = D3DCompile( source, strlen( source ), NULL, NULL, NULL, entrypt, profile, D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_OPTIMIZATION_LEVEL1, 0,
440 | &code, &errors );
441 |
442 | if ( errors )
443 | {
444 | OutputDebugStringA( "While compiling:\n" );
445 | OutputDebugStringA( source );
446 | OutputDebugStringA( "Got errors:\n" );
447 | OutputDebugStringA( (char*)errors->GetBufferPointer() );
448 | errors->Release();
449 | }
450 |
451 | if ( FAILED( hr ) )
452 | panic( "Shader compilation failed!\n" );
453 |
454 | return code;
455 | }
456 |
457 | d3du_shader d3du_compile_and_create_shader( ID3D11Device * dev, char const * source, char const * profile, char const * entrypt )
458 | {
459 | ID3DBlob * code = d3du_compile_source_or_die( source, profile, entrypt );
460 | HRESULT hr = S_OK;
461 | d3du_shader sh;
462 |
463 | sh.generic = NULL;
464 |
465 | switch ( profile[0] )
466 | {
467 | case 'p': hr = dev->CreatePixelShader( code->GetBufferPointer(), code->GetBufferSize(), NULL, &sh.ps ); break;
468 | case 'v': hr = dev->CreateVertexShader( code->GetBufferPointer(), code->GetBufferSize(), NULL, &sh.vs ); break;
469 | case 'c': hr = dev->CreateComputeShader( code->GetBufferPointer(), code->GetBufferSize(), NULL, &sh.cs ); break;
470 | default: panic( "Unsupported shader profile '%s'\n", profile );
471 | }
472 |
473 | if ( FAILED( hr ) )
474 | panic( "Error creating shader.\n" );
475 |
476 | return sh;
477 | }
478 |
// Wraps an already-created resource together with its (optional) views.
// The pointers are stored as given; no AddRef is performed here, and the
// destructor releases them.
d3du_tex::d3du_tex( ID3D11Resource * resrc, ID3D11ShaderResourceView * srv, ID3D11RenderTargetView * rtv )
    : resrc(resrc), srv(srv), rtv(rtv)
{
}
483 |
// Releases the resource and both views; safe_release skips the ones that
// are NULL (e.g. rtv for textures created without a render target view).
d3du_tex::~d3du_tex()
{
    safe_release( &resrc );
    safe_release( &srv );
    safe_release( &rtv );
}
490 |
491 | d3du_tex * d3du_tex::make2d( ID3D11Device * dev, UINT w, UINT h, UINT num_mips, DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT initial_pitch )
492 | {
493 | HRESULT hr = S_OK;
494 | ID3D11Texture2D *tex = NULL;
495 | ID3D11ShaderResourceView *srv = NULL;
496 | ID3D11RenderTargetView *rtv = NULL;
497 |
498 | D3D11_TEXTURE2D_DESC desc;
499 | desc.Width = w;
500 | desc.Height = h;
501 | desc.MipLevels = num_mips;
502 | desc.ArraySize = 1;
503 | desc.Format = fmt;
504 | desc.SampleDesc.Count = 1;
505 | desc.SampleDesc.Quality = 0;
506 | desc.Usage = usage;
507 | desc.BindFlags = bind_flags;
508 | desc.CPUAccessFlags = 0;
509 | desc.MiscFlags = 0;
510 |
511 | D3D11_SUBRESOURCE_DATA initial_data;
512 | initial_data.pSysMem = initial;
513 | initial_data.SysMemPitch = initial_pitch;
514 | initial_data.SysMemSlicePitch = 0;
515 |
516 | hr = dev->CreateTexture2D( &desc, initial ? &initial_data : nullptr, &tex );
517 |
518 | if ( !FAILED( hr ) && ( bind_flags & D3D11_BIND_SHADER_RESOURCE ) )
519 | hr = dev->CreateShaderResourceView( tex, nullptr, &srv );
520 |
521 | if ( !FAILED( hr ) && ( bind_flags & D3D11_BIND_RENDER_TARGET ) )
522 | hr = dev->CreateRenderTargetView( tex, nullptr, &rtv );
523 |
524 | if ( FAILED( hr ) )
525 | {
526 | safe_release( &tex );
527 | safe_release( &srv );
528 | safe_release( &rtv );
529 | return NULL;
530 | } else
531 | return new d3du_tex( tex, srv, rtv );
532 | }
533 |
534 | d3du_tex * d3du_tex::make3d( ID3D11Device * dev, UINT w, UINT h, UINT d, UINT num_mips, DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT init_row_pitch, UINT init_depth_pitch )
535 | {
536 | HRESULT hr = S_OK;
537 | ID3D11Texture3D *tex = NULL;
538 | ID3D11ShaderResourceView *srv = NULL;
539 |
540 | D3D11_TEXTURE3D_DESC desc;
541 | desc.Width = w;
542 | desc.Height = h;
543 | desc.Depth = d;
544 | desc.MipLevels = num_mips;
545 | desc.Format = fmt;
546 | desc.Usage = usage;
547 | desc.BindFlags = bind_flags;
548 | desc.CPUAccessFlags = 0;
549 | desc.MiscFlags = 0;
550 |
551 | D3D11_SUBRESOURCE_DATA initial_data;
552 | initial_data.pSysMem = initial;
553 | initial_data.SysMemPitch = init_row_pitch;
554 | initial_data.SysMemSlicePitch = init_depth_pitch;
555 |
556 | hr = dev->CreateTexture3D( &desc, initial ? &initial_data : nullptr, &tex );
557 |
558 | if ( !FAILED( hr ) && ( bind_flags & D3D11_BIND_SHADER_RESOURCE ) )
559 | hr = dev->CreateShaderResourceView( tex, nullptr, &srv );
560 |
561 | if ( FAILED( hr ) )
562 | {
563 | safe_release( &tex );
564 | safe_release( &srv );
565 | return NULL;
566 | } else
567 | return new d3du_tex( tex, srv, NULL );
568 | }
569 |
static const size_t TIMER_SLOTS = 4; // depth of queue of in-flight queries (must be pow2)

// timer_get() wraps indices with "& ( TIMER_SLOTS - 1 )", which is only a
// valid modulo when the slot count is a nonzero power of two.
static_assert( TIMER_SLOTS != 0 && ( TIMER_SLOTS & ( TIMER_SLOTS - 1 ) ) == 0, "TIMER_SLOTS must be a nonzero power of two" );
571 |
// The set of GPU queries used to time one bracketed region
// (one d3du_timer_bracket_begin / d3du_timer_bracket_end pair).
struct d3du_timer_group
{
    ID3D11Query * begin;    // D3D11_QUERY_TIMESTAMP issued at the start of the bracket
    ID3D11Query * end;      // D3D11_QUERY_TIMESTAMP issued at the end of the bracket
    ID3D11Query * disjoint; // D3D11_QUERY_TIMESTAMP_DISJOINT spanning the whole bracket
};
578 |
// GPU timer: a ring of TIMER_SLOTS query groups plus the statistics gathered
// from the groups that have been retired so far.
struct d3du_timer
{
    d3du_timer_group grp[TIMER_SLOTS]; // ring of query groups, addressed through timer_get
    size_t issue_idx; // index of timer we're issuing (only ever incremented; wrapped by timer_get)
    size_t retire_idx; // index of timer we're retiring (only ever incremented; wrapped by timer_get)
    size_t warmup_frames; // measurements retired while retire_idx is below this are not recorded
    run_stats * stats; // accumulates the recorded bracket durations
};
587 |
588 | static d3du_timer_group * timer_get( d3du_timer * timer, size_t index )
589 | {
590 | return &timer->grp[ index & ( TIMER_SLOTS - 1 ) ];
591 | }
592 |
593 | static void timer_ensure_max_in_flight( d3du_context * ctx, d3du_timer * timer, size_t max_in_flight )
594 | {
595 | while ( ( timer->issue_idx - timer->retire_idx ) > max_in_flight )
596 | {
597 | // retire oldest timer in flight
598 | d3du_timer_group * grp = timer_get( timer, timer->retire_idx );
599 | UINT64 start, end;
600 | D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint;
601 | HRESULT hr;
602 |
603 | while ( ( hr = ctx->ctx->GetData( grp->begin, &start, sizeof( UINT64 ), 0 ) ) != S_OK );
604 | while ( ( hr = ctx->ctx->GetData( grp->end, &end, sizeof( UINT64 ), 0 ) ) != S_OK );
605 | while ( ( hr = ctx->ctx->GetData( grp->disjoint, &disjoint, sizeof( disjoint ), 0 ) ) != S_OK );
606 |
607 | if ( timer->retire_idx >= timer->warmup_frames && !disjoint.Disjoint )
608 | run_stats_record( timer->stats, (float) ( 1000.0 * ( end - start ) / disjoint.Frequency ) );
609 |
610 | timer->retire_idx++;
611 | }
612 | }
613 |
614 | d3du_timer * d3du_timer_create( d3du_context * ctx, size_t warmup_frames )
615 | {
616 | d3du_timer * timer = new d3du_timer;
617 |
618 | for ( size_t i = 0 ; i < TIMER_SLOTS ; i++ )
619 | {
620 | D3D11_QUERY_DESC desc = {};
621 | HRESULT hr;
622 | desc.Query = D3D11_QUERY_TIMESTAMP;
623 | hr = ctx->dev->CreateQuery( &desc, &timer->grp[i].begin );
624 | if ( FAILED( hr ) ) panic( "CreateQuery failed.\n" );
625 | hr = ctx->dev->CreateQuery( &desc, &timer->grp[i].end );
626 | if ( FAILED( hr ) ) panic( "CreateQuery failed.\n" );
627 |
628 | desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
629 | hr = ctx->dev->CreateQuery( &desc, &timer->grp[i].disjoint );
630 | if ( FAILED( hr ) ) panic( "CreateQuery failed.\n" );
631 | }
632 |
633 | timer->issue_idx = 0;
634 | timer->retire_idx = 0;
635 | timer->warmup_frames = warmup_frames;
636 | timer->stats = run_stats_create();
637 | return timer;
638 | }
639 |
640 | void d3du_timer_destroy( d3du_timer * timer )
641 | {
642 | if ( timer )
643 | {
644 | for ( size_t i = 0 ; i < TIMER_SLOTS ; i++ )
645 | {
646 | safe_release( &timer->grp[i].begin );
647 | safe_release( &timer->grp[i].end );
648 | safe_release( &timer->grp[i].disjoint );
649 | }
650 |
651 | run_stats_destroy( timer->stats );
652 | delete timer;
653 | }
654 | }
655 |
656 | void d3du_timer_bracket_begin( d3du_context * ctx, d3du_timer * timer )
657 | {
658 | // make sure we have a free timer to issue first
659 | timer_ensure_max_in_flight( ctx, timer, TIMER_SLOTS - 1 );
660 |
661 | d3du_timer_group * grp = timer_get( timer, timer->issue_idx );
662 |
663 | ctx->ctx->Begin( grp->disjoint );
664 | ctx->ctx->End( grp->begin );
665 | timer->issue_idx++;
666 | }
667 |
668 | void d3du_timer_bracket_end( d3du_context * ctx, d3du_timer * timer )
669 | {
670 | d3du_timer_group * grp = timer_get( timer, timer->issue_idx - 1 );
671 |
672 | ctx->ctx->End( grp->end );
673 | ctx->ctx->End( grp->disjoint );
674 | }
675 |
676 | void d3du_timer_report( d3du_context * ctx, d3du_timer * timer, char const * label )
677 | {
678 | timer_ensure_max_in_flight( ctx, timer, 0 );
679 | run_stats_report( timer->stats, label );
680 | }
681 |
682 | // @cdep pre $set(c8sysincludes, -I$dxPath/include $c8sysincludes)
683 | // @cdep pre $set(csysincludes64EMT, -I$dxPath/include $csysincludes64EMT)
684 |
685 |
--------------------------------------------------------------------------------