├── .gitignore ├── momentous.sln ├── util.h ├── momentous.vcxproj.filters ├── README.md ├── d3du.h ├── util.cpp ├── shaders.hlsl ├── momentous.vcxproj ├── math.h ├── main.cpp └── d3du.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | Debug 2 | Release 3 | *.sdf 4 | *.opensdf 5 | *.suo -------------------------------------------------------------------------------- /momentous.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Express 2012 for Windows Desktop 4 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "momentous", "momentous.vcxproj", "{AD11938C-C989-43EE-B618-36CD6118C035}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Win32 = Debug|Win32 9 | Release|Win32 = Release|Win32 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {AD11938C-C989-43EE-B618-36CD6118C035}.Debug|Win32.ActiveCfg = Debug|Win32 13 | {AD11938C-C989-43EE-B618-36CD6118C035}.Debug|Win32.Build.0 = Debug|Win32 14 | {AD11938C-C989-43EE-B618-36CD6118C035}.Release|Win32.ActiveCfg = Release|Win32 15 | {AD11938C-C989-43EE-B618-36CD6118C035}.Release|Win32.Build.0 = Release|Win32 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 20 | EndGlobal 21 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef __UTIL_H__ 2 | #define __UTIL_H__ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | // General utility functions 9 | 10 | void panic( char const * fmt, ... ); 11 | char * read_file( char const * filename ); // mallocs result, you need to free() 12 | void dump_dwords( unsigned int const * vals, unsigned int num_dwords ); 13 | 14 | // pixel compare that returns position of first mismatch 15 | // pos_x / pos_y may both be NULL. 16 | int pixel_compare_pos( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h, int * pos_x, int * pos_y ); 17 | 18 | // pixel compare without position reporting 19 | int pixel_compare( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h ); 20 | void print_pixels( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h ); 21 | 22 | typedef struct run_stats run_stats; 23 | 24 | run_stats * run_stats_create( void ); 25 | void run_stats_destroy( run_stats * stats ); 26 | void run_stats_clear( run_stats * stats ); // reset all measurements 27 | void run_stats_record( run_stats * stats, float value ); // record a measurement 28 | void run_stats_report( run_stats * stats, char const * desc ); // print a report 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /momentous.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | Header Files 26 | 27 | 28 | 29 | 30 | Source Files 31 | 32 | 33 | Source Files 34 | 35 | 36 | Source Files 37 | 38 | 39 | 40 | 41 | Source Files 42 | 43 | 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Momentous 2 | 3 | This is a reimplementation of the particle system from "fr-059: momentum", 4 | using D3D10 GPU hardware. 5 | 6 | I was thinking about this a few days back; the original version was using several 7 | hacks to get good performance on shader level 2 hardware, and the actual particle 8 | system ran on the CPU. 9 | 10 | Well, the particle system is actually really easy to run on the GPU provided there 11 | is support for floating point render targets and filtering of floating point volume 12 | textures. That used to be a problem in 2007 but not today. 13 | 14 | Also, having actual HW instancing support instead of having to get by with shader 15 | instancing sure makes things easier. 16 | 17 | Tricks from the original implementation that made it into this version: 18 | 19 | * Hard-edged cubes specified using 8 vertices only (to reduce VS load). The original 20 | version determined the (faceted) normal in the pixel shader using a cube map lookup 21 | from an interpolated object-space position (passed as an attribute). This was the 22 | easiest way to do this on shader level 2 hardware, but it did tend to cause some 23 | sparkling near the edges (exactly which cube map face a near-edge sample landed on 24 | depended on interpolation rounding, which was a bit dicey). 25 | 26 | This version passes the world-space position as an attribute and then determines the 27 | face normal as the cross product of the position's derivatives. This is simpler, has 28 | no texture lookup, and does not have any sparkling. (But it does require derivative 29 | instructions). 30 | * Instance all the things! 31 | * Trilinear interpolation using smoothstepped weights: much cheaper than spline 32 | interpolation and looks almost as good. 33 | 34 | Bonus: this version also renders each cube as an indexed triangle strip (14 verts plus 35 | primitive restart) - again, the original version was written before primitive restart 36 | was a feature you could rely on. This should not make any significant difference 37 | compared to explicit quads, but it *feels* nicer. :) 38 | 39 | This uses D3D10 because I'm writing this on a Laptop with Intel integrated graphics and 40 | drivers that haven't been updated for over 2 years; was I feeling lucky enough to try 41 | GL 3? Evidently not. 42 | 43 | Oh and I really need to add some camera control and shadow mapping, like the original 44 | version had. 45 | 46 | -Fabian 'ryg' Giesen, 47 | December 2013 -------------------------------------------------------------------------------- /d3du.h: -------------------------------------------------------------------------------- 1 | #ifndef D3DU_H 2 | #define D3DU_H 3 | 4 | // you need to include windows.h and d3d11.h first. 5 | 6 | struct d3du_context { 7 | HWND hwnd; 8 | ID3D11Device * dev; 9 | ID3D11DeviceContext * ctx; 10 | IDXGISwapChain * swap; 11 | 12 | ID3D11Texture2D * backbuf; 13 | ID3D11Texture2D * depthbuf; 14 | 15 | ID3D11RenderTargetView * backbuf_rtv; 16 | ID3D11DepthStencilView * depthbuf_dsv; 17 | 18 | D3D11_VIEWPORT default_vp; 19 | }; 20 | 21 | // Creates a D3DU context and opens a window with given title and width/height 22 | d3du_context * d3du_init( char const * title, int w, int h, D3D_FEATURE_LEVEL feature_level ); 23 | 24 | // Shuts down a D3DU context and frees it. 25 | void d3du_shutdown( d3du_context * ctx ); 26 | 27 | // Processes window events. Returns 1 if OK, 0 if user requested exit. 28 | int d3du_handle_events( d3du_context * ctx ); 29 | 30 | // Swap buffers. 31 | void d3du_swap_buffers( d3du_context * ctx, bool vsync ); 32 | 33 | // Creates a D3D11_VIEWPORT for an entire render target view. 34 | D3D11_VIEWPORT d3du_full_tex2d_viewport( ID3D11Texture2D * tex ); 35 | 36 | // Creates a buffer 37 | ID3D11Buffer * d3du_make_buffer( ID3D11Device * dev, UINT size, D3D11_USAGE use, UINT bind_flags, const void * initial ); 38 | 39 | // Reads back the contents of a buffer and returns them as an unsigned char array. 40 | // size_in_bytes, when non-NULL, will receive the buffer size. 41 | // 42 | // Intended for debugging only. 43 | unsigned char * d3du_get_buffer( d3du_context * ctx, ID3D11Buffer * buf, int * size_in_bytes ); 44 | 45 | // Reads back the contents of the given mip level of a texture SRV. 46 | // 47 | // Intended for debugging only. 48 | unsigned char * d3du_read_texture_level( d3du_context * ctx, ID3D11ShaderResourceView * srv, int level ); 49 | 50 | // Creates a simple rasterizer state 51 | ID3D11RasterizerState * d3du_simple_raster( ID3D11Device * dev, D3D11_CULL_MODE cull, bool front_ccw, bool scissor_enable ); 52 | 53 | // Creates a simple blend state. 54 | ID3D11BlendState * d3du_simple_blend( ID3D11Device * dev, D3D11_BLEND src_blend, D3D11_BLEND dest_blend ); 55 | 56 | // Creates a simplified sampler state. 57 | ID3D11SamplerState * d3du_simple_sampler( ID3D11Device * dev, D3D11_FILTER filter, D3D11_TEXTURE_ADDRESS_MODE addr ); 58 | 59 | // Compiles the given shader or dies trying! 60 | ID3DBlob * d3du_compile_source_or_die( char const * source, char const * profile, char const * entrypt ); 61 | 62 | // *All* the shaders. 63 | union d3du_shader { 64 | ID3D11DeviceChild * generic; 65 | ID3D11PixelShader * ps; 66 | ID3D11VertexShader * vs; 67 | ID3D11ComputeShader * cs; 68 | }; 69 | 70 | // Compile and create a shader with the given profile on the given device 71 | d3du_shader d3du_compile_and_create_shader( ID3D11Device * dev, char const * source, char const * profile, char const * entrypt ); 72 | 73 | // Texture helper 74 | struct d3du_tex { 75 | union { 76 | ID3D11Resource * resrc; 77 | ID3D11Texture2D * tex2d; 78 | ID3D11Texture2D * tex3d; 79 | }; 80 | ID3D11ShaderResourceView * srv; 81 | ID3D11RenderTargetView * rtv; 82 | 83 | ~d3du_tex(); 84 | 85 | static d3du_tex * make2d( ID3D11Device * dev, UINT w, UINT h, UINT num_mips, 86 | DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT initial_pitch ); 87 | 88 | static d3du_tex * make3d( ID3D11Device * dev, UINT w, UINT h, UINT d, UINT num_mips, 89 | DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT init_row_pitch, UINT init_depth_pitch ); 90 | 91 | private: 92 | d3du_tex( ID3D11Resource * resrc, ID3D11ShaderResourceView * srv, ID3D11RenderTargetView * rtv ); 93 | }; 94 | 95 | // D3DU timer measures how long D3D calls take on the GPU side 96 | // Create, call bracket begin/end around area you want to capture, then "report" at the end. 97 | typedef struct d3du_timer d3du_timer; 98 | 99 | d3du_timer * d3du_timer_create( d3du_context * ctx, size_t warmup_frames ); // warmup_frames = no. of initial measurements to throw away 100 | void d3du_timer_destroy( d3du_timer * timer ); 101 | void d3du_timer_bracket_begin( d3du_context * ctx, d3du_timer * timer ); 102 | void d3du_timer_bracket_end( d3du_context * ctx, d3du_timer * timer ); 103 | void d3du_timer_report( d3du_context * ctx, d3du_timer * timer, char const * label ); 104 | 105 | #endif 106 | 107 | -------------------------------------------------------------------------------- /util.cpp: -------------------------------------------------------------------------------- 1 | #define _CRT_SECURE_NO_WARNINGS 2 | #include "util.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | void panic(char const * fmt, ...) 13 | { 14 | va_list arg; 15 | va_start( arg, fmt ); 16 | fputs( "Error: ", stderr ); 17 | vfprintf( stderr, fmt, arg ); 18 | va_end( arg ); 19 | exit( 1 ); 20 | } 21 | 22 | char * read_file(char const * filename) 23 | { 24 | FILE *f = fopen( filename, "rb" ); 25 | if ( !f ) 26 | return 0; 27 | 28 | fseek( f, 0, SEEK_END ); 29 | size_t sz = ftell( f ); 30 | fseek( f, 0, SEEK_SET ); 31 | 32 | char * buffer = (char *)malloc( sz + 1 ); 33 | if (buffer) { 34 | buffer[ sz ] = 0; 35 | if (fread( buffer, sz, 1, f ) != 1) { 36 | free( buffer ); 37 | buffer = 0; 38 | } 39 | } 40 | 41 | fclose( f ); 42 | return buffer; 43 | } 44 | 45 | void dump_dwords( unsigned int const * vals, unsigned int num_dwords ) 46 | { 47 | for ( unsigned int row = 0 ; row < num_dwords ; row += 8 ) 48 | { 49 | printf( "[%04x]", row ); 50 | for ( unsigned int i = 0 ; i < 8 && row + i < num_dwords ; i++) 51 | printf( " %08x", vals[ row + i ] ); 52 | printf( "\n" ); 53 | } 54 | } 55 | 56 | int pixel_compare_pos( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h, int * pos_x, int * pos_y ) 57 | { 58 | for ( int y = 0 ; y < h ; y++ ) 59 | { 60 | unsigned char const * pa = a + y * stride_a; 61 | unsigned char const * pb = b + y * stride_b; 62 | int d = memcmp( pa, pb, w ); 63 | if ( d != 0 ) 64 | { 65 | if ( pos_y ) 66 | *pos_y = y; 67 | 68 | if ( pos_x ) 69 | { 70 | // we know there's a mismatch in this line 71 | // find its x position 72 | int x = 0; 73 | while ( pa[x] == pb[x] ) 74 | x++; 75 | 76 | *pos_x = x; 77 | } 78 | 79 | return d; 80 | } 81 | } 82 | 83 | return 0; 84 | } 85 | 86 | int pixel_compare( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h ) 87 | { 88 | return pixel_compare_pos( a, stride_a, b, stride_b, w, h, NULL, NULL ); 89 | } 90 | 91 | void print_pixels( unsigned char const * a, int stride_a, unsigned char const * b, int stride_b, int w, int h ) 92 | { 93 | for ( int y = 0 ; y < h ; y++ ) 94 | { 95 | for ( int x = 0 ; x < w ; x++ ) 96 | printf( "%02x ", a[x] ); 97 | 98 | printf(" - "); 99 | 100 | for ( int x = 0 ; x < w ; x++ ) 101 | printf( " %02x", b[x] ); 102 | 103 | printf("\n"); 104 | a += stride_a; 105 | b += stride_b; 106 | } 107 | } 108 | 109 | struct run_stats 110 | { 111 | std::vector values; 112 | }; 113 | 114 | run_stats * run_stats_create( ) 115 | { 116 | return new run_stats; 117 | } 118 | 119 | void run_stats_destroy( run_stats * stats ) 120 | { 121 | delete stats; 122 | } 123 | 124 | void run_stats_clear( run_stats * stats ) 125 | { 126 | stats->values.clear(); 127 | } 128 | 129 | void run_stats_record( run_stats * stats, float val ) 130 | { 131 | stats->values.push_back( val ); 132 | } 133 | 134 | void run_stats_report( run_stats * stats, char const * desc ) 135 | { 136 | size_t count = stats->values.size(); 137 | if (count < 2) 138 | return; 139 | 140 | // Print min/max and different percentiles 141 | std::sort(stats->values.begin(), stats->values.end()); 142 | 143 | // desc, min,25th,med,75th,max, mean,sdev 144 | char buffer[512]; 145 | char *p = buffer; 146 | 147 | p += sprintf(p, "%s, ", desc ); 148 | 149 | for (int i=0; i < 5; i++) 150 | p += sprintf(p, "%.3f,", stats->values[i * (count - 1) / 4]); 151 | 152 | // Mean and standard deviation 153 | double mean = 0.0; 154 | for (std::vector::const_iterator it = stats->values.begin(); it != stats->values.end(); ++it) 155 | mean += *it; 156 | mean /= count; 157 | 158 | double varsum = 0.0; 159 | for (std::vector::const_iterator it = stats->values.begin(); it != stats->values.end(); ++it) 160 | varsum += (*it - mean) * (*it - mean); 161 | double sdev = sqrt(varsum / (count - 1.0)); 162 | 163 | p += sprintf(p, " %.3f,%.3f\n", mean, sdev ); 164 | printf( "%s", buffer ); 165 | } 166 | -------------------------------------------------------------------------------- /shaders.hlsl: -------------------------------------------------------------------------------- 1 | #define TEX_WIDTH_LOG2 10 2 | 3 | struct CubeVert { 4 | float4 clip_pos : SV_Position; 5 | float3 world_pos : WorldPos; // .xyz = world space position 6 | }; 7 | 8 | cbuffer CubeConsts : register(b0) { 9 | float4x4 clip_from_world; 10 | float3 world_down_vector; 11 | float time_offs; 12 | 13 | // diffuse trilight plus ambient 14 | float3 light_color_ambient; 15 | float3 light_color_key; 16 | float3 light_color_fill; 17 | float3 light_color_back; 18 | float3 light_dir; 19 | }; 20 | 21 | cbuffer UpdateConsts : register(b1) { 22 | float3 field_scale; 23 | float damping; 24 | float3 field_offs; 25 | float accel; 26 | float3 field_sample_scale; 27 | float vel_scale; 28 | }; 29 | 30 | float4 UpdateVertShader( 31 | uint vertex_id : SV_VertexID 32 | ) : SV_Position 33 | { 34 | return float4(float(vertex_id >> 1) * 4.0 - 1.0, 1.0 - float(vertex_id & 1) * 4.0, 0.5, 1.0); 35 | } 36 | 37 | float4 UpdatePosShader( 38 | float4 pos : SV_Position, 39 | SamplerState force_smp : register(s0), 40 | Texture2D tex_older_pos : register(t0), 41 | Texture2D tex_newer_pos : register(t1), 42 | Texture3D tex_force : register(t2) 43 | ) : SV_Target 44 | { 45 | int3 coord_pos = int3(int2(pos.xy), 0); 46 | float4 older_pos = tex_older_pos.Load(coord_pos); 47 | float4 newer_pos = tex_newer_pos.Load(coord_pos); 48 | 49 | // determine force field sample pos 50 | float3 force_pos = newer_pos.xyz * field_scale + field_offs; 51 | float3 force_frac = frac(force_pos); 52 | float3 force_smooth = force_frac * force_frac * (3.0 - 2.0 * force_frac); 53 | force_pos = (force_pos - force_frac) + force_smooth; 54 | 55 | // sample force from texture 56 | float3 force = tex_force.Sample(force_smp, force_pos * field_sample_scale).xyz; 57 | 58 | // verlet integration 59 | float3 new_pos = newer_pos.xyz + damping * (newer_pos.xyz - older_pos.xyz); 60 | new_pos += accel * force; 61 | 62 | float4 output = float4(new_pos, newer_pos.w); 63 | 64 | // nuke particles if they get too far from the origin 65 | if (dot(new_pos, new_pos) > 16.0) 66 | output.w = 0.0; 67 | 68 | return output; 69 | } 70 | 71 | float4 UpdateVelShader( 72 | float4 pos : SV_Position, 73 | Texture2D tex_older_pos : register(t0), 74 | Texture2D tex_newer_pos : register(t1) 75 | ) : SV_Target 76 | { 77 | int3 coord_pos = int3(int2(pos.xy), 0); 78 | float4 older_pos = tex_older_pos.Load(coord_pos); 79 | float4 newer_pos = tex_newer_pos.Load(coord_pos); 80 | 81 | return newer_pos - older_pos; 82 | } 83 | 84 | CubeVert RenderCubeVertexShader( 85 | uint vertex_id : SV_VertexID, 86 | uint instance_id : SV_InstanceID, 87 | Texture2D tex_pos : register(t0), 88 | Texture2D tex_fwd : register(t1) 89 | ) 90 | { 91 | CubeVert v; 92 | 93 | // fetch cube position and velocity from textures 94 | int3 fetch_coord = int3(vertex_id >> 3, instance_id, 0); 95 | float4 cube_pos = tex_pos.Load(fetch_coord); 96 | float4 cube_fwd = tex_fwd.Load(fetch_coord); 97 | 98 | // early-out if cube is off 99 | if (cube_pos.w == 0.0) { 100 | v.clip_pos = 0; 101 | v.world_pos = 0; 102 | return v; 103 | } 104 | 105 | // determine local coordinate system 106 | float3 x_axis = cube_fwd.xyz; 107 | float3 z_axis = normalize(cross(x_axis, world_down_vector)); 108 | float3 y_axis = normalize(cross(z_axis, x_axis)); 109 | 110 | // generate cube vertex 111 | float3 world_pos = cube_pos.xyz; 112 | float across_size = cube_pos.w; 113 | 114 | world_pos += (((vertex_id & 1) != 0) ? 1.0 : -1.0) * x_axis; 115 | world_pos += (((vertex_id & 2) != 0) ? across_size : -across_size) * y_axis; 116 | world_pos += (((vertex_id & 4) != 0) ? across_size : -across_size) * z_axis; 117 | 118 | // generate output vertex 119 | v.clip_pos = mul(clip_from_world, float4(world_pos, 1.0)); 120 | v.world_pos = world_pos; 121 | return v; 122 | } 123 | 124 | float4 RenderCubePixelShader( 125 | CubeVert v 126 | ) : SV_Target 127 | { 128 | // determine triangle plane from derivatives 129 | float3 dPos_dx = ddx(v.world_pos.xyz); 130 | float3 dPos_dy = ddy(v.world_pos.xyz); 131 | 132 | // world-space normal from tangents 133 | float3 world_normal = cross(dPos_dy, dPos_dx); 134 | 135 | // lighting model (trilight) 136 | float NdotL = dot(world_normal, light_dir) * rsqrt(dot(world_normal, world_normal)); 137 | 138 | float3 diffuse_lit = light_color_ambient 139 | + saturate(NdotL) * light_color_key 140 | + (1.0 - abs(NdotL)) * light_color_fill 141 | + saturate(-NdotL) * light_color_back; 142 | 143 | return float4(diffuse_lit, 1.0); 144 | } -------------------------------------------------------------------------------- /momentous.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {AD11938C-C989-43EE-B618-36CD6118C035} 15 | Win32Proj 16 | momentous 17 | 18 | 19 | 20 | Application 21 | true 22 | v110 23 | Unicode 24 | 25 | 26 | Application 27 | false 28 | v110 29 | true 30 | Unicode 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | true 44 | 45 | 46 | false 47 | 48 | 49 | 50 | 51 | 52 | Level3 53 | Disabled 54 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 55 | true 56 | 57 | 58 | Console 59 | true 60 | 61 | 62 | 63 | 64 | Level3 65 | 66 | 67 | MaxSpeed 68 | true 69 | true 70 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 71 | true 72 | MultiThreaded 73 | 74 | 75 | Console 76 | true 77 | true 78 | true 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | true 94 | true 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /math.h: -------------------------------------------------------------------------------- 1 | #ifndef MATH_H_INCLUDED 2 | #define MATH_H_INCLUDED 3 | 4 | #include 5 | 6 | // Matrices are column-major. 7 | // 8 | // Coordinate system conventions: 9 | // +x = right 10 | // +y = down 11 | // +z = into screen 12 | // this is a bit unorthodox but right-handed and convenient. 13 | 14 | #define IMPL_COMPONENT_OP2(op) \ 15 | this_type& operator op(const this_type& b) { x op b.x; y op b.y; return *this; } 16 | 17 | #define IMPL_COMPONENT_OP3(op) \ 18 | this_type& operator op(const this_type& b) { x op b.x; y op b.y; z op b.z; return *this; } 19 | 20 | #define IMPL_COMPONENT_OP4(op) \ 21 | this_type& operator op(const this_type& b) { x op b.x; y op b.y; z op b.z; w op b.w; return *this; } 22 | 23 | #define IMPL_LINEAR_OPS(type) \ 24 | template type operator -(const type& v) { type x = v; x *= T(-1); return x; } \ 25 | template type operator +(const type& a, const type& b) { type x = a; x += b; return x; } \ 26 | template type operator -(const type& a, const type& b) { type x = a; x -= b; return x; } \ 27 | template type operator *(const type& a, T s) { type x = a; x *= s; return x; } \ 28 | template type operator *(const type& a, const type& b) { type x = a; x *= b; return x; } \ 29 | template type operator *(T s, const type& b) { type x = b; x *= s; return x; } 30 | 31 | #define IMPL_VECTOR_OPS(type, dot_expr) \ 32 | IMPL_LINEAR_OPS(type) \ 33 | template T dot(const type& a, const type& b) { return dot_expr; } \ 34 | template T len_sq(const type& a) { return dot(a, a); } \ 35 | template T len(const type& a) { return std::sqrt(len_sq(a)); } \ 36 | template type normalize(const type& a) { return rsqrt(len_sq(a)) * a; } 37 | 38 | #define IMPL_MATRIX_OPS(mat_type, vec_type, mul_expr) \ 39 | IMPL_LINEAR_OPS(mat_type) \ 40 | template vec_type operator *(const mat_type& m, const vec_type& v) { return mul_expr; } 41 | 42 | namespace math { 43 | template 44 | T rsqrt(T x) 45 | { 46 | return T(1) / std::sqrt(x); 47 | } 48 | 49 | template 50 | struct vec2T { 51 | typedef vec2T this_type; 52 | 53 | union { 54 | struct { 55 | T x, y; 56 | }; 57 | T v[2]; 58 | }; 59 | 60 | vec2T() {} 61 | explicit vec2T(T s) : x(s), y(s) {} 62 | vec2T(T x, T y) : x(x), y(y) {} 63 | 64 | T operator[](int i) const { return v[i]; } 65 | T& operator[](int i) { return v[i]; } 66 | 67 | IMPL_COMPONENT_OP2(+=) 68 | IMPL_COMPONENT_OP2(-=) 69 | IMPL_COMPONENT_OP2(*=) 70 | this_type& operator *=(T s) { x *= s; y *= s; return *this; } 71 | }; 72 | 73 | IMPL_VECTOR_OPS(vec2T, a.x*b.x + a.y*b.y) 74 | 75 | template 76 | struct vec3T { 77 | typedef vec3T this_type; 78 | 79 | union { 80 | struct { 81 | T x, y, z; 82 | }; 83 | T v[3]; 84 | }; 85 | 86 | vec3T() {} 87 | explicit vec3T(T s) : x(s), y(s), z(s) {} 88 | vec3T(T x, T y, T z) : x(x), y(y), z(z) {} 89 | 90 | T operator[](int i) const { return v[i]; } 91 | T& operator[](int i) { return v[i]; } 92 | 93 | IMPL_COMPONENT_OP3(+=) 94 | IMPL_COMPONENT_OP3(-=) 95 | IMPL_COMPONENT_OP3(*=) 96 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; return *this; } 97 | }; 98 | 99 | IMPL_VECTOR_OPS(vec3T, a.x*b.x + a.y*b.y + a.z*b.z) 100 | template 101 | vec3T cross(const vec3T& a, const vec3T& b) 102 | { 103 | return vec3T(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); 104 | } 105 | 106 | template 107 | struct vec4T { 108 | typedef vec4T this_type; 109 | 110 | union { 111 | struct { 112 | T x, y, z, w; 113 | }; 114 | T v[4]; 115 | }; 116 | 117 | vec4T() {} 118 | explicit vec4T(T s) : x(s), y(s), z(s), w(s) {} 119 | vec4T(const vec3T& v, T w) : x(v.x), y(v.y), z(v.z), w(w) {} 120 | vec4T(T x, T y, T z, T w) : x(x), y(y), z(z), w(w) {} 121 | 122 | T operator[](int i) const { return v[i]; } 123 | T& operator[](int i) { return v[i]; } 124 | 125 | IMPL_COMPONENT_OP4(+=) 126 | IMPL_COMPONENT_OP4(-=) 127 | IMPL_COMPONENT_OP4(*=) 128 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; w *= s; return *this; } 129 | }; 130 | 131 | IMPL_VECTOR_OPS(vec4T, a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w) 132 | 133 | template 134 | struct mat33T { 135 | typedef vec3T vec_type; 136 | typedef mat33T this_type; 137 | 138 | vec_type x, y, z; // columns 139 | 140 | mat33T() {} 141 | mat33T(const vec_type& colX, const vec_type& colY, const vec_type& colZ) : x(colX), y(colY), z(colZ) {} 142 | mat33T( 143 | T _00, T _01, T _02, 144 | T _10, T _11, T _12, 145 | T _20, T _21, T _22 146 | ) : x(_00, _10, _20), y(_01, _11, _21), z(_02, _12, _22) {} 147 | 148 | T operator()(int i, int j) const { return (&x)[j][i]; } 149 | T& operator()(int i, int j) { return (&x)[j][i]; } 150 | 151 | const vec_type& get_col(int i) const { return (&x)[i]; } 152 | void set_col(int i, const vec_type& v) { (&x)[i] = v; } 153 | const vec_type get_row(int i) const { return vec_type((&x)[0][i], (&x)[1][i], (&x)[2][i]); } 154 | void set_row(int i, const vec_type& v) { (&x)[0][i] = v.x; (&x)[1][i] = v.y; (&x)[2][i] = v.z; } 155 | 156 | IMPL_COMPONENT_OP3(+=) 157 | IMPL_COMPONENT_OP3(-=) 158 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; return *this; } 159 | this_type& operator *=(const this_type& b); 160 | 161 | static this_type diag(T x, T y, T z) { return mat33T(x, T(0), T(0), T(0), y, T(0), T(0), T(0), z); } 162 | static this_type identity() { return diag(T(1), T(1), T(1)); } 163 | static this_type uniform_scale(T s) { return diag(s, s, s); } 164 | 165 | static this_type rotation(const vec_type& axis, T angle) 166 | { 167 | // Rodrigues rotation formula 168 | T cosv = std::cos(angle); 169 | vec_type sa = std::sin(angle) * axis; 170 | vec_type omca = (T(1) - cosv) * axis; 171 | 172 | return mat33T( 173 | omca.x*axis.x + cosv, omca.x*axis.y - sa.z, omca.x*axis.z + sa.y, 174 | omca.y*axis.x + sa.z, omca.y*axis.y + cosv, omca.y*axis.z - sa.x, 175 | omca.z*axis.x - sa.y, omca.z*axis.y + sa.x, omca.z*axis.z + cosv 176 | ); 177 | } 178 | }; 179 | 180 | IMPL_MATRIX_OPS(mat33T, vec3T, v.x*m.x + v.y*m.y + v.z*m.z) 181 | 182 | template 183 | mat33T& mat33T::operator *=(const mat33T& b) 184 | { 185 | const mat33T M = *this; 186 | x = M * b.x; 187 | y = M * b.y; 188 | z = M * b.z; 189 | w = M * b.w; 190 | return *this; 191 | } 192 | 193 | template 194 | mat33T transpose(const mat33T& m) 195 | { 196 | return mat33T(m.get_row(0), m.get_row(1), m.get_row(2)); 197 | } 198 | 199 | template 200 | struct mat44T { 201 | typedef vec4T vec_type; 202 | typedef vec3T vec3_type; 203 | typedef mat44T this_type; 204 | 205 | vec_type x, y, z, w; // columns 206 | 207 | mat44T() {} 208 | mat44T(const vec_type& colX, const vec_type& colY, const vec_type& colZ, const vec_type& colW) : x(colX), y(colY), z(colZ), w(colW) {} 209 | mat44T(const mat33T& mat3x3, const vec3_type& translate) : x(mat3x3.x, T(0)), y(mat3x3.y, T(0)), z(mat3x3.z, T(0)), w(translate, T(1)) {} 210 | mat44T( 211 | T _00, T _01, T _02, T _03, 212 | T _10, T _11, T _12, T _13, 213 | T _20, T _21, T _22, T _23, 214 | T _30, T _31, T _32, T _33 215 | ) : x(_00, _10, _20, _30), y(_01, _11, _21, _31), z(_02, _12, _22, _32), w(_03, _13, _23, _33) {} 216 | 217 | T operator()(int i, int j) const { return (&x)[j][i]; } 218 | T& operator()(int i, int j) { return (&x)[j][i]; } 219 | 220 | const vec_type& get_col(int i) const { return (&x)[i]; } 221 | void set_col(int i, const vec_type& v) { (&x)[i] = v; } 222 | const vec_type get_row(int i) const { return vec_type((&x)[0][i], (&x)[1][i], (&x)[2][i], (&x)[3][i]); } 223 | void set_row(int i, const vec_type& v) { (&x)[0][i] = v.x; (&x)[1][i] = v.y; (&x)[2][i] = v.z; (&x)[3][i] = v.w; } 224 | 225 | IMPL_COMPONENT_OP4(+=) 226 | IMPL_COMPONENT_OP4(-=) 227 | this_type& operator *=(T s) { x *= s; y *= s; z *= s; w *= s; return *this; } 228 | this_type& operator *=(const this_type& b); 229 | 230 | static this_type diag(T x, T y, T z, T w) { return mat44T(x, T(0), T(0), T(0), T(0), y, T(0), T(0), T(0), T(0), z, T(0), T(0), T(0), T(0), w); } 231 | static this_type identity() { return diag(T(1), T(1), T(1), T(1)); } 232 | 233 | static this_type look_at(const vec3_type& pos, const vec3_type& look_at, const vec3_type& down) 234 | { 235 | mat33T M; 236 | vec3_type z_axis = normalize(look_at - pos); 237 | vec3_type x_axis = normalize(cross(down, z_axis)); 238 | vec3_type y_axis = cross(z_axis, x_axis); 239 | 240 | M.set_row(0, x_axis); 241 | M.set_row(1, y_axis); 242 | M.set_row(2, z_axis); 243 | return this_type(M, M * -pos); 244 | } 245 | 246 | static this_type orthoD3D(T lft, T rgt, T top, T bot, T nearv, T farv) 247 | { 248 | vec3_type mid((lft + rgt) / T(2), (bot + top) / T(2), (nearv + farv) / T(2)); 249 | T sx = T(2) / (rgt - lft); 250 | T sy = T(2) / (top - bot); 251 | T sz = T(1) / (farv - nearv); 252 | 253 | return this_type(mat33T::diag(sx, sy, sz), vec3T(-mid.x * sx, -mid.y * sy, T(0.5) - mid.z * sz)); 254 | } 255 | 256 | // NOTE: this takes lft/rgt/bot/top at z=1 plane, not near plane! 257 | static this_type frustumD3D(T lft, T rgt, T top, T bot, T nearv, T farv) 258 | { 259 | T Q = farv / (farv - nearv); 260 | 261 | return this_type( 262 | T(2) / (rgt - lft), 0, (rgt + lft) / (rgt - lft), T(0), 263 | T(0), T(2) / (top - bot), (top + bot) / (top - bot), T(0), 264 | T(0), T(0), Q, -nearv * Q, 265 | T(0), T(0), T(1), T(0) 266 | ); 267 | } 268 | 269 | // w/h at z=1 plane, not near plane! 270 | static this_type perspectiveD3D(T w, T h, T nearv, T farv) 271 | { 272 | T wh = w / T(2); 273 | T hh = h / T(2); 274 | return frustumD3D(-wh, wh, -hh, hh, nearv, farv); 275 | } 276 | }; 277 | 278 | IMPL_MATRIX_OPS(mat44T, vec4T, v.x*m.x + v.y*m.y + v.z*m.z + v.w*m.w) 279 | 280 | template 281 | mat44T& mat44T::operator *=(const mat44T& b) 282 | { 283 | const mat44T M = *this; 284 | x = M * b.x; 285 | y = M * b.y; 286 | z = M * b.z; 287 | w = M * b.w; 288 | return *this; 289 | } 290 | 291 | template 292 | mat44T transpose(const mat44T& m) 293 | { 294 | return mat44T(m.get_row(0), m.get_row(1), m.get_row(2), m.get_row(3)); 295 | } 296 | 297 | typedef vec2T vec2i; 298 | typedef vec2T vec2; 299 | 300 | typedef vec3T vec3i; 301 | typedef vec3T vec3; 302 | 303 | typedef vec4T vec4i; 304 | typedef vec4T vec4; 305 | 306 | typedef mat33T mat33; 307 | 308 | typedef mat44T mat44; 309 | } 310 | 311 | #undef IMPL_COMPONENT_OP2 312 | #undef IMPL_COMPONENT_OP3 313 | #undef IMPL_COMPONENT_OP4 314 | 315 | #undef IMPL_LINEAR_OPS 316 | #undef IMPL_VECTOR_OPS 317 | 318 | #endif // MATH_H_INCLUDED -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #define WIN32_LEAN_AND_MEAN 2 | #define NOMINMAX 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "d3du.h" 10 | #include "util.h" 11 | #include "math.h" 12 | 13 | static union { 14 | ID3D11Buffer* buffers[16]; 15 | ID3D11ShaderResourceView* srvs[16]; 16 | ID3D11RenderTargetView* rtvs[16]; 17 | } s_no; 18 | 19 | struct CubeConstBuf { 20 | math::mat44 clip_from_world; 21 | math::vec3 world_down_vector; 22 | float time_offs; 23 | 24 | math::vec3 light_color_ambient; 25 | float pad1; 26 | math::vec3 light_color_key; 27 | float pad2; 28 | math::vec3 light_color_fill; 29 | float pad3; 30 | math::vec3 light_color_back; 31 | float pad4; 32 | math::vec3 light_dir; 33 | float pad5; 34 | }; 35 | 36 | struct UpdateConstBuf { 37 | math::vec3 field_scale; 38 | float damping; 39 | math::vec3 field_offs; 40 | float accel; 41 | math::vec3 field_sample_scale; 42 | float vel_scale; 43 | }; 44 | 45 | static float srgb2lin(float x) 46 | { 47 | static const float lin_thresh = 0.04045f; 48 | if (x < lin_thresh) 49 | return x * (1.0f / 12.92f); 50 | else 51 | return std::pow((x + 0.055f) / 1.055f, 2.4f); 52 | } 53 | 54 | static math::vec3 srgb_color(int col) 55 | { 56 | return math::vec3( 57 | srgb2lin(((col >> 16) & 0xff) / 255.0f), 58 | srgb2lin(((col >> 8) & 0xff) / 255.0f), 59 | srgb2lin(((col >> 0) & 0xff) / 255.0f) 60 | ); 61 | } 62 | 63 | static void* map_cbuf_typeless(d3du_context* ctx, ID3D11Buffer* buf) 64 | { 65 | D3D11_MAPPED_SUBRESOURCE mapped; 66 | HRESULT hr = ctx->ctx->Map(buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); 67 | if (FAILED(hr)) 68 | panic("D3D buffer map failed!\n"); 69 | return mapped.pData; 70 | } 71 | 72 | template 73 | static T* map_cbuf(d3du_context* ctx, ID3D11Buffer* buf) 74 | { 75 | return (T*) map_cbuf_typeless(ctx, buf); 76 | } 77 | 78 | static void unmap_cbuf(d3du_context* ctx, ID3D11Buffer* buf) 79 | { 80 | ctx->ctx->Unmap(buf, 0); 81 | } 82 | 83 | static ID3D11Buffer* make_cube_inds(ID3D11Device* dev, int num_cubes) 84 | { 85 | static const USHORT cube_inds[] = { 86 | 0, 2, 1, 3, 7, 2, 6, 0, 4, 1, 5, 7, 4, 6, 87 | }; 88 | 89 | assert(num_cubes * 8 < 65535); // 65535 = prim restart 90 | USHORT * ind_data = new USHORT[num_cubes * 15]; 91 | for (int i=0; i < num_cubes; i++) 92 | { 93 | USHORT * out_ind = ind_data + i*15; 94 | for (UINT j=0; j < 14; j++) 95 | out_ind[j] = cube_inds[j] + i*8; 96 | out_ind[14] = 0xffff; 97 | } 98 | 99 | ID3D11Buffer* ind_buf = d3du_make_buffer(dev, num_cubes * 15 * sizeof(USHORT), 100 | D3D11_USAGE_IMMUTABLE, D3D11_BIND_INDEX_BUFFER, ind_data); 101 | delete[] ind_data; 102 | 103 | return ind_buf; 104 | } 105 | 106 | static bool is_pow2(int x) 107 | { 108 | return x != 0 && (x & (x - 1)) == 0; 109 | } 110 | 111 | static float randf() 112 | { 113 | return 1.0f * rand() / RAND_MAX; 114 | } 115 | 116 | static math::vec3 rand_vec3_unit_sphere(float* len_sq_out = nullptr) 117 | { 118 | math::vec3 v; 119 | float l; 120 | 121 | do 122 | { 123 | v.x = 2.0f * randf() - 1.0f; 124 | v.y = 2.0f * randf() - 1.0f; 125 | v.z = 2.0f * randf() - 1.0f; 126 | l = math::len_sq(v); 127 | } while (l > 1.0f); 128 | 129 | if (len_sq_out) 130 | *len_sq_out = l; 131 | return v; 132 | } 133 | 134 | static math::vec3 rand_unit_vec3() 135 | { 136 | math::vec3 v; 137 | float l; 138 | 139 | do v = rand_vec3_unit_sphere(&l); while (l == 0.0f); 140 | return math::rsqrt(l) * v; 141 | } 142 | 143 | static int step_idx(int base, int step, int mask) 144 | { 145 | return (base & ~mask) | ((base + step) & mask); 146 | } 147 | 148 | static d3du_tex* make_force_tex(ID3D11Device* dev, int size, float strength, float post_scale) 149 | { 150 | using namespace math; 151 | assert(is_pow2(size)); 152 | 153 | int stepx = 1, maskx = size - 1; 154 | int stepy = size, masky = (size - 1) * size; 155 | int stepz = size*size, maskz = (size - 1) * size * size; 156 | int nelem = size * size * size; 157 | vec4* forces = new vec4[nelem]; 158 | 159 | // create a random vector field 160 | for (int zo = 0; zo <= maskz; zo += stepz) { 161 | for (int yo = 0; yo <= masky; yo += stepy) { 162 | for (int xo = 0; xo <= maskx; xo += stepx) { 163 | forces[xo + yo + zo] = math::vec4(strength * rand_unit_vec3(), 0.0f); 164 | } 165 | } 166 | } 167 | 168 | // calc divergences 169 | float* div = new float[nelem]; 170 | float* high = new float[nelem]; 171 | 172 | float div_scale = -0.5f / (float)size; 173 | 174 | for (int zo = 0; zo <= maskz; zo += stepz) { 175 | for (int yo = 0; yo <= masky; yo += stepy) { 176 | for (int xo = 0; xo <= maskx; xo += stepx) { 177 | int o = xo + yo + zo; 178 | 179 | div[o] = div_scale * 180 | ( 181 | forces[step_idx(o, stepx, maskx)].x - forces[step_idx(o, -stepx, maskx)].x + 182 | forces[step_idx(o, stepy, masky)].y - forces[step_idx(o, -stepy, masky)].y + 183 | forces[step_idx(o, stepz, maskz)].z - forces[step_idx(o, -stepz, maskz)].z 184 | ); 185 | high[o] = 0.0f; 186 | } 187 | } 188 | } 189 | 190 | // gauss-seidel iteration to calc density field 191 | for (int step = 0; step < 40; step++) { 192 | for (int zo = 0; zo <= maskz; zo += stepz) { 193 | for (int yo = 0; yo <= masky; yo += stepy) { 194 | for (int xo = 0; xo <= maskx; xo += stepx) { 195 | int o = xo + yo + zo; 196 | high[o] = 197 | ( 198 | high[step_idx(o, -stepx, maskx)] + high[step_idx(o, stepx, maskx)] + 199 | high[step_idx(o, -stepy, masky)] + high[step_idx(o, stepy, masky)] + 200 | high[step_idx(o, -stepz, maskz)] + high[step_idx(o, stepz, maskz)] 201 | ) * (1.0f / 6.0f) - div[o]; 202 | } 203 | } 204 | } 205 | } 206 | 207 | // remove gradients from vector field 208 | float grad_scale = 0.5f * (float)size; 209 | for (int zo = 0; zo <= maskz; zo += stepz) { 210 | for (int yo = 0; yo <= masky; yo += stepy) { 211 | for (int xo = 0; xo <= maskx; xo += stepx) { 212 | int o = xo + yo + zo; 213 | vec4* f = forces + o; 214 | 215 | f->x = (f->x - grad_scale * (high[step_idx(o, stepx, maskx)] - high[step_idx(o, -stepx, maskx)])) * post_scale; 216 | f->y = (f->y - grad_scale * (high[step_idx(o, stepy, masky)] - high[step_idx(o, -stepy, masky)])) * post_scale; 217 | f->z = (f->z - grad_scale * (high[step_idx(o, stepz, maskz)] - high[step_idx(o, -stepz, maskz)])) * post_scale; 218 | } 219 | } 220 | } 221 | 222 | d3du_tex* tex = d3du_tex::make3d(dev, size, size, size, 1, DXGI_FORMAT_R32G32B32A32_FLOAT, 223 | D3D11_USAGE_IMMUTABLE, D3D11_BIND_SHADER_RESOURCE, forces, stepy * sizeof(*forces), stepz * sizeof(*forces)); 224 | 225 | delete[] div; 226 | delete[] high; 227 | delete[] forces; 228 | return tex; 229 | } 230 | 231 | int main() 232 | { 233 | d3du_context* d3d = d3du_init("Momentous", 1280, 720, D3D_FEATURE_LEVEL_10_0); 234 | 235 | char* shader_source = read_file("shaders.hlsl"); 236 | 237 | ID3D11VertexShader *update_vs = d3du_compile_and_create_shader(d3d->dev, shader_source, 238 | "vs_4_0", "UpdateVertShader").vs; 239 | ID3D11PixelShader *update_pos_ps = d3du_compile_and_create_shader(d3d->dev, shader_source, 240 | "ps_4_0", "UpdatePosShader").ps; 241 | ID3D11PixelShader *update_vel_ps = d3du_compile_and_create_shader(d3d->dev, shader_source, 242 | "ps_4_0", "UpdateVelShader").ps; 243 | 244 | ID3D11VertexShader *cube_vs = d3du_compile_and_create_shader(d3d->dev, shader_source, 245 | "vs_4_0", "RenderCubeVertexShader").vs; 246 | ID3D11PixelShader *cube_ps = d3du_compile_and_create_shader(d3d->dev, shader_source, 247 | "ps_4_0", "RenderCubePixelShader").ps; 248 | 249 | free(shader_source); 250 | 251 | static const UINT kChunkSize = 1024; 252 | static const UINT kNumCubes = 48 * 1024; 253 | static const UINT kTexHeight = (kNumCubes + kChunkSize - 1) / kChunkSize; 254 | 255 | ID3D11Buffer* update_const_buf = d3du_make_buffer(d3d->dev, sizeof(UpdateConstBuf), 256 | D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, NULL); 257 | 258 | ID3D11Buffer* cube_const_buf = d3du_make_buffer(d3d->dev, sizeof(CubeConstBuf), 259 | D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, NULL); 260 | 261 | ID3D11Buffer* cube_index_buf = make_cube_inds(d3d->dev, kChunkSize); 262 | 263 | ID3D11RasterizerState* raster_state = d3du_simple_raster(d3d->dev, D3D11_CULL_BACK, true, false); 264 | ID3D11SamplerState* force_sampler = d3du_simple_sampler(d3d->dev, D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT, D3D11_TEXTURE_ADDRESS_WRAP); 265 | 266 | // triple-buffer for position, plus velocity 267 | d3du_tex* part_tex[4]; 268 | for (int i=0; i < 4; i++) 269 | part_tex[i] = d3du_tex::make2d(d3d->dev, kChunkSize, kTexHeight, 1, DXGI_FORMAT_R32G32B32A32_FLOAT, 270 | D3D11_USAGE_DEFAULT, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, NULL, 0); 271 | 272 | d3du_tex* force_tex = make_force_tex(d3d->dev, 32, 1.0f, 0.001f); 273 | 274 | D3D11_VIEWPORT part_vp = d3du_full_tex2d_viewport(part_tex[0]->tex2d); 275 | 276 | int frame = 0; 277 | unsigned int cur_part = 0; 278 | int num_cubes = kNumCubes; 279 | unsigned int spawn_counter = 0; 280 | 281 | while (d3du_handle_events(d3d)) { 282 | using namespace math; 283 | 284 | static const float part_size = 0.001f; 285 | 286 | vec3 emit_pos(0.0f); 287 | emit_pos.x = 0.7f * sin(frame * 0.001f); 288 | 289 | // spawn new particles 290 | { 291 | static const int kSpawnCount = 256; 292 | vec4 pos_old[kSpawnCount]; 293 | vec4 pos_new[kSpawnCount]; 294 | 295 | for (int i = 0; i < kSpawnCount; i++) { 296 | vec3 pos = emit_pos + rand_vec3_unit_sphere() * 0.002f; 297 | vec3 vel = rand_vec3_unit_sphere() * 0.003f; 298 | 299 | pos_old[i] = vec4(pos - vel, part_size); 300 | pos_new[i] = vec4(pos, part_size); 301 | } 302 | 303 | // upload 304 | D3D11_BOX box = { }; 305 | box.left = spawn_counter % kChunkSize; 306 | box.right = box.left + kSpawnCount; 307 | box.top = spawn_counter / kChunkSize; 308 | box.bottom = box.top + 1; 309 | box.front = 0; 310 | box.back = 1; 311 | d3d->ctx->UpdateSubresource(part_tex[(cur_part + 2) % 3]->tex2d, 0, &box, pos_old, 0, 0); 312 | d3d->ctx->UpdateSubresource(part_tex[cur_part]->tex2d, 0, &box, pos_new, 0, 0); 313 | 314 | spawn_counter = (spawn_counter + kSpawnCount) % num_cubes; 315 | } 316 | 317 | // set up update constant buffer 318 | auto update_consts = map_cbuf(d3d, update_const_buf); 319 | update_consts->field_scale = math::vec3(32.0f); 320 | update_consts->damping = 0.99f; 321 | update_consts->field_offs = math::vec3(0.0f); 322 | update_consts->accel = 0.75f; 323 | update_consts->field_sample_scale = math::vec3(1.0f / 32.0f); 324 | update_consts->vel_scale = part_size * 6.0f; 325 | unmap_cbuf(d3d, update_const_buf); 326 | 327 | // update position (potentially several time steps) 328 | d3d->ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); 329 | 330 | d3d->ctx->VSSetShader(update_vs, NULL, 0); 331 | d3d->ctx->RSSetViewports(1, &part_vp); 332 | 333 | d3d->ctx->PSSetShader(update_pos_ps, NULL, 0); 334 | d3d->ctx->PSSetSamplers(0, 1, &force_sampler); 335 | d3d->ctx->PSSetConstantBuffers(1, 1, &update_const_buf); 336 | d3d->ctx->PSSetShaderResources(2, 1, &force_tex->srv); 337 | for (int step=0; step < 1; step++) { 338 | cur_part = (cur_part + 1) % 3; 339 | 340 | ID3D11ShaderResourceView* srvs[2]; 341 | for (int i=0; i < 2; i++) 342 | srvs[i] = part_tex[(cur_part + 1 + i) % 3]->srv; 343 | 344 | d3d->ctx->PSSetShaderResources(0, 2, srvs); 345 | d3d->ctx->OMSetRenderTargets(1, &part_tex[cur_part]->rtv, NULL); 346 | d3d->ctx->Draw(3, 0); 347 | d3d->ctx->PSSetShaderResources(0, 2, s_no.srvs); 348 | d3d->ctx->OMSetRenderTargets(1, s_no.rtvs, NULL); 349 | } 350 | 351 | // update velocities 352 | { 353 | ID3D11ShaderResourceView* srvs[2]; 354 | for (int i=0; i < 2; i++) 355 | srvs[i] = part_tex[(cur_part + 2 + i) % 3]->srv; 356 | 357 | d3d->ctx->PSSetShader(update_vel_ps, NULL, 0); 358 | d3d->ctx->PSSetShaderResources(0, 2, srvs); 359 | d3d->ctx->OMSetRenderTargets(1, &part_tex[3]->rtv, NULL); 360 | d3d->ctx->Draw(3, 0); 361 | d3d->ctx->PSSetShaderResources(0, 2, s_no.srvs); 362 | d3d->ctx->OMSetRenderTargets(1, s_no.rtvs, NULL); 363 | } 364 | 365 | static const float clear_color[4] = { 0.2f, 0.4f, 0.6f, 1.0f }; 366 | d3d->ctx->ClearDepthStencilView(d3d->depthbuf_dsv, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0); 367 | d3d->ctx->ClearRenderTargetView(d3d->backbuf_rtv, clear_color); 368 | 369 | // back to main render target and viewport 370 | d3d->ctx->OMSetRenderTargets(1, &d3d->backbuf_rtv, d3d->depthbuf_dsv); 371 | d3d->ctx->RSSetViewports(1, &d3d->default_vp); 372 | 373 | // set up camera 374 | vec3 world_cam_pos(0.0f, 0.0f, -0.9f); 375 | vec3 world_cam_target = emit_pos; 376 | mat44 view_from_world = mat44::look_at(world_cam_pos, world_cam_target, vec3(0,1,0)); 377 | 378 | // projection 379 | mat44 clip_from_view = mat44::perspectiveD3D(1280.0f / 720.0f, 1.0f, 0.01f, 50.0f); 380 | mat44 clip_from_world = clip_from_view * view_from_world; 381 | 382 | auto cube_consts = map_cbuf(d3d, cube_const_buf); 383 | cube_consts->clip_from_world = clip_from_world; 384 | cube_consts->world_down_vector = math::vec3(0.0f, 1.0f, 0.0f); 385 | cube_consts->time_offs = frame * 0.0001f; 386 | cube_consts->light_color_ambient = srgb_color(0x202020); 387 | cube_consts->light_color_key = srgb_color(0xc0c0c0); 388 | cube_consts->light_color_back = srgb_color(0x101040); 389 | cube_consts->light_color_fill = srgb_color(0x602020); 390 | cube_consts->light_dir = normalize(vec3(0.0f, -0.7f, -0.3f)); 391 | unmap_cbuf(d3d, cube_const_buf); 392 | 393 | // render cubes 394 | ID3D11ShaderResourceView* part_pos_srvs[2]; 395 | part_pos_srvs[0] = part_tex[cur_part]->srv; 396 | part_pos_srvs[1] = part_tex[3]->srv; 397 | 398 | d3d->ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); 399 | d3d->ctx->IASetIndexBuffer(cube_index_buf, DXGI_FORMAT_R16_UINT, 0); 400 | 401 | d3d->ctx->VSSetShader(cube_vs, NULL, 0); 402 | d3d->ctx->VSSetShaderResources(0, 2, part_pos_srvs); 403 | d3d->ctx->VSSetConstantBuffers(0, 1, &cube_const_buf); 404 | 405 | d3d->ctx->RSSetState(raster_state); 406 | 407 | d3d->ctx->PSSetShader(cube_ps, NULL, 0); 408 | d3d->ctx->PSSetConstantBuffers(0, 1, &cube_const_buf); 409 | 410 | d3d->ctx->DrawIndexedInstanced(kChunkSize * 15, (num_cubes + kChunkSize - 1) / kChunkSize, 0, 0, 0); 411 | 412 | d3d->ctx->VSSetShaderResources(0, 2, s_no.srvs); 413 | 414 | d3du_swap_buffers(d3d, true); 415 | frame++; 416 | } 417 | 418 | for (int i=0; i < 4; i++) 419 | delete part_tex[i]; 420 | delete force_tex; 421 | 422 | update_const_buf->Release(); 423 | cube_const_buf->Release(); 424 | cube_index_buf->Release(); 425 | cube_ps->Release(); 426 | cube_vs->Release(); 427 | update_vs->Release(); 428 | update_pos_ps->Release(); 429 | update_vel_ps->Release(); 430 | raster_state->Release(); 431 | force_sampler->Release(); 432 | 433 | d3du_shutdown(d3d); 434 | return 0; 435 | } -------------------------------------------------------------------------------- /d3du.cpp: -------------------------------------------------------------------------------- 1 | #define WIN32_LEAN_AND_MEAN 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "d3du.h" 7 | #include "util.h" 8 | 9 | #pragma comment(lib, "d3d11.lib") 10 | #pragma comment(lib, "d3dcompiler.lib") 11 | 12 | static LRESULT CALLBACK window_proc( HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam ) 13 | { 14 | //d3du_context * ctx = (glx_context *) GetWindowLongPtrA( hwnd, GWLP_USERDATA ); 15 | 16 | switch ( msg ) 17 | { 18 | case WM_CREATE: 19 | { 20 | CREATESTRUCTA * cs = (CREATESTRUCTA *) lparam; 21 | SetWindowLongPtrA( hwnd, GWLP_USERDATA, (LONG_PTR)cs->lpCreateParams ); 22 | } 23 | break; 24 | 25 | case WM_ERASEBKGND: 26 | return 1; 27 | 28 | case WM_PAINT: 29 | ValidateRect( hwnd, NULL ); 30 | return 0; 31 | 32 | case WM_CHAR: 33 | if ( wparam == 27 ) // escape 34 | PostMessage( hwnd, WM_CLOSE, 0, 0 ); 35 | return 0; 36 | 37 | case WM_DESTROY: 38 | PostQuitMessage( 0 ); 39 | break; 40 | } 41 | 42 | return DefWindowProcA( hwnd, msg, wparam, lparam ); 43 | } 44 | 45 | template 46 | static void safe_release( T * * p ) 47 | { 48 | if ( *p ) 49 | { 50 | (*p)->Release(); 51 | *p = NULL; 52 | } 53 | } 54 | 55 | static d3du_context * d3du_init_fail( d3du_context * ctx ) 56 | { 57 | safe_release( &ctx->backbuf ); 58 | safe_release( &ctx->depthbuf ); 59 | safe_release( &ctx->backbuf_rtv ); 60 | safe_release( &ctx->depthbuf_dsv ); 61 | safe_release( &ctx->swap ); 62 | safe_release( &ctx->ctx ); 63 | safe_release( &ctx->dev ); 64 | if ( ctx->hwnd ) DestroyWindow( ctx->hwnd ); 65 | delete ctx; 66 | return NULL; 67 | } 68 | 69 | d3du_context * d3du_init( char const * title, int w, int h, D3D_FEATURE_LEVEL feature_level ) 70 | { 71 | d3du_context * ctx = new d3du_context; 72 | memset( ctx, 0, sizeof( *ctx ) ); 73 | 74 | HINSTANCE hinst = GetModuleHandleA( NULL ); 75 | 76 | WNDCLASSA wc = { 0 }; 77 | wc.hbrBackground = (HBRUSH) GetStockObject( BLACK_BRUSH ); 78 | wc.hCursor = LoadCursor( 0, IDC_ARROW ); 79 | wc.hInstance = hinst; 80 | wc.lpfnWndProc = window_proc; 81 | wc.lpszClassName = "rad.d3du"; 82 | RegisterClassA( &wc ); 83 | 84 | DWORD style = WS_OVERLAPPEDWINDOW; 85 | 86 | RECT rc = { 0, 0, w, h }; 87 | AdjustWindowRect( &rc, style, FALSE ); 88 | 89 | ctx->hwnd = CreateWindowExA( 0, "rad.d3du", title, style | WS_VISIBLE, CW_USEDEFAULT, CW_USEDEFAULT, rc.right - rc.left, rc.bottom - rc.top, NULL, NULL, hinst, ctx ); 90 | if ( !ctx->hwnd ) 91 | return d3du_init_fail( ctx ); 92 | 93 | DXGI_SWAP_CHAIN_DESC swap_desc = { 0 }; 94 | swap_desc.BufferDesc.Width = w; 95 | swap_desc.BufferDesc.Height = h; 96 | swap_desc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; 97 | swap_desc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED; 98 | swap_desc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED; 99 | swap_desc.SampleDesc.Count = 1; 100 | swap_desc.SampleDesc.Quality = 0; 101 | swap_desc.BufferUsage = DXGI_USAGE_BACK_BUFFER | DXGI_USAGE_RENDER_TARGET_OUTPUT; 102 | swap_desc.BufferCount = 1; 103 | swap_desc.OutputWindow = ctx->hwnd; 104 | swap_desc.Windowed = TRUE; 105 | swap_desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; 106 | swap_desc.Flags = 0; 107 | 108 | D3D_FEATURE_LEVEL out_level; 109 | UINT flags = 0; 110 | #ifdef _DEBUG 111 | flags |= D3D11_CREATE_DEVICE_DEBUG; 112 | #endif 113 | HRESULT hr = D3D11CreateDeviceAndSwapChain( NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, 114 | &feature_level, 1, D3D11_SDK_VERSION, &swap_desc, &ctx->swap, &ctx->dev, &out_level, &ctx->ctx ); 115 | if ( FAILED( hr ) ) 116 | return d3du_init_fail( ctx ); 117 | 118 | // render target and rtv 119 | hr = ctx->swap->GetBuffer( 0, __uuidof(ID3D11Texture2D), (void **)&ctx->backbuf ); 120 | if ( FAILED( hr ) ) 121 | return d3du_init_fail( ctx ); 122 | 123 | hr = ctx->dev->CreateRenderTargetView( ctx->backbuf, NULL, &ctx->backbuf_rtv ); 124 | if ( FAILED( hr ) ) 125 | return d3du_init_fail( ctx ); 126 | 127 | // depth/stencil surface and dsv 128 | D3D11_TEXTURE2D_DESC desc = 129 | { 130 | w, h, 1, 1, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, { 1, 0 }, 131 | D3D11_USAGE_DEFAULT, D3D11_BIND_DEPTH_STENCIL, 0, 0 132 | }; 133 | hr = ctx->dev->CreateTexture2D( &desc, NULL, &ctx->depthbuf ); 134 | if ( FAILED( hr ) ) 135 | return d3du_init_fail( ctx ); 136 | 137 | hr = ctx->dev->CreateDepthStencilView( ctx->depthbuf, NULL, &ctx->depthbuf_dsv ); 138 | if ( FAILED( hr ) ) 139 | return d3du_init_fail( ctx ); 140 | 141 | ctx->default_vp.TopLeftX = 0.0f; 142 | ctx->default_vp.TopLeftY = 0.0f; 143 | ctx->default_vp.Width = (float)w; 144 | ctx->default_vp.Height = (float)h; 145 | ctx->default_vp.MinDepth = 0.0f; 146 | ctx->default_vp.MaxDepth = 1.0f; 147 | 148 | // bind default RT, DSV and viewport for convenience. 149 | ctx->ctx->OMSetRenderTargets( 1, &ctx->backbuf_rtv, ctx->depthbuf_dsv ); 150 | ctx->ctx->RSSetViewports( 1, &ctx->default_vp ); 151 | 152 | return ctx; 153 | } 154 | 155 | void d3du_shutdown( d3du_context * ctx ) 156 | { 157 | if ( ctx->ctx ) 158 | ctx->ctx->ClearState(); 159 | 160 | safe_release( &ctx->backbuf ); 161 | safe_release( &ctx->depthbuf ); 162 | safe_release( &ctx->backbuf_rtv ); 163 | safe_release( &ctx->depthbuf_dsv ); 164 | safe_release( &ctx->swap ); 165 | safe_release( &ctx->ctx ); 166 | 167 | #if 0 && defined(_DEBUG) // use to trace leaks 168 | if ( ctx->dev ) 169 | { 170 | ID3D11Debug * dbg; 171 | ctx->dev->QueryInterface( __uuidof(ID3D11Debug), (void**)&dbg ); 172 | dbg->ReportLiveDeviceObjects( D3D11_RLDO_DETAIL ); 173 | dbg->Release(); 174 | } 175 | #endif 176 | 177 | safe_release( &ctx->dev ); 178 | DestroyWindow( ctx->hwnd ); 179 | delete ctx; 180 | } 181 | 182 | int d3du_handle_events( d3du_context * ctx ) 183 | { 184 | MSG msg; 185 | int ok = 1; 186 | 187 | while ( PeekMessage( &msg, 0, 0, 0, PM_REMOVE ) ) 188 | { 189 | if ( msg.message == WM_QUIT ) 190 | ok = 0; 191 | TranslateMessage( &msg ); 192 | DispatchMessage( &msg ); 193 | } 194 | 195 | return ok; 196 | } 197 | 198 | void d3du_swap_buffers( d3du_context * ctx, bool vsync ) 199 | { 200 | ctx->swap->Present( vsync ? 1 : 0, 0 ); 201 | } 202 | 203 | D3D11_VIEWPORT d3du_full_tex2d_viewport( ID3D11Texture2D * tex ) 204 | { 205 | D3D11_TEXTURE2D_DESC desc; 206 | tex->GetDesc( &desc ); 207 | 208 | D3D11_VIEWPORT vp; 209 | vp.TopLeftX = 0.0f; 210 | vp.TopLeftY = 0.0f; 211 | vp.Width = (float)desc.Width; 212 | vp.Height = (float)desc.Height; 213 | vp.MinDepth = 0.0f; 214 | vp.MaxDepth = 1.0f; 215 | 216 | return vp; 217 | } 218 | 219 | ID3D11Buffer * d3du_make_buffer( ID3D11Device * dev, UINT size, D3D11_USAGE use, UINT bind_flags, const void * initial ) 220 | { 221 | D3D11_BUFFER_DESC desc; 222 | desc.ByteWidth = size; 223 | desc.Usage = use; 224 | desc.BindFlags = bind_flags; 225 | switch ( use ) 226 | { 227 | case D3D11_USAGE_DYNAMIC: 228 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; 229 | break; 230 | 231 | case D3D11_USAGE_STAGING: 232 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; 233 | break; 234 | 235 | default: 236 | desc.CPUAccessFlags = 0; 237 | break; 238 | } 239 | desc.MiscFlags = 0; 240 | desc.StructureByteStride = 0; 241 | 242 | D3D11_SUBRESOURCE_DATA initial_data; 243 | initial_data.pSysMem = initial; 244 | initial_data.SysMemPitch = 0; 245 | initial_data.SysMemSlicePitch = 0; 246 | 247 | ID3D11Buffer * buf; 248 | HRESULT hr = dev->CreateBuffer( &desc, initial ? &initial_data : NULL, &buf ); 249 | if ( FAILED( hr ) ) 250 | panic( "D3D CreateBuffer failed: 0x%08x\n", hr ); 251 | 252 | return buf; 253 | } 254 | 255 | unsigned char * d3du_get_buffer( d3du_context * ctx, ID3D11Buffer * buf, int * size_in_bytes ) 256 | { 257 | D3D11_BUFFER_DESC desc; 258 | buf->GetDesc( &desc ); 259 | desc.Usage = D3D11_USAGE_STAGING; 260 | desc.BindFlags = 0; 261 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; 262 | desc.MiscFlags = 0; 263 | 264 | ID3D11Buffer * temp_buf; 265 | HRESULT hr = ctx->dev->CreateBuffer( &desc, NULL, &temp_buf ); 266 | if ( FAILED( hr ) ) 267 | return NULL; 268 | 269 | ctx->ctx->CopyResource( temp_buf, buf ); 270 | 271 | D3D11_MAPPED_SUBRESOURCE mapped; 272 | hr = ctx->ctx->Map( temp_buf, 0, D3D11_MAP_READ, 0, &mapped ); 273 | if ( FAILED( hr ) ) 274 | panic( "d3du_get_buffer map failed\n" ); 275 | 276 | unsigned char * result = new unsigned char[desc.ByteWidth]; 277 | memcpy( result, mapped.pData, desc.ByteWidth ); 278 | 279 | ctx->ctx->Unmap( temp_buf, 0 ); 280 | temp_buf->Release(); 281 | 282 | if ( size_in_bytes ) 283 | *size_in_bytes = desc.ByteWidth; 284 | 285 | return result; 286 | } 287 | 288 | static unsigned int get_bpp( DXGI_FORMAT fmt ) 289 | { 290 | unsigned int bpp = 0; 291 | 292 | switch ( fmt ) 293 | { 294 | case DXGI_FORMAT_R8_TYPELESS: 295 | case DXGI_FORMAT_R8_UNORM: 296 | case DXGI_FORMAT_R8_UINT: 297 | case DXGI_FORMAT_R8_SNORM: 298 | case DXGI_FORMAT_R8_SINT: 299 | bpp = 1; 300 | break; 301 | 302 | case DXGI_FORMAT_R8G8_TYPELESS: 303 | case DXGI_FORMAT_R8G8_UNORM: 304 | case DXGI_FORMAT_R8G8_UINT: 305 | case DXGI_FORMAT_R8G8_SNORM: 306 | case DXGI_FORMAT_R8G8_SINT: 307 | bpp = 2; 308 | break; 309 | 310 | default: 311 | panic( "unsupported DXGI format %d\n", fmt ); 312 | } 313 | 314 | return bpp; 315 | } 316 | 317 | unsigned char * d3du_read_texture_level( d3du_context * ctx, ID3D11ShaderResourceView * srv, int srv_level ) 318 | { 319 | D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc; 320 | srv->GetDesc( &srv_desc ); 321 | 322 | if ( srv_desc.ViewDimension != D3D11_SRV_DIMENSION_TEXTURE2D ) 323 | panic( "d3du_read_texture_level only supports 2D textures right now" ); 324 | 325 | unsigned int bpp = get_bpp( srv_desc.Format ); 326 | int res_level = srv_level + srv_desc.Texture2D.MostDetailedMip; 327 | 328 | D3D11_TEXTURE2D_DESC tex_desc; 329 | ID3D11Texture2D * tex2d; 330 | srv->GetResource( (ID3D11Resource **)&tex2d ); 331 | tex2d->GetDesc( &tex_desc ); 332 | 333 | tex_desc.Usage = D3D11_USAGE_STAGING; 334 | tex_desc.BindFlags = 0; 335 | tex_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; 336 | tex_desc.MiscFlags = 0; 337 | 338 | ID3D11Texture2D * temp_tex; 339 | HRESULT hr = ctx->dev->CreateTexture2D( &tex_desc, NULL, &temp_tex ); 340 | if ( FAILED( hr ) ) 341 | { 342 | tex2d->Release(); 343 | return NULL; 344 | } 345 | 346 | ctx->ctx->CopyResource( temp_tex, tex2d ); 347 | 348 | D3D11_MAPPED_SUBRESOURCE mapped; 349 | hr = ctx->ctx->Map( temp_tex, res_level, D3D11_MAP_READ, 0, &mapped ); 350 | if ( FAILED( hr ) ) 351 | panic( "d3du_read_texture_level map failed\n" ); 352 | 353 | unsigned int out_width = tex_desc.Width >> res_level; 354 | unsigned int out_height = tex_desc.Height >> res_level; 355 | 356 | if ( !out_width ) out_width = 1; 357 | if ( !out_height ) out_height = 1; 358 | 359 | unsigned int out_pitch = out_width * bpp; 360 | 361 | unsigned char * result = new unsigned char[out_pitch * out_height]; 362 | for ( unsigned int y = 0 ; y < out_height ; y++ ) 363 | memcpy( result + y*out_pitch, (unsigned char *)mapped.pData + y*mapped.RowPitch, out_pitch ); 364 | 365 | ctx->ctx->Unmap( temp_tex, res_level ); 366 | temp_tex->Release(); 367 | tex2d->Release(); 368 | 369 | return result; 370 | } 371 | 372 | ID3D11RasterizerState * d3du_simple_raster( ID3D11Device * dev, D3D11_CULL_MODE cull, bool front_ccw, bool scissor_enable ) 373 | { 374 | D3D11_RASTERIZER_DESC raster_desc = { D3D11_FILL_SOLID }; 375 | raster_desc.CullMode = cull; 376 | raster_desc.FrontCounterClockwise = front_ccw; 377 | raster_desc.DepthClipEnable = TRUE; 378 | raster_desc.ScissorEnable = scissor_enable; 379 | 380 | ID3D11RasterizerState * raster_state = NULL; 381 | HRESULT hr = dev->CreateRasterizerState( &raster_desc, &raster_state ); 382 | if ( FAILED( hr ) ) 383 | panic( "CreateRasterizerState failed\n" ); 384 | 385 | return raster_state; 386 | } 387 | 388 | ID3D11BlendState * d3du_simple_blend( ID3D11Device * dev, D3D11_BLEND src_blend, D3D11_BLEND dest_blend ) 389 | { 390 | D3D11_BLEND_DESC blend_desc = { FALSE, FALSE }; 391 | blend_desc.RenderTarget[0].BlendEnable = ( src_blend != D3D11_BLEND_ONE || dest_blend != D3D11_BLEND_ZERO ); 392 | blend_desc.RenderTarget[0].SrcBlend = src_blend; 393 | blend_desc.RenderTarget[0].DestBlend = dest_blend; 394 | blend_desc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; 395 | blend_desc.RenderTarget[0].SrcBlendAlpha = src_blend; 396 | blend_desc.RenderTarget[0].DestBlendAlpha = dest_blend; 397 | blend_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; 398 | blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; 399 | 400 | ID3D11BlendState * blend_state = NULL; 401 | HRESULT hr = dev->CreateBlendState( &blend_desc, &blend_state ); 402 | if ( FAILED( hr ) ) 403 | panic( "CreateBlendState failed\n" ); 404 | 405 | return blend_state; 406 | } 407 | 408 | ID3D11SamplerState * d3du_simple_sampler( ID3D11Device * dev, D3D11_FILTER filter, D3D11_TEXTURE_ADDRESS_MODE addr ) 409 | { 410 | HRESULT hr; 411 | ID3D11SamplerState * sampler = NULL; 412 | 413 | D3D11_SAMPLER_DESC desc; 414 | desc.Filter = filter; 415 | desc.AddressU = addr; 416 | desc.AddressV = addr; 417 | desc.AddressW = addr; 418 | desc.MipLODBias = 0.0f; 419 | desc.MaxAnisotropy = 8; 420 | desc.ComparisonFunc = D3D11_COMPARISON_NEVER; 421 | desc.BorderColor[0] = 1.0f; 422 | desc.BorderColor[1] = 1.0f; 423 | desc.BorderColor[2] = 1.0f; 424 | desc.BorderColor[3] = 1.0f; 425 | desc.MinLOD = -1e+20f; 426 | desc.MaxLOD = 1e+20f; 427 | 428 | hr = dev->CreateSamplerState( &desc, &sampler ); 429 | if ( FAILED( hr ) ) 430 | panic( "CreateSamplerState failed\n" ); 431 | 432 | return sampler; 433 | } 434 | 435 | ID3DBlob * d3du_compile_source_or_die( char const * source, char const * profile, char const * entrypt ) 436 | { 437 | ID3DBlob * code; 438 | ID3DBlob * errors; 439 | HRESULT hr = D3DCompile( source, strlen( source ), NULL, NULL, NULL, entrypt, profile, D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_OPTIMIZATION_LEVEL1, 0, 440 | &code, &errors ); 441 | 442 | if ( errors ) 443 | { 444 | OutputDebugStringA( "While compiling:\n" ); 445 | OutputDebugStringA( source ); 446 | OutputDebugStringA( "Got errors:\n" ); 447 | OutputDebugStringA( (char*)errors->GetBufferPointer() ); 448 | errors->Release(); 449 | } 450 | 451 | if ( FAILED( hr ) ) 452 | panic( "Shader compilation failed!\n" ); 453 | 454 | return code; 455 | } 456 | 457 | d3du_shader d3du_compile_and_create_shader( ID3D11Device * dev, char const * source, char const * profile, char const * entrypt ) 458 | { 459 | ID3DBlob * code = d3du_compile_source_or_die( source, profile, entrypt ); 460 | HRESULT hr = S_OK; 461 | d3du_shader sh; 462 | 463 | sh.generic = NULL; 464 | 465 | switch ( profile[0] ) 466 | { 467 | case 'p': hr = dev->CreatePixelShader( code->GetBufferPointer(), code->GetBufferSize(), NULL, &sh.ps ); break; 468 | case 'v': hr = dev->CreateVertexShader( code->GetBufferPointer(), code->GetBufferSize(), NULL, &sh.vs ); break; 469 | case 'c': hr = dev->CreateComputeShader( code->GetBufferPointer(), code->GetBufferSize(), NULL, &sh.cs ); break; 470 | default: panic( "Unsupported shader profile '%s'\n", profile ); 471 | } 472 | 473 | if ( FAILED( hr ) ) 474 | panic( "Error creating shader.\n" ); 475 | 476 | return sh; 477 | } 478 | 479 | d3du_tex::d3du_tex( ID3D11Resource * resrc, ID3D11ShaderResourceView * srv, ID3D11RenderTargetView * rtv ) 480 | : resrc(resrc), srv(srv), rtv(rtv) 481 | { 482 | } 483 | 484 | d3du_tex::~d3du_tex() 485 | { 486 | safe_release( &resrc ); 487 | safe_release( &srv ); 488 | safe_release( &rtv ); 489 | } 490 | 491 | d3du_tex * d3du_tex::make2d( ID3D11Device * dev, UINT w, UINT h, UINT num_mips, DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT initial_pitch ) 492 | { 493 | HRESULT hr = S_OK; 494 | ID3D11Texture2D *tex = NULL; 495 | ID3D11ShaderResourceView *srv = NULL; 496 | ID3D11RenderTargetView *rtv = NULL; 497 | 498 | D3D11_TEXTURE2D_DESC desc; 499 | desc.Width = w; 500 | desc.Height = h; 501 | desc.MipLevels = num_mips; 502 | desc.ArraySize = 1; 503 | desc.Format = fmt; 504 | desc.SampleDesc.Count = 1; 505 | desc.SampleDesc.Quality = 0; 506 | desc.Usage = usage; 507 | desc.BindFlags = bind_flags; 508 | desc.CPUAccessFlags = 0; 509 | desc.MiscFlags = 0; 510 | 511 | D3D11_SUBRESOURCE_DATA initial_data; 512 | initial_data.pSysMem = initial; 513 | initial_data.SysMemPitch = initial_pitch; 514 | initial_data.SysMemSlicePitch = 0; 515 | 516 | hr = dev->CreateTexture2D( &desc, initial ? &initial_data : nullptr, &tex ); 517 | 518 | if ( !FAILED( hr ) && ( bind_flags & D3D11_BIND_SHADER_RESOURCE ) ) 519 | hr = dev->CreateShaderResourceView( tex, nullptr, &srv ); 520 | 521 | if ( !FAILED( hr ) && ( bind_flags & D3D11_BIND_RENDER_TARGET ) ) 522 | hr = dev->CreateRenderTargetView( tex, nullptr, &rtv ); 523 | 524 | if ( FAILED( hr ) ) 525 | { 526 | safe_release( &tex ); 527 | safe_release( &srv ); 528 | safe_release( &rtv ); 529 | return NULL; 530 | } else 531 | return new d3du_tex( tex, srv, rtv ); 532 | } 533 | 534 | d3du_tex * d3du_tex::make3d( ID3D11Device * dev, UINT w, UINT h, UINT d, UINT num_mips, DXGI_FORMAT fmt, D3D11_USAGE usage, UINT bind_flags, void const * initial, UINT init_row_pitch, UINT init_depth_pitch ) 535 | { 536 | HRESULT hr = S_OK; 537 | ID3D11Texture3D *tex = NULL; 538 | ID3D11ShaderResourceView *srv = NULL; 539 | 540 | D3D11_TEXTURE3D_DESC desc; 541 | desc.Width = w; 542 | desc.Height = h; 543 | desc.Depth = d; 544 | desc.MipLevels = num_mips; 545 | desc.Format = fmt; 546 | desc.Usage = usage; 547 | desc.BindFlags = bind_flags; 548 | desc.CPUAccessFlags = 0; 549 | desc.MiscFlags = 0; 550 | 551 | D3D11_SUBRESOURCE_DATA initial_data; 552 | initial_data.pSysMem = initial; 553 | initial_data.SysMemPitch = init_row_pitch; 554 | initial_data.SysMemSlicePitch = init_depth_pitch; 555 | 556 | hr = dev->CreateTexture3D( &desc, initial ? &initial_data : nullptr, &tex ); 557 | 558 | if ( !FAILED( hr ) && ( bind_flags & D3D11_BIND_SHADER_RESOURCE ) ) 559 | hr = dev->CreateShaderResourceView( tex, nullptr, &srv ); 560 | 561 | if ( FAILED( hr ) ) 562 | { 563 | safe_release( &tex ); 564 | safe_release( &srv ); 565 | return NULL; 566 | } else 567 | return new d3du_tex( tex, srv, NULL ); 568 | } 569 | 570 | static const size_t TIMER_SLOTS = 4; // depth of queue of in-flight queries (must be pow2) 571 | 572 | struct d3du_timer_group 573 | { 574 | ID3D11Query * begin; 575 | ID3D11Query * end; 576 | ID3D11Query * disjoint; 577 | }; 578 | 579 | struct d3du_timer 580 | { 581 | d3du_timer_group grp[TIMER_SLOTS]; 582 | size_t issue_idx; // index of timer we're issuing 583 | size_t retire_idx; // index of timer we're retiring 584 | size_t warmup_frames; 585 | run_stats * stats; 586 | }; 587 | 588 | static d3du_timer_group * timer_get( d3du_timer * timer, size_t index ) 589 | { 590 | return &timer->grp[ index & ( TIMER_SLOTS - 1 ) ]; 591 | } 592 | 593 | static void timer_ensure_max_in_flight( d3du_context * ctx, d3du_timer * timer, size_t max_in_flight ) 594 | { 595 | while ( ( timer->issue_idx - timer->retire_idx ) > max_in_flight ) 596 | { 597 | // retire oldest timer in flight 598 | d3du_timer_group * grp = timer_get( timer, timer->retire_idx ); 599 | UINT64 start, end; 600 | D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint; 601 | HRESULT hr; 602 | 603 | while ( ( hr = ctx->ctx->GetData( grp->begin, &start, sizeof( UINT64 ), 0 ) ) != S_OK ); 604 | while ( ( hr = ctx->ctx->GetData( grp->end, &end, sizeof( UINT64 ), 0 ) ) != S_OK ); 605 | while ( ( hr = ctx->ctx->GetData( grp->disjoint, &disjoint, sizeof( disjoint ), 0 ) ) != S_OK ); 606 | 607 | if ( timer->retire_idx >= timer->warmup_frames && !disjoint.Disjoint ) 608 | run_stats_record( timer->stats, (float) ( 1000.0 * ( end - start ) / disjoint.Frequency ) ); 609 | 610 | timer->retire_idx++; 611 | } 612 | } 613 | 614 | d3du_timer * d3du_timer_create( d3du_context * ctx, size_t warmup_frames ) 615 | { 616 | d3du_timer * timer = new d3du_timer; 617 | 618 | for ( size_t i = 0 ; i < TIMER_SLOTS ; i++ ) 619 | { 620 | D3D11_QUERY_DESC desc = {}; 621 | HRESULT hr; 622 | desc.Query = D3D11_QUERY_TIMESTAMP; 623 | hr = ctx->dev->CreateQuery( &desc, &timer->grp[i].begin ); 624 | if ( FAILED( hr ) ) panic( "CreateQuery failed.\n" ); 625 | hr = ctx->dev->CreateQuery( &desc, &timer->grp[i].end ); 626 | if ( FAILED( hr ) ) panic( "CreateQuery failed.\n" ); 627 | 628 | desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; 629 | hr = ctx->dev->CreateQuery( &desc, &timer->grp[i].disjoint ); 630 | if ( FAILED( hr ) ) panic( "CreateQuery failed.\n" ); 631 | } 632 | 633 | timer->issue_idx = 0; 634 | timer->retire_idx = 0; 635 | timer->warmup_frames = warmup_frames; 636 | timer->stats = run_stats_create(); 637 | return timer; 638 | } 639 | 640 | void d3du_timer_destroy( d3du_timer * timer ) 641 | { 642 | if ( timer ) 643 | { 644 | for ( size_t i = 0 ; i < TIMER_SLOTS ; i++ ) 645 | { 646 | safe_release( &timer->grp[i].begin ); 647 | safe_release( &timer->grp[i].end ); 648 | safe_release( &timer->grp[i].disjoint ); 649 | } 650 | 651 | run_stats_destroy( timer->stats ); 652 | delete timer; 653 | } 654 | } 655 | 656 | void d3du_timer_bracket_begin( d3du_context * ctx, d3du_timer * timer ) 657 | { 658 | // make sure we have a free timer to issue first 659 | timer_ensure_max_in_flight( ctx, timer, TIMER_SLOTS - 1 ); 660 | 661 | d3du_timer_group * grp = timer_get( timer, timer->issue_idx ); 662 | 663 | ctx->ctx->Begin( grp->disjoint ); 664 | ctx->ctx->End( grp->begin ); 665 | timer->issue_idx++; 666 | } 667 | 668 | void d3du_timer_bracket_end( d3du_context * ctx, d3du_timer * timer ) 669 | { 670 | d3du_timer_group * grp = timer_get( timer, timer->issue_idx - 1 ); 671 | 672 | ctx->ctx->End( grp->end ); 673 | ctx->ctx->End( grp->disjoint ); 674 | } 675 | 676 | void d3du_timer_report( d3du_context * ctx, d3du_timer * timer, char const * label ) 677 | { 678 | timer_ensure_max_in_flight( ctx, timer, 0 ); 679 | run_stats_report( timer->stats, label ); 680 | } 681 | 682 | // @cdep pre $set(c8sysincludes, -I$dxPath/include $c8sysincludes) 683 | // @cdep pre $set(csysincludes64EMT, -I$dxPath/include $csysincludes64EMT) 684 | 685 | --------------------------------------------------------------------------------