├── .gitignore ├── DXConstantBuffer.cpp ├── DXConstantBuffer.h ├── DXShader.cpp ├── DXShader.h ├── DXStructuredBuffer.cpp ├── DXStructuredBuffer.h ├── DXTexture.cpp ├── DXTexture.h ├── DXWrapper.cpp ├── DXWrapper.h ├── Fractal.h ├── Main.cpp ├── ParallelReduction.hlsl ├── RadixSort.hlsl ├── qjulia4D.hlsl ├── radix_sort.sln ├── radix_sort.vcxproj └── radix_sort.vcxproj.filters /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled shader files 2 | *.sh 3 | *.cso 4 | *.asm 5 | 6 | # Compiled Object files 7 | *.slo 8 | *.lo 9 | *.o 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | 15 | # Compiled Static libraries 16 | *.lai 17 | *.la 18 | *.a 19 | 20 | 21 | #OS junk files 22 | [Tt]humbs.db 23 | *.DS_Store 24 | 25 | #Visual Studio files 26 | *.[Oo]bj 27 | *.user 28 | *.aps 29 | *.pch 30 | *.vspscc 31 | *.vssscc 32 | *_i.c 33 | *_p.c 34 | *.ncb 35 | *.suo 36 | *.tlb 37 | *.tlh 38 | *.bak 39 | *.[Cc]ache 40 | *.ilk 41 | *.log 42 | *.lib 43 | *.sbr 44 | *.sdf 45 | *.opensdf 46 | *.unsuccessfulbuild 47 | ipch/ 48 | [Oo]bj/ 49 | [Bb]in 50 | [Dd]ebug*/ 51 | [Rr]elease*/ 52 | Ankh.NoLoad 53 | 54 | #MonoDevelop 55 | *.pidb 56 | *.userprefs 57 | 58 | #Tooling 59 | _ReSharper*/ 60 | *.resharper 61 | [Tt]est[Rr]esult* 62 | *.sass-cache 63 | 64 | #Project files 65 | [Bb]uild/ 66 | 67 | #Subversion files 68 | .svn 69 | 70 | # Office Temp Files 71 | ~$* 72 | 73 | # vim Temp Files 74 | *~ 75 | 76 | #NuGet 77 | packages/ 78 | *.nupkg 79 | 80 | #ncrunch 81 | *ncrunch* 82 | *crunch*.local.xml 83 | 84 | # visual studio database projects 85 | *.dbmdl 86 | 87 | #Test files 88 | *.testsettings 89 | 90 | #blender backups 91 | *.blend1 92 | *.blend2 93 | 94 | #obj files in resources 95 | /resources/models/objects/*.mtl 96 | !/resources/models/objects/*.obj 97 | -------------------------------------------------------------------------------- /DXConstantBuffer.cpp: -------------------------------------------------------------------------------- 1 | #include "DXConstantBuffer.h" 2 | 3 | DXConstantBuffer::DXConstantBuffer(ID3D11Device* device, unsigned int size) 4 | { 5 | D3D11_BUFFER_DESC Desc; 6 | ZeroMemory(&Desc, sizeof(Desc)); 7 | 8 | Desc.Usage = D3D11_USAGE_DYNAMIC; 9 | Desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; 10 | Desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; 11 | Desc.ByteWidth = ((size + 15) / 16) * 16; // must be multiple of 16 bytes 12 | 13 | HRESULT hr = device->CreateBuffer(&Desc, NULL, &(this->buffer)); 14 | 15 | if (hr != S_OK) throw 1; 16 | } 17 | 18 | DXConstantBuffer::~DXConstantBuffer() 19 | { 20 | //this->buffer->Release(); 21 | } 22 | 23 | void* DXConstantBuffer::map(ID3D11DeviceContext* context) { 24 | context->Map((ID3D11Resource*) this->buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &this->msr); 25 | return (msr.pData); 26 | } 27 | 28 | void DXConstantBuffer::unmap(ID3D11DeviceContext* context) { 29 | context->Unmap((ID3D11Resource *)this->buffer, 0); 30 | } -------------------------------------------------------------------------------- /DXConstantBuffer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class DXConstantBuffer 6 | { 7 | friend class DXWrapper; 8 | private: 9 | ID3D11Buffer* buffer; 10 | D3D11_MAPPED_SUBRESOURCE msr; 11 | 12 | public: 13 | DXConstantBuffer(ID3D11Device*, unsigned int); 14 | ~DXConstantBuffer(); 15 | 16 | void* map(ID3D11DeviceContext*); 17 | void unmap(ID3D11DeviceContext*); 18 | }; 19 | 20 | 21 | -------------------------------------------------------------------------------- /DXShader.cpp: -------------------------------------------------------------------------------- 1 | #include "DXShader.h" 2 | 3 | #include 4 | 5 | HRESULT CompileComputeShader(_In_ LPCWSTR srcFile, _In_ LPCSTR entryPoint, 6 | _In_ ID3D11Device* device, _Outptr_ ID3DBlob** blob) 7 | { 8 | if (!srcFile || !entryPoint || !device || !blob) 9 | return E_INVALIDARG; 10 | 11 | *blob = nullptr; 12 | 13 | UINT flags = D3DCOMPILE_ENABLE_STRICTNESS; 14 | #if defined( DEBUG ) || defined( _DEBUG ) 15 | flags |= D3DCOMPILE_DEBUG; 16 | #endif 17 | 18 | // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware 19 | LPCSTR profile = (device->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0) ? "cs_5_0" : "cs_4_0"; 20 | 21 | const D3D_SHADER_MACRO defines[] = 22 | { 23 | "EXAMPLE_DEFINE", "1", 24 | NULL, NULL 25 | }; 26 | 27 | ID3DBlob* shaderBlob = nullptr; 28 | ID3DBlob* errorBlob = nullptr; 29 | HRESULT hr = D3DCompileFromFile(srcFile, defines, D3D_COMPILE_STANDARD_FILE_INCLUDE, 30 | entryPoint, profile, 31 | flags, 0, &shaderBlob, &errorBlob); 32 | if (FAILED(hr)) 33 | { 34 | if (errorBlob) 35 | { 36 | OutputDebugStringA((char*) errorBlob->GetBufferPointer()); 37 | errorBlob->Release(); 38 | } 39 | 40 | if (shaderBlob) 41 | shaderBlob->Release(); 42 | 43 | return hr; 44 | } 45 | 46 | *blob = shaderBlob; 47 | 48 | return hr; 49 | } 50 | 51 | DXShader::DXShader(ID3D11Device* device, const BYTE* bytecode, unsigned int size) 52 | { 53 | HRESULT hr = device->CreateComputeShader(bytecode, size, NULL, &(this->shader)); 54 | 55 | if (hr != S_OK) throw 1; 56 | } 57 | 58 | 59 | 60 | DXShader::~DXShader() 61 | { 62 | } 63 | 64 | ID3D11ComputeShader* DXShader::getShader() { 65 | return this->shader; 66 | } 67 | -------------------------------------------------------------------------------- /DXShader.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class DXShader 6 | { 7 | private: 8 | ID3D11ComputeShader* shader; 9 | public: 10 | DXShader(ID3D11Device*, const BYTE*, unsigned int size); 11 | ~DXShader(); 12 | 13 | ID3D11ComputeShader* getShader(); 14 | }; 15 | 16 | -------------------------------------------------------------------------------- /DXStructuredBuffer.cpp: -------------------------------------------------------------------------------- 1 | #include "DXStructuredBuffer.h" 2 | 3 | 4 | DXStructuredBuffer::DXStructuredBuffer(ID3D11Device* d, unsigned int s, unsigned int n) 5 | { 6 | D3D11_BUFFER_DESC sbDesc; 7 | D3D11_UNORDERED_ACCESS_VIEW_DESC sbUAVDesc; 8 | D3D11_SHADER_RESOURCE_VIEW_DESC sbSRVDesc; 9 | 10 | ZeroMemory(&sbDesc, sizeof(sbDesc)); 11 | ZeroMemory(&sbUAVDesc, sizeof(sbUAVDesc)); 12 | ZeroMemory(&sbSRVDesc, sizeof(sbSRVDesc)); 13 | 14 | sbDesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; 15 | sbDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; 16 | sbDesc.StructureByteStride = s; 17 | sbDesc.ByteWidth = sbDesc.StructureByteStride * n; 18 | sbDesc.Usage = D3D11_USAGE_DEFAULT; 19 | HRESULT hr = d->CreateBuffer(&sbDesc, NULL, &this->buffer); 20 | 21 | if (hr != S_OK) throw 1; 22 | 23 | // UAV 24 | sbUAVDesc.Buffer.NumElements = sbDesc.ByteWidth / sbDesc.StructureByteStride; 25 | sbUAVDesc.Format = DXGI_FORMAT_UNKNOWN; //DXGI_FORMAT_R8G8B8A8_UNORM 26 | sbUAVDesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; 27 | 28 | hr = d->CreateUnorderedAccessView((ID3D11Resource *) this->buffer, &sbUAVDesc, &this->uav); 29 | 30 | if (hr != S_OK) throw 1; 31 | 32 | // SRV 33 | sbSRVDesc.Buffer.ElementWidth = sbDesc.StructureByteStride; 34 | sbSRVDesc.Buffer.FirstElement = sbUAVDesc.Buffer.FirstElement; 35 | sbSRVDesc.Buffer.NumElements = sbUAVDesc.Buffer.NumElements; 36 | sbSRVDesc.Format = DXGI_FORMAT_UNKNOWN; 37 | sbSRVDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; 38 | 39 | hr = d->CreateShaderResourceView((ID3D11Resource*) this->buffer, &sbSRVDesc, &this->srv); 40 | 41 | if (hr != S_OK) throw 1; 42 | 43 | } 44 | 45 | 46 | DXStructuredBuffer::~DXStructuredBuffer() 47 | { 48 | //this->buffer->Release(); 49 | //this->uav->Release(); 50 | //this->srv->Release(); 51 | } 52 | 53 | ID3D11UnorderedAccessView ** DXStructuredBuffer::getUAV() { 54 | return &this->uav; 55 | } 56 | 57 | ID3D11ShaderResourceView ** DXStructuredBuffer::getSRV() { 58 | return &this->srv; 59 | } 60 | -------------------------------------------------------------------------------- /DXStructuredBuffer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class DXStructuredBuffer 6 | { 7 | private: 8 | ID3D11Buffer* buffer; 9 | ID3D11UnorderedAccessView* uav; 10 | ID3D11ShaderResourceView* srv; 11 | 12 | 13 | public: 14 | DXStructuredBuffer(ID3D11Device*, unsigned int, unsigned int); 15 | ~DXStructuredBuffer(); 16 | 17 | ID3D11UnorderedAccessView ** getUAV(); 18 | ID3D11ShaderResourceView ** getSRV(); 19 | }; 20 | 21 | -------------------------------------------------------------------------------- /DXTexture.cpp: -------------------------------------------------------------------------------- 1 | #include "DXTexture.h" 2 | 3 | DXTexture::DXTexture(ID3D11Device* dev, unsigned int w, unsigned int h){ 4 | 5 | D3D11_TEXTURE2D_DESC desc; 6 | ZeroMemory(&desc, sizeof(desc)); 7 | 8 | desc.Width = w; 9 | desc.Height = h; 10 | desc.MipLevels = desc.ArraySize = 1; 11 | desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; 12 | desc.SampleDesc.Count = 1; 13 | desc.Usage = D3D11_USAGE_DEFAULT; 14 | desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;//D3D11.BindFlags.ShaderResource | D3D11.BindFlags.UnorderedAccess 15 | 16 | HRESULT hr = dev->CreateTexture2D(&desc, NULL, &this->texture); 17 | 18 | if (hr != S_OK) throw 1; 19 | if (this->texture == NULL) throw 2; 20 | dev->CreateUnorderedAccessView((ID3D11Resource*) this->texture, NULL, &this->uav); 21 | dev->CreateShaderResourceView((ID3D11Resource*) this->texture, NULL, &this->srv); 22 | 23 | 24 | } 25 | 26 | DXTexture::DXTexture(IDXGISwapChain* sc, ID3D11Device* dev, int id) 27 | { 28 | sc->GetBuffer(id, __uuidof(ID3D11Texture2D), (void**) &this->texture); 29 | dev->CreateUnorderedAccessView((ID3D11Resource*) this->texture, NULL, &this->uav); 30 | 31 | } 32 | 33 | 34 | DXTexture::~DXTexture() 35 | { 36 | this->texture->Release(); 37 | } 38 | -------------------------------------------------------------------------------- /DXTexture.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | class DXTexture 6 | { 7 | private: 8 | ID3D11Texture2D* texture; 9 | ID3D11UnorderedAccessView* uav; 10 | ID3D11ShaderResourceView* srv; 11 | public: 12 | DXTexture(ID3D11Device* dev, unsigned int w, unsigned int h); 13 | DXTexture(IDXGISwapChain*, ID3D11Device*, int); 14 | ~DXTexture(); 15 | 16 | ID3D11UnorderedAccessView** getUAV() { 17 | return &this->uav; 18 | } 19 | 20 | ID3D11ShaderResourceView** getSRV() { 21 | return &this->srv; 22 | } 23 | }; 24 | 25 | -------------------------------------------------------------------------------- /DXWrapper.cpp: -------------------------------------------------------------------------------- 1 | #include "DXWrapper.h" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | DXWrapper::DXWrapper(HWND window, int width, int height) 9 | { 10 | const DXGI_SWAP_CHAIN_DESC sd = { { width, height, { 60, 1 }, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED, DXGI_MODE_SCALING_UNSPECIFIED }, { 1, 0 }, DXGI_USAGE_RENDER_TARGET_OUTPUT, 1, NULL, TRUE, DXGI_SWAP_EFFECT_SEQUENTIAL, 0 }; 11 | 12 | DXGI_SWAP_CHAIN_DESC temp; 13 | temp = sd; 14 | temp.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_UNORDERED_ACCESS | DXGI_USAGE_SHADER_INPUT; 15 | temp.OutputWindow = window; 16 | 17 | 18 | D3D11CreateDeviceAndSwapChain( 19 | NULL, 20 | D3D_DRIVER_TYPE_HARDWARE, 21 | NULL, 22 | D3D11_CREATE_DEVICE_DEBUG, 23 | NULL, 24 | 0, 25 | D3D11_SDK_VERSION, 26 | &temp, 27 | &(this->swapChain), 28 | &(this->device), 29 | NULL, 30 | &(this->context) 31 | ); 32 | 33 | } 34 | 35 | 36 | DXWrapper::~DXWrapper() 37 | { 38 | this->context->ClearState(); 39 | this->device->Release(); 40 | this->swapChain->Release(); 41 | } 42 | 43 | void DXWrapper::present() { 44 | this->swapChain->Present(0, 0); 45 | } 46 | 47 | DXTexture DXWrapper::getTexture(int id) { 48 | return DXTexture(this->swapChain, this->device, id); 49 | } 50 | 51 | DXTexture DXWrapper::getTexture(unsigned int w, unsigned int h) { 52 | return DXTexture(this->device, w, h); 53 | } 54 | 55 | DXStructuredBuffer DXWrapper::getStructuredBuffer(unsigned int stride, unsigned int num) { 56 | return DXStructuredBuffer(this->device, stride, num); 57 | } 58 | 59 | DXConstantBuffer DXWrapper::getConstantBuffer(unsigned int size) { 60 | return DXConstantBuffer(this->device, size); 61 | } 62 | 63 | DXShader DXWrapper::getComputeShader(const BYTE* bytecode, unsigned int size) { 64 | return DXShader(device, bytecode, size); 65 | } 66 | 67 | void DXWrapper::setComputeShader(DXShader& shader) { 68 | this->context->CSSetShader(shader.getShader(), NULL, 0); 69 | } 70 | 71 | void DXWrapper::setUAV(int i, int j, ID3D11UnorderedAccessView** u) { 72 | this->context->CSSetUnorderedAccessViews(i, j, u, NULL); 73 | } 74 | 75 | void DXWrapper::setSRV(int i, int j, ID3D11ShaderResourceView** s) { 76 | this->context->CSSetShaderResources(i, j, s); 77 | } 78 | 79 | void DXWrapper::runShader(unsigned int x, unsigned int y, unsigned int z) { 80 | this->context->Dispatch(x, y, z); 81 | } 82 | 83 | void DXWrapper::resetShader() { 84 | ID3D11ShaderResourceView* pNull[3] = { NULL, NULL, NULL }; 85 | this->context->CSSetShaderResources(0, 3, pNull); 86 | 87 | ID3D11UnorderedAccessView* uNull[3] = { NULL, NULL, NULL }; 88 | this->context->CSSetUnorderedAccessViews(0, 3, uNull, NULL); 89 | } 90 | 91 | void DXWrapper::unmap(DXConstantBuffer& buffer) { 92 | return buffer.unmap(context); 93 | } 94 | 95 | void DXWrapper::setConstantBuffer(int i, int j, DXConstantBuffer& b) { 96 | this->context->CSSetConstantBuffers(i, j, &b.buffer); 97 | } 98 | 99 | -------------------------------------------------------------------------------- /DXWrapper.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "DXTexture.h" 9 | #include "DXStructuredBuffer.h" 10 | #include "DXShader.h" 11 | #include "DXConstantBuffer.h" 12 | 13 | class DXWrapper 14 | { 15 | private: 16 | ID3D11Device* device; 17 | IDXGISwapChain* swapChain; 18 | ID3D11DeviceContext* context; 19 | 20 | public: 21 | DXWrapper(HWND, int, int); 22 | ~DXWrapper(); 23 | 24 | void present(); 25 | DXTexture getTexture(int id); 26 | DXTexture getTexture(unsigned int, unsigned int); 27 | DXStructuredBuffer getStructuredBuffer(unsigned int, unsigned int); 28 | DXShader getComputeShader(const BYTE*, unsigned int); 29 | 30 | DXConstantBuffer getConstantBuffer(unsigned int); 31 | template S* map(DXConstantBuffer&); 32 | void unmap(DXConstantBuffer&); 33 | 34 | 35 | void setComputeShader(DXShader&); 36 | void setUAV(int, int, ID3D11UnorderedAccessView**); 37 | void setSRV(int, int, ID3D11ShaderResourceView**); 38 | void setConstantBuffer(int, int, DXConstantBuffer&); 39 | void runShader(unsigned int, unsigned int, unsigned int); 40 | void resetShader(); 41 | 42 | }; 43 | 44 | template 45 | S* DXWrapper::map(DXConstantBuffer& buffer) { 46 | return static_cast(buffer.map(context)); 47 | } 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /Fractal.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | namespace Fractal { 3 | 4 | struct MainConstantBuffer { 5 | // Julia 4D constants 6 | unsigned int c_height; 7 | unsigned int c_width; // view port size 8 | float epsilon; // detail julia 9 | int selfShadow; // selfshadowing on or off 10 | float diffuse[4]; // diffuse shading color 11 | float mu[4]; // quaternion julia parameter 12 | float orientation[4 * 4]; // rotation matrix 13 | float zoom; 14 | }; 15 | 16 | static float gSaturation = 1.0f; 17 | 18 | static float Epsilon = 0.003f; 19 | static float ColorT = 0.0f; 20 | static float ColorA[4] = { 0.25f, 0.45f, 1.0f, 1.0f }; 21 | static float ColorB[4] = { 0.25f, 0.45f, 1.0f, 1.0f }; 22 | static float ColorC[4] = { 0.25f, 0.45f, 1.0f, 1.0f }; 23 | 24 | static float MuT = 0.0f; 25 | static float MuA[4] = { -.278f, -.479f, 0.0f, 0.0f }; 26 | static float MuB[4] = { 0.278f, 0.479f, 0.0f, 0.0f }; 27 | static float MuC[4] = { -.278f, -.479f, -.231f, .235f }; 28 | 29 | static BOOL selfShadow = TRUE; 30 | static float zoom = 1.0f; 31 | 32 | // 33 | // Random number generator 34 | // see http://www.codeproject.com/KB/recipes/SimpleRNG.aspx 35 | 36 | // These values are not magical, just the default values Marsaglia used. 37 | // Any pair of unsigned integers should be fine. 38 | static unsigned int m_w = 521288629; 39 | //static unsigned int m_z = 362436069; 40 | #define MZ ((36969 * (362436069 & 65535) + (362436069 >> 16)) << 16) 41 | 42 | static void SetSeed(unsigned int u) 43 | { 44 | m_w = u; 45 | } 46 | 47 | // This is the heart of the generator. 48 | // It uses George Marsaglia's MWC algorithm to produce an unsigned integer. 49 | // See http://www.bobwheeler.com/statistics/Password/MarsagliaPost.txt 50 | static unsigned int GetUint() 51 | { 52 | // m_z = 36969 * (m_z & 65535) + (m_z >> 16); 53 | m_w = 18000 * (m_w & 65535) + (m_w >> 16); 54 | return (MZ) +m_w; 55 | } 56 | 57 | // Produce a uniform random sample from the interval (-1, 1). 58 | // The method will not return either end point. 59 | static float GetUniform() 60 | { 61 | // 0 <= u < 2^32 62 | unsigned int u = GetUint(); 63 | // The magic number below is 1/(2^32 + 2). 64 | // The result is strictly between 0 and 1. 65 | return (u) * (float) 2.328306435454494e-10 * 2.0f; 66 | } 67 | 68 | 69 | static void 70 | Interpolate(float m[4], float t, float a[4], float b[4]) 71 | { 72 | int i; 73 | for (i = 0; i < 4; i++) 74 | m[i] = (1.0f - t) * a[i] + t * b[i]; 75 | } 76 | 77 | 78 | float dt; // time increment depending on frame rendering time for same animation speed independent of rendering speed 79 | 80 | static void 81 | UpdateMu(float t[4], float a[4], float b[4]) 82 | { 83 | *t += 0.01f *dt; 84 | 85 | if (*t >= 1.0f) 86 | { 87 | *t = 0.0f; 88 | 89 | a[0] = b[0]; 90 | a[1] = b[1]; 91 | a[2] = b[2]; 92 | a[3] = b[3]; 93 | 94 | b[0] = GetUniform(); 95 | b[1] = GetUniform(); 96 | b[2] = GetUniform(); 97 | b[3] = GetUniform(); 98 | } 99 | } 100 | 101 | static void 102 | RandomColor(float v[4]) 103 | { 104 | do 105 | { 106 | v[0] = GetUniform(); 107 | v[1] = GetUniform(); 108 | v[2] = GetUniform(); 109 | } while (v[0] < 0 && v[1] <0 && v[2]<0); // prevent black colors 110 | v[3] = 1.0f; 111 | } 112 | 113 | static void 114 | UpdateColor(float t[4], float a[4], float b[4]) 115 | { 116 | *t += 0.01f *dt; 117 | 118 | if (*t >= 1.0f) 119 | { 120 | *t = 0.0f; 121 | 122 | a[0] = b[0]; 123 | a[1] = b[1]; 124 | a[2] = b[2]; 125 | a[3] = b[3]; 126 | 127 | RandomColor(b); 128 | } 129 | } 130 | 131 | // timer global variables 132 | DWORD StartTime; 133 | static DWORD CurrentTime; 134 | 135 | static void initialize() { 136 | // setup timer 137 | StartTime = GetTickCount(); 138 | 139 | // seed the random number generator 140 | SetSeed((unsigned int) GetCurrentTime()); 141 | } 142 | 143 | static void update() { 144 | CurrentTime = GetTickCount() - StartTime; 145 | dt = CurrentTime / (20000.0f); 146 | UpdateMu(&MuT, MuA, MuB); 147 | Interpolate(MuC, MuT, MuA, MuB); 148 | 149 | UpdateColor(&ColorT, ColorA, ColorB); 150 | Interpolate(ColorC, ColorT, ColorA, ColorB); 151 | } 152 | 153 | static void fill(MainConstantBuffer* mc) { 154 | // this is a continous constant buffer 155 | // that means each value is aligned in the buffer one after each other without any spaces 156 | // the layout need to be in the same order as the constant buffer struct in the shader 157 | 158 | mc->epsilon = Epsilon; 159 | mc->selfShadow = selfShadow; 160 | mc->diffuse[0] = ColorC[0]; 161 | mc->diffuse[1] = ColorC[1]; 162 | mc->diffuse[2] = ColorC[2]; 163 | mc->diffuse[3] = ColorC[3]; 164 | mc->mu[0] = MuC[0]; 165 | mc->mu[1] = MuC[1]; 166 | mc->mu[2] = MuC[2]; 167 | mc->mu[3] = MuC[3]; 168 | mc->orientation[0] = 1.0; 169 | // mc->orientation[1] = 0.0; 170 | // mc->orientation[2] = 0.0; 171 | // mc->orientation[3] = 0.0; 172 | // mc->orientation[4] = 0.0; 173 | mc->orientation[5] = 1.0; 174 | // mc->orientation[6] = 0.0; 175 | // mc->orientation[7] = 0.0; 176 | // mc->orientation[8] = 0.0; 177 | // mc->orientation[9] = 0.0; 178 | mc->orientation[10] = 1.0; 179 | // mc->orientation[11] = 0.0; 180 | // mc->orientation[12] = 0.0; 181 | // mc->orientation[13] = 0.0; 182 | // mc->orientation[14] = 0.0; 183 | mc->orientation[15] = 1.0; 184 | mc->zoom = zoom; 185 | } 186 | }; 187 | 188 | 189 | -------------------------------------------------------------------------------- /Main.cpp: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////// 2 | // 3 | // Radix Sort in DirectCompute 4 | // by Jacob Maskiewicz 5 | // 6 | // CSE 190: GPU Programming 7 | // 8 | //////////////////////////////////////// 9 | 10 | #include 11 | 12 | #include "DXWrapper.h" 13 | #include "Fractal.h" 14 | 15 | #include "RadixSort.sh" 16 | #include "qjulia4D.sh" 17 | #include "ParallelReduction.sh" 18 | 19 | // define the size of the window 20 | #define THREADSX 16 // number of threads in the thread group used in the compute shader 21 | #define THREADSY 16 // number of threads in the thread group used in the compute shader 22 | #define WINDOWWIDTH 1280 23 | #define WINDOWHEIGHT 720 24 | 25 | #define WINWIDTH ((((WINDOWWIDTH + THREADSX - 1) / THREADSX) * THREADSX)) // multiply of ThreadsX 26 | #define WINHEIGHT ((((WINDOWHEIGHT + THREADSY - 1) / THREADSY) * THREADSY)) // multiply of ThreadsY 27 | 28 | #define WINPOSX 50 29 | #define WINPOSY 50 30 | 31 | int WINAPI WinMain( 32 | HINSTANCE hInstance, // HANDLE TO AN INSTANCE. This is the "handle" to YOUR PROGRAM ITSELF. 33 | HINSTANCE hPrevInstance,// USELESS on modern windows (totally ignore hPrevInstance) 34 | LPSTR szCmdLine, // Command line arguments. similar to argv in standard C programs 35 | int iCmdShow) // Start window maximized, minimized, etc. 36 | { 37 | HWND windowHandle = CreateWindow(L"edit", 0, WS_POPUP | WS_VISIBLE, WINPOSX, WINPOSY, WINWIDTH, WINHEIGHT, 0, 0, 0, 0); 38 | 39 | ShowCursor(false); 40 | 41 | DXWrapper dx(windowHandle, WINWIDTH, WINHEIGHT); 42 | 43 | DXTexture& backBuffer = dx.getTexture(0); 44 | DXConstantBuffer& constantBuffer = dx.getConstantBuffer(sizeof(Fractal::MainConstantBuffer)); 45 | DXStructuredBuffer& juliaOut = dx.getStructuredBuffer(sizeof(float)*4, WINWIDTH * WINHEIGHT); 46 | DXStructuredBuffer& sums = dx.getStructuredBuffer(sizeof(float), WINWIDTH*WINHEIGHT/(THREADSX*THREADSY)); 47 | 48 | DXShader& shader = dx.getComputeShader(g_main, sizeof(g_main)); 49 | DXShader& qjulia = dx.getComputeShader(g_CS_QJulia4D, sizeof(g_CS_QJulia4D)); 50 | DXShader& reduce = dx.getComputeShader(g_PostFX, sizeof(g_PostFX)); 51 | 52 | Fractal::initialize(); 53 | 54 | MSG msg; 55 | 56 | bool running = true; 57 | while (running) { 58 | PeekMessage(&msg, windowHandle, 0, 0, PM_REMOVE); 59 | if (GetAsyncKeyState(VK_ESCAPE)) running = false; 60 | 61 | Fractal::update(); 62 | 63 | Fractal::MainConstantBuffer* b = dx.map(constantBuffer); 64 | 65 | b->c_height = WINHEIGHT; 66 | b->c_width = WINWIDTH; 67 | Fractal::fill(b); 68 | 69 | b = NULL; 70 | dx.unmap(constantBuffer); 71 | 72 | // QJulia 73 | dx.setComputeShader(qjulia); 74 | dx.setUAV(0, 1, juliaOut.getUAV()); 75 | dx.setConstantBuffer(0, 1, constantBuffer); 76 | dx.runShader(WINWIDTH / THREADSX, WINHEIGHT / THREADSY, 1); 77 | dx.resetShader(); 78 | 79 | // Sort each block 80 | dx.setComputeShader(shader); 81 | dx.setUAV(0, 1, backBuffer.getUAV()); //output 82 | dx.setConstantBuffer(0, 1, constantBuffer); 83 | dx.setSRV(0, 1, juliaOut.getSRV()); //input 84 | dx.runShader(WINWIDTH / THREADSX, WINHEIGHT / THREADSY, 1); 85 | 86 | dx.resetShader(); 87 | 88 | dx.present(); 89 | 90 | __noop; 91 | } 92 | 93 | // release buffers and shaders automatically with destructors 94 | return msg.wParam; 95 | } -------------------------------------------------------------------------------- /ParallelReduction.hlsl: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////// 2 | // 3 | // Instructional Post-Processing Parallel Reduction with DirectCompute 4 | // 5 | // by Wolfgang Engel 6 | // 7 | // Last time modified: 01/13/2014 8 | // 9 | /////////////////////////////////////////////////////////////////////// 10 | 11 | /* 12 | #0 is base line 13 | #1 Interleaved Shared Memory Addressing : Divergent Branching 14 | #2 Interleaved Shared Memory Addressing : Shared Memory Bank Conflicts 15 | #3 Idle Threads in Thread Group : First add during Global Load 16 | #4 Instruction Bottleneck : Unroll last Warp 17 | #5 Completely Unroll 18 | */ 19 | #define OPTIMIZATION 4 20 | 21 | StructuredBuffer Input : register(t0); 22 | RWStructuredBuffer Result : register(u0); 23 | 24 | #define THREADX 16 / 2 25 | #define THREADY 16 / 2 26 | 27 | 28 | cbuffer cbCS : register(b0) 29 | { 30 | int c_height : packoffset(c0.x); 31 | int c_width : packoffset(c0.y); // size view port 32 | /* 33 | This is in the constant buffer as well but not used in this shader, so I just keep it in here as a comment 34 | 35 | float c_epsilon : packoffset(c0.z); // julia detail 36 | int c_selfShadow : packoffset(c0.w); // selfshadowing on or off 37 | float4 c_diffuse : packoffset(c1); // diffuse shading color 38 | float4 c_mu : packoffset(c2); // julia quaternion parameter 39 | float4x4 rotation : packoffset(c3); 40 | float zoom : packoffset(c7.x); 41 | */ 42 | }; 43 | 44 | // 45 | // the following shader applies parallel reduction to an image and converts it to luminance 46 | // 47 | #define groupthreads THREADX * THREADY 48 | groupshared float sharedMem[groupthreads]; 49 | 50 | // SV_DispatchThreadID - index of the thread within the entire dispatch in each dimension: x - 0..x - 1; y - 0..y - 1; z - 0..z - 1 51 | // SV_GroupID - index of a thread group in the dispatch — for example, calling Dispatch(2,1,1) results in possible values of 0,0,0 and 1,0,0, varying from 0 to (numthreadsX * numthreadsY * numThreadsZ) – 1 52 | // SV_GroupThreadID - 3D version of SV_GroupIndex - if you specified numthreads(3,2,1), possible values for the SV_GroupThreadID input value have the range of values (0–2,0–1,0) 53 | // SV_GroupIndex - index of a thread within a thread group 54 | [numthreads(THREADX, THREADY, 1)] 55 | void PostFX( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex ) 56 | { 57 | const float4 LumVector = float4(0.2125f, 0.7154f, 0.0721f, 0.0f); 58 | 59 | // read from structured buffer 60 | uint idx = DTid.x + DTid.y * c_width; 61 | 62 | // #3 Idle Threads in Thread Group : First add during Global Load 63 | 64 | // store in shared memory 65 | sharedMem[GI] = dot(Input[idx * 2], LumVector) + dot(Input[idx * 2 + 1], LumVector); 66 | 67 | 68 | // wait until everything is transfered from device memory to shared memory 69 | GroupMemoryBarrierWithGroupSync(); 70 | 71 | // wonder if this does anything :-) 72 | [unroll(groupthreads)] 73 | for (uint s = groupthreads / 2; s > 32; s >>= 1) 74 | { 75 | if (GI < s) 76 | // store in shared memory 77 | sharedMem[GI] += sharedMem[GI + s]; 78 | GroupMemoryBarrierWithGroupSync(); 79 | } 80 | 81 | if (GI < 32) 82 | { 83 | sharedMem[GI] += sharedMem[GI + 32]; 84 | sharedMem[GI] += sharedMem[GI + 16]; 85 | sharedMem[GI] += sharedMem[GI + 8]; 86 | sharedMem[GI] += sharedMem[GI + 2]; 87 | sharedMem[GI] += sharedMem[GI + 1]; 88 | } 89 | 90 | 91 | // Have the first thread write out to the output 92 | if (GI == 0) 93 | { 94 | // write out the result for each thread group 95 | Result[Gid.x + Gid.y*c_width/16] = sharedMem[0] / (THREADX * THREADY); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /RadixSort.hlsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakemco/gpu-radix-sort/70d689f7c0479838f388325e704e6f878459c5ad/RadixSort.hlsl -------------------------------------------------------------------------------- /qjulia4D.hlsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakemco/gpu-radix-sort/70d689f7c0479838f388325e704e6f878459c5ad/qjulia4D.hlsl -------------------------------------------------------------------------------- /radix_sort.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.21005.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "radix_sort", "radix_sort.vcxproj", "{F3C5DCE0-E45D-45C9-8D13-38BC935C312E}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Release|Win32 = Release|Win32 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | {F3C5DCE0-E45D-45C9-8D13-38BC935C312E}.Debug|Win32.ActiveCfg = Debug|Win32 15 | {F3C5DCE0-E45D-45C9-8D13-38BC935C312E}.Debug|Win32.Build.0 = Debug|Win32 16 | {F3C5DCE0-E45D-45C9-8D13-38BC935C312E}.Release|Win32.ActiveCfg = Release|Win32 17 | {F3C5DCE0-E45D-45C9-8D13-38BC935C312E}.Release|Win32.Build.0 = Release|Win32 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /radix_sort.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {F3C5DCE0-E45D-45C9-8D13-38BC935C312E} 15 | Win32Proj 16 | radix_sort 17 | 18 | 19 | 20 | Application 21 | true 22 | v120 23 | Unicode 24 | 25 | 26 | Application 27 | false 28 | v120 29 | true 30 | Unicode 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | true 44 | 45 | 46 | false 47 | 48 | 49 | 50 | 51 | 52 | Level3 53 | Disabled 54 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) 55 | 56 | 57 | Windows 58 | true 59 | d3d11.lib;d3dcompiler.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 60 | 61 | 62 | 63 | 64 | Level3 65 | 66 | 67 | MaxSpeed 68 | true 69 | true 70 | WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 71 | 72 | 73 | Windows 74 | true 75 | true 76 | true 77 | d3d11.lib;d3dcompiler.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | PostFX 99 | Compute 100 | 5.0 101 | %(Filename).cso 102 | %(Filename).sh 103 | %(Filename).asm 104 | 105 | 106 | CS_QJulia4D 107 | Compute 108 | 5.0 109 | %(Filename).sh 110 | %(Filename).cso 111 | %(Filename).asm 112 | 113 | 114 | Compute 115 | 5.0 116 | %(Filename).cso 117 | %(Filename).sh 118 | %(Filename).asm 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /radix_sort.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Header Files 20 | 21 | 22 | Header Files 23 | 24 | 25 | Header Files 26 | 27 | 28 | Header Files 29 | 30 | 31 | Header Files 32 | 33 | 34 | Header Files 35 | 36 | 37 | 38 | 39 | Source Files 40 | 41 | 42 | Source Files 43 | 44 | 45 | Source Files 46 | 47 | 48 | Source Files 49 | 50 | 51 | Source Files 52 | 53 | 54 | Source Files 55 | 56 | 57 | 58 | 59 | Source Files 60 | 61 | 62 | Source Files 63 | 64 | 65 | Source Files 66 | 67 | 68 | --------------------------------------------------------------------------------