├── src ├── d3d12translationlayer │ ├── Util.cpp │ ├── View.cpp │ ├── DeviceChild.cpp │ ├── RootSignature.cpp │ ├── PipelineState.cpp │ ├── Allocator.cpp │ ├── Sampler.cpp │ ├── Fence.cpp │ └── Query.cpp ├── openclon12 │ ├── sampler.hpp │ ├── cache.hpp │ ├── queue.hpp │ ├── kernel.hpp │ ├── openclon12.def │ ├── cache.cpp │ ├── gl_tokens.hpp │ ├── context.hpp │ ├── device.hpp │ ├── resources.hpp │ ├── scheduler.hpp │ ├── program.hpp │ ├── sampler.cpp │ ├── compiler.hpp │ ├── task.hpp │ ├── spookyv2.cpp │ ├── platform.cpp │ └── main.cpp └── compilers │ ├── v2 │ ├── dxil_versions.h │ ├── clc_compiler.h │ └── clc │ │ └── clc.h │ └── compiler_common.cpp ├── include └── d3d12translationlayer │ ├── commandlistmanager.inl │ ├── Query.hpp │ ├── RootSignature.hpp │ ├── D3D12TranslationLayerDependencyIncludes.h │ ├── PipelineState.hpp │ ├── Sampler.hpp │ ├── Fence.hpp │ ├── DeviceChild.hpp │ ├── View.hpp │ ├── XPlatHelpers.h │ ├── CommandListManager.hpp │ ├── ImmediateContext.inl │ ├── View.inl │ ├── SubresourceHelpers.hpp │ ├── Allocator.h │ └── FormatDesc.hpp ├── SUPPORT.MD ├── test └── CMakeLists.txt ├── CONTRIBUTING.md ├── LICENSE ├── .github └── workflows │ └── prbuild.yml ├── external ├── d3d12_interop_public.h └── MicrosoftTelemetry.h ├── CMakeLists.txt ├── SECURITY.md └── README.md /src/d3d12translationlayer/Util.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | #include "FormatDesc.hpp" 5 | #include "Util.hpp" 6 | 7 | namespace D3D12TranslationLayer 8 | { 9 | UINT GetByteAlignment(DXGI_FORMAT format) 10 | { 11 | return CD3D11FormatHelper::GetByteAlignment(format); 12 | } 13 | } -------------------------------------------------------------------------------- /src/d3d12translationlayer/View.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
2 | // Licensed under the MIT License. 3 | 4 | #include "Resource.hpp" 5 | #include "View.hpp" 6 | 7 | namespace D3D12TranslationLayer 8 | { 9 | void ViewBase::UsedInCommandList(UINT64 id) 10 | { 11 | if (m_pResource) { m_pResource->UsedInCommandList(id); } 12 | } 13 | }; -------------------------------------------------------------------------------- /src/d3d12translationlayer/DeviceChild.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #include "DeviceChild.hpp" 4 | #include "ImmediateContext.hpp" 5 | 6 | namespace D3D12TranslationLayer 7 | { 8 | void DeviceChild::AddToDeferredDeletionQueue(ID3D12Object* pObject) 9 | { 10 | m_pParent->AddObjectToDeferredDeletionQueue(pObject, m_LastUsedCommandListID); 11 | } 12 | }; -------------------------------------------------------------------------------- /include/d3d12translationlayer/commandlistmanager.inl: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | namespace D3D12TranslationLayer 5 | { 6 | //---------------------------------------------------------------------------------------------------------------------------------- 7 | // This allows one to atomically read 64 bit values 8 | inline LONGLONG InterlockedRead64(volatile LONGLONG* p) 9 | { 10 | return InterlockedCompareExchange64(p, 0, 0); 11 | } 12 | }; -------------------------------------------------------------------------------- /SUPPORT.MD: -------------------------------------------------------------------------------- 1 | # Support 2 | 3 | ## How to file issues and get help 4 | 5 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 6 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 7 | feature request as a new Issue. 
8 | 9 | For help and questions about using this project, please use GitHub Issues or get in touch with 10 | us on the [DirectX Discord Server](https://discord.com/invite/directx). 11 | 12 | ## Microsoft Support Policy 13 | 14 | Support for this project is limited to the resources listed above. 15 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 4 | FetchContent_Declare( 5 | googletest 6 | GIT_REPOSITORY https://github.com/google/googletest.git 7 | GIT_TAG 2d924d7a971e9667d76ad09727fb2402b4f8a1e3 8 | ) 9 | FetchContent_MakeAvailable(googletest) 10 | 11 | file(GLOB SRC CONFIGURE_DEPENDS *.cpp) 12 | file(GLOB INC *.h *.hpp) 13 | 14 | add_executable(openclon12test ${SRC} ${INC}) 15 | target_include_directories(openclon12test PRIVATE ../src/openclon12) 16 | target_link_libraries(openclon12test openclon12 gtest_main opengl32 gdi32 user32) 17 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/Query.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "DeviceChild.hpp" 7 | #include "Resource.hpp" 8 | 9 | namespace D3D12TranslationLayer 10 | { 11 | class TimestampQuery : public DeviceChild 12 | { 13 | public: 14 | TimestampQuery(ImmediateContext* pDevice) noexcept(false); 15 | ~TimestampQuery() noexcept; 16 | 17 | void End() noexcept; 18 | UINT64 GetData() noexcept; 19 | 20 | private: 21 | unique_comptr m_spQueryHeap; 22 | D3D12ResourceSuballocation m_spResultBuffer; 23 | }; 24 | }; -------------------------------------------------------------------------------- /include/d3d12translationlayer/RootSignature.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "DeviceChild.hpp" 7 | #include 8 | 9 | namespace D3D12TranslationLayer 10 | { 11 | class RootSignature : protected DeviceChildImpl 12 | { 13 | public: 14 | RootSignature(ImmediateContext* pParent) 15 | : DeviceChildImpl(pParent) 16 | { 17 | } 18 | 19 | void Create(D3D12_VERSIONED_ROOT_SIGNATURE_DESC const& rootDesc) noexcept(false); 20 | void Create(const void* pBlob, SIZE_T BlobSize) noexcept(false); 21 | using DeviceChildImpl::GetForUse; 22 | using DeviceChildImpl::GetForImmediateUse; 23 | }; 24 | }; 25 | -------------------------------------------------------------------------------- /src/d3d12translationlayer/RootSignature.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | #include "ImmediateContext.hpp" 5 | #include "RootSignature.hpp" 6 | #include 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | void RootSignature::Create(D3D12_VERSIONED_ROOT_SIGNATURE_DESC const& rootDesc) noexcept(false) 11 | { 12 | CComPtr spBlob; 13 | 14 | ThrowFailure(D3D12SerializeVersionedRootSignature(&rootDesc, &spBlob, NULL)); 15 | 16 | Create(spBlob->GetBufferPointer(), spBlob->GetBufferSize()); 17 | } 18 | void RootSignature::Create(const void* pBlob, SIZE_T BlobSize) noexcept(false) 19 | { 20 | Destroy(); 21 | 22 | ThrowFailure(m_pParent->m_pDevice12->CreateRootSignature(1, pBlob, BlobSize, IID_PPV_ARGS(GetForCreate()))); 23 | } 24 | }; -------------------------------------------------------------------------------- /src/openclon12/sampler.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "context.hpp" 6 | 7 | #include "../d3d12translationlayer/Sampler.hpp" 8 | 9 | class Sampler : public CLChildBase 10 | { 11 | public: 12 | struct Desc 13 | { 14 | cl_bool NormalizedCoords; 15 | cl_addressing_mode AddressingMode; 16 | cl_filter_mode FilterMode; 17 | }; 18 | Sampler(Context& Parent, Desc const& desc, const cl_sampler_properties *properties); 19 | 20 | D3D12TranslationLayer::Sampler& GetUnderlying(D3DDevice*); 21 | 22 | const Desc m_Desc; 23 | const std::vector m_Properties; 24 | private: 25 | std::mutex m_Lock; 26 | std::unordered_map m_UnderlyingSamplers; 27 | }; 28 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/D3D12TranslationLayerDependencyIncludes.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | // The Windows build uses DBG for debug builds, but Visual Studio defaults to NDEBUG for retail 6 | // We'll pick TRANSLATION_LAYER_DBG for CMake (VS) builds, and we'll convert DBG to that here 7 | // for Windows builds 8 | #if DBG 9 | #define TRANSLATION_LAYER_DBG 1 10 | #endif 11 | 12 | //SDK Headers 13 | #define NOMINMAX 14 | #define WIN32_LEAN_AND_MEAN 15 | #define _ATL_NO_WIN_SUPPORT 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #define INITGUID 22 | #include 23 | #include 24 | #undef INITGUID 25 | #include 26 | #include 27 | 28 | #include 29 | using std::min; 30 | using std::max; 31 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This project welcomes contributions and suggestions. Most contributions require you to 4 | agree to a Contributor License Agreement (CLA) declaring that you have the right to, 5 | and actually do, grant us the rights to use your contribution. For details, visit 6 | https://cla.microsoft.com. 7 | 8 | When you submit a pull request, a CLA-bot will automatically determine whether you need 9 | to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the 10 | instructions provided by the bot. You will only need to do this once across all repositories using our CLA. 11 | 12 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 13 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 14 | or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 
-------------------------------------------------------------------------------- /src/d3d12translationlayer/PipelineState.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #include "ImmediateContext.hpp" 4 | #include "RootSignature.hpp" 5 | #include "PipelineState.hpp" 6 | 7 | namespace D3D12TranslationLayer 8 | { 9 | 10 | PipelineState::PipelineState(ImmediateContext *pContext, const D3D12_SHADER_BYTECODE &CS, RootSignature* pRS, D3D12_CACHED_PIPELINE_STATE Cached) 11 | : DeviceChildImpl(pContext) 12 | , m_pRootSignature(pRS) 13 | , m_Desc { nullptr, CS, 0, Cached } 14 | { 15 | m_Desc.pRootSignature = m_pRootSignature->GetForImmediateUse(); 16 | 17 | Create(); 18 | } 19 | 20 | PipelineState::~PipelineState() = default; 21 | 22 | void PipelineState::Create() 23 | { 24 | HRESULT hr = m_pParent->m_pDevice12->CreateComputePipelineState(&m_Desc, IID_PPV_ARGS(GetForCreate())); 25 | ThrowFailure(hr); // throw( _com_error ) 26 | } 27 | } -------------------------------------------------------------------------------- /include/d3d12translationlayer/PipelineState.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "DeviceChild.hpp" 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | class RootSignature; 11 | // Compute pipeline state object: wraps an ID3D12PipelineState created from a compute shader and a root signature. 12 | struct PipelineState : protected DeviceChildImpl<ID3D12PipelineState> 13 | { 14 | public: 15 | RootSignature* GetRootSignature() { return m_pRootSignature; } // root signature supplied at construction 16 | 17 | PipelineState(ImmediateContext *pContext, const D3D12_SHADER_BYTECODE &CS, RootSignature *pRS, D3D12_CACHED_PIPELINE_STATE Cached = {}); 18 | ~PipelineState(); 19 | 20 | using DeviceChildImpl::GetForUse; 21 | using DeviceChildImpl::GetForImmediateUse; 22 | 23 | protected: 24 | RootSignature* const m_pRootSignature; // raw pointer, fixed for the lifetime of this object 25 | D3D12_COMPUTE_PIPELINE_STATE_DESC m_Desc; // descriptor handed to CreateComputePipelineState by Create() 26 | 27 | void Create(); 28 | }; 29 | }; 30 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/Sampler.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "DeviceChild.hpp" 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | //================================================================================================================================== 11 | // Sampler 12 | // Stores data responsible for remapping D3D11 samplers to underlying D3D12 samplers 13 | //================================================================================================================================== 14 | class Sampler : public DeviceChild 15 | { 16 | public: 17 | Sampler(ImmediateContext* pDevice, D3D12_SAMPLER_DESC const& desc) noexcept(false); 18 | ~Sampler() noexcept; 19 | 20 | public: 21 | D3D12_CPU_DESCRIPTOR_HANDLE m_Descriptor; 22 | UINT m_DescriptorHeapIndex; 23 | }; 24 | }; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Microsoft Corporation. 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /src/d3d12translationlayer/Allocator.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #include "Allocator.h" 4 | #include "ImmediateContext.hpp" 5 | #include "ImmediateContext.inl" 6 | 7 | namespace D3D12TranslationLayer 8 | { 9 | ID3D12Resource* InternalHeapAllocator::Allocate(UINT64 size) 10 | { 11 | // Transfer ownership of ID3D12Resource to the calling allocator 12 | return m_pContext->AcquireTransitionableUploadBuffer(m_HeapType, size).release(); 13 | } 14 | // Returns a buffer obtained from Allocate() to the context's transitionable buffer pool. 15 | void InternalHeapAllocator::Deallocate(ID3D12Resource* pResource) 16 | { 17 | m_pContext->ReturnTransitionableBufferToPool( 18 | m_HeapType, 19 | pResource->GetDesc().Width, 20 | unique_comptr<ID3D12Resource>(pResource), // a temporary is already an rvalue; no std::move needed 21 | // Guaranteed to be finished since this is only called after 22 | // all suballocations have been through the deferred deletion queue 23 | m_pContext->GetCompletedFenceValue()); 24 | 25 | // Leave ownership to the buffer pool 26 | pResource->Release(); 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/openclon12/cache.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "d3d12.h" 6 | #include 7 | #include 8 | #include 9 | 10 | class ShaderCache 11 | { 12 | public: 13 | ShaderCache(ID3D12Device*, bool driverVersioned); 14 | 15 | bool HasCache() const 16 | { 17 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 18 | return m_pSession; 19 | #else 20 | return false; 21 | #endif 22 | } 23 | 24 | void Store(const void* key, size_t keySize, const void* value, size_t valueSize) noexcept; 25 | void Store(const void* const* keys, const size_t* keySizes, unsigned keyParts, const void* value, size_t valueSize); 26 | 27 | using FoundValue = std::pair, size_t>; 28 | FoundValue Find(const void* key, size_t keySize); 29 | FoundValue Find(const void* const* keys, const size_t* keySizes, unsigned keyParts); 30 | 31 | void Close(); 32 | 33 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 34 | private: 35 | Microsoft::WRL::ComPtr m_pSession; 36 | #endif 37 | }; 38 | -------------------------------------------------------------------------------- /src/d3d12translationlayer/Sampler.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | 4 | 5 | #include "ImmediateContext.hpp" 6 | #include "Sampler.hpp" 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | // Sampler: on contexts that are not compute-only, holds one slot from the parent's sampler allocator. 11 | //---------------------------------------------------------------------------------------------------------------------------------- 12 | Sampler::Sampler(ImmediateContext* pDevice, D3D12_SAMPLER_DESC const& desc) noexcept(false) 13 | : DeviceChild(pDevice), m_Descriptor{}, m_DescriptorHeapIndex(0) // zero-initialized: compute-only contexts never allocate a slot 14 | { 15 | if (!pDevice->ComputeOnly()) 16 | { 17 | m_Descriptor = pDevice->m_SamplerAllocator.AllocateHeapSlot(&m_DescriptorHeapIndex); // throw( _com_error ) 18 | pDevice->m_pDevice12->CreateSampler(&desc, m_Descriptor); 19 | } 20 | } 21 | 22 | //---------------------------------------------------------------------------------------------------------------------------------- 23 | Sampler::~Sampler() noexcept 24 | { 25 | if (!m_pParent->ComputeOnly()) // mirrors the constructor: only free a slot that was actually allocated 26 | { 27 | m_pParent->m_SamplerAllocator.FreeHeapSlot(m_Descriptor, m_DescriptorHeapIndex); 28 | } 29 | } 30 | }; -------------------------------------------------------------------------------- /src/openclon12/queue.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "device.hpp" 6 | #include "context.hpp" 7 | #include "task.hpp" 8 | 9 | class CommandQueue : public CLChildBase 10 | { 11 | public: 12 | CommandQueue(D3DDevice& device, Context& context, const cl_queue_properties* properties, bool synthesizedProperties); 13 | 14 | friend cl_int CL_API_CALL clGetCommandQueueInfo(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*); 15 | 16 | Context& GetContext() const { return m_Context.get(); } 17 | Device& GetDevice() const { return m_Parent.get(); } 18 | D3DDevice &GetD3DDevice() const { return m_D3DDevice; } 19 | 20 | void Flush(TaskPoolLock const&, bool flushDevice); 21 | void QueueTask(Task*, TaskPoolLock const&); 22 | void NotifyTaskCompletion(Task*, TaskPoolLock const&); 23 | void AddAllTasksAsDependencies(Task*, TaskPoolLock const&); 24 | 25 | const bool m_bOutOfOrder; 26 | const bool m_bProfile; 27 | const bool m_bPropertiesSynthesized; 28 | std::vector const m_Properties; 29 | 30 | protected: 31 | Context::ref_int m_Context; 32 | D3DDevice &m_D3DDevice; 33 | 34 | std::deque m_QueuedTasks; 35 | std::vector m_OutstandingTasks; 36 | Task* m_LastQueuedTask = nullptr; 37 | Task* m_LastQueuedBarrier = nullptr; 38 | }; 39 | -------------------------------------------------------------------------------- /.github/workflows/prbuild.yml: -------------------------------------------------------------------------------- 1 | # This starter workflow is for a CMake project running on a single platform. There is a different starter workflow if you need cross-platform coverage. 2 | # See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-multi-platform.yml 3 | name: PR Build 4 | 5 | on: 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
11 | BUILD_TYPE: RelWithDebInfo 12 | 13 | jobs: 14 | build: 15 | # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. 16 | # You can convert this to a matrix build if you need cross-platform coverage. 17 | # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix 18 | runs-on: windows-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - name: Configure CMake 24 | # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. 25 | # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type 26 | run: cmake -B ${{github.workspace}}/build -DUSE_PIX=OFF -DCMAKE_SYSTEM_VERSION="10.0.22621.0" 27 | 28 | - name: Build 29 | # Build your program with the given configuration 30 | run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} 31 | 32 | -------------------------------------------------------------------------------- /src/d3d12translationlayer/Fence.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #include "Fence.hpp" 4 | #include "ImmediateContext.hpp" 5 | 6 | namespace D3D12TranslationLayer 7 | { 8 | Fence::Fence(ImmediateContext* pParent, FENCE_FLAGS Flags, UINT64 InitialValue) 9 | : DeviceChild(pParent) 10 | { 11 | D3D12_FENCE_FLAGS Flags12 = 12 | ((Flags & FENCE_FLAG_SHARED) ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE) | 13 | ((Flags & FENCE_FLAG_SHARED_CROSS_ADAPTER) ? 
D3D12_FENCE_FLAG_SHARED_CROSS_ADAPTER : D3D12_FENCE_FLAG_NONE); 14 | 15 | ThrowFailure(pParent->m_pDevice12->CreateFence(InitialValue, Flags12, IID_PPV_ARGS(&m_spFence))); 16 | } 17 | 18 | Fence::Fence(ImmediateContext* pParent, HANDLE hSharedHandle) 19 | : DeviceChild(pParent) 20 | { 21 | ThrowFailure(pParent->m_pDevice12->OpenSharedHandle(hSharedHandle, IID_PPV_ARGS(&m_spFence))); 22 | } 23 | 24 | Fence::Fence(ImmediateContext* pParent, ID3D12Fence* pFence) 25 | : DeviceChild(pParent) 26 | { 27 | ThrowFailure(pFence->QueryInterface(&m_spFence)); 28 | } 29 | 30 | Fence::~Fence() 31 | { 32 | AddToDeferredDeletionQueue(m_spFence); 33 | } 34 | 35 | HRESULT Fence::CreateSharedHandle( 36 | _In_opt_ const SECURITY_ATTRIBUTES *pAttributes, 37 | _In_ DWORD dwAccess, 38 | _In_opt_ LPCWSTR lpName, 39 | _Out_ HANDLE *pHandle) 40 | { 41 | return m_pParent->m_pDevice12->CreateSharedHandle(m_spFence.get(), pAttributes, dwAccess, lpName, pHandle); 42 | } 43 | } -------------------------------------------------------------------------------- /include/d3d12translationlayer/Fence.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "DeviceChild.hpp" 7 | #include "Util.hpp" 8 | 9 | namespace D3D12TranslationLayer 10 | { 11 | enum FENCE_FLAGS 12 | { 13 | FENCE_FLAG_NONE = 0x0, 14 | FENCE_FLAG_SHARED = 0x1, 15 | FENCE_FLAG_SHARED_CROSS_ADAPTER = 0x2, 16 | }; 17 | DEFINE_ENUM_FLAG_OPERATORS(FENCE_FLAGS); 18 | // Non-copyable, non-movable wrapper around an ID3D12Fence1 (newly created, opened from a shared handle, or queried from an existing fence). 19 | class Fence : public DeviceChild 20 | { 21 | public: 22 | Fence(ImmediateContext* pParent, FENCE_FLAGS Flags, UINT64 InitialValue); 23 | Fence(ImmediateContext* pParent, HANDLE SharedHandle); 24 | Fence(ImmediateContext* pParent, ID3D12Fence* pFence); 25 | Fence(Fence const&) = delete; 26 | Fence& operator=(Fence const&) = delete; 27 | Fence(Fence&&) = delete; 28 | Fence& operator=(Fence&&) = delete; 29 | 30 | ~Fence(); 31 | 32 | UINT64 GetCompletedValue() const { return m_spFence->GetCompletedValue(); } 33 | void Signal(UINT64 Value) const { ThrowFailure(m_spFence->Signal(Value)); } // failure is routed through ThrowFailure 34 | HRESULT SetEventOnCompletion(UINT64 Value, HANDLE hEvent) const { return m_spFence->SetEventOnCompletion(Value, hEvent); } 35 | HRESULT CreateSharedHandle( 36 | _In_opt_ const SECURITY_ATTRIBUTES *pAttributes, 37 | _In_ DWORD dwAccess, 38 | _In_opt_ LPCWSTR lpName, 39 | _Out_ HANDLE *pHandle); 40 | 41 | ID3D12Fence1* Get() const { return m_spFence.get(); } 42 | 43 | private: 44 | unique_comptr<ID3D12Fence1> m_spFence; // underlying D3D12 fence object 45 | }; 46 | } -------------------------------------------------------------------------------- /external/d3d12_interop_public.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © Microsoft Corporation 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the 
Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | 26 | #ifndef D3D12_INTEROP_PUBLIC_H 27 | #define D3D12_INTEROP_PUBLIC_H 28 | 29 | #ifdef __cplusplus 30 | extern "C" { 31 | #endif 32 | 33 | struct ID3D12Device; 34 | struct ID3D12CommandQueue; 35 | struct ID3D12Resource; 36 | 37 | struct d3d12_interop_device_info { 38 | uint64_t adapter_luid; 39 | ID3D12Device *device; 40 | ID3D12CommandQueue *queue; 41 | }; 42 | 43 | struct d3d12_interop_resource_info { 44 | ID3D12Resource *resource; 45 | uint64_t buffer_offset; 46 | }; 47 | 48 | #ifdef __cplusplus 49 | } 50 | #endif 51 | 52 | #endif /* D3D12_INTEROP_PUBLIC_H */ 53 | -------------------------------------------------------------------------------- /src/compilers/v2/dxil_versions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © Microsoft Corporation 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #ifndef DXIL_VERSIONS_H 25 | #define DXIL_VERSIONS_H 26 | 27 | #ifdef __cplusplus 28 | extern "C" { 29 | #endif 30 | 31 | enum dxil_shader_model { 32 | SHADER_MODEL_6_0 = 0x60000, 33 | SHADER_MODEL_6_1, 34 | SHADER_MODEL_6_2, 35 | SHADER_MODEL_6_3, 36 | SHADER_MODEL_6_4, 37 | SHADER_MODEL_6_5, 38 | SHADER_MODEL_6_6, 39 | SHADER_MODEL_6_7, 40 | }; 41 | 42 | enum dxil_validator_version { 43 | NO_DXIL_VALIDATION, 44 | DXIL_VALIDATOR_1_0 = 0x10000, 45 | DXIL_VALIDATOR_1_1, 46 | DXIL_VALIDATOR_1_2, 47 | DXIL_VALIDATOR_1_3, 48 | DXIL_VALIDATOR_1_4, 49 | DXIL_VALIDATOR_1_5, 50 | DXIL_VALIDATOR_1_6, 51 | DXIL_VALIDATOR_1_7, 52 | }; 53 | 54 | #ifdef __cplusplus 55 | } 56 | #endif 57 | 58 | #endif 59 | -------------------------------------------------------------------------------- /src/openclon12/kernel.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "program.hpp" 6 | #include "resources.hpp" 7 | #include 8 | 9 | class Sampler; 10 | class Kernel : public CLChildBase 11 | { 12 | private: 13 | CompiledDxil const& m_Dxil; 14 | std::string const m_Name; 15 | ComPtr m_SerializedRootSignature; 16 | 17 | std::vector m_KernelArgsCbData; 18 | std::vector m_ArgMetadataToCompiler; 19 | std::vector m_ArgsSet; 20 | 21 | // These are weak references for the API kernel object, however 22 | // these will be converted into strong references by an *execution* 23 | // of that kernel. Releasing an object *while a kernel is enqueued* 24 | // must be safe (according to the CTS), while the API kernel must not 25 | // hold any references. 26 | std::vector m_UAVs; 27 | std::vector m_SRVs; 28 | std::vector m_Samplers; 29 | 30 | std::vector<::ref_ptr> m_ConstSamplers; 31 | std::vector<::ref_ptr> m_InlineConsts; 32 | 33 | friend class ExecuteKernel; 34 | friend extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelInfo(cl_kernel, cl_kernel_info, size_t, void*, size_t*); 35 | friend extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelArgInfo(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*); 36 | friend extern CL_API_ENTRY cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*); 37 | 38 | public: 39 | Kernel(Program& Parent, std::string const& name, CompiledDxil const& Dxil, ProgramBinary::Kernel const& meta); 40 | Kernel(Kernel const&); 41 | ~Kernel(); 42 | 43 | cl_int SetArg(cl_uint arg_index, size_t arg_size, const void* arg_value); 44 | bool AllArgsSet() const; 45 | 46 | uint16_t const* GetRequiredLocalDims() const; 47 | uint16_t const* GetLocalDimsHint() const; 48 | 49 | std::unique_ptr GetRootSignature(ImmCtx &ImmCtx) const; 50 | 51 | const ProgramBinary::Kernel m_Meta; 52 | }; 53 | -------------------------------------------------------------------------------- /CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT License. 3 | cmake_minimum_required(VERSION 3.14) 4 | project(openclon12) 5 | include(FetchContent) 6 | 7 | set(CMAKE_CXX_STANDARD 17) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | 10 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 11 | 12 | file(GLOB_RECURSE MAIN_SRC CONFIGURE_DEPENDS src/openclon12/*.cpp src/compilers/*.cpp src/openclon12/*.def) 13 | file(GLOB MAIN_INC src/openclon12/*.h src/openclon12/*.hpp) 14 | file(GLOB_RECURSE EXTERNAL_INC external/*.h external/*.hpp) 15 | 16 | file(GLOB TRANS_SRC CONFIGURE_DEPENDS src/d3d12translationlayer/*.cpp) 17 | file(GLOB TRANS_INC include/d3d12translationlayer/*.*) 18 | file(GLOB TRANS_INL include/d3d12translationlayer/*.inl src/d3d12translationlayer/*.inl) 19 | 20 | include(FetchContent) 21 | FetchContent_Declare( 22 | DirectX-Headers 23 | GIT_REPOSITORY https://github.com/Microsoft/DirectX-Headers.git 24 | GIT_TAG v1.610.0 25 | ) 26 | FetchContent_MakeAvailable(DirectX-Headers) 27 | 28 | FetchContent_Declare( 29 | opencl_headers 30 | GIT_REPOSITORY https://github.com/KhronosGroup/OpenCL-Headers.git 31 | GIT_TAG 1bb9ec797d14abed6167e3a3d66ede25a702a5c7 32 | ) 33 | FetchContent_MakeAvailable(opencl_headers) 34 | add_library(OpenCL::Headers ALIAS Headers) 35 | 36 | set(WIL_BUILD_PACKAGING OFF CACHE BOOL "" FORCE) 37 | set(WIL_BUILD_TESTS OFF CACHE BOOL "" FORCE) 38 | FetchContent_Declare( 39 | wil 40 | GIT_REPOSITORY https://github.com/microsoft/wil.git 41 | GIT_TAG ed429e64eb3b91848bf19c17e1431c1b0f2c6d2b 42 | ) 43 | FetchContent_MakeAvailable(wil) 44 | 45 | add_library(d3d12translationlayer STATIC ${TRANS_SRC} ${TRANS_INC} ${TRANS_INL}) 46 | target_include_directories(d3d12translationlayer 47 | PUBLIC include/d3d12translationlayer 48 | PRIVATE external) 49 | 50 | target_link_libraries(d3d12translationlayer Microsoft::DirectX-Headers d3d12 dxgi atls 
dxcore) 51 | 52 | target_compile_definitions(d3d12translationlayer PRIVATE $<$:DBG>) 53 | target_compile_definitions(d3d12translationlayer PUBLIC $<$:TRANSLATION_LAYER_DBG=1>) 54 | 55 | add_library(openclon12 SHARED ${MAIN_SRC} ${MAIN_INC} ${EXTERNAL_INC}) 56 | target_include_directories(openclon12 57 | PRIVATE src/openclon12 58 | PRIVATE external 59 | PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) 60 | target_link_libraries(openclon12 61 | d3d12translationlayer 62 | OpenCL::Headers 63 | WIL 64 | user32 65 | gdi32) 66 | source_group("Header Files\\External" FILES ${EXTERNAL_INC}) 67 | 68 | option(BUILD_TESTS "Build tests" ON) 69 | 70 | if (BUILD_TESTS) 71 | add_subdirectory(test) 72 | endif() 73 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 
14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 
40 | 41 | 42 | -------------------------------------------------------------------------------- /src/d3d12translationlayer/Query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | 4 | #include "ImmediateContext.hpp" 5 | #include "ImmediateContext.inl" 6 | #include "Query.hpp" 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | TimestampQuery::TimestampQuery(ImmediateContext* pDevice) noexcept(false) 11 | : DeviceChild(pDevice) 12 | { 13 | D3D12_QUERY_HEAP_DESC QueryHeapDesc = { D3D12_QUERY_HEAP_TYPE_TIMESTAMP, 1, 1 }; 14 | 15 | HRESULT hr = m_pParent->m_pDevice12->CreateQueryHeap( 16 | &QueryHeapDesc, 17 | IID_PPV_ARGS(&m_spQueryHeap) 18 | ); 19 | ThrowFailure(hr); // throw( _com_error ) 20 | 21 | // Query data goes into a readback heap for CPU readback in GetData 22 | m_spResultBuffer = m_pParent->AcquireSuballocatedHeap( 23 | AllocatorHeapType::Readback, sizeof(UINT64), ResourceAllocationContext::FreeThread); // throw( _com_error ) 24 | } 25 | 26 | //---------------------------------------------------------------------------------------------------------------------------------- 27 | void TimestampQuery::End() noexcept 28 | { 29 | // Store data in the query object, then resolve into the result buffer 30 | auto pIface = m_pParent->GetGraphicsCommandList(); 31 | 32 | pIface->EndQuery(m_spQueryHeap.get(), D3D12_QUERY_TYPE_TIMESTAMP, 0); 33 | pIface->ResolveQueryData( 34 | m_spQueryHeap.get(), 35 | D3D12_QUERY_TYPE_TIMESTAMP, 36 | 0, 37 | 1, 38 | m_spResultBuffer.GetResource(), 39 | m_spResultBuffer.GetOffset() 40 | ); 41 | m_pParent->AdditionalCommandsAdded(); 42 | m_LastUsedCommandListID = m_pParent->GetCommandListID(); 43 | } 44 | 45 | //---------------------------------------------------------------------------------------------------------------------------------- 46 | UINT64 TimestampQuery::GetData() noexcept // maps the readback buffer written by End() and returns its single UINT64 47 | { 48 | void* pMappedData = 
nullptr; 49 | 50 | CD3DX12_RANGE ReadRange(0, sizeof(UINT64)); 51 | HRESULT hr = m_spResultBuffer.Map( 52 | 0, 53 | &ReadRange, 54 | &pMappedData 55 | ); 56 | ThrowFailure(hr); // NOTE(review): this function is noexcept, so a throw from ThrowFailure terminates the process -- confirm that fail-fast is intended 57 | 58 | const UINT64* pSrc = reinterpret_cast<const UINT64*>(pMappedData); 59 | UINT64 result = *pSrc; 60 | CD3DX12_RANGE WrittenRange(0, 0); // empty range: nothing was written through the mapping 61 | m_spResultBuffer.Unmap(0, &WrittenRange); 62 | return result; 63 | } 64 | 65 | //---------------------------------------------------------------------------------------------------------------------------------- 66 | TimestampQuery::~TimestampQuery() 67 | { 68 | AddToDeferredDeletionQueue(m_spQueryHeap); 69 | if (m_spResultBuffer.IsInitialized()) 70 | { 71 | m_pParent->ReleaseSuballocatedHeap(AllocatorHeapType::Readback, m_spResultBuffer, m_LastUsedCommandListID); 72 | } 73 | } 74 | }; -------------------------------------------------------------------------------- /include/d3d12translationlayer/DeviceChild.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "Util.hpp" 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | class ImmediateContext; 11 | 12 | class DeviceChild 13 | { 14 | public: 15 | DeviceChild(ImmediateContext* pParent) noexcept 16 | : m_pParent(pParent) 17 | { 18 | } 19 | 20 | ImmediateContext* m_pParent = nullptr; 21 | UINT64 m_LastUsedCommandListID = {}; 22 | 23 | // Warning: this method is hidden in some derived child types, and is not virtual 24 | // Always ensure that this method is called on the most derived type. 
25 | void UsedInCommandList(UINT64 CommandListID) noexcept 26 | { 27 | assert(CommandListID >= m_LastUsedCommandListID); 28 | m_LastUsedCommandListID = CommandListID; 29 | } 30 | 31 | void MarkUsedInCommandListIfNewer(UINT64 CommandListID) noexcept 32 | { 33 | if (CommandListID >= m_LastUsedCommandListID) 34 | { 35 | UsedInCommandList(CommandListID); 36 | } 37 | } 38 | 39 | void ResetLastUsedInCommandList() 40 | { 41 | m_LastUsedCommandListID = 0; 42 | } 43 | 44 | protected: 45 | template <typename T> // queues the held object (if any) for deferred deletion and clears the smart pointer 46 | void AddToDeferredDeletionQueue(unique_comptr<T>& spObject) 47 | { 48 | if (spObject) 49 | { 50 | AddToDeferredDeletionQueue(spObject.get()); 51 | spObject.reset(); 52 | } 53 | } 54 | 55 | template <typename T> // same as above, but first overwrites m_LastUsedCommandListID with CommandListID 56 | void AddToDeferredDeletionQueue(unique_comptr<T>& spObject, UINT64 CommandListID) 57 | { 58 | m_LastUsedCommandListID = CommandListID; 59 | AddToDeferredDeletionQueue(spObject); 60 | } 61 | 62 | void AddToDeferredDeletionQueue(ID3D12Object* pObject); 63 | }; 64 | 65 | template <typename TIface> // DeviceChild that holds one TIface; Destroy() and the destructor pass it to the deferred deletion queue 66 | class DeviceChildImpl : public DeviceChild 67 | { 68 | public: 69 | DeviceChildImpl(ImmediateContext* pParent) noexcept 70 | : DeviceChild(pParent) 71 | { 72 | } 73 | void Destroy() { AddToDeferredDeletionQueue(m_spIface); } 74 | ~DeviceChildImpl() { Destroy(); } 75 | 76 | bool Created() { return m_spIface.get() != nullptr; } 77 | TIface** GetForCreate() { Destroy(); return &m_spIface; } 78 | TIface* GetForUse(UINT64 CommandListID) 79 | { 80 | MarkUsedInCommandListIfNewer(CommandListID); 81 | return m_spIface.get(); 82 | } 83 | TIface* GetForUse() 84 | { 85 | return GetForUse(m_pParent->GetCommandListID()); 86 | } 87 | TIface* GetForImmediateUse() { return m_spIface.get(); } // does not update m_LastUsedCommandListID 88 | 89 | private: 90 | unique_comptr<TIface> m_spIface; 91 | }; 92 | }; -------------------------------------------------------------------------------- /src/compilers/compiler_common.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 
2 | // Licensed under the MIT License. 3 | 4 | #include "compiler.hpp" 5 | #include "platform.hpp" 6 | #include 7 | 8 | void Logger::Log(const char *msg) const 9 | { 10 | std::lock_guard lock(m_lock); 11 | m_buildLog += msg; 12 | } 13 | 14 | static ProgramBinary::Kernel const& FindKernelInfo(std::vector const& kernels, const char *name) 15 | { 16 | auto iter = std::find_if(kernels.begin(), kernels.end(), [name](ProgramBinary::Kernel const& k) { return strcmp(k.name, name) == 0; }); 17 | assert(iter != kernels.end()); // We can't get DXIL if there's no data for a kernel with this name 18 | return *iter; 19 | } 20 | 21 | CompiledDxil::CompiledDxil(ProgramBinary const& parent, const char *name) 22 | : m_Parent(parent) 23 | , m_Metadata(FindKernelInfo(parent.GetKernelInfo(), name)) 24 | { 25 | } 26 | 27 | CompiledDxil::CompiledDxil(ProgramBinary const& parent, Metadata const &metadata) 28 | : m_Parent(parent) 29 | , m_Metadata(metadata) 30 | { 31 | } 32 | 33 | CompiledDxil::Metadata const& CompiledDxil::GetMetadata() const 34 | { 35 | return m_Metadata; 36 | } 37 | 38 | static void SignBlob(void* pBlob, size_t size) 39 | { 40 | auto& DXIL = g_Platform->GetDXIL(); 41 | auto pfnCreateInstance = DXIL.proc_address("DxcCreateInstance"); 42 | ComPtr spValidator; 43 | if (SUCCEEDED(pfnCreateInstance(CLSID_DxcValidator, IID_PPV_ARGS(&spValidator)))) 44 | { 45 | struct Blob : IDxcBlob 46 | { 47 | void* pBlob; 48 | UINT Size; 49 | Blob(void* p, UINT s) : pBlob(p), Size(s) { } 50 | STDMETHOD(QueryInterface)(REFIID, void** ppv) { *ppv = this; return S_OK; } 51 | STDMETHOD_(ULONG, AddRef)() { return 1; } 52 | STDMETHOD_(ULONG, Release)() { return 0; } 53 | STDMETHOD_(void*, GetBufferPointer)() override { return pBlob; } 54 | STDMETHOD_(SIZE_T, GetBufferSize)() override { return Size; } 55 | } Blob = { pBlob, (UINT)size }; 56 | ComPtr spResult; 57 | (void)spValidator->Validate(&Blob, DxcValidatorFlags_InPlaceEdit, &spResult); 58 | HRESULT hr = S_OK; 59 | if (spResult) 60 | { 61 | 
(void)spResult->GetStatus(&hr); 62 | } 63 | if (FAILED(hr)) 64 | { 65 | ComPtr spError; 66 | spResult->GetErrorBuffer(&spError); 67 | BOOL known = FALSE; 68 | UINT32 cp = 0; 69 | spError->GetEncoding(&known, &cp); 70 | if (cp == CP_UTF8 || cp == CP_ACP) 71 | printf("%s", (char*)spError->GetBufferPointer()); 72 | else 73 | printf("%S", (wchar_t*)spError->GetBufferPointer()); 74 | DebugBreak(); 75 | } 76 | } 77 | } 78 | 79 | void CompiledDxil::Sign() 80 | { 81 | SignBlob(GetBinary(), GetBinarySize()); 82 | } 83 | 84 | std::vector const& ProgramBinary::GetKernelInfo() const 85 | { 86 | return m_KernelInfo; 87 | } 88 | 89 | const ProgramBinary::SpecConstantInfo *ProgramBinary::GetSpecConstantInfo(uint32_t ID) const 90 | { 91 | auto iter = m_SpecConstants.find(ID); 92 | if (iter == m_SpecConstants.end()) 93 | return nullptr; 94 | 95 | return &iter->second; 96 | } 97 | -------------------------------------------------------------------------------- /src/openclon12/openclon12.def: -------------------------------------------------------------------------------- 1 | ; Copyright (c) Microsoft Corporation. 2 | ; Licensed under the MIT License. 3 | EXPORTS 4 | 5 | ; 6 | ; Note: 7 | ; 8 | ; 1. Functions are grouped into blocks according to the OpenCL API version they 9 | ; were introduced in. 10 | ; 11 | ; 2. Function blocks are sorted in ascending order of the API version. 12 | ; 13 | ; 3. Functions within a block are sorted alphabetically. 
14 | ; 15 | 16 | ; OpenCL 1.0 API 17 | clBuildProgram 18 | clCreateBuffer 19 | clCreateCommandQueue 20 | clCreateContext 21 | clCreateContextFromType 22 | clCreateFromGLBuffer 23 | clCreateFromGLRenderbuffer 24 | clCreateFromGLTexture2D 25 | clCreateFromGLTexture3D 26 | clCreateImage2D 27 | clCreateImage3D 28 | clCreateKernel 29 | clCreateKernelsInProgram 30 | clCreateProgramWithBinary 31 | clCreateProgramWithSource 32 | clCreateSampler 33 | clEnqueueAcquireGLObjects 34 | clEnqueueBarrier 35 | clEnqueueCopyBuffer 36 | clEnqueueCopyBufferToImage 37 | clEnqueueCopyImage 38 | clEnqueueCopyImageToBuffer 39 | clEnqueueMapBuffer 40 | clEnqueueMapImage 41 | clEnqueueMarker 42 | clEnqueueNDRangeKernel 43 | clEnqueueNativeKernel 44 | clEnqueueReadBuffer 45 | clEnqueueReadImage 46 | clEnqueueReleaseGLObjects 47 | clEnqueueTask 48 | clEnqueueUnmapMemObject 49 | clEnqueueWaitForEvents 50 | clEnqueueWriteBuffer 51 | clEnqueueWriteImage 52 | clFinish 53 | clFlush 54 | clGetCommandQueueInfo 55 | clGetContextInfo 56 | clGetDeviceIDs 57 | clGetDeviceInfo 58 | clGetEventInfo 59 | clGetEventProfilingInfo 60 | clGetExtensionFunctionAddress 61 | clGetGLContextInfoKHR 62 | clGetGLObjectInfo 63 | clGetGLTextureInfo 64 | clGetImageInfo 65 | clGetKernelInfo 66 | clGetKernelWorkGroupInfo 67 | clGetMemObjectInfo 68 | clGetPlatformIDs 69 | clGetPlatformInfo 70 | clGetProgramBuildInfo 71 | clGetProgramInfo 72 | clGetSamplerInfo 73 | clGetSupportedImageFormats 74 | clReleaseCommandQueue 75 | clReleaseContext 76 | clReleaseEvent 77 | clReleaseKernel 78 | clReleaseMemObject 79 | clReleaseProgram 80 | clReleaseSampler 81 | clRetainCommandQueue 82 | clRetainContext 83 | clRetainEvent 84 | clRetainKernel 85 | clRetainMemObject 86 | clRetainProgram 87 | clRetainSampler 88 | clSetCommandQueueProperty 89 | clSetKernelArg 90 | clUnloadCompiler 91 | clWaitForEvents 92 | 93 | ; OpenCL 1.1 API 94 | clCreateSubBuffer 95 | clCreateUserEvent 96 | clEnqueueCopyBufferRect 97 | clEnqueueReadBufferRect 98 | 
clEnqueueWriteBufferRect 99 | clSetEventCallback 100 | clSetMemObjectDestructorCallback 101 | clSetUserEventStatus 102 | 103 | ; OpenCL 1.2 API 104 | clCompileProgram 105 | clCreateFromGLTexture 106 | clCreateImage 107 | clCreateProgramWithBuiltInKernels 108 | clCreateSubDevices 109 | clEnqueueBarrierWithWaitList 110 | clEnqueueFillBuffer 111 | clEnqueueFillImage 112 | clEnqueueMarkerWithWaitList 113 | clEnqueueMigrateMemObjects 114 | clGetExtensionFunctionAddressForPlatform 115 | clGetKernelArgInfo 116 | clLinkProgram 117 | clReleaseDevice 118 | clRetainDevice 119 | clUnloadPlatformCompiler 120 | 121 | ; OpenCL 2.0 API 122 | clCreateCommandQueueWithProperties 123 | clCreatePipe 124 | clCreateSamplerWithProperties 125 | clEnqueueSVMFree 126 | clEnqueueSVMMap 127 | clEnqueueSVMMemcpy 128 | clEnqueueSVMMemFill 129 | clEnqueueSVMUnmap 130 | clGetPipeInfo 131 | clSetKernelArgSVMPointer 132 | clSetKernelExecInfo 133 | clSVMAlloc 134 | clSVMFree 135 | 136 | ; OpenCL 2.1 API 137 | clCloneKernel 138 | clCreateProgramWithIL 139 | clEnqueueSVMMigrateMem 140 | clGetDeviceAndHostTimer 141 | clGetHostTimer 142 | clGetKernelSubGroupInfo 143 | clSetDefaultDeviceCommandQueue 144 | 145 | ; OpenCL 2.2 API 146 | clSetProgramReleaseCallback 147 | clSetProgramSpecializationConstant 148 | 149 | ; OpenCL 3.0 API 150 | clCreateBufferWithProperties 151 | clCreateImageWithProperties 152 | clSetContextDestructorCallback 153 | -------------------------------------------------------------------------------- /src/openclon12/cache.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "platform.hpp" 6 | #include "cache.hpp" 7 | #include "compiler.hpp" 8 | #include 9 | 10 | #pragma warning(disable: 4100) 11 | 12 | ShaderCache::ShaderCache(ID3D12Device* d, bool driverVersioned) 13 | { 14 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 15 | ComPtr device9; 16 | if (FAILED(d->QueryInterface(device9.ReleaseAndGetAddressOf()))) 17 | return; 18 | 19 | D3D12_SHADER_CACHE_SESSION_DESC Desc = {}; 20 | // {17CB474E-4C55-4DBC-BC2E-D5132115BDA3} 21 | Desc.Identifier = { 0x17cb474e, 0x4c55, 0x4dbc, { 0xbc, 0x2e, 0xd5, 0x13, 0x21, 0x15, 0xbd, 0xa3 } }; 22 | Desc.Mode = D3D12_SHADER_CACHE_MODE_DISK; 23 | Desc.Flags = driverVersioned ? D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED : D3D12_SHADER_CACHE_FLAG_NONE; 24 | 25 | auto pCompiler = g_Platform->GetCompiler(); 26 | Desc.Version = pCompiler->GetVersionForCache(); 27 | 28 | (void)device9->CreateShaderCacheSession(&Desc, IID_PPV_ARGS(&m_pSession)); 29 | #endif 30 | } 31 | 32 | void ShaderCache::Store(const void* key, size_t keySize, const void* value, size_t valueSize) noexcept 33 | { 34 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 35 | if (m_pSession) 36 | { 37 | (void)m_pSession->StoreValue(key, (UINT)keySize, value, (UINT)valueSize); 38 | } 39 | #endif 40 | } 41 | 42 | void ShaderCache::Store(const void* const* keys, const size_t* keySizes, unsigned keyParts, const void* value, size_t valueSize) 43 | { 44 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 45 | if (m_pSession) 46 | { 47 | size_t combinedSize = std::accumulate(keySizes, keySizes + keyParts, (size_t)0); 48 | std::unique_ptr combinedKey(new byte[combinedSize]); 49 | 50 | unsigned i = 0; 51 | for (byte* ptr = combinedKey.get(); ptr != combinedKey.get() + combinedSize; ptr += keySizes[i++]) 52 | { 53 | memcpy(ptr, keys[i], keySizes[i]); 54 | } 55 | 56 | Store(combinedKey.get(), combinedSize, value, valueSize); 57 | } 58 | #endif 59 | } 60 | 61 | ShaderCache::FoundValue ShaderCache::Find(const 
void* key, size_t keySize) 62 | { 63 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 64 | if (m_pSession) 65 | { 66 | UINT valueSize = 0; 67 | if (SUCCEEDED(m_pSession->FindValue(key, (UINT)keySize, nullptr, &valueSize))) 68 | { 69 | ShaderCache::FoundValue value(new byte[valueSize], valueSize); 70 | if (SUCCEEDED(m_pSession->FindValue(key, (UINT)keySize, value.first.get(), &valueSize))) 71 | { 72 | return value; 73 | } 74 | } 75 | } 76 | #endif 77 | return {}; 78 | } 79 | 80 | ShaderCache::FoundValue ShaderCache::Find(const void* const* keys, const size_t* keySizes, unsigned keyParts) 81 | { 82 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 83 | if (m_pSession) 84 | { 85 | size_t combinedSize = std::accumulate(keySizes, keySizes + keyParts, (size_t)0); 86 | std::unique_ptr combinedKey(new byte[combinedSize]); 87 | 88 | unsigned i = 0; 89 | for (byte* ptr = combinedKey.get(); ptr != combinedKey.get() + combinedSize; ptr += keySizes[i++]) 90 | { 91 | memcpy(ptr, keys[i], keySizes[i]); 92 | } 93 | 94 | return Find(combinedKey.get(), combinedSize); 95 | } 96 | #endif 97 | return {}; 98 | } 99 | 100 | void ShaderCache::Close() 101 | { 102 | #ifdef __ID3D12ShaderCacheSession_INTERFACE_DEFINED__ 103 | m_pSession.Reset(); 104 | #endif 105 | } 106 | -------------------------------------------------------------------------------- /external/MicrosoftTelemetry.h: -------------------------------------------------------------------------------- 1 | /* ++ 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | Licensed under the MIT License. See LICENSE in the project root for license information. 5 | 6 | Module Name: 7 | 8 | TraceLoggingConfig.h 9 | 10 | Abstract: 11 | 12 | Macro definitions used by this project's TraceLogging ETW providers: 13 | 14 | - Configuration macros that select the ETW Provider Groups to be used by 15 | this project. 16 | - Constants for tags that are commonly used in Microsoft's 17 | TraceLogging-based ETW. 
18 | 19 | Different versions of this file use different definitions for the 20 | TraceLoggingOption configuration macros. The definitions in this file are 21 | empty. As a result, providers using this configuration file will not join 22 | any ETW Provider Groups and will not be given any special treatment by 23 | group-sensitive ETW listeners. 24 | 25 | Environment: 26 | 27 | User mode or kernel mode. 28 | 29 | --*/ 30 | 31 | #pragma once 32 | 33 | // Configuration macro for use in TRACELOGGING_DEFINE_PROVIDER. The definition 34 | // in this file configures the provider as a normal (non-telemetry) provider. 35 | #define TraceLoggingOptionMicrosoftTelemetry() \ 36 | // Empty definition for TraceLoggingOptionMicrosoftTelemetry 37 | 38 | // Configuration macro for use in TRACELOGGING_DEFINE_PROVIDER. The definition 39 | // in this file configures the provider as a normal (non-telemetry) provider. 40 | #define TraceLoggingOptionWindowsCoreTelemetry() \ 41 | // Empty definition for TraceLoggingOptionWindowsCoreTelemetry 42 | 43 | // Event privacy tags. 
Use the PDT macro values for the tag parameter, e.g.: 44 | // TraceLoggingWrite(..., 45 | // TelemetryPrivacyDataTag(PDT_BrowsingHistory | PDT_ProductAndServiceUsage), 46 | // ...); 47 | #define TelemetryPrivacyDataTag(tag) TraceLoggingUInt64((tag), "PartA_PrivTags") 48 | #define PDT_BrowsingHistory 0x0000000000000002u 49 | #define PDT_DeviceConnectivityAndConfiguration 0x0000000000000800u 50 | #define PDT_InkingTypingAndSpeechUtterance 0x0000000000020000u 51 | #define PDT_ProductAndServicePerformance 0x0000000001000000u 52 | #define PDT_ProductAndServiceUsage 0x0000000002000000u 53 | #define PDT_SoftwareSetupAndInventory 0x0000000080000000u 54 | 55 | // Event categories specified via keywords, e.g.: 56 | // TraceLoggingWrite(..., 57 | // TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), 58 | // ...); 59 | #define MICROSOFT_KEYWORD_CRITICAL_DATA 0x0000800000000000 // Bit 47 60 | #define MICROSOFT_KEYWORD_MEASURES 0x0000400000000000 // Bit 46 61 | #define MICROSOFT_KEYWORD_TELEMETRY 0x0000200000000000 // Bit 45 62 | #define MICROSOFT_KEYWORD_RESERVED_44 0x0000100000000000 // Bit 44 (reserved for future assignment) 63 | 64 | // Event categories specified via event tags, e.g.: 65 | // TraceLoggingWrite(..., 66 | // TraceLoggingEventTag(MICROSOFT_EVENTTAG_REALTIME_LATENCY), 67 | // ...); 68 | #define MICROSOFT_EVENTTAG_DROP_USER_IDS 0x00008000 69 | #define MICROSOFT_EVENTTAG_AGGREGATE 0x00010000 70 | #define MICROSOFT_EVENTTAG_DROP_PII_EXCEPT_IP 0x00020000 71 | #define MICROSOFT_EVENTTAG_COSTDEFERRED_LATENCY 0x00040000 72 | #define MICROSOFT_EVENTTAG_CORE_DATA 0x00080000 73 | #define MICROSOFT_EVENTTAG_INJECT_XTOKEN 0x00100000 74 | #define MICROSOFT_EVENTTAG_REALTIME_LATENCY 0x00200000 75 | #define MICROSOFT_EVENTTAG_NORMAL_LATENCY 0x00400000 76 | #define MICROSOFT_EVENTTAG_CRITICAL_PERSISTENCE 0x00800000 77 | #define MICROSOFT_EVENTTAG_NORMAL_PERSISTENCE 0x01000000 78 | #define MICROSOFT_EVENTTAG_DROP_PII 0x02000000 79 | #define MICROSOFT_EVENTTAG_HASH_PII 0x04000000 
80 | #define MICROSOFT_EVENTTAG_MARK_PII 0x08000000 81 | 82 | // Field categories specified via field tags, e.g.: 83 | // TraceLoggingWrite(..., 84 | // TraceLoggingString(szUser, "UserName", "User's name", MICROSOFT_FIELDTAG_HASH_PII), 85 | // ...); 86 | #define MICROSOFT_FIELDTAG_DROP_PII 0x04000000 87 | #define MICROSOFT_FIELDTAG_HASH_PII 0x08000000 88 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenCLOn12 2 | 3 | OpenCLOn12 is a mapping layer, which implements the OpenCL 1.2 API on top of D3D12. It also implements the OpenCL ICD extension, and can therefore be loaded by the existing ICD loader. 4 | 5 | This repository contains the implementations of the APIs. It depends on various other repositories for further functionality, either implicitly or explicitly: 6 | * [WIL](https://github.com/microsoft/wil) is used occasionally throughout. 7 | * The [OpenCL headers](https://github.com/KhronosGroup/OpenCL-Headers) are referenced. 8 | * [GoogleTest](https://github.com/google/googletest) is used for unit testing. 9 | * The [D3D12TranslationLayer](https://github.com/microsoft/D3D12TranslationLayer) handles some of the complexity of using D3D12 for us. 10 | * The compiler infrastructure for consuming OpenCL C and SPIR-V and converting to DXIL comes from the [Mesa](https://gitlab.freedesktop.org/mesa/mesa) project. This dependency is only required at runtime, as a copy of the compiler interface header is contained in this repo. The compiler interface in the `master` branch of this repository tracks `master` of Mesa. 11 | * The compiler was originally developed downstream from `master`, and the `downstream-abi` branch of this repository is intended to interface with that downstream interface. 12 | 13 | Additionally, DXIL.dll from the Windows SDK will be required at runtime to sign and validate the DXIL shaders produced by the compiler. 
14 | 15 | For more details about OpenCLOn12, see: 16 | * [Product release blog post](https://devblogs.microsoft.com/directx/announcing-the-opencl-and-opengl-compatibility-pack-for-windows-10-on-arm) 17 | * [Microsoft blog post](https://devblogs.microsoft.com/directx/in-the-works-opencl-and-opengl-mapping-layers-to-directx/) 18 | * [Collabora blog post](https://www.collabora.com/news-and-blog/news-and-events/introducing-opencl-and-opengl-on-directx.html) 19 | 20 | Make sure that you visit the [DirectX Landing Page](https://devblogs.microsoft.com/directx/landing-page/) for more resources for DirectX developers. 21 | 22 | ## Current Status 23 | 24 | At this point, the OpenCL 1.2 API is fully implemented, with no optional extensions. It has not yet been certified conformant: every OpenCL 1.2 conformance test passes, but not yet all of them on a single underlying implementation. 25 | 26 | ## Building 27 | 28 | The D3D12TranslationLayer project will be fetched from GitHub when building with CMake if D3D12TranslationLayer isn't already declared as a FetchContent source, such as by a parent CMakeLists.txt. Assuming there is a top-level `CMakeLists.txt` in a directory that includes both OpenCLOn12 and D3D12TranslationLayer, you could achieve that like this: 29 | 30 | ```CMake 31 | cmake_minimum_required(VERSION 3.14) 32 | include(FetchContent) 33 | 34 | FetchContent_Declare( 35 | d3d12translationlayer 36 | SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/D3D12TranslationLayer 37 | ) 38 | FetchContent_MakeAvailable(d3d12translationlayer) 39 | 40 | add_subdirectory(OpenCLOn12) 41 | ``` 42 | 43 | At the time of publishing, OpenCLOn12 and the D3D12TranslationLayer require the latest released version of the SDK (19041). 44 | 45 | OpenCLOn12 requires C++17, and only supports building with MSVC at the moment. 46 | 47 | ## Data Collection 48 | 49 | The software may collect information about you and your use of the software and send it to Microsoft. 
Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described in the repository. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft's privacy statement. Our privacy statement is located at https://go.microsoft.com/fwlink/?LinkID=824704. You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices. 50 | 51 | Note however that no data collection is performed when using your private builds. 52 | -------------------------------------------------------------------------------- /src/openclon12/gl_tokens.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include 6 | 7 | // This header contains just the small subset of GL function 8 | // definitions and defines needed for GL/CL interop, without 9 | // pulling in all of glext.h 10 | 11 | #define GL_ARRAY_BUFFER 0x8892 12 | #define GL_RENDERBUFFER 0x8D41 13 | #define GL_TEXTURE_BUFFER 0x8C2A 14 | #define GL_TEXTURE_1D 0x0DE0 15 | #define GL_TEXTURE_2D 0x0DE1 16 | #define GL_TEXTURE_3D 0x806F 17 | #define GL_TEXTURE_RECTANGLE 0x84F5 18 | #define GL_TEXTURE_1D_ARRAY 0x8C18 19 | #define GL_TEXTURE_2D_ARRAY 0x8C1A 20 | #define GL_TEXTURE_CUBE_MAP_ARRAY 0x9009 21 | #define GL_TEXTURE_CUBE_MAP 0x8513 22 | #define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 23 | #define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 24 | #define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 25 | #define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 26 | #define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 27 | #define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A 28 | #define GL_TEXTURE_2D_MULTISAMPLE 0x9100 29 | #define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 30 | 31 | #define GL_RGBA8 0x8058 32 | #define GL_SRGB8_ALPHA8 0x8C43 33 | #define GL_RGBA 0x1908 34 | #define GL_BGRA 0x80E1 35 | #define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 36 | #define GL_RGBA8I 0x8D8E 37 | #define GL_RGBA16I 0x8D88 38 | #define GL_RGBA32I 0x8D82 39 | #define GL_RGBA8UI 0x8D7C 40 | #define GL_RGBA16UI 0x8D76 41 | #define GL_RGBA32UI 0x8D70 42 | #define GL_RGBA8_SNORM 0x8F97 43 | #define GL_RGBA16 0x805B 44 | #define GL_RGBA16_SNORM 0x8F9B 45 | #define GL_RGBA16F 0x881A 46 | #define GL_RGBA32F 0x8814 47 | #define GL_R8 0x8229 48 | #define GL_R8_SNORM 0x8F94 49 | #define GL_R16 0x822A 50 | #define GL_R16_SNORM 0x8F98 51 | #define GL_R16F 0x822D 52 | #define GL_R32F 0x822E 53 | #define GL_R8I 0x8231 54 | #define GL_R16I 0x8233 55 | #define GL_R32I 0x8235 56 | #define GL_R8UI 0x8232 57 | #define GL_R16UI 0x8234 58 | #define GL_R32UI 0x8236 59 | #define GL_RG8 0x822B 60 | #define GL_RG8_SNORM 0x8F95 61 | #define GL_RG16 0x822C 62 | #define 
GL_RG16_SNORM 0x8F99 63 | #define GL_RG16F 0x822F 64 | #define GL_RG32F 0x8230 65 | #define GL_RG8I 0x8237 66 | #define GL_RG16I 0x8239 67 | #define GL_RG32I 0x823B 68 | #define GL_RG8UI 0x8238 69 | #define GL_RG16UI 0x823A 70 | #define GL_RG32UI 0x823C 71 | 72 | #define EGL_PLATFORM_SURFACELESS_MESA 0x31DD 73 | 74 | HGLRC WINAPI wglCreateContextAttribsARB(HDC, HGLRC, const int *); 75 | 76 | typedef struct __GLsync *GLsync; 77 | void WINAPI glWaitSync(GLsync, unsigned flags, uint64_t timeout); 78 | void WINAPI glDeleteSync(GLsync); 79 | 80 | typedef void *EGLDisplay, *EGLSurface, *EGLContext, *EGLConfig; 81 | typedef unsigned int EGLBoolean, EGLenum; 82 | typedef intptr_t EGLAttrib; 83 | typedef int32_t EGLint; 84 | typedef void (*__eglMustCastToProperFunctionPointerType)(void); 85 | EGLDisplay WINAPI eglGetPlatformDisplay(EGLenum, void *, const EGLAttrib *); 86 | EGLBoolean WINAPI eglInitialize(EGLDisplay, EGLint *, EGLint *); 87 | EGLBoolean WINAPI eglTerminate(EGLDisplay); 88 | EGLContext WINAPI eglCreateContext(EGLDisplay, EGLConfig, EGLContext, const int32_t *); 89 | EGLBoolean WINAPI eglMakeCurrent(EGLDisplay, EGLSurface, EGLSurface, EGLContext); 90 | EGLBoolean WINAPI eglDestroyContext(EGLDisplay, EGLContext); 91 | EGLContext WINAPI eglGetCurrentContext(); 92 | __eglMustCastToProperFunctionPointerType WINAPI eglGetProcAddress(const char *); 93 | -------------------------------------------------------------------------------- /src/openclon12/context.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "platform.hpp" 6 | #include "device.hpp" 7 | #include "gl_tokens.hpp" 8 | 9 | struct GLProperties; 10 | struct d3d12_interop_device_info; 11 | struct mesa_glinterop_device_info; 12 | struct mesa_glinterop_export_in; 13 | struct mesa_glinterop_export_out; 14 | 15 | class GLInteropManager 16 | { 17 | public: 18 | static std::unique_ptr Create(GLProperties const &glProps); 19 | virtual ~GLInteropManager() = default; 20 | virtual bool GetDeviceData(d3d12_interop_device_info &d3d12DevInfo) = 0; 21 | virtual int GetResourceData(mesa_glinterop_export_in &in, mesa_glinterop_export_out &out) = 0; 22 | virtual bool AcquireResources(std::vector &resources, GLsync *sync) = 0; 23 | virtual bool IsAppContextBoundToThread() = 0; 24 | bool SyncWait(GLsync fence, bool deleteSync); 25 | protected: 26 | void PrepQueryDeviceInfo(mesa_glinterop_device_info &mesaDevInfo, 27 | d3d12_interop_device_info &d3d12DevInfo); 28 | virtual bool BindContext() = 0; 29 | virtual void UnbindContext() = 0; 30 | GLInteropManager(XPlatHelpers::unique_module mod) 31 | : m_hMod(std::move(mod)) 32 | { 33 | } 34 | XPlatHelpers::unique_module m_hMod; 35 | decltype(&glWaitSync) m_WaitSync; 36 | decltype(&glDeleteSync) m_DeleteSync; 37 | }; 38 | 39 | class Context : public CLChildBase 40 | { 41 | public: 42 | using PfnCallbackType = void (CL_CALLBACK *)(const char * errinfo, 43 | const void * private_info, 44 | size_t cb, 45 | void * user_data); 46 | 47 | struct DestructorCallback 48 | { 49 | using Fn = void(CL_CALLBACK *)(cl_context, void*); 50 | Fn m_pfn; 51 | void* m_userData; 52 | }; 53 | 54 | private: 55 | std::vector m_AssociatedDevices; 56 | const PfnCallbackType m_ErrorCallback; 57 | void* const m_CallbackContext; 58 | 59 | std::vector const m_Properties; 60 | 61 | mutable std::mutex m_DestructorLock; 62 | std::vector m_DestructorCallbacks; 63 | 64 | std::unique_ptr m_GLInteropManager; 65 | ID3D12CommandQueue *m_GLCommandQueue = nullptr; // weak 66 | 67 | static void 
CL_CALLBACK DummyCallback(const char*, const void*, size_t, void*) {} 68 | 69 | friend cl_int CL_API_CALL clGetContextInfo(cl_context, cl_context_info, size_t, void*, size_t*); 70 | 71 | public: 72 | Context(std::vector Devices, 73 | const cl_context_properties* Properties, 74 | std::unique_ptr glManager, 75 | PfnCallbackType pfnErrorCb, void* CallbackContext); 76 | ~Context(); 77 | 78 | void ReportError(const char* Error); 79 | auto GetErrorReporter(cl_int* errcode_ret) 80 | { 81 | if (errcode_ret) 82 | *errcode_ret = CL_SUCCESS; 83 | return [=](const char* ErrorMsg, cl_int ErrorCode) 84 | { 85 | if (ErrorMsg) 86 | ReportError(ErrorMsg); 87 | if (errcode_ret) 88 | *errcode_ret = ErrorCode; 89 | return nullptr; 90 | }; 91 | } 92 | auto GetErrorReporter() 93 | { 94 | return [this](const char* ErrorMsg, cl_int ErrorCode) 95 | { 96 | if (ErrorMsg) 97 | ReportError(ErrorMsg); 98 | return ErrorCode; 99 | }; 100 | } 101 | 102 | cl_uint GetDeviceCount() const noexcept; 103 | Device& GetDevice(cl_uint index) const noexcept; 104 | D3DDevice &GetD3DDevice(cl_uint index) const noexcept; 105 | D3DDevice *D3DDeviceForContext(Device &device) const noexcept; 106 | GLInteropManager *GetGLManager() const noexcept { return m_GLInteropManager.get(); } 107 | void InsertGLWait(ID3D12Fence *fence, UINT64 value) const noexcept { m_GLCommandQueue->Wait(fence, value); } 108 | std::vector GetDevices() const noexcept { return m_AssociatedDevices; } 109 | 110 | void AddDestructionCallback(DestructorCallback::Fn pfn, void* pUserData); 111 | }; 112 | -------------------------------------------------------------------------------- /src/openclon12/device.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | #include "platform.hpp" 5 | #include "cache.hpp" 6 | #include 7 | #include 8 | #include 9 | 10 | #include "ImmediateContext.hpp" 11 | using ImmCtx = D3D12TranslationLayer::ImmediateContext; 12 | 13 | class Task; 14 | class Device; 15 | 16 | using Submission = std::vector<::ref_ptr_int>; 17 | 18 | class D3DDevice 19 | { 20 | public: 21 | ID3D12Device* GetDevice() const noexcept { return m_spDevice.Get(); } 22 | ShaderCache &GetShaderCache() const noexcept { return m_ShaderCache; } 23 | ShaderCache &GetDriverShaderCache() const noexcept { return m_DriverShaderCache; } 24 | 25 | ImmCtx& ImmCtx() noexcept { return m_ImmCtx; } 26 | UINT64 GetTimestampFrequency() const noexcept { return m_TimestampFrequency; } 27 | INT64 GPUToQPCTimestampOffset() const noexcept { return m_GPUToQPCTimestampOffset; } 28 | 29 | void SubmitTask(Task*, TaskPoolLock const&); 30 | void ReadyTask(Task*, TaskPoolLock const&); 31 | void Flush(TaskPoolLock const&); 32 | 33 | //std::unique_ptr CreatePSO(D3D12TranslationLayer::COMPUTE_PIPELINE_STATE_DESC const& Desc); 34 | Device &GetParent() const noexcept { return m_Parent; } 35 | 36 | protected: 37 | D3DDevice(Device &parent, ID3D12Device *pDevice, ID3D12CommandQueue *pQueue, 38 | D3D12_FEATURE_DATA_D3D12_OPTIONS &options, bool IsImportedDevice); 39 | ~D3DDevice() = default; 40 | 41 | friend class Device; 42 | 43 | void ExecuteTasks(std::unique_ptr spTasks); 44 | unsigned m_ContextCount = 1; 45 | const bool m_IsImportedDevice; 46 | 47 | Device &m_Parent; 48 | const ComPtr m_spDevice; 49 | ::ImmCtx m_ImmCtx; 50 | 51 | std::unique_ptr m_RecordingSubmission; 52 | 53 | BackgroundTaskScheduler::Scheduler m_ExecutionScheduler; 54 | BackgroundTaskScheduler::Scheduler m_CompletionScheduler; 55 | mutable ShaderCache m_ShaderCache; 56 | mutable ShaderCache m_DriverShaderCache; 57 | 58 | UINT64 m_TimestampFrequency = 0; 59 | INT64 m_GPUToQPCTimestampOffset = 0; 60 | }; 61 | 62 | class Device : public CLChildBase 63 | { 64 | public: 65 | 
Device(Platform& parent, IDXCoreAdapter* pAdapter); 66 | ~Device(); 67 | 68 | cl_bool IsAvailable() const noexcept; 69 | cl_ulong GetGlobalMemSize(); 70 | DXCoreHardwareID const& GetHardwareIds() const noexcept; 71 | cl_device_type GetType() const noexcept; 72 | bool IsMCDM() const noexcept; 73 | bool IsUMA(); 74 | bool SupportsInt16(); 75 | bool SupportsTypedUAVLoad(); 76 | 77 | std::string GetDeviceName() const; 78 | LUID GetAdapterLuid() const; 79 | D3D_SHADER_MODEL GetShaderModel() const { return m_ShaderModel; } 80 | std::pair GetWaveSizes() const 81 | { 82 | if (!m_D3D12Options1.WaveOps) 83 | { 84 | return { 32, 64 }; 85 | } 86 | return { m_D3D12Options1.WaveLaneCountMin, m_D3D12Options1.WaveLaneCountMax }; 87 | } 88 | 89 | D3DDevice &InitD3D(ID3D12Device *device = nullptr, ID3D12CommandQueue *queue = nullptr); 90 | void ReleaseD3D(D3DDevice &device); 91 | void SetDefaultDevice() { m_DefaultDevice = true; } 92 | 93 | bool HasD3DDevice() const noexcept { return !m_D3DDevices.empty(); } 94 | void CloseCaches(); 95 | void FlushAllDevices(TaskPoolLock const& Lock); 96 | 97 | protected: 98 | void CacheCaps(std::lock_guard const&, ComPtr spDevice = {}); 99 | 100 | ComPtr m_spAdapter; 101 | DXCoreHardwareID m_HWIDs; 102 | std::vector<::D3DDevice *> m_D3DDevices; 103 | 104 | // Lazy-initialized 105 | std::mutex m_InitLock; 106 | bool m_CapsValid = false; 107 | bool m_DefaultDevice = false; 108 | D3D12_FEATURE_DATA_D3D12_OPTIONS m_D3D12Options = {}; 109 | D3D12_FEATURE_DATA_D3D12_OPTIONS1 m_D3D12Options1 = {}; 110 | D3D12_FEATURE_DATA_D3D12_OPTIONS4 m_D3D12Options4 = {}; 111 | D3D12_FEATURE_DATA_ARCHITECTURE m_Architecture = {}; 112 | D3D_SHADER_MODEL m_ShaderModel = D3D_SHADER_MODEL_6_0; 113 | }; 114 | 115 | using D3DDeviceAndRef = std::pair; 116 | -------------------------------------------------------------------------------- /src/openclon12/resources.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 
Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "context.hpp" 6 | #include 7 | 8 | class MapTask; 9 | class Task; 10 | 11 | struct mesa_glinterop_export_in; 12 | struct mesa_glinterop_export_out; 13 | 14 | class Resource : public CLChildBase 15 | { 16 | public: 17 | using UnderlyingResource = D3D12TranslationLayer::Resource; 18 | using UnderlyingResourcePtr = D3D12TranslationLayer::unique_comptr; 19 | struct DestructorCallback 20 | { 21 | using Fn = void(CL_CALLBACK *)(cl_mem, void*); 22 | Fn m_pfn; 23 | void* m_userData; 24 | }; 25 | 26 | const cl_mem_flags m_Flags; 27 | void* const m_pHostPointer; 28 | const ref_ptr_int m_ParentBuffer; 29 | const size_t m_Offset = 0; 30 | const cl_image_format m_Format = {}; 31 | const cl_image_desc m_Desc; 32 | std::vector const m_Properties; 33 | D3D12TranslationLayer::ResourceCreationArgs m_CreationArgs; 34 | 35 | struct GLInfo 36 | { 37 | cl_gl_object_type ObjectType; 38 | cl_GLuint ObjectName; 39 | cl_GLenum TextureTarget; 40 | cl_GLint MipLevel; 41 | size_t BufferOffset; 42 | uint32_t BaseArray; 43 | }; 44 | std::optional m_GLInfo; 45 | 46 | static Resource* CreateBuffer(Context& Parent, D3D12TranslationLayer::ResourceCreationArgs& Args, void* pHostPointer, cl_mem_flags flags, const cl_mem_properties* properties); 47 | static Resource* CreateSubBuffer(Resource& ParentBuffer, const cl_buffer_region& region, cl_mem_flags flags, const cl_mem_properties *properties); 48 | static Resource* CreateImage(Context& Parent, D3D12TranslationLayer::ResourceCreationArgs& Args, void* pHostPointer, const cl_image_format& image_format, const cl_image_desc& image_desc, cl_mem_flags flags, const cl_mem_properties *properties); 49 | static Resource* CreateImage1DBuffer(Resource& ParentBuffer, const cl_image_format& image_format, const cl_image_desc& image_desc, cl_mem_flags flags, const cl_mem_properties *properties); 50 | static Resource *ImportGLResource(Context &Parent, cl_mem_flags flags, 
mesa_glinterop_export_in &in, cl_int *error); 51 | 52 | UnderlyingResource* GetUnderlyingResource(D3DDevice*); 53 | void SetActiveDevice(D3DDevice*); 54 | UnderlyingResource* GetActiveUnderlyingResource() const { return m_ActiveUnderlying; } 55 | cl_uint GetMapCount() const { std::lock_guard MapLock(m_MapLock); return m_MapCount; } 56 | 57 | void EnqueueMigrateResource(D3DDevice* newDevice, Task* triggeringTask, cl_mem_migration_flags flags); 58 | 59 | D3D12TranslationLayer::SRV& GetSRV(D3DDevice*); 60 | D3D12TranslationLayer::UAV& GetUAV(D3DDevice*); 61 | ~Resource(); 62 | 63 | void AddMapTask(MapTask*); 64 | MapTask* GetMapTask(void* MapPtr); 65 | void RemoveMapTask(MapTask*); 66 | 67 | void AddDestructionCallback(DestructorCallback::Fn pfn, void* pUserData); 68 | 69 | protected: 70 | std::recursive_mutex m_MultiDeviceLock; 71 | D3DDevice *m_CurrentActiveDevice = nullptr; 72 | UnderlyingResource *m_ActiveUnderlying = nullptr; 73 | std::unordered_map m_UnderlyingMap; 74 | std::unordered_map m_SRVs; 75 | std::unordered_map m_UAVs; 76 | 77 | std::unique_ptr m_InitialData; 78 | D3D12_UNORDERED_ACCESS_VIEW_DESC m_UAVDesc; 79 | D3D12_SHADER_RESOURCE_VIEW_DESC m_SRVDesc; 80 | 81 | mutable std::mutex m_MapLock; 82 | std::unordered_map>> m_OutstandingMaps; 83 | cl_uint m_MapCount = 0; 84 | 85 | mutable std::mutex m_DestructorLock; 86 | std::vector m_DestructorCallbacks; 87 | 88 | Resource(Context& Parent, decltype(m_CreationArgs) const& CreationArgs, void* pHostPointer, size_t size, cl_mem_flags flags, std::optional glInfo, const cl_mem_properties *properties); 89 | Resource(Resource& ParentBuffer, size_t offset, size_t size, const cl_image_format& image_format, cl_mem_object_type type, cl_mem_flags flags, const cl_mem_properties *properties); 90 | Resource(Context& Parent, decltype(m_CreationArgs) const& CreationArgs, void* pHostPointer, const cl_image_format& image_format, const cl_image_desc& image_desc, cl_mem_flags flags, std::optional glInfo, const cl_mem_properties 
*properties); 91 | 92 | static cl_image_desc GetBufferDesc(size_t size, cl_mem_object_type type); 93 | void UploadInitialData(Task* triggeringTask); 94 | friend class UploadInitialData; 95 | }; 96 | -------------------------------------------------------------------------------- /src/openclon12/scheduler.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #ifndef WIN32_LEAN_AND_MEAN 15 | #define WIN32_LEAN_AND_MEAN 16 | #endif 17 | #ifndef NOMINMAX 18 | #define NOMINMAX 19 | #endif 20 | #include 21 | 22 | #include 23 | 24 | namespace BackgroundTaskScheduler 25 | { 26 | enum class Priority { Idle, Normal }; 27 | struct SchedulingMode 28 | { 29 | uint32_t NumThreads; 30 | Priority ThreadPriority; 31 | bool operator==(SchedulingMode const& b) { return NumThreads == b.NumThreads && ThreadPriority == b.ThreadPriority; } 32 | bool operator!=(SchedulingMode const& b) { return !(*this == b); } 33 | bool operator>(SchedulingMode const& b) { return NumThreads > b.NumThreads || (int)ThreadPriority > (int)b.ThreadPriority; } 34 | }; 35 | 36 | struct Task 37 | { 38 | using FnType = void(APIENTRY*)(void* pContext); 39 | FnType m_Callback; 40 | FnType m_Cancel; 41 | void* m_pContext; 42 | }; 43 | 44 | class Scheduler 45 | { 46 | protected: 47 | struct QueuedEventSignal 48 | { 49 | std::atomic m_RefCount; 50 | XPlatHelpers::unique_event m_Event; 51 | }; 52 | std::list m_QueuedEvents; 53 | std::list::iterator m_QueuedEventsPseudoEnd; 54 | 55 | struct QueuedTask : Task 56 | { 57 | std::list::iterator m_QueuedEventsAtTimeOfTaskSubmission; 58 | QueuedTask() = default; 59 | QueuedTask(Task const& t, decltype(m_QueuedEventsAtTimeOfTaskSubmission) iter) 60 | : Task(t), m_QueuedEventsAtTimeOfTaskSubmission(iter) 61 | { 62 | } 63 | 
QueuedTask(QueuedTask const&) = default; 64 | QueuedTask(QueuedTask&&) = default; 65 | QueuedTask& operator=(QueuedTask const&) = default; 66 | QueuedTask& operator=(QueuedTask&&) = default; 67 | }; 68 | 69 | // These are the tasks that are waiting for a thread to consume them. 70 | std::deque m_Tasks; 71 | // This is a counter of how many tasks are currently being processed by 72 | // worker threads. Adding this to the size of m_Tasks enables determining 73 | // the total number of currently not-completed tasks. 74 | uint32_t m_TasksInProgress = 0; 75 | std::vector m_Threads; 76 | std::vector m_ExitingThreads; 77 | mutable std::mutex m_Lock; 78 | std::condition_variable m_CV; 79 | 80 | SchedulingMode m_CurrentMode = { 0, Priority::Idle }; 81 | SchedulingMode m_EffectiveMode = { 0, Priority::Idle }; 82 | bool m_bShutdown = false; 83 | 84 | // These methods require the lock to be held. 85 | // Const-ref methods just require it, non-const-ref methods may release it. 86 | bool IsSchedulerIdle(std::unique_lock const&) const noexcept { return m_Tasks.empty() && m_TasksInProgress == 0; } 87 | void SetSchedulingModeImpl(SchedulingMode mode, std::unique_lock& lock); // Releases lock 88 | void QueueSetSchedulingModeTask(SchedulingMode mode, std::unique_lock const&); 89 | void RetireTask(QueuedTask const& task, std::unique_lock const&) noexcept; 90 | 91 | // These methods will take the lock. 
92 | void SetSchedulingModeTask(SchedulingMode mode) noexcept; 93 | static void __stdcall SetSchedulingModeTaskStatic(void* pContext); 94 | void TaskThread(int ThreadID) noexcept; 95 | 96 | public: 97 | Scheduler(); 98 | ~Scheduler() { Shutdown(); } 99 | 100 | void SetSchedulingMode(SchedulingMode mode); 101 | void QueueTask(Task task); 102 | void SignalEventOnCompletionOfCurrentTasks(XPlatHelpers::Event hEvent, SchedulingMode modeAfterSignal); 103 | void CancelExistingTasks() noexcept; 104 | void Shutdown() noexcept; 105 | 106 | SchedulingMode GetCurrentMode() const 107 | { 108 | std::lock_guard lock(m_Lock); 109 | return m_CurrentMode; 110 | } 111 | SchedulingMode GetEffectiveMode() const 112 | { 113 | std::lock_guard lock(m_Lock); 114 | return m_EffectiveMode; 115 | } 116 | }; 117 | } -------------------------------------------------------------------------------- /include/d3d12translationlayer/View.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "DeviceChild.hpp" 7 | #include "SubresourceHelpers.hpp" 8 | #include 9 | 10 | namespace D3D12TranslationLayer 11 | { 12 | class Resource; 13 | //================================================================================================================================== 14 | // View 15 | //================================================================================================================================== 16 | 17 | // These types are purely used to specialize the templated 18 | // view class 19 | enum class ShaderResourceViewType {}; 20 | enum class UnorderedAccessViewType {}; 21 | 22 | template< class TIface > 23 | struct CViewMapper; 24 | 25 | #define DECLARE_VIEW_MAPPER(View, DescType12, TranslationLayerDesc) \ 26 | template<> struct CViewMapper<##View##Type> \ 27 | { \ 28 | typedef TranslationLayerDesc TTranslationLayerDesc; \ 29 | typedef D3D12_##DescType12 TDesc12; \ 30 | static decltype(&ID3D12Device::Create##View) GetCreate() { return &ID3D12Device::Create##View; } \ 31 | } 32 | 33 | DECLARE_VIEW_MAPPER(ShaderResourceView, SHADER_RESOURCE_VIEW_DESC, D3D12_SHADER_RESOURCE_VIEW_DESC); 34 | DECLARE_VIEW_MAPPER(UnorderedAccessView, UNORDERED_ACCESS_VIEW_DESC, D3D12_UNORDERED_ACCESS_VIEW_DESC); 35 | #undef DECLARE_VIEW_MAPPER 36 | 37 | class ViewBase : public DeviceChild 38 | { 39 | public: // Methods 40 | ViewBase(ImmediateContext* pDevice, Resource* pResource, CViewSubresourceSubset const& Subresources) noexcept; 41 | 42 | // Note: This is hiding the base class implementation not overriding it 43 | // Warning: this method is hidden in the UAV type, and is not virtual 44 | // Always ensure that this method is called on the most derived type. 
45 | void UsedInCommandList(UINT64 id); 46 | 47 | public: // Members 48 | Resource* const m_pResource; 49 | 50 | protected: 51 | D3D12_CPU_DESCRIPTOR_HANDLE m_Descriptor; 52 | UINT m_DescriptorHeapIndex; 53 | 54 | public: 55 | CViewSubresourceSubset m_subresources; 56 | UINT m_ViewUniqueness; 57 | }; 58 | 59 | template< class TIface > 60 | class View : public ViewBase 61 | { 62 | public: // Types 63 | typedef CViewMapper TMapper; 64 | typedef typename CViewMapper::TDesc12 TDesc12; 65 | typedef typename CViewMapper::TTranslationLayerDesc TTranslationLayerDesc; 66 | 67 | public: // Methods 68 | static View *CreateView(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) { return new View(pDevice, Desc, ViewResource); } 69 | static void DestroyView(View* pView) { delete pView; } 70 | 71 | View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false); 72 | ~View() noexcept; 73 | 74 | const TDesc12& GetDesc12() noexcept; 75 | 76 | bool IsUpToDate() const noexcept { return m_pResource->GetUniqueness() == m_ViewUniqueness; } 77 | HRESULT RefreshUnderlying() noexcept; 78 | D3D12_CPU_DESCRIPTOR_HANDLE GetRefreshedDescriptorHandle() 79 | { 80 | HRESULT hr = RefreshUnderlying(); 81 | if (FAILED(hr)) 82 | { 83 | assert(hr != E_INVALIDARG); 84 | ThrowFailure(hr); 85 | } 86 | return m_Descriptor; 87 | } 88 | 89 | private: 90 | TDesc12 m_Desc; 91 | 92 | // We tamper with m_Desc.Buffer.FirstElement when renaming resources for map discard so it is important that we record the 93 | // original first element expected by the API 94 | UINT64 APIFirstElement; 95 | 96 | void UpdateMinLOD(float MinLOD); 97 | }; 98 | 99 | typedef View TSRV; 100 | typedef View TUAV; 101 | 102 | class CDescriptorHeapManager; 103 | struct DescriptorHeapEntry 104 | { 105 | DescriptorHeapEntry(CDescriptorHeapManager *pDescriptorHeapManager, D3D12_CPU_DESCRIPTOR_HANDLE Descriptor, UINT DescriptorHeapIndex, UINT64 
LastUsedCommandListID) : 106 | m_pDescriptorHeapManager(pDescriptorHeapManager), m_Descriptor(Descriptor), m_DescriptorHeapIndex(DescriptorHeapIndex), m_LastUsedCommandListID(LastUsedCommandListID) {} 107 | 108 | D3D12_CPU_DESCRIPTOR_HANDLE m_Descriptor; 109 | CDescriptorHeapManager *m_pDescriptorHeapManager; 110 | UINT m_DescriptorHeapIndex; 111 | UINT64 m_LastUsedCommandListID; 112 | }; 113 | 114 | typedef TSRV SRV; 115 | typedef TUAV UAV; 116 | }; -------------------------------------------------------------------------------- /include/d3d12translationlayer/XPlatHelpers.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | namespace XPlatHelpers 6 | { 7 | #ifdef _WIN32 8 | using Event = HANDLE; 9 | constexpr Event InvalidEvent = nullptr; 10 | inline void SetEvent(Event e) { ::SetEvent(e); } 11 | inline Event CreateEvent() { return ::CreateEvent(nullptr, false, false, nullptr); } 12 | inline bool WaitForEvent(Event e, DWORD timeoutMs) { return WaitForSingleObject(e, timeoutMs) == WAIT_OBJECT_0; } 13 | inline bool WaitForEvent(Event e) { return WaitForEvent(e, INFINITE); } 14 | inline Event DuplicateEvent(Event e) 15 | { 16 | Event eNew = nullptr; 17 | (void)DuplicateHandle(GetCurrentProcess(), e, GetCurrentProcess(), &eNew, 0, FALSE, DUPLICATE_SAME_ACCESS); 18 | return eNew; 19 | } 20 | inline void CloseEvent(Event e) { CloseHandle(e); } 21 | inline Event EventFromHANDLE(HANDLE h) { return h; } 22 | #else 23 | using Event = int; 24 | constexpr Event InvalidEvent = -1; 25 | inline void SetEvent(Event e) { eventfd_write(e, 1); } 26 | inline Event CreateEvent() { return eventfd(0, 0); } 27 | inline bool WaitForEvent(Event e) 28 | { 29 | eventfd_t val; 30 | return eventfd_read(e, &val) == 0; 31 | } 32 | inline bool WaitForEvent(Event e, int timeoutMs) 33 | { 34 | pollfd fds = { e, POLLIN, 0 }; 35 | if (poll(&fds, 1, timeoutMs) && 
(fds.revents & POLLIN)) 36 | { 37 | return WaitForEvent(e); 38 | } 39 | return false; 40 | } 41 | inline Event DuplicateEvent(Event e) { return dup(e); } 42 | inline void CloseEvent(Event e) { close(e); } 43 | inline Event EventFromHANDLE(HANDLE h) 44 | { 45 | return static_cast(reinterpret_cast(h)); 46 | } 47 | #endif 48 | 49 | class unique_event 50 | { 51 | Event m_event = InvalidEvent; 52 | public: 53 | struct copy_tag {}; 54 | unique_event() = default; 55 | unique_event(Event e) : m_event(e) { } 56 | unique_event(Event e, copy_tag) : m_event(DuplicateEvent(e)) { } 57 | unique_event(unique_event&& e) : m_event(e.detach()) { } 58 | unique_event& operator=(unique_event&& e) 59 | { 60 | close(); 61 | m_event = e.detach(); 62 | return *this; 63 | } 64 | ~unique_event() { close(); } 65 | void close() 66 | { 67 | if (*this) 68 | { 69 | CloseEvent(m_event); 70 | } 71 | m_event = InvalidEvent; 72 | } 73 | void reset(Event e = InvalidEvent) { close(); m_event = e; } 74 | void create() { reset(CreateEvent()); } 75 | Event get() { return m_event; } 76 | Event detach() { Event e = m_event; m_event = InvalidEvent; return e; } 77 | void set() const { SetEvent(m_event); } 78 | bool poll() const { return WaitForEvent(m_event, 0); } 79 | void wait() const { WaitForEvent(m_event); } 80 | operator bool() const { return m_event != InvalidEvent; } 81 | }; 82 | 83 | // This class relies on the fact that modules are void* in both, and using the same Windows API names in the Linux Windows.h. 
84 | class unique_module 85 | { 86 | HMODULE _hM = nullptr; 87 | public: 88 | unique_module() = default; 89 | explicit unique_module(HMODULE hM) : _hM(hM) { } 90 | explicit unique_module(const char* pCStr) : _hM(LoadLibraryA(pCStr)) { } 91 | #ifdef _WIN32 92 | explicit unique_module(const wchar_t* pWStr) : _hM(LoadLibraryW(pWStr)) { } 93 | #else 94 | explicit unique_module(const wchar_t* pWStr) 95 | : _hM(LoadLibraryA(std::wstring_convert>().to_bytes(pWStr).c_str())) 96 | { 97 | } 98 | #endif 99 | 100 | void reset(HMODULE hM = nullptr) 101 | { 102 | if (_hM) 103 | FreeLibrary(_hM); 104 | _hM = hM; 105 | } 106 | void load(const char* pCStr) { reset(LoadLibraryA(pCStr)); } 107 | #ifdef _WIN32 108 | void load(const wchar_t* pWStr) { reset(LoadLibraryW(pWStr)); } 109 | #else 110 | void load(const wchar_t* pWStr) { *this = unique_module(pWStr); } 111 | #endif 112 | HMODULE detach() 113 | { 114 | HMODULE hM = _hM; 115 | _hM = nullptr; 116 | return hM; 117 | } 118 | 119 | ~unique_module() { reset(); } 120 | unique_module(unique_module&& o) : _hM(o.detach()) { } 121 | unique_module& operator=(unique_module&& o) 122 | { 123 | reset(o.detach()); 124 | return *this; 125 | } 126 | 127 | HMODULE* get_for_external_load() 128 | { 129 | reset(); 130 | return &_hM; 131 | } 132 | HMODULE get() const { return _hM; } 133 | operator bool() const { return _hM != nullptr; } 134 | 135 | void* proc_address(const char* pCStr) const { return GetProcAddress(_hM, pCStr); } 136 | template T proc_address(const char* pCStr) const { return reinterpret_cast(proc_address(pCStr)); } 137 | }; 138 | } -------------------------------------------------------------------------------- /include/d3d12translationlayer/CommandListManager.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "Fence.hpp" 7 | #include "Util.hpp" 8 | 9 | namespace D3D12TranslationLayer 10 | { 11 | LONGLONG InterlockedRead64(volatile LONGLONG* p); 12 | 13 | class ImmediateContext; 14 | class ResidencySet; 15 | 16 | class CommandListManager 17 | { 18 | public: 19 | CommandListManager(ImmediateContext *pParent, ID3D12CommandQueue *pQueue); 20 | 21 | ~CommandListManager(); 22 | 23 | void AdditionalCommandsAdded() noexcept; 24 | void DispatchCommandAdded() noexcept; 25 | void UploadHeapSpaceAllocated(UINT64 heapSize) noexcept; 26 | void ReadbackInitiated() noexcept; 27 | void SubmitCommandListIfNeeded(); 28 | 29 | bool HasCommands() const noexcept { return m_NumCommands > 0; } 30 | 31 | void SubmitCommandList(); 32 | void InitCommandList(); 33 | void ResetCommandList(); 34 | void CloseCommandList() { CloseCommandList(nullptr); } 35 | void DiscardCommandList(); 36 | void ResetResidencySet(); 37 | 38 | void PrepForCommandQueueSync(); 39 | 40 | // Returns true if synchronization was successful, false likely means device is removed 41 | bool WaitForCompletion(); 42 | bool WaitForFenceValue(UINT64 FenceValue); 43 | UINT64 GetCompletedFenceValue() noexcept { return m_Fence.GetCompletedValue(); } 44 | HRESULT EnqueueSetEvent(HANDLE hEvent) noexcept; 45 | UINT64 EnsureFlushedAndFenced(); 46 | HANDLE GetEvent() noexcept { return m_hWaitEvent; } 47 | void AddResourceToResidencySet(Resource *pResource); 48 | 49 | UINT64 GetCommandListID() { return m_commandListID; } 50 | UINT64 GetCommandListIDInterlockedRead() { return InterlockedRead64((volatile LONGLONG*)&m_commandListID); } 51 | ID3D12CommandQueue* GetCommandQueue() { return m_pCommandQueue.get(); } 52 | ID3D12CommandList* GetCommandList() { return m_pCommandList.get(); } 53 | ID3D12SharingContract* GetSharingContract() { return m_pSharingContract.get(); } 54 | Fence* GetFence() { return &m_Fence; } 55 | 56 | ID3D12GraphicsCommandList* 
GetGraphicsCommandList(ID3D12CommandList *pCommandList = nullptr) { return static_cast(pCommandList ? pCommandList : m_pCommandList.get()); } 57 | 58 | bool WaitForFenceValueInternal(bool IsImmediateContextThread, UINT64 FenceValue); 59 | 60 | private: 61 | void ResetCommandListTrackingData() 62 | { 63 | m_NumCommands = 0; 64 | m_NumDispatches = 0; 65 | m_UploadHeapSpaceAllocated = 0; 66 | } 67 | 68 | void SubmitCommandListImpl(); 69 | 70 | ImmediateContext* const m_pParent; // weak-ref 71 | unique_comptr m_pCommandList; 72 | unique_comptr m_pCommandAllocator; 73 | unique_comptr m_pCommandQueue; 74 | unique_comptr m_pSharingContract; 75 | Fence m_Fence{m_pParent, FENCE_FLAG_NONE, 0}; 76 | #if TRANSLATION_LAYER_DBG 77 | Fence m_StallFence{m_pParent, FENCE_FLAG_NONE, 0}; 78 | #endif 79 | std::unique_ptr m_pResidencySet; 80 | UINT m_NumFlushesWithNoReadback = 0; 81 | UINT m_NumCommands = 0; 82 | UINT m_NumDispatches = 0; 83 | UINT64 m_UploadHeapSpaceAllocated = 0; 84 | ThrowingSafeHandle m_hWaitEvent; 85 | 86 | // The more upload heap space allocated in a command list, the more memory we are 87 | // potentially holding up that could have been recycled into the pool. If too 88 | // much is held up, flush the command list 89 | static constexpr UINT cMaxAllocatedUploadHeapSpacePerCommandList = 256 * 1024 * 1024; 90 | 91 | DWORD m_MaxAllocatedUploadHeapSpacePerCommandList; 92 | 93 | // Command allocator pools 94 | CBoundedFencePool< unique_comptr > m_AllocatorPool; 95 | 96 | // Some notes on threading related to this command list ID / fence value. 97 | // The fence value is and should only ever be written by the immediate context thread. 98 | // The immediate context thread may read the fence value through GetCommandListID(). 99 | // Other threads may read this value, but should only do so via CommandListIDInterlockedRead(). 
100 | UINT64 m_commandListID = 1; 101 | 102 | // Number of maximum in-flight command lists at a given time 103 | static constexpr UINT GetMaxInFlightDepth() 104 | { 105 | return 1024; 106 | }; 107 | 108 | void SubmitFence() noexcept; 109 | void CloseCommandList(ID3D12CommandList *pCommandList); 110 | void PrepareNewCommandList(); 111 | void IncrementFence(); 112 | void UpdateLastUsedCommandListIDs(); 113 | }; 114 | 115 | } // namespace D3D12TranslationLayer 116 | -------------------------------------------------------------------------------- /src/compilers/v2/clc_compiler.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © Microsoft Corporation 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 
22 | */ 23 | 24 | #ifndef CLC_COMPILER_H 25 | #define CLC_COMPILER_H 26 | 27 | #include "clc/clc.h" 28 | #include "dxil_versions.h" 29 | 30 | #ifdef __cplusplus 31 | extern "C" { 32 | #endif 33 | 34 | #define CLC_MAX_CONSTS 32 35 | #define CLC_MAX_BINDINGS_PER_ARG 3 36 | #define CLC_MAX_SAMPLERS 16 37 | 38 | struct clc_printf_info { 39 | unsigned num_args; 40 | unsigned *arg_sizes; 41 | char *str; 42 | }; 43 | 44 | struct clc_dxil_metadata { 45 | struct { 46 | unsigned offset; 47 | unsigned size; 48 | union { 49 | struct { 50 | unsigned buf_ids[CLC_MAX_BINDINGS_PER_ARG]; 51 | unsigned num_buf_ids; 52 | } image; 53 | struct { 54 | unsigned sampler_id; 55 | } sampler; 56 | struct { 57 | unsigned buf_id; 58 | } globconstptr; 59 | struct { 60 | unsigned sharedmem_offset; 61 | } localptr; 62 | }; 63 | } *args; 64 | unsigned kernel_inputs_cbv_id; 65 | unsigned kernel_inputs_buf_size; 66 | unsigned work_properties_cbv_id; 67 | size_t num_uavs; 68 | size_t num_srvs; 69 | size_t num_samplers; 70 | 71 | struct { 72 | void *data; 73 | size_t size; 74 | unsigned uav_id; 75 | } consts[CLC_MAX_CONSTS]; 76 | size_t num_consts; 77 | 78 | struct { 79 | unsigned sampler_id; 80 | unsigned addressing_mode; 81 | unsigned normalized_coords; 82 | unsigned filter_mode; 83 | } const_samplers[CLC_MAX_SAMPLERS]; 84 | size_t num_const_samplers; 85 | size_t local_mem_size; 86 | size_t priv_mem_size; 87 | 88 | uint16_t local_size[3]; 89 | uint16_t local_size_hint[3]; 90 | 91 | struct { 92 | unsigned info_count; 93 | struct clc_printf_info *infos; 94 | int uav_id; 95 | } printf; 96 | }; 97 | 98 | struct clc_dxil_object { 99 | const struct clc_kernel_info *kernel; 100 | struct clc_dxil_metadata metadata; 101 | struct { 102 | void *data; 103 | size_t size; 104 | } binary; 105 | }; 106 | 107 | struct clc_runtime_arg_info { 108 | union { 109 | struct { 110 | unsigned size; 111 | } localptr; 112 | struct { 113 | unsigned normalized_coords; 114 | unsigned addressing_mode; /* See SPIR-V spec for value 
meanings */ 115 | unsigned linear_filtering; 116 | } sampler; 117 | }; 118 | }; 119 | 120 | struct clc_runtime_kernel_conf { 121 | uint16_t local_size[3]; 122 | struct clc_runtime_arg_info *args; 123 | unsigned lower_bit_size; 124 | unsigned support_global_work_id_offsets; 125 | unsigned support_workgroup_id_offsets; 126 | 127 | enum dxil_shader_model max_shader_model; 128 | enum dxil_validator_version validator_version; 129 | }; 130 | 131 | struct clc_libclc_dxil_options { 132 | unsigned optimize; 133 | }; 134 | 135 | struct clc_libclc * 136 | clc_libclc_new_dxil(const struct clc_logger *logger, 137 | const struct clc_libclc_dxil_options *dxil_options); 138 | 139 | bool 140 | clc_spirv_to_dxil(struct clc_libclc *lib, 141 | const struct clc_binary *linked_spirv, 142 | const struct clc_parsed_spirv *parsed_data, 143 | const char *entrypoint, 144 | const struct clc_runtime_kernel_conf *conf, 145 | const struct clc_spirv_specialization_consts *consts, 146 | const struct clc_logger *logger, 147 | struct clc_dxil_object *out_dxil); 148 | 149 | void clc_free_dxil_object(struct clc_dxil_object *dxil); 150 | 151 | /* This struct describes the layout of data expected in the CB bound at work_properties_cbv_id (see struct clc_dxil_metadata) */ 152 | struct clc_work_properties_data { 153 | /* Returned from get_global_offset(), and added into get_global_id() */ 154 | unsigned global_offset_x; 155 | unsigned global_offset_y; 156 | unsigned global_offset_z; 157 | /* Returned from get_work_dim() */ 158 | unsigned work_dim; 159 | /* The number of work groups being launched (i.e. the parameters to Dispatch). 160 | * If the requested global size doesn't fit in a single Dispatch, these values should 161 | * indicate the total number of groups that *should* have been launched.
*/ 162 | unsigned group_count_total_x; 163 | unsigned group_count_total_y; 164 | unsigned group_count_total_z; 165 | unsigned padding; 166 | /* If the requested global size doesn't fit in a single Dispatch, subsequent dispatches 167 | * should fill out these offsets to indicate how many groups have already been launched */ 168 | unsigned group_id_offset_x; 169 | unsigned group_id_offset_y; 170 | unsigned group_id_offset_z; 171 | }; 172 | 173 | uint64_t clc_compiler_get_version(void); 174 | 175 | #ifdef __cplusplus 176 | } 177 | #endif 178 | 179 | #endif 180 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/ImmediateContext.inl: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "View.inl" 6 | 7 | namespace D3D12TranslationLayer 8 | { 9 | 10 | inline ID3D12Resource* GetUnderlyingResource(Resource* pResource) 11 | { 12 | if (!pResource) 13 | return nullptr; 14 | return pResource->GetUnderlyingResource(); 15 | } 16 | 17 | //---------------------------------------------------------------------------------------------------------------------------------- 18 | template 19 | inline void GetBufferViewDesc(Resource *pBuffer, TDesc &Desc, UINT APIOffset, UINT APISize = -1) 20 | { 21 | if (pBuffer) 22 | { 23 | Desc.SizeInBytes = 24 | min(GetDynamicBufferSize(pBuffer, APIOffset), APISize); 25 | Desc.BufferLocation = Desc.SizeInBytes == 0 ? 
0 : 26 | // TODO: Cache the GPU VA, frequent calls to this cause a CPU hotspot 27 | (pBuffer->GetUnderlyingResource()->GetGPUVirtualAddress() // Base of the DX12 resource 28 | + pBuffer->GetSubresourcePlacement(0).Offset // Base of the DX11 resource after renaming 29 | + APIOffset); // Offset from the base of the DX11 resource 30 | } 31 | else 32 | { 33 | Desc.BufferLocation = 0; 34 | Desc.SizeInBytes = 0; 35 | } 36 | } 37 | 38 | //---------------------------------------------------------------------------------------------------------------------------------- 39 | inline void ImmediateContext::Dispatch(UINT x, UINT y, UINT z) // Forwards x, y, z to Dispatch() on the current graphics command list, then calls PostDispatch() 40 | { 41 | try 42 | { 43 | GetGraphicsCommandList()->Dispatch(x, y, z); 44 | PostDispatch(); 45 | } 46 | catch (const _com_error&) {} // caught by const reference so the exception object is not copied; already handled, but can't touch the command list 47 | } 48 | 49 | //---------------------------------------------------------------------------------------------------------------------------------- 50 | inline ID3D12CommandQueue *ImmediateContext::GetCommandQueue() noexcept 51 | { 52 | return m_CommandList.GetCommandQueue(); 53 | } 54 | 55 | //---------------------------------------------------------------------------------------------------------------------------------- 56 | inline ID3D12GraphicsCommandList *ImmediateContext::GetGraphicsCommandList() noexcept 57 | { 58 | return m_CommandList.GetGraphicsCommandList(); 59 | } 60 | 61 | // There is an MSVC bug causing a bogus warning to be emitted here for x64 only, while compiling ApplyAllResourceTransitions 62 | #pragma warning(push) 63 | #pragma warning(disable: 4789) 64 | //---------------------------------------------------------------------------------------------------------------------------------- 65 | inline CommandListManager *ImmediateContext::GetCommandListManager() noexcept 66 | { 67 | return &m_CommandList; 68 | } 69 | #pragma warning(pop) 70 | 71 |
//---------------------------------------------------------------------------------------------------------------------------------- 72 | inline ID3D12CommandList *ImmediateContext::GetCommandList() noexcept 73 | { 74 | return m_CommandList.GetCommandList(); 75 | } 76 | 77 | //---------------------------------------------------------------------------------------------------------------------------------- 78 | inline UINT64 ImmediateContext::GetCommandListID() noexcept 79 | { 80 | return m_CommandList.GetCommandListID(); 81 | } 82 | 83 | //---------------------------------------------------------------------------------------------------------------------------------- 84 | inline UINT64 ImmediateContext::GetCommandListIDInterlockedRead() noexcept 85 | { 86 | return m_CommandList.GetCommandListIDInterlockedRead(); 87 | } 88 | 89 | //---------------------------------------------------------------------------------------------------------------------------------- 90 | inline UINT64 ImmediateContext::GetCompletedFenceValue() noexcept 91 | { 92 | return m_CommandList.GetCompletedFenceValue(); 93 | } 94 | 95 | //---------------------------------------------------------------------------------------------------------------------------------- 96 | inline Fence *ImmediateContext::GetFence() noexcept 97 | { 98 | return m_CommandList.GetFence(); 99 | } 100 | 101 | //---------------------------------------------------------------------------------------------------------------------------------- 102 | inline void ImmediateContext::CloseCommandList() noexcept 103 | { 104 | m_CommandList.CloseCommandList(); 105 | } 106 | 107 | 108 | //---------------------------------------------------------------------------------------------------------------------------------- 109 | inline void ImmediateContext::ResetCommandList() noexcept 110 | { 111 | m_CommandList.ResetCommandList(); 112 | } 113 | 114 | 
//---------------------------------------------------------------------------------------------------------------------------------- 115 | inline HRESULT ImmediateContext::EnqueueSetEvent(HANDLE hEvent) noexcept 116 | { 117 | return m_CommandList.EnqueueSetEvent(hEvent); 118 | } 119 | 120 | //---------------------------------------------------------------------------------------------------------------------------------- 121 | inline bool ImmediateContext::WaitForCompletion() 122 | { 123 | return m_CommandList.WaitForCompletion(); // throws 124 | } 125 | 126 | //---------------------------------------------------------------------------------------------------------------------------------- 127 | inline bool ImmediateContext::WaitForFenceValue(UINT64 FenceValue) 128 | { 129 | return m_CommandList.WaitForFenceValue(FenceValue); // throws 130 | } 131 | 132 | //---------------------------------------------------------------------------------------------------------------------------------- 133 | inline void ImmediateContext::SubmitCommandList() 134 | { 135 | m_CommandList.SubmitCommandList(); // throws 136 | } 137 | 138 | //---------------------------------------------------------------------------------------------------------------------------------- 139 | inline void ImmediateContext::AdditionalCommandsAdded() noexcept 140 | { 141 | m_CommandList.AdditionalCommandsAdded(); 142 | } 143 | 144 | inline void ImmediateContext::UploadHeapSpaceAllocated(UINT64 HeapSize) noexcept 145 | { 146 | m_CommandList.UploadHeapSpaceAllocated(HeapSize); 147 | } 148 | 149 | //---------------------------------------------------------------------------------------------------------------------------------- 150 | inline bool ImmediateContext::HasCommands() noexcept 151 | { 152 | return m_CommandList.HasCommands(); 153 | } 154 | 155 | }; 156 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/View.inl: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | namespace D3D12TranslationLayer 5 | { 6 | template<> 7 | inline void View::UpdateMinLOD(float MinLOD) 8 | { 9 | switch (m_Desc.ViewDimension) 10 | { 11 | case D3D12_SRV_DIMENSION_BUFFER: 12 | case D3D12_SRV_DIMENSION_TEXTURE2DMS: 13 | case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY: 14 | break; 15 | case D3D12_SRV_DIMENSION_TEXTURE1D: 16 | m_Desc.Texture1D.ResourceMinLODClamp = MinLOD; 17 | break; 18 | case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: 19 | m_Desc.Texture1DArray.ResourceMinLODClamp = MinLOD; 20 | break; 21 | case D3D12_SRV_DIMENSION_TEXTURE2D: 22 | m_Desc.Texture2D.ResourceMinLODClamp = MinLOD; 23 | break; 24 | case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: 25 | m_Desc.Texture2DArray.ResourceMinLODClamp = MinLOD; 26 | break; 27 | case D3D12_SRV_DIMENSION_TEXTURE3D: 28 | m_Desc.Texture3D.ResourceMinLODClamp = MinLOD; 29 | break; 30 | case D3D12_SRV_DIMENSION_TEXTURECUBE: 31 | m_Desc.TextureCube.ResourceMinLODClamp = MinLOD; 32 | break; 33 | case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: 34 | m_Desc.TextureCubeArray.ResourceMinLODClamp = MinLOD; 35 | break; 36 | } 37 | } 38 | 39 | template< class TIface > 40 | inline void View::UpdateMinLOD(float /*MinLOD*/) 41 | { 42 | // Do nothing 43 | } 44 | 45 | //---------------------------------------------------------------------------------------------------------------------------------- 46 | inline UINT GetDynamicBufferOffset(Resource* pBuffer) 47 | { 48 | UINT64 offset = pBuffer ? 
pBuffer->GetSubresourcePlacement(0).Offset : 0; 49 | assert(offset < (UINT)-1); // D3D11 resources shouldn't be able to produce offsetable buffers of more than UINT_MAX 50 | return (UINT)offset; 51 | } 52 | 53 | template 54 | inline UINT GetDynamicBufferSize(Resource* pBuffer, UINT offset) 55 | { 56 | UINT width = pBuffer->GetSubresourcePlacement(0).Footprint.Width; 57 | return offset > width ? 0 : width - offset; 58 | } 59 | 60 | template<> 61 | inline UINT GetDynamicBufferSize(Resource* pBuffer, UINT offset) 62 | { 63 | UINT pitch = pBuffer->GetSubresourcePlacement(0).Footprint.RowPitch; 64 | return offset > pitch ? 0 : pitch - offset; 65 | } 66 | 67 | //---------------------------------------------------------------------------------------------------------------------------------- 68 | template 69 | const typename View::TDesc12& View::GetDesc12() noexcept 70 | { 71 | typedef decltype(TDesc12::Buffer) TBufferDesc12; 72 | if (m_pResource->AppDesc()->ResourceDimension() == D3D11_RESOURCE_DIMENSION_BUFFER) 73 | { 74 | UINT Divisor = GetByteAlignment(m_Desc.Format); 75 | if (m_Desc.Buffer.StructureByteStride != 0) 76 | { 77 | Divisor = m_Desc.Buffer.StructureByteStride; 78 | } 79 | UINT ByteOffset = GetDynamicBufferOffset(m_pResource); 80 | assert(ByteOffset % Divisor == 0); 81 | m_Desc.Buffer.FirstElement = APIFirstElement + ByteOffset / Divisor; 82 | } 83 | return m_Desc; 84 | } 85 | 86 | //---------------------------------------------------------------------------------------------------------------------------------- 87 | template 88 | HRESULT View::RefreshUnderlying() noexcept 89 | { 90 | if (m_ViewUniqueness == UINT_MAX) 91 | { 92 | UpdateMinLOD(0.0f); 93 | 94 | const TDesc12 &Desc = GetDesc12(); 95 | (m_pParent->m_pDevice12.get()->*CViewMapper::GetCreate())( 96 | m_pResource->GetUnderlyingResource(), 97 | &Desc, 98 | m_Descriptor); 99 | 100 | m_ViewUniqueness = 0; 101 | return S_OK; 102 | } 103 | return S_FALSE; 104 | } 105 | 106 | 
//---------------------------------------------------------------------------------------------------------------------------------- 107 | // Specialized because ID3D12Device::CreateUnorderedAccessView takes 2 resources as input 108 | template<> 109 | inline HRESULT View::RefreshUnderlying() noexcept 110 | { 111 | if (m_ViewUniqueness == UINT_MAX) // UINT_MAX is the value the ViewBase constructor stores, i.e. the descriptor has not been written yet 112 | { 113 | const TDesc12 &Desc = GetDesc12(); 114 | 115 | m_pParent->m_pDevice12.get()->CreateUnorderedAccessView( 116 | m_pResource->GetUnderlyingResource(), 117 | nullptr, // the second resource argument is left null 118 | &Desc, 119 | m_Descriptor 120 | ); 121 | 122 | m_ViewUniqueness = 0; // leave the UINT_MAX state so this branch is skipped on later calls 123 | } 124 | return S_OK; // NOTE(review): S_OK is returned even when nothing was refreshed, whereas the generic View::RefreshUnderlying returns S_FALSE in that case - confirm this difference is intentional 125 | } 126 | 127 | //---------------------------------------------------------------------------------------------------------------------------------- 128 | inline ViewBase::ViewBase(ImmediateContext* pDevice, Resource* pResource, CViewSubresourceSubset const& Subresources) noexcept 129 | : DeviceChild(pDevice) 130 | , m_pResource(pResource) 131 | , m_ViewUniqueness(UINT_MAX) // UINT_MAX is the value RefreshUnderlying() tests for before creating the descriptor 132 | , m_subresources(Subresources) 133 | { 134 | } 135 | 136 | //---------------------------------------------------------------------------------------------------------------------------------- 137 | template 138 | View::View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) 139 | : ViewBase(pDevice, 140 | &ViewResource, 141 | CViewSubresourceSubset(Desc, 142 | (UINT8)ViewResource.AppDesc()->MipLevels(), 143 | (UINT16)ViewResource.AppDesc()->ArraySize(), 144 | (UINT8)ViewResource.AppDesc()->NonOpaquePlaneCount() * ViewResource.SubresourceMultiplier())), 145 | m_Desc(Desc), 146 | APIFirstElement(0) 147 | { 148 | __if_exists(TDesc12::Buffer) // MSVC extension: the block below is compiled only when TDesc12 has a Buffer member 149 | { 150 | APIFirstElement = Desc.Buffer.FirstElement; 151 | } 152 | 153 | m_Descriptor = pDevice->GetViewAllocator().AllocateHeapSlot(&m_DescriptorHeapIndex); // throw( _com_error ) 154 | } 155 | 156 |
//---------------------------------------------------------------------------------------------------------------------------------- 157 | template 158 | View::~View() noexcept 159 | { 160 | m_pParent->GetViewAllocator().FreeHeapSlot(m_Descriptor, m_DescriptorHeapIndex); 161 | } 162 | 163 | }; -------------------------------------------------------------------------------- /include/d3d12translationlayer/SubresourceHelpers.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include 7 | 8 | namespace D3D12TranslationLayer 9 | { 10 | struct CBufferView {}; 11 | 12 | class CSubresourceSubset 13 | { 14 | public: 15 | CSubresourceSubset() noexcept {} 16 | explicit CSubresourceSubset(UINT8 NumMips, UINT16 NumArraySlices, UINT8 NumPlanes, UINT8 FirstMip = 0, UINT16 FirstArraySlice = 0, UINT8 FirstPlane = 0) noexcept; 17 | explicit CSubresourceSubset(const CBufferView&); 18 | explicit CSubresourceSubset(const D3D12_SHADER_RESOURCE_VIEW_DESC&) noexcept; 19 | explicit CSubresourceSubset(const D3D12_UNORDERED_ACCESS_VIEW_DESC&) noexcept; 20 | explicit CSubresourceSubset(const D3D12_RENDER_TARGET_VIEW_DESC&) noexcept; 21 | explicit CSubresourceSubset(const D3D12_DEPTH_STENCIL_VIEW_DESC&) noexcept; 22 | 23 | SIZE_T DoesNotOverlap(const CSubresourceSubset&) const noexcept; 24 | UINT Mask() const noexcept; // Only useable/used when the result will fit in 32 bits. 25 | 26 | UINT NumNonExtendedSubresources() const noexcept; 27 | UINT NumExtendedSubresources() const noexcept; 28 | 29 | public: 30 | UINT16 m_BeginArray; // Also used to store Tex3D slices. 
31 | UINT16 m_EndArray; // End - Begin == Array Slices 32 | UINT8 m_BeginMip; 33 | UINT8 m_EndMip; // End - Begin == Mip Levels 34 | UINT8 m_BeginPlane; 35 | UINT8 m_EndPlane; 36 | }; 37 | 38 | inline void DecomposeSubresourceIdxNonExtended(UINT Subresource, UINT NumMips, _Out_ UINT& MipLevel, _Out_ UINT& ArraySlice) 39 | { 40 | MipLevel = Subresource % NumMips; 41 | ArraySlice = Subresource / NumMips; 42 | } 43 | 44 | inline void DecomposeSubresourceIdxNonExtended(UINT Subresource, UINT8 NumMips, _Out_ UINT8& MipLevel, _Out_ UINT16& ArraySlice) 45 | { 46 | MipLevel = Subresource % NumMips; 47 | ArraySlice = static_cast(Subresource / NumMips); 48 | } 49 | 50 | template 51 | inline void DecomposeSubresourceIdxExtended(UINT Subresource, UINT NumMips, UINT ArraySize, _Out_ T& MipLevel, _Out_ U& ArraySlice, _Out_ V& PlaneSlice) 52 | { 53 | D3D12DecomposeSubresource(Subresource, NumMips, ArraySize, MipLevel, ArraySlice, PlaneSlice); 54 | } 55 | 56 | inline UINT DecomposeSubresourceIdxExtendedGetMip(UINT Subresource, UINT NumMips) 57 | { 58 | return Subresource % NumMips; 59 | } 60 | 61 | inline UINT ComposeSubresourceIdxNonExtended(UINT MipLevel, UINT ArraySlice, UINT NumMips) 62 | { 63 | return D3D11CalcSubresource(MipLevel, ArraySlice, NumMips); 64 | } 65 | 66 | inline UINT ComposeSubresourceIdxExtended(UINT MipLevel, UINT ArraySlice, UINT PlaneSlice, UINT NumMips, UINT ArraySize) 67 | { 68 | return D3D12CalcSubresource(MipLevel, ArraySlice, PlaneSlice, NumMips, ArraySize); 69 | } 70 | 71 | inline UINT ComposeSubresourceIdxArrayThenPlane(UINT NumMips, UINT PlaneCount, UINT MipLevel, UINT ArraySlice, UINT PlaneSlice) 72 | { 73 | return (ArraySlice * PlaneCount * NumMips) + (PlaneSlice * NumMips) + MipLevel; 74 | } 75 | 76 | inline UINT ConvertSubresourceIndexAddPlane(UINT Subresource, UINT NumSubresourcesPerPlane, UINT PlaneSlice) 77 | { 78 | assert(Subresource < NumSubresourcesPerPlane || PlaneSlice == 0); 79 | return (Subresource + NumSubresourcesPerPlane * 
PlaneSlice); 80 | } 81 | 82 | inline UINT ConvertSubresourceIndexRemovePlane(UINT Subresource, UINT NumSubresourcesPerPlane) 83 | { 84 | return (Subresource % NumSubresourcesPerPlane); 85 | } 86 | 87 | inline UINT GetPlaneIdxFromSubresourceIdx(UINT Subresource, UINT NumSubresourcesPerPlane) 88 | { 89 | return (Subresource / NumSubresourcesPerPlane); 90 | } 91 | 92 | class CViewSubresourceSubset : public CSubresourceSubset 93 | { 94 | public: 95 | enum DepthStencilMode { ReadOnly, WriteOnly, ReadOrWrite }; 96 | 97 | public: 98 | CViewSubresourceSubset() {} 99 | explicit CViewSubresourceSubset(CSubresourceSubset const& Subresources, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount); 100 | explicit CViewSubresourceSubset(const CBufferView&); 101 | CViewSubresourceSubset(const D3D12_SHADER_RESOURCE_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount); 102 | CViewSubresourceSubset(const D3D12_UNORDERED_ACCESS_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount); 103 | CViewSubresourceSubset(const D3D12_RENDER_TARGET_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount); 104 | CViewSubresourceSubset(const D3D12_DEPTH_STENCIL_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount, DepthStencilMode DSMode = ReadOrWrite); 105 | 106 | template 107 | static CViewSubresourceSubset FromView(const T* pView); 108 | 109 | public: 110 | class CViewSubresourceIterator; 111 | 112 | public: 113 | CViewSubresourceIterator begin() const; 114 | CViewSubresourceIterator end() const; 115 | bool IsWholeResource() const; 116 | bool IsEmpty() const; 117 | UINT ArraySize() const; 118 | 119 | UINT MinSubresource() const; 120 | UINT MaxSubresource() const; 121 | 122 | private: 123 | void Reduce(); 124 | 125 | protected: 126 | UINT8 m_MipLevels; 127 | UINT16 m_ArraySlices; 128 | UINT8 m_PlaneCount; 129 | }; 130 | 131 | // This iterator iterates over contiguous ranges of subresources within a subresource subset. 
eg: 132 | // 133 | // // For each contiguous subresource range. 134 | // for( CViewSubresourceSubset::CViewSubresourceIterator it = ViewSubset.begin(); it != ViewSubset.end(); ++it ) 135 | // { 136 | // // StartSubresource and EndSubresource members of the iterator describe the contiguous range. 137 | // for( UINT SubresourceIndex = it.StartSubresource(); SubresourceIndex < it.EndSubresource(); SubresourceIndex++ ) 138 | // { 139 | // // Action for each subresource within the current range. 140 | // } 141 | // } 142 | // 143 | class CViewSubresourceSubset::CViewSubresourceIterator 144 | { 145 | public: 146 | CViewSubresourceIterator(CViewSubresourceSubset const& SubresourceSet, UINT16 ArraySlice, UINT8 PlaneCount); 147 | CViewSubresourceIterator& operator++(); 148 | CViewSubresourceIterator& operator--(); 149 | 150 | bool operator==(CViewSubresourceIterator const& other) const; 151 | bool operator!=(CViewSubresourceIterator const& other) const; 152 | 153 | UINT StartSubresource() const; 154 | UINT EndSubresource() const; 155 | std::pair operator*() const; 156 | 157 | private: 158 | CViewSubresourceSubset const& m_Subresources; // stored by reference, so the iterator must not outlive the subset it refers to 159 | UINT16 m_CurrentArraySlice; 160 | UINT8 m_CurrentPlaneSlice; 161 | }; 162 | 163 | template< typename T > 164 | inline bool IsPow2( 165 | T num 166 | ) 167 | { 168 | static_assert(static_cast(-1) > 0, "Signed type passed to IsPow2"); // compile-time rejection of signed T 169 | return !(num & (num - 1)); // true when at most one bit is set. NOTE(review): this also yields true for num == 0 (0 & (0 - 1) == 0) - confirm callers never pass 0 or accept that result 170 | } 171 | }; -------------------------------------------------------------------------------- /src/openclon12/program.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License.
3 | #pragma once 4 | 5 | #include "context.hpp" 6 | #include "compiler.hpp" 7 | #include 8 | #undef GetBinaryType 9 | 10 | #include "RootSignature.hpp" 11 | #include "PipelineState.hpp" 12 | 13 | using unique_dxil = std::unique_ptr; 14 | 15 | class Kernel; 16 | class Program : public CLChildBase 17 | { 18 | public: 19 | const std::string m_Source; 20 | const std::shared_ptr m_ParsedIL; 21 | 22 | Context& GetContext() const { return m_Parent.get(); } 23 | 24 | Program(Context& Parent, std::string Source); 25 | Program(Context& Parent, std::shared_ptr ParsedIL); 26 | Program(Context& Parent, std::vector Devices); 27 | using Callback = void(CL_CALLBACK*)(cl_program, void*); 28 | 29 | cl_int Build(std::vector Devices, const char* options, Callback pfn_notify, void* user_data); 30 | cl_int Compile(std::vector Devices, const char* options, cl_uint num_input_headers, const cl_program *input_headers, const char**header_include_names, Callback pfn_notify, void* user_data); 31 | cl_int Link(const char* options, cl_uint num_input_programs, const cl_program* input_programs, Callback pfn_notify, void* user_data); 32 | 33 | void StoreBinary(Device* Device, std::shared_ptr OwnedBinary, cl_program_binary_type Type); 34 | void SetSpecConstant(cl_uint ID, size_t size, const void *value); 35 | 36 | const ProgramBinary* GetSpirV(Device* device) const; 37 | 38 | friend cl_int CL_API_CALL clGetProgramInfo(cl_program, cl_program_info, size_t, void*, size_t*); 39 | friend cl_int CL_API_CALL clGetProgramBuildInfo(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*); 40 | friend cl_kernel CL_API_CALL clCreateKernel(cl_program, const char*, cl_int*); 41 | friend cl_int CL_API_CALL clCreateKernelsInProgram(cl_program, cl_uint, cl_kernel*, cl_uint*); 42 | 43 | void KernelCreated(); 44 | void KernelFreed(); 45 | 46 | struct SpecializationKey 47 | { 48 | D3DDevice const* Device; 49 | union 50 | { 51 | struct 52 | { 53 | uint16_t LocalSize[3]; 54 | uint16_t LowerInt64 : 1; 55 
| uint16_t LowerInt16 : 1; 56 | uint16_t SupportGlobalOffsets : 1; 57 | uint16_t SupportLocalOffsets : 1; 58 | uint16_t Padding : 12; 59 | } Bits; 60 | uint64_t Value; 61 | } ConfigData; 62 | uint32_t NumArgs; 63 | union PackedArgData 64 | { 65 | uint32_t LocalArgSize; 66 | struct 67 | { 68 | unsigned NormalizedCoords : 1; 69 | unsigned AddressingMode : 3; 70 | unsigned LinearFiltering : 1; 71 | unsigned Padding : 27; 72 | } SamplerArgData; 73 | } Args[1]; 74 | static size_t AllocatedByteSize(uint32_t NumArgs); 75 | static size_t HashByteSize(uint32_t NumArgs); 76 | static std::unique_ptr Allocate(D3DDevice const* Device, CompiledDxil::Configuration const& conf); 77 | private: 78 | SpecializationKey(D3DDevice const* Device, CompiledDxil::Configuration const& conf); 79 | }; 80 | struct SpecializationKeyHash 81 | { 82 | size_t operator()(std::unique_ptr const&) const; 83 | }; 84 | struct SpecializationKeyEqual 85 | { 86 | bool operator()(std::unique_ptr const& a, std::unique_ptr const& b) const; 87 | }; 88 | struct SpecializationValue 89 | { 90 | bool m_Error = false; 91 | unique_dxil m_Dxil; 92 | std::unique_ptr m_RS; 93 | std::unique_ptr m_PSO; 94 | SpecializationValue() = default; 95 | SpecializationValue(decltype(m_Dxil) d, decltype(m_RS) rs, decltype(m_PSO) p) 96 | : m_Dxil(std::move(d)), m_RS(std::move(rs)), m_PSO(std::move(p)) { } 97 | SpecializationValue(SpecializationValue &&) = default; 98 | SpecializationValue &operator=(SpecializationValue &&) = default; 99 | }; 100 | 101 | struct SpecializationData 102 | { 103 | const SpecializationKey *KeyInMap; 104 | SpecializationValue *Value; 105 | bool NeedToCreate; 106 | uint64_t ProgramHash[2]; 107 | }; 108 | 109 | SpecializationData GetSpecializationData( 110 | Device* device, std::string const& kernelName, std::unique_ptr key); 111 | std::unique_lock GetSpecializationUpdateLock() const { return std::unique_lock(m_SpecializationUpdateLock); } 112 | void SpecializationComplete() const { 
m_SpecializationEvent.notify_all(); }; 113 | void WaitForSpecialization(std::unique_lock &lock) const { m_SpecializationEvent.wait(lock); } 114 | 115 | private: 116 | mutable std::recursive_mutex m_Lock; 117 | mutable std::mutex m_SpecializationUpdateLock; 118 | mutable std::condition_variable m_SpecializationEvent; 119 | uint32_t m_NumLiveKernels = 0; 120 | 121 | struct KernelData 122 | { 123 | KernelData(ProgramBinary::Kernel meta, unique_dxil d) : m_Meta(meta), m_GenericDxil(std::move(d)) {} 124 | 125 | ProgramBinary::Kernel m_Meta; 126 | unique_dxil m_GenericDxil; 127 | std::unordered_map, SpecializationValue, 128 | SpecializationKeyHash, SpecializationKeyEqual> m_SpecializationCache; 129 | }; 130 | 131 | struct PerDeviceData 132 | { 133 | Device* m_Device; 134 | D3DDevice *m_D3DDevice; 135 | cl_build_status m_BuildStatus = CL_BUILD_IN_PROGRESS; 136 | std::string m_BuildLog; 137 | std::shared_ptr m_OwnedBinary; 138 | uint64_t m_Hash[2] = {}; 139 | cl_program_binary_type m_BinaryType = CL_PROGRAM_BINARY_TYPE_NONE; 140 | std::string m_LastBuildOptions; 141 | std::map m_Kernels; 142 | 143 | uint32_t m_NumPendingLinks = 0; 144 | 145 | void CreateKernels(Program& program); 146 | 147 | std::mutex m_SpecializationCacheLock; 148 | }; 149 | std::unordered_map> m_BuildData; 150 | 151 | friend struct Loggers; 152 | 153 | const std::vector m_AssociatedDevices; 154 | ProgramBinary::SpecConstantValues m_SpecConstants; 155 | 156 | struct CommonOptions 157 | { 158 | std::shared_ptr BuildData; 159 | 160 | Compiler::CompileArgs::Features Features; 161 | std::vector Args; 162 | bool CreateLibrary; 163 | bool EnableLinkOptions; // Does nothing, validation only 164 | Callback pfn_notify; 165 | void* CallbackUserData; 166 | }; 167 | struct CompileArgs 168 | { 169 | std::map Headers; 170 | CommonOptions Common; 171 | }; 172 | struct LinkArgs 173 | { 174 | CommonOptions Common; 175 | std::vector LinkPrograms; 176 | }; 177 | struct BuildArgs 178 | { 179 | CommonOptions Common; 180 | 
std::vector BinaryBuildDevices; 181 | }; 182 | 183 | void AddBuiltinOptions(std::vector const& devices, CommonOptions& optionsStruct); 184 | cl_int ParseOptions(const char* optionsStr, CommonOptions& optionsStruct, bool SupportCompilerOptions, bool SupportLinkerOptions); 185 | cl_int BuildImpl(BuildArgs const& Args); 186 | cl_int CompileImpl(CompileArgs const& Args); 187 | cl_int LinkImpl(LinkArgs const& Args); 188 | }; 189 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/Allocator.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include "D3D12TranslationLayerDependencyIncludes.h" 6 | #include "BlockAllocators.h" 7 | #include "BlockAllocators.inl" 8 | #include "Util.hpp" 9 | 10 | namespace D3D12TranslationLayer 11 | { 12 | class HeapSuballocationBlock : public BlockAllocators::CGenericBlock 13 | { 14 | public: 15 | HeapSuballocationBlock() : BlockAllocators::CGenericBlock(), m_pDirectHeapAllocation(nullptr) {} 16 | HeapSuballocationBlock(UINT64 newOffset, UINT64 newSize, ID3D12Resource *pResource = nullptr) : BlockAllocators::CGenericBlock(newOffset, newSize), m_pDirectHeapAllocation(pResource) {} 17 | 18 | bool IsDirectAllocation() const { return m_pDirectHeapAllocation; } 19 | ID3D12Resource *GetDirectHeapAllocation() const { assert(IsDirectAllocation()); return m_pDirectHeapAllocation; } 20 | private: 21 | ID3D12Resource *m_pDirectHeapAllocation; 22 | }; 23 | 24 | class ImmediateContext; // Forward Declaration 25 | class InternalHeapAllocator 26 | { 27 | public: 28 | InternalHeapAllocator(ImmediateContext *pContext, AllocatorHeapType heapType) : 29 | m_pContext(pContext), m_HeapType(heapType) {} 30 | 31 | ID3D12Resource* Allocate(UINT64 size); 32 | void Deallocate(ID3D12Resource* pResource); 33 | private: 34 | ImmediateContext *m_pContext; 35 | 
AllocatorHeapType m_HeapType; 36 | }; 37 | 38 | // Doesn't do any suballocation and instead just allocates a whole new 39 | // resource directly for each call to Allocate() 40 | // Note: Because it doesn't suballocate at all, it is thread safe by default 41 | template 42 | class DirectAllocator 43 | { 44 | private: 45 | using InnerAllocatorDecayed = typename std::decay<_InnerAllocator>::type; 46 | public: 47 | template 48 | DirectAllocator(InnerAllocatorArgs&&... innerArgs) : // throw(std::bad_alloc) 49 | m_InnerAllocator(std::forward(innerArgs)...) {} 50 | DirectAllocator() = default; 51 | DirectAllocator(DirectAllocator&&) = default; 52 | DirectAllocator& operator=(DirectAllocator&&) = default; 53 | 54 | _BlockType Allocate(_SizeType size) { 55 | _BlockType block(0, size, m_InnerAllocator.Allocate(size)); 56 | return block; 57 | } 58 | void Deallocate(const _BlockType &block) { m_InnerAllocator.Deallocate(block.GetDirectHeapAllocation()); } 59 | 60 | typedef typename std::invoke_result::type AllocationType; 61 | 62 | inline bool IsOwner(_In_ const _BlockType &block) const { return block.GetOffset() == 0; } 63 | AllocationType GetInnerAllocation(const _BlockType &block) const { return block.GetDirectHeapAllocation(); } 64 | _SizeType GetInnerAllocationOffset(const _BlockType &block) const { return 0; } 65 | private: 66 | _InnerAllocator m_InnerAllocator; 67 | }; 68 | 69 | typedef DirectAllocator DirectHeapAllocator; 70 | typedef BlockAllocators::CDisjointBuddyAllocator DisjointBuddyHeapAllocator; 71 | 72 | class ThreadSafeBuddyHeapAllocator : DisjointBuddyHeapAllocator 73 | { 74 | public: 75 | template 76 | ThreadSafeBuddyHeapAllocator(UINT64 maxBlockSize, UINT64 threshold, InnerAllocatorArgs&&... innerArgs) : // throw(std::bad_alloc) 77 | DisjointBuddyHeapAllocator(maxBlockSize, threshold, std::forward(innerArgs)...) 
78 | {} 79 | ThreadSafeBuddyHeapAllocator() = default; 80 | ThreadSafeBuddyHeapAllocator(ThreadSafeBuddyHeapAllocator&&) = default; // NOTE(review): std::mutex (m_Lock below) is neither copyable nor movable, so this defaulted move constructor and the move assignment on the next line are implicitly deleted - confirm no caller relies on moving this type 81 | ThreadSafeBuddyHeapAllocator& operator=(ThreadSafeBuddyHeapAllocator&&) = default; 82 | 83 | HeapSuballocationBlock Allocate(UINT64 size) 84 | { 85 | auto scopedLock = std::lock_guard(m_Lock); 86 | return DisjointBuddyHeapAllocator::Allocate(size); 87 | } 88 | 89 | void Deallocate(const HeapSuballocationBlock &block) 90 | { 91 | auto scopedLock = std::lock_guard(m_Lock); 92 | DisjointBuddyHeapAllocator::Deallocate(block); 93 | } 94 | 95 | auto GetInnerAllocation(const HeapSuballocationBlock &block) const 96 | { 97 | auto scopedLock = std::lock_guard(m_Lock); 98 | return DisjointBuddyHeapAllocator::GetInnerAllocation(block); 99 | } 100 | 101 | // Exposing methods that don't require locks. 102 | using DisjointBuddyHeapAllocator::IsOwner; 103 | 104 | private: 105 | mutable std::mutex m_Lock; 106 | }; 107 | 108 | // Allocator that will conditionally choose to individually allocate resources or suballocate based on a 109 | // passed in function 110 | template 111 | class ConditionalAllocator 112 | { 113 | public: 114 | typedef bool(*RequiresDirectAllocationFunctionType)(typename _SizeType, typename AllocationArgs); 115 | 116 | template 117 | ConditionalAllocator(std::tuple suballocatedAllocatorArgs, 118 | std::tuple directHeapAllocatorArgs, 119 | RequiresDirectAllocationFunctionType pfnRequiresDirectHeapAllocator) 120 | : m_SuballocationAllocator(std::get(suballocatedAllocatorArgs)...), // initializers are listed in member declaration order, which is the order the members are actually constructed in 121 | m_DirectAllocator(std::get(directHeapAllocatorArgs)...), 122 | m_pfnUseDirectHeapAllocator(pfnRequiresDirectHeapAllocator) 123 | { 124 | } 125 | 126 | _BlockType Allocate(_SizeType size, AllocationArgs args) 127 | { 128 | if (m_pfnUseDirectHeapAllocator(size, args)) { return m_DirectAllocator.Allocate(size); } // the callback decides, per request, between a dedicated allocation and a suballocation 129 | else { return m_SuballocationAllocator.Allocate(size); } 130 | } 131 | 132 | void Deallocate(const _BlockType &block) 133 | { 134 |
assert(IsOwner(block)); 135 | if (block.IsDirectAllocation()) { m_DirectAllocator.Deallocate(block); } 136 | else { m_SuballocationAllocator.Deallocate(block); } 137 | } 138 | 139 | bool IsOwner(const _BlockType &block) const 140 | { 141 | if (block.IsDirectAllocation()) { return m_DirectAllocator.IsOwner(block); } 142 | else { return m_SuballocationAllocator.IsOwner(block); } 143 | } 144 | 145 | void Reset() 146 | { 147 | m_DirectAllocator.Reset(); 148 | m_SuballocationAllocator.Reset(); 149 | } 150 | 151 | auto GetInnerAllocation(const _BlockType &block) const 152 | { 153 | assert(IsOwner(block)); 154 | if (block.IsDirectAllocation()) { return m_DirectAllocator.GetInnerAllocation(block); } 155 | else { return m_SuballocationAllocator.GetInnerAllocation(block); } 156 | } 157 | 158 | _SizeType GetInnerAllocationOffset(const _BlockType &block) const 159 | { 160 | assert(IsOwner(block)); 161 | if (block.IsDirectAllocation()) { return m_DirectAllocator.GetInnerAllocationOffset(block); } 162 | else { return m_SuballocationAllocator.GetInnerAllocationOffset(block); } 163 | } 164 | 165 | private: 166 | SuballocationAllocator m_SuballocationAllocator; 167 | DirectAllocator m_DirectAllocator; 168 | RequiresDirectAllocationFunctionType m_pfnUseDirectHeapAllocator; 169 | }; 170 | static constexpr UINT cBuddyAllocatorThreshold = 64 * 1024; 171 | } 172 | -------------------------------------------------------------------------------- /include/d3d12translationlayer/FormatDesc.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #pragma once 4 | 5 | #include 6 | #include 7 | 8 | #define D3DFORMATDESC 1 9 | 10 | #define MAP_ALIGN_REQUIREMENT 16 // Map is required to return 16-byte aligned addresses 11 | 12 | // ---------------------------------------------------------------------------- 13 | // Some enumerations used in the D3D11_FORMAT_DETAIL structure 14 | // ---------------------------------------------------------------------------- 15 | typedef enum D3D11_FORMAT_LAYOUT 16 | { 17 | D3D11FL_STANDARD = 0, // standard layout 18 | D3D11FL_CUSTOM = -1 // custom layout 19 | // Note, 1 bit allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... 20 | // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ 21 | } D3D11_FORMAT_LAYOUT; 22 | 23 | typedef enum D3D11_FORMAT_TYPE_LEVEL 24 | { 25 | D3D11FTL_NO_TYPE = 0, 26 | D3D11FTL_PARTIAL_TYPE = -2, 27 | D3D11FTL_FULL_TYPE = -1, 28 | // Note, 2 bits allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... 29 | // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ 30 | } D3D11_FORMAT_TYPE_LEVEL; 31 | 32 | typedef enum D3D11_FORMAT_COMPONENT_NAME 33 | { 34 | D3D11FCN_R = -4, 35 | D3D11FCN_G = -3, 36 | D3D11FCN_B = -2, 37 | D3D11FCN_A = -1, 38 | D3D11FCN_D = 0, 39 | D3D11FCN_S = 1, 40 | D3D11FCN_X = 2, 41 | // Note, 3 bits allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... 42 | // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ 43 | } D3D11_FORMAT_COMPONENT_NAME; 44 | 45 | typedef enum D3D11_FORMAT_COMPONENT_INTERPRETATION 46 | { 47 | D3D11FCI_TYPELESS = 0, 48 | D3D11FCI_FLOAT = -4, 49 | D3D11FCI_SNORM = -3, 50 | D3D11FCI_UNORM = -2, 51 | D3D11FCI_SINT = -1, 52 | D3D11FCI_UINT = 1, 53 | D3D11FCI_UNORM_SRGB = 2, 54 | D3D11FCI_BIASED_FIXED_2_8 = 3, 55 | // Note, 3 bits allocated for this in FORMAT_DETAIL below. 
If you add fields here, add bits... 56 | // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ 57 | } D3D11_FORMAT_COMPONENT_INTERPRETATION; 58 | 59 | // ---------------------------------------------------------------------------- 60 | // 61 | // CD3D11FormatHelper 62 | // 63 | // ---------------------------------------------------------------------------- 64 | class CD3D11FormatHelper 65 | { 66 | private: 67 | // ---------------------------------------------------------------------------- 68 | // Information describing everything about a D3D11 Resource Format 69 | // ---------------------------------------------------------------------------- 70 | 71 | // This struct holds information about formats that is feature level and driver version agnostic 72 | typedef struct FORMAT_DETAIL 73 | { 74 | DXGI_FORMAT DXGIFormat; 75 | DXGI_FORMAT ParentFormat; 76 | const DXGI_FORMAT* pDefaultFormatCastSet; // This is dependent on FL/driver version, but is here to save a lot of space 77 | UINT8 BitsPerComponent[4]; // only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE 78 | UINT8 BitsPerUnit; // BitsPerUnit is bits per pixel for non-compressed formats and bits per block for compressed formats 79 | BOOL SRGBFormat : 1; 80 | UINT WidthAlignment : 4; // number of texels to align to in a mip level. 81 | UINT HeightAlignment : 4; // Top level dimensions must be a multiple of these 82 | UINT DepthAlignment : 1; // values. 83 | D3D11_FORMAT_LAYOUT Layout : 1; 84 | D3D11_FORMAT_TYPE_LEVEL TypeLevel : 2; 85 | D3D11_FORMAT_COMPONENT_NAME ComponentName0 : 3; // RED ... only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE 86 | D3D11_FORMAT_COMPONENT_NAME ComponentName1 : 3; // GREEN ... only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE 87 | D3D11_FORMAT_COMPONENT_NAME ComponentName2 : 3; // BLUE ... only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE 88 | D3D11_FORMAT_COMPONENT_NAME ComponentName3 : 3; // ALPHA ... 
only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE 89 | D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation0 : 3; // only used for D3D11FTL_FULL_TYPE 90 | D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation1 : 3; // only used for D3D11FTL_FULL_TYPE 91 | D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation2 : 3; // only used for D3D11FTL_FULL_TYPE 92 | D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation3 : 3; // only used for D3D11FTL_FULL_TYPE 93 | bool bPlanar : 1; 94 | bool bYUV : 1; 95 | } FORMAT_DETAIL; 96 | 97 | static const FORMAT_DETAIL s_FormatDetail[]; 98 | static const LPCSTR s_FormatNames[]; // separate from above structure so it can be compiled out of runtime. 99 | static const UINT s_NumFormats; 100 | 101 | public: 102 | static bool IsBlockCompressFormat(DXGI_FORMAT Format); 103 | static UINT GetByteAlignment(DXGI_FORMAT Format); 104 | static HRESULT CalculateExtraPlanarRows(DXGI_FORMAT format, UINT plane0Height, _Out_ UINT& totalHeight); 105 | static HRESULT CalculateMinimumRowMajorRowPitch(DXGI_FORMAT Format, UINT Width, _Out_ UINT& RowPitch); 106 | static HRESULT CalculateMinimumRowMajorSlicePitch(DXGI_FORMAT Format, UINT ContextBasedRowPitch, UINT Height, _Out_ UINT& SlicePitch); 107 | static bool IsSRGBFormat(DXGI_FORMAT Format); 108 | static UINT GetNumComponentsInFormat( DXGI_FORMAT Format ); 109 | // Converts the sequential component index (range from 0 to GetNumComponentsInFormat()) to 110 | // the absolute component index (range 0 to 3). 111 | static DXGI_FORMAT GetParentFormat(DXGI_FORMAT Format); 112 | static const DXGI_FORMAT* GetFormatCastSet(DXGI_FORMAT Format); 113 | static D3D11_FORMAT_TYPE_LEVEL GetTypeLevel(DXGI_FORMAT Format); 114 | static UINT GetBitsPerUnit(DXGI_FORMAT Format); 115 | static UINT GetBitsPerElement(DXGI_FORMAT Format); // Legacy function used to support D3D10on9 only. Do not use. 
116 | static UINT GetWidthAlignment(DXGI_FORMAT Format); 117 | static UINT GetHeightAlignment(DXGI_FORMAT Format); 118 | static UINT GetDepthAlignment(DXGI_FORMAT Format); 119 | static D3D11_FORMAT_COMPONENT_NAME GetComponentName(DXGI_FORMAT Format, UINT AbsoluteComponentIndex); 120 | static UINT GetBitsPerComponent(DXGI_FORMAT Format, UINT AbsoluteComponentIndex); 121 | static D3D11_FORMAT_COMPONENT_INTERPRETATION GetFormatComponentInterpretation(DXGI_FORMAT Format, UINT AbsoluteComponentIndex); 122 | static BOOL Planar(DXGI_FORMAT Format); 123 | static BOOL NonOpaquePlanar(DXGI_FORMAT Format); 124 | static BOOL YUV(DXGI_FORMAT Format); 125 | static BOOL Opaque(DXGI_FORMAT Format) { return Format == DXGI_FORMAT_420_OPAQUE; } 126 | static bool FamilySupportsStencil(DXGI_FORMAT Format); 127 | static void GetYCbCrChromaSubsampling(DXGI_FORMAT Format, _Out_ UINT& HorizontalSubsampling, _Out_ UINT& VerticalSubsampling); 128 | static UINT NonOpaquePlaneCount(DXGI_FORMAT Format); 129 | 130 | protected: 131 | static UINT GetDetailTableIndex(DXGI_FORMAT Format); 132 | static UINT GetDetailTableIndexNoThrow(DXGI_FORMAT Format); 133 | private: 134 | static const FORMAT_DETAIL* GetFormatDetail( DXGI_FORMAT Format ); 135 | }; 136 | 137 | // End of file 138 | -------------------------------------------------------------------------------- /src/openclon12/sampler.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #include "sampler.hpp" 4 | 5 | static D3D12_SAMPLER_DESC TranslateSamplerDesc(Sampler::Desc const& desc) 6 | { 7 | D3D12_SAMPLER_DESC ret = {}; 8 | ret.AddressU = ret.AddressV = ret.AddressW = 9 | [](cl_addressing_mode mode) 10 | { 11 | switch (mode) 12 | { 13 | default: 14 | case CL_ADDRESS_CLAMP: return D3D12_TEXTURE_ADDRESS_MODE_BORDER; 15 | case CL_ADDRESS_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP; 16 | case CL_ADDRESS_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; 17 | case CL_ADDRESS_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; 18 | } 19 | }(desc.AddressingMode); 20 | ret.Filter = [](cl_filter_mode mode) 21 | { 22 | switch (mode) 23 | { 24 | default: 25 | case CL_FILTER_NEAREST: return D3D12_FILTER_MIN_MAG_MIP_POINT; 26 | case CL_FILTER_LINEAR: return D3D12_FILTER_MIN_MAG_MIP_LINEAR; 27 | } 28 | }(desc.FilterMode); 29 | ret.MaxLOD = std::numeric_limits::max(); 30 | return ret; 31 | } 32 | 33 | Sampler::Sampler(Context& Parent, Desc const& desc, const cl_sampler_properties *properties) 34 | : CLChildBase(Parent) 35 | , m_Desc(desc) 36 | , m_Properties(PropertiesToVector(properties)) 37 | { 38 | } 39 | 40 | D3D12TranslationLayer::Sampler& Sampler::GetUnderlying(D3DDevice* device) 41 | { 42 | std::lock_guard Lock(m_Lock); 43 | auto iter = m_UnderlyingSamplers.find(device); 44 | if (iter != m_UnderlyingSamplers.end()) 45 | return iter->second; 46 | 47 | auto ret = m_UnderlyingSamplers.try_emplace(device, &device->ImmCtx(), TranslateSamplerDesc(m_Desc)); 48 | return ret.first->second; 49 | } 50 | 51 | 52 | 53 | template 54 | bool ValidateSamplerProperties(cl_sampler_properties const* properties, TReporter&& ReportError) 55 | { 56 | constexpr cl_sampler_properties KnownProperties[] = 57 | { 58 | CL_SAMPLER_NORMALIZED_COORDS, 59 | CL_SAMPLER_ADDRESSING_MODE, 60 | CL_SAMPLER_FILTER_MODE 61 | }; 62 | bool SeenProperties[std::extent_v] = {}; 63 | for (auto CurProp = properties; properties && *CurProp; CurProp += 2) 64 | { 65 | auto 
KnownPropIter = std::find(KnownProperties, std::end(KnownProperties), *CurProp); 66 | if (KnownPropIter == std::end(KnownProperties)) 67 | { 68 | return !ReportError("Unknown property.", CL_INVALID_PROPERTY); 69 | } 70 | 71 | auto PropIndex = std::distance(KnownProperties, KnownPropIter); 72 | if (SeenProperties[PropIndex]) 73 | { 74 | return !ReportError("Property specified twice.", CL_INVALID_PROPERTY); 75 | } 76 | 77 | SeenProperties[PropIndex] = true; 78 | } 79 | 80 | return true; 81 | } 82 | 83 | static cl_sampler 84 | clCreateSamplerWithPropertiesImpl(cl_context context_, 85 | const cl_sampler_properties * sampler_properties, 86 | Sampler::Desc & desc, 87 | cl_int * errcode_ret) 88 | { 89 | if (!context_) 90 | { 91 | if (errcode_ret) *errcode_ret = CL_INVALID_CONTEXT; 92 | return nullptr; 93 | } 94 | Context& context = *static_cast(context_); 95 | auto ReportError = context.GetErrorReporter(errcode_ret); 96 | 97 | if (desc.NormalizedCoords > 1) 98 | desc.NormalizedCoords = 1; 99 | switch (desc.AddressingMode) 100 | { 101 | case CL_ADDRESS_NONE: 102 | case CL_ADDRESS_CLAMP_TO_EDGE: 103 | case CL_ADDRESS_CLAMP: 104 | case CL_ADDRESS_REPEAT: 105 | case CL_ADDRESS_MIRRORED_REPEAT: 106 | break; 107 | default: return ReportError("Invalid sampler addressing mode.", CL_INVALID_VALUE); 108 | } 109 | switch (desc.FilterMode) 110 | { 111 | case CL_FILTER_LINEAR: 112 | case CL_FILTER_NEAREST: 113 | break; 114 | default: return ReportError("Invalid sampler filter mode.", CL_INVALID_VALUE); 115 | } 116 | 117 | try 118 | { 119 | if (errcode_ret) *errcode_ret = CL_SUCCESS; 120 | return new Sampler(context, desc, sampler_properties); 121 | } 122 | catch (std::bad_alloc&) { return ReportError(nullptr, CL_OUT_OF_HOST_MEMORY); } 123 | catch (std::exception& e) { return ReportError(e.what(), CL_OUT_OF_RESOURCES); } 124 | catch (_com_error &) { return ReportError(nullptr, CL_OUT_OF_RESOURCES); } 125 | } 126 | 127 | CL_API_ENTRY cl_sampler CL_API_CALL 128 | 
clCreateSamplerWithProperties(cl_context context_, 129 | const cl_sampler_properties * sampler_properties, 130 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_2_0 131 | { 132 | if (!context_) 133 | { 134 | if (errcode_ret) *errcode_ret = CL_INVALID_CONTEXT; 135 | return nullptr; 136 | } 137 | Context& context = *static_cast(context_); 138 | auto ReportError = context.GetErrorReporter(errcode_ret); 139 | 140 | if (!ValidateSamplerProperties(sampler_properties, ReportError)) 141 | { 142 | return nullptr; 143 | } 144 | 145 | Sampler::Desc desc = { false, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST }; 146 | if (auto FoundVal = FindProperty(sampler_properties, CL_SAMPLER_NORMALIZED_COORDS); FoundVal) 147 | desc.NormalizedCoords = (cl_bool)*FoundVal; 148 | if (auto FoundVal = FindProperty(sampler_properties, CL_SAMPLER_ADDRESSING_MODE); FoundVal) 149 | desc.AddressingMode = (cl_addressing_mode)*FoundVal; 150 | if (auto FoundVal = FindProperty(sampler_properties, CL_SAMPLER_FILTER_MODE); FoundVal) 151 | desc.FilterMode = (cl_filter_mode)*FoundVal; 152 | 153 | return clCreateSamplerWithPropertiesImpl(context_, sampler_properties, desc, errcode_ret); 154 | } 155 | 156 | CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL 157 | clCreateSampler(cl_context context, 158 | cl_bool normalized_coords, 159 | cl_addressing_mode addressing_mode, 160 | cl_filter_mode filter_mode, 161 | cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED 162 | { 163 | Sampler::Desc desc = { normalized_coords, addressing_mode, filter_mode }; 164 | return clCreateSamplerWithPropertiesImpl(context, nullptr, desc, errcode_ret); 165 | } 166 | 167 | CL_API_ENTRY cl_int CL_API_CALL 168 | clRetainSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0 169 | { 170 | if (!sampler) return CL_INVALID_SAMPLER; 171 | static_cast(sampler)->Retain(); 172 | return CL_SUCCESS; 173 | } 174 | 175 | CL_API_ENTRY cl_int CL_API_CALL 176 | clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0 177 
| { 178 | if (!sampler) return CL_INVALID_SAMPLER; 179 | static_cast(sampler)->Release(); 180 | return CL_SUCCESS; 181 | } 182 | 183 | CL_API_ENTRY cl_int CL_API_CALL 184 | clGetSamplerInfo(cl_sampler sampler_, 185 | cl_sampler_info param_name, 186 | size_t param_value_size, 187 | void * param_value, 188 | size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 189 | { 190 | if (!sampler_) 191 | { 192 | return CL_INVALID_SAMPLER; 193 | } 194 | 195 | Sampler& sampler = *static_cast(sampler_); 196 | auto& desc = sampler.m_Desc; 197 | auto RetValue = [&](auto&& param) 198 | { 199 | return CopyOutParameter(param, param_value_size, param_value, param_value_size_ret); 200 | }; 201 | 202 | switch (param_name) 203 | { 204 | case CL_SAMPLER_REFERENCE_COUNT: return RetValue(sampler.GetRefCount()); 205 | case CL_SAMPLER_CONTEXT: return RetValue((cl_context)&sampler.m_Parent.get()); 206 | case CL_SAMPLER_NORMALIZED_COORDS: return RetValue(desc.NormalizedCoords); 207 | case CL_SAMPLER_ADDRESSING_MODE: return RetValue(desc.AddressingMode); 208 | case CL_SAMPLER_FILTER_MODE: return RetValue(desc.FilterMode); 209 | case CL_SAMPLER_PROPERTIES: 210 | return CopyOutParameterImpl(sampler.m_Properties.data(), 211 | sampler.m_Properties.size() * sizeof(sampler.m_Properties[0]), 212 | param_value_size, param_value, param_value_size_ret); 213 | } 214 | return sampler.m_Parent->GetErrorReporter()("Unknown param_name", CL_INVALID_VALUE); 215 | } 216 | 217 | -------------------------------------------------------------------------------- /src/compilers/v2/clc/clc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © Microsoft Corporation 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #ifndef MESA_CLC_H 25 | #define MESA_CLC_H 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | #ifdef __cplusplus 32 | extern "C" { 33 | #endif 34 | 35 | typedef struct nir_shader nir_shader; 36 | struct nir_shader_compiler_options; 37 | 38 | struct clc_named_value { 39 | const char *name; 40 | const char *value; 41 | }; 42 | 43 | enum clc_spirv_version { 44 | CLC_SPIRV_VERSION_MAX = 0, 45 | CLC_SPIRV_VERSION_1_0, 46 | CLC_SPIRV_VERSION_1_1, 47 | CLC_SPIRV_VERSION_1_2, 48 | CLC_SPIRV_VERSION_1_3, 49 | CLC_SPIRV_VERSION_1_4, 50 | }; 51 | 52 | struct clc_optional_features { 53 | bool extended_bit_ops; 54 | bool fp16; 55 | bool fp64; 56 | bool int64; 57 | bool images; 58 | bool images_depth; 59 | bool images_gl_depth; 60 | bool images_gl_msaa; 61 | bool images_mipmap; 62 | bool images_mipmap_writes; 63 | bool images_read_write; 64 | bool images_unorm_int_2_101010; 65 | bool images_write_3d; 66 | bool integer_dot_product; 67 | bool intel_subgroups; 68 | bool kernel_clock; 69 | /* OpenCL core subgroups */ 70 | bool subgroups; 71 | /* OpenCL extension cl_khr_subgroups, which requires 
independent forward 72 | * progress 73 | */ 74 | bool subgroups_ifp; 75 | bool subgroups_shuffle; 76 | bool subgroups_shuffle_relative; 77 | bool subgroups_ballot; 78 | }; 79 | 80 | struct clc_compile_args { 81 | const struct clc_named_value *headers; 82 | unsigned num_headers; 83 | struct clc_named_value source; 84 | const char * const *args; 85 | unsigned num_args; 86 | 87 | /* SPIRV version to target. */ 88 | enum clc_spirv_version spirv_version; 89 | struct clc_optional_features features; 90 | bool use_llvm_spirv_target; 91 | 92 | /* Allowed extensions SPIRV extensions the OpenCL->SPIRV translation can 93 | * enable. A pointer to a NULL terminated array of strings, allow any 94 | * extension if NULL. 95 | */ 96 | const char * const *allowed_spirv_extensions; 97 | 98 | /* Indicate that the input file tries to be compatible with C code. This 99 | * means that for example the bit-field clang extension is enabled. 100 | */ 101 | bool c_compatible; 102 | 103 | unsigned address_bits; 104 | }; 105 | 106 | struct clc_validator_options { 107 | uint32_t limit_max_function_arg; 108 | }; 109 | 110 | struct clc_binary { 111 | void *data; 112 | size_t size; 113 | }; 114 | 115 | struct clc_linker_args { 116 | const struct clc_binary * const *in_objs; 117 | unsigned num_in_objs; 118 | unsigned create_library; 119 | }; 120 | 121 | typedef void (*clc_msg_callback)(void *priv, const char *msg); 122 | 123 | struct clc_logger { 124 | void *priv; 125 | clc_msg_callback error; 126 | clc_msg_callback warning; 127 | }; 128 | 129 | enum clc_kernel_arg_type_qualifier { 130 | CLC_KERNEL_ARG_TYPE_CONST = 1 << 0, 131 | CLC_KERNEL_ARG_TYPE_RESTRICT = 1 << 1, 132 | CLC_KERNEL_ARG_TYPE_VOLATILE = 1 << 2, 133 | }; 134 | 135 | enum clc_kernel_arg_access_qualifier { 136 | CLC_KERNEL_ARG_ACCESS_READ = 1 << 0, 137 | CLC_KERNEL_ARG_ACCESS_WRITE = 1 << 1, 138 | }; 139 | 140 | enum clc_kernel_arg_address_qualifier { 141 | CLC_KERNEL_ARG_ADDRESS_PRIVATE, 142 | CLC_KERNEL_ARG_ADDRESS_CONSTANT, 143 | 
CLC_KERNEL_ARG_ADDRESS_LOCAL, 144 | CLC_KERNEL_ARG_ADDRESS_GLOBAL, 145 | }; 146 | 147 | struct clc_kernel_arg { 148 | const char *name; 149 | const char *type_name; 150 | unsigned type_qualifier; 151 | unsigned access_qualifier; 152 | enum clc_kernel_arg_address_qualifier address_qualifier; 153 | }; 154 | 155 | enum clc_vec_hint_type { 156 | CLC_VEC_HINT_TYPE_CHAR = 0, 157 | CLC_VEC_HINT_TYPE_SHORT = 1, 158 | CLC_VEC_HINT_TYPE_INT = 2, 159 | CLC_VEC_HINT_TYPE_LONG = 3, 160 | CLC_VEC_HINT_TYPE_HALF = 4, 161 | CLC_VEC_HINT_TYPE_FLOAT = 5, 162 | CLC_VEC_HINT_TYPE_DOUBLE = 6 163 | }; 164 | 165 | struct clc_kernel_info { 166 | const char *name; 167 | size_t num_args; 168 | const struct clc_kernel_arg *args; 169 | 170 | unsigned vec_hint_size; 171 | enum clc_vec_hint_type vec_hint_type; 172 | 173 | unsigned local_size[3]; 174 | unsigned local_size_hint[3]; 175 | }; 176 | 177 | enum clc_spec_constant_type { 178 | CLC_SPEC_CONSTANT_UNKNOWN, 179 | CLC_SPEC_CONSTANT_BOOL, 180 | CLC_SPEC_CONSTANT_HALF, 181 | CLC_SPEC_CONSTANT_FLOAT, 182 | CLC_SPEC_CONSTANT_DOUBLE, 183 | CLC_SPEC_CONSTANT_INT8, 184 | CLC_SPEC_CONSTANT_UINT8, 185 | CLC_SPEC_CONSTANT_INT16, 186 | CLC_SPEC_CONSTANT_UINT16, 187 | CLC_SPEC_CONSTANT_INT32, 188 | CLC_SPEC_CONSTANT_UINT32, 189 | CLC_SPEC_CONSTANT_INT64, 190 | CLC_SPEC_CONSTANT_UINT64, 191 | }; 192 | 193 | struct clc_parsed_spec_constant { 194 | uint32_t id; 195 | enum clc_spec_constant_type type; 196 | }; 197 | 198 | struct clc_parsed_spirv { 199 | const struct clc_kernel_info *kernels; 200 | unsigned num_kernels; 201 | 202 | const struct clc_parsed_spec_constant *spec_constants; 203 | unsigned num_spec_constants; 204 | }; 205 | 206 | struct clc_libclc; 207 | 208 | struct clc_libclc_options { 209 | unsigned optimize; 210 | const struct nir_shader_compiler_options *nir_options; 211 | }; 212 | 213 | struct clc_libclc *clc_libclc_new(const struct clc_logger *logger, const struct clc_libclc_options *options); 214 | 215 | void clc_free_libclc(struct 
clc_libclc *lib); 216 | 217 | const nir_shader *clc_libclc_get_clc_shader(struct clc_libclc *lib); 218 | 219 | void clc_libclc_serialize(struct clc_libclc *lib, void **serialized, size_t *size); 220 | void clc_libclc_free_serialized(void *serialized); 221 | struct clc_libclc *clc_libclc_deserialize(const void *serialized, size_t size); 222 | 223 | /* Forward declare */ 224 | struct set; 225 | 226 | bool 227 | clc_compile_c_to_spir(const struct clc_compile_args *args, 228 | const struct clc_logger *logger, 229 | struct clc_binary *out_spir, 230 | struct set *dependencies); 231 | 232 | void 233 | clc_free_spir(struct clc_binary *spir); 234 | 235 | bool 236 | clc_compile_spir_to_spirv(const struct clc_binary *in_spir, 237 | const struct clc_logger *logger, 238 | struct clc_binary *out_spirv); 239 | 240 | void 241 | clc_free_spirv(struct clc_binary *spirv); 242 | 243 | bool 244 | clc_compile_c_to_spirv(const struct clc_compile_args *args, 245 | const struct clc_logger *logger, 246 | struct clc_binary *out_spirv, 247 | struct set *dependencies); 248 | 249 | bool 250 | clc_link_spirv(const struct clc_linker_args *args, 251 | const struct clc_logger *logger, 252 | struct clc_binary *out_spirv); 253 | 254 | bool 255 | clc_parse_spirv(const struct clc_binary *in_spirv, 256 | const struct clc_logger *logger, 257 | struct clc_parsed_spirv *out_data); 258 | 259 | void 260 | clc_free_parsed_spirv(struct clc_parsed_spirv *data); 261 | 262 | typedef union { 263 | bool b; 264 | float f32; 265 | double f64; 266 | int8_t i8; 267 | uint8_t u8; 268 | int16_t i16; 269 | uint16_t u16; 270 | int32_t i32; 271 | uint32_t u32; 272 | int64_t i64; 273 | uint64_t u64; 274 | } clc_spirv_const_value; 275 | 276 | struct clc_spirv_specialization { 277 | uint32_t id; 278 | clc_spirv_const_value value; 279 | bool defined_on_module; 280 | }; 281 | 282 | struct clc_spirv_specialization_consts { 283 | const struct clc_spirv_specialization *specializations; 284 | unsigned num_specializations; 285 | }; 
286 | 287 | bool 288 | clc_specialize_spirv(const struct clc_binary *in_spirv, 289 | const struct clc_parsed_spirv *parsed_data, 290 | const struct clc_spirv_specialization_consts *consts, 291 | struct clc_binary *out_spirv); 292 | 293 | enum clc_debug_flags { 294 | CLC_DEBUG_DUMP_SPIRV = 1 << 0, 295 | CLC_DEBUG_DUMP_LLVM = 1 << 1, 296 | CLC_DEBUG_VERBOSE = 1 << 2, 297 | }; 298 | uint64_t clc_debug_flags(void); 299 | 300 | #ifdef __cplusplus 301 | } 302 | #endif 303 | 304 | #endif /* MESA_CLC_H */ 305 | -------------------------------------------------------------------------------- /src/openclon12/compiler.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | class Logger 15 | { 16 | protected: 17 | std::recursive_mutex &m_lock; 18 | std::string &m_buildLog; 19 | 20 | public: 21 | Logger(std::recursive_mutex &lock, std::string& build_log) 22 | : m_lock(lock), m_buildLog(build_log) 23 | { 24 | } 25 | void Log(const char *msg) const; 26 | }; 27 | 28 | // An abstraction over a program binary 29 | class ProgramBinary 30 | { 31 | public: 32 | struct Kernel 33 | { 34 | struct Arg 35 | { 36 | enum class AddressSpace 37 | { 38 | Private, Constant, Local, Global 39 | }; 40 | const char *name; 41 | const char *type_name; 42 | bool readable, writable; 43 | bool is_const, is_restrict, is_volatile; 44 | AddressSpace address_qualifier; 45 | }; 46 | enum class VecHintType 47 | { 48 | Char, Short, Int, Long, Half, Float, Double 49 | }; 50 | 51 | const char *name; 52 | std::vector args; 53 | unsigned vec_hint_size; 54 | VecHintType vec_hint_type; 55 | }; 56 | 57 | struct SpecConstantInfo 58 | { 59 | unsigned value_size; 60 | }; 61 | struct SpecConstantValue 62 | { 63 | static constexpr size_t MaxValueSize = 8; 64 | char value[MaxValueSize]; 
65 | }; 66 | using SpecConstantValues = std::unordered_map; 67 | 68 | virtual ~ProgramBinary() = default; 69 | virtual bool Parse(Logger const *logger) = 0; 70 | virtual size_t GetBinarySize() const = 0; 71 | virtual const void* GetBinary() const = 0; 72 | 73 | const std::vector &GetKernelInfo() const; 74 | const SpecConstantInfo *GetSpecConstantInfo(uint32_t ID) const; 75 | 76 | protected: 77 | std::vector m_KernelInfo; 78 | std::unordered_map m_SpecConstants; 79 | }; 80 | 81 | // An abstraction over DXIL + metadata 82 | class CompiledDxil 83 | { 84 | public: 85 | struct Metadata 86 | { 87 | struct Arg 88 | { 89 | unsigned offset, size; 90 | 91 | struct Image 92 | { 93 | unsigned buffer_ids[3]; 94 | unsigned num_buffer_ids; 95 | }; 96 | struct Sampler 97 | { 98 | unsigned sampler_id; 99 | }; 100 | struct Memory 101 | { 102 | unsigned buffer_id; 103 | }; 104 | struct Local 105 | { 106 | unsigned sharedmem_offset; 107 | }; 108 | std::variant properties; 109 | }; 110 | struct Consts 111 | { 112 | void *data; 113 | size_t size; 114 | unsigned uav_id; 115 | }; 116 | struct ConstSampler 117 | { 118 | unsigned sampler_id; 119 | unsigned addressing_mode; 120 | unsigned filter_mode; 121 | bool normalized_coords; 122 | }; 123 | struct Printf 124 | { 125 | unsigned num_args; 126 | unsigned *arg_sizes; 127 | char *str; 128 | }; 129 | 130 | ProgramBinary::Kernel const& program_kernel_info; 131 | 132 | std::vector args; 133 | std::vector consts; 134 | std::vector constSamplers; 135 | std::vector printfs; 136 | 137 | unsigned kernel_inputs_cbv_id; 138 | unsigned kernel_inputs_buf_size; 139 | unsigned work_properties_cbv_id; 140 | int printf_uav_id; 141 | size_t num_uavs; 142 | size_t num_srvs; 143 | size_t num_samplers; 144 | size_t local_mem_size; 145 | size_t priv_mem_size; 146 | 147 | uint16_t local_size[3]; 148 | uint16_t local_size_hint[3]; 149 | 150 | Metadata(ProgramBinary::Kernel const& parent) 151 | : program_kernel_info(parent) 152 | { 153 | } 154 | Metadata(Metadata 
const &) = default; 155 | }; 156 | 157 | struct Configuration 158 | { 159 | struct Arg 160 | { 161 | struct Local 162 | { 163 | unsigned size; 164 | }; 165 | struct Sampler 166 | { 167 | bool normalizedCoords, linearFiltering; 168 | unsigned addressingMode; 169 | }; 170 | std::variant config; 171 | }; 172 | 173 | uint16_t local_size[3]; 174 | std::vector args; 175 | bool lower_int64; 176 | bool lower_int16; 177 | bool support_global_work_id_offsets; 178 | bool support_work_group_id_offsets; 179 | 180 | D3D_SHADER_MODEL shader_model; 181 | }; 182 | 183 | virtual ~CompiledDxil() = default; 184 | virtual size_t GetBinarySize() const = 0; 185 | virtual const void* GetBinary() const = 0; 186 | virtual void *GetBinary() = 0; 187 | 188 | CompiledDxil(ProgramBinary const& parent, const char *name); 189 | CompiledDxil(ProgramBinary const& parent, Metadata const &metadata); 190 | void Sign(); 191 | Metadata const& GetMetadata() const; 192 | 193 | protected: 194 | Metadata m_Metadata; 195 | ProgramBinary const& m_Parent; 196 | }; 197 | 198 | struct WorkProperties { 199 | /* Returned from get_global_offset(), and added into get_global_id() */ 200 | unsigned global_offset_x; 201 | unsigned global_offset_y; 202 | unsigned global_offset_z; 203 | /* Returned from get_work_dim() */ 204 | unsigned work_dim; 205 | /* The number of work groups being launched (i.e. the parameters to Dispatch). 206 | * If the requested global size doesn't fit in a single Dispatch, these values should 207 | * indicate the total number of groups that *should* have been launched. 
*/ 208 | unsigned group_count_total_x; 209 | unsigned group_count_total_y; 210 | unsigned group_count_total_z; 211 | unsigned padding; 212 | /* If the requested global size doesn't fit in a single Dispatch, subsequent dispatches 213 | * should fill out these offsets to indicate how many groups have already been launched */ 214 | unsigned group_id_offset_x; 215 | unsigned group_id_offset_y; 216 | unsigned group_id_offset_z; 217 | }; 218 | 219 | class ShaderCache; 220 | class Compiler 221 | { 222 | public: 223 | struct CompileArgs 224 | { 225 | struct Header 226 | { 227 | const char *name; 228 | const char *contents; 229 | }; 230 | std::vector
headers; 231 | const char *program_source; 232 | struct Features 233 | { 234 | bool fp16; 235 | bool fp64; 236 | bool int64; 237 | bool images; 238 | bool images_read_write; 239 | bool images_write_3d; 240 | bool intel_subgroups; 241 | bool subgroups; 242 | } features; 243 | std::vector cmdline_args; 244 | }; 245 | 246 | struct LinkerArgs 247 | { 248 | std::vector objs; 249 | bool create_library; 250 | }; 251 | 252 | static std::unique_ptr GetV2(); 253 | 254 | virtual ~Compiler() = default; 255 | 256 | // Ensure libclc is loaded and ready to go 257 | virtual bool Initialize(ShaderCache &cache) = 0; 258 | 259 | // Compile OpenCL C into SPIR-V 260 | virtual std::unique_ptr Compile(CompileArgs const& args, Logger const& logger) const = 0; 261 | 262 | // Link multiple SPIR-V binaries into one, and remove linkage info 263 | virtual std::unique_ptr Link(LinkerArgs const& args, Logger const& logger) const = 0; 264 | 265 | // Load a SPIR-V binary from a memory blob 266 | virtual std::unique_ptr Load(const void *data, size_t size) const = 0; 267 | 268 | // Given a SPIR-V binay, return a new SPIR-V binary that has specialization constant default values replaced with the given ones 269 | virtual std::unique_ptr Specialize(ProgramBinary const& obj, ProgramBinary::SpecConstantValues const& values, Logger const& logger) const = 0; 270 | 271 | // Convert a kernel from SPIR-V into DXIL with configuration properties 272 | virtual std::unique_ptr GetKernel(const char *name, ProgramBinary const& obj, CompiledDxil::Configuration const*, Logger const* logger) const = 0; 273 | 274 | // Load a DXIL binary from a memory blob; the caller is responsible for serializing and deserializing metadata 275 | virtual std::unique_ptr LoadKernel(ProgramBinary const &obj, const void *data, size_t size, CompiledDxil::Metadata const &metadata) const = 0; 276 | 277 | // Copy the work properties into a constant buffer 278 | virtual std::byte* CopyWorkProperties(std::byte* WorkPropertiesBuffer, 
WorkProperties const& props) const = 0; 279 | virtual size_t GetWorkPropertiesChunkSize() const = 0; 280 | 281 | // Return a version that can be used for initializing a shader cache 282 | virtual uint64_t GetVersionForCache() const = 0; 283 | }; 284 | -------------------------------------------------------------------------------- /src/openclon12/task.hpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #pragma once 4 | #include "platform.hpp" 5 | #include "context.hpp" 6 | #include 7 | #include 8 | 9 | #include "Query.hpp" 10 | 11 | // A task is an encapsulation of something that can be submitted to a command queue 12 | // and/or something that can be waited on (i.e. cl_event). 13 | // 14 | // A task goes through several state transitions: 15 | // 1. A task is created in the 'queued' state. This means it's in a command queue 16 | // but hasn't yet been submitted to the 'device.' 17 | // It goes through a state transition when its corresponding command queue is flushed. 18 | // 2. After the queue is flushed, the task enteres the 'submitted' state. In this state, 19 | // the device needs to check conditions on the task before deciding to schedule it. 20 | // A task can have other tasks that it's dependent on. It stays in this state until 21 | // those dependencies are satisfied. 22 | // 3. Once all dependencies are satisfied, the task enters the 'ready' state. 23 | // In the ready state, the work in the task is free to be scheduled by the device 24 | // whenever the device is ready to execute more work. Note that from the API there 25 | // is no distinction between the submitted and ready states. 26 | // Therefore, this state is more like a pseudo-state that only exists in theory. 27 | // 4. Once the device has started working on a given task, it enters the 'running' state. 28 | // 5. 
Once the device finishes all work associated with the task, but not necessarily 29 | // work that was submitted to a device-side queue by the task, it enters the 'ended' state. 30 | // 6. Once all work associated with the task and its children are done, it enters the 'complete' state. 31 | // The distinction between ended and complete only matters for tasks which are kernel enqueues, 32 | // and which submit child kernel executions to a device queue. 33 | // This implementation does not support device queues. 34 | // 35 | // These state transitions are visible out of the API in two ways, both exposed via events. 36 | // Every time a task is created, the developer has the opportunity to request an event for that task. 37 | // 1. The event can be polled for the current state of the associated task, as well as have 38 | // callbacks registered on it for event state changes. Neither the 'ready' nor the 'ended' state is visible here. 39 | // 2. If the task was submitted to a queue with profiling enabled, then the event can also 40 | // be queried for profiling information: specifically, the timestamp at which point 41 | // the task entered a particular state. Note that the 'ready' state does not have a timestamp. 42 | // 43 | // This implementation implements tasks in the following way: 44 | // - Any time an 'enqueue' API is called, a task object is created in the 'queued' state. 45 | // - The task object implements the event API. 46 | // - The task object sits in a command queue. 47 | // - The task has a list of all other tasks that it is dependent on, as well as a list of tasks 48 | // that are dependent on it. 49 | // --- The list of backward dependencies is pruned as dependencies are satisfied. 50 | // --- The list of forwards dependencies is cleared upon task completion/readiness. 51 | // - If a task is submitted to an in-order command queue, then the task immediately preceding 52 | // it in the queue is added as a dependency.
53 | // - When a command queue is flushed, all tasks in the queue are submitted to the device and become 'submitted'. 54 | // --- Note that the command queue needs to keep track of what tasks it has submitted but are not yet 55 | // complete, for the purposes of implementing 'finish', markers, and barriers, as well as specifically 56 | // the last task that was submitted for in-order queues. 57 | // - Tasks that are submitted with no dependencies are considered 'ready'. Ready tasks are added to a list. 58 | // --- When a task becomes ready, any other tasks that depend on it that can execute on the same device will also 59 | // be marked ready. Technically, this is a violation of the CL spec for events, because it means that given task A 60 | // and task B, where B depends on A, both A and B can be considered 'running' at the same time. The CL spec explicitly 61 | // says that an event should only be marked running when previous events are 'complete', but this seems like a more 62 | // desirable design than the one imposed by the spec. 63 | // --- At the end of the flush operation, a work item is created for a worker thread to execute all ready tasks. 64 | // --- After recording all ready tasks into a command list, the command list is submitted, and the thread waits for it to complete. 65 | // All tasks that were part of the command list are considered to be running at this point. 66 | // --- Then, all tasks that were part of that command list are marked complete. This enables new tasks to be marked ready. 67 | // --- If there are any newly ready tasks, then another worker thread work item is created to execute those.
68 | 69 | class Task : public CLChildBase 70 | { 71 | struct NotificationRequest 72 | { 73 | using Fn = void(CL_CALLBACK *)(cl_event, cl_int, void*); 74 | Fn m_pfn; 75 | void* m_userData; 76 | }; 77 | 78 | public: 79 | struct DependencyException {}; 80 | friend class D3DDevice; 81 | enum class State 82 | { 83 | // API-visible states (sorted in reverse order so CL_COMPLETE == CL_SUCCESS == 0) 84 | Complete = CL_COMPLETE, 85 | Running = CL_RUNNING, 86 | Submitted = CL_SUBMITTED, 87 | Queued = CL_QUEUED, 88 | 89 | // Internal states 90 | Ready, 91 | }; 92 | 93 | void Record(); 94 | State GetState() const { return m_State; } 95 | cl_ulong& GetTimestamp(cl_profiling_info timestampType); 96 | 97 | void AddDependencies(const cl_event* event_wait_list, cl_uint num_events_in_wait_list, TaskPoolLock const&); 98 | cl_int WaitForCompletion(); 99 | void RegisterCallback(cl_int command_exec_callback_type, NotificationRequest::Fn pfn_notify, void* user_data); 100 | 101 | const cl_command_type m_CommandType; 102 | const ::ref_ptr_int m_CommandQueue; 103 | const ::ref_ptr_int m_Device; 104 | class D3DDevice *const m_D3DDevice; 105 | 106 | Task(Context& Parent, cl_command_type command_type, cl_command_queue command_queue); 107 | Task(Context& Parent, D3DDevice& device); 108 | virtual ~Task(); 109 | 110 | static cl_ulong TimestampToNanoseconds(cl_ulong Ticks, cl_ulong Frequency); 111 | static cl_ulong TimestampFromQPC(); 112 | 113 | protected: 114 | void Submit(); 115 | void Ready(TaskPoolLock const&); 116 | void Started(TaskPoolLock const&); 117 | void Complete(cl_int error, TaskPoolLock const&); 118 | 119 | virtual void MigrateResources() = 0; 120 | virtual void RecordImpl() = 0; 121 | virtual void OnComplete() { } 122 | 123 | void FireNotification(NotificationRequest const& callback, cl_int state); 124 | void FireNotifications(); 125 | 126 | // State changes can only be made while holding the task pool lock 127 | State m_State = State::Queued; 128 | cl_ulong 
m_ProfilingTimestamps[4] = {}; 129 | 130 | std::vector m_TasksToWaitOn; 131 | std::set m_TasksWaitingOnThis; 132 | std::vector m_CompletionCallbacks; 133 | std::vector m_RunningCallbacks; 134 | std::vector m_SubmittedCallbacks; 135 | std::promise m_CompletionPromise; 136 | std::future m_CompletionFuture{ m_CompletionPromise.get_future() }; 137 | 138 | std::shared_ptr m_StartTimestamp; 139 | std::shared_ptr m_StopTimestamp; 140 | }; 141 | 142 | class UserEvent : public Task 143 | { 144 | public: 145 | UserEvent(Context& parent); 146 | using Task::Complete; 147 | 148 | private: 149 | void RecordImpl() final { } 150 | void MigrateResources() final { } 151 | }; 152 | 153 | class DummyTask : public Task 154 | { 155 | public: 156 | DummyTask(Context& Parent, cl_command_type type, cl_command_queue command_queue); 157 | 158 | private: 159 | void RecordImpl() final { } 160 | void MigrateResources() final { } 161 | }; 162 | 163 | class Resource; 164 | // Map tasks will be enqueued on a queue, and also tracked on the resource which has been mapped. 165 | // That'll allow Unmap to do the right thing, by looking up the type of map operation that was 166 | // done for a given pointer. 
167 | class MapTask : public Task 168 | { 169 | public: 170 | struct Args 171 | { 172 | cl_uint SrcX; 173 | cl_uint SrcY; 174 | cl_uint SrcZ; 175 | cl_uint Width; 176 | cl_uint Height; 177 | cl_uint Depth; 178 | cl_ushort FirstArraySlice; 179 | cl_ushort NumArraySlices; 180 | cl_uchar FirstMipLevel; 181 | }; 182 | 183 | MapTask(Context& Parent, cl_command_queue command_queue, Resource& resource, cl_map_flags flags, cl_command_type command, Args const& args); 184 | ~MapTask(); 185 | virtual void Unmap(bool IsResourceBeingDestroyed) = 0; 186 | void* GetPointer() const { return m_Pointer; } 187 | size_t GetRowPitch() const { return m_RowPitch; } 188 | size_t GetSlicePitch() const { return m_SlicePitch; } 189 | Resource& GetResource() const { return m_Resource; } 190 | 191 | protected: 192 | void* m_Pointer = nullptr; 193 | size_t m_RowPitch = 0, m_SlicePitch = 0; 194 | Resource& m_Resource; 195 | const cl_map_flags m_MapFlags; 196 | const Args m_Args; 197 | 198 | void OnComplete() override; 199 | void MigrateResources() override; 200 | }; 201 | -------------------------------------------------------------------------------- /src/openclon12/spookyv2.cpp: -------------------------------------------------------------------------------- 1 | // Spooky Hash 2 | // A 128-bit noncryptographic hash, for checksums and table lookup 3 | // By Bob Jenkins. Public domain. 4 | // Oct 31 2010: published framework, disclaimer ShortHash isn't right 5 | // Nov 7 2010: disabled ShortHash 6 | // Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again 7 | // April 10 2012: buffer overflow on platforms without unaligned reads 8 | // July 12 2012: was passing out variables in final to in/out in short 9 | // July 30 2012: I reintroduced the buffer overflow 10 | // August 5 2012: SpookyV2: d = should be d += in short hash, and remove extra mix from long hash 11 | 12 | #include 13 | #include "SpookyV2.h" 14 | 15 | #define ALLOW_UNALIGNED_READS 1 16 | 17 | // 18 | // short hash ... 
// it could be used on any message,
// but it's used by Spooky just for short messages.
//
// hash1/hash2 are in/out: they supply the two seed words and receive the result.
void SpookyHash::Short(
    const void *message,
    size_t length,
    uint64 *hash1,
    uint64 *hash2)
{
    uint64 buf[2*sc_numVars];
    // One cursor viewed at byte, 32-bit and 64-bit granularity; 'i' exposes the
    // address bits for the alignment test below.
    union
    {
        const uint8 *p8;
        uint32 *p32;
        uint64 *p64;
        size_t i;
    } u;

    u.p8 = (const uint8 *)message;

    // When unaligned reads are disallowed and the input is not 8-byte aligned,
    // read from a local copy instead. ALLOW_UNALIGNED_READS is defined as 1 in
    // this file, so this branch is never taken here.
    if (!ALLOW_UNALIGNED_READS && (u.i & 0x7))
    {
        memcpy(buf, message, length);
        u.p64 = buf;
    }

    size_t remainder = length%32;
    uint64 a=*hash1;
    uint64 b=*hash2;
    uint64 c=sc_const;
    uint64 d=sc_const;

    if (length > 15)
    {
        const uint64 *end = u.p64 + (length/32)*4;

        // handle all complete sets of 32 bytes
        for (; u.p64 < end; u.p64 += 4)
        {
            c += u.p64[0];
            d += u.p64[1];
            ShortMix(a,b,c,d);
            a += u.p64[2];
            b += u.p64[3];
        }

        //Handle the case of 16+ remaining bytes.
        if (remainder >= 16)
        {
            c += u.p64[0];
            d += u.p64[1];
            ShortMix(a,b,c,d);
            u.p64 += 2;
            remainder -= 16;
        }
    }

    // Handle the last 0..15 bytes, and its length
    // (the cases below fall through on purpose, accumulating one byte per step
    // until a 'break' is reached)
    d += ((uint64)length) << 56;
    switch (remainder)
    {
    case 15:
        d += ((uint64)u.p8[14]) << 48;
    case 14:
        d += ((uint64)u.p8[13]) << 40;
    case 13:
        d += ((uint64)u.p8[12]) << 32;
    case 12:
        d += u.p32[2];
        c += u.p64[0];
        break;
    case 11:
        d += ((uint64)u.p8[10]) << 16;
    case 10:
        d += ((uint64)u.p8[9]) << 8;
    case 9:
        d += (uint64)u.p8[8];
    case 8:
        c += u.p64[0];
        break;
    case 7:
        c += ((uint64)u.p8[6]) << 48;
    case 6:
        c += ((uint64)u.p8[5]) << 40;
    case 5:
        c += ((uint64)u.p8[4]) << 32;
    case 4:
        c += u.p32[0];
        break;
    case 3:
        c += ((uint64)u.p8[2]) << 16;
    case 2:
        c += ((uint64)u.p8[1]) << 8;
    case 1:
        c += (uint64)u.p8[0];
        break;
    case 0:
        c += sc_const;
        d += sc_const;
    }
    ShortEnd(a,b,c,d);
    *hash1 = a;
    *hash2 = b;
}




// do the whole hash in one call
//
// hash1/hash2 are in/out: seeds on entry, the 128-bit result on return.
// Messages shorter than sc_bufSize are handed to Short().
void SpookyHash::Hash128(
    const void *message,
    size_t length,
    uint64 *hash1,
    uint64 *hash2)
{
    if (length < sc_bufSize)
    {
        Short(message, length, hash1, hash2);
        return;
    }

    uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11;
    uint64 buf[sc_numVars];
    uint64 *end;
    union
    {
        const uint8 *p8;
        uint64 *p64;
        size_t i;
    } u;
    size_t remainder;

    h0=h3=h6=h9  = *hash1;
    h1=h4=h7=h10 = *hash2;
    h2=h5=h8=h11 = sc_const;

    u.p8 = (const uint8 *)message;
    end = u.p64 + (length/sc_blockSize)*sc_numVars;

    // handle all whole sc_blockSize blocks of bytes
    if (ALLOW_UNALIGNED_READS || ((u.i & 0x7) == 0))
    {
        while (u.p64 < end)
        {
            Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
            u.p64 += sc_numVars;
        }
    }
    else
    {
        // misaligned input: stage each block in 'buf' before mixing
        while (u.p64 < end)
        {
            memcpy(buf, u.p64, sc_blockSize);
            Mix(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
            u.p64 += sc_numVars;
        }
    }

    // handle the last partial block of sc_blockSize bytes
    // (zero-padded, with the number of leftover bytes stored in the final byte)
    remainder = (length - ((const uint8 *)end-(const uint8 *)message));
    memcpy(buf, end, remainder);
    memset(((uint8 *)buf)+remainder, 0, sc_blockSize-remainder);
    ((uint8 *)buf)[sc_blockSize-1] = (uint8)remainder;

    // do some final mixing
    End(buf, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11);
    *hash1 = h0;
    *hash2 = h1;
}



// init spooky state
// Resets the byte counters and stores the two seeds in m_state[0] and m_state[1].
void SpookyHash::Init(uint64 seed1, uint64 seed2)
{
    m_length = 0;
    m_remainder = 0;
    m_state[0] = seed1;
    m_state[1] = seed2;
}


// add a message fragment to the
state 201 | void SpookyHash::Update(const void *message, size_t length) 202 | { 203 | uint64 h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; 204 | size_t newLength = length + m_remainder; 205 | uint8 remainder; 206 | union 207 | { 208 | const uint8 *p8; 209 | uint64 *p64; 210 | size_t i; 211 | } u; 212 | const uint64 *end; 213 | 214 | // Is this message fragment too short? If it is, stuff it away. 215 | if (newLength < sc_bufSize) 216 | { 217 | memcpy(&((uint8 *)m_data)[m_remainder], message, length); 218 | m_length = length + m_length; 219 | m_remainder = (uint8)newLength; 220 | return; 221 | } 222 | 223 | // init the variables 224 | if (m_length < sc_bufSize) 225 | { 226 | h0=h3=h6=h9 = m_state[0]; 227 | h1=h4=h7=h10 = m_state[1]; 228 | h2=h5=h8=h11 = sc_const; 229 | } 230 | else 231 | { 232 | h0 = m_state[0]; 233 | h1 = m_state[1]; 234 | h2 = m_state[2]; 235 | h3 = m_state[3]; 236 | h4 = m_state[4]; 237 | h5 = m_state[5]; 238 | h6 = m_state[6]; 239 | h7 = m_state[7]; 240 | h8 = m_state[8]; 241 | h9 = m_state[9]; 242 | h10 = m_state[10]; 243 | h11 = m_state[11]; 244 | } 245 | m_length = length + m_length; 246 | 247 | // if we've got anything stuffed away, use it now 248 | if (m_remainder) 249 | { 250 | uint8 prefix = sc_bufSize-m_remainder; 251 | memcpy(&(((uint8 *)m_data)[m_remainder]), message, prefix); 252 | u.p64 = m_data; 253 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 254 | Mix(&u.p64[sc_numVars], h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 255 | u.p8 = ((const uint8 *)message) + prefix; 256 | length -= prefix; 257 | } 258 | else 259 | { 260 | u.p8 = (const uint8 *)message; 261 | } 262 | 263 | // handle all whole blocks of sc_blockSize bytes 264 | end = u.p64 + (length/sc_blockSize)*sc_numVars; 265 | remainder = (uint8)(length-((const uint8 *)end-u.p8)); 266 | if (ALLOW_UNALIGNED_READS || (u.i & 0x7) == 0) 267 | { 268 | while (u.p64 < end) 269 | { 270 | Mix(u.p64, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 271 | u.p64 += sc_numVars; 272 | } 273 | } 274 | else 275 
| { 276 | while (u.p64 < end) 277 | { 278 | memcpy(m_data, u.p8, sc_blockSize); 279 | Mix(m_data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 280 | u.p64 += sc_numVars; 281 | } 282 | } 283 | 284 | // stuff away the last few bytes 285 | m_remainder = remainder; 286 | memcpy(m_data, end, remainder); 287 | 288 | // stuff away the variables 289 | m_state[0] = h0; 290 | m_state[1] = h1; 291 | m_state[2] = h2; 292 | m_state[3] = h3; 293 | m_state[4] = h4; 294 | m_state[5] = h5; 295 | m_state[6] = h6; 296 | m_state[7] = h7; 297 | m_state[8] = h8; 298 | m_state[9] = h9; 299 | m_state[10] = h10; 300 | m_state[11] = h11; 301 | } 302 | 303 | 304 | // report the hash for the concatenation of all message fragments so far 305 | void SpookyHash::Final(uint64 *hash1, uint64 *hash2) 306 | { 307 | // init the variables 308 | if (m_length < sc_bufSize) 309 | { 310 | *hash1 = m_state[0]; 311 | *hash2 = m_state[1]; 312 | Short( m_data, m_length, hash1, hash2); 313 | return; 314 | } 315 | 316 | const uint64 *data = (const uint64 *)m_data; 317 | uint8 remainder = m_remainder; 318 | 319 | uint64 h0 = m_state[0]; 320 | uint64 h1 = m_state[1]; 321 | uint64 h2 = m_state[2]; 322 | uint64 h3 = m_state[3]; 323 | uint64 h4 = m_state[4]; 324 | uint64 h5 = m_state[5]; 325 | uint64 h6 = m_state[6]; 326 | uint64 h7 = m_state[7]; 327 | uint64 h8 = m_state[8]; 328 | uint64 h9 = m_state[9]; 329 | uint64 h10 = m_state[10]; 330 | uint64 h11 = m_state[11]; 331 | 332 | if (remainder >= sc_blockSize) 333 | { 334 | // m_data can contain two blocks; handle any whole first block 335 | Mix(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 336 | data += sc_numVars; 337 | remainder -= sc_blockSize; 338 | } 339 | 340 | // mix in the last partial block, and the length mod sc_blockSize 341 | memset(&((uint8 *)data)[remainder], 0, (sc_blockSize-remainder)); 342 | 343 | ((uint8 *)data)[sc_blockSize-1] = remainder; 344 | 345 | // do some final mixing 346 | End(data, h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 347 | 348 | *hash1 = 
h0; 349 | *hash2 = h1; 350 | } 351 | 352 | -------------------------------------------------------------------------------- /src/openclon12/platform.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 3 | #include "platform.hpp" 4 | #include "cache.hpp" 5 | #include "compiler.hpp" 6 | 7 | CL_API_ENTRY cl_int CL_API_CALL 8 | clGetPlatformInfo(cl_platform_id platform, 9 | cl_platform_info param_name, 10 | size_t param_value_size, 11 | void * param_value, 12 | size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 13 | { 14 | if (param_value_size == 0 && param_value != NULL) 15 | { 16 | return CL_INVALID_VALUE; 17 | } 18 | if (platform != g_Platform) 19 | { 20 | return CL_INVALID_PLATFORM; 21 | } 22 | 23 | if (param_name == CL_PLATFORM_HOST_TIMER_RESOLUTION) 24 | { 25 | if (param_value_size && param_value_size < sizeof(cl_ulong)) 26 | { 27 | return CL_INVALID_VALUE; 28 | } 29 | if (param_value_size) 30 | { 31 | LARGE_INTEGER TicksPerSecond; 32 | QueryPerformanceFrequency(&TicksPerSecond); 33 | *reinterpret_cast(param_value) = 34 | 1000000000 / TicksPerSecond.QuadPart; 35 | } 36 | if (param_value_size_ret) 37 | { 38 | *param_value_size_ret = sizeof(cl_ulong); 39 | } 40 | return CL_SUCCESS; 41 | } 42 | else if (param_name == CL_PLATFORM_NUMERIC_VERSION) 43 | { 44 | return CopyOutParameter( 45 | #ifdef CLON12_SUPPORT_3_0 46 | CL_MAKE_VERSION(3, 0, 0), 47 | #else 48 | CL_MAKE_VERSION(1, 2, 0), 49 | #endif 50 | param_value_size, param_value, param_value_size_ret); 51 | } 52 | else if (param_name == CL_PLATFORM_EXTENSIONS_WITH_VERSION) 53 | { 54 | constexpr cl_name_version extensions[] = 55 | { 56 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_icd" }, 57 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_extended_versioning" }, 58 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_base_atomics" }, 59 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_extended_atomics" }, 60 | { 
CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_base_atomics" }, 61 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_extended_atomics" }, 62 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_byte_addressable_store" }, 63 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_il_program" }, 64 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_gl_sharing" }, 65 | { CL_MAKE_VERSION(1, 0, 0), "cl_khr_gl_event" }, 66 | // TODO: Maybe loop over devices to see if they're all GPUs? 67 | // { CL_MAKE_VERSION(1, 0, 0), "cl_khr_3d_image_writes" }, 68 | }; 69 | return CopyOutParameter(extensions, param_value_size, param_value, param_value_size_ret); 70 | } 71 | 72 | auto pPlatform = Platform::CastFrom(platform); 73 | auto pString = [pPlatform, param_name]() -> const char* 74 | { 75 | switch (param_name) 76 | { 77 | case CL_PLATFORM_PROFILE: return pPlatform->Profile; 78 | case CL_PLATFORM_VERSION: return pPlatform->Version; 79 | case CL_PLATFORM_NAME: return pPlatform->Name; 80 | case CL_PLATFORM_VENDOR: return pPlatform->Vendor; 81 | case CL_PLATFORM_EXTENSIONS: return pPlatform->Extensions; 82 | case CL_PLATFORM_ICD_SUFFIX_KHR: return pPlatform->ICDSuffix; 83 | } 84 | return nullptr; 85 | }(); 86 | 87 | if (!pString) 88 | { 89 | return CL_INVALID_VALUE; 90 | } 91 | 92 | auto stringlen = strlen(pString) + 1; 93 | if (param_value_size && param_value_size < stringlen) 94 | { 95 | return CL_INVALID_VALUE; 96 | } 97 | if (param_value_size) 98 | { 99 | memcpy(param_value, pString, stringlen); 100 | } 101 | if (param_value_size_ret) 102 | { 103 | *param_value_size_ret = stringlen; 104 | } 105 | return CL_SUCCESS; 106 | } 107 | 108 | extern CL_API_ENTRY cl_int CL_API_CALL 109 | clUnloadPlatformCompiler(cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2 110 | { 111 | if (!platform) 112 | { 113 | return CL_INVALID_PLATFORM; 114 | } 115 | static_cast(platform)->UnloadCompiler(); 116 | return CL_SUCCESS; 117 | } 118 | 119 | static bool CheckWarpIsHardware() 120 | { 121 | char *warpIsHardwareStr = nullptr; 122 | bool 
warpIsHardware = _dupenv_s(&warpIsHardwareStr, nullptr, "CLON12_WARP_IS_HARDWARE") == 0 && 123 | warpIsHardwareStr && 124 | strcmp(warpIsHardwareStr, "1") == 0; 125 | free(warpIsHardwareStr); 126 | return warpIsHardware; 127 | } 128 | 129 | #include "device.hpp" 130 | Platform::Platform(cl_icd_dispatch* dispatch) 131 | : m_bWarpIsHardware(CheckWarpIsHardware()) 132 | { 133 | this->dispatch = dispatch; 134 | 135 | ComPtr spFactory; 136 | THROW_IF_FAILED(DXCoreCreateAdapterFactory(IID_PPV_ARGS(&spFactory))); 137 | 138 | THROW_IF_FAILED(spFactory->CreateAdapterList(1, &DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE, IID_PPV_ARGS(&m_spAdapters))); 139 | 140 | m_Devices.resize(m_spAdapters->GetAdapterCount()); 141 | for (cl_uint i = 0; i < m_Devices.size(); ++i) 142 | { 143 | ComPtr spAdapter; 144 | THROW_IF_FAILED(m_spAdapters->GetAdapter(i, IID_PPV_ARGS(&spAdapter))); 145 | m_Devices[i] = std::make_unique(*this, spAdapter.Get()); 146 | } 147 | 148 | char *forceWarpStr = nullptr; 149 | bool forceWarp = _dupenv_s(&forceWarpStr, nullptr, "CLON12_FORCE_WARP") == 0 && 150 | forceWarpStr && 151 | strcmp(forceWarpStr, "1") == 0; 152 | free(forceWarpStr); 153 | 154 | char *forceHardwareStr = nullptr; 155 | bool forceHardware = !forceWarp && 156 | _dupenv_s(&forceHardwareStr, nullptr, "CLON12_FORCE_HARDWARE") == 0 && 157 | forceHardwareStr && 158 | strcmp(forceHardwareStr, "1") == 0; 159 | free(forceHardwareStr); 160 | 161 | if (forceWarp) 162 | { 163 | (void)std::remove_if(m_Devices.begin(), m_Devices.end(), [](std::unique_ptr const& a) 164 | { 165 | auto&& hwids = a->GetHardwareIds(); 166 | return hwids.deviceID != 0x8c && hwids.vendorID != 0x1414; 167 | }); 168 | } 169 | if (forceWarp || forceHardware) 170 | { 171 | m_Devices.resize(1); 172 | } 173 | m_Devices[0]->SetDefaultDevice(); 174 | } 175 | 176 | Platform::~Platform() = default; 177 | 178 | void Platform::RemoveInvalidDevices() noexcept 179 | { 180 | for (cl_uint i = 0; i < m_Devices.size(); ++i) 181 | { 182 | try 183 | 
{ 184 | if (m_Devices[i]->IsMCDM()) 185 | { 186 | auto &Device = m_Devices[i]->InitD3D(); 187 | m_Devices[i]->ReleaseD3D(Device); 188 | } 189 | } 190 | catch (...) 191 | { 192 | m_Devices.erase(m_Devices.begin() + i); 193 | } 194 | } 195 | } 196 | 197 | cl_uint Platform::GetNumDevices() const noexcept 198 | { 199 | return (cl_uint)m_Devices.size(); 200 | } 201 | 202 | Device *Platform::GetDevice(cl_uint i) const noexcept 203 | { 204 | return m_Devices[i].get(); 205 | } 206 | 207 | TaskPoolLock Platform::GetTaskPoolLock() 208 | { 209 | TaskPoolLock lock; 210 | lock.m_Lock = std::unique_lock{ m_TaskLock }; 211 | return lock; 212 | } 213 | 214 | void Platform::FlushAllDevices(TaskPoolLock const& Lock) 215 | { 216 | for (auto &device : m_Devices) 217 | { 218 | device->FlushAllDevices(Lock); 219 | } 220 | } 221 | 222 | void Platform::DeviceInit() 223 | { 224 | std::lock_guard Lock(m_ModuleLock); 225 | if (m_ActiveDeviceCount++ > 0) 226 | { 227 | return; 228 | } 229 | 230 | BackgroundTaskScheduler::SchedulingMode mode{ 1u, BackgroundTaskScheduler::Priority::Normal }; 231 | m_CallbackScheduler.SetSchedulingMode(mode); 232 | 233 | mode.NumThreads = std::thread::hardware_concurrency(); 234 | m_CompileAndLinkScheduler.SetSchedulingMode(mode); 235 | } 236 | 237 | void Platform::DeviceUninit() 238 | { 239 | std::lock_guard Lock(m_ModuleLock); 240 | if (--m_ActiveDeviceCount > 0) 241 | { 242 | return; 243 | } 244 | 245 | BackgroundTaskScheduler::SchedulingMode mode{ 0u, BackgroundTaskScheduler::Priority::Normal }; 246 | m_CallbackScheduler.SetSchedulingMode(mode); 247 | m_CompileAndLinkScheduler.SetSchedulingMode(mode); 248 | } 249 | 250 | #ifdef _WIN32 251 | extern "C" extern IMAGE_DOS_HEADER __ImageBase; 252 | #endif 253 | 254 | void LoadFromNextToSelf(XPlatHelpers::unique_module& mod, const char* name) 255 | { 256 | #ifdef _WIN32 257 | char selfPath[MAX_PATH] = ""; 258 | if (auto pathSize = GetModuleFileNameA((HINSTANCE)&__ImageBase, selfPath, sizeof(selfPath)); 259 | 
pathSize == 0 || pathSize == sizeof(selfPath)) 260 | { 261 | return; 262 | } 263 | 264 | auto lastSlash = strrchr(selfPath, '\\'); 265 | if (!lastSlash) 266 | { 267 | return; 268 | } 269 | 270 | *(lastSlash + 1) = '\0'; 271 | if (strcat_s(selfPath, name) != 0) 272 | { 273 | return; 274 | } 275 | 276 | mod.load(selfPath); 277 | #endif 278 | } 279 | 280 | Compiler *Platform::GetCompiler() 281 | { 282 | std::lock_guard lock(m_ModuleLock); 283 | if (!m_Compiler) 284 | { 285 | m_Compiler = Compiler::GetV2(); 286 | } 287 | return m_Compiler.get(); 288 | } 289 | 290 | XPlatHelpers::unique_module const& Platform::GetDXIL() 291 | { 292 | std::lock_guard lock(m_ModuleLock); 293 | if (!m_DXIL) 294 | { 295 | m_DXIL.load("DXIL.dll"); 296 | } 297 | if (!m_DXIL) 298 | { 299 | LoadFromNextToSelf(m_DXIL, "DXIL.dll"); 300 | } 301 | return m_DXIL; 302 | } 303 | 304 | void Platform::UnloadCompiler() 305 | { 306 | // If we want to actually support unloading the compiler, 307 | // we'll need to track all live programs/kernels, because 308 | // they need to call back into the compiler to be able to 309 | // free their program memory. 310 | } 311 | 312 | bool Platform::AnyD3DDevicesExist() const noexcept 313 | { 314 | return std::any_of(m_Devices.begin(), m_Devices.end(), 315 | [](std::unique_ptr const& dev) { return dev->HasD3DDevice(); }); 316 | } 317 | 318 | void Platform::CloseCaches() 319 | { 320 | for (auto& device : m_Devices) 321 | { 322 | device->CloseCaches(); 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /src/openclon12/main.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Microsoft Corporation. 2 | // Licensed under the MIT License. 
3 | #include "platform.hpp" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | // {1926C47A-D119-4A54-BA9C-D119D68F50A7} 11 | TRACELOGGING_DEFINE_PROVIDER( 12 | g_hOpenCLOn12Provider, 13 | "Microsoft.OpenCLOn12", 14 | (0x1926c47a, 0xd119, 0x4a54, 0xba, 0x9c, 0xd1, 0x19, 0xd6, 0x8f, 0x50, 0xa7) 15 | ); 16 | 17 | struct TraceLoggingRegistration 18 | { 19 | TraceLoggingRegistration() { TraceLoggingRegister(g_hOpenCLOn12Provider); } 20 | ~TraceLoggingRegistration() { TraceLoggingUnregister(g_hOpenCLOn12Provider); } 21 | } g_TraceLoggingRegistration; 22 | 23 | struct ExtensionTableEntry 24 | { 25 | const char *name; 26 | void *func; 27 | }; 28 | 29 | #define EXT_FUNC(name) { #name, (void*)(name) } 30 | 31 | static struct ExtensionTableEntry clExtensions[] = 32 | { 33 | // cl_khr_icd 34 | EXT_FUNC(clIcdGetPlatformIDsKHR), 35 | 36 | // cl_khr_gl_sharing 37 | EXT_FUNC(clGetGLContextInfoKHR), 38 | EXT_FUNC(clCreateFromGLBuffer), 39 | EXT_FUNC(clCreateFromGLTexture), 40 | EXT_FUNC(clCreateFromGLTexture2D), 41 | EXT_FUNC(clCreateFromGLTexture3D), 42 | EXT_FUNC(clCreateFromGLRenderbuffer), 43 | EXT_FUNC(clEnqueueAcquireGLObjects), 44 | EXT_FUNC(clEnqueueReleaseGLObjects), 45 | EXT_FUNC(clGetGLObjectInfo), 46 | EXT_FUNC(clGetGLTextureInfo), 47 | 48 | // cl_khr_gl_event 49 | EXT_FUNC(clCreateEventFromGLsyncKHR), 50 | 51 | // cl_khr_il_program 52 | EXT_FUNC(clCreateProgramWithILKHR), 53 | }; 54 | 55 | static const int clExtensionCount = sizeof(clExtensions) / sizeof(clExtensions[0]); 56 | 57 | void * CL_API_CALL 58 | clGetExtensionFunctionAddress(const char *name) 59 | { 60 | int ii; 61 | 62 | for (ii = 0; ii < clExtensionCount; ii++) { 63 | if (!strcmp(name, clExtensions[ii].name)) { 64 | return clExtensions[ii].func; 65 | } 66 | } 67 | 68 | return nullptr; 69 | } 70 | 71 | /* Extension function access 72 | * 73 | * Returns the extension function address for the given function name, 74 | * or NULL if a valid function can not be found. 
The client must 75 | * check to make sure the address is not NULL, before using or 76 | * calling the returned function address. 77 | */ 78 | extern CL_API_ENTRY void * CL_API_CALL 79 | clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, 80 | const char * func_name) CL_API_SUFFIX__VERSION_1_2 81 | { 82 | if (platform != g_Platform) 83 | { 84 | return nullptr; 85 | } 86 | return clGetExtensionFunctionAddress(func_name); 87 | } 88 | 89 | cl_icd_dispatch g_DispatchTable 90 | { 91 | /* OpenCL 1.0 */ 92 | clGetPlatformIDs, 93 | clGetPlatformInfo, 94 | clGetDeviceIDs, 95 | clGetDeviceInfo, 96 | clCreateContext, 97 | clCreateContextFromType, 98 | clRetainContext, 99 | clReleaseContext, 100 | clGetContextInfo, 101 | clCreateCommandQueue, 102 | clRetainCommandQueue, 103 | clReleaseCommandQueue, 104 | clGetCommandQueueInfo, 105 | clSetCommandQueueProperty, 106 | clCreateBuffer, 107 | clCreateImage2D, 108 | clCreateImage3D, 109 | clRetainMemObject, 110 | clReleaseMemObject, 111 | clGetSupportedImageFormats, 112 | clGetMemObjectInfo, 113 | clGetImageInfo, 114 | clCreateSampler, 115 | clRetainSampler, 116 | clReleaseSampler, 117 | clGetSamplerInfo, 118 | clCreateProgramWithSource, 119 | clCreateProgramWithBinary, 120 | clRetainProgram, 121 | clReleaseProgram, 122 | clBuildProgram, 123 | clUnloadCompiler, 124 | clGetProgramInfo, 125 | clGetProgramBuildInfo, 126 | clCreateKernel, 127 | clCreateKernelsInProgram, 128 | clRetainKernel, 129 | clReleaseKernel, 130 | clSetKernelArg, 131 | clGetKernelInfo, 132 | clGetKernelWorkGroupInfo, 133 | clWaitForEvents, 134 | clGetEventInfo, 135 | clRetainEvent, 136 | clReleaseEvent, 137 | clGetEventProfilingInfo, 138 | clFlush, 139 | clFinish, 140 | clEnqueueReadBuffer, 141 | clEnqueueWriteBuffer, 142 | clEnqueueCopyBuffer, 143 | clEnqueueReadImage, 144 | clEnqueueWriteImage, 145 | clEnqueueCopyImage, 146 | clEnqueueCopyImageToBuffer, 147 | clEnqueueCopyBufferToImage, 148 | clEnqueueMapBuffer, 149 | clEnqueueMapImage, 150 | 
clEnqueueUnmapMemObject, 151 | clEnqueueNDRangeKernel, 152 | clEnqueueTask, 153 | clEnqueueNativeKernel, 154 | clEnqueueMarker, 155 | clEnqueueWaitForEvents, 156 | clEnqueueBarrier, 157 | clGetExtensionFunctionAddress, 158 | clCreateFromGLBuffer, 159 | clCreateFromGLTexture2D, 160 | clCreateFromGLTexture3D, 161 | clCreateFromGLRenderbuffer, 162 | clGetGLObjectInfo, 163 | clGetGLTextureInfo, 164 | clEnqueueAcquireGLObjects, 165 | clEnqueueReleaseGLObjects, 166 | clGetGLContextInfoKHR, 167 | 168 | /* cl_khr_d3d10_sharing */ 169 | nullptr, // clGetDeviceIDsFromD3D10KHR, 170 | nullptr, // clCreateFromD3D10BufferKHR, 171 | nullptr, // clCreateFromD3D10Texture2DKHR, 172 | nullptr, // clCreateFromD3D10Texture3DKHR, 173 | nullptr, // clEnqueueAcquireD3D10ObjectsKHR, 174 | nullptr, // clEnqueueReleaseD3D10ObjectsKHR, 175 | 176 | /* OpenCL 1.1 */ 177 | clSetEventCallback, 178 | clCreateSubBuffer, 179 | clSetMemObjectDestructorCallback, 180 | clCreateUserEvent, 181 | clSetUserEventStatus, 182 | clEnqueueReadBufferRect, 183 | clEnqueueWriteBufferRect, 184 | clEnqueueCopyBufferRect, 185 | 186 | /* cl_ext_device_fission */ 187 | nullptr, // clCreateSubDevicesEXT, 188 | nullptr, // clRetainDeviceEXT, 189 | nullptr, // clReleaseDeviceEXT, 190 | 191 | /* cl_khr_gl_event */ 192 | clCreateEventFromGLsyncKHR, 193 | 194 | /* OpenCL 1.2 */ 195 | clCreateSubDevices, 196 | clRetainDevice, 197 | clReleaseDevice, 198 | clCreateImage, 199 | clCreateProgramWithBuiltInKernels, 200 | clCompileProgram, 201 | clLinkProgram, 202 | clUnloadPlatformCompiler, 203 | clGetKernelArgInfo, 204 | clEnqueueFillBuffer, 205 | clEnqueueFillImage, 206 | clEnqueueMigrateMemObjects, 207 | clEnqueueMarkerWithWaitList, 208 | clEnqueueBarrierWithWaitList, 209 | clGetExtensionFunctionAddressForPlatform, 210 | clCreateFromGLTexture, 211 | 212 | /* cl_khr_d3d11_sharing */ 213 | nullptr, // clGetDeviceIDsFromD3D11KHR, 214 | nullptr, // clCreateFromD3D11BufferKHR, 215 | nullptr, // clCreateFromD3D11Texture2DKHR, 216 | 
nullptr, // clCreateFromD3D11Texture3DKHR, 217 | nullptr, // clCreateFromDX9MediaSurfaceKHR, 218 | nullptr, // clEnqueueAcquireD3D11ObjectsKHR, 219 | nullptr, // clEnqueueReleaseD3D11ObjectsKHR, 220 | 221 | /* cl_khr_dx9_media_sharing */ 222 | nullptr, // clGetDeviceIDsFromDX9MediaAdapterKHR, 223 | nullptr, // clEnqueueAcquireDX9MediaSurfacesKHR, 224 | nullptr, // clEnqueueReleaseDX9MediaSurfacesKHR, 225 | 226 | /* cl_khr_egl_image */ 227 | nullptr, // clCreateFromEGLImageKHR, 228 | nullptr, // clEnqueueAcquireEGLObjectsKHR, 229 | nullptr, // clEnqueueReleaseEGLObjectsKHR, 230 | 231 | /* cl_khr_egl_event */ 232 | nullptr, // clCreateEventFromEGLSyncKHR, 233 | 234 | /* OpenCL 2.0 */ 235 | clCreateCommandQueueWithProperties, 236 | clCreatePipe, 237 | clGetPipeInfo, 238 | clSVMAlloc, 239 | clSVMFree, 240 | clEnqueueSVMFree, 241 | clEnqueueSVMMemcpy, 242 | clEnqueueSVMMemFill, 243 | clEnqueueSVMMap, 244 | clEnqueueSVMUnmap, 245 | clCreateSamplerWithProperties, 246 | clSetKernelArgSVMPointer, 247 | clSetKernelExecInfo, 248 | 249 | /* cl_khr_sub_groups */ 250 | nullptr, // clGetKernelSubGroupInfoKHR, 251 | 252 | /* OpenCL 2.1 */ 253 | clCloneKernel, 254 | clCreateProgramWithIL, 255 | clEnqueueSVMMigrateMem, 256 | clGetDeviceAndHostTimer, 257 | clGetHostTimer, 258 | clGetKernelSubGroupInfo, 259 | clSetDefaultDeviceCommandQueue, 260 | 261 | /* OpenCL 2.2 */ 262 | clSetProgramReleaseCallback, 263 | clSetProgramSpecializationConstant, 264 | 265 | /* OpenCL 3.0 */ 266 | clCreateBufferWithProperties, 267 | clCreateImageWithProperties, 268 | clSetContextDestructorCallback, 269 | }; 270 | 271 | Platform* g_Platform = nullptr; 272 | 273 | CL_API_ENTRY cl_int CL_API_CALL 274 | clIcdGetPlatformIDsKHR(cl_uint num_entries, 275 | cl_platform_id * platforms, 276 | cl_uint * num_platforms) 277 | { 278 | if (!g_Platform) 279 | { 280 | try 281 | { 282 | g_Platform = new Platform(&g_DispatchTable); 283 | g_Platform->RemoveInvalidDevices(); 284 | } 285 | catch (std::bad_alloc&) { return 
CL_OUT_OF_HOST_MEMORY; } 286 | catch (std::exception&) { return CL_OUT_OF_RESOURCES; } 287 | } 288 | 289 | if ((platforms && num_entries <= 0) || 290 | (!platforms && num_entries >= 1)) 291 | { 292 | return CL_INVALID_VALUE; 293 | } 294 | 295 | if (platforms && num_entries >= 1) 296 | { 297 | platforms[0] = g_Platform; 298 | } 299 | 300 | if (num_platforms) 301 | { 302 | *num_platforms = 1; 303 | } 304 | 305 | return CL_SUCCESS; 306 | } 307 | 308 | CL_API_ENTRY cl_int CL_API_CALL 309 | clGetPlatformIDs(cl_uint num_entries, 310 | cl_platform_id * platforms, 311 | cl_uint * num_platforms) CL_API_SUFFIX__VERSION_1_0 312 | { 313 | return clIcdGetPlatformIDsKHR(num_entries, platforms, num_platforms); 314 | } 315 | 316 | extern "C" extern BOOL WINAPI DllMain(HINSTANCE, UINT dwReason, LPVOID lpReserved) 317 | { 318 | if (dwReason == DLL_PROCESS_DETACH) 319 | { 320 | if (!g_Platform) 321 | return TRUE; 322 | 323 | // If this is process termination, and we have D3D devices owned by 324 | // the platform, just go ahead and leak them, rather than trying 325 | // to clean them up. 326 | if (lpReserved && g_Platform->AnyD3DDevicesExist()) 327 | { 328 | // At the very least close shader caches cleanly so they can be flushed 329 | // to disk. 330 | g_Platform->CloseCaches(); 331 | return TRUE; 332 | } 333 | 334 | delete g_Platform; 335 | } 336 | 337 | return TRUE; 338 | } 339 | 340 | #ifndef HAS_TELASSERT 341 | void __stdcall MicrosoftTelemetryAssertTriggeredNoArgs() { } 342 | #endif 343 | 344 | --------------------------------------------------------------------------------