├── README.md ├── patches ├── 0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch ├── 0005-wined3d-Disable-persistently-mapped-shader-resource-.patch ├── 0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch ├── 0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch ├── 0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch ├── 0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch ├── 0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch ├── 0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch └── 0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch └── LICENSE /README.md: -------------------------------------------------------------------------------- 1 | # wine-pba 2 | 3 | A set of patches to allocate dynamic wined3d_buffers from a single persistently mapped buffer (via `ARB_buffer_storage`) managed by a heap allocator, reducing the need for command stream synchronization. 4 | 5 | Several related changes are included in the patchset as well: 6 | 7 | - `ARB_multi_bind` is used to speed up UBO updates 8 | - This vastly improves constant buffer performance as PBA causes much more frequent rebinds. 9 | 10 | **This patchset is prototype-quality at the moment. If `ARB_buffer_storage` is not present, you're not going to have a good time.** 11 | 12 | Currently, these patches are based off wine-staging 3.7. 13 | 14 | [Details can be found here.](https://comminos.com/posts/2018-02-21-wined3d-profiling.html) 15 | -------------------------------------------------------------------------------- /patches/0004-wined3d-Use-GL_CLIENT_STORAGE_BIT-for-persistent-map.patch: -------------------------------------------------------------------------------- 1 | From 8039b7c863b0c01d12a25c3dbe59e850832559a2 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Tue, 6 Mar 2018 02:07:31 -0800 4 | Subject: [PATCH 4/9] wined3d: Use GL_CLIENT_STORAGE_BIT for persistent 5 | mappings. 
6 | 7 | --- 8 | dlls/wined3d/buffer_heap.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c 12 | index b133bd68933..75f84b00882 100644 13 | --- a/dlls/wined3d/buffer_heap.c 14 | +++ b/dlls/wined3d/buffer_heap.c 15 | @@ -169,7 +169,7 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s 16 | { 17 | access_flags |= GL_MAP_READ_BIT; 18 | } 19 | - storage_flags = access_flags; 20 | + storage_flags = GL_CLIENT_STORAGE_BIT | access_flags; 21 | 22 | // TODO(acomminos): where should we be checking for errors here? 23 | GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); 24 | -- 25 | 2.17.0 26 | 27 | -------------------------------------------------------------------------------- /patches/0005-wined3d-Disable-persistently-mapped-shader-resource-.patch: -------------------------------------------------------------------------------- 1 | From 2a0957bcb0bf9731715a46dc681efd516a6d8191 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Thu, 8 Mar 2018 22:00:33 -0800 4 | Subject: [PATCH 5/9] wined3d: Disable persistently mapped shader resource 5 | buffers. 
6 | 7 | --- 8 | dlls/wined3d/buffer.c | 4 ++++ 9 | 1 file changed, 4 insertions(+) 10 | 11 | diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c 12 | index 40c454e4668..59334816132 100644 13 | --- a/dlls/wined3d/buffer.c 14 | +++ b/dlls/wined3d/buffer.c 15 | @@ -1596,6 +1596,10 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device 16 | { 17 | WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); 18 | } 19 | + else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE) 20 | + { 21 | + FIXME_(d3d_perf)("Not using a persistent mapping for shader resource buffer %p (unimplemented)\n", buffer); 22 | + } 23 | else 24 | { 25 | // If supported, use persistent mapped buffers instead of a 26 | -- 27 | 2.17.0 28 | 29 | -------------------------------------------------------------------------------- /patches/0009-wined3d-Add-quirk-to-use-GL_CLIENT_STORAGE_BIT-for-m.patch: -------------------------------------------------------------------------------- 1 | From e6f5ba632e17f58949272f84814a8522f6ee2c29 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Thu, 15 Mar 2018 21:22:06 -0700 4 | Subject: [PATCH 9/9] wined3d: Add quirk to use GL_CLIENT_STORAGE_BIT for mesa. 
5 | 6 | --- 7 | dlls/wined3d/buffer_heap.c | 15 ++++++++++++++- 8 | dlls/wined3d/directx.c | 19 +++++++++++++++++++ 9 | dlls/wined3d/wined3d_private.h | 1 + 10 | 3 files changed, 34 insertions(+), 1 deletion(-) 11 | 12 | diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c 13 | index 899aad96126..9e8f2d799df 100644 14 | --- a/dlls/wined3d/buffer_heap.c 15 | +++ b/dlls/wined3d/buffer_heap.c 16 | @@ -165,7 +165,20 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s 17 | access_flags |= GL_MAP_READ_BIT; 18 | } 19 | 20 | - storage_flags = GL_CLIENT_STORAGE_BIT | access_flags; 21 | + storage_flags = access_flags; 22 | + // FIXME(acomminos): So, about GL_CLIENT_STORAGE_BIT: 23 | + // - On NVIDIA, DMA CACHED memory is used when this flag is set. SYSTEM HEAP 24 | + // memory is used without it, which (in my testing) is much faster. 25 | + // - On Mesa, GTT is used when this flag is set. This is what we want, as 26 | + // uploads to VRAM occur otherwise, which is unusably slow (on radeon). 27 | + // 28 | + // Thus, we're only going to set this on mesa for now. 29 | + // Hints are awful anyway. 
30 | + if (gl_info->quirks & WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT) 31 | + { 32 | + FIXME_(d3d_perf)("PBA: using GL_CLIENT_STORAGE_BIT quirk"); 33 | + storage_flags |= GL_CLIENT_STORAGE_BIT; 34 | + } 35 | 36 | GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); 37 | checkGLcall("glGenBuffers"); 38 | diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c 39 | index 2831baa1d7e..ee0d4c7b2d8 100644 40 | --- a/dlls/wined3d/directx.c 41 | +++ b/dlls/wined3d/directx.c 42 | @@ -946,6 +946,13 @@ static BOOL match_broken_viewport_subpixel_bits(const struct wined3d_gl_info *gl 43 | return !wined3d_caps_gl_ctx_test_viewport_subpixel_bits(ctx); 44 | } 45 | 46 | +static BOOL match_mesa(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx, 47 | + const char *gl_renderer, enum wined3d_gl_vendor gl_vendor, 48 | + enum wined3d_pci_vendor card_vendor, enum wined3d_pci_device device) 49 | +{ 50 | + return gl_vendor == GL_VENDOR_MESA; 51 | +} 52 | + 53 | static void quirk_apple_glsl_constants(struct wined3d_gl_info *gl_info) 54 | { 55 | /* MacOS needs uniforms for relative addressing offsets. This can accumulate to quite a few uniforms. 56 | @@ -1083,6 +1090,13 @@ static void quirk_broken_viewport_subpixel_bits(struct wined3d_gl_info *gl_info) 57 | } 58 | } 59 | 60 | +static void quirk_use_client_storage_bit(struct wined3d_gl_info *gl_info) 61 | +{ 62 | + // Using ARB_buffer_storage on Mesa requires the GL_CLIENT_STORAGE_BIT to be 63 | + // set to use GTT for immutable buffers on radeon (see PIPE_USAGE_STREAM). 
64 | + gl_info->quirks |= WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT; 65 | +} 66 | + 67 | struct driver_quirk 68 | { 69 | BOOL (*match)(const struct wined3d_gl_info *gl_info, struct wined3d_caps_gl_ctx *ctx, 70 | @@ -1179,6 +1193,11 @@ static const struct driver_quirk quirk_table[] = 71 | quirk_broken_viewport_subpixel_bits, 72 | "Nvidia viewport subpixel bits bug" 73 | }, 74 | + { 75 | + match_mesa, 76 | + quirk_use_client_storage_bit, 77 | + "Use GL_CLIENT_STORAGE_BIT for persistent buffers on mesa", 78 | + }, 79 | }; 80 | 81 | /* Certain applications (Steam) complain if we report an outdated driver version. In general, 82 | diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h 83 | index 1335c122890..71fef7ff88b 100644 84 | --- a/dlls/wined3d/wined3d_private.h 85 | +++ b/dlls/wined3d/wined3d_private.h 86 | @@ -75,6 +75,7 @@ 87 | #define WINED3D_QUIRK_INFO_LOG_SPAM 0x00000080 88 | #define WINED3D_QUIRK_LIMITED_TEX_FILTERING 0x00000100 89 | #define WINED3D_QUIRK_BROKEN_ARB_FOG 0x00000200 90 | +#define WINED3D_QUIRK_USE_CLIENT_STORAGE_BIT 0x00000400 91 | 92 | enum wined3d_ffp_idx 93 | { 94 | -- 95 | 2.17.0 96 | 97 | -------------------------------------------------------------------------------- /patches/0006-wined3d-Perform-initial-allocation-of-persistent-buf.patch: -------------------------------------------------------------------------------- 1 | From edc7f11845fcafb7414dde498b73c69507dfc5e8 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Thu, 8 Mar 2018 22:42:03 -0800 4 | Subject: [PATCH 6/9] wined3d: Perform initial allocation of persistent buffers 5 | asynchronously. 
6 | 7 | --- 8 | dlls/wined3d/buffer.c | 30 ++++++++++++++++++++---------- 9 | 1 file changed, 20 insertions(+), 10 deletions(-) 10 | 11 | diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c 12 | index 59334816132..825a796db1b 100644 13 | --- a/dlls/wined3d/buffer.c 14 | +++ b/dlls/wined3d/buffer.c 15 | @@ -272,7 +272,7 @@ fail: 16 | } 17 | 18 | /* Context activation is done by the caller. */ 19 | -static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context) 20 | +static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) 21 | { 22 | struct wined3d_device *device = buffer->resource.device; 23 | struct wined3d_buffer_heap *heap; 24 | @@ -685,7 +685,7 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer, 25 | WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer); 26 | return FALSE; 27 | } 28 | - return buffer_alloc_persistent_map(buffer, context); 29 | + return buffer_alloc_persistent_map(buffer); 30 | 31 | default: 32 | ERR("Invalid location %s.\n", wined3d_debug_location(location)); 33 | @@ -1113,7 +1113,7 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI 34 | const struct wined3d_gl_info *gl_info; 35 | context = context_acquire(device, NULL, 0); 36 | 37 | - FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n"); 38 | + FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish (flags: %x)\n", flags); 39 | 40 | gl_info = context->gl_info; 41 | gl_info->gl_ops.gl.p_glFinish(); 42 | @@ -1389,8 +1389,20 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc 43 | 44 | // Support immediate mapping of persistent buffers off the command thread, 45 | // which require no GL calls to interface with. 
46 | - if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) 47 | + if (buffer->flags & WINED3D_BUFFER_PERSISTENT) 48 | { 49 | + // Attempt to load a persistent map without syncing, if possible. 50 | + if (!(buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP)) 51 | + { 52 | + wined3d_resource_wait_idle(resource); 53 | + if (!buffer_alloc_persistent_map(buffer)) 54 | + { 55 | + ERR_(d3d_perf)("Failed to allocate persistent buffer, falling back to sync path."); 56 | + return E_FAIL; 57 | + } 58 | + wined3d_buffer_validate_location(buffer, WINED3D_LOCATION_PERSISTENT_MAP); 59 | + } 60 | + 61 | map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width; 62 | if (flags & WINED3D_MAP_DISCARD) 63 | { 64 | @@ -1410,6 +1422,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc 65 | // currently used buffer to the free pool, along with the fence that 66 | // must be called before the buffer can be reused. 67 | wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); 68 | + 69 | return WINED3D_OK; 70 | } 71 | else if (flags & WINED3D_MAP_NOOVERWRITE) 72 | @@ -1420,14 +1433,11 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc 73 | struct wined3d_map_range map_range = buffer->mt_persistent_map; 74 | map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; 75 | resource->map_count++; 76 | + 77 | return WINED3D_OK; 78 | } 79 | - else 80 | - { 81 | - // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified. 82 | - WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); 83 | - // XXX(acomminos): kill this early return. they're the worst. 
84 | - } 85 | + 86 | + WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); 87 | } 88 | 89 | return E_NOTIMPL; 90 | -- 91 | 2.17.0 92 | 93 | -------------------------------------------------------------------------------- /patches/0003-wined3d-Use-ARB_multi_bind-to-speed-up-UBO-updates.patch: -------------------------------------------------------------------------------- 1 | From 0b9f86381d6cb80e95dcce5374b22574ce395ca0 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Mon, 5 Mar 2018 20:28:34 -0800 4 | Subject: [PATCH 3/9] wined3d: Use ARB_multi_bind to speed up UBO updates. 5 | 6 | More frequent UBO remaps as a result of the persistent buffer allocator 7 | causes glBindBufferRange to be a bottleneck. Using ARB_multi_bind 8 | massively reduces state change overhead. 9 | --- 10 | dlls/wined3d/directx.c | 4 ++++ 11 | dlls/wined3d/state.c | 46 +++++++++++++++++++++++++++++++++------ 12 | dlls/wined3d/wined3d_gl.h | 1 + 13 | 3 files changed, 44 insertions(+), 7 deletions(-) 14 | 15 | diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c 16 | index eabdb658b48..2831baa1d7e 100644 17 | --- a/dlls/wined3d/directx.c 18 | +++ b/dlls/wined3d/directx.c 19 | @@ -149,6 +149,7 @@ static const struct wined3d_extension_map gl_extension_map[] = 20 | {"GL_ARB_internalformat_query2", ARB_INTERNALFORMAT_QUERY2 }, 21 | {"GL_ARB_map_buffer_alignment", ARB_MAP_BUFFER_ALIGNMENT }, 22 | {"GL_ARB_map_buffer_range", ARB_MAP_BUFFER_RANGE }, 23 | + {"GL_ARB_multi_bind", ARB_MULTI_BIND }, 24 | {"GL_ARB_multisample", ARB_MULTISAMPLE }, 25 | {"GL_ARB_multitexture", ARB_MULTITEXTURE }, 26 | {"GL_ARB_occlusion_query", ARB_OCCLUSION_QUERY }, 27 | @@ -2795,6 +2796,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info) 28 | /* GL_ARB_map_buffer_range */ 29 | USE_GL_FUNC(glFlushMappedBufferRange) 30 | USE_GL_FUNC(glMapBufferRange) 31 | + /* GL_ARB_multi_bind */ 32 | + USE_GL_FUNC(glBindBuffersRange) 33 | /* GL_ARB_multisample */ 34 | 
USE_GL_FUNC(glSampleCoverageARB) 35 | /* GL_ARB_multitexture */ 36 | @@ -3954,6 +3957,7 @@ static BOOL wined3d_adapter_init_gl_caps(struct wined3d_adapter *adapter, 37 | {ARB_TEXTURE_VIEW, MAKEDWORD_VERSION(4, 3)}, 38 | 39 | {ARB_CLEAR_TEXTURE, MAKEDWORD_VERSION(4, 4)}, 40 | + {ARB_MULTI_BIND, MAKEDWORD_VERSION(4, 4)}, 41 | 42 | {ARB_CLIP_CONTROL, MAKEDWORD_VERSION(4, 5)}, 43 | {ARB_CULL_DISTANCE, MAKEDWORD_VERSION(4, 5)}, 44 | diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c 45 | index 6564f7cc180..c66ec71647e 100644 46 | --- a/dlls/wined3d/state.c 47 | +++ b/dlls/wined3d/state.c 48 | @@ -4428,19 +4428,51 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state 49 | shader_type = WINED3D_SHADER_TYPE_COMPUTE; 50 | 51 | wined3d_gl_limits_get_uniform_block_range(&gl_info->limits, shader_type, &base, &count); 52 | - for (i = 0; i < count; ++i) 53 | + 54 | + if (gl_info->supported[ARB_MULTI_BIND]) 55 | { 56 | - buffer = state->cb[shader_type][i]; 57 | - if (buffer) 58 | + GLuint buffer_objects[count]; 59 | + GLsizeiptr buffer_offsets[count]; 60 | + GLsizeiptr buffer_sizes[count]; 61 | + 62 | + for (i = 0; i < count; ++i) 63 | { 64 | - wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); 65 | - GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); 66 | + buffer = state->cb[shader_type][i]; 67 | + if (buffer) 68 | + { 69 | + wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); 70 | + buffer_objects[i] = bo_addr.buffer_object; 71 | + buffer_offsets[i] = bo_addr.addr; 72 | + buffer_sizes[i] = bo_addr.length; 73 | + } 74 | + else 75 | + { 76 | + buffer_objects[i] = buffer_offsets[i] = 0; 77 | + // The ARB_multi_bind spec states that an error may be thrown if 78 | + // `size` is less than or equal to zero. Thus, we specify a size for 79 | + // unused buffers anyway. 
80 | + buffer_sizes[i] = 1; 81 | + } 82 | } 83 | - else 84 | + GL_EXTCALL(glBindBuffersRange(GL_UNIFORM_BUFFER, base, count, buffer_objects, buffer_offsets, buffer_sizes)); 85 | + } 86 | + else 87 | + { 88 | + for (i = 0; i < count; ++i) 89 | { 90 | - GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); 91 | + buffer = state->cb[shader_type][i]; 92 | + if (buffer) 93 | + { 94 | + wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); 95 | + GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); 96 | + } 97 | + else 98 | + { 99 | + GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); 100 | + } 101 | } 102 | } 103 | + 104 | checkGLcall("bind constant buffers"); 105 | } 106 | 107 | diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h 108 | index 730eff131f3..4ece45c26f0 100644 109 | --- a/dlls/wined3d/wined3d_gl.h 110 | +++ b/dlls/wined3d/wined3d_gl.h 111 | @@ -82,6 +82,7 @@ enum wined3d_gl_extension 112 | ARB_INTERNALFORMAT_QUERY2, 113 | ARB_MAP_BUFFER_ALIGNMENT, 114 | ARB_MAP_BUFFER_RANGE, 115 | + ARB_MULTI_BIND, 116 | ARB_MULTISAMPLE, 117 | ARB_MULTITEXTURE, 118 | ARB_OCCLUSION_QUERY, 119 | -- 120 | 2.17.0 121 | 122 | -------------------------------------------------------------------------------- /patches/0008-wined3d-Add-DISABLE_PBA-envvar-some-PBA-cleanup.patch: -------------------------------------------------------------------------------- 1 | From bffc7d7444b681a5a797dafd136c978e9c97c0db Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Thu, 15 Mar 2018 21:07:21 -0700 4 | Subject: [PATCH 8/9] wined3d: Add DISABLE_PBA envvar, some PBA cleanup. 
5 | 6 | --- 7 | dlls/wined3d/buffer.c | 4 ++-- 8 | dlls/wined3d/buffer_heap.c | 34 +++++++++++++++++++++++------- 9 | dlls/wined3d/device.c | 38 +++++++++++++++++++++++----------- 10 | dlls/wined3d/query.c | 2 +- 11 | dlls/wined3d/wined3d_private.h | 6 ++---- 12 | 5 files changed, 57 insertions(+), 27 deletions(-) 13 | 14 | diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c 15 | index 533befd2fcb..3a56d9c9a39 100644 16 | --- a/dlls/wined3d/buffer.c 17 | +++ b/dlls/wined3d/buffer.c 18 | @@ -1601,9 +1601,9 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device 19 | 20 | if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC) 21 | { 22 | - if (!gl_info->supported[ARB_BUFFER_STORAGE]) 23 | + if (!device->use_pba) 24 | { 25 | - WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); 26 | + WARN_(d3d_perf)("Not creating a persistent mapping for dynamic buffer %p because the PBA is disabled.\n", buffer); 27 | } 28 | else if (bind_flags & WINED3D_BIND_SHADER_RESOURCE) 29 | { 30 | diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c 31 | index 80670c515f7..899aad96126 100644 32 | --- a/dlls/wined3d/buffer_heap.c 33 | +++ b/dlls/wined3d/buffer_heap.c 34 | @@ -25,6 +25,9 @@ 35 | WINE_DEFAULT_DEBUG_CHANNEL(d3d); 36 | WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); 37 | 38 | +// Arbitrary binding to use when binding the persistent buffer. 
39 | +#define BIND_TARGET GL_ARRAY_BUFFER 40 | + 41 | struct wined3d_buffer_heap_fenced_element 42 | { 43 | struct wined3d_buffer_heap_bin_set free_list; 44 | @@ -140,7 +143,6 @@ static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) 45 | HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) 46 | { 47 | const struct wined3d_gl_info *gl_info = context->gl_info; 48 | - const GLenum buffer_target = GL_ARRAY_BUFFER; 49 | GLbitfield access_flags; 50 | GLbitfield storage_flags; 51 | struct wined3d_buffer_heap_element *initial_elem; 52 | @@ -162,22 +164,23 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s 53 | { 54 | access_flags |= GL_MAP_READ_BIT; 55 | } 56 | + 57 | storage_flags = GL_CLIENT_STORAGE_BIT | access_flags; 58 | 59 | - // TODO(acomminos): where should we be checking for errors here? 60 | GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); 61 | + checkGLcall("glGenBuffers"); 62 | 63 | - context_bind_bo(context, buffer_target, object->buffer_object); 64 | + context_bind_bo(context, BIND_TARGET, object->buffer_object); 65 | 66 | - // TODO(acomminos): assert glBufferStorage supported? 
67 | - GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags)); 68 | + GL_EXTCALL(glBufferStorage(BIND_TARGET, size, NULL, storage_flags)); 69 | + checkGLcall("glBufferStorage"); 70 | 71 | - if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags)))) 72 | + if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(BIND_TARGET, 0, size, access_flags)))) 73 | { 74 | ERR("Couldn't map persistent buffer.\n"); 75 | return -1; // FIXME(acomminos): proper error code, cleanup 76 | } 77 | - context_bind_bo(context, buffer_target, 0); 78 | + context_bind_bo(context, BIND_TARGET, 0); 79 | 80 | object->fenced_head = object->fenced_tail = NULL; 81 | object->alignment = alignment; 82 | @@ -195,7 +198,22 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s 83 | /* Context activation is done by the caller. */ 84 | HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) 85 | { 86 | - FIXME("Unimplemented, leaking buffer"); 87 | + const struct wined3d_gl_info *gl_info = context->gl_info; 88 | + 89 | + context_bind_bo(context, BIND_TARGET, heap->buffer_object); 90 | + GL_EXTCALL(glUnmapBuffer(BIND_TARGET)); 91 | + checkGLcall("glUnmapBuffer"); 92 | + context_bind_bo(context, BIND_TARGET, 0); 93 | + 94 | + GL_EXTCALL(glDeleteBuffers(1, &heap->buffer_object)); 95 | + checkGLcall("glDeleteBuffers"); 96 | + 97 | + DeleteCriticalSection(&heap->temp_lock); 98 | + 99 | + // TODO(acomminos): cleanup free lists, fenced list, etc. 
100 | + 101 | + HeapFree(GetProcessHeap(), 0, heap); 102 | + 103 | return WINED3D_OK; 104 | } 105 | 106 | diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c 107 | index f6bf07604c7..a79241f28cc 100644 108 | --- a/dlls/wined3d/device.c 109 | +++ b/dlls/wined3d/device.c 110 | @@ -841,16 +841,27 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined 111 | static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) 112 | { 113 | const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 114 | - // TODO(acomminos): kill this magic number. perhaps base on vram. 115 | - GLsizeiptr geo_heap_size = 512 * 1024 * 1024; 116 | - // We choose a constant buffer size of 128MB, the same as NVIDIA claims to 117 | - // use in their Direct3D driver for discarded constant buffers. 118 | - GLsizeiptr cb_heap_size = 128 * 1024 * 1024; 119 | - GLint ub_alignment; 120 | - HRESULT hr; 121 | + BOOL use_pba = FALSE; 122 | + char *env_pba_disable; 123 | 124 | - if (gl_info->supported[ARB_BUFFER_STORAGE]) 125 | + if (!gl_info->supported[ARB_BUFFER_STORAGE]) 126 | + { 127 | + FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); 128 | + } 129 | + else if ((env_pba_disable = getenv("PBA_DISABLE")) && *env_pba_disable != '0') 130 | { 131 | + FIXME("Not using PBA, envvar 'PBA_DISABLE' set.\n"); 132 | + } 133 | + else 134 | + { 135 | + // TODO(acomminos): kill this magic number. perhaps base on vram. 136 | + GLsizeiptr geo_heap_size = 512 * 1024 * 1024; 137 | + // We choose a constant buffer size of 128MB, the same as NVIDIA claims to 138 | + // use in their Direct3D driver for discarded constant buffers. 
139 | + GLsizeiptr cb_heap_size = 128 * 1024 * 1024; 140 | + GLint ub_alignment; 141 | + HRESULT hr; 142 | + 143 | gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); 144 | 145 | // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason). 146 | @@ -859,19 +870,22 @@ static void create_buffer_heap(struct wined3d_device *device, struct wined3d_con 147 | if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap))) 148 | { 149 | ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); 150 | + goto fail; 151 | } 152 | 153 | if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap))) 154 | { 155 | ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); 156 | + goto fail; 157 | } 158 | 159 | FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment); 160 | + 161 | + use_pba = TRUE; 162 | } 163 | - else 164 | - { 165 | - FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); 166 | - } 167 | + 168 | +fail: 169 | + device->use_pba = use_pba; 170 | } 171 | 172 | /* Context activation is done by the caller. 
*/ 173 | diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c 174 | index f3ca1630e58..5ea79b6e4a7 100644 175 | --- a/dlls/wined3d/query.c 176 | +++ b/dlls/wined3d/query.c 177 | @@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) 178 | return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; 179 | } 180 | 181 | -enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, 182 | +static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, 183 | const struct wined3d_device *device, DWORD flags) 184 | { 185 | const struct wined3d_gl_info *gl_info; 186 | diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h 187 | index 34f3fb540df..1335c122890 100644 188 | --- a/dlls/wined3d/wined3d_private.h 189 | +++ b/dlls/wined3d/wined3d_private.h 190 | @@ -1727,9 +1727,6 @@ void wined3d_fence_destroy(struct wined3d_fence *fence) DECLSPEC_HIDDEN; 191 | void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN; 192 | enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence, 193 | const struct wined3d_device *device) DECLSPEC_HIDDEN; 194 | -// XXX(acomminos): really expose this? 195 | -enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, 196 | - const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN; 197 | 198 | /* Direct3D terminology with little modifications. 
We do not have an issued 199 | * state because only the driver knows about it, but we have a created state 200 | @@ -2968,7 +2965,8 @@ struct wined3d_device 201 | BYTE inScene : 1; /* A flag to check for proper BeginScene / EndScene call pairs */ 202 | BYTE softwareVertexProcessing : 1; /* process vertex shaders using software or hardware */ 203 | BYTE filter_messages : 1; 204 | - BYTE padding : 3; 205 | + BYTE use_pba : 1; /* A flag to use the persistent buffer allocator for dynamic buffers. */ 206 | + BYTE padding : 2; 207 | 208 | unsigned char surface_alignment; /* Line Alignment of surfaces */ 209 | 210 | -- 211 | 2.17.0 212 | 213 | -------------------------------------------------------------------------------- /patches/0007-wined3d-Avoid-freeing-persistent-buffer-heap-element.patch: -------------------------------------------------------------------------------- 1 | From f47b2a6c8b6f1cea62cb26965fe662e919547913 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Thu, 8 Mar 2018 23:01:50 -0800 4 | Subject: [PATCH 7/9] wined3d: Avoid freeing persistent buffer heap elements 5 | during use. 6 | 7 | Using HeapFree is expensive, especially when we don't have our buffers 8 | for long. 
9 | --- 10 | dlls/wined3d/buffer.c | 29 +++++++++-------- 11 | dlls/wined3d/buffer_heap.c | 57 ++++++++++++++-------------------- 12 | dlls/wined3d/context.c | 4 +-- 13 | dlls/wined3d/cs.c | 6 ++-- 14 | dlls/wined3d/wined3d_private.h | 25 ++++++++++----- 15 | 5 files changed, 61 insertions(+), 60 deletions(-) 16 | 17 | diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c 18 | index 825a796db1b..533befd2fcb 100644 19 | --- a/dlls/wined3d/buffer.c 20 | +++ b/dlls/wined3d/buffer.c 21 | @@ -276,7 +276,7 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) 22 | { 23 | struct wined3d_device *device = buffer->resource.device; 24 | struct wined3d_buffer_heap *heap; 25 | - struct wined3d_map_range map_range; 26 | + struct wined3d_buffer_heap_element *elem; 27 | HRESULT hr; 28 | 29 | if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) 30 | @@ -292,12 +292,12 @@ static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer) 31 | } 32 | 33 | buffer->buffer_heap = heap; 34 | - if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range))) 35 | + if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &elem))) 36 | { 37 | goto fail; 38 | } 39 | - buffer->cs_persistent_map = map_range; 40 | - buffer->mt_persistent_map = map_range; 41 | + buffer->cs_persistent_map = elem; 42 | + buffer->mt_persistent_map = elem; 43 | return TRUE; 44 | 45 | fail: 46 | @@ -750,7 +750,7 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer, 47 | if (buffer->conversion_map) 48 | FIXME("Attempting to use conversion map with persistent mapping.\n"); 49 | memcpy(buffer->buffer_heap->map_ptr + 50 | - buffer->cs_persistent_map.offset, 51 | + buffer->cs_persistent_map->range.offset, 52 | buffer->resource.heap_memory, buffer->resource.size); 53 | break; 54 | 55 | @@ -798,11 +798,11 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, 56 | { 57 | // FIXME(acomminos): should we expose a buffer object we don't 
wholly own here? 58 | data->buffer_object = buffer->buffer_heap->buffer_object; 59 | - data->addr = buffer->cs_persistent_map.offset; 60 | + data->addr = buffer->cs_persistent_map->range.offset; 61 | // Note that the size of the underlying buffer allocation may be larger 62 | // than the buffer knows about. In this case, we've rounded it up to be 63 | // aligned (e.g. for uniform buffer offsets). 64 | - data->length = buffer->cs_persistent_map.size; 65 | + data->length = buffer->cs_persistent_map->range.size; 66 | return WINED3D_LOCATION_PERSISTENT_MAP; 67 | } 68 | if (locations & WINED3D_LOCATION_SYSMEM) 69 | @@ -1119,7 +1119,7 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI 70 | gl_info->gl_ops.gl.p_glFinish(); 71 | 72 | base = buffer->buffer_heap->map_ptr 73 | - + buffer->cs_persistent_map.offset; 74 | + + buffer->cs_persistent_map->range.offset; 75 | *data = base + offset; 76 | 77 | context_release(context); 78 | @@ -1407,22 +1407,21 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc 79 | if (flags & WINED3D_MAP_DISCARD) 80 | { 81 | HRESULT hr; 82 | - struct wined3d_map_range map_range; 83 | - if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range))) 84 | + struct wined3d_buffer_heap_element *mt_elem; 85 | + if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &mt_elem))) 86 | { 87 | FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n"); 88 | return hr; 89 | } 90 | - map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; 91 | + map_desc->data = buffer->buffer_heap->map_ptr + mt_elem->range.offset + offset; 92 | resource->map_count++; 93 | 94 | - buffer->mt_persistent_map = map_range; 95 | + buffer->mt_persistent_map = mt_elem; 96 | 97 | // Discard handler on CSMT thread is responsible for returning the 98 | // currently used buffer to the free pool, along with the fence that 99 | // must 
be called before the buffer can be reused. 100 | - wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); 101 | - 102 | + wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, mt_elem); 103 | return WINED3D_OK; 104 | } 105 | else if (flags & WINED3D_MAP_NOOVERWRITE) 106 | @@ -1430,7 +1429,7 @@ static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resourc 107 | // Allow immediate access for persistent buffers without a fence. 108 | // Always use the latest buffer in this case in case the latest 109 | // DISCARDed one hasn't reached the command stream yet. 110 | - struct wined3d_map_range map_range = buffer->mt_persistent_map; 111 | + struct wined3d_map_range map_range = buffer->mt_persistent_map->range; 112 | map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; 113 | resource->map_count++; 114 | 115 | diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c 116 | index 75f84b00882..80670c515f7 100644 117 | --- a/dlls/wined3d/buffer_heap.c 118 | +++ b/dlls/wined3d/buffer_heap.c 119 | @@ -25,18 +25,6 @@ 120 | WINE_DEFAULT_DEBUG_CHANNEL(d3d); 121 | WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); 122 | 123 | -struct wined3d_buffer_heap_element 124 | -{ 125 | - struct wined3d_map_range range; 126 | - 127 | - // rbtree data 128 | - struct wine_rb_entry entry; 129 | - 130 | - // Binned free list positions 131 | - struct wined3d_buffer_heap_element *next; 132 | - struct wined3d_buffer_heap_element *prev; 133 | -}; 134 | - 135 | struct wined3d_buffer_heap_fenced_element 136 | { 137 | struct wined3d_buffer_heap_bin_set free_list; 138 | @@ -82,6 +70,11 @@ static int element_bin(struct wined3d_buffer_heap_element *elem) 139 | // Inserts an element into the appropriate free list bin. 
140 | static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) 141 | { 142 | + if (elem->prev || elem->next) 143 | + { 144 | + ERR("Element %p in already in a free list (for some reason).\n", elem); 145 | + } 146 | + 147 | int bin = element_bin(elem); 148 | 149 | elem->prev = NULL; 150 | @@ -206,7 +199,7 @@ HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct win 151 | return WINED3D_OK; 152 | } 153 | 154 | -HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) 155 | +HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element **out_elem) 156 | { 157 | int initial_bin; 158 | int initial_size = size; 159 | @@ -233,24 +226,24 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s 160 | remaining_range.offset = elem->range.offset + size; 161 | remaining_range.size = elem->range.size - size; 162 | 163 | - out_range->offset = elem->range.offset; 164 | - out_range->size = size; 165 | + // Take the element from the free list, transferring ownership to 166 | + // the caller. 167 | + element_remove_free(heap, elem); 168 | + // Resize the element so that we can free the remainder. 169 | + elem->range.size = size; 170 | 171 | - TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); 172 | + *out_elem = elem; 173 | 174 | - // Remove the element from its current free bin to move it to the correct list. 
175 | - element_remove_free(heap, elem); 176 | + TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); 177 | 178 | if (remaining_range.size > 0) 179 | { 180 | + struct wined3d_buffer_heap_element *remaining_elem; 181 | + 182 | TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset); 183 | 184 | - elem->range = remaining_range; 185 | - element_insert_free_bin(heap, elem); 186 | - } 187 | - else 188 | - { 189 | - HeapFree(GetProcessHeap(), 0, elem); 190 | + remaining_elem = element_new(remaining_range.offset, remaining_range.size); 191 | + element_insert_free_bin(heap, remaining_elem); 192 | } 193 | 194 | LeaveCriticalSection(&heap->temp_lock); 195 | @@ -265,7 +258,7 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s 196 | if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced))) 197 | { 198 | if (num_coalesced > 0) 199 | - return wined3d_buffer_heap_alloc(heap, size, out_range); 200 | + return wined3d_buffer_heap_alloc(heap, size, out_elem); 201 | } 202 | 203 | FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n"); 204 | @@ -273,16 +266,15 @@ HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr s 205 | return WINED3DERR_OUTOFVIDEOMEMORY; 206 | } 207 | 208 | -HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) 209 | +HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) 210 | { 211 | - struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); 212 | - 213 | - if (!elem) 214 | - return E_OUTOFMEMORY; 215 | - 216 | EnterCriticalSection(&heap->temp_lock); 217 | 218 | // Only insert the element into a free bin, coalescing will occur later. 
219 | + // 220 | + // Note that the reason that we pass around wined3d_buffer_heap_element 221 | + // instead of a range is to avoid frequent HeapAlloc/HeapFree operations 222 | + // when we're reusing buffers. 223 | element_insert_free_bin(heap, elem); 224 | 225 | LeaveCriticalSection(&heap->temp_lock); 226 | @@ -290,9 +282,8 @@ HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3 227 | return WINED3D_OK; 228 | } 229 | 230 | -HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) 231 | +HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) 232 | { 233 | - struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); 234 | int bin_index = element_bin(elem); 235 | struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index]; 236 | 237 | diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c 238 | index 698ba6fc8f2..a08f93ac53f 100644 239 | --- a/dlls/wined3d/context.c 240 | +++ b/dlls/wined3d/context.c 241 | @@ -4970,9 +4970,9 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s 242 | if (parameters->indexed) 243 | { 244 | struct wined3d_buffer *index_buffer = state->index_buffer; 245 | - if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) 246 | + if (index_buffer->cs_persistent_map) 247 | { 248 | - idx_data = index_buffer->cs_persistent_map.offset; 249 | + idx_data = index_buffer->cs_persistent_map->range.offset; 250 | } 251 | else if (!index_buffer->buffer_object || !stream_info->all_vbo) 252 | { 253 | diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c 254 | index a11d21d1961..6814a8d1022 100644 255 | --- a/dlls/wined3d/cs.c 256 | +++ b/dlls/wined3d/cs.c 257 | @@ -444,7 +444,7 @@ struct wined3d_cs_discard_buffer 258 | { 259 | enum wined3d_cs_op opcode; 260 | struct 
wined3d_buffer *buffer; 261 | - struct wined3d_map_range map_range; 262 | + struct wined3d_buffer_heap_element *map_range; 263 | }; 264 | 265 | struct wined3d_cs_stop 266 | @@ -2495,14 +2495,14 @@ static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *da 267 | wined3d_resource_release(&op->buffer->resource); 268 | } 269 | 270 | -void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) 271 | +void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *elem) 272 | { 273 | struct wined3d_cs_discard_buffer *op; 274 | 275 | op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); 276 | op->opcode = WINED3D_CS_OP_DISCARD_BUFFER; 277 | op->buffer = buffer; 278 | - op->map_range = map_range; 279 | + op->map_range = elem; 280 | 281 | wined3d_resource_acquire(&buffer->resource); 282 | 283 | diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h 284 | index 4918293b82b..34f3fb540df 100644 285 | --- a/dlls/wined3d/wined3d_private.h 286 | +++ b/dlls/wined3d/wined3d_private.h 287 | @@ -3540,6 +3540,18 @@ struct wined3d_map_range 288 | GLsizeiptr size; 289 | }; 290 | 291 | +struct wined3d_buffer_heap_element 292 | +{ 293 | + struct wined3d_map_range range; 294 | + 295 | + // rbtree data 296 | + struct wine_rb_entry entry; 297 | + 298 | + // Binned free list positions 299 | + struct wined3d_buffer_heap_element *next; 300 | + struct wined3d_buffer_heap_element *prev; 301 | +}; 302 | + 303 | enum wined3d_cs_queue_id 304 | { 305 | WINED3D_CS_QUEUE_DEFAULT = 0, 306 | @@ -3684,7 +3696,7 @@ void wined3d_cs_emit_unload_resource(struct wined3d_cs *cs, struct wined3d_resou 307 | void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource, 308 | unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch, 309 | unsigned 
int slice_pitch) DECLSPEC_HIDDEN; 310 | -void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN; 311 | +void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_buffer_heap_element *map_range) DECLSPEC_HIDDEN; 312 | void wined3d_cs_init_object(struct wined3d_cs *cs, 313 | void (*callback)(void *object), void *object) DECLSPEC_HIDDEN; 314 | HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx, 315 | @@ -3718,7 +3730,6 @@ enum wined3d_buffer_conversion_type 316 | CONV_POSITIONT, 317 | }; 318 | 319 | -struct wined3d_buffer_heap_element; 320 | struct wined3d_buffer_heap_fenced_element; 321 | 322 | // Number of power-of-two buckets to populate. 323 | @@ -3757,11 +3768,11 @@ HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr s 324 | HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; 325 | // Fetches a buffer from the heap of at least the given size. 326 | // Attempts to coalesce blocks under memory pressure. 327 | -HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; 328 | +HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_buffer_heap_element** out_elem) DECLSPEC_HIDDEN; 329 | // Immediately frees a heap-allocated buffer segment. 330 | -HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; 331 | +HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN; 332 | // Enqueues a buffer segment to return to the heap once its fence has been signaled. 
333 | -HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN; 334 | +HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_buffer_heap_element *elem) DECLSPEC_HIDDEN; 335 | // Issues a fence for the current set of pending fenced buffers. 336 | // Double-buffered: if the last fence issued has not yet been triggered, waits 337 | // on it. 338 | @@ -3800,8 +3811,8 @@ struct wined3d_buffer 339 | 340 | /* persistent mapped buffer */ 341 | struct wined3d_buffer_heap *buffer_heap; 342 | - struct wined3d_map_range cs_persistent_map; 343 | - struct wined3d_map_range mt_persistent_map; // TODO: make struct list? 344 | + struct wined3d_buffer_heap_element *cs_persistent_map; 345 | + struct wined3d_buffer_heap_element *mt_persistent_map; 346 | }; 347 | 348 | static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource) 349 | -- 350 | 2.17.0 351 | 352 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. 
By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 
47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 
80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 
114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). 
Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. 
Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) 
Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 
251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. 
Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 
317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. 
Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. 
For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. 
The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | <one line to give the library's name and a brief idea of what it does.> 474 | Copyright (C) <year> <name of author> 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 489 | 490 | Also add information on how to contact you by electronic and paper mail. 491 | 492 | You should also get your employer (if you work as a programmer) or your 493 | school, if any, to sign a "copyright disclaimer" for the library, if 494 | necessary. Here is a sample; alter the names: 495 | 496 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 497 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. 498 | 499 | <signature of Ty Coon>, 1 April 1990 500 | Ty Coon, President of Vice 501 | 502 | That's all there is to it! 503 | -------------------------------------------------------------------------------- /patches/0002-wined3d-Add-support-for-backing-dynamic-wined3d_buff.patch: -------------------------------------------------------------------------------- 1 | From 21a562c674ab753e3f818d4c2fc96af6cbe27013 Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Mon, 5 Mar 2018 15:39:11 -0800 4 | Subject: [PATCH 2/9] wined3d: Add support for backing dynamic wined3d_buffer 5 | objects by a persistent map.
6 | 7 | --- 8 | dlls/wined3d/buffer.c | 220 ++++++++++++++++++++++++++++++++- 9 | dlls/wined3d/context.c | 6 +- 10 | dlls/wined3d/cs.c | 60 ++++++++- 11 | dlls/wined3d/resource.c | 18 ++- 12 | dlls/wined3d/state.c | 17 ++- 13 | dlls/wined3d/texture.c | 13 ++ 14 | dlls/wined3d/utils.c | 1 + 15 | dlls/wined3d/wined3d_private.h | 11 ++ 16 | 8 files changed, 336 insertions(+), 10 deletions(-) 17 | 18 | diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c 19 | index 2350661e363..40c454e4668 100644 20 | --- a/dlls/wined3d/buffer.c 21 | +++ b/dlls/wined3d/buffer.c 22 | @@ -28,12 +28,14 @@ 23 | #include "wined3d_private.h" 24 | 25 | WINE_DEFAULT_DEBUG_CHANNEL(d3d); 26 | +WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); 27 | 28 | #define WINED3D_BUFFER_HASDESC 0x01 /* A vertex description has been found. */ 29 | #define WINED3D_BUFFER_USE_BO 0x02 /* Use a buffer object for this buffer. */ 30 | #define WINED3D_BUFFER_PIN_SYSMEM 0x04 /* Keep a system memory copy for this buffer. */ 31 | #define WINED3D_BUFFER_DISCARD 0x08 /* A DISCARD lock has occurred since the last preload. */ 32 | #define WINED3D_BUFFER_APPLESYNC 0x10 /* Using sync as in GL_APPLE_flush_buffer_range. */ 33 | +#define WINED3D_BUFFER_PERSISTENT 0x20 /* Uses a persistent-mapped buffer via ARB_buffer_storage. */ 34 | 35 | #define VB_MAXDECLCHANGES 100 /* After that number of decl changes we stop converting */ 36 | #define VB_RESETDECLCHANGE 1000 /* Reset the decl changecount after that number of draws */ 37 | @@ -269,6 +271,52 @@ fail: 38 | return FALSE; 39 | } 40 | 41 | +/* Context activation is done by the caller. 
*/ 42 | +static BOOL buffer_alloc_persistent_map(struct wined3d_buffer *buffer, struct wined3d_context *context) 43 | +{ 44 | + struct wined3d_device *device = buffer->resource.device; 45 | + struct wined3d_buffer_heap *heap; 46 | + struct wined3d_map_range map_range; 47 | + HRESULT hr; 48 | + 49 | + if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) 50 | + { 51 | + // Use a heap aligned to constant buffer offset requirements. 52 | + heap = device->cb_buffer_heap; 53 | + } 54 | + else 55 | + { 56 | + if (!(buffer->resource.usage & WINED3DUSAGE_WRITEONLY)) 57 | + FIXME("Using a write-only persistent buffer for %p without WINED3DUSAGE_WRITEONLY.\n", buffer); 58 | + heap = device->wo_buffer_heap; 59 | + } 60 | + 61 | + buffer->buffer_heap = heap; 62 | + if (FAILED(hr = wined3d_buffer_heap_alloc(heap, buffer->resource.size, &map_range))) 63 | + { 64 | + goto fail; 65 | + } 66 | + buffer->cs_persistent_map = map_range; 67 | + buffer->mt_persistent_map = map_range; 68 | + return TRUE; 69 | + 70 | +fail: 71 | + // FIXME(acomminos): fall back to standalone BO here? 72 | + ERR("Failed to create persistent map for buffer %p, hr=%x\n", buffer, hr); 73 | + buffer->buffer_heap = NULL; 74 | + return FALSE; 75 | +} 76 | + 77 | +static void buffer_free_persistent_map(struct wined3d_buffer *buffer) 78 | +{ 79 | + if (!buffer->buffer_heap) 80 | + return; 81 | + 82 | + // TODO(acomminos): get the CS thread to free pending main thread buffers. 
83 | + wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); 84 | + buffer->buffer_heap = NULL; 85 | +} 86 | + 87 | static BOOL buffer_process_converted_attribute(struct wined3d_buffer *buffer, 88 | const enum wined3d_buffer_conversion_type conversion_type, 89 | const struct wined3d_stream_info_element *attrib, DWORD *stride_this_run) 90 | @@ -628,6 +676,16 @@ static BOOL wined3d_buffer_prepare_location(struct wined3d_buffer *buffer, 91 | return FALSE; 92 | } 93 | return buffer_create_buffer_object(buffer, context); 94 | + case WINED3D_LOCATION_PERSISTENT_MAP: 95 | + if (buffer->buffer_heap) 96 | + return TRUE; 97 | + 98 | + if (!(buffer->flags & WINED3D_BUFFER_PERSISTENT)) 99 | + { 100 | + WARN("Trying to map a persistent region for buffer %p without WINED3D_BUFFER_PERSISTENT.\n", buffer); 101 | + return FALSE; 102 | + } 103 | + return buffer_alloc_persistent_map(buffer, context); 104 | 105 | default: 106 | ERR("Invalid location %s.\n", wined3d_debug_location(location)); 107 | @@ -686,16 +744,32 @@ BOOL wined3d_buffer_load_location(struct wined3d_buffer *buffer, 108 | buffer_conversion_upload(buffer, context); 109 | break; 110 | 111 | + case WINED3D_LOCATION_PERSISTENT_MAP: 112 | + // TODO(acomminos): are we guaranteed location_sysmem to be kept? 113 | + // no. 
114 | + if (buffer->conversion_map) 115 | + FIXME("Attempting to use conversion map with persistent mapping.\n"); 116 | + memcpy(buffer->buffer_heap->map_ptr + 117 | + buffer->cs_persistent_map.offset, 118 | + buffer->resource.heap_memory, buffer->resource.size); 119 | + break; 120 | + 121 | default: 122 | ERR("Invalid location %s.\n", wined3d_debug_location(location)); 123 | return FALSE; 124 | } 125 | 126 | wined3d_buffer_validate_location(buffer, location); 127 | - if (buffer->resource.heap_memory && location == WINED3D_LOCATION_BUFFER 128 | + if (buffer->resource.heap_memory 129 | + && location & WINED3D_LOCATION_BUFFER 130 | && !(buffer->resource.usage & WINED3DUSAGE_DYNAMIC)) 131 | wined3d_buffer_evict_sysmem(buffer); 132 | 133 | + // FIXME(acomminos) 134 | + if (buffer->resource.heap_memory 135 | + && location & WINED3D_LOCATION_PERSISTENT_MAP) 136 | + wined3d_buffer_evict_sysmem(buffer); 137 | + 138 | return TRUE; 139 | } 140 | 141 | @@ -717,12 +791,25 @@ DWORD wined3d_buffer_get_memory(struct wined3d_buffer *buffer, 142 | { 143 | data->buffer_object = buffer->buffer_object; 144 | data->addr = NULL; 145 | + data->length = buffer->resource.size; 146 | return WINED3D_LOCATION_BUFFER; 147 | } 148 | + if (locations & WINED3D_LOCATION_PERSISTENT_MAP) 149 | + { 150 | + // FIXME(acomminos): should we expose a buffer object we don't wholly own here? 151 | + data->buffer_object = buffer->buffer_heap->buffer_object; 152 | + data->addr = buffer->cs_persistent_map.offset; 153 | + // Note that the size of the underlying buffer allocation may be larger 154 | + // than the buffer knows about. In this case, we've rounded it up to be 155 | + // aligned (e.g. for uniform buffer offsets). 
156 | + data->length = buffer->cs_persistent_map.size; 157 | + return WINED3D_LOCATION_PERSISTENT_MAP; 158 | + } 159 | if (locations & WINED3D_LOCATION_SYSMEM) 160 | { 161 | data->buffer_object = 0; 162 | data->addr = buffer->resource.heap_memory; 163 | + data->length = buffer->resource.size; 164 | return WINED3D_LOCATION_SYSMEM; 165 | } 166 | 167 | @@ -758,6 +845,8 @@ static void buffer_unload(struct wined3d_resource *resource) 168 | buffer->flags &= ~WINED3D_BUFFER_HASDESC; 169 | } 170 | 171 | + buffer_free_persistent_map(buffer); 172 | + 173 | resource_unload(resource); 174 | } 175 | 176 | @@ -781,6 +870,8 @@ static void wined3d_buffer_destroy_object(void *object) 177 | heap_free(buffer->conversion_map); 178 | } 179 | 180 | + buffer_free_persistent_map(buffer); 181 | + 182 | heap_free(buffer->maps); 183 | heap_free(buffer); 184 | } 185 | @@ -897,6 +988,16 @@ void wined3d_buffer_load(struct wined3d_buffer *buffer, struct wined3d_context * 186 | 187 | buffer_mark_used(buffer); 188 | 189 | + if (buffer->flags & WINED3D_BUFFER_PERSISTENT) 190 | + { 191 | + if (wined3d_buffer_load_location(buffer, context, WINED3D_LOCATION_PERSISTENT_MAP)) 192 | + return; 193 | + 194 | + ERR("Failed to preload persistent mapping for %p, falling back to BO.\n", buffer); 195 | + buffer->flags |= WINED3D_BUFFER_USE_BO; 196 | + buffer->flags &= ~WINED3D_BUFFER_PERSISTENT; 197 | + } 198 | + 199 | /* TODO: Make converting independent from VBOs */ 200 | if (!(buffer->flags & WINED3D_BUFFER_USE_BO)) 201 | { 202 | @@ -1007,6 +1108,25 @@ static HRESULT wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UI 203 | 204 | count = ++buffer->resource.map_count; 205 | 206 | + if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) 207 | + { 208 | + const struct wined3d_gl_info *gl_info; 209 | + context = context_acquire(device, NULL, 0); 210 | + 211 | + FIXME_(d3d_perf)("Fences not used for persistent buffer maps on CS thread, using glFinish.\n"); 212 | + 213 | + gl_info = 
context->gl_info; 214 | + gl_info->gl_ops.gl.p_glFinish(); 215 | + 216 | + base = buffer->buffer_heap->map_ptr 217 | + + buffer->cs_persistent_map.offset; 218 | + *data = base + offset; 219 | + 220 | + context_release(context); 221 | + 222 | + return WINED3D_OK; 223 | + } 224 | + 225 | if (buffer->buffer_object) 226 | { 227 | unsigned int dirty_offset = offset, dirty_size = size; 228 | @@ -1149,6 +1269,12 @@ static void wined3d_buffer_unmap(struct wined3d_buffer *buffer) 229 | return; 230 | } 231 | 232 | + if (buffer->flags & WINED3D_BUFFER_PERSISTENT) 233 | + { 234 | + TRACE("Persistent buffer, ignore unmap.\n"); 235 | + return; 236 | + } 237 | + 238 | if (buffer->map_ptr) 239 | { 240 | struct wined3d_device *device = buffer->resource.device; 241 | @@ -1251,6 +1377,64 @@ static void buffer_resource_preload(struct wined3d_resource *resource) 242 | 243 | static HRESULT buffer_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, 244 | struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) 245 | +{ 246 | + struct wined3d_buffer *buffer = buffer_from_resource(resource); 247 | + UINT offset = box ? box->left : 0; 248 | + 249 | + if (sub_resource_idx) 250 | + { 251 | + WARN("Invalid sub_resource_idx %u.\n", sub_resource_idx); 252 | + return E_INVALIDARG; 253 | + } 254 | + 255 | + // Support immediate mapping of persistent buffers off the command thread, 256 | + // which require no GL calls to interface with. 
257 | + if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) 258 | + { 259 | + map_desc->row_pitch = map_desc->slice_pitch = buffer->desc.byte_width; 260 | + if (flags & WINED3D_MAP_DISCARD) 261 | + { 262 | + HRESULT hr; 263 | + struct wined3d_map_range map_range; 264 | + if (FAILED(hr = wined3d_buffer_heap_alloc(buffer->buffer_heap, resource->size, &map_range))) 265 | + { 266 | + FIXME_(d3d_perf)("Failed to allocate new buffer, falling back to sync path.\n"); 267 | + return hr; 268 | + } 269 | + map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; 270 | + resource->map_count++; 271 | + 272 | + buffer->mt_persistent_map = map_range; 273 | + 274 | + // Discard handler on CSMT thread is responsible for returning the 275 | + // currently used buffer to the free pool, along with the fence that 276 | + // must be called before the buffer can be reused. 277 | + wined3d_cs_emit_discard_buffer(resource->device->cs, buffer, map_range); 278 | + return WINED3D_OK; 279 | + } 280 | + else if (flags & WINED3D_MAP_NOOVERWRITE) 281 | + { 282 | + // Allow immediate access for persistent buffers without a fence. 283 | + // Always use the latest buffer in this case in case the latest 284 | + // DISCARDed one hasn't reached the command stream yet. 285 | + struct wined3d_map_range map_range = buffer->mt_persistent_map; 286 | + map_desc->data = buffer->buffer_heap->map_ptr + map_range.offset + offset; 287 | + resource->map_count++; 288 | + return WINED3D_OK; 289 | + } 290 | + else 291 | + { 292 | + // TODO(acomminos): Should check mapped ranges to see if the region is writeable even though NOOVERWRITE is specified. 293 | + WARN_(d3d_perf)("Mapping persistent buffer %p in sync with CS thread.\n", buffer); 294 | + // XXX(acomminos): kill this early return. they're the worst. 
295 | + } 296 | + } 297 | + 298 | + return E_NOTIMPL; 299 | +} 300 | + 301 | +static HRESULT buffer_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, 302 | + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) 303 | { 304 | struct wined3d_buffer *buffer = buffer_from_resource(resource); 305 | UINT offset, size; 306 | @@ -1294,6 +1478,18 @@ static HRESULT buffer_resource_sub_resource_map_info(struct wined3d_resource *re 307 | } 308 | 309 | static HRESULT buffer_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) 310 | +{ 311 | + struct wined3d_buffer *buffer = buffer_from_resource(resource); 312 | + if (buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) 313 | + { 314 | + // Nothing to be done to unmap a region of a persistent buffer. 315 | + resource->map_count--; 316 | + return WINED3D_OK; 317 | + } 318 | + return E_NOTIMPL; 319 | +} 320 | + 321 | +static HRESULT buffer_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) 322 | { 323 | if (sub_resource_idx) 324 | { 325 | @@ -1314,6 +1510,8 @@ static const struct wined3d_resource_ops buffer_resource_ops = 326 | buffer_resource_sub_resource_map, 327 | buffer_resource_sub_resource_map_info, 328 | buffer_resource_sub_resource_unmap, 329 | + buffer_resource_sub_resource_map_cs, 330 | + buffer_resource_sub_resource_unmap_cs, 331 | }; 332 | 333 | static GLenum buffer_type_hint_from_bind_flags(const struct wined3d_gl_info *gl_info, 334 | @@ -1392,12 +1590,30 @@ static HRESULT buffer_init(struct wined3d_buffer *buffer, struct wined3d_device 335 | buffer->flags |= WINED3D_BUFFER_PIN_SYSMEM; 336 | } 337 | 338 | + if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC) 339 | + { 340 | + if (!gl_info->supported[ARB_BUFFER_STORAGE]) 341 | + { 342 | + WARN_(d3d_perf)("Not creating a persistent mapping for a dynamic buffer because ARB_buffer_storage is unsupported.\n"); 343 | + 
} 344 | + else 345 | + { 346 | + // If supported, use persistent mapped buffers instead of a 347 | + // standalone BO for dynamic buffers. 348 | + buffer->flags |= WINED3D_BUFFER_PERSISTENT; 349 | + } 350 | + } 351 | + 352 | /* Observations show that draw_primitive_immediate_mode() is faster on 353 | * dynamic vertex buffers than converting + draw_primitive_arrays(). 354 | * (Half-Life 2 and others.) */ 355 | dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE] || gl_info->supported[ARB_MAP_BUFFER_RANGE]; 356 | 357 | - if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) 358 | + if (buffer->flags & WINED3D_BUFFER_PERSISTENT) 359 | + { 360 | + TRACE("Not creating a BO because a persistent mapped buffer will be used.\n"); 361 | + } 362 | + else if (!gl_info->supported[ARB_VERTEX_BUFFER_OBJECT]) 363 | { 364 | TRACE("Not creating a BO because GL_ARB_vertex_buffer is not supported.\n"); 365 | } 366 | diff --git a/dlls/wined3d/context.c b/dlls/wined3d/context.c 367 | index cff50df6574..698ba6fc8f2 100644 368 | --- a/dlls/wined3d/context.c 369 | +++ b/dlls/wined3d/context.c 370 | @@ -4970,7 +4970,11 @@ void draw_primitive(struct wined3d_device *device, const struct wined3d_state *s 371 | if (parameters->indexed) 372 | { 373 | struct wined3d_buffer *index_buffer = state->index_buffer; 374 | - if (!index_buffer->buffer_object || !stream_info->all_vbo) 375 | + if (index_buffer->locations & WINED3D_LOCATION_PERSISTENT_MAP) 376 | + { 377 | + idx_data = index_buffer->cs_persistent_map.offset; 378 | + } 379 | + else if (!index_buffer->buffer_object || !stream_info->all_vbo) 380 | { 381 | idx_data = index_buffer->resource.heap_memory; 382 | } 383 | diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c 384 | index e2bfd88dd2c..a11d21d1961 100644 385 | --- a/dlls/wined3d/cs.c 386 | +++ b/dlls/wined3d/cs.c 387 | @@ -73,6 +73,7 @@ enum wined3d_cs_op 388 | WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW, 389 | WINED3D_CS_OP_COPY_UAV_COUNTER, 390 | WINED3D_CS_OP_GENERATE_MIPMAPS, 391 
| + WINED3D_CS_OP_DISCARD_BUFFER, 392 | WINED3D_CS_OP_STOP, 393 | }; 394 | 395 | @@ -439,6 +440,13 @@ struct wined3d_cs_generate_mipmaps 396 | struct wined3d_shader_resource_view *view; 397 | }; 398 | 399 | +struct wined3d_cs_discard_buffer 400 | +{ 401 | + enum wined3d_cs_op opcode; 402 | + struct wined3d_buffer *buffer; 403 | + struct wined3d_map_range map_range; 404 | +}; 405 | + 406 | struct wined3d_cs_stop 407 | { 408 | enum wined3d_cs_op opcode; 409 | @@ -2025,7 +2033,7 @@ static void wined3d_cs_exec_map(struct wined3d_cs *cs, const void *data) 410 | const struct wined3d_cs_map *op = data; 411 | struct wined3d_resource *resource = op->resource; 412 | 413 | - *op->hr = resource->resource_ops->resource_sub_resource_map(resource, 414 | + *op->hr = resource->resource_ops->resource_sub_resource_map_cs(resource, 415 | op->sub_resource_idx, op->map_desc, op->box, op->flags); 416 | } 417 | 418 | @@ -2059,7 +2067,7 @@ static void wined3d_cs_exec_unmap(struct wined3d_cs *cs, const void *data) 419 | const struct wined3d_cs_unmap *op = data; 420 | struct wined3d_resource *resource = op->resource; 421 | 422 | - *op->hr = resource->resource_ops->resource_sub_resource_unmap(resource, op->sub_resource_idx); 423 | + *op->hr = resource->resource_ops->resource_sub_resource_unmap_cs(resource, op->sub_resource_idx); 424 | } 425 | 426 | HRESULT wined3d_cs_unmap(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx) 427 | @@ -2454,6 +2462,53 @@ void wined3d_cs_emit_generate_mipmaps(struct wined3d_cs *cs, struct wined3d_shad 428 | cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); 429 | } 430 | 431 | +static void wined3d_cs_exec_discard_buffer(struct wined3d_cs *cs, const void *data) 432 | +{ 433 | + const struct wined3d_cs_discard_buffer *op = data; 434 | + struct wined3d_buffer *buffer = op->buffer; 435 | + HRESULT hr; 436 | + 437 | + // TODO(acomminos): should call into buffer.c here instead. 
438 | + if (FAILED(hr = wined3d_buffer_heap_free_fenced(buffer->buffer_heap, cs->device, buffer->cs_persistent_map))) 439 | + { 440 | + ERR("Failed to do a fenced free on discarded buffer %p, hr %x\n. Freeing anyway.", buffer, hr); 441 | + wined3d_buffer_heap_free(buffer->buffer_heap, buffer->cs_persistent_map); 442 | + } 443 | + 444 | + buffer->cs_persistent_map = op->map_range; 445 | + 446 | + // TODO(acomminos): merge this logic with buffer.c functions for standalone BOs 447 | + if (buffer->bind_flags & WINED3D_BIND_VERTEX_BUFFER) 448 | + device_invalidate_state(cs->device, STATE_STREAMSRC); 449 | + if (buffer->bind_flags & WINED3D_BIND_INDEX_BUFFER) 450 | + device_invalidate_state(cs->device, STATE_INDEXBUFFER); 451 | + if (buffer->bind_flags & WINED3D_BIND_CONSTANT_BUFFER) 452 | + { 453 | + device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_VERTEX)); 454 | + device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_HULL)); 455 | + device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_DOMAIN)); 456 | + device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_GEOMETRY)); 457 | + device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_PIXEL)); 458 | + device_invalidate_state(cs->device, STATE_CONSTANT_BUFFER(WINED3D_SHADER_TYPE_COMPUTE)); 459 | + } 460 | + 461 | + wined3d_resource_release(&op->buffer->resource); 462 | +} 463 | + 464 | +void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) 465 | +{ 466 | + struct wined3d_cs_discard_buffer *op; 467 | + 468 | + op = cs->ops->require_space(cs, sizeof(*op), WINED3D_CS_QUEUE_DEFAULT); 469 | + op->opcode = WINED3D_CS_OP_DISCARD_BUFFER; 470 | + op->buffer = buffer; 471 | + op->map_range = map_range; 472 | + 473 | + wined3d_resource_acquire(&buffer->resource); 474 | + 475 | + cs->ops->submit(cs, WINED3D_CS_QUEUE_DEFAULT); 476 | +} 477 | + 
478 | static void wined3d_cs_emit_stop(struct wined3d_cs *cs) 479 | { 480 | struct wined3d_cs_stop *op; 481 | @@ -2514,6 +2569,7 @@ static void (* const wined3d_cs_op_handlers[])(struct wined3d_cs *cs, const void 482 | /* WINED3D_CS_OP_CLEAR_UNORDERED_ACCESS_VIEW */ wined3d_cs_exec_clear_unordered_access_view, 483 | /* WINED3D_CS_OP_COPY_UAV_COUNTER */ wined3d_cs_exec_copy_uav_counter, 484 | /* WINED3D_CS_OP_GENERATE_MIPMAPS */ wined3d_cs_exec_generate_mipmaps, 485 | + /* WINED3D_CS_OP_DISCARD_BUFFER */ wined3d_cs_exec_discard_buffer, 486 | }; 487 | 488 | static BOOL wined3d_cs_st_check_space(struct wined3d_cs *cs, size_t size, enum wined3d_cs_queue_id queue_id) 489 | diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c 490 | index df73997c84c..7060d71613a 100644 491 | --- a/dlls/wined3d/resource.c 492 | +++ b/dlls/wined3d/resource.c 493 | @@ -334,6 +334,7 @@ static DWORD wined3d_resource_sanitise_map_flags(const struct wined3d_resource * 494 | HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, 495 | struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) 496 | { 497 | + HRESULT hr; 498 | TRACE("resource %p, sub_resource_idx %u, map_desc %p, box %s, flags %#x.\n", 499 | resource, sub_resource_idx, map_desc, debug_box(box), flags); 500 | 501 | @@ -356,9 +357,14 @@ HRESULT CDECL wined3d_resource_map(struct wined3d_resource *resource, unsigned i 502 | } 503 | 504 | flags = wined3d_resource_sanitise_map_flags(resource, flags); 505 | - wined3d_resource_wait_idle(resource); 506 | + if (FAILED(hr = resource->resource_ops->resource_sub_resource_map(resource, sub_resource_idx, map_desc, box, flags))) 507 | + { 508 | + TRACE_(d3d_perf)("Mapping resource %p on the command stream.\n", resource); 509 | + wined3d_resource_wait_idle(resource); 510 | + hr = wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); 511 | + } 512 | 513 | - return 
wined3d_cs_map(resource->device->cs, resource, sub_resource_idx, map_desc, box, flags); 514 | + return hr; 515 | } 516 | 517 | HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, 518 | @@ -371,9 +377,15 @@ HRESULT CDECL wined3d_resource_map_info(struct wined3d_resource *resource, unsig 519 | 520 | HRESULT CDECL wined3d_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) 521 | { 522 | + HRESULT hr; 523 | TRACE("resource %p, sub_resource_idx %u.\n", resource, sub_resource_idx); 524 | 525 | - return wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); 526 | + if (FAILED(hr = resource->resource_ops->resource_sub_resource_unmap(resource, sub_resource_idx))) 527 | + { 528 | + TRACE_(d3d_perf)("Unmapping resource %p on the command stream.\n", resource); 529 | + hr = wined3d_cs_unmap(resource->device->cs, resource, sub_resource_idx); 530 | + } 531 | + return hr; 532 | } 533 | 534 | UINT CDECL wined3d_resource_update_info(struct wined3d_resource *resource, unsigned int sub_resource_idx, 535 | diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c 536 | index 89f2576fd01..6564f7cc180 100644 537 | --- a/dlls/wined3d/state.c 538 | +++ b/dlls/wined3d/state.c 539 | @@ -4348,7 +4348,11 @@ static void indexbuffer(struct wined3d_context *context, const struct wined3d_st 540 | else 541 | { 542 | struct wined3d_buffer *ib = state->index_buffer; 543 | - GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); 544 | + // FIXME(acomminos): disasterous. 
545 | + if (ib->locations & WINED3D_LOCATION_PERSISTENT_MAP) 546 | + GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_heap->buffer_object)); 547 | + else 548 | + GL_EXTCALL(glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ib->buffer_object)); 549 | } 550 | } 551 | 552 | @@ -4414,6 +4418,7 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state 553 | enum wined3d_shader_type shader_type; 554 | struct wined3d_buffer *buffer; 555 | unsigned int i, base, count; 556 | + struct wined3d_bo_address bo_addr; 557 | 558 | TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 559 | 560 | @@ -4426,7 +4431,15 @@ static void state_cb(struct wined3d_context *context, const struct wined3d_state 561 | for (i = 0; i < count; ++i) 562 | { 563 | buffer = state->cb[shader_type][i]; 564 | - GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, buffer ? buffer->buffer_object : 0)); 565 | + if (buffer) 566 | + { 567 | + wined3d_buffer_get_memory(buffer, &bo_addr, buffer->locations); 568 | + GL_EXTCALL(glBindBufferRange(GL_UNIFORM_BUFFER, base + i, bo_addr.buffer_object, bo_addr.addr, bo_addr.length)); 569 | + } 570 | + else 571 | + { 572 | + GL_EXTCALL(glBindBufferBase(GL_UNIFORM_BUFFER, base + i, 0)); 573 | + } 574 | } 575 | checkGLcall("bind constant buffers"); 576 | } 577 | diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c 578 | index 3290879451d..e15cfa42544 100644 579 | --- a/dlls/wined3d/texture.c 580 | +++ b/dlls/wined3d/texture.c 581 | @@ -2216,6 +2216,12 @@ static void wined3d_texture_unload(struct wined3d_resource *resource) 582 | 583 | static HRESULT texture_resource_sub_resource_map(struct wined3d_resource *resource, unsigned int sub_resource_idx, 584 | struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) 585 | +{ 586 | + return E_NOTIMPL; 587 | +} 588 | + 589 | +static HRESULT texture_resource_sub_resource_map_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx, 590 | + 
struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags) 591 | { 592 | const struct wined3d_format *format = resource->format; 593 | struct wined3d_texture_sub_resource *sub_resource; 594 | @@ -2376,6 +2382,11 @@ static HRESULT texture_resource_sub_resource_map_info(struct wined3d_resource *r 595 | } 596 | 597 | static HRESULT texture_resource_sub_resource_unmap(struct wined3d_resource *resource, unsigned int sub_resource_idx) 598 | +{ 599 | + return E_NOTIMPL; 600 | +} 601 | + 602 | +static HRESULT texture_resource_sub_resource_unmap_cs(struct wined3d_resource *resource, unsigned int sub_resource_idx) 603 | { 604 | struct wined3d_texture_sub_resource *sub_resource; 605 | struct wined3d_device *device = resource->device; 606 | @@ -2428,6 +2439,8 @@ static const struct wined3d_resource_ops texture_resource_ops = 607 | texture_resource_sub_resource_map, 608 | texture_resource_sub_resource_map_info, 609 | texture_resource_sub_resource_unmap, 610 | + texture_resource_sub_resource_map_cs, 611 | + texture_resource_sub_resource_unmap_cs, 612 | }; 613 | 614 | /* Context activation is done by the caller. 
*/ 615 | diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c 616 | index a092c83e03b..f834decba88 100644 617 | --- a/dlls/wined3d/utils.c 618 | +++ b/dlls/wined3d/utils.c 619 | @@ -6322,6 +6322,7 @@ const char *wined3d_debug_location(DWORD location) 620 | LOCATION_TO_STR(WINED3D_LOCATION_DRAWABLE); 621 | LOCATION_TO_STR(WINED3D_LOCATION_RB_MULTISAMPLE); 622 | LOCATION_TO_STR(WINED3D_LOCATION_RB_RESOLVED); 623 | + LOCATION_TO_STR(WINED3D_LOCATION_PERSISTENT_MAP); 624 | #undef LOCATION_TO_STR 625 | if (location) 626 | FIXME("Unrecognized location flag(s) %#x.\n", location); 627 | diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h 628 | index dea1c2f5207..4918293b82b 100644 629 | --- a/dlls/wined3d/wined3d_private.h 630 | +++ b/dlls/wined3d/wined3d_private.h 631 | @@ -1484,6 +1484,7 @@ struct wined3d_bo_address 632 | { 633 | GLuint buffer_object; 634 | BYTE *addr; 635 | + GLsizeiptr length; 636 | }; 637 | 638 | struct wined3d_const_bo_address 639 | @@ -3063,6 +3064,9 @@ struct wined3d_resource_ops 640 | HRESULT (*resource_map_info)(struct wined3d_resource *resource, unsigned int sub_resource_idx, 641 | struct wined3d_map_info *info, DWORD flags); 642 | HRESULT (*resource_sub_resource_unmap)(struct wined3d_resource *resource, unsigned int sub_resource_idx); 643 | + HRESULT (*resource_sub_resource_map_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx, 644 | + struct wined3d_map_desc *map_desc, const struct wined3d_box *box, DWORD flags); 645 | + HRESULT (*resource_sub_resource_unmap_cs)(struct wined3d_resource *resource, unsigned int sub_resource_idx); 646 | }; 647 | 648 | struct wined3d_resource 649 | @@ -3377,6 +3381,7 @@ void wined3d_texture_validate_location(struct wined3d_texture *texture, 650 | #define WINED3D_LOCATION_DRAWABLE 0x00000040 651 | #define WINED3D_LOCATION_RB_MULTISAMPLE 0x00000080 652 | #define WINED3D_LOCATION_RB_RESOLVED 0x00000100 653 | +#define WINED3D_LOCATION_PERSISTENT_MAP 0x00000200 654 | 655 
| const char *wined3d_debug_location(DWORD location) DECLSPEC_HIDDEN; 656 | 657 | @@ -3679,6 +3684,7 @@ void wined3d_cs_emit_unload_resource(struct wined3d_cs *cs, struct wined3d_resou 658 | void wined3d_cs_emit_update_sub_resource(struct wined3d_cs *cs, struct wined3d_resource *resource, 659 | unsigned int sub_resource_idx, const struct wined3d_box *box, const void *data, unsigned int row_pitch, 660 | unsigned int slice_pitch) DECLSPEC_HIDDEN; 661 | +void wined3d_cs_emit_discard_buffer(struct wined3d_cs *cs, struct wined3d_buffer *buffer, struct wined3d_map_range map_range) DECLSPEC_HIDDEN; 662 | void wined3d_cs_init_object(struct wined3d_cs *cs, 663 | void (*callback)(void *object), void *object) DECLSPEC_HIDDEN; 664 | HRESULT wined3d_cs_map(struct wined3d_cs *cs, struct wined3d_resource *resource, unsigned int sub_resource_idx, 665 | @@ -3791,6 +3797,11 @@ struct wined3d_buffer 666 | UINT stride; /* 0 if no conversion */ 667 | enum wined3d_buffer_conversion_type *conversion_map; /* NULL if no conversion */ 668 | UINT conversion_stride; /* 0 if no shifted conversion */ 669 | + 670 | + /* persistent mapped buffer */ 671 | + struct wined3d_buffer_heap *buffer_heap; 672 | + struct wined3d_map_range cs_persistent_map; 673 | + struct wined3d_map_range mt_persistent_map; // TODO: make struct list? 674 | }; 675 | 676 | static inline struct wined3d_buffer *buffer_from_resource(struct wined3d_resource *resource) 677 | -- 678 | 2.17.0 679 | 680 | -------------------------------------------------------------------------------- /patches/0001-wined3d-Initial-implementation-of-a-persistent-mappe.patch: -------------------------------------------------------------------------------- 1 | From db8f9546baea233a4bcfae27fa12d655e7c4be3d Mon Sep 17 00:00:00 2001 2 | From: Andrew Comminos 3 | Date: Mon, 5 Mar 2018 15:38:35 -0800 4 | Subject: [PATCH 1/9] wined3d: Initial implementation of a persistent mapped 5 | buffer allocator. 
6 | 7 | --- 8 | dlls/wined3d/Makefile.in | 1 + 9 | dlls/wined3d/buffer_heap.c | 508 +++++++++++++++++++++++++++++++++ 10 | dlls/wined3d/cs.c | 9 + 11 | dlls/wined3d/device.c | 52 ++++ 12 | dlls/wined3d/directx.c | 3 + 13 | dlls/wined3d/query.c | 2 +- 14 | dlls/wined3d/wined3d_gl.h | 1 + 15 | dlls/wined3d/wined3d_private.h | 68 ++++- 16 | 8 files changed, 640 insertions(+), 4 deletions(-) 17 | create mode 100644 dlls/wined3d/buffer_heap.c 18 | 19 | diff --git a/dlls/wined3d/Makefile.in b/dlls/wined3d/Makefile.in 20 | index b850ba6872c..52ef8666fb7 100644 21 | --- a/dlls/wined3d/Makefile.in 22 | +++ b/dlls/wined3d/Makefile.in 23 | @@ -6,6 +6,7 @@ C_SRCS = \ 24 | arb_program_shader.c \ 25 | ati_fragment_shader.c \ 26 | buffer.c \ 27 | + buffer_heap.c \ 28 | context.c \ 29 | cs.c \ 30 | device.c \ 31 | diff --git a/dlls/wined3d/buffer_heap.c b/dlls/wined3d/buffer_heap.c 32 | new file mode 100644 33 | index 00000000000..b133bd68933 34 | --- /dev/null 35 | +++ b/dlls/wined3d/buffer_heap.c 36 | @@ -0,0 +1,508 @@ 37 | +/* 38 | + * Copyright 2018 Andrew Comminos 39 | + * 40 | + * This library is free software; you can redistribute it and/or 41 | + * modify it under the terms of the GNU Lesser General Public 42 | + * License as published by the Free Software Foundation; either 43 | + * version 2.1 of the License, or (at your option) any later version. 44 | + * 45 | + * This library is distributed in the hope that it will be useful, 46 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of 47 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 48 | + * Lesser General Public License for more details. 
49 | + * 50 | + * You should have received a copy of the GNU Lesser General Public 51 | + * License along with this library; if not, write to the Free Software 52 | + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 53 | + * 54 | + */ 55 | + 56 | +#include "config.h" 57 | +#include "wine/port.h" 58 | +#include "wine/rbtree.h" 59 | +#include "wined3d_private.h" 60 | + 61 | +WINE_DEFAULT_DEBUG_CHANNEL(d3d); 62 | +WINE_DECLARE_DEBUG_CHANNEL(d3d_perf); 63 | + 64 | +struct wined3d_buffer_heap_element 65 | +{ 66 | + struct wined3d_map_range range; 67 | + 68 | + // rbtree data 69 | + struct wine_rb_entry entry; 70 | + 71 | + // Binned free list positions 72 | + struct wined3d_buffer_heap_element *next; 73 | + struct wined3d_buffer_heap_element *prev; 74 | +}; 75 | + 76 | +struct wined3d_buffer_heap_fenced_element 77 | +{ 78 | + struct wined3d_buffer_heap_bin_set free_list; 79 | + struct wined3d_fence *fence; 80 | + 81 | + struct wined3d_buffer_heap_fenced_element *next; 82 | +}; 83 | + 84 | +static struct wined3d_buffer_heap_element* element_new(GLsizei offset, GLsizei size) 85 | +{ 86 | + struct wined3d_buffer_heap_element* elem; 87 | + elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_element)); 88 | + if (!elem) 89 | + return NULL; 90 | + elem->range.offset = offset; 91 | + elem->range.size = size; 92 | + return elem; 93 | +} 94 | + 95 | +static inline int bitwise_log2_floor(GLsizei size) 96 | +{ 97 | + // XXX(acomminos): I hope this gets unrolled. 98 | + for (int i = 8 * sizeof(GLsizei) - 1; i >= 0; i--) 99 | + { 100 | + if ((size >> i) & 1) { 101 | + return i; 102 | + } 103 | + } 104 | + return 0; 105 | +} 106 | + 107 | +static inline int bitwise_log2_ceil(GLsizei size) 108 | +{ 109 | + // Add one to the floor of size if size isn't a power of two. 
110 | + return bitwise_log2_floor(size) + !!(size & (size - 1)); 111 | +} 112 | + 113 | +static int element_bin(struct wined3d_buffer_heap_element *elem) 114 | +{ 115 | + return min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_floor(elem->range.size)); 116 | +} 117 | + 118 | +// Inserts an element into the appropriate free list bin. 119 | +static void element_insert_free_bin(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) 120 | +{ 121 | + int bin = element_bin(elem); 122 | + 123 | + elem->prev = NULL; 124 | + elem->next = heap->free_list.bins[bin].head; 125 | + if (heap->free_list.bins[bin].head) 126 | + heap->free_list.bins[bin].head->prev = elem; 127 | + heap->free_list.bins[bin].head = elem; 128 | + 129 | + if (!heap->free_list.bins[bin].tail) 130 | + heap->free_list.bins[bin].tail = elem; 131 | + 132 | + TRACE("Inserted allocation at %p of size %lld into bin %d\n", elem->range.offset, elem->range.size, bin); 133 | +} 134 | + 135 | +// Removes an element from the free tree, its bin, and the coalesce list. 
136 | +static void element_remove_free(struct wined3d_buffer_heap *heap, struct wined3d_buffer_heap_element *elem) 137 | +{ 138 | + int bin = element_bin(elem); 139 | + 140 | + if (elem->prev) 141 | + elem->prev->next = elem->next; 142 | + 143 | + if (elem->next) 144 | + elem->next->prev = elem->prev; 145 | + 146 | + if (elem == heap->free_list.bins[bin].head) 147 | + heap->free_list.bins[bin].head = elem->next; 148 | + 149 | + if (elem == heap->free_list.bins[bin].tail) 150 | + heap->free_list.bins[bin].tail = elem->prev; 151 | + 152 | + elem->prev = NULL; 153 | + elem->next = NULL; 154 | + 155 | + TRACE("Freed allocation at %p of size %lld from bin %d\n", elem->range.offset, elem->range.size, bin); 156 | +} 157 | + 158 | +static struct wined3d_buffer_heap_fenced_element* fenced_element_new(struct wined3d_buffer_heap_bin_set bins, struct wined3d_fence* fence) 159 | +{ 160 | + struct wined3d_buffer_heap_fenced_element* elem; 161 | + elem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct wined3d_buffer_heap_fenced_element)); 162 | + if (!elem) 163 | + return NULL; 164 | + elem->free_list = bins; 165 | + elem->fence = fence; 166 | + elem->next = NULL; 167 | + return elem; 168 | +} 169 | + 170 | +static int free_tree_compare(const void *key, const struct wine_rb_entry *entry) 171 | +{ 172 | + const GLsizei offset = *(const GLsizei*) key; 173 | + struct wined3d_buffer_heap_element *elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); 174 | + 175 | + if (offset < elem->range.offset) 176 | + return -1; 177 | + if (offset > elem->range.offset) 178 | + return 1; 179 | + return 0; 180 | +} 181 | + 182 | +/* Context activation is done by the caller. 
*/ 183 | +HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **buffer_heap) 184 | +{ 185 | + const struct wined3d_gl_info *gl_info = context->gl_info; 186 | + const GLenum buffer_target = GL_ARRAY_BUFFER; 187 | + GLbitfield access_flags; 188 | + GLbitfield storage_flags; 189 | + struct wined3d_buffer_heap_element *initial_elem; 190 | + 191 | + struct wined3d_buffer_heap *object; 192 | + 193 | + if ((alignment & (alignment - 1)) != 0) 194 | + { 195 | + return E_FAIL; 196 | + } 197 | + 198 | + if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)))) 199 | + { 200 | + return E_OUTOFMEMORY; 201 | + } 202 | + 203 | + access_flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT; 204 | + if (!write_only) 205 | + { 206 | + access_flags |= GL_MAP_READ_BIT; 207 | + } 208 | + storage_flags = access_flags; 209 | + 210 | + // TODO(acomminos): where should we be checking for errors here? 211 | + GL_EXTCALL(glGenBuffers(1, &object->buffer_object)); 212 | + 213 | + context_bind_bo(context, buffer_target, object->buffer_object); 214 | + 215 | + // TODO(acomminos): assert glBufferStorage supported? 216 | + GL_EXTCALL(glBufferStorage(buffer_target, size, NULL, storage_flags)); 217 | + 218 | + if (!(object->map_ptr = GL_EXTCALL(glMapBufferRange(buffer_target, 0, size, access_flags)))) 219 | + { 220 | + ERR("Couldn't map persistent buffer.\n"); 221 | + return -1; // FIXME(acomminos): proper error code, cleanup 222 | + } 223 | + context_bind_bo(context, buffer_target, 0); 224 | + 225 | + object->fenced_head = object->fenced_tail = NULL; 226 | + object->alignment = alignment; 227 | + InitializeCriticalSection(&object->temp_lock); 228 | + 229 | + initial_elem = element_new(0, size); 230 | + // Don't bother adding the initial allocation to the coalescing tree. 
231 | + element_insert_free_bin(object, initial_elem); 232 | + 233 | + *buffer_heap = object; 234 | + 235 | + return WINED3D_OK; 236 | +} 237 | + 238 | +/* Context activation is done by the caller. */ 239 | +HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) 240 | +{ 241 | + FIXME("Unimplemented, leaking buffer"); 242 | + return WINED3D_OK; 243 | +} 244 | + 245 | +HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range *out_range) 246 | +{ 247 | + int initial_bin; 248 | + int initial_size = size; 249 | + 250 | + EnterCriticalSection(&heap->temp_lock); 251 | + 252 | + // After alignment, reduce fragmentation by rounding to next power of two. 253 | + // If the alignment is a power of two (which it should be), this should be 254 | + // no problem. 255 | + size = 1 << bitwise_log2_ceil(size); 256 | + 257 | + // Align size values where possible. 258 | + if (heap->alignment && (size % heap->alignment != 0)) 259 | + size += heap->alignment - (size % heap->alignment); 260 | + 261 | + initial_bin = min(WINED3D_BUFFER_HEAP_BINS - 1, bitwise_log2_ceil(size)); 262 | + 263 | + for (int i = initial_bin; i < WINED3D_BUFFER_HEAP_BINS; i++) 264 | + { 265 | + struct wined3d_buffer_heap_element *elem = heap->free_list.bins[i].head; 266 | + if (elem) 267 | + { 268 | + struct wined3d_map_range remaining_range; 269 | + remaining_range.offset = elem->range.offset + size; 270 | + remaining_range.size = elem->range.size - size; 271 | + 272 | + out_range->offset = elem->range.offset; 273 | + out_range->size = size; 274 | + 275 | + TRACE_(d3d_perf)("Allocated %d (requested %d) at %p from bin %d (initial %d)\n", size, initial_size, elem->range.offset, i, initial_bin); 276 | + 277 | + // Remove the element from its current free bin to move it to the correct list. 
278 | + element_remove_free(heap, elem); 279 | + 280 | + if (remaining_range.size > 0) 281 | + { 282 | + TRACE_(d3d_perf)("Imperfect fit allocated, fragmenting remainder of %lld at %p.\n", remaining_range.size, remaining_range.offset); 283 | + 284 | + elem->range = remaining_range; 285 | + element_insert_free_bin(heap, elem); 286 | + } 287 | + else 288 | + { 289 | + HeapFree(GetProcessHeap(), 0, elem); 290 | + } 291 | + 292 | + LeaveCriticalSection(&heap->temp_lock); 293 | + return WINED3D_OK; 294 | + } 295 | + } 296 | + 297 | + LeaveCriticalSection(&heap->temp_lock); 298 | + 299 | + FIXME_(d3d_perf)("Forcing coalesce, not enough free space in buffer heap.\n"); 300 | + int num_coalesced; 301 | + if (SUCCEEDED(wined3d_buffer_heap_deferred_coalesce(heap, &num_coalesced))) 302 | + { 303 | + if (num_coalesced > 0) 304 | + return wined3d_buffer_heap_alloc(heap, size, out_range); 305 | + } 306 | + 307 | + FIXME_(d3d_perf)("Coalescing did not create new blocks, failing.\n"); 308 | + 309 | + return WINED3DERR_OUTOFVIDEOMEMORY; 310 | +} 311 | + 312 | +HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) 313 | +{ 314 | + struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); 315 | + 316 | + if (!elem) 317 | + return E_OUTOFMEMORY; 318 | + 319 | + EnterCriticalSection(&heap->temp_lock); 320 | + 321 | + // Only insert the element into a free bin, coalescing will occur later. 
322 | + element_insert_free_bin(heap, elem); 323 | + 324 | + LeaveCriticalSection(&heap->temp_lock); 325 | + 326 | + return WINED3D_OK; 327 | +} 328 | + 329 | +HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) 330 | +{ 331 | + struct wined3d_buffer_heap_element *elem = element_new(range.offset, range.size); 332 | + int bin_index = element_bin(elem); 333 | + struct wined3d_buffer_heap_bin *bin = &heap->pending_fenced_bins.bins[bin_index]; 334 | + 335 | + if (bin->tail) 336 | + { 337 | + bin->tail->next = elem; 338 | + elem->prev = bin->tail; 339 | + bin->tail = elem; 340 | + } 341 | + else 342 | + { 343 | + bin->head = elem; 344 | + bin->tail = elem; 345 | + } 346 | + 347 | + return WINED3D_OK; 348 | +} 349 | + 350 | +HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) 351 | +{ 352 | + struct wined3d_buffer_heap_fenced_element *fenced_elem; 353 | + struct wined3d_fence *fence; 354 | + HRESULT hr; 355 | + 356 | + if (heap->fenced_head) 357 | + { 358 | + // XXX(acomminos): double or triple buffer this? 359 | + wined3d_buffer_heap_cs_fence_wait(heap, device); 360 | + } 361 | + 362 | + if (FAILED(hr = wined3d_fence_create(device, &fence))) 363 | + { 364 | + ERR("Failed to create fence.\n"); 365 | + return hr; 366 | + } 367 | + 368 | + fenced_elem = fenced_element_new(heap->pending_fenced_bins, fence); 369 | + if (!fenced_elem) 370 | + return E_OUTOFMEMORY; 371 | + 372 | + TRACE_(d3d_perf)("Dispatching fenced buffer set.\n"); 373 | + memset(&heap->pending_fenced_bins, 0, sizeof(heap->pending_fenced_bins)); 374 | + 375 | + // Append to end of fenced list, which works well if you assume that buffers 376 | + // are freed in some ascending draw call ordering. 
377 | + if (!heap->fenced_head) 378 | + { 379 | + heap->fenced_head = fenced_elem; 380 | + heap->fenced_tail = fenced_elem; 381 | + } 382 | + else 383 | + { 384 | + heap->fenced_tail->next = fenced_elem; 385 | + heap->fenced_tail = fenced_elem; 386 | + } 387 | + 388 | + wined3d_fence_issue(fence, device); 389 | + return WINED3D_OK; 390 | +} 391 | + 392 | +HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) 393 | +{ 394 | + enum wined3d_fence_result res; 395 | + struct wined3d_buffer_heap_fenced_element *elem = heap->fenced_head; 396 | + if (!elem) 397 | + return WINED3D_OK; 398 | + 399 | + res = wined3d_fence_wait(elem->fence, device); 400 | + switch (res) 401 | + { 402 | + case WINED3D_FENCE_OK: 403 | + case WINED3D_FENCE_NOT_STARTED: 404 | + { 405 | + TRACE_(d3d_perf)("Freed fence group.\n"); 406 | + 407 | + EnterCriticalSection(&heap->temp_lock); 408 | + for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) 409 | + { 410 | + struct wined3d_buffer_heap_bin *elem_bin = &elem->free_list.bins[i]; 411 | + if (!elem_bin->tail) 412 | + continue; 413 | + 414 | + struct wined3d_buffer_heap_bin *heap_bin = &heap->free_list.bins[i]; 415 | + if (heap_bin->head) 416 | + { 417 | + // Insert to front. 418 | + elem_bin->tail->next = heap_bin->head; 419 | + heap_bin->head->prev = elem_bin->tail; 420 | + 421 | + elem_bin->head->prev = NULL; 422 | + heap_bin->head = elem_bin->head; 423 | + } 424 | + else 425 | + { 426 | + elem_bin->head->prev = NULL; 427 | + heap_bin->head = elem_bin->head; 428 | + elem_bin->tail->next = NULL; 429 | + heap_bin->tail = elem_bin->tail; 430 | + } 431 | + } 432 | + LeaveCriticalSection(&heap->temp_lock); 433 | + 434 | + wined3d_fence_destroy(elem->fence); 435 | + 436 | + heap->fenced_head = elem->next; 437 | + HeapFree(GetProcessHeap(), 0, elem); 438 | + // TODO(acomminos): bother to null out fenced_tail? 
439 | + break; 440 | + } 441 | + default: 442 | + return WINED3D_OK; 443 | + } 444 | + 445 | + return WINED3D_OK; 446 | +} 447 | + 448 | +HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *coalesced_count) 449 | +{ 450 | + struct wined3d_buffer_heap_element *elem = NULL; 451 | + struct wined3d_buffer_heap_element *next = NULL; 452 | + struct wine_rb_entry *entry; 453 | + struct wined3d_map_range coalesced_range; 454 | + 455 | + struct wine_rb_tree free_tree; 456 | + int num_coalesced = 0; 457 | + 458 | + wine_rb_init(&free_tree, free_tree_compare); 459 | + 460 | + EnterCriticalSection(&heap->temp_lock); 461 | + 462 | + // TODO(acomminos): on one hand, if there's a lot of elements in the list, 463 | + // it's highly fragmented. on the other, we can potentially waste a decent 464 | + // sum of time checking for uncoalesced bins. 465 | + for (int i = 0; i < WINED3D_BUFFER_HEAP_BINS; i++) 466 | + { 467 | + elem = heap->free_list.bins[i].head; 468 | + while (elem) 469 | + { 470 | + // Insert a sentry. FIXME(acomminos): can skip this with traversal. 471 | + if (wine_rb_put(&free_tree, &elem->range.offset, &elem->entry) == -1) 472 | + { 473 | + ERR("Failed to insert key %x in tree.\n", elem->range.offset); 474 | + elem = elem->next; 475 | + continue; 476 | + } 477 | + 478 | + coalesced_range = elem->range; 479 | + 480 | + // Coalesce right. 
481 | + entry = wine_rb_next(&elem->entry); 482 | + if (entry) 483 | + { 484 | + TRACE("Coalesced right.\n"); 485 | + struct wined3d_buffer_heap_element *right_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); 486 | + if (elem->range.offset + elem->range.size == right_elem->range.offset) 487 | + { 488 | + coalesced_range.size += right_elem->range.size; 489 | + 490 | + wine_rb_remove(&free_tree, entry); 491 | + element_remove_free(heap, right_elem); 492 | + HeapFree(GetProcessHeap(), 0, right_elem); 493 | + 494 | + num_coalesced++; 495 | + } 496 | + } 497 | + 498 | + // Coalesce left. 499 | + entry = wine_rb_prev(&elem->entry); 500 | + if (entry) 501 | + { 502 | + TRACE("Coalesced left.\n"); 503 | + struct wined3d_buffer_heap_element *left_elem = WINE_RB_ENTRY_VALUE(entry, struct wined3d_buffer_heap_element, entry); 504 | + if (left_elem->range.offset + left_elem->range.size == coalesced_range.offset) 505 | + { 506 | + coalesced_range.offset = left_elem->range.offset; 507 | + coalesced_range.size += left_elem->range.size; 508 | + 509 | + wine_rb_remove(&free_tree, entry); 510 | + element_remove_free(heap, left_elem); 511 | + HeapFree(GetProcessHeap(), 0, left_elem); 512 | + 513 | + num_coalesced++; 514 | + } 515 | + } 516 | + 517 | + next = elem->next; 518 | + 519 | + if (elem->range.size != coalesced_range.size) 520 | + { 521 | + FIXME_(d3d_perf)("Coalesced range from (%p, %ld) to (%p, %ld)\n", elem->range.offset, elem->range.size, coalesced_range.offset, coalesced_range.size); 522 | + 523 | + wine_rb_remove(&free_tree, &elem->entry); 524 | + 525 | + // Move to the correct free bin. 
526 | + element_remove_free(heap, elem); 527 | + elem->range = coalesced_range; 528 | + element_insert_free_bin(heap, elem); 529 | + 530 | + wine_rb_put(&free_tree, &elem->range.offset, &elem->entry); 531 | + } 532 | + 533 | + elem = next; 534 | + } 535 | + } 536 | + 537 | + LeaveCriticalSection(&heap->temp_lock); 538 | + 539 | + FIXME_(d3d_perf)("Performed %d coalesces.\n", num_coalesced); 540 | + if (coalesced_count) 541 | + *coalesced_count = num_coalesced; 542 | + 543 | + return WINED3D_OK; 544 | +} 545 | diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c 546 | index c60d67b83a7..e2bfd88dd2c 100644 547 | --- a/dlls/wined3d/cs.c 548 | +++ b/dlls/wined3d/cs.c 549 | @@ -467,6 +467,15 @@ static void wined3d_cs_exec_present(struct wined3d_cs *cs, const void *data) 550 | } 551 | 552 | InterlockedDecrement(&cs->pending_presents); 553 | + 554 | + // FIXME(acomminos): is this the right place to put double-buffered frame 555 | + // timing based logic? 556 | + // FIXME(acomminos): this conditional sucks, replace with fancier feature check 557 | + if (cs->device->wo_buffer_heap && cs->device->cb_buffer_heap) 558 | + { 559 | + wined3d_buffer_heap_cs_fence_issue(cs->device->wo_buffer_heap, cs->device); 560 | + wined3d_buffer_heap_cs_fence_issue(cs->device->cb_buffer_heap, cs->device); 561 | + } 562 | } 563 | 564 | void wined3d_cs_emit_present(struct wined3d_cs *cs, struct wined3d_swapchain *swapchain, 565 | diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c 566 | index b97b7b963ba..f6bf07604c7 100644 567 | --- a/dlls/wined3d/device.c 568 | +++ b/dlls/wined3d/device.c 569 | @@ -837,6 +837,53 @@ static void destroy_default_samplers(struct wined3d_device *device, struct wined 570 | device->null_sampler = NULL; 571 | } 572 | 573 | +/* Context activation is done by the caller. 
*/ 574 | +static void create_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) 575 | +{ 576 | + const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 577 | + // TODO(acomminos): kill this magic number. perhaps base on vram. 578 | + GLsizeiptr geo_heap_size = 512 * 1024 * 1024; 579 | + // We choose a constant buffer size of 128MB, the same as NVIDIA claims to 580 | + // use in their Direct3D driver for discarded constant buffers. 581 | + GLsizeiptr cb_heap_size = 128 * 1024 * 1024; 582 | + GLint ub_alignment; 583 | + HRESULT hr; 584 | + 585 | + if (gl_info->supported[ARB_BUFFER_STORAGE]) 586 | + { 587 | + gl_info->gl_ops.gl.p_glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ub_alignment); 588 | + 589 | + // Align constant buffer heap size, in case GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT isn't a power of two (for some reason). 590 | + cb_heap_size -= cb_heap_size % ub_alignment; 591 | + 592 | + if (FAILED(hr = wined3d_buffer_heap_create(context, geo_heap_size, 0, TRUE, &device->wo_buffer_heap))) 593 | + { 594 | + ERR("Failed to create write-only persistent buffer heap, hr %#x.\n", hr); 595 | + } 596 | + 597 | + if (FAILED(hr = wined3d_buffer_heap_create(context, cb_heap_size, ub_alignment, TRUE, &device->cb_buffer_heap))) 598 | + { 599 | + ERR("Failed to create persistent buffer heap for constant buffers, hr %#x.\n", hr); 600 | + } 601 | + 602 | + FIXME("Initialized PBA (geo_heap_size: %ld, cb_heap_size: %ld, ub_align: %d)\n", geo_heap_size, cb_heap_size, ub_alignment); 603 | + } 604 | + else 605 | + { 606 | + FIXME("Not using PBA, ARB_buffer_storage unsupported.\n"); 607 | + } 608 | +} 609 | + 610 | +/* Context activation is done by the caller. 
*/ 611 | +static void destroy_buffer_heap(struct wined3d_device *device, struct wined3d_context *context) 612 | +{ 613 | + if (device->wo_buffer_heap) 614 | + wined3d_buffer_heap_destroy(device->wo_buffer_heap, context); 615 | + 616 | + if (device->cb_buffer_heap) 617 | + wined3d_buffer_heap_destroy(device->cb_buffer_heap, context); 618 | +} 619 | + 620 | static LONG fullscreen_style(LONG style) 621 | { 622 | /* Make sure the window is managed, otherwise we won't get keyboard input. */ 623 | @@ -1001,6 +1048,8 @@ static void wined3d_device_delete_opengl_contexts_cs(void *object) 624 | device->shader_backend->shader_free_private(device); 625 | destroy_dummy_textures(device, context); 626 | destroy_default_samplers(device, context); 627 | + destroy_buffer_heap(device, context); 628 | + 629 | context_release(context); 630 | 631 | while (device->context_count) 632 | @@ -1050,6 +1099,9 @@ static void wined3d_device_create_primary_opengl_context_cs(void *object) 633 | context = context_acquire(device, target, 0); 634 | create_dummy_textures(device, context); 635 | create_default_samplers(device, context); 636 | + 637 | + create_buffer_heap(device, context); 638 | + 639 | context_release(context); 640 | } 641 | 642 | diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c 643 | index d55d5674f4f..eabdb658b48 100644 644 | --- a/dlls/wined3d/directx.c 645 | +++ b/dlls/wined3d/directx.c 646 | @@ -111,6 +111,7 @@ static const struct wined3d_extension_map gl_extension_map[] = 647 | /* ARB */ 648 | {"GL_ARB_base_instance", ARB_BASE_INSTANCE }, 649 | {"GL_ARB_blend_func_extended", ARB_BLEND_FUNC_EXTENDED }, 650 | + {"GL_ARB_buffer_storage", ARB_BUFFER_STORAGE }, 651 | {"GL_ARB_clear_buffer_object", ARB_CLEAR_BUFFER_OBJECT }, 652 | {"GL_ARB_clear_texture", ARB_CLEAR_TEXTURE }, 653 | {"GL_ARB_clip_control", ARB_CLIP_CONTROL }, 654 | @@ -2713,6 +2714,8 @@ static void load_gl_funcs(struct wined3d_gl_info *gl_info) 655 | /* GL_ARB_blend_func_extended */ 656 | 
USE_GL_FUNC(glBindFragDataLocationIndexed) 657 | USE_GL_FUNC(glGetFragDataIndex) 658 | + /* GL_ARB_buffer_storage */ 659 | + USE_GL_FUNC(glBufferStorage) 660 | /* GL_ARB_clear_buffer_object */ 661 | USE_GL_FUNC(glClearBufferData) 662 | USE_GL_FUNC(glClearBufferSubData) 663 | diff --git a/dlls/wined3d/query.c b/dlls/wined3d/query.c 664 | index 5ea79b6e4a7..f3ca1630e58 100644 665 | --- a/dlls/wined3d/query.c 666 | +++ b/dlls/wined3d/query.c 667 | @@ -88,7 +88,7 @@ static BOOL wined3d_fence_supported(const struct wined3d_gl_info *gl_info) 668 | return gl_info->supported[ARB_SYNC] || gl_info->supported[NV_FENCE] || gl_info->supported[APPLE_FENCE]; 669 | } 670 | 671 | -static enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, 672 | +enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, 673 | const struct wined3d_device *device, DWORD flags) 674 | { 675 | const struct wined3d_gl_info *gl_info; 676 | diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h 677 | index bbee7881f2f..730eff131f3 100644 678 | --- a/dlls/wined3d/wined3d_gl.h 679 | +++ b/dlls/wined3d/wined3d_gl.h 680 | @@ -44,6 +44,7 @@ enum wined3d_gl_extension 681 | /* ARB */ 682 | ARB_BASE_INSTANCE, 683 | ARB_BLEND_FUNC_EXTENDED, 684 | + ARB_BUFFER_STORAGE, 685 | ARB_CLEAR_BUFFER_OBJECT, 686 | ARB_CLEAR_TEXTURE, 687 | ARB_CLIP_CONTROL, 688 | diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h 689 | index ee4e1a4d6c3..dea1c2f5207 100644 690 | --- a/dlls/wined3d/wined3d_private.h 691 | +++ b/dlls/wined3d/wined3d_private.h 692 | @@ -1726,6 +1726,9 @@ void wined3d_fence_destroy(struct wined3d_fence *fence) DECLSPEC_HIDDEN; 693 | void wined3d_fence_issue(struct wined3d_fence *fence, const struct wined3d_device *device) DECLSPEC_HIDDEN; 694 | enum wined3d_fence_result wined3d_fence_wait(const struct wined3d_fence *fence, 695 | const struct wined3d_device *device) DECLSPEC_HIDDEN; 696 | +// XXX(acomminos): really expose this? 
697 | +enum wined3d_fence_result wined3d_fence_test(const struct wined3d_fence *fence, 698 | + const struct wined3d_device *device, DWORD flags) DECLSPEC_HIDDEN; 699 | 700 | /* Direct3D terminology with little modifications. We do not have an issued 701 | * state because only the driver knows about it, but we have a created state 702 | @@ -3015,6 +3018,10 @@ struct wined3d_device 703 | /* Context management */ 704 | struct wined3d_context **contexts; 705 | UINT context_count; 706 | + 707 | + /* Dynamic buffer heap */ 708 | + struct wined3d_buffer_heap *wo_buffer_heap; 709 | + struct wined3d_buffer_heap *cb_buffer_heap; 710 | }; 711 | 712 | void device_clear_render_targets(struct wined3d_device *device, UINT rt_count, const struct wined3d_fb_state *fb, 713 | @@ -3522,6 +3529,12 @@ void state_init(struct wined3d_state *state, struct wined3d_fb_state *fb, 714 | DWORD flags) DECLSPEC_HIDDEN; 715 | void state_unbind_resources(struct wined3d_state *state) DECLSPEC_HIDDEN; 716 | 717 | +struct wined3d_map_range 718 | +{ 719 | + GLintptr offset; 720 | + GLsizeiptr size; 721 | +}; 722 | + 723 | enum wined3d_cs_queue_id 724 | { 725 | WINED3D_CS_QUEUE_DEFAULT = 0, 726 | @@ -3699,12 +3712,61 @@ enum wined3d_buffer_conversion_type 727 | CONV_POSITIONT, 728 | }; 729 | 730 | -struct wined3d_map_range 731 | +struct wined3d_buffer_heap_element; 732 | +struct wined3d_buffer_heap_fenced_element; 733 | + 734 | +// Number of power-of-two buckets to populate. 735 | +#define WINED3D_BUFFER_HEAP_BINS 32 736 | + 737 | +struct wined3d_buffer_heap_bin 738 | { 739 | - UINT offset; 740 | - UINT size; 741 | + struct wined3d_buffer_heap_element *head; 742 | + struct wined3d_buffer_heap_element *tail; 743 | +}; 744 | + 745 | +struct wined3d_buffer_heap_bin_set 746 | +{ 747 | + struct wined3d_buffer_heap_bin bins[WINED3D_BUFFER_HEAP_BINS]; 748 | }; 749 | 750 | +// A heap that manages allocations with a single GL buffer. 
751 | +struct wined3d_buffer_heap 752 | +{ 753 | + GLuint buffer_object; 754 | + void *map_ptr; 755 | + GLsizeiptr alignment; 756 | + CRITICAL_SECTION temp_lock; // Temporary lock while we implement the fenced free list. 757 | + 758 | + struct wined3d_buffer_heap_bin_set free_list; 759 | + 760 | + // Elements that need to be fenced, but haven't reached the required size. 761 | + struct wined3d_buffer_heap_bin_set pending_fenced_bins; 762 | + 763 | + // List of sets of buffers behind a common fence, in FIFO order. 764 | + struct wined3d_buffer_heap_fenced_element *fenced_head; 765 | + struct wined3d_buffer_heap_fenced_element *fenced_tail; 766 | +}; 767 | + 768 | +HRESULT wined3d_buffer_heap_create(struct wined3d_context *context, GLsizeiptr size, GLsizeiptr alignment, BOOL write_only, struct wined3d_buffer_heap **heap) DECLSPEC_HIDDEN; 769 | +HRESULT wined3d_buffer_heap_destroy(struct wined3d_buffer_heap *heap, struct wined3d_context *context) DECLSPEC_HIDDEN; 770 | +// Fetches a buffer from the heap of at least the given size. 771 | +// Attempts to coalesce blocks under memory pressure. 772 | +HRESULT wined3d_buffer_heap_alloc(struct wined3d_buffer_heap *heap, GLsizeiptr size, struct wined3d_map_range* out_range) DECLSPEC_HIDDEN; 773 | +// Immediately frees a heap-allocated buffer segment. 774 | +HRESULT wined3d_buffer_heap_free(struct wined3d_buffer_heap *heap, struct wined3d_map_range range) DECLSPEC_HIDDEN; 775 | +// Enqueues a buffer segment to return to the heap once its fence has been signaled. 776 | +HRESULT wined3d_buffer_heap_free_fenced(struct wined3d_buffer_heap *heap, struct wined3d_device *device, struct wined3d_map_range range) DECLSPEC_HIDDEN; 777 | +// Issues a fence for the current set of pending fenced buffers. 778 | +// Double-buffered: if the last fence issued has not yet been triggered, waits 779 | +// on it. 
780 | +HRESULT wined3d_buffer_heap_cs_fence_issue(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; 781 | +// Waits on the next issued fence in FIFO order. Frees the fenced buffers after 782 | +// the fence has been triggered. 783 | +HRESULT wined3d_buffer_heap_cs_fence_wait(struct wined3d_buffer_heap *heap, struct wined3d_device *device) DECLSPEC_HIDDEN; 784 | +// Performs deferred coalescing of buffers. To be called under memory pressure. 785 | +// Outputs the number of coalesced regions in `num_coalesced`. 786 | +HRESULT wined3d_buffer_heap_deferred_coalesce(struct wined3d_buffer_heap *heap, int *num_coalesced) DECLSPEC_HIDDEN; 787 | + 788 | struct wined3d_buffer 789 | { 790 | struct wined3d_resource resource; 791 | -- 792 | 2.17.0 793 | 794 | --------------------------------------------------------------------------------