Compare commits

...

1 Commits

Author SHA1 Message Date
Shinmegumi
f33c1ca7c1 [vk] Fix query cache leak on missed sync (#131)
Provided by community member, elementary-particle. Submitted as PR by MaranBR.

Fixed issues:

1. The queue cache forgot to put the host query into unregister queue if they weren't synced. This will block all the banks from freeing causing a major leak.

2. SamplesQueryCounter is not aligned with renderpass begin/end. This creates invalid queries.

3. Conditional rendering is not turned on/off at the correct location making them invalid.

Co-authored-by: Maufeat <sahyno1996@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/131
Co-authored-by: Shinmegumi <shinmegumi@eden-emu.dev>
Co-committed-by: Shinmegumi <shinmegumi@eden-emu.dev>
2025-07-26 12:26:29 -04:00
4 changed files with 82 additions and 98 deletions

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -115,8 +118,8 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
Tegra::GPU& gpu_)
: owner{owner_}, rasterizer{rasterizer_},
device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
: owner{owner_}, rasterizer{rasterizer_}, device_memory{device_memory_}, runtime{runtime_},
gpu{gpu_} {
streamer_mask = 0;
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -267,7 +270,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
return;
}
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
ASSERT(false);
LOG_ERROR(HW_GPU,
"Query report value not synchronized. Consider increasing GPU accuracy.");
if (!is_synced) [[likely]] {
impl->pending_unregister.push_back(query_location);
}
return;
}
query_base->value += streamer->GetAmendValue();
@@ -370,8 +377,6 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
if (resume) {
impl->runtime.ResumeHostConditionalRendering();
} else {
CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
CounterClose(VideoCommon::QueryType::StreamingByteCount);
impl->runtime.PauseHostConditionalRendering();
}
}

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -1161,10 +1164,9 @@ struct QueryCacheRuntimeImpl {
StagingBufferPool& staging_pool_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool)
: rasterizer{rasterizer_}, device_memory{device_memory_},
buffer_cache{buffer_cache_}, device{device_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
guest_streamer(0, runtime),
: rasterizer{rasterizer_}, device_memory{device_memory_}, buffer_cache{buffer_cache_},
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, guest_streamer(0, runtime),
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
device, scheduler, memory_allocator, compute_pass_descriptor_queue,
descriptor_pool),
@@ -1300,9 +1302,11 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
if (impl->hcr_is_set) {
if (impl->hcr_setup.buffer == impl->hcr_buffer &&
impl->hcr_setup.offset == impl->hcr_offset) {
ResumeHostConditionalRendering();
return;
}
}
bool was_running = impl->is_hcr_running;
if (was_running) {
PauseHostConditionalRendering();
}
impl->hcr_setup.buffer = impl->hcr_buffer;
@@ -1310,7 +1314,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
impl->hcr_is_set = true;
impl->is_hcr_running = false;
ResumeHostConditionalRendering();
if (was_running) {
ResumeHostConditionalRendering();
}
}
void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
@@ -1325,7 +1331,8 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
to_resolve = buffer->Handle();
to_resolve_offset = static_cast<u32>(offset);
}
if (impl->is_hcr_running) {
bool was_running = impl->is_hcr_running;
if (was_running) {
PauseHostConditionalRendering();
}
impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve,
@@ -1335,7 +1342,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
impl->hcr_is_set = true;
impl->is_hcr_running = false;
ResumeHostConditionalRendering();
if (was_running) {
ResumeHostConditionalRendering();
}
}
bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,

View File

@@ -217,8 +217,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -232,9 +230,13 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
UpdateDynamicStates();
HandleTransformFeedback();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
draw_func();
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@@ -311,8 +313,6 @@ void RasterizerVulkan::DrawTexture() {
};
FlushWork();
query_cache.NotifySegment(true);
std::scoped_lock l{texture_cache.mutex};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
@@ -359,10 +359,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork();
gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
regs.clear_surface.A;
@@ -378,6 +374,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
@@ -832,6 +832,7 @@ std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
if (!image_view) {
return {};
}
query_cache.NotifySegment(false);
const auto& resolution = Settings::values.resolution_info;
@@ -943,22 +944,20 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
// TODO: updating line stipple causes the cmdbuf to die
// UpdateLineStipple(regs);
const u8 dynamic_state = Settings::values.dyna_state.GetValue();
auto features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported()
&& dynamic_state > 0,
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported()
&& dynamic_state > 1,
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported()
&& dynamic_state > 1,
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported()
&& dynamic_state > 2,
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported()
&& dynamic_state > 2,
.has_extended_dynamic_state =
device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0,
.has_extended_dynamic_state_2 =
device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1,
.has_extended_dynamic_state_2_extra =
device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1,
.has_extended_dynamic_state_3_blend =
device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2,
.has_extended_dynamic_state_3_enables =
device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2,
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
};
@@ -983,16 +982,12 @@ void RasterizerVulkan::UpdateDynamicStates() {
if (features.has_extended_dynamic_state_3_enables) {
using namespace Tegra::Engines;
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE
|| device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
struct In
{
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
struct In {
const Maxwell3D::Regs::VertexAttribute::Type d;
In(Maxwell3D::Regs::VertexAttribute::Type n)
: d(n)
{}
bool operator()(Maxwell3D::Regs::VertexAttribute n) const
{
In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
return n.type == d;
}
};
@@ -1143,36 +1138,36 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
if (is_d24 && !device.SupportsD24DepthBuffer()) {
static constexpr const size_t length = sizeof(NEEDS_D24) / sizeof(NEEDS_D24[0]);
static constexpr const u64 *start = NEEDS_D24;
static constexpr const u64 *end = NEEDS_D24 + length;
static constexpr const u64* start = NEEDS_D24;
static constexpr const u64* end = NEEDS_D24 + length;
const u64 *it = std::find(start, end, program_id);
const u64* it = std::find(start, end, program_id);
if (it != end) {
// the base formulas can be obtained from here:
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
const double rescale_factor = static_cast<double>(1ULL << (32 - 24))
/ (static_cast<double>(0x1.ep+127));
const double rescale_factor =
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
}
}
scheduler.Record(
[constant = units, clamp = regs.depth_bias_clamp, factor = regs.slope_scale_depth_bias, this](
vk::CommandBuffer cmdbuf) {
if (device.IsExtDepthBiasControlSupported()) {
static VkDepthBiasRepresentationInfoEXT bias_info{
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
.pNext = nullptr,
.depthBiasRepresentation = VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
.depthBiasExact = VK_FALSE,
};
scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
factor = regs.slope_scale_depth_bias, this](vk::CommandBuffer cmdbuf) {
if (device.IsExtDepthBiasControlSupported()) {
static VkDepthBiasRepresentationInfoEXT bias_info{
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
.pNext = nullptr,
.depthBiasRepresentation =
VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
.depthBiasExact = VK_FALSE,
};
cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
} else {
cmdbuf.SetDepthBias(constant, clamp, factor);
}
});
cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
} else {
cmdbuf.SetDepthBias(constant, clamp, factor);
}
});
}
void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1354,8 +1349,7 @@ void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::
});
}
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
{
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchConservativeRasterizationMode()) {
return;
}
@@ -1367,8 +1361,7 @@ void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwe
});
}
void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs)
{
void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLineStippleEnable()) {
return;
}
@@ -1378,19 +1371,7 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
void RasterizerVulkan::UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs)
{
if (!state_tracker.TouchLineStipple()) {
return;
}
scheduler.Record([params = regs.line_stipple_params](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineStippleEXT(params.factor, static_cast<uint16_t>(params.pattern));
});
}
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
{
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
// if (!state_tracker.TouchLi()) {
// return;
// }

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -257,16 +260,6 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
query_cache->NotifySegment(true);
}
#else
query_cache->NotifySegment(true);
#endif
}
}
void Scheduler::InvalidateState() {
@@ -276,15 +269,7 @@ void Scheduler::InvalidateState() {
}
void Scheduler::EndPendingOperations() {
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
// query_cache->DisableStreams();
}
#else
// query_cache->DisableStreams();
#endif
query_cache->NotifySegment(false);
query_cache->CounterReset(VideoCommon::QueryType::ZPassPixelCount64);
EndRenderPass();
}
@@ -292,6 +277,10 @@ void Scheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
query_cache->NotifySegment(false);
Record([num_images = num_renderpass_images, images = renderpass_images,
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
std::array<VkImageMemoryBarrier, 9> barriers;