Compare commits

...

3 Commits

Author SHA1 Message Date
lizzie
d7558be543 [dynarmic] backport WAITPKG based spinlocks
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-05 00:04:45 +02:00
crueter
1a13e79c3d [cmake] fix video_core and tests comp errors on Windows (#2631)
did not link to video_core thus did not properly propagate the GPUOpen
target thus failed to find vk_mem_alloc

also msvc sucks

Signed-off-by: crueter <crueter@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2631
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
2025-10-05 00:00:52 +02:00
Ribbit
268918aece [vk] Implement Shader Read Barrier (#2671)
Adding the shader read barrier keeps every render/compute/transfer write visible before the image is sampled, so it prevents the “read-before-writes-finish” hazards. Without it you can get random stale frames, flickering post process passes, partially updated HUD textures, and corrupted depth-to-color conversions especially in scenes that render into an offscreen image and immediately feed that image to a shader (reflections, bloom, dynamic resolution, depth visualizers, etc.). This fix makes those R2T chains deterministic again across all Vulkan drivers.

Co-authored-by: Ribbit <ribbit@placeholder.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2671
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Reviewed-by: crueter <crueter@eden-emu.dev>
Co-authored-by: Ribbit <ribbit@eden-emu.dev>
Co-committed-by: Ribbit <ribbit@eden-emu.dev>
2025-10-04 23:58:08 +02:00
12 changed files with 154 additions and 29 deletions

View File

@@ -188,6 +188,8 @@ HostFeature GetHostFeatures() {
features |= HostFeature::LZCNT;
if (cpu_info.has(Cpu::tGFNI))
features |= HostFeature::GFNI;
if (cpu_info.has(Cpu::tWAITPKG))
features |= HostFeature::WAITPKG;
if (cpu_info.has(Cpu::tBMI2)) {
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.

View File

@@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
EmitExclusiveLock(code, conf, tmp, eax);
EmitExclusiveLock(code, conf, tmp, tmp2);
SharedLabel end = GenSharedLabel();

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p
}
code.mov(pointer, mcl::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
EmitSpinLockLock(code, pointer, tmp);
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
}
template<typename UserConfig>

View File

@@ -35,9 +35,10 @@ enum class HostFeature : u64 {
BMI2 = 1ULL << 19,
LZCNT = 1ULL << 20,
GFNI = 1ULL << 21,
WAITPKG = 1ULL << 22,
// Zen-based BMI2
FastBMI2 = 1ULL << 22,
FastBMI2 = 1ULL << 23,
// Orthographic AVX512 features on 128 and 256 vectors
AVX512_Ortho = AVX512F | AVX512VL,

View File

@@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
// TODO: this is because we lack regalloc - so better to be safe :(
if (waitpkg) {
code.push(Xbyak::util::eax);
code.push(Xbyak::util::ebx);
code.push(Xbyak::util::edx);
}
Xbyak::Label start, loop;
code.jmp(start, code.T_NEAR);
code.L(loop);
code.pause();
if (waitpkg) {
// TODO: This clobbers EAX and EDX did we tell the regalloc?
// ARM ptr for address-monitoring
code.umonitor(ptr);
// tmp.bit[0] = 0: C0.1 | Slow Wakup | Better Savings
// tmp.bit[0] = 1: C0.2 | Fast Wakup | Lesser Savings
// edx:eax is implicitly used as a 64-bit deadline timestamp
// Use the maximum so that we use the operating system's maximum
// allowed wait time within the IA32_UMWAIT_CONTROL register
// Enter power state designated by tmp and wait for a write to lock_ptr
code.mov(Xbyak::util::eax, 0xFFFFFFFF);
code.mov(Xbyak::util::edx, Xbyak::util::eax);
// TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it...
code.mov(Xbyak::util::ebx, 1);
code.umwait(Xbyak::util::ebx);
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
// CF == 0 if we exited the wait for any other reason
} else {
code.pause();
}
code.L(start);
code.mov(tmp, 1);
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
code.test(tmp, tmp);
code.jnz(loop, code.T_NEAR);
if (waitpkg) {
code.pop(Xbyak::util::edx);
code.pop(Xbyak::util::ebx);
code.pop(Xbyak::util::eax);
}
}
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
@@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() {
code.align();
lock = code.getCurr<void (*)(volatile int*)>();
EmitSpinLockLock(code, ABI_PARAM1, code.eax);
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
code.ret();
code.align();

View File

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@@ -9,7 +12,7 @@
namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
} // namespace Dynarmic

View File

@@ -1,3 +1,6 @@
# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
# SPDX-License-Identifier: GPL-3.0-or-later
# SPDX-FileCopyrightText: 2018 yuzu Emulator Project
# SPDX-License-Identifier: GPL-2.0-or-later
@@ -21,7 +24,7 @@ add_executable(tests
create_target_directory_groups(tests)
target_link_libraries(tests PRIVATE common core input_common)
target_link_libraries(tests PRIVATE common core input_common video_core)
target_link_libraries(tests PRIVATE ${PLATFORM_LIBRARIES} Catch2::Catch2WithMain Threads::Threads)
add_test(NAME tests COMMAND tests)

View File

@@ -156,6 +156,8 @@ void MaxwellDMA::Launch() {
}
void MaxwellDMA::CopyBlockLinearToPitch() {
u32 bytes_per_pixel = 1;
DMA::ImageOperand src_operand;
src_operand.bytes_per_pixel = bytes_per_pixel;

View File

@@ -46,6 +46,38 @@ namespace Vulkan {
using VideoCommon::ImageViewType;
namespace {
[[nodiscard]] VkImageAspectFlags AspectMaskFromFormat(VideoCore::Surface::PixelFormat format) {
using VideoCore::Surface::SurfaceType;
switch (VideoCore::Surface::GetFormatType(format)) {
case SurfaceType::ColorTexture:
return VK_IMAGE_ASPECT_COLOR_BIT;
case SurfaceType::Depth:
return VK_IMAGE_ASPECT_DEPTH_BIT;
case SurfaceType::Stencil:
return VK_IMAGE_ASPECT_STENCIL_BIT;
case SurfaceType::DepthStencil:
return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
default:
return VK_IMAGE_ASPECT_COLOR_BIT;
}
}
[[nodiscard]] VkImageSubresourceRange SubresourceRangeFromView(const ImageView& image_view) {
auto range = image_view.range;
if ((image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{}) {
range.base.layer = 0;
range.extent.layers = 1;
}
return VkImageSubresourceRange{
.aspectMask = AspectMaskFromFormat(image_view.format),
.baseMipLevel = static_cast<u32>(range.base.level),
.levelCount = static_cast<u32>(range.extent.levels),
.baseArrayLayer = static_cast<u32>(range.base.layer),
.layerCount = static_cast<u32>(range.extent.layers),
};
}
struct PushConstants {
std::array<float, 2> tex_scale;
std::array<float, 2> tex_offset;
@@ -417,6 +449,40 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
0, barrier);
}
void RecordShaderReadBarrier(Scheduler& scheduler, const ImageView& image_view) {
const VkImage image = image_view.ImageHandle();
const VkImageSubresourceRange subresource_range = SubresourceRangeFromView(image_view);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([image, subresource_range](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
VK_ACCESS_SHADER_WRITE_BIT |
VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange = subresource_range,
};
cmdbuf.PipelineBarrier(
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_TRANSFER_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0,
barrier);
});
}
void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) {
const VkRenderPass render_pass = framebuffer->RenderPass();
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
@@ -484,7 +550,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_view,
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
@@ -496,10 +562,12 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
const VkImageView src_view = src_image_view.Handle(Shader::TextureType::Color2D);
RecordShaderReadBarrier(scheduler, src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
src_view](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -538,7 +606,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
}
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view,
ImageView& src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) {
@@ -554,10 +622,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
const VkImageView src_depth_view = src_image_view.DepthView();
const VkImageView src_stencil_view = src_image_view.StencilView();
RecordShaderReadBarrier(scheduler, src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
src_stencil_view);
@@ -692,6 +763,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent = GetConversionExtent(src_image_view);
RecordShaderReadBarrier(scheduler, src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
@@ -717,7 +789,6 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
// TODO: Barriers
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);
@@ -737,6 +808,7 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent = GetConversionExtent(src_image_view);
RecordShaderReadBarrier(scheduler, src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent,
this](vk::CommandBuffer cmdbuf) {
@@ -763,7 +835,6 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer
const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
src_stencil_view);
// TODO: Barriers
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
nullptr);

View File

@@ -1,4 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@@ -43,7 +46,7 @@ public:
StateTracker& state_tracker, DescriptorPool& descriptor_pool);
~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
@@ -52,9 +55,9 @@ public:
VkImage src_image, VkSampler src_sampler, const Region2D& dst_region,
const Region2D& src_region, const Extent3D& src_size);
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
void BlitDepthStencil(const Framebuffer* dst_framebuffer, ImageView& src_image_view,
const Region2D& dst_region, const Region2D& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);

View File

@@ -1086,8 +1086,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
return;
}
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT && !is_src_msaa && !is_dst_msaa) {
blit_image_helper.BlitColor(dst_framebuffer, src.Handle(Shader::TextureType::Color2D),
dst_region, src_region, filter, operation);
blit_image_helper.BlitColor(dst_framebuffer, src, dst_region, src_region, filter,
operation);
return;
}
ASSERT(src.format == dst.format);
@@ -1106,8 +1106,8 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
}();
if (!can_blit_depth_stencil) {
UNIMPLEMENTED_IF(is_src_msaa || is_dst_msaa);
blit_image_helper.BlitDepthStencil(dst_framebuffer, src.DepthView(), src.StencilView(),
dst_region, src_region, filter, operation);
blit_image_helper.BlitDepthStencil(dst_framebuffer, src, dst_region, src_region,
filter, operation);
return;
}
}
@@ -1968,18 +1968,17 @@ bool Image::BlitScaleHelper(bool scale_up) {
blit_framebuffer =
std::make_unique<Framebuffer>(*runtime, view_ptr, nullptr, extent, scale_up);
}
const auto color_view = blit_view->Handle(Shader::TextureType::Color2D);
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), color_view, dst_region,
runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region,
src_region, operation, BLIT_OPERATION);
} else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
if (!blit_framebuffer) {
blit_framebuffer =
std::make_unique<Framebuffer>(*runtime, nullptr, view_ptr, extent, scale_up);
}
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), blit_view->DepthView(),
blit_view->StencilView(), dst_region,
src_region, operation, BLIT_OPERATION);
runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view,
dst_region, src_region, operation,
BLIT_OPERATION);
} else {
// TODO: Use helper blits where applicable
flags &= ~ImageFlagBits::Rescaled;

View File

@@ -10,4 +10,12 @@
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable : 4189 )
#endif
#include "vk_mem_alloc.h"
#ifdef _MSC_VER
#pragma warning( pop )
#endif