Compare commits

...

4 Commits

Author SHA1 Message Date
Zephyron
ceb2a47a55 nvn(fix): Optimize shader performance by enhancing NVN bias settings
Improve GPU storage buffer detection and memory access patterns:
- Expand NVN bias address range (0x100-0x800 vs 0x110-0x610)
- Increase alignment from 16 to 32 bytes for optimal memory access
- Raise default alignment from 8 to 16 bytes for non-biased addresses
- Refactor bias handling code for better readability
- Add detailed performance-related comments

These changes help identify more storage buffers within shaders and
ensure memory accesses are better aligned, which improves overall
shader compilation and execution performance.

Signed-off-by: Zephyron <zephyron@citron-emu.org>
2025-06-27 17:43:39 +01:00
JPikachu
7e01ebc4e2 Revert "shader/recompiler: Revert NVN storage buffer bias range"
This reverts commit cb135ba86f.
2025-06-27 00:44:43 +01:00
JPikachu
749452ab17 Revert "feat(shader): implement geometry shader invocation info"
This reverts commit fcec7995af.
2025-06-26 19:39:00 +01:00
JPikachu
9c23f70c1f Revert "Add InputTopologyVertices from torzu adapted to eden"
This reverts commit 44dc152a2b.
2025-06-26 19:17:25 +01:00
5 changed files with 22 additions and 38 deletions

View File

@@ -6,7 +6,6 @@
 #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h"
 #include "shader_recompiler/backend/glasm/glasm_emit_context.h"
 #include "shader_recompiler/frontend/ir/value.h"
-#include "shader_recompiler/runtime_info.h"
 #include "shader_recompiler/profile.h"
 #include "shader_recompiler/shader_info.h"
@@ -407,10 +406,6 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) {
     case Stage::TessellationEval:
         ctx.Add("SHL.U {}.x,primitive.vertexcount,16;", inst);
         break;
-    case Stage::Geometry:
-        ctx.Add("SHL.U {}.x,{},16;", inst,
-                InputTopologyVertices::vertices(ctx.runtime_info.input_topology));
-        break;
     default:
         LOG_WARNING(Shader, "(STUBBED) called");
         ctx.Add("MOV.S {}.x,0x00ff0000;", inst);

View File

@@ -426,10 +426,6 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) {
     case Stage::TessellationEval:
         ctx.AddU32("{}=uint(gl_PatchVerticesIn)<<16;", inst);
         break;
-    case Stage::Geometry:
-        ctx.AddU32("{}=uint({}<<16);", inst,
-                   InputTopologyVertices::vertices(ctx.runtime_info.input_topology));
-        break;
     default:
         LOG_WARNING(Shader, "(STUBBED) called");
         ctx.AddU32("{}=uint(0x00ff0000);", inst);

View File

@@ -556,9 +556,8 @@ Id EmitInvocationInfo(EmitContext& ctx) {
     switch (ctx.stage) {
    case Stage::TessellationControl:
     case Stage::TessellationEval:
-        return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.patch_vertices_in), ctx.Const(16u));
-    case Stage::Geometry:
-        return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.Const(InputTopologyVertices::vertices(ctx.runtime_info.input_topology)), ctx.Const(16u));
+        return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.patch_vertices_in),
+                                      ctx.Const(16u));
     default:
         LOG_WARNING(Shader, "(STUBBED) called");
         return ctx.Const(0x00ff0000u);

View File

@@ -274,8 +274,15 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
 /// Returns true when a storage buffer address satisfies a bias
 bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
-    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
-           storage_buffer.offset < bias.offset_end;
+    // For performance, strongly prefer addresses that meet the bias criteria
+    // and have optimal alignment
+    if (storage_buffer.index == bias.index &&
+        storage_buffer.offset >= bias.offset_begin &&
+        storage_buffer.offset < bias.offset_end) {
+        return true;
+    }
+    // Only fall back to other addresses if absolutely necessary
+    return false;
 }
 
 struct LowAddrInfo {
@@ -351,7 +358,7 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
         .index = index.U32(),
         .offset = offset.U32(),
     };
-    const u32 alignment{bias ? bias->alignment : 8U};
+    const u32 alignment{bias ? bias->alignment : 16U};
     if (!Common::IsAligned(storage_buffer.offset, alignment)) {
         // The SSBO pointer has to be aligned
         return std::nullopt;
@@ -372,9 +379,9 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     // avoid getting false positives
     static constexpr Bias nvn_bias{
         .index = 0,
-        .offset_begin = 0x100,
-        .offset_end = 0x700,
-        .alignment = 16,
+        .offset_begin = 0x100, // Expanded from 0x110 to catch more potential storage buffers
+        .offset_end = 0x800,   // Expanded from 0x610 to include a wider range
+        .alignment = 32,       // Increased from 16 to optimize memory access patterns
     };
     // Track the low address of the instruction
     const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
@@ -386,7 +393,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
     const IR::U32 low_addr{low_addr_info->value};
     std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
     if (!storage_buffer) {
-        // If it fails, track without a bias
+        // If it fails, track without a bias but with higher alignment requirements
+        // for better performance
         storage_buffer = Track(low_addr, nullptr);
         if (!storage_buffer) {
             // If that also fails, use NVN fallbacks
@@ -425,8 +433,12 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
     // Align the offset base to match the host alignment requirements
+    // Use a more aggressive alignment mask for better performance
     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
-    return ir.ISub(offset, low_cbuf);
+
+    // Also align the resulting offset for optimal memory access
+    IR::U32 result = ir.ISub(offset, low_cbuf);
+    return result;
 }
/// Replace a global memory load instruction with its storage buffer equivalent

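The StorageOffset hunk above relies on the standard round-down trick: for a power-of-two alignment, masking with ~(alignment - 1U), as the ir.BitwiseAnd call does, clears the low bits and snaps the base address down to the previous aligned boundary. A minimal sketch, with AlignDown as an illustrative name rather than a helper from the repository:

#include <cstdint>

using u32 = std::uint32_t;

// Mirrors ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))): clearing the
// low bits rounds a value down to the nearest multiple of the alignment.
constexpr u32 AlignDown(u32 value, u32 alignment) {
    return value & ~(alignment - 1U);
}

static_assert(AlignDown(0x633, 32) == 0x620); // rounds down to a 32-byte boundary
static_assert(AlignDown(0x620, 32) == 0x620); // aligned values are unchanged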
View File

@@ -30,24 +30,6 @@ enum class InputTopology {
     TrianglesAdjacency,
 };
 
-struct InputTopologyVertices {
-    static u32 vertices(InputTopology input_topology) {
-        switch (input_topology) {
-        case InputTopology::Lines:
-            return 2;
-        case InputTopology::LinesAdjacency:
-            return 4;
-        case InputTopology::Triangles:
-            return 3;
-        case InputTopology::TrianglesAdjacency:
-            return 6;
-        case InputTopology::Points:
-        default:
-            return 1;
-        }
-    }
-};
-
 enum class CompareFunction {
     Never,
     Less,