mirror of
https://github.com/dolphin-emu/dolphin
synced 2025-10-06 00:13:03 +02:00
Jit64: dcbz, use VMOVAPS with YMM registers on AVX CPUs
LLVM does this, so let's do it as well.
This commit is contained in:
@@ -1858,19 +1858,23 @@ void XEmitter::WriteVEXOp4(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, co
|
|||||||
Write8((u8)regOp3 << 4);
|
Write8((u8)regOp3 << 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
|
void CheckAVXSupport()
|
||||||
int W, int extrabytes)
|
|
||||||
{
|
{
|
||||||
if (!cpu_info.bAVX)
|
if (!cpu_info.bAVX)
|
||||||
PanicAlertFmt("Trying to use AVX on a system that doesn't support it. Bad programmer.");
|
PanicAlertFmt("Trying to use AVX on a system that doesn't support it. Bad programmer.");
|
||||||
|
}
|
||||||
|
|
||||||
|
void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
|
||||||
|
int W, int extrabytes)
|
||||||
|
{
|
||||||
|
CheckAVXSupport();
|
||||||
WriteVEXOp(opPrefix, op, regOp1, regOp2, arg, W, extrabytes);
|
WriteVEXOp(opPrefix, op, regOp1, regOp2, arg, W, extrabytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::WriteAVXOp4(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
|
void XEmitter::WriteAVXOp4(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
|
||||||
X64Reg regOp3, int W)
|
X64Reg regOp3, int W)
|
||||||
{
|
{
|
||||||
if (!cpu_info.bAVX)
|
CheckAVXSupport();
|
||||||
PanicAlertFmt("Trying to use AVX on a system that doesn't support it. Bad programmer.");
|
|
||||||
WriteVEXOp4(opPrefix, op, regOp1, regOp2, arg, regOp3, W);
|
WriteVEXOp4(opPrefix, op, regOp1, regOp2, arg, regOp3, W);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3030,6 +3034,19 @@ void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
|
|||||||
WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
|
WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void XEmitter::VMOVAPS(const OpArg& arg, X64Reg regOp)
|
||||||
|
{
|
||||||
|
WriteAVXOp(0x00, 0x29, X64Reg::INVALID_REG, regOp, arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void XEmitter::VZEROUPPER()
|
||||||
|
{
|
||||||
|
CheckAVXSupport();
|
||||||
|
Write8(0xC5);
|
||||||
|
Write8(0xF8);
|
||||||
|
Write8(0x77);
|
||||||
|
}
|
||||||
|
|
||||||
void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
|
void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
|
||||||
{
|
{
|
||||||
WriteFMA3Op(0x98, regOp1, regOp2, arg);
|
WriteFMA3Op(0x98, regOp1, regOp2, arg);
|
||||||
|
@@ -876,6 +876,10 @@ public:
|
|||||||
void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
||||||
void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
||||||
|
|
||||||
|
void VMOVAPS(const OpArg& arg, X64Reg regOp);
|
||||||
|
|
||||||
|
void VZEROUPPER();
|
||||||
|
|
||||||
// FMA3
|
// FMA3
|
||||||
void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
||||||
void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
|
||||||
|
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
#include "Common/Assert.h"
|
#include "Common/Assert.h"
|
||||||
#include "Common/BitSet.h"
|
#include "Common/BitSet.h"
|
||||||
|
#include "Common/CPUDetect.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/MsgHandler.h"
|
#include "Common/MsgHandler.h"
|
||||||
#include "Common/x64ABI.h"
|
#include "Common/x64ABI.h"
|
||||||
@@ -469,9 +470,18 @@ void Jit64::dcbz(UGeckoInstruction inst)
|
|||||||
FixupBranch slow = J_CC(CC_Z, Jump::Near);
|
FixupBranch slow = J_CC(CC_Z, Jump::Near);
|
||||||
|
|
||||||
// Fast path: compute full address, then zero out 32 bytes of memory.
|
// Fast path: compute full address, then zero out 32 bytes of memory.
|
||||||
XORPS(XMM0, R(XMM0));
|
if (cpu_info.bAVX)
|
||||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
|
{
|
||||||
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
|
VXORPS(XMM0, XMM0, R(XMM0));
|
||||||
|
VMOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), YMM0);
|
||||||
|
VZEROUPPER();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
XORPS(XMM0, R(XMM0));
|
||||||
|
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
|
||||||
|
MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
|
||||||
|
}
|
||||||
|
|
||||||
// Slow path: call the general-case code.
|
// Slow path: call the general-case code.
|
||||||
SwitchToFarCode();
|
SwitchToFarCode();
|
||||||
|
Reference in New Issue
Block a user