diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 31f33edc28..f1aa26eae3 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -1858,19 +1858,24 @@ void XEmitter::WriteVEXOp4(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, co
   Write8((u8)regOp3 << 4);
 }
 
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
-                          int W, int extrabytes)
+// Raises a panic alert when AVX code is about to be emitted on a host CPU without AVX support.
+static void CheckAVXSupport()
 {
   if (!cpu_info.bAVX)
     PanicAlertFmt("Trying to use AVX on a system that doesn't support it. Bad programmer.");
+}
+
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
+                          int W, int extrabytes)
+{
+  CheckAVXSupport();
   WriteVEXOp(opPrefix, op, regOp1, regOp2, arg, W, extrabytes);
 }
 
 void XEmitter::WriteAVXOp4(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg,
                            X64Reg regOp3, int W)
 {
-  if (!cpu_info.bAVX)
-    PanicAlertFmt("Trying to use AVX on a system that doesn't support it. Bad programmer.");
+  CheckAVXSupport();
   WriteVEXOp4(opPrefix, op, regOp1, regOp2, arg, regOp3, W);
 }
 
@@ -3030,6 +3035,24 @@ void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
   WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg);
 }
 
+// VMOVAPS store form (VEX.0F 29 /r): stores regOp to the aligned memory operand arg.
+// The stored register is the instruction's ModRM.reg operand, so it goes in the first register
+// slot, like the destination of VPXOR above; this instruction has no VEX.vvvv operand.
+// NOTE(review): assumes INVALID_REG in the second slot means "no vvvv operand" - confirm.
+void XEmitter::VMOVAPS(const OpArg& arg, X64Reg regOp)
+{
+  WriteAVXOp(0x00, 0x29, regOp, X64Reg::INVALID_REG, arg);
+}
+
+// VZEROUPPER takes no operands, so its fixed encoding (C5 F8 77) is written out directly.
+void XEmitter::VZEROUPPER()
+{
+  CheckAVXSupport();
+  Write8(0xC5);
+  Write8(0xF8);
+  Write8(0x77);
+}
+
 void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)
 {
   WriteFMA3Op(0x98, regOp1, regOp2, arg);
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 2b7bbb7726..3c8a411159 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -876,6 +876,10 @@ public:
   void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
   void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
 
+  void VMOVAPS(const OpArg& arg, X64Reg regOp);
+
+  void VZEROUPPER();
+
   // FMA3
   void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
   void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 05bd690694..2b26a9c26e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -8,6 +8,7 @@
 
 #include "Common/Assert.h"
 #include "Common/BitSet.h"
+#include "Common/CPUDetect.h"
 #include "Common/CommonTypes.h"
 #include "Common/MsgHandler.h"
 #include "Common/x64ABI.h"
@@ -469,9 +470,23 @@ void Jit64::dcbz(UGeckoInstruction inst)
   FixupBranch slow = J_CC(CC_Z, Jump::Near);
 
   // Fast path: compute full address, then zero out 32 bytes of memory.
-  XORPS(XMM0, R(XMM0));
-  MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
-  MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
+  if (cpu_info.bAVX)
+  {
+    // VEX-encoded VXORPS on XMM0 also zeroes the upper half of YMM0, so a single 32-byte store
+    // clears the whole block.
+    // NOTE(review): requires VMOVAPS to be emitted with VEX.L=1 when given a YMM register;
+    // confirm WriteAVXOp does that, otherwise only the first 16 bytes are zeroed.
+    VXORPS(XMM0, XMM0, R(XMM0));
+    VMOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), YMM0);
+    // Reset the upper YMM halves; presumably avoids AVX/SSE transition stalls in later SSE code.
+    VZEROUPPER();
+  }
+  else
+  {
+    XORPS(XMM0, R(XMM0));
+    MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 0), XMM0);
+    MOVAPS(MComplex(RMEM, RSCRATCH, SCALE_1, 16), XMM0);
+  }
 
   // Slow path: call the general-case code.
   SwitchToFarCode();