mirror of
https://github.com/TASEmulators/desmume
synced 2025-10-05 16:22:49 +02:00
GPU: Add AltiVec-accelerated functions for 2D layer compositing. (For PowerPC CPUs only.)
- This improves GPU performance by up to 25% on a PowerPC 970MP.
This commit is contained in:
@@ -59,6 +59,9 @@
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
#define USEVECTORSIZE_128
|
||||
#define VECTORSIZE 16
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
#define USEVECTORSIZE_128
|
||||
#define VECTORSIZE 16
|
||||
#endif
|
||||
|
||||
#if defined(USEVECTORSIZE_512) || defined(USEVECTORSIZE_256) || defined(USEVECTORSIZE_128)
|
||||
|
@@ -751,6 +751,8 @@ static FORCEINLINE void CopyLinesForVerticalCount(void *__restrict dstLineHead,
|
||||
#include "GPU_Operations_SSE2.cpp"
|
||||
#elif defined(ENABLE_NEON_A64)
|
||||
#include "GPU_Operations_NEON.cpp"
|
||||
#elif defined(ENABLE_ALTIVEC)
|
||||
#include "GPU_Operations_AltiVec.cpp"
|
||||
#else
|
||||
|
||||
template <bool NEEDENDIANSWAP, size_t ELEMENTSIZE>
|
||||
|
3377
desmume/src/GPU_Operations_AltiVec.cpp
Normal file
3377
desmume/src/GPU_Operations_AltiVec.cpp
Normal file
File diff suppressed because it is too large
Load Diff
122
desmume/src/GPU_Operations_AltiVec.h
Normal file
122
desmume/src/GPU_Operations_AltiVec.h
Normal file
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
Copyright (C) 2025 DeSmuME team
|
||||
|
||||
This file is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This file is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this software. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GPU_OPERATIONS_ALTIVEC_H
|
||||
#define GPU_OPERATIONS_ALTIVEC_H
|
||||
|
||||
#include "GPU_Operations.h"
|
||||
|
||||
#ifndef ENABLE_ALTIVEC
|
||||
#warning This header requires PowerPC AltiVec support.
|
||||
#else
|
||||
|
||||
// Declares the color operations used by the AltiVec (PowerPC) code path.
// This header only declares the member functions; their definitions are not
// visible here (presumably they live in GPU_Operations_AltiVec.cpp -- confirm).
//
// Each operation comes in two flavors:
//   - a non-template overload that takes and returns v128u16 color vectors, and
//   - a template overload, selected by NDSColorFormat, that takes and returns
//     v128u32 color vectors.
// Every member function is const and marked FORCEINLINE.
class ColorOperation_AltiVec
|
||||
{
|
||||
public:
|
||||
ColorOperation_AltiVec() {};
|
||||
|
||||
// blend: combines colA and colB using the blendEVA / blendEVB coefficient vectors.
// NOTE(review): the exact formula and the meaning of USECONSTANTBLENDVALUESHINT
// are defined by the implementation, which is not shown here -- confirm there.
FORCEINLINE v128u16 blend(const v128u16 &colA, const v128u16 &colB, const v128u16 &blendEVA, const v128u16 &blendEVB) const;
|
||||
template<NDSColorFormat COLORFORMAT, bool USECONSTANTBLENDVALUESHINT> FORCEINLINE v128u32 blend(const v128u32 &colA, const v128u32 &colB, const v128u16 &blendEVA, const v128u16 &blendEVB) const;
|
||||
|
||||
// blend3D: blend variant that takes no explicit coefficient vectors.
// The v128u16 overload receives colA as two v128u32 halves (colA_Lo, colA_Hi)
// and colB as a single v128u16 vector.
FORCEINLINE v128u16 blend3D(const v128u32 &colA_Lo, const v128u32 &colA_Hi, const v128u16 &colB) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v128u32 blend3D(const v128u32 &colA, const v128u32 &colB) const;
|
||||
|
||||
// increase: takes a color vector and a blendEVY coefficient vector.
// Presumably the brightness-increase effect -- confirm against the implementation.
FORCEINLINE v128u16 increase(const v128u16 &col, const v128u16 &blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v128u32 increase(const v128u32 &col, const v128u16 &blendEVY) const;
|
||||
|
||||
// decrease: same parameter shape as increase().
// Presumably the brightness-decrease effect -- confirm against the implementation.
FORCEINLINE v128u16 decrease(const v128u16 &col, const v128u16 &blendEVY) const;
|
||||
template<NDSColorFormat COLORFORMAT> FORCEINLINE v128u32 decrease(const v128u32 &col, const v128u16 &blendEVY) const;
|
||||
};
|
||||
|
||||
// Declares the pixel compositing operations used by the AltiVec (PowerPC) code path.
// This header only declares the member functions; their definitions are not
// visible here (presumably they live in GPU_Operations_AltiVec.cpp -- confirm).
//
// Naming pattern visible in the declarations below:
//   - "...16" functions take two v128u16 source vectors (src1, src0).
//   - "...32" functions take four v128u32 source vectors (src3, src2, src1, src0).
//   - "...Mask..." functions additionally take a passMask8 vector.
// Every function takes the GPUEngineCompositorInfo by non-const reference,
// returns void, and is a const member marked FORCEINLINE.
class PixelOperation_AltiVec
|
||||
{
|
||||
protected:
|
||||
// Copy helpers. These are the only helpers parameterized on ISDEBUGRENDER.
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy16(GPUEngineCompositorInfo &compInfo, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copy32(GPUEngineCompositorInfo &compInfo, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Copy helpers that also take passMask8.
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copyMask16(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT, bool ISDEBUGRENDER> FORCEINLINE void _copyMask32(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Brightness-up helpers; evy16 carries the effect coefficient vector.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp16(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUp32(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Brightness-up helpers that also take passMask8.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUpMask16(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessUpMask32(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Brightness-down helpers; same parameter shape as the brightness-up helpers.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown16(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDown32(GPUEngineCompositorInfo &compInfo, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// Brightness-down helpers that also take passMask8.
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDownMask16(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u16 &src1, const v128u16 &src0) const;
|
||||
template<NDSColorFormat OUTPUTFORMAT> FORCEINLINE void _brightnessDownMask32(GPUEngineCompositorInfo &compInfo, const v128u8 &passMask8, const v128u16 &evy16, const v128u8 &srcLayerID, const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0) const;
|
||||
|
||||
// "Unknown effect" helper for two v128u16 source vectors. Unlike the helpers
// above it is also parameterized on LAYERTYPE, and it receives the effect-enable
// mask, the destination blend-enable LUT, and the sprite alpha / sprite mode
// values as already-loaded vectors.
// NOTE(review): presumably used when the color effect must be chosen per pixel
// at run time -- confirm against the implementation.
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _unknownEffectMask16(GPUEngineCompositorInfo &compInfo,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u16 &src1, const v128u16 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const v128u8 &enableColorEffectMask,
|
||||
const v128u8 &spriteAlpha,
|
||||
const v128u8 &spriteMode) const;
|
||||
|
||||
// Same as _unknownEffectMask16, but for four v128u32 source vectors.
template<NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE>
|
||||
FORCEINLINE void _unknownEffectMask32(GPUEngineCompositorInfo &compInfo,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const v128u8 &enableColorEffectMask,
|
||||
const v128u8 &spriteAlpha,
|
||||
const v128u8 &spriteMode) const;
|
||||
|
||||
public:
|
||||
PixelOperation_AltiVec() {};
|
||||
|
||||
// Public entry point for compositing two v128u16 source vectors.
// Parameterized on compositor mode, output color format, layer type, and whether
// a window test will be performed. Unlike the protected helpers, the color-effect
// enable, sprite alpha and sprite mode inputs are passed as __restrict u8 pointers
// rather than as vectors.
// NOTE(review): presumably selects one of the protected helpers above based on
// COMPOSITORMODE and didAllPixelsPass -- the implementation is not visible here.
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void Composite16(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u16 &src1, const v128u16 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr) const;
|
||||
|
||||
// Same as Composite16, but for four v128u32 source vectors.
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, GPULayerType LAYERTYPE, bool WILLPERFORMWINDOWTEST>
|
||||
FORCEINLINE void Composite32(GPUEngineCompositorInfo &compInfo,
|
||||
const bool didAllPixelsPass,
|
||||
const v128u8 &passMask8,
|
||||
const v128u16 &evy16,
|
||||
const v128u8 &srcLayerID,
|
||||
const v128u32 &src3, const v128u32 &src2, const v128u32 &src1, const v128u32 &src0,
|
||||
const v128u8 &srcEffectEnableMask,
|
||||
const v128u8 &dstBlendEnableMaskLUT,
|
||||
const u8 *__restrict enableColorEffectPtr,
|
||||
const u8 *__restrict sprAlphaPtr,
|
||||
const u8 *__restrict sprModePtr) const;
|
||||
};
|
||||
|
||||
#endif // ENABLE_ALTIVEC
|
||||
|
||||
#endif // GPU_OPERATIONS_ALTIVEC_H
|
@@ -4423,6 +4423,8 @@
|
||||
ABDDF7C41898F024007583C1 /* Icon_DisplayToggle_420x420.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_DisplayToggle_420x420.png; path = images/Icon_DisplayToggle_420x420.png; sourceTree = "<group>"; };
|
||||
ABDDF7C71898F032007583C1 /* Icon_FrameAdvance_420x420.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_FrameAdvance_420x420.png; path = images/Icon_FrameAdvance_420x420.png; sourceTree = "<group>"; };
|
||||
ABDDF7C81898F032007583C1 /* Icon_FrameJump_420x420.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_FrameJump_420x420.png; path = images/Icon_FrameJump_420x420.png; sourceTree = "<group>"; };
|
||||
ABDE648E2E21068500C03E0B /* GPU_Operations_AltiVec.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GPU_Operations_AltiVec.h; sourceTree = "<group>"; };
|
||||
ABDE648F2E21068500C03E0B /* GPU_Operations_AltiVec.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = GPU_Operations_AltiVec.cpp; sourceTree = "<group>"; };
|
||||
ABE5DFE3143FB1DA00835AD8 /* cocoa_videofilter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cocoa_videofilter.h; sourceTree = "<group>"; };
|
||||
ABE5DFE4143FB1DA00835AD8 /* cocoa_videofilter.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = cocoa_videofilter.mm; sourceTree = "<group>"; };
|
||||
ABE670251415DE6C00E8E4C9 /* tinystr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tinystr.cpp; sourceTree = "<group>"; };
|
||||
@@ -5688,6 +5690,7 @@
|
||||
AB1D4BB426E6F8D700A9AE42 /* GPU_Operations_SSE2.cpp */,
|
||||
AB1D4BAF26E6F8D700A9AE42 /* GPU_Operations_AVX2.cpp */,
|
||||
ABD86C842D83E20500505422 /* GPU_Operations_NEON.cpp */,
|
||||
ABDE648F2E21068500C03E0B /* GPU_Operations_AltiVec.cpp */,
|
||||
ABD1FEB81345AC8400AF11D1 /* lua-engine.cpp */,
|
||||
ABD1FEB91345AC8400AF11D1 /* matrix.cpp */,
|
||||
ABD1FEBA1345AC8400AF11D1 /* mc.cpp */,
|
||||
@@ -5733,6 +5736,7 @@
|
||||
AB1D4BB226E6F8D700A9AE42 /* GPU_Operations_SSE2.h */,
|
||||
AB1D4BB026E6F8D700A9AE42 /* GPU_Operations_AVX2.h */,
|
||||
ABD86C832D83E20500505422 /* GPU_Operations_NEON.h */,
|
||||
ABDE648E2E21068500C03E0B /* GPU_Operations_AltiVec.h */,
|
||||
AB796CA215CDCB6B00C59155 /* instruction_attributes.h */,
|
||||
AB796CA315CDCB6B00C59155 /* instructions.h */,
|
||||
ABD1FE841345AC8400AF11D1 /* lua-engine.h */,
|
||||
|
@@ -1918,6 +1918,8 @@
|
||||
AB0F29A314BE7213009ABC6F /* Icon_RotateCW_420x420.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_RotateCW_420x420.png; path = images/Icon_RotateCW_420x420.png; sourceTree = "<group>"; };
|
||||
AB0F29A414BE7213009ABC6F /* Icon_ShowHUD_420x420.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_ShowHUD_420x420.png; path = images/Icon_ShowHUD_420x420.png; sourceTree = "<group>"; };
|
||||
AB0F29A514BE7213009ABC6F /* Icon_Speaker_420x420.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Icon_Speaker_420x420.png; path = images/Icon_Speaker_420x420.png; sourceTree = "<group>"; };
|
||||
AB11AE8E2E210BB400E8A516 /* GPU_Operations_AltiVec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = GPU_Operations_AltiVec.cpp; sourceTree = "<group>"; };
|
||||
AB11AE8F2E210BB400E8A516 /* GPU_Operations_AltiVec.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GPU_Operations_AltiVec.h; sourceTree = "<group>"; };
|
||||
AB126D06182ECB9500EBCF22 /* slot2_passme.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slot2_passme.cpp; sourceTree = "<group>"; };
|
||||
AB142025186E2CD80015D52F /* Image_MemoryExpansionPak.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = Image_MemoryExpansionPak.png; path = images/Image_MemoryExpansionPak.png; sourceTree = "<group>"; };
|
||||
AB1B20AB2AD5ED59007CA7EB /* slot2_hcv1000.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slot2_hcv1000.cpp; sourceTree = "<group>"; };
|
||||
@@ -3436,6 +3438,7 @@
|
||||
AB6FE66C26E6F7C2002B2106 /* GPU_Operations_SSE2.cpp */,
|
||||
AB6FE66A26E6F7C2002B2106 /* GPU_Operations_AVX2.cpp */,
|
||||
AB3B8DC62D8A35A000C9CBFD /* GPU_Operations_NEON.cpp */,
|
||||
AB11AE8E2E210BB400E8A516 /* GPU_Operations_AltiVec.cpp */,
|
||||
ABD1FEB81345AC8400AF11D1 /* lua-engine.cpp */,
|
||||
ABD1FEB91345AC8400AF11D1 /* matrix.cpp */,
|
||||
ABD1FEBA1345AC8400AF11D1 /* mc.cpp */,
|
||||
@@ -3481,6 +3484,7 @@
|
||||
AB6FE66D26E6F7C2002B2106 /* GPU_Operations_SSE2.h */,
|
||||
AB6FE66B26E6F7C2002B2106 /* GPU_Operations_AVX2.h */,
|
||||
AB3B8DC72D8A35A000C9CBFD /* GPU_Operations_NEON.h */,
|
||||
AB11AE8F2E210BB400E8A516 /* GPU_Operations_AltiVec.h */,
|
||||
ABBCE29D15ACB26100A2C965 /* instruction_attributes.h */,
|
||||
ABBCE29E15ACB26100A2C965 /* instructions.h */,
|
||||
ABD1FE841345AC8400AF11D1 /* lua-engine.h */,
|
||||
|
Reference in New Issue
Block a user