Compare commits

...

2 Commits

12 changed files with 1190 additions and 1203 deletions

View File

@@ -3598,7 +3598,7 @@ void GPUEngineA::RenderLine(const size_t l)
template <GPUCompositorMode COMPOSITORMODE, NDSColorFormat OUTPUTFORMAT, bool WILLPERFORMWINDOWTEST>
void GPUEngineA::RenderLine_Layer3D(GPUEngineCompositorInfo &compInfo)
{
const Color4u8 *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer();
const Color4u8 *__restrict framebuffer3D = CurrentRenderer->GetFramebuffer32();
if (framebuffer3D == NULL)
{
return;
@@ -3926,7 +3926,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
}
}
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (Color4u8 *)compInfo.target.lineColorHead : (Color4u8 *)CurrentRenderer->GetFramebuffer() + compInfo.line.blockOffsetCustom;
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (const Color4u8 *)compInfo.target.lineColorHead : CurrentRenderer->GetFramebuffer32() + compInfo.line.blockOffsetCustom;
srcBPtr = (DISPCAPCNT.SrcB == 0) ? vramCustom32 : this->_fifoLine32;
dstCustomPtr = (Color4u8 *)this->_VRAMCustomBlockPtr[DISPCAPCNT.VRAMWriteBlock] + dstCustomOffset;
}
@@ -3934,7 +3934,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
{
const u16 *vramPtr16 = (willReadNativeVRAM) ? vramNative16 : vramCustom16;
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (u16 *)compInfo.target.lineColorHead : this->_3DFramebuffer16 + compInfo.line.blockOffsetCustom;
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (const Color5551 *)compInfo.target.lineColorHead : CurrentRenderer->GetFramebuffer16() + compInfo.line.blockOffsetCustom;
srcBPtr = (DISPCAPCNT.SrcB == 0) ? vramPtr16 : this->_fifoLine16;
dstCustomPtr = (u16 *)this->_VRAMCustomBlockPtr[DISPCAPCNT.VRAMWriteBlock] + dstCustomOffset;
}
@@ -3998,7 +3998,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI
if (needCaptureNative)
{
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (u16 *)compInfo.target.lineColorHead : this->_3DFramebuffer16 + compInfo.line.blockOffsetCustom;
srcAPtr = (DISPCAPCNT.SrcA == 0) ? (const Color5551 *)compInfo.target.lineColorHead : CurrentRenderer->GetFramebuffer16() + compInfo.line.blockOffsetCustom;
srcBPtr = (DISPCAPCNT.SrcB == 0) ? vramNative16 : this->_fifoLine16;
// Convert 18-bit and 24-bit framebuffers to 15-bit for native screen capture.

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash
Copyright (C) 2008-2024 DeSmuME team
Copyright (C) 2008-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -174,13 +174,6 @@ EXTERNOGLEXT(PFNGLMAPBUFFERPROC, glMapBuffer) // Core in v1.5
#endif
EXTERNOGLEXT(PFNGLUNMAPBUFFERPROC, glUnmapBuffer) // Core in v1.5
// VAO (always available in Apple's implementation of OpenGL, including old versions)
#if defined(__APPLE__) || defined(GL_VERSION_3_0) || defined(GL_ES_VERSION_3_0)
EXTERNOGLEXT(PFNGLGENVERTEXARRAYSPROC, glGenVertexArrays) // Core in v3.0 and ES v3.0
EXTERNOGLEXT(PFNGLDELETEVERTEXARRAYSPROC, glDeleteVertexArrays) // Core in v3.0 and ES v3.0
EXTERNOGLEXT(PFNGLBINDVERTEXARRAYPROC, glBindVertexArray) // Core in v3.0 and ES v3.0
#endif
// OPENGL CORE FUNCTIONS ADDED IN 3.2 CORE PROFILE AND VARIANTS
#if defined(GL_VERSION_3_0) || defined(GL_ES_VERSION_3_0)
@@ -200,6 +193,11 @@ EXTERNOGLEXT(PFNGLBINDFRAGDATALOCATIONINDEXEDPROC, glBindFragDataLocationIndexed
// Buffer Objects
EXTERNOGLEXT(PFNGLMAPBUFFERRANGEPROC, glMapBufferRange) // Core in v3.0 and ES v3.0
// Vertex Array Objects
EXTERNOGLEXT(PFNGLGENVERTEXARRAYSPROC, glGenVertexArrays) // Core in v3.0 and ES v3.0
EXTERNOGLEXT(PFNGLDELETEVERTEXARRAYSPROC, glDeleteVertexArrays) // Core in v3.0 and ES v3.0
EXTERNOGLEXT(PFNGLBINDVERTEXARRAYPROC, glBindVertexArray) // Core in v3.0 and ES v3.0
// FBO
EXTERNOGLEXT(PFNGLGENFRAMEBUFFERSPROC, glGenFramebuffers) // Core in v3.0 and ES v2.0
EXTERNOGLEXT(PFNGLBINDFRAMEBUFFERPROC, glBindFramebuffer) // Core in v3.0 and ES v2.0
@@ -241,6 +239,7 @@ EXTERNOGLEXT(PFNGLDELETESYNCPROC, glDeleteSync) // Core in v3.2 and ES v3.0
// OPENGL FBO EXTENSIONS
// We need to include these explicitly for OpenGL legacy mode since the EXT versions of FBOs
// may work differently than their ARB counterparts when running on older drivers.
#if defined(GL_EXT_framebuffer_object)
// FBO
EXTERNOGLEXT(PFNGLGENFRAMEBUFFERSEXTPROC, glGenFramebuffersEXT)
@@ -304,6 +303,21 @@ EXTERNOGLEXT(PFNGLDELETERENDERBUFFERSEXTPROC, glDeleteRenderbuffersEXT)
#endif // GL_EXT_framebuffer_object
// OPENGL APPLE EXTENSIONS
// There are some useful Apple extensions that can provide performance improvements
// or functionality in legacy OpenGL that is similar to their v3.2 Core Profile
// counterparts.
#if defined(GL_APPLE_vertex_array_object)
EXTERNOGLEXT(PFNGLBINDVERTEXARRAYAPPLEPROC, glBindVertexArrayAPPLE)
EXTERNOGLEXT(PFNGLDELETEVERTEXARRAYSAPPLEPROC, glDeleteVertexArraysAPPLE)
EXTERNOGLEXT(PFNGLGENVERTEXARRAYSAPPLEPROC, glGenVertexArraysAPPLE)
#endif
#if defined(GL_APPLE_texture_range)
EXTERNOGLEXT(PFNGLTEXTURERANGEAPPLEPROC, glTextureRangeAPPLE)
#endif
// Some headers, such as the OpenGL ES headers, may not include this token.
// Add it manually to avoid compiling issues.
#ifndef GL_BGRA
@@ -444,7 +458,7 @@ enum OGLErrorCode
{
OGLERROR_NOERR = RENDER3DERROR_NOERR,
OGLERROR_DRIVER_VERSION_TOO_OLD,
OGLERROR_DRIVER_VERSION_TOO_OLD = 20000,
OGLERROR_BEGINGL_FAILED,
OGLERROR_CLIENT_RESIZE_ERROR,
@@ -471,21 +485,6 @@ enum OGLPolyDrawMode
OGLPolyDrawMode_ZeroAlphaPass = 2
};
// Two-component GLfloat vector. The same storage can be addressed either as
// the array vec[0..1] or through the named members x and y.
// NOTE(review): the unnamed struct member relies on anonymous-struct support,
// which is a common compiler extension rather than standard C++.
union GLvec2
{
GLfloat vec[2];
struct { GLfloat x, y; };
};
typedef union GLvec2 GLvec2;
// Three-component GLfloat vector. The same storage can be addressed as the
// array vec[0..2], as a color (r, g, b), or as a coordinate (x, y, z).
// NOTE(review): the unnamed struct members rely on anonymous-struct support,
// which is a common compiler extension rather than standard C++.
union GLvec3
{
GLfloat vec[3];
struct { GLfloat r, g, b; };
struct { GLfloat x, y, z; };
};
typedef union GLvec3 GLvec3;
union GLvec4
{
GLfloat vec[4];
@@ -494,14 +493,6 @@ union GLvec4
};
typedef union GLvec4 GLvec4;
// Per-vertex record made of a 4-component position, a 2-component texture
// coordinate and a 3-component color, all stored as GLfloat vectors.
struct OGLVertex
{
GLvec4 position;
GLvec2 texCoord;
GLvec3 color;
};
typedef struct OGLVertex OGLVertex;
struct OGLRenderStates
{
GLuint enableAntialiasing;
@@ -607,12 +598,42 @@ struct OGLFogShaderID
};
typedef OGLFogShaderID OGLFogShaderID;
// Collects, in one place, the OpenGL variant in use, a set of per-feature
// support flags, and a few GL enum values chosen from those features.
// OpenGLRenderer stores one instance as _feature and exposes it read-only via
// GetFeatureInfo(); a renderer's InitExtensions() is what fills the fields in.
// The struct has no constructor, so every field must be assigned explicitly
// before it is read.
// The trailing comment on each flag names the GL / GL ES version in which the
// feature became core, or the extension that provides it.
struct OGLFeatureInfo
{
// Which OpenGL variant the renderer targets (e.g. OpenGLVariantID_ES3_3_0).
OpenGLVariantID variantID;
bool supportTextureMirroredRepeat; // Core in v1.4
bool supportBlendFuncSeparate; // Core in v1.4
bool supportBlendEquationSeparate; // Core in v2.0
bool supportMapBufferRange; // Core in v3.0 and ES v3.0
bool supportVBO; // Core in v1.5
bool supportPBO; // Core in v2.1
bool supportFBO; // Core in v3.0 and ES v2.0
bool supportFBOBlit; // Core in v3.0 and ES v3.0
bool supportMultisampledFBO; // Core in v3.0 and ES v2.0
bool supportVAO; // Core in v3.0 and ES v3.0
bool supportVAO_APPLE; // GL_APPLE_vertex_array_object
bool supportUBO; // Core in v3.1 and ES v3.0
bool supportUBO64K; // Core in v3.1 and ES v3.0
bool supportTBO; // Core in v3.1 and ES v3.2
bool supportShaders; // Core in v2.0
bool supportSampleShading; // Core in v3.2, not available in ES
bool supportShaderFixedLocation; // Core in v3.3, not available in ES
bool supportConservativeDepth; // Core in v4.0, not available in ES
bool supportConservativeDepth_AMD; // GL_AMD_conservative_depth
bool supportTextureRange_APPLE; // GL_APPLE_texture_range
bool supportClientStorage_APPLE; // GL_APPLE_client_storage
// GL enum to use for mirrored-repeat texture wrapping (the ES 3.0 renderer
// sets this to GL_MIRRORED_REPEAT).
GLint stateTexMirroredRepeat;
// Preferred pixel format and component data type for framebuffer readback
// with glReadPixels() (the ES 3.0 renderer uses GL_RGBA / GL_UNSIGNED_BYTE).
GLint readPixelsBestFormat;
GLint readPixelsBestDataType;
};
typedef OGLFeatureInfo OGLFeatureInfo;
struct OGLRenderRef
{
// OpenGL Feature Support
GLint stateTexMirroredRepeat;
GLint readPixelsBestDataType;
GLint readPixelsBestFormat;
GLenum textureSrcTypeCIColor;
GLenum textureSrcTypeCIFog;
GLenum textureSrcTypeEdgeColor;
@@ -743,6 +764,11 @@ extern CACHE_ALIGN const GLfloat divide6bitBy63_LUT[64];
extern const GLfloat PostprocessVtxBuffer[16];
extern const GLubyte PostprocessElementBuffer[6];
extern const char *FramebufferOutputVtxShader;
extern const char *FramebufferOutputBGRA6665FragShader;
extern const char *FramebufferOutputBGRA8888FragShader;
extern const char *FramebufferOutputRGBA6665FragShader;
//This is called by OGLRender whenever it initializes.
//Platforms, please be sure to set this up.
//return true if you successfully init.
@@ -785,9 +811,7 @@ extern void (*OGLCreateRenderer_3_2_Func)(OpenGLRenderer **rendererPtr);
extern void (*OGLLoadEntryPoints_ES_3_0_Func)();
extern void (*OGLCreateRenderer_ES_3_0_Func)(OpenGLRenderer **rendererPtr);
bool IsOpenGLDriverVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision);
#define glDrawBuffer(theAttachment) glDrawBufferDESMUME((theAttachment), this->_variantID)
#define glDrawBuffer(theAttachment) glDrawBufferDESMUME((theAttachment), this->_feature.variantID)
static inline void glDrawBufferDESMUME(GLenum theAttachment, const OpenGLVariantID variantID)
{
GLenum bufs[4] = { GL_NONE, GL_NONE, GL_NONE, GL_NONE };
@@ -822,6 +846,11 @@ static inline void glDrawBufferDESMUME(GLenum theAttachment, const OpenGLVariant
}
}
// Tests a major/minor/revision version triple; callers use a true result to
// decide whether a given renderer may be created.
// NOTE(review): presumably compares against the running driver's reported
// OpenGL version -- confirm against the definition.
bool IsOpenGLDriverVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision);
// Free-function shader program builder. Takes vertex and fragment shader
// source as C strings and reports the resulting shader and program object IDs
// through the three reference parameters. Callers treat any return value other
// than OGLERROR_NOERR as failure, and call glLinkProgram() themselves afterwards.
Render3DError ShaderProgramCreateOGL(GLuint &vtxShaderID, GLuint &fragShaderID, GLuint &programID, const char *vtxShaderCString, const char *fragShaderCString);
// Called after glLinkProgram(); callers treat a false return as a link failure
// for theProgram.
bool ValidateShaderProgramLinkOGL(GLuint theProgram);
class OpenGLTexture : public Render3DTexture
{
protected:
@@ -859,12 +888,6 @@ class OpenGLRenderer : public Render3D_AltiVec
class OpenGLRenderer : public Render3D
#endif
{
private:
// Driver's OpenGL Version
unsigned int versionMajor;
unsigned int versionMinor;
unsigned int versionRevision;
private:
template<bool SWAP_RB> Render3DError _FlushFramebufferConvertOnCPU(const Color4u8 *__restrict srcFramebuffer,
Color4u8 *__restrict dstFramebufferMain, u16 *__restrict dstFramebuffer16,
@@ -875,17 +898,7 @@ protected:
OGLRenderRef *ref;
// OpenGL Feature Support
OpenGLVariantID _variantID;
bool _isBlendFuncSeparateSupported;
bool _isBlendEquationSeparateSupported;
bool isVBOSupported;
bool isPBOSupported;
bool isFBOSupported;
bool _isFBOBlitSupported;
bool isMultisampledFBOSupported;
bool isShaderSupported;
bool _isSampleShadingSupported;
bool isVAOSupported;
OGLFeatureInfo _feature;
bool _willConvertFramebufferOnGPU;
bool _willUseMultisampleShaders;
@@ -965,11 +978,7 @@ protected:
virtual Render3DError CreateFramebufferOutput8888Program(const char *vtxShaderCString, const char *fragShaderCString) = 0;
virtual void DestroyFramebufferOutput8888Programs() = 0;
virtual Render3DError InitPostprocessingPrograms(const char *edgeMarkVtxShader,
const char *edgeMarkFragShader,
const char *framebufferOutputVtxShader,
const char *framebufferOutputRGBA6665FragShader,
const char *framebufferOutputRGBA8888FragShader) = 0;
virtual Render3DError InitPostprocessingPrograms(const char *edgeMarkVtxShader, const char *edgeMarkFragShader) = 0;
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID) = 0;
@@ -982,8 +991,6 @@ protected:
virtual void _ResolveWorkingBackFacing() = 0;
virtual void _ResolveGeometry() = 0;
virtual void _ResolveFinalFramebuffer() = 0;
virtual void _FramebufferProcessVertexAttribEnable() = 0;
virtual void _FramebufferProcessVertexAttribDisable() = 0;
virtual Render3DError _FramebufferConvertColorFormat() = 0;
virtual Render3DError DrawShadowPolygon(const GLenum polyPrimitive, const GLsizei vertIndexCount, const GLushort *indexBufferPtr, const bool performDepthEqualTest, const bool enableAlphaDepthWrite, const bool isTranslucent, const u8 opaquePolyID) = 0;
@@ -994,21 +1001,14 @@ public:
OpenGLRenderer();
virtual ~OpenGLRenderer();
const OGLFeatureInfo& GetFeatureInfo() const;
virtual Render3DError InitExtensions() = 0;
bool IsExtensionPresent(const std::set<std::string> *oglExtensionSet, const std::string extensionName) const;
Render3DError ShaderProgramCreate(GLuint &vtxShaderID,
GLuint &fragShaderID,
GLuint &programID,
const char *vtxShaderCString,
const char *fragShaderCString);
bool ValidateShaderCompile(GLenum shaderType, GLuint theShader) const;
bool ValidateShaderProgramLink(GLuint theProgram) const;
void GetVersion(unsigned int *major, unsigned int *minor, unsigned int *revision) const;
void SetVersion(unsigned int major, unsigned int minor, unsigned int revision);
bool IsVersionSupported(unsigned int checkVersionMajor, unsigned int checkVersionMinor, unsigned int checkVersionRevision) const;
virtual Color4u8* GetFramebuffer();
virtual const Color4u8* GetFramebuffer32() const;
virtual GLsizei GetLimitedMultisampleSize() const;
Render3DError ApplyRenderingSettings(const GFX3D_State &renderState);
@@ -1046,11 +1046,7 @@ protected:
virtual Render3DError CreateFramebufferOutput8888Program(const char *vtxShaderCString, const char *fragShaderCString);
virtual void DestroyFramebufferOutput8888Programs();
virtual Render3DError InitPostprocessingPrograms(const char *edgeMarkVtxShader,
const char *edgeMarkFragShader,
const char *framebufferOutputVtxShader,
const char *framebufferOutputRGBA6665FragShader,
const char *framebufferOutputRGBA8888FragShader);
virtual Render3DError InitPostprocessingPrograms(const char *edgeMarkVtxShader, const char *edgeMarkFragShader);
virtual Render3DError UploadClearImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID);
@@ -1064,8 +1060,6 @@ protected:
virtual void _ResolveWorkingBackFacing();
virtual void _ResolveGeometry();
virtual void _ResolveFinalFramebuffer();
virtual void _FramebufferProcessVertexAttribEnable();
virtual void _FramebufferProcessVertexAttribDisable();
virtual Render3DError _FramebufferConvertColorFormat();
// Base rendering methods

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
/*
Copyright (C) 2006 yopyop
Copyright (C) 2006-2007 shash
Copyright (C) 2008-2024 DeSmuME team
Copyright (C) 2008-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -35,8 +35,6 @@ extern const char *EdgeMarkVtxShader_150;
extern const char *EdgeMarkFragShader_150;
extern const char *FogVtxShader_150;
extern const char *FogFragShader_150;
extern const char *FramebufferOutputVtxShader_150;
extern const char *FramebufferOutput6665FragShader_150;
// A port that wants to use the OpenGL 3.2 renderer must assign the two following functions
// to OGLLoadEntryPoints_3_2_Func and OGLCreateRenderer_3_2_Func, respectively.
@@ -111,10 +109,6 @@ public:
class OpenGLRenderer_3_2 : public OpenGLRenderer_2_1
{
protected:
bool _isShaderFixedLocationSupported;
bool _isConservativeDepthSupported;
bool _isConservativeDepthAMDSupported;
OpenGLGeometryResource *_gResource;
OpenGLRenderStatesResource *_rsResource;
@@ -136,21 +130,15 @@ protected:
virtual void DestroyMSGeometryZeroDstAlphaProgram();
virtual Render3DError CreateEdgeMarkProgram(const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString);
virtual Render3DError CreateFogProgram(const OGLFogProgramKey fogProgramKey, const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString);
virtual Render3DError CreateFramebufferOutput6665Program(const char *vtxShaderCString, const char *fragShaderCString);
virtual Render3DError CreateFramebufferOutput8888Program(const char *vtxShaderCString, const char *fragShaderCString);
virtual void GetExtensionSet(std::set<std::string> *oglExtensionSet);
virtual void _SetupGeometryShaders(const OGLGeometryFlags flags);
virtual void _RenderGeometryVertexAttribEnable();
virtual void _RenderGeometryVertexAttribDisable();
virtual Render3DError ZeroDstAlphaPass(const POLY *rawPolyList, const CPoly *clippedPolyList, const size_t clippedPolyCount, const size_t clippedPolyOpaqueCount, bool enableAlphaBlending, size_t indexOffset, POLYGON_ATTR lastPolyAttr);
virtual void _RenderGeometryLoopBegin();
virtual void _RenderGeometryLoopEnd();
virtual void _ResolveWorkingBackFacing();
virtual void _ResolveGeometry();
virtual void _ResolveFinalFramebuffer();
virtual void _FramebufferProcessVertexAttribEnable();
virtual void _FramebufferProcessVertexAttribDisable();
virtual Render3DError _FramebufferConvertColorFormat();
virtual Render3DError BeginRender(const GFX3D_State &renderState, const GFX3D_GeometryList &renderGList);

View File

@@ -1,5 +1,5 @@
/*
Copyright (C) 2024 DeSmuME team
Copyright (C) 2024-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -267,13 +267,13 @@ void OGLCreateRenderer_ES_3_0(OpenGLRenderer **rendererPtr)
if (IsOpenGLDriverVersionSupported(3, 0, 0))
{
*rendererPtr = new OpenGLESRenderer_3_0;
(*rendererPtr)->SetVersion(3, 0, 0);
}
}
OpenGLESRenderer_3_0::OpenGLESRenderer_3_0()
{
_variantID = OpenGLVariantID_ES3_3_0;
_deviceInfo.renderID = RENDERID_OPENGL_ES;
_feature.variantID = OpenGLVariantID_ES3_3_0;
_geometryDrawBuffersEnum = GeometryDrawBuffersEnumES;
_geometryAttachmentWorkingBuffer = GeometryAttachmentWorkingBufferES;
@@ -295,6 +295,43 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
std::set<std::string> oglExtensionSet;
this->GetExtensionSet(&oglExtensionSet);
// All features below are assumed supported in both OpenGL v3.2 Core Profile and OpenGL ES v3.0.
this->_feature.supportTextureMirroredRepeat = true;
this->_feature.stateTexMirroredRepeat = GL_MIRRORED_REPEAT;
this->_feature.supportBlendFuncSeparate = true;
this->_feature.supportBlendEquationSeparate = true;
this->_feature.supportMapBufferRange = true;
this->_feature.supportVBO = true;
// PBOs are only used when reading back the rendered framebuffer for the emulated
// BG0 layer. For desktop-class GPUs, doing an asynchronous glReadPixels() call
// is always advantageous since such devices are expected to have their GPUs
// connected to a data bus.
//
// However, many ARM-based mobile devices use integrated GPUs of varying degrees
// of memory latency and implementation quality. This means that the performance
// of an asynchronous glReadPixels() call is NOT guaranteed on such devices.
//
// In fact, many ARM-based devices suffer devastating performance drops when trying
// to do asynchronous framebuffer reads. Therefore, since most OpenGL ES users will
// be running an ARM-based iGPU, we will disable PBOs for OpenGL ES and stick with
// a traditional synchronous glReadPixels() call instead.
this->_feature.supportPBO = false;
this->_feature.supportFBO = true;
this->_feature.supportFBOBlit = true;
this->_feature.supportMultisampledFBO = true;
this->_feature.supportVAO = true;
this->_feature.supportVAO_APPLE = false; // VAOs are natively supported in OpenGL ES, so no need for the APPLE extension.
this->_feature.supportUBO = true;
this->_feature.supportShaders = true;
// All features below are specific to OpenGL ES v3.0.
this->_feature.supportSampleShading = false; // Supported in OpenGL v3.2 Core Profile. Unsupported in OpenGL ES.
this->_feature.supportShaderFixedLocation = true; // Requires OpenGL v3.3 or later. Supported in OpenGL ES v3.0.
this->_feature.supportConservativeDepth = false; // Requires OpenGL v4.0 or later. Unsupported in OpenGL ES.
this->_feature.supportConservativeDepth_AMD = false; // Requires OpenGL v4.0 or later. Unsupported in OpenGL ES.
// OpenGL ES 3.0 should fully support FBOs, so we don't need the default framebuffer.
// However, OpenGL ES has traditionally required some kind of surface buffer attached
// to the context before using it. We don't want it, nor would we ever use it here.
@@ -310,24 +347,27 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
return error;
}
// Mirrored Repeat Mode Support
OGLRef.stateTexMirroredRepeat = GL_MIRRORED_REPEAT;
// Uniform Buffer Object support
GLint maxUBOSize = 0;
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &maxUBOSize);
this->_feature.supportUBO64K = (maxUBOSize >= 65536); // Hardware-dependent feature. The vast majority of modern GPUs should support this.
// Blending Support
this->_isBlendFuncSeparateSupported = true;
this->_isBlendEquationSeparateSupported = true;
// Texture Buffer Object support. OpenGL v3.2 Core Profile natively supports this, but OpenGL ES requires v3.2.
this->_feature.supportTBO = ( (this->_feature.variantID & OpenGLVariantFamily_CoreProfile) != 0) ||
(((this->_feature.variantID & OpenGLVariantFamily_ES3) != 0) && ((this->_feature.variantID & 0x000F) >= 0x0002) );
// Apple-specific extensions
this->_feature.supportTextureRange_APPLE = this->IsExtensionPresent(&oglExtensionSet, "GL_APPLE_texture_range");
this->_feature.supportClientStorage_APPLE = this->IsExtensionPresent(&oglExtensionSet, "GL_APPLE_client_storage");
// Fixed locations in shaders are supported in ES 3.0 by default.
this->_isShaderFixedLocationSupported = true;
// OpenGL ES 3.0 should be able to handle the GL_RGBA format in glReadPixels without any performance penalty.
this->_feature.readPixelsBestFormat = GL_RGBA;
this->_feature.readPixelsBestDataType = GL_UNSIGNED_BYTE;
GLfloat maxAnisotropyOGL = 1.0f;
glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &maxAnisotropyOGL);
this->_deviceInfo.maxAnisotropy = (float)maxAnisotropyOGL;
// OpenGL ES 3.0 should be able to handle the GL_RGBA format in glReadPixels without any performance penalty.
OGLRef.readPixelsBestFormat = GL_RGBA;
OGLRef.readPixelsBestDataType = GL_UNSIGNED_BYTE;
this->_deviceInfo.isEdgeMarkSupported = true;
this->_deviceInfo.isFogSupported = true;
@@ -341,7 +381,7 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)this->_framebufferWidth, (GLsizei)this->_framebufferHeight, 0, this->_feature.readPixelsBestFormat, this->_feature.readPixelsBestDataType, NULL);
glActiveTexture(GL_TEXTURE0);
// OpenGL ES 3.0 should have all the necessary features to be able to flip and convert the framebuffer.
@@ -353,9 +393,6 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
this->_emulateNDSDepthCalculation = CommonSettings.OpenGL_Emulation_NDSDepthCalculation;
this->_emulateDepthLEqualPolygonFacing = CommonSettings.OpenGL_Emulation_DepthLEqualPolygonFacing;
// Load and create shaders. Return on any error, since ES 3.0 makes shaders mandatory.
this->isShaderSupported = true;
this->_rsResource = new OpenGLRenderStatesResource();
if (IsOpenGLDriverVersionSupported(3, 2, 0))
@@ -371,12 +408,13 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
this->_gResource = new OpenGLGeometryResource(OpenGLVariantID_ES3_3_0);
}
// Load and create shaders. Return on any error, since ES 3.0 makes shaders mandatory.
error = this->CreateGeometryPrograms();
if (error != OGLERROR_NOERR)
{
glUseProgram(0);
this->DestroyGeometryPrograms();
this->isShaderSupported = false;
this->_feature.supportShaders = false;
return error;
}
@@ -386,7 +424,7 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
{
glUseProgram(0);
this->DestroyGeometryPrograms();
this->isShaderSupported = false;
this->_feature.supportShaders = false;
return error;
}
@@ -397,60 +435,35 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
glUseProgram(0);
this->DestroyGeometryPrograms();
this->DestroyClearImageProgram();
this->isShaderSupported = false;
this->_feature.supportShaders = false;
return error;
}
INFO("OpenGL ES: Successfully created geometry shaders.\n");
error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_150,
EdgeMarkFragShader_150,
FramebufferOutputVtxShader_150,
FramebufferOutput6665FragShader_150,
NULL);
error = this->InitPostprocessingPrograms(EdgeMarkVtxShader_150, EdgeMarkFragShader_150);
if (error != OGLERROR_NOERR)
{
glUseProgram(0);
this->DestroyGeometryPrograms();
this->DestroyClearImageProgram();
this->DestroyGeometryZeroDstAlphaProgram();
this->isShaderSupported = false;
this->_feature.supportShaders = false;
return error;
}
this->isVBOSupported = true;
this->CreateVBOs();
// PBOs are only used when reading back the rendered framebuffer for the emulated
// BG0 layer. For desktop-class GPUs, doing an asynchronous glReadPixels() call
// is always advantageous since such devices are expected to have their GPUs
// connected to a data bus.
//
// However, many ARM-based mobile devices use integrated GPUs of varying degrees
// of memory latency and implementation quality. This means that the performance
// of an asynchronous glReadPixels() call is NOT guaranteed on such devices.
//
// In fact, many ARM-based devices suffer devastating performance drops when trying
// to do asynchronous framebuffer reads. Therefore, since most OpenGL ES users will
// be running an ARM-based iGPU, we will disable PBOs for OpenGL ES and stick with
// a traditional synchronous glReadPixels() call instead.
this->isPBOSupported = false;
this->isVAOSupported = true;
this->CreateVAOs();
// Load and create FBOs. Return on any error, since OpenGL ES 3.0 includes FBOs as core functionality.
this->isFBOSupported = true;
error = this->CreateFBOs();
if (error != OGLERROR_NOERR)
{
this->isFBOSupported = false;
this->_feature.supportFBO = false;
return error;
}
this->_isFBOBlitSupported = true;
this->isMultisampledFBOSupported = true;
this->_selectedMultisampleSize = CommonSettings.GFX3D_Renderer_MultisampleSize;
GLint maxSamplesOGL = 0;
@@ -473,7 +486,7 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
error = this->CreateMultisampledFBO(sampleSize);
if (error != OGLERROR_NOERR)
{
this->isMultisampledFBOSupported = false;
this->_feature.supportMultisampledFBO = false;
}
// If GFX3D_Renderer_MultisampleSize is 0, then we can deallocate the buffers now
@@ -485,12 +498,12 @@ Render3DError OpenGLESRenderer_3_0::InitExtensions()
}
else
{
this->isMultisampledFBOSupported = false;
this->_feature.supportMultisampledFBO = false;
INFO("OpenGL ES: Driver does not support at least 2x multisampled FBOs.\n");
}
this->_isDepthLEqualPolygonFacingSupported = true;
this->_enableMultisampledRendering = ((this->_selectedMultisampleSize >= 2) && this->isMultisampledFBOSupported);
this->_enableMultisampledRendering = ((this->_selectedMultisampleSize >= 2) && this->_feature.supportMultisampledFBO);
return OGLERROR_NOERR;
}
@@ -555,11 +568,11 @@ Render3DError OpenGLESRenderer_3_0::CreateGeometryPrograms()
std::string fragShaderCode = shaderHeader.str() + shaderFlags.str() + std::string(GeometryFragShader_ES300);
error = this->ShaderProgramCreate(OGLRef.vertexGeometryShaderID,
OGLRef.fragmentGeometryShaderID[flagsValue],
OGLRef.programGeometryID[flagsValue],
vtxShaderCode.c_str(),
fragShaderCode.c_str());
error = ShaderProgramCreateOGL(OGLRef.vertexGeometryShaderID,
OGLRef.fragmentGeometryShaderID[flagsValue],
OGLRef.programGeometryID[flagsValue],
vtxShaderCode.c_str(),
fragShaderCode.c_str());
if (error != OGLERROR_NOERR)
{
INFO("OpenGL ES: Failed to create the GEOMETRY shader program.\n");
@@ -569,7 +582,7 @@ Render3DError OpenGLESRenderer_3_0::CreateGeometryPrograms()
}
glLinkProgram(OGLRef.programGeometryID[flagsValue]);
if (!this->ValidateShaderProgramLink(OGLRef.programGeometryID[flagsValue]))
if (!ValidateShaderProgramLinkOGL(OGLRef.programGeometryID[flagsValue]))
{
INFO("OpenGL ES: Failed to link the GEOMETRY shader program.\n");
glUseProgram(0);
@@ -630,38 +643,22 @@ Render3DError OpenGLESRenderer_3_0::CreateClearImageProgram(const char *vsCStrin
shaderHeader << "\n";
std::stringstream vsHeader;
if (this->_isShaderFixedLocationSupported)
{
vsHeader << "#define IN_VTX_POSITION layout (location = " << OGLVertexAttributeID_Position << ") in\n";
vsHeader << "#define IN_VTX_TEXCOORD0 layout (location = " << OGLVertexAttributeID_TexCoord0 << ") in\n";
}
else
{
vsHeader << "#define IN_VTX_POSITION in\n";
vsHeader << "#define IN_VTX_TEXCOORD0 in\n";
}
vsHeader << "#define IN_VTX_POSITION layout (location = " << OGLVertexAttributeID_Position << ") in\n";
vsHeader << "#define IN_VTX_TEXCOORD0 layout (location = " << OGLVertexAttributeID_TexCoord0 << ") in\n";
vsHeader << "\n";
std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vsCString);
std::stringstream fsHeader;
if (this->_isShaderFixedLocationSupported)
{
fsHeader << "#define OUT_COLOR layout (location = 0) out\n";
fsHeader << "#define OUT_FOGATTR layout (location = 1) out\n";
}
else
{
fsHeader << "#define OUT_COLOR out\n";
fsHeader << "#define OUT_FOGATTR out\n";
}
fsHeader << "#define OUT_COLOR layout (location = 0) out\n";
fsHeader << "#define OUT_FOGATTR layout (location = 1) out\n";
fsHeader << "\n";
std::string fragShaderCodeFogColor = shaderHeader.str() + fsHeader.str() + std::string(fsCString);
error = this->ShaderProgramCreate(OGLRef.vsClearImageID,
OGLRef.fsClearImageID,
OGLRef.pgClearImageID,
vtxShaderCode.c_str(),
fragShaderCodeFogColor.c_str());
error = ShaderProgramCreateOGL(OGLRef.vsClearImageID,
OGLRef.fsClearImageID,
OGLRef.pgClearImageID,
vtxShaderCode.c_str(),
fragShaderCodeFogColor.c_str());
if (error != OGLERROR_NOERR)
{
INFO("OpenGL ES: Failed to create the CLEAR_IMAGE shader program.\n");
@@ -671,7 +668,7 @@ Render3DError OpenGLESRenderer_3_0::CreateClearImageProgram(const char *vsCStrin
}
glLinkProgram(OGLRef.pgClearImageID);
if (!this->ValidateShaderProgramLink(OGLRef.pgClearImageID))
if (!ValidateShaderProgramLinkOGL(OGLRef.pgClearImageID))
{
INFO("OpenGL ES: Failed to link the CLEAR_IMAGE shader color/fog program.\n");
glUseProgram(0);
@@ -716,11 +713,11 @@ Render3DError OpenGLESRenderer_3_0::CreateGeometryZeroDstAlphaProgram(const char
std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vtxShaderCString);
std::string fragShaderCode = shaderHeader.str() + std::string(fragShaderCString);
error = this->ShaderProgramCreate(OGLRef.vtxShaderGeometryZeroDstAlphaID,
OGLRef.fragShaderGeometryZeroDstAlphaID,
OGLRef.programGeometryZeroDstAlphaID,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
error = ShaderProgramCreateOGL(OGLRef.vtxShaderGeometryZeroDstAlphaID,
OGLRef.fragShaderGeometryZeroDstAlphaID,
OGLRef.programGeometryZeroDstAlphaID,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
if (error != OGLERROR_NOERR)
{
INFO("OpenGL ES: Failed to create the GEOMETRY ZERO DST ALPHA shader program.\n");
@@ -730,7 +727,7 @@ Render3DError OpenGLESRenderer_3_0::CreateGeometryZeroDstAlphaProgram(const char
}
glLinkProgram(OGLRef.programGeometryZeroDstAlphaID);
if (!this->ValidateShaderProgramLink(OGLRef.programGeometryZeroDstAlphaID))
if (!ValidateShaderProgramLinkOGL(OGLRef.programGeometryZeroDstAlphaID))
{
INFO("OpenGL ES: Failed to link the GEOMETRY ZERO DST ALPHA shader program.\n");
glUseProgram(0);
@@ -777,11 +774,11 @@ Render3DError OpenGLESRenderer_3_0::CreateEdgeMarkProgram(const bool isMultisamp
std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vtxShaderCString);
std::string fragShaderCode = shaderHeader.str() + fsHeader.str() + std::string(fragShaderCString);
error = this->ShaderProgramCreate(OGLRef.vertexEdgeMarkShaderID,
OGLRef.fragmentEdgeMarkShaderID,
OGLRef.programEdgeMarkID,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
error = ShaderProgramCreateOGL(OGLRef.vertexEdgeMarkShaderID,
OGLRef.fragmentEdgeMarkShaderID,
OGLRef.programEdgeMarkID,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
if (error != OGLERROR_NOERR)
{
INFO("OpenGL ES: Failed to create the EDGE MARK shader program.\n");
@@ -791,7 +788,7 @@ Render3DError OpenGLESRenderer_3_0::CreateEdgeMarkProgram(const bool isMultisamp
}
glLinkProgram(OGLRef.programEdgeMarkID);
if (!this->ValidateShaderProgramLink(OGLRef.programEdgeMarkID))
if (!ValidateShaderProgramLinkOGL(OGLRef.programEdgeMarkID))
{
INFO("OpenGL ES: Failed to link the EDGE MARK shader program.\n");
glUseProgram(0);
@@ -860,11 +857,11 @@ Render3DError OpenGLESRenderer_3_0::CreateFogProgram(const OGLFogProgramKey fogP
shaderID.program = 0;
shaderID.fragShader = 0;
error = this->ShaderProgramCreate(OGLRef.vertexFogShaderID,
shaderID.fragShader,
shaderID.program,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
error = ShaderProgramCreateOGL(OGLRef.vertexFogShaderID,
shaderID.fragShader,
shaderID.program,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
this->_fogProgramMap[fogProgramKey.key] = shaderID;
@@ -877,7 +874,7 @@ Render3DError OpenGLESRenderer_3_0::CreateFogProgram(const OGLFogProgramKey fogP
}
glLinkProgram(shaderID.program);
if (!this->ValidateShaderProgramLink(shaderID.program))
if (!ValidateShaderProgramLinkOGL(shaderID.program))
{
INFO("OpenGL ES: Failed to link the FOG shader program.\n");
glUseProgram(0);
@@ -900,64 +897,3 @@ Render3DError OpenGLESRenderer_3_0::CreateFogProgram(const OGLFogProgramKey fogP
return OGLERROR_NOERR;
}
// Builds and links the shader program stored in OGLRef.programFramebufferRGBA6665OutputID,
// then makes it the current program and sets its "texInFragColor" uniform.
//
// vtxShaderCString  - vertex shader source body; a generated header is prepended to it.
// fragShaderCString - fragment shader source body; a generated header is prepended to it.
//
// Returns OGLERROR_NOERR on success, and also when either source string is NULL (nothing is
// built in that case). Returns the error from ShaderProgramCreate() if program creation fails,
// or OGLERROR_SHADER_CREATE_ERROR if linking fails. On either failure, the current program is
// unbound and DestroyFramebufferOutput6665Programs() is called before returning.
Render3DError OpenGLESRenderer_3_0::CreateFramebufferOutput6665Program(const char *vtxShaderCString, const char *fragShaderCString)
{
Render3DError error = OGLERROR_NOERR;
OGLRenderRef &OGLRef = *this->ref;
// Missing shader source is not reported as an error; the function simply does nothing.
if ( (vtxShaderCString == NULL) || (fragShaderCString == NULL) )
{
return error;
}
// Header shared by both shader stages: GLSL version, default precisions, and the
// framebuffer dimensions emitted as float literals (the ".0" suffix is appended here).
std::stringstream shaderHeader;
shaderHeader << "#version 300 es\n";
shaderHeader << "precision highp float;\n";
shaderHeader << "precision highp int;\n";
shaderHeader << "\n";
shaderHeader << "#define FRAMEBUFFER_SIZE_X " << this->_framebufferWidth << ".0 \n";
shaderHeader << "#define FRAMEBUFFER_SIZE_Y " << this->_framebufferHeight << ".0 \n";
shaderHeader << "\n";
// Vertex-stage macros that pin each vertex input to a fixed attribute location.
std::stringstream vsHeader;
vsHeader << "#define IN_VTX_POSITION layout (location = " << OGLVertexAttributeID_Position << ") in\n";
vsHeader << "#define IN_VTX_TEXCOORD0 layout (location = " << OGLVertexAttributeID_TexCoord0 << ") in\n";
vsHeader << "#define IN_VTX_COLOR layout (location = " << OGLVertexAttributeID_Color << ") in\n";
// Fragment-stage macro: the output location is the working attachment's index
// relative to GL_COLOR_ATTACHMENT0.
std::stringstream fsHeader;
fsHeader << "#define OUT_COLOR layout (location = " << (OGL_WORKING_ATTACHMENT_ID - GL_COLOR_ATTACHMENT0) << ") out\n";
// Final source for each stage = shared header + stage header + caller-supplied body.
std::string vtxShaderCode = shaderHeader.str() + vsHeader.str() + std::string(vtxShaderCString);
std::string fragShaderCode = shaderHeader.str() + fsHeader.str() + std::string(fragShaderCString);
error = this->ShaderProgramCreate(OGLRef.vertexFramebufferOutput6665ShaderID,
OGLRef.fragmentFramebufferRGBA6665OutputShaderID,
OGLRef.programFramebufferRGBA6665OutputID,
vtxShaderCode.c_str(),
fragShaderCode.c_str());
if (error != OGLERROR_NOERR)
{
INFO("OpenGL ES: Failed to create the FRAMEBUFFER OUTPUT RGBA6665 shader program.\n");
glUseProgram(0);
this->DestroyFramebufferOutput6665Programs();
return error;
}
glLinkProgram(OGLRef.programFramebufferRGBA6665OutputID);
if (!this->ValidateShaderProgramLink(OGLRef.programFramebufferRGBA6665OutputID))
{
INFO("OpenGL ES: Failed to link the FRAMEBUFFER OUTPUT RGBA6665 shader program.\n");
glUseProgram(0);
this->DestroyFramebufferOutput6665Programs();
return OGLERROR_SHADER_CREATE_ERROR;
}
// The program must be current before glUniform1i() can set its uniform.
glValidateProgram(OGLRef.programFramebufferRGBA6665OutputID);
glUseProgram(OGLRef.programFramebufferRGBA6665OutputID);
// Set the "texInFragColor" uniform to the OGLTextureUnitID_GColor value.
const GLint uniformTexGColor = glGetUniformLocation(OGLRef.programFramebufferRGBA6665OutputID, "texInFragColor");
glUniform1i(uniformTexGColor, OGLTextureUnitID_GColor);
return OGLERROR_NOERR;
}

View File

@@ -1,5 +1,5 @@
/*
Copyright (C) 2024 DeSmuME team
Copyright (C) 2024-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -48,7 +48,6 @@ protected:
virtual Render3DError CreateGeometryZeroDstAlphaProgram(const char *vtxShaderCString, const char *fragShaderCString);
virtual Render3DError CreateEdgeMarkProgram(const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString);
virtual Render3DError CreateFogProgram(const OGLFogProgramKey fogProgramKey, const bool isMultisample, const char *vtxShaderCString, const char *fragShaderCString);
virtual Render3DError CreateFramebufferOutput6665Program(const char *vtxShaderCString, const char *fragShaderCString);
public:
OpenGLESRenderer_3_0();

43
desmume/src/gfx3d.cpp Normal file → Executable file
View File

@@ -3840,16 +3840,16 @@ void gfx3d_PrepareSaveStateBufferWrite()
{
if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvertBuffer6665To8888<false, false>((u32 *)CurrentRenderer->GetFramebuffer(), (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceConvertBuffer6665To8888<false, false>((const u32 *)CurrentRenderer->GetFramebuffer32(), (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
else
{
ColorspaceCopyBuffer32<false, false>((u32 *)CurrentRenderer->GetFramebuffer(), (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
ColorspaceCopyBuffer32<false, false>((const u32 *)CurrentRenderer->GetFramebuffer32(), (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
}
else // Framebuffer is at a custom size
{
const Color4u8 *__restrict src = (Color4u8 *)CurrentRenderer->GetFramebuffer();
const Color4u8 *__restrict src = CurrentRenderer->GetFramebuffer32();
Color4u8 *__restrict dst = gfx3d.framebufferNativeSave;
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
@@ -4021,45 +4021,12 @@ void gfx3d_FinishLoadStateBufferRead()
switch (deviceInfo.renderID)
{
case RENDERID_NULL:
memset(CurrentRenderer->GetFramebuffer(), 0, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(Color4u8));
CurrentRenderer->FillZero();
break;
case RENDERID_SOFTRASTERIZER:
{
const size_t w = CurrentRenderer->GetFramebufferWidth();
const size_t h = CurrentRenderer->GetFramebufferHeight();
if ( (w == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (h == GPU_FRAMEBUFFER_NATIVE_HEIGHT) ) // Framebuffer is at the native size
{
if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)gfx3d.framebufferNativeSave, (u32 *)CurrentRenderer->GetFramebuffer(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
else
{
ColorspaceCopyBuffer32<false, false>((u32 *)gfx3d.framebufferNativeSave, (u32 *)CurrentRenderer->GetFramebuffer(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
}
else // Framebuffer is at a custom size
{
if (CurrentRenderer->GetColorFormat() == NDSColorFormat_BGR666_Rev)
{
ColorspaceConvertBuffer8888To6665<false, false>((u32 *)gfx3d.framebufferNativeSave, (u32 *)gfx3d.framebufferNativeSave, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
}
const Color4u8 *__restrict src = gfx3d.framebufferNativeSave;
Color4u8 *__restrict dst = (Color4u8 *)CurrentRenderer->GetFramebuffer();
for (size_t l = 0; l < GPU_FRAMEBUFFER_NATIVE_HEIGHT; l++)
{
const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(l);
CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, src, dst);
src += GPU_FRAMEBUFFER_NATIVE_WIDTH;
dst += lineInfo.pixelCount;
}
}
CurrentRenderer->FillColor32(gfx3d.framebufferNativeSave, true);
break;
}
default:
// Do nothing. Loading the 3D framebuffer is unsupported on this 3D renderer.

View File

@@ -1,5 +1,5 @@
/*
Copyright (C) 2009-2021 DeSmuME team
Copyright (C) 2009-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -60,6 +60,7 @@
#include "movie.h"
#include "MMU.h"
#include "GPU.h"
#include "render3D.h"
#include "SPU.h"
#include "saves.h"
#include "emufile.h"
@@ -2279,7 +2280,7 @@ public:
char temp [256];
sprintf(temp, " " /*"mismatch at "*/ "byte %d(0x%X at %p): %d(0x%X) != %d(0x%X)\n", i, i, dst, *src,*src, *dst,*dst);
if(ptr == dispInfo.masterNativeBuffer16 || ptr == dispInfo.masterCustomBuffer || ptr == GPU->GetEngineMain()->Get3DFramebufferMain()) // ignore screen-only differences since frame skipping can cause them and it's probably ok
if(ptr == dispInfo.masterNativeBuffer16 || ptr == dispInfo.masterCustomBuffer || ptr == CurrentRenderer->GetFramebuffer32()) // ignore screen-only differences since frame skipping can cause them and it's probably ok
break;
differences.push_back(temp); // <-- probably the best place for a breakpoint
@@ -2994,7 +2995,7 @@ static void PutTextInternal (const char *str, int len, short x, short y, int col
{
for(int x3 = max(0,x2-1); x3 <= min(4,x2+1); x3++)
{
on |= y3 >= 0 && y3 < 8 && (Cur_Glyph[y3*8] & (1 << x3));
on |= (y3 >= 0) && (y3 < 8) && ((Cur_Glyph[y3*8] & (1 << x3)) != 0);
if (on)
goto draw_outline; // speedup?
}

View File

@@ -1287,7 +1287,7 @@ FORCEINLINE void RasterizerUnit<RENDERER>::Render()
return;
}
Color4u8 *dstColor = this->_softRender->GetFramebuffer();
Color4u8 *dstColor = this->_softRender->GetInUseFramebuffer32();
const size_t dstWidth = this->_softRender->GetFramebufferWidth();
const size_t dstHeight = this->_softRender->GetFramebufferHeight();
@@ -1764,7 +1764,7 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
_debug_drawClippedUserPoly = 0;
_renderGeometryNeedsFinish = false;
_framebufferAttributes = NULL;
_framebufferAttributes = new FragmentAttributesBuffer(_framebufferWidth * _framebufferHeight);
_enableHighPrecisionColorInterpolation = CommonSettings.GFX3D_HighResolutionInterpolateColor;
_enableLineHack = CommonSettings.GFX3D_LineHack;
@@ -2180,11 +2180,13 @@ void SoftRasterizerRenderer::_UpdateFogTable(const u8 *fogDensityTable)
Render3DError SoftRasterizerRenderer::RenderEdgeMarkingAndFog(const SoftRasterizerPostProcessParams &param)
{
Color4u8 *framebufferColor = this->GetInUseFramebuffer32();
for (size_t i = param.startLine * this->_framebufferWidth, y = param.startLine; y < param.endLine; y++)
{
for (size_t x = 0; x < this->_framebufferWidth; x++, i++)
{
Color4u8 &dstColor = this->_framebufferColor[i];
Color4u8 &dstColor = framebufferColor[i];
const u32 depth = this->_framebufferAttributes->depth[i];
const u8 polyID = this->_framebufferAttributes->opaquePolyID[i];
@@ -2297,6 +2299,8 @@ const SoftRasterizerPrecalculation* SoftRasterizerRenderer::GetPrecalculationLis
Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colorBuffer, const u32 *__restrict depthBuffer, const u8 *__restrict fogBuffer, const u8 opaquePolyID)
{
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
const size_t xRatio = (size_t)((GPU_FRAMEBUFFER_NATIVE_WIDTH << 16) / this->_framebufferWidth) + 1;
const size_t yRatio = (size_t)((GPU_FRAMEBUFFER_NATIVE_HEIGHT << 16) / this->_framebufferHeight) + 1;
@@ -2308,7 +2312,7 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
{
const size_t ir = readLine + ((x * xRatio) >> 16);
this->_framebufferColor[iw].value = COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F);
framebufferColor[iw].value = COLOR555TO6665(colorBuffer[ir] & 0x7FFF, (colorBuffer[ir] >> 15) * 0x1F);
this->_framebufferAttributes->depth[iw] = depthBuffer[ir];
this->_framebufferAttributes->isFogged[iw] = fogBuffer[ir];
this->_framebufferAttributes->opaquePolyID[iw] = opaquePolyID;
@@ -2324,9 +2328,11 @@ Render3DError SoftRasterizerRenderer::ClearUsingImage(const u16 *__restrict colo
void SoftRasterizerRenderer::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
for (size_t i = startPixel; i < endPixel; i++)
{
this->_framebufferColor[i] = this->_clearColor6665;
framebufferColor[i] = this->_clearColor6665;
this->_framebufferAttributes->SetAtIndex(i, this->_clearAttributes);
}
}
@@ -2544,10 +2550,11 @@ Render3DError SoftRasterizer_SIMD<SIMDBYTES>::ClearUsingValues(const Color4u8 &c
this->ClearUsingValues_Execute(0, this->_framebufferSIMDPixCount);
}
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
#pragma LOOPVECTORIZE_DISABLE
for (size_t i = this->_framebufferSIMDPixCount; i < this->_framebufferPixCount; i++)
{
this->_framebufferColor[i] = clearColor6665;
framebufferColor[i] = clearColor6665;
this->_framebufferAttributes->SetAtIndex(i, clearAttributes);
}
@@ -2629,12 +2636,14 @@ void SoftRasterizerRenderer_AVX2::LoadClearValues(const Color4u8 &clearColor6665
void SoftRasterizerRenderer_AVX2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
for (size_t i = startPixel; i < endPixel; i+=sizeof(v256u8))
{
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i) + 0, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i) + 1, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i) + 2, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferColor + i) + 3, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(framebufferColor + i) + 0, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(framebufferColor + i) + 1, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(framebufferColor + i) + 2, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(framebufferColor + i) + 3, this->_clearColor_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i) + 0, this->_clearDepth_v256u32);
_mm256_stream_si256((v256u32 *)(this->_framebufferAttributes->depth + i) + 1, this->_clearDepth_v256u32);
@@ -2666,12 +2675,14 @@ void SoftRasterizerRenderer_SSE2::LoadClearValues(const Color4u8 &clearColor6665
void SoftRasterizerRenderer_SSE2::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
for (size_t i = startPixel; i < endPixel; i+=sizeof(v128u8))
{
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i) + 0, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i) + 1, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i) + 2, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferColor + i) + 3, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(framebufferColor + i) + 0, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(framebufferColor + i) + 1, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(framebufferColor + i) + 2, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(framebufferColor + i) + 3, this->_clearColor_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i) + 0, this->_clearDepth_v128u32);
_mm_stream_si128((v128u32 *)(this->_framebufferAttributes->depth + i) + 1, this->_clearDepth_v128u32);
@@ -2734,12 +2745,14 @@ void SoftRasterizerRenderer_NEON::LoadClearValues(const Color4u8 &clearColor6665
void SoftRasterizerRenderer_NEON::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
for (size_t i = startPixel; i < endPixel; i+=(sizeof(v128u8)*4))
{
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 0, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 16, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 32, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(this->_framebufferColor + i) + 48, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(framebufferColor + i) + 0, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(framebufferColor + i) + 16, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(framebufferColor + i) + 32, this->_clearColor_v128u32x4);
vst1q_u32_x4((u32 *)(framebufferColor + i) + 48, this->_clearColor_v128u32x4);
vst1q_u32_x4((this->_framebufferAttributes->depth + i) + 0, this->_clearDepth_v128u32x4);
vst1q_u32_x4((this->_framebufferAttributes->depth + i) + 16, this->_clearDepth_v128u32x4);
@@ -2759,48 +2772,41 @@ void SoftRasterizerRenderer_NEON::ClearUsingValues_Execute(const size_t startPix
void SoftRasterizerRenderer_AltiVec::LoadClearValues(const Color4u8 &clearColor6665, const FragmentAttributes &clearAttributes)
{
this->_clearColor_v128u32 = (v128u32){clearColor6665.value,clearColor6665.value,clearColor6665.value,clearColor6665.value};
this->_clearDepth_v128u32 = (v128u32){clearAttributes.depth,clearAttributes.depth,clearAttributes.depth,clearAttributes.depth};
CACHE_ALIGN const u32 cc = clearColor6665.value;
this->_clearColor_v128u32 = vec_splat( vec_lde(0, &cc), 0 );
this->_clearAttrOpaquePolyID_v128u8 = (v128u8){clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,
clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,
clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,
clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID,clearAttributes.opaquePolyID};
CACHE_ALIGN const u32 d = clearAttributes.depth;
this->_clearDepth_v128u32 = vec_splat( vec_lde(0, &d), 0 );
this->_clearAttrTranslucentPolyID_v128u8 = (v128u8){clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,
clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,
clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,
clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID,clearAttributes.translucentPolyID};
CACHE_ALIGN const u8 opaquePolyID = clearAttributes.opaquePolyID;
this->_clearAttrOpaquePolyID_v128u8 = vec_splat( vec_lde(0, &opaquePolyID), 0 );
this->_clearAttrStencil_v128u8 = (v128u8){clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,
clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,
clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,
clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil,clearAttributes.stencil};
CACHE_ALIGN const u8 translucentPolyID = clearAttributes.translucentPolyID;
this->_clearAttrTranslucentPolyID_v128u8 = vec_splat( vec_lde(0, &translucentPolyID), 0 );
this->_clearAttrIsFogged_v128u8 = (v128u8){clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,
clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,
clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,
clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged,clearAttributes.isFogged};
CACHE_ALIGN const u8 stencil = clearAttributes.stencil;
this->_clearAttrStencil_v128u8 = vec_splat( vec_lde(0, &stencil), 0 );
this->_clearAttrIsTranslucentPoly_v128u8 = (v128u8){clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,
clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,
clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,
clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly,clearAttributes.isTranslucentPoly};
CACHE_ALIGN const u8 fogFlag = clearAttributes.isFogged;
this->_clearAttrIsFogged_v128u8 = vec_splat( vec_lde(0, &fogFlag), 0 );
this->_clearAttrPolyFacing_v128u8 = (v128u8){clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,
clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,
clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,
clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing,clearAttributes.polyFacing};
CACHE_ALIGN const u8 translucentPolyFlag = clearAttributes.isTranslucentPoly;
this->_clearAttrIsTranslucentPoly_v128u8 = vec_splat( vec_lde(0, &translucentPolyFlag), 0 );
CACHE_ALIGN const u8 facing = clearAttributes.polyFacing;
this->_clearAttrPolyFacing_v128u8 = vec_splat( vec_lde(0, &facing), 0 );
}
void SoftRasterizerRenderer_AltiVec::ClearUsingValues_Execute(const size_t startPixel, const size_t endPixel)
{
Color4u8 *__restrict framebufferColor = this->GetInUseFramebuffer32();
for (size_t i = startPixel; i < endPixel; i+=sizeof(v128u8))
{
vec_st(this->_clearColor_v128u32, (i * 4) + 0, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 16, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 32, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 48, this->_framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 0, framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 16, framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 32, framebufferColor);
vec_st(this->_clearColor_v128u32, (i * 4) + 48, framebufferColor);
vec_st(this->_clearDepth_v128u32, (i * 4) + 0, this->_framebufferAttributes->depth);
vec_st(this->_clearDepth_v128u32, (i * 4) + 16, this->_framebufferAttributes->depth);

View File

@@ -1,6 +1,6 @@
/*
Copyright (C) 2006-2007 shash
Copyright (C) 2008-2024 DeSmuME team
Copyright (C) 2008-2025 DeSmuME team
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,6 +21,7 @@
#include <string.h>
#include "utils/bits.h"
#include "GPU_Operations.h"
#include "MMU.h"
#include "NDSSystem.h"
#include "./filter/filter.h"
@@ -319,6 +320,7 @@ Render3D::Render3D()
_framebufferSIMDPixCount = 0;
_framebufferColorSizeBytes = _framebufferWidth * _framebufferHeight * sizeof(Color4u8);
_framebufferColor = NULL;
_framebufferColor16 = NULL;
_internalRenderingFormat = NDSColorFormat_BGR666_Rev;
_outputFormat = NDSColorFormat_BGR666_Rev;
@@ -391,7 +393,17 @@ std::string Render3D::GetName()
return this->_deviceInfo.renderName;
}
Color4u8* Render3D::GetFramebuffer()
// Returns a read-only pointer to the renderer's 16-bit color framebuffer
// (the _framebufferColor16 member). No NULL check is performed here.
const Color5551* Render3D::GetFramebuffer16() const
{
return this->_framebufferColor16;
}
// Returns a read-only pointer to the renderer's 32-bit color framebuffer
// (the _framebufferColor member). No NULL check is performed here.
const Color4u8* Render3D::GetFramebuffer32() const
{
return this->_framebufferColor;
}
// Returns a writable pointer to the renderer's 32-bit color framebuffer
// (the _framebufferColor member). Although the method is const, the returned
// pointer is mutable, so callers can modify the pixel data through it.
Color4u8* Render3D::GetInUseFramebuffer32() const
{
return this->_framebufferColor;
}
@@ -423,6 +435,7 @@ Render3DError Render3D::SetFramebufferSize(size_t w, size_t h)
this->_framebufferPixCount = w * h;
this->_framebufferColorSizeBytes = w * h * sizeof(Color4u8);
this->_framebufferColor = GPU->GetEngineMain()->Get3DFramebufferMain(); // Just use the buffer that is already present on the main GPU engine
this->_framebufferColor16 = (Color5551 *)GPU->GetEngineMain()->Get3DFramebuffer16(); // Just use the buffer that is already present on the main GPU engine
return RENDER3DERROR_NOERR;
}
@@ -779,6 +792,11 @@ Render3DError Render3D::Reset()
memset(this->_framebufferColor, 0, this->_framebufferColorSizeBytes);
}
if (this->_framebufferColor16 != NULL)
{
memset(this->_framebufferColor16, 0, this->_framebufferPixCount * sizeof(Color5551));
}
this->_clearColor6665.value = 0;
memset(&this->_clearAttributes, 0, sizeof(FragmentAttributes));
@@ -857,11 +875,30 @@ Render3DError Render3D::Render(const GFX3D_State &renderState, const GFX3D_Geome
// Marks both the 16-bit and the main color outputs as needing a flush.
// This base implementation does no other work and always succeeds.
Render3DError Render3D::RenderFinish()
{
	// The two flags are independent; RenderFlush() clears each one separately.
	this->_renderNeedsFlush16 = true;
	this->_renderNeedsFlushMain = true;
	
	return RENDER3DERROR_NOERR;
}
// Clears the pending-flush flags for the buffers the caller asks to flush.
//
// willFlushBuffer32 - if true, a pending flush of the main buffer is marked as done.
// willFlushBuffer16 - if true, a pending flush of the 16-bit buffer is marked as done.
//
// Nothing happens when the renderer is powered off or when no flush is pending.
// This base implementation always returns RENDER3DERROR_NOERR.
Render3DError Render3D::RenderFlush(bool willFlushBuffer32, bool willFlushBuffer16)
{
	const bool hasPendingFlush = (this->_renderNeedsFlushMain || this->_renderNeedsFlush16);
	
	if (this->_isPoweredOn && hasPendingFlush)
	{
		// Only a flag that is currently set AND whose flush was requested gets cleared.
		if (willFlushBuffer32 && this->_renderNeedsFlushMain)
		{
			this->_renderNeedsFlushMain = false;
		}
		
		if (willFlushBuffer16 && this->_renderNeedsFlush16)
		{
			this->_renderNeedsFlush16 = false;
		}
	}
	
	return RENDER3DERROR_NOERR;
}
@@ -871,6 +908,89 @@ Render3DError Render3D::VramReconfigureSignal()
return RENDER3DERROR_NOERR;
}
// Zero-fills the renderer's 32-bit and 16-bit color framebuffers.
//
// Each buffer is handled independently, so a missing (NULL) buffer does not
// prevent the other one from being cleared.
//
// Returns RENDER3DERROR_NOERR when both buffers were cleared, or
// RENDER3DERROR_INVALID_BUFFER when at least one of them is NULL.
Render3DError Render3D::FillZero()
{
	Render3DError error = RENDER3DERROR_NOERR;
	
	if (this->_framebufferColor != NULL)
	{
		memset(this->_framebufferColor, 0, this->_framebufferColorSizeBytes);
	}
	else
	{
		error = RENDER3DERROR_INVALID_BUFFER;
	}
	
	if (this->_framebufferColor16 != NULL)
	{
		memset(this->_framebufferColor16, 0, this->_framebufferPixCount * sizeof(Color5551));
	}
	else
	{
		error = RENDER3DERROR_INVALID_BUFFER;
	}
	
	// Report the accumulated status. Previously this returned RENDER3DERROR_NOERR
	// unconditionally, which hid the invalid-buffer condition detected above.
	return error;
}
// Overwrites the renderer's 32-bit color framebuffer with the pixels in src.
//
// src             - source pixels, read as 32-bit values.
// isSrcNativeSize - true if src holds a native-size image
//                   (GPU_FRAMEBUFFER_NATIVE_WIDTH x GPU_FRAMEBUFFER_NATIVE_HEIGHT).
//                   When false, src is copied verbatim with memcpy().
//
// Returns RENDER3DERROR_INVALID_BUFFER if src or the framebuffer is NULL,
// otherwise RENDER3DERROR_NOERR.
//
// NOTE(review): when the framebuffer is at a custom size and the output format is
// NDSColorFormat_BGR666_Rev, the source buffer is converted IN PLACE (its constness
// is cast away), so the caller's buffer is modified. Confirm callers expect this.
Render3DError Render3D::FillColor32(const Color4u8 *__restrict src, const bool isSrcNativeSize)
{
	const u32 *__restrict srcLine = (const u32 *__restrict)src;
	u32 *__restrict dstLine = (u32 *__restrict)this->GetInUseFramebuffer32();
	
	if ( (srcLine == NULL) || (dstLine == NULL) )
	{
		return RENDER3DERROR_INVALID_BUFFER;
	}
	
	// Run the finish/flush sequence before the framebuffer contents are overwritten.
	this->RenderFinish();
	this->RenderFlush(false, false);
	this->SetRenderNeedsFinish(false);
	
	if (!isSrcNativeSize)
	{
		// presumably src already matches the framebuffer's size and pixel format -- TODO confirm
		memcpy(dstLine, srcLine, this->_framebufferColorSizeBytes);
		return RENDER3DERROR_NOERR;
	}
	
	const bool isOutput6665 = (this->_outputFormat == NDSColorFormat_BGR666_Rev);
	const bool isDstNativeSize = (this->_framebufferWidth == GPU_FRAMEBUFFER_NATIVE_WIDTH) &&
	                             (this->_framebufferHeight == GPU_FRAMEBUFFER_NATIVE_HEIGHT);
	
	if (isDstNativeSize)
	{
		// Same dimensions on both sides: a single whole-buffer convert or copy suffices.
		if (isOutput6665)
		{
			ColorspaceConvertBuffer8888To6665<false, false>(srcLine, dstLine, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
		}
		else
		{
			ColorspaceCopyBuffer32<false, false>(srcLine, dstLine, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
		}
		
		return RENDER3DERROR_NOERR;
	}
	
	// Custom-size framebuffer: convert the native-size source first (in place),
	// then expand it into the framebuffer one native line at a time.
	if (isOutput6665)
	{
		ColorspaceConvertBuffer8888To6665<false, false>(srcLine, (u32 *)srcLine, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT);
	}
	
	for (size_t line = 0; line < GPU_FRAMEBUFFER_NATIVE_HEIGHT; line++, srcLine += GPU_FRAMEBUFFER_NATIVE_WIDTH)
	{
		const GPUEngineLineInfo &lineInfo = GPU->GetLineInfoAtIndex(line);
		CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcLine, dstLine);
		
		// Advance the destination by the pixel count reported for this line.
		dstLine += lineInfo.pixelCount;
	}
	
	return RENDER3DERROR_NOERR;
}
template <size_t SIMDBYTES>
Render3D_SIMD<SIMDBYTES>::Render3D_SIMD()
{
@@ -1065,8 +1185,8 @@ void Render3D_AltiVec::_ClearImageBaseLoop(const u16 *__restrict inColor16, cons
vec_st( vec_msum(calcDepth3, calcDepthMul, calcDepthAdd), 48, outDepth24 + i);
// Write the fog flags to the fog flag buffer.
const v128u16 clearFogLo = vec_sr(clearDepthLo, ((v128u16){15,15,15,15,15,15,15,15}));
const v128u16 clearFogHi = vec_sr(clearDepthHi, ((v128u16){15,15,15,15,15,15,15,15}));
const v128u16 clearFogLo = vec_sr(clearDepthLo, vec_splat_u16(15));
const v128u16 clearFogHi = vec_sr(clearDepthHi, vec_splat_u16(15));
vec_st( vec_pack(clearFogLo, clearFogHi), 0, outFog + i );
}
}

View File

@@ -58,18 +58,22 @@ void Render3D_DeInit();
enum RendererID
{
RENDERID_NULL = 0,
RENDERID_SOFTRASTERIZER = 1,
RENDERID_OPENGL_AUTO = 1000,
RENDERID_OPENGL_LEGACY = 1001,
RENDERID_OPENGL_3_2 = 1002,
RENDERID_METAL = 2000
RENDERID_NULL = 0,
RENDERID_SOFTRASTERIZER = 1,
RENDERID_OPENGL_AUTO = 1000,
RENDERID_OPENGL_LEGACY = 1001,
RENDERID_OPENGL_3_2 = 1002,
RENDERID_OPENG_ES = 1003,
RENDERID_METAL = 2000
};
enum Render3DErrorCode
{
RENDER3DERROR_NOERR = 0
RENDER3DERROR_NOERR = 0,
RENDER3DERROR_INVALID_VALUE = 1,
RENDER3DERROR_INVALID_BUFFER = 2
};
typedef int Render3DError;
enum PolyFacing
{
@@ -78,8 +82,6 @@ enum PolyFacing
PolyFacing_Back = 2
};
typedef int Render3DError;
struct FragmentAttributes
{
u32 depth;
@@ -208,6 +210,7 @@ protected:
size_t _framebufferSIMDPixCount;
size_t _framebufferColorSizeBytes;
Color4u8 *_framebufferColor;
Color5551 *_framebufferColor16;
Color4u8 _clearColor6665;
FragmentAttributes _clearAttributes;
@@ -309,7 +312,12 @@ public:
virtual NDSColorFormat GetColorFormat() const; // The output color format of the 3D renderer.
virtual Color4u8* GetFramebuffer();
virtual Render3DError FillZero();
virtual Render3DError FillColor32(const Color4u8 *__restrict src, const bool isSrcNativeSize);
const Color5551* GetFramebuffer16() const;
virtual const Color4u8* GetFramebuffer32() const;
Color4u8* GetInUseFramebuffer32() const;
bool GetRenderNeedsFinish() const;
void SetRenderNeedsFinish(const bool renderNeedsFinish);