diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.h b/desmume/src/frontend/cocoa/cocoa_GPU.h
index 4f22d33f2..ea5910c95 100644
--- a/desmume/src/frontend/cocoa/cocoa_GPU.h
+++ b/desmume/src/frontend/cocoa/cocoa_GPU.h
@@ -162,6 +162,7 @@ private:
 	pthread_mutex_t _mutexApplyGPUSettings;
 	pthread_mutex_t _mutexApplyRender3DSettings;
 	bool _render3DNeedsFinish;
+	int _cpuCoreCountRestoreValue;
 
 public:
 	GPUEventHandlerAsync();
@@ -181,6 +182,8 @@ public:
 
 	bool GetRender3DNeedsFinish();
 
+	void SetTempThreadCount(int threadCount);
+
 #ifdef ENABLE_ASYNC_FETCH
 	virtual void DidFrameBegin(const size_t line, const bool isFrameSkipRequested, const size_t pageCount, u8 &selectedBufferIndexInOut);
 	virtual void DidFrameEnd(bool isFrameSkipped, const NDSDisplayInfo &latestDisplayInfo);
@@ -209,6 +212,8 @@ public:
 	NSUInteger openglDeviceMaxMultisamples;
 	NSString *render3DMultisampleSizeString;
 	BOOL isCPUCoreCountAuto;
+	int _render3DThreadsRequested;
+	int _render3DThreadCount;
 	BOOL _needRestoreRender3DLock;
 
 	apple_unfairlock_t _unfairlockGpuState;
diff --git a/desmume/src/frontend/cocoa/cocoa_GPU.mm b/desmume/src/frontend/cocoa/cocoa_GPU.mm
index 97ec63698..4b6498599 100644
--- a/desmume/src/frontend/cocoa/cocoa_GPU.mm
+++ b/desmume/src/frontend/cocoa/cocoa_GPU.mm
@@ -127,6 +127,8 @@ char __hostRendererString[256] = {0};
 		GPUSTATE_SUB_OBJ_MASK;
 
 	isCPUCoreCountAuto = NO;
+	_render3DThreadsRequested = 0;
+	_render3DThreadCount = 0;
 	_needRestoreRender3DLock = NO;
 
 	oglrender_init = &cgl_initOpenGL_StandardAuto;
@@ -394,6 +396,13 @@ char __hostRendererString[256] = {0};
 
 	gpuEvent->ApplyRender3DSettingsLock();
 	GPU->Set3DRendererByID((int)rendererID);
+
+	if (rendererID == CORE3DLIST_SWRASTERIZE)
+	{
+		gpuEvent->SetTempThreadCount(_render3DThreadCount);
+		GPU->Set3DRendererByID(CORE3DLIST_SWRASTERIZE);
+	}
+
 	gpuEvent->ApplyRender3DSettingsUnlock();
 }
 
@@ -554,34 +563,38 @@ char __hostRendererString[256] = {0};
 
 - (void) setRender3DThreads:(NSUInteger)numberThreads
 {
-	NSUInteger numberCores = [[NSProcessInfo processInfo] activeProcessorCount];
+	_render3DThreadsRequested = (int)numberThreads;
+
+	const int numberCores = CommonSettings.num_cores;
+	int newThreadCount = numberCores;
+
 	if (numberThreads == 0)
 	{
 		isCPUCoreCountAuto = YES;
 		if (numberCores < 2)
 		{
-			numberCores = 1;
+			newThreadCount = 1;
 		}
 		else
 		{
-			const NSUInteger reserveCoreCount = numberCores / 12; // For every 12 cores, reserve 1 core for the rest of the system.
-			numberCores -= reserveCoreCount;
+			const int reserveCoreCount = numberCores / 12; // For every 12 cores, reserve 1 core for the rest of the system.
+			newThreadCount -= reserveCoreCount;
 		}
 	}
 	else
 	{
 		isCPUCoreCountAuto = NO;
-		numberCores = numberThreads;
+		newThreadCount = (int)numberThreads;
 	}
 
 	const RendererID renderingEngineID = (RendererID)[self render3DRenderingEngine];
+	_render3DThreadCount = newThreadCount;
 
 	gpuEvent->ApplyRender3DSettingsLock();
 
-	CommonSettings.num_cores = (int)numberCores;
-
 	if (renderingEngineID == RENDERID_SOFTRASTERIZER)
 	{
+		gpuEvent->SetTempThreadCount(newThreadCount);
 		GPU->Set3DRendererByID(renderingEngineID);
 	}
 
@@ -590,11 +603,7 @@ char __hostRendererString[256] = {0};
 
 - (NSUInteger) render3DThreads
 {
-	gpuEvent->ApplyRender3DSettingsLock();
-	const NSUInteger numberThreads = isCPUCoreCountAuto ? 0 : (NSUInteger)CommonSettings.num_cores;
-	gpuEvent->ApplyRender3DSettingsUnlock();
-
-	return numberThreads;
+	return (isCPUCoreCountAuto) ? 0 : (NSUInteger)_render3DThreadsRequested;
 }
 
 - (void) setRender3DLineHack:(BOOL)state
@@ -1240,17 +1249,24 @@ MacGPUFetchObjectAsync::~MacGPUFetchObjectAsync()
 
 void MacGPUFetchObjectAsync::Init()
 {
-	pthread_attr_t threadAttr;
-	pthread_attr_init(&threadAttr);
-	pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
-
-	struct sched_param sp;
-	memset(&sp, 0, sizeof(struct sched_param));
-	sp.sched_priority = 44;
-	pthread_attr_setschedparam(&threadAttr, &sp);
-
-	pthread_create(&_threadFetch, &threadAttr, &RunFetchThread, this);
-	pthread_attr_destroy(&threadAttr);
+	if (CommonSettings.num_cores > 1)
+	{
+		pthread_attr_t threadAttr;
+		pthread_attr_init(&threadAttr);
+		pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
+
+		struct sched_param sp;
+		memset(&sp, 0, sizeof(struct sched_param));
+		sp.sched_priority = 44;
+		pthread_attr_setschedparam(&threadAttr, &sp);
+
+		pthread_create(&_threadFetch, &threadAttr, &RunFetchThread, this);
+		pthread_attr_destroy(&threadAttr);
+	}
+	else
+	{
+		pthread_create(&_threadFetch, NULL, &RunFetchThread, this);
+	}
 }
 
 void MacGPUFetchObjectAsync::SemaphoreFramebufferCreate()
@@ -1744,6 +1760,8 @@ GPUEventHandlerAsync::GPUEventHandlerAsync()
 {
 	_fetchObject = nil;
 	_render3DNeedsFinish = false;
+	_cpuCoreCountRestoreValue = 0;
+
 	pthread_mutex_init(&_mutexFrame, NULL);
 	pthread_mutex_init(&_mutex3DRender, NULL);
 	pthread_mutex_init(&_mutexApplyGPUSettings, NULL);
@@ -1843,6 +1861,12 @@ void GPUEventHandlerAsync::DidApplyRender3DSettingsBegin()
 
 void GPUEventHandlerAsync::DidApplyRender3DSettingsEnd()
 {
+	if (this->_cpuCoreCountRestoreValue > 0)
+	{
+		CommonSettings.num_cores = this->_cpuCoreCountRestoreValue;
+	}
+
+	this->_cpuCoreCountRestoreValue = 0;
 	this->ApplyRender3DSettingsUnlock();
 }
 
@@ -1891,6 +1915,28 @@ bool GPUEventHandlerAsync::GetRender3DNeedsFinish()
 	return this->_render3DNeedsFinish;
 }
 
+// Temporarily overrides CommonSettings.num_cores with threadCount. The value
+// that was in effect beforehand is saved in _cpuCoreCountRestoreValue and is
+// written back by DidApplyRender3DSettingsEnd(). A threadCount below 1
+// requests no override.
+void GPUEventHandlerAsync::SetTempThreadCount(int threadCount)
+{
+	// An earlier override may still be waiting to be undone. Undo it first so
+	// that the saved value is never overwritten with a temporary thread count,
+	// which would permanently lose the original CommonSettings.num_cores.
+	if (this->_cpuCoreCountRestoreValue > 0)
+	{
+		CommonSettings.num_cores = this->_cpuCoreCountRestoreValue;
+		this->_cpuCoreCountRestoreValue = 0;
+	}
+
+	if (threadCount >= 1)
+	{
+		this->_cpuCoreCountRestoreValue = CommonSettings.num_cores;
+		CommonSettings.num_cores = threadCount;
+	}
+}
+
 #pragma mark -
 
 #if !defined(MAC_OS_X_VERSION_10_7)
diff --git a/desmume/src/frontend/cocoa/cocoa_core.mm b/desmume/src/frontend/cocoa/cocoa_core.mm
index e4e7f6e43..77b43a9f2 100644
--- a/desmume/src/frontend/cocoa/cocoa_core.mm
+++ b/desmume/src/frontend/cocoa/cocoa_core.mm
@@ -141,30 +141,37 @@ volatile bool execute = true;
 	pthread_cond_init(&threadParam.condThreadExecute, NULL);
 	pthread_rwlock_init(&threadParam.rwlockCoreExecute, NULL);
 
-	// The core emulation thread needs max priority since it is the sole
-	// producer thread for all output threads. Note that this is not being
-	// done for performance -- this is being done for timing accuracy. The
-	// core emulation thread is responsible for determining the emulator's
-	// timing. If one output thread interferes with timing, then it ends up
-	// affecting the whole emulator.
-	//
-	// Though it may be tempting to make this a real-time thread, it's best
-	// to keep this a normal thread. The core emulation thread can use up a
-	// lot of CPU time under certain conditions, which may interfere with
-	// other threads. (Example: Video tearing on display windows, even with
-	// V-sync enabled.)
-
-	pthread_attr_t threadAttr;
-	pthread_attr_init(&threadAttr);
-	pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
-
-	struct sched_param sp;
-	memset(&sp, 0, sizeof(struct sched_param));
-	sp.sched_priority = 42;
-	pthread_attr_setschedparam(&threadAttr, &sp);
-
-	pthread_create(&coreThread, &threadAttr, &RunCoreThread, &threadParam);
-	pthread_attr_destroy(&threadAttr);
+	if (CommonSettings.num_cores > 1)
+	{
+		// The core emulation thread needs max priority since it is the sole
+		// producer thread for all output threads. Note that this is not being
+		// done for performance -- this is being done for timing accuracy. The
+		// core emulation thread is responsible for determining the emulator's
+		// timing. If one output thread interferes with timing, then it ends up
+		// affecting the whole emulator.
+		//
+		// Though it may be tempting to make this a real-time thread, it's best
+		// to keep this a normal thread. The core emulation thread can use up a
+		// lot of CPU time under certain conditions, which may interfere with
+		// other threads. (Example: Video tearing on display windows, even with
+		// V-sync enabled.)
+
+		pthread_attr_t threadAttr;
+		pthread_attr_init(&threadAttr);
+		pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
+
+		struct sched_param sp;
+		memset(&sp, 0, sizeof(struct sched_param));
+		sp.sched_priority = 42;
+		pthread_attr_setschedparam(&threadAttr, &sp);
+
+		pthread_create(&coreThread, &threadAttr, &RunCoreThread, &threadParam);
+		pthread_attr_destroy(&threadAttr);
+	}
+	else
+	{
+		pthread_create(&coreThread, NULL, &RunCoreThread, &threadParam);
+	}
 
 	[cdsGPU setOutputList:cdsOutputList rwlock:&threadParam.rwlockOutputList];
 	[cdsCheatManager setRwlockCoreExecute:&threadParam.rwlockCoreExecute];
diff --git a/desmume/src/frontend/cocoa/cocoa_output.mm b/desmume/src/frontend/cocoa/cocoa_output.mm
index 8825f083a..4729a6284 100644
--- a/desmume/src/frontend/cocoa/cocoa_output.mm
+++ b/desmume/src/frontend/cocoa/cocoa_output.mm
@@ -99,17 +99,24 @@
 	pthread_mutex_init(&_mutexMessageLoop, NULL);
 	pthread_cond_init(&_condSignalMessage, NULL);
 
-	pthread_attr_t threadAttr;
-	pthread_attr_init(&threadAttr);
-	pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
-
-	struct sched_param sp;
-	memset(&sp, 0, sizeof(struct sched_param));
-	sp.sched_priority = 45;
-	pthread_attr_setschedparam(&threadAttr, &sp);
-
-	pthread_create(&_pthread, &threadAttr, &RunOutputThread, self);
-	pthread_attr_destroy(&threadAttr);
+	if (CommonSettings.num_cores > 1)
+	{
+		pthread_attr_t threadAttr;
+		pthread_attr_init(&threadAttr);
+		pthread_attr_setschedpolicy(&threadAttr, SCHED_RR);
+
+		struct sched_param sp;
+		memset(&sp, 0, sizeof(struct sched_param));
+		sp.sched_priority = 45;
+		pthread_attr_setschedparam(&threadAttr, &sp);
+
+		pthread_create(&_pthread, &threadAttr, &RunOutputThread, self);
+		pthread_attr_destroy(&threadAttr);
+	}
+	else
+	{
+		pthread_create(&_pthread, NULL, &RunOutputThread, self);
+	}
 }
 
 - (void) exitThread
diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp
index 4df484915..d7cf7be6d 100644
--- a/desmume/src/rasterize.cpp
+++ b/desmume/src/rasterize.cpp
@@ -1832,13 +1832,19 @@ SoftRasterizerRenderer::SoftRasterizerRenderer()
 
 			char name[16];
 			snprintf(name, 16, "rasterizer %d", (int)i);
+
 #ifdef DESMUME_COCOA
-			// The Cocoa port takes advantage of hand-optimized thread priorities
-			// to help stabilize performance when running SoftRasterizer.
-			_task[i].start(false, 43, name);
-#else
-			_task[i].start(false, 0, name);
+			if (coreCount > 1)
+			{
+				// The Cocoa port takes advantage of hand-optimized thread priorities
+				// to help stabilize performance when running SoftRasterizer.
+				_task[i].start(false, 43, name);
+			}
+			else
 #endif
+			{
+				_task[i].start(false, 0, name);
+			}
 		}
 	}
 
diff --git a/desmume/src/wifi.cpp b/desmume/src/wifi.cpp
index f079f9a7a..29ee07d4f 100644
--- a/desmume/src/wifi.cpp
+++ b/desmume/src/wifi.cpp
@@ -3395,12 +3395,18 @@ bool AdhocCommInterface::Start(WifiHandler* currentWifiHandler)
 	// Start the RX packet thread.
 	this->_wifiHandler = currentWifiHandler;
 	this->_rawPacket = (RXRawPacketData*)calloc(1, sizeof(RXRawPacketData));
-
-	#ifdef DESMUME_COCOA
-	this->_rxTask->start(false, 43, "wifi ad-hoc");
-	#else
-	this->_rxTask->start(false, 0, "wifi ad-hoc");
-	#endif
+
+#ifdef DESMUME_COCOA
+	if (CommonSettings.num_cores > 1)
+	{
+		this->_rxTask->start(false, 43, "wifi ad-hoc");
+	}
+	else
+#endif
+	{
+		this->_rxTask->start(false, 0, "wifi ad-hoc");
+	}
+
 	this->_isRXThreadRunning = true;
 	this->_rxTask->execute(&Adhoc_RXPacketGetOnThread, this);
 
@@ -3669,12 +3675,18 @@ bool SoftAPCommInterface::Start(WifiHandler* currentWifiHandler)
 		// Start the RX packet thread.
 		this->_wifiHandler = currentWifiHandler;
 		this->_rawPacket = (RXRawPacketData*)calloc(1, sizeof(RXRawPacketData));
-
-		#ifdef DESMUME_COCOA
-		this->_rxTask->start(false, 43, "wifi ap");
-		#else
-		this->_rxTask->start(false, 0, "wifi ap");
-		#endif
+
+#ifdef DESMUME_COCOA
+		if (CommonSettings.num_cores > 1)
+		{
+			this->_rxTask->start(false, 43, "wifi ap");
+		}
+		else
+#endif
+		{
+			this->_rxTask->start(false, 0, "wifi ap");
+		}
+
 		this->_isRXThreadRunning = true;
 		this->_rxTask->execute(&Infrastructure_RXPacketGetOnThread, this);
 	}