From be209055828d215095fe956590d45386ceb52010 Mon Sep 17 00:00:00 2001 From: John Kessenich Date: Sun, 12 Nov 2017 15:28:58 -0700 Subject: [PATCH] Memory: Non-Functional: Rationalize and improve encapsulation of TLS usage. This will make the next (functional) commit easier to see. --- OGLCompilersDLL/InitializeDll.cpp | 16 ++++-- OGLCompilersDLL/InitializeDll.h | 2 +- glslang/Include/InitializeGlobals.h | 3 +- glslang/Include/PoolAlloc.h | 9 +-- glslang/MachineIndependent/PoolAlloc.cpp | 67 +++++++++++++---------- glslang/MachineIndependent/ShaderLang.cpp | 24 ++++---- glslang/Public/ShaderLang.h | 7 +-- 7 files changed, 70 insertions(+), 58 deletions(-) diff --git a/OGLCompilersDLL/InitializeDll.cpp b/OGLCompilersDLL/InitializeDll.cpp index 2eb912c42..c1f03c09c 100644 --- a/OGLCompilersDLL/InitializeDll.cpp +++ b/OGLCompilersDLL/InitializeDll.cpp @@ -45,6 +45,10 @@ namespace glslang { OS_TLSIndex ThreadInitializeIndex = OS_INVALID_TLS_INDEX; +// Per-process initialization. +// Needs to be called at least once before parsing, etc. is done. +// Will also do thread initialization for the calling thread; other +// threads will need to do that explicitly. bool InitProcess() { glslang::GetGlobalLock(); @@ -85,7 +89,9 @@ bool InitProcess() return true; } - +// Per-thread scoped initialization. +// Must be called at least once by each new thread sharing the +// symbol tables, etc., needed to parse. bool InitThread() { // @@ -109,7 +115,9 @@ bool InitThread() return true; } - +// Thread-scoped tear down. Needs to be done by all but the last +// thread, which calls DetachProcess() instead. +// NB. TODO: Not currently being executed by each thread. bool DetachThread() { bool success = true; @@ -126,13 +134,13 @@ bool DetachThread() success = false; } - FreeGlobalPools(); - + FreeMemoryPools(); } return success; } +// Process-scoped tear down. Needs to be done by final thread in process. bool DetachProcess() { bool success = true; diff --git a/OGLCompilersDLL/InitializeDll.h b/OGLCompilersDLL/InitializeDll.h index 60b2b1593..281f3b1a9 100644 --- a/OGLCompilersDLL/InitializeDll.h +++ b/OGLCompilersDLL/InitializeDll.h @@ -40,7 +40,7 @@ namespace glslang { bool InitProcess(); bool InitThread(); -bool DetachThread(); +bool DetachThread(); // TODO: use this or remove it; ideally make it unneeded bool DetachProcess(); } // end namespace glslang diff --git a/glslang/Include/InitializeGlobals.h b/glslang/Include/InitializeGlobals.h index 4cf2dca7c..dd7554b62 100644 --- a/glslang/Include/InitializeGlobals.h +++ b/glslang/Include/InitializeGlobals.h @@ -38,7 +38,8 @@ namespace glslang { void InitializeMemoryPools(); -void FreeGlobalPools(); +void FreeMemoryPools(); + bool InitializePoolIndex(); void FreePoolIndex(); diff --git a/glslang/Include/PoolAlloc.h b/glslang/Include/PoolAlloc.h index 69bacb156..0e237a6a2 100644 --- a/glslang/Include/PoolAlloc.h +++ b/glslang/Include/PoolAlloc.h @@ -250,15 +250,8 @@ private: // different times. But a simple use is to have a global pop // with everyone using the same global allocator. // -typedef TPoolAllocator* PoolAllocatorPointer; extern TPoolAllocator& GetThreadPoolAllocator(); - -struct TThreadMemoryPools -{ - TPoolAllocator* threadPoolAllocator; -}; - -void SetThreadPoolAllocator(TPoolAllocator& poolAllocator); +void SetThreadPoolAllocator(TPoolAllocator* poolAllocator); // // This STL compatible allocator is intended to be used as the allocator diff --git a/glslang/MachineIndependent/PoolAlloc.cpp b/glslang/MachineIndependent/PoolAlloc.cpp index 4007c3861..5077a26bc 100644 --- a/glslang/MachineIndependent/PoolAlloc.cpp +++ b/glslang/MachineIndependent/PoolAlloc.cpp @@ -40,35 +40,40 @@ namespace glslang { +// Process-wide TLS index OS_TLSIndex PoolIndex; -void InitializeMemoryPools() +// Per-thread structure holding pool pointers. +struct TThreadMemoryPools { - TThreadMemoryPools* pools = static_cast(OS_GetTLSValue(PoolIndex)); - if (pools) - return; + TPoolAllocator* threadPoolAllocator; // the current pool +}; - TPoolAllocator *threadPoolAllocator = new TPoolAllocator(); - - TThreadMemoryPools* threadData = new TThreadMemoryPools(); - - threadData->threadPoolAllocator = threadPoolAllocator; - - OS_SetTLSValue(PoolIndex, threadData); +// Return the thread-specific pool pointers. +TThreadMemoryPools* GetThreadMemoryPools() +{ + return static_cast(OS_GetTLSValue(PoolIndex)); } -void FreeGlobalPools() +// Set the thread-specific pool pointers. +void SetThreadMemoryPools(TThreadMemoryPools* pools) { - // Release the allocated memory for this thread. - TThreadMemoryPools* globalPools = static_cast(OS_GetTLSValue(PoolIndex)); - if (! globalPools) - return; - - GetThreadPoolAllocator().popAll(); - delete &GetThreadPoolAllocator(); - delete globalPools; + OS_SetTLSValue(PoolIndex, pools); } +// Return the thread-specific current pool. +TPoolAllocator& GetThreadPoolAllocator() +{ + return *GetThreadMemoryPools()->threadPoolAllocator; +} + +// Set the thread-specific current pool. +void SetThreadPoolAllocator(TPoolAllocator* poolAllocator) +{ + GetThreadMemoryPools()->threadPoolAllocator = poolAllocator; +} + +// Process-wide set up of the TLS pool storage. bool InitializePoolIndex() { // Allocate a TLS index. @@ -78,24 +83,30 @@ bool InitializePoolIndex() return true; } +// Process-wide tear down of the TLS pool storage. void FreePoolIndex() { // Release the TLS index. OS_FreeTLSIndex(PoolIndex); } -TPoolAllocator& GetThreadPoolAllocator() +// Per-thread set up of the memory pools. +void InitializeMemoryPools() { - TThreadMemoryPools* threadData = static_cast(OS_GetTLSValue(PoolIndex)); - - return *threadData->threadPoolAllocator; + if (GetThreadMemoryPools() == nullptr) { + SetThreadMemoryPools(new TThreadMemoryPools()); + SetThreadPoolAllocator(new TPoolAllocator()); + } } -void SetThreadPoolAllocator(TPoolAllocator& poolAllocator) +// Per-thread tear down of the memory pools. +void FreeMemoryPools() { - TThreadMemoryPools* threadData = static_cast(OS_GetTLSValue(PoolIndex)); - - threadData->threadPoolAllocator = &poolAllocator; + if (GetThreadMemoryPools() != nullptr) { + GetThreadPoolAllocator().popAll(); + delete &GetThreadPoolAllocator(); + delete GetThreadMemoryPools(); + } } // diff --git a/glslang/MachineIndependent/ShaderLang.cpp b/glslang/MachineIndependent/ShaderLang.cpp index c8e954ce0..8de5297be 100644 --- a/glslang/MachineIndependent/ShaderLang.cpp +++ b/glslang/MachineIndependent/ShaderLang.cpp @@ -217,7 +217,7 @@ enum EPrecisionClass { TSymbolTable* CommonSymbolTable[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EPcCount] = {}; TSymbolTable* SharedSymbolTables[VersionCount][SpvVersionCount][ProfileCount][SourceCount][EShLangCount] = {}; -TPoolAllocator* PerProcessGPA = 0; +TPoolAllocator* PerProcessGPA = nullptr; // // Parse and add to the given symbol table the content of the given shader string. @@ -361,7 +361,7 @@ bool AddContextSpecificSymbols(const TBuiltInResource* resources, TInfoSink& inf // pool allocator intact, so: // - Switch to a new pool for parsing the built-ins // - Do the parsing, which builds the symbol table, using the new pool -// - Switch to the process-global pool to save a copy the resulting symbol table +// - Switch to the process-global pool to save a copy of the resulting symbol table // - Free up the new pool used to parse the built-ins // - Switch back to the original thread's pool // @@ -388,8 +388,8 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp // Switch to a new pool TPoolAllocator& previousAllocator = GetThreadPoolAllocator(); - TPoolAllocator* builtInPoolAllocator = new TPoolAllocator(); - SetThreadPoolAllocator(*builtInPoolAllocator); + TPoolAllocator* builtInPoolAllocator = new TPoolAllocator; + SetThreadPoolAllocator(builtInPoolAllocator); // Dynamically allocate the local symbol tables so we can control when they are deallocated WRT when the pool is popped. TSymbolTable* commonTable[EPcCount]; @@ -403,7 +403,7 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp InitializeSymbolTables(infoSink, commonTable, stageTables, version, profile, spvVersion, source); // Switch to the process-global pool - SetThreadPoolAllocator(*PerProcessGPA); + SetThreadPoolAllocator(PerProcessGPA); // Copy the local symbol tables from the new pool to the global tables using the process-global pool for (int precClass = 0; precClass < EPcCount; ++precClass) { @@ -430,7 +430,7 @@ void SetupBuiltinSymbolTable(int version, EProfile profile, const SpvVersion& sp delete stageTables[stage]; delete builtInPoolAllocator; - SetThreadPoolAllocator(previousAllocator); + SetThreadPoolAllocator(&previousAllocator); glslang::ReleaseGlobalLock(); } @@ -1196,7 +1196,7 @@ int ShInitialize() if (! InitProcess()) return 0; - if (! PerProcessGPA) + if (PerProcessGPA == nullptr) PerProcessGPA = new TPoolAllocator(); glslang::TScanContext::fillInKeywordMap(); @@ -1288,10 +1288,10 @@ int __fastcall ShFinalize() } } - if (PerProcessGPA) { + if (PerProcessGPA != nullptr) { PerProcessGPA->popAll(); delete PerProcessGPA; - PerProcessGPA = 0; + PerProcessGPA = nullptr; } glslang::TScanContext::deleteKeywordMap(); @@ -1708,7 +1708,7 @@ bool TShader::parse(const TBuiltInResource* builtInResources, int defaultVersion return false; pool = new TPoolAllocator(); - SetThreadPoolAllocator(*pool); + SetThreadPoolAllocator(pool); if (! preamble) preamble = ""; @@ -1732,7 +1732,7 @@ bool TShader::preprocess(const TBuiltInResource* builtInResources, return false; pool = new TPoolAllocator(); - SetThreadPoolAllocator(*pool); + SetThreadPoolAllocator(pool); if (! preamble) preamble = ""; @@ -1789,7 +1789,7 @@ bool TProgram::link(EShMessages messages) bool error = false; pool = new TPoolAllocator(); - SetThreadPoolAllocator(*pool); + SetThreadPoolAllocator(pool); for (int s = 0; s < EShLangCount; ++s) { if (! linkStage((EShLanguage)s, messages)) diff --git a/glslang/Public/ShaderLang.h b/glslang/Public/ShaderLang.h index 6fadfbf0c..6e22bdd72 100644 --- a/glslang/Public/ShaderLang.h +++ b/glslang/Public/ShaderLang.h @@ -68,15 +68,14 @@ #endif // -// Driver must call this first, once, before doing any other -// compiler/linker operations. +// Call before doing any other compiler/linker operations. // // (Call once per process, not once per thread.) // SH_IMPORT_EXPORT int ShInitialize(); // -// Driver should call this at process shutdown. +// Call this at process shutdown to clean up memory. // SH_IMPORT_EXPORT int __fastcall ShFinalize(); @@ -290,7 +289,7 @@ SH_IMPORT_EXPORT int ShGetUniformLocation(const ShHandle uniformMap, const char* // Deferred-Lowering C++ Interface // ----------------------------------- // -// Below is a new alternate C++ interface that might potentially replace the above +// Below is a new alternate C++ interface, which deprecates the above // opaque handle-based interface. // // The below is further designed to handle multiple compilation units per stage, where