Replicate network weights only to used NUMA nodes

On a system with multiple NUMA nodes, this patch avoids unneeded replicated
(e.g. 8x for a single threaded run), reducting memory use in that case.

Lazy initialization forced before search.

Passed STC:
https://tests.stockfishchess.org/tests/view/66a28c524ff211be9d4ecdd4
LLR: 2.96 (-2.94,2.94) <-1.75,0.25>
Total: 691776 W: 179429 L: 179927 D: 332420
Ptnml(0-2): 2573, 79370, 182547, 78778, 2620

closes https://github.com/official-stockfish/Stockfish/pull/5515

No functional change
This commit is contained in:
Tomasz Sobczyk
2024-07-25 14:37:08 +02:00
committed by Joost VandeVondele
parent 2343f71f3f
commit 8e560c4fd3
7 changed files with 152 additions and 16 deletions
+14 -12
View File
@@ -131,19 +131,19 @@ struct LimitsType {
// The UCI stores the uci options, thread pool, and transposition table.
// This struct is used to easily forward data to the Search::Worker class.
struct SharedState {
SharedState(const OptionsMap& optionsMap,
ThreadPool& threadPool,
TranspositionTable& transpositionTable,
const NumaReplicated<Eval::NNUE::Networks>& nets) :
SharedState(const OptionsMap& optionsMap,
ThreadPool& threadPool,
TranspositionTable& transpositionTable,
const LazyNumaReplicated<Eval::NNUE::Networks>& nets) :
options(optionsMap),
threads(threadPool),
tt(transpositionTable),
networks(nets) {}
const OptionsMap& options;
ThreadPool& threads;
TranspositionTable& tt;
const NumaReplicated<Eval::NNUE::Networks>& networks;
const OptionsMap& options;
ThreadPool& threads;
TranspositionTable& tt;
const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
};
class Worker;
@@ -274,6 +274,8 @@ class Worker {
bool is_mainthread() const { return threadIdx == 0; }
void ensure_network_replicated();
// Public because they need to be updatable by the stats
ButterflyHistory mainHistory;
CapturePieceToHistory captureHistory;
@@ -328,10 +330,10 @@ class Worker {
Tablebases::Config tbConfig;
const OptionsMap& options;
ThreadPool& threads;
TranspositionTable& tt;
const NumaReplicated<Eval::NNUE::Networks>& networks;
const OptionsMap& options;
ThreadPool& threads;
TranspositionTable& tt;
const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
// Used by NNUE
Eval::NNUE::AccumulatorCaches refreshTable;