Merge branch 'master' into cluster

This commit is contained in:
Disservin
2024-04-10 18:46:26 +02:00
90 changed files with 11653 additions and 13407 deletions
+195 -156
View File
@@ -1,6 +1,6 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file)
Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,243 +16,282 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cassert>
#include "thread.h"
#include <algorithm> // For std::count
#include <algorithm>
#include <cassert>
#include <deque>
#include <memory>
#include <unordered_map>
#include <utility>
#include "cluster.h"
#include "misc.h"
#include "movegen.h"
#include "search.h"
#include "thread.h"
#include "uci.h"
#include "syzygy/tbprobe.h"
#include "timeman.h"
#include "tt.h"
#include "types.h"
#include "ucioption.h"
namespace Stockfish {
ThreadPool Threads; // Global object
// Constructor launches the thread and waits until it goes to sleep
// in idle_loop(). Note that 'searching' and 'exit' should be already set.
Thread::Thread(Search::SharedState& sharedState,
std::unique_ptr<Search::ISearchManager> sm,
size_t n) :
worker(std::make_unique<Search::Worker>(sharedState, std::move(sm), n)),
idx(n),
nthreads(sharedState.options["Threads"]),
stdThread(&Thread::idle_loop, this) {
/// Thread constructor launches the thread and waits until it goes to sleep
/// in idle_loop(). Note that 'searching' and 'exit' should be already set.
Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) {
wait_for_search_finished();
wait_for_search_finished();
}
/// Thread destructor wakes up the thread in idle_loop() and waits
/// for its termination. Thread should be already waiting.
// Destructor wakes up the thread in idle_loop() and waits
// for its termination. Thread should be already waiting.
Thread::~Thread() {
assert(!searching);
assert(!searching);
exit = true;
start_searching();
stdThread.join();
exit = true;
start_searching();
stdThread.join();
}
/// Thread::clear() reset histories, usually before a new game
void Thread::clear() {
counterMoves.fill(MOVE_NONE);
mainHistory.fill(0);
captureHistory.fill(0);
for (bool inCheck : { false, true })
for (StatsType c : { NoCaptures, Captures })
for (auto& to : continuationHistory[inCheck][c])
for (auto& h : to)
h->fill(-71);
}
/// Thread::start_searching() wakes up the thread that will start the search
// Wakes up the thread that will start the search
void Thread::start_searching() {
mutex.lock();
searching = true;
mutex.unlock(); // Unlock before notifying saves a few CPU-cycles
cv.notify_one(); // Wake up the thread in idle_loop()
mutex.lock();
searching = true;
mutex.unlock(); // Unlock before notifying saves a few CPU-cycles
cv.notify_one(); // Wake up the thread in idle_loop()
}
/// Thread::wait_for_search_finished() blocks on the condition variable
/// until the thread has finished searching.
// Blocks on the condition variable
// until the thread has finished searching.
void Thread::wait_for_search_finished() {
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&]{ return !searching; });
std::unique_lock<std::mutex> lk(mutex);
cv.wait(lk, [&] { return !searching; });
}
/// Thread::idle_loop() is where the thread is parked, blocked on the
/// condition variable, when it has no work to do.
// Thread gets parked here, blocked on the
// condition variable, when it has no work to do.
void Thread::idle_loop() {
// If OS already scheduled us on a different group than 0 then don't overwrite
// the choice, eventually we are one of many one-threaded processes running on
// some Windows NUMA hardware, for instance in fishtest. To make it simple,
// just check if running threads are below a threshold, in this case all this
// NUMA machinery is not needed.
if (Options["Threads"] > 8)
WinProcGroup::bindThisThread(idx);
// If OS already scheduled us on a different group than 0 then don't overwrite
// the choice, eventually we are one of many one-threaded processes running on
// some Windows NUMA hardware, for instance in fishtest. To make it simple,
// just check if running threads are below a threshold, in this case, all this
// NUMA machinery is not needed.
if (nthreads > 8)
WinProcGroup::bind_this_thread(idx);
while (true)
{
std::unique_lock<std::mutex> lk(mutex);
searching = false;
cv.notify_one(); // Wake up anyone waiting for search finished
cv.wait(lk, [&]{ return searching; });
while (true)
{
std::unique_lock<std::mutex> lk(mutex);
searching = false;
cv.notify_one(); // Wake up anyone waiting for search finished
cv.wait(lk, [&] { return searching; });
if (exit)
return;
if (exit)
return;
lk.unlock();
lk.unlock();
search();
}
worker->start_searching();
}
}
/// ThreadPool::set() creates/destroys threads to match the requested number.
/// Created and launched threads will immediately go to sleep in idle_loop.
/// Upon resizing, threads are recreated to allow for binding if necessary.
Search::SearchManager* ThreadPool::main_manager() {
return static_cast<Search::SearchManager*>(main_thread()->worker.get()->manager.get());
}
void ThreadPool::set(size_t requested) {
uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }
uint64_t ThreadPool::TT_saves() const { return accumulate(&Search::Worker::TTsaves); }
if (threads.size() > 0) // destroy any existing thread(s)
{
main()->wait_for_search_finished();
// Creates/destroys threads to match the requested number.
// Created and launched threads will immediately go to sleep in idle_loop.
// Upon resizing, threads are recreated to allow for binding if necessary.
void ThreadPool::set(Search::SharedState sharedState) {
while (threads.size() > 0)
delete threads.back(), threads.pop_back();
}
if (threads.size() > 0) // destroy any existing thread(s)
{
main_thread()->wait_for_search_finished();
if (requested > 0) // create new thread(s)
{
threads.push_back(new MainThread(0));
while (threads.size() > 0)
delete threads.back(), threads.pop_back();
}
while (threads.size() < requested)
threads.push_back(new Thread(threads.size()));
clear();
const size_t requested = sharedState.options["Threads"];
// Reallocate the hash with the new threadpool size
TT.resize(size_t(Options["Hash"]));
if (requested > 0) // create new thread(s)
{
threads.push_back(new Thread(
sharedState, std::unique_ptr<Search::ISearchManager>(new Search::SearchManager()), 0));
// Adjust cluster buffers
Cluster::ttSendRecvBuff_resize(requested);
// Init thread number dependent search params.
Search::init();
}
while (threads.size() < requested)
threads.push_back(new Thread(
sharedState, std::unique_ptr<Search::ISearchManager>(new Search::NullSearchManager()),
threads.size()));
clear();
main_thread()->wait_for_search_finished();
// Reallocate the hash with the new threadpool size
sharedState.tt.resize(sharedState.options["Hash"], requested);
// Adjust cluster buffers
Cluster::ttSendRecvBuff_resize(requested);
}
}
/// ThreadPool::clear() sets threadPool data to initial values
// Sets threadPool data to initial values
void ThreadPool::clear() {
for (Thread* th : threads)
th->clear();
for (Thread* th : threads)
th->worker->clear();
main()->callsCnt = 0;
main()->bestPreviousScore = VALUE_INFINITE;
main()->bestPreviousAverageScore = VALUE_INFINITE;
main()->previousTimeReduction = 1.0;
main_manager()->callsCnt = 0;
main_manager()->bestPreviousScore = VALUE_INFINITE;
main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
main_manager()->previousTimeReduction = 1.0;
main_manager()->tm.clear();
}
/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and
/// returns immediately. Main thread will wake up other threads and start the search.
// Wakes up main thread waiting in idle_loop() and
// returns immediately. Main thread will wake up other threads and start the search.
void ThreadPool::start_thinking(const OptionsMap& options,
Position& pos,
StateListPtr& states,
Search::LimitsType limits) {
void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
const Search::LimitsType& limits, bool ponderMode) {
main_thread()->wait_for_search_finished();
main()->wait_for_search_finished();
main_manager()->stopOnPonderhit = stop = abortedSearch = false;
main_manager()->ponder = limits.ponderMode;
main()->stopOnPonderhit = stop = false;
increaseDepth = true;
main()->ponder = ponderMode;
Search::Limits = limits;
Search::RootMoves rootMoves;
increaseDepth = true;
for (const auto& m : MoveList<LEGAL>(pos))
if ( limits.searchmoves.empty()
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
rootMoves.emplace_back(m);
Search::RootMoves rootMoves;
if (!rootMoves.empty())
Tablebases::rank_root_moves(pos, rootMoves);
for (const auto& m : MoveList<LEGAL>(pos))
if (limits.searchmoves.empty()
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
rootMoves.emplace_back(m);
// After ownership transfer 'states' becomes empty, so if we stop the search
// and call 'go' again without setting a new position states.get() == nullptr.
assert(states.get() || setupStates.get());
Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);
if (states.get())
setupStates = std::move(states); // Ownership transfer, states is now empty
// After ownership transfer 'states' becomes empty, so if we stop the search
// and call 'go' again without setting a new position states.get() == nullptr.
assert(states.get() || setupStates.get());
// We use Position::set() to set root position across threads. But there are
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
// be deduced from a fen string, so set() clears them and they are set from
// setupStates->back() later. The rootState is per thread, earlier states are shared
// since they are read-only.
for (Thread* th : threads)
{
th->nodes = th->tbHits = th->TTsaves = th->nmpMinPly = th->bestMoveChanges = 0;
th->rootDepth = th->completedDepth = 0;
th->rootMoves = rootMoves;
th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
th->rootState = setupStates->back();
}
if (states.get())
setupStates = std::move(states); // Ownership transfer, states is now empty
Cluster::signals_init();
// We use Position::set() to set root position across threads. But there are
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
// be deduced from a fen string, so set() clears them and they are set from
// setupStates->back() later. The rootState is per thread, earlier states are shared
// since they are read-only.
for (Thread* th : threads)
{
th->worker->limits = limits;
th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
th->worker->bestMoveChanges = 0;
th->worker->TTsaves = 0;
th->worker->rootDepth = th->worker->completedDepth = 0;
th->worker->rootMoves = rootMoves;
th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
th->worker->rootState = setupStates->back();
th->worker->tbConfig = tbConfig;
}
main()->start_searching();
Cluster::signals_init();
main_thread()->start_searching();
}
Thread* ThreadPool::get_best_thread() const {
Thread* bestThread = threads.front();
std::map<Move, int64_t> votes;
Value minScore = VALUE_NONE;
Value minScore = VALUE_NONE;
// Find minimum score of all threads
for (Thread* th: threads)
minScore = std::min(minScore, th->rootMoves[0].score);
std::unordered_map<Move, int64_t, Move::MoveHash> votes(
2 * std::min(size(), bestThread->worker->rootMoves.size()));
// Find the minimum score of all threads
for (Thread* th : threads)
minScore = std::min(minScore, th->worker->rootMoves[0].score);
// Vote according to score and depth, and select the best thread
auto thread_value = [minScore](Thread* th) {
return (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);
};
auto thread_voting_value = [minScore](Thread* th) {
return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
};
for (Thread* th : threads)
votes[th->rootMoves[0].pv[0]] += thread_value(th);
votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th);
for (Thread* th : threads)
if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
{
const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
const auto newThreadScore = th->worker->rootMoves[0].score;
const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
const auto& newThreadPV = th->worker->rootMoves[0].pv;
const auto bestThreadMoveVote = votes[bestThreadPV[0]];
const auto newThreadMoveVote = votes[newThreadPV[0]];
const bool bestThreadInProvenWin = bestThreadScore >= VALUE_TB_WIN_IN_MAX_PLY;
const bool newThreadInProvenWin = newThreadScore >= VALUE_TB_WIN_IN_MAX_PLY;
const bool bestThreadInProvenLoss =
bestThreadScore != -VALUE_INFINITE && bestThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
const bool newThreadInProvenLoss =
newThreadScore != -VALUE_INFINITE && newThreadScore <= VALUE_TB_LOSS_IN_MAX_PLY;
// Note that we make sure not to pick a thread with truncated-PV for better viewer experience.
const bool betterVotingValue =
thread_voting_value(th) * int(newThreadPV.size() > 2)
> thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
if (bestThreadInProvenWin)
{
// Make sure we pick the shortest mate / TB conversion or stave off mate the longest
if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
// Make sure we pick the shortest mate / TB conversion
if (newThreadScore > bestThreadScore)
bestThread = th;
}
else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
|| ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
&& ( votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]
|| ( votes[th->rootMoves[0].pv[0]] == votes[bestThread->rootMoves[0].pv[0]]
&& thread_value(th) * int(th->rootMoves[0].pv.size() > 2)
> thread_value(bestThread) * int(bestThread->rootMoves[0].pv.size() > 2)))))
else if (bestThreadInProvenLoss)
{
// Make sure we pick the shortest mated / TB conversion
if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
bestThread = th;
}
else if (newThreadInProvenWin || newThreadInProvenLoss
|| (newThreadScore > VALUE_TB_LOSS_IN_MAX_PLY
&& (newThreadMoveVote > bestThreadMoveVote
|| (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
bestThread = th;
}
return bestThread;
}
/// Start non-main threads
// Start non-main threads
// Will be invoked by main thread after it has started searching
void ThreadPool::start_searching() {
for (Thread* th : threads)
@@ -261,7 +300,7 @@ void ThreadPool::start_searching() {
}
/// Wait for non-main threads
// Wait for non-main threads
void ThreadPool::wait_for_search_finished() const {
@@ -270,4 +309,4 @@ void ThreadPool::wait_for_search_finished() const {
th->wait_for_search_finished();
}
} // namespace Stockfish
} // namespace Stockfish