Implement proper stop signalling from root node

The previous behavior was to wait for all nodes to finish their search under their own time management (TM) and then aggregate the results to the root node via a blocking MPI_Allreduce call. This seems to be problematic.

This commit implements a proper non-blocking signalling barrier so that the TM of the root node controls the cluster search, and it disables TM on all non-root nodes.

Also includes a cosmetic fix to the nodes/NPS display.
This commit is contained in:
noobpwnftw
2018-07-11 09:09:48 +08:00
committed by Stéphane Nicolet
parent 3b7b632aa5
commit 8a95d269eb
6 changed files with 48 additions and 6 deletions
+10 -4
View File
@@ -234,12 +234,15 @@ void MainThread::search() {
Threads.stopOnPonderhit = true;
while (!Threads.stop && (Threads.ponder || Limits.infinite))
{} // Busy wait for a stop or a ponder reset
{ } // Busy wait for a stop or a ponder reset
// Stop the threads if not already stopped (also raise the stop if
// "ponderhit" just reset Threads.ponder).
Threads.stop = true;
// Finish any outstanding barriers.
Cluster::sync_stop();
// Wait until all threads have finished
for (Thread* th : Threads)
if (th != this)
@@ -292,8 +295,8 @@ void MainThread::search() {
previousScore = static_cast<Value>(mi.score);
// Send again PV info if we have a new best thread
if (Cluster::is_root()) {
// Send again PV info if we have a new best thread
if (bestThread != this)
sync_cout << UCI::pv(bestThread->rootPos, bestThread->completedDepth, -VALUE_INFINITE, VALUE_INFINITE) << sync_endl;
@@ -1608,6 +1611,9 @@ void MainThread::check_time() {
if (Threads.ponder)
return;
// Check if root has reached a stop barrier
Cluster::sync_stop();
if ( (Limits.use_time_management() && elapsed > Time.maximum() - 10)
|| (Limits.movetime && elapsed >= Limits.movetime)
|| (Limits.nodes && Threads.nodes_searched() >= (uint64_t)Limits.nodes))
@@ -1653,8 +1659,8 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
if (!tb && i == pvIdx)
ss << (v >= beta ? " lowerbound" : v <= alpha ? " upperbound" : "");
ss << " nodes " << nodesSearched
<< " nps " << nodesSearched * 1000 / elapsed;
ss << " nodes " << nodesSearched * Cluster::size()
<< " nps " << nodesSearched * Cluster::size() * 1000 / elapsed;
if (elapsed > 1000) // Earlier makes little sense
ss << " hashfull " << TT.hashfull();