Simplify TT interface and avoid changing TT info

This commit builds on the work and ideas of #5345, #5348, and #5364.

Place as much as possible of the TT implementation in tt.cpp, rather than in the
header.  Some commentary is added to better document the public interface.

Fix the search read-TT races, or at least contain them to within TT methods only.

Passed SMP STC: https://tests.stockfishchess.org/tests/view/666134ab91e372763104b443
LLR: 2.94 (-2.94,2.94) <-1.75,0.25>
Total: 512552 W: 132387 L: 132676 D: 247489
Ptnml(0-2): 469, 58429, 138771, 58136, 471

The unmerged version has bench identical to the other PR (see also #5348) and
therefore those same-functionality tests:

SMP LTC: https://tests.stockfishchess.org/tests/view/665c7021fd45fb0f907c214a
SMP LTC: https://tests.stockfishchess.org/tests/view/665d28a7fd45fb0f907c5495

closes https://github.com/official-stockfish/Stockfish/pull/5369

bench 1205675
This commit is contained in:
Dubslow
2024-06-10 18:03:36 -05:00
committed by Joost VandeVondele
parent 7e890fd048
commit c8213ba0d0
4 changed files with 265 additions and 208 deletions
+119 -29
View File
@@ -25,11 +25,63 @@
#include <iostream>
#include "memory.h"
#include "misc.h"
#include "syzygy/tbprobe.h"
#include "thread.h"
namespace Stockfish {
// TTEntry struct is the 10 bytes transposition table entry, defined as below:
//
// key 16 bit
// depth 8 bit
// generation 5 bit
// pv node 1 bit
// bound type 2 bit
// move 16 bit
// value 16 bit
// evaluation 16 bit
//
// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
// Equally, the store order in save() matches this order.
struct TTEntry {
// Convert internal bitfields to external types
TTData read() const {
return TTData{Move(move16), Value(value16),
Value(eval16), Depth(depth8 + DEPTH_ENTRY_OFFSET),
Bound(genBound8 & 0x3), bool(genBound8 & 0x4)};
}
void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
// The returned age is a multiple of TranspositionTable::GENERATION_DELTA
uint8_t relative_age(const uint8_t generation8) const;
private:
friend class TranspositionTable;
uint16_t key16;
uint8_t depth8;
uint8_t genBound8;
Move move16;
int16_t value16;
int16_t eval16;
};
// `genBound8` is where most of the details are. We use the following constants to manipulate 5 leading generation bits
// and 3 trailing miscellaneous bits.
// These bits are reserved for other things.
static constexpr unsigned GENERATION_BITS = 3;
// increment for generation field
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
// cycle length
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
// mask to pull out generation number
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
// DEPTH_ENTRY_OFFSET exists because 1) we use `bool(depth8)` as the occupancy check, but
// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits":
// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted below.)
@@ -65,12 +117,34 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const {
// is needed to keep the unrelated lowest n bits from affecting
// the result) to calculate the entry age correctly even after
// generation8 overflows into the next cycle.
return (TranspositionTable::GENERATION_CYCLE + generation8 - genBound8)
& TranspositionTable::GENERATION_MASK;
return (GENERATION_CYCLE + generation8 - genBound8) & GENERATION_MASK;
}
// TTWriter is but a very thin wrapper around the pointer
TTWriter::TTWriter(TTEntry* tte) :
entry(tte) {}
void TTWriter::write(
Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
entry->save(k, v, pv, b, d, m, ev, generation8);
}
// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number
// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should
// divide the size of a cache line for best performance, as the cacheline is prefetched when possible.
static constexpr int ClusterSize = 3;
struct Cluster {
TTEntry entry[ClusterSize];
char padding[2]; // Pad to 32 bytes
};
static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size");
// Sets the size of the transposition table,
// measured in megabytes. Transposition table consists
// of clusters and each cluster consists of ClusterSize number of TTEntry.
@@ -114,32 +188,6 @@ void TranspositionTable::clear(ThreadPool& threads) {
}
// Looks up the current position in the transposition
// table. It returns true and a pointer to the TTEntry if the position is found.
// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
// to be replaced later. The replace value of an entry is calculated as its depth
// minus 8 times its relative age. TTEntry t1 is considered more valuable than
// TTEntry t2 if its replace value is greater than that of t2.
TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
TTEntry* const tte = first_entry(key);
const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i)
if (tte[i].key16 == key16)
return found = bool(tte[i].depth8), &tte[i];
// Find an entry to be replaced according to the replacement strategy
TTEntry* replace = tte;
for (int i = 1; i < ClusterSize; ++i)
if (replace->depth8 - replace->relative_age(generation8) * 2
> tte[i].depth8 - tte[i].relative_age(generation8) * 2)
replace = &tte[i];
return found = false, replace;
}
// Returns an approximation of the hashtable
// occupation during a search. The hash is x permill full, as per UCI protocol.
// Only counts entries which match the current generation.
@@ -154,4 +202,46 @@ int TranspositionTable::hashfull() const {
return cnt / ClusterSize;
}
void TranspositionTable::new_search() {
// increment by delta to keep lower bits as is
generation8 += GENERATION_DELTA;
}
uint8_t TranspositionTable::generation() const { return generation8; }
// Looks up the current position in the transposition
// table. It returns true if the position is found.
// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
// to be replaced later. The replace value of an entry is calculated as its depth
// minus 8 times its relative age. TTEntry t1 is considered more valuable than
// TTEntry t2 if its replace value is greater than that of t2.
std::tuple<bool, TTData, TTWriter> TranspositionTable::probe(const Key key) const {
TTEntry* const tte = first_entry(key);
const uint16_t key16 = uint16_t(key); // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i)
if (tte[i].key16 == key16)
// This gap is the main place for read races.
// After `read()` completes that copy is final, but may be self-inconsistent.
return {bool(tte[i].depth8), tte[i].read(), TTWriter(&tte[i])};
// Find an entry to be replaced according to the replacement strategy
TTEntry* replace = tte;
for (int i = 1; i < ClusterSize; ++i)
if (replace->depth8 - replace->relative_age(generation8) * 2
> tte[i].depth8 - tte[i].relative_age(generation8) * 2)
replace = &tte[i];
return {false, replace->read(), TTWriter(replace)};
}
TTEntry* TranspositionTable::first_entry(const Key key) const {
return &table[mul_hi64(key, clusterCount)].entry[0];
}
} // namespace Stockfish