Simplify TT interface and avoid changing TT info

This commit builds on the work and ideas of #5345, #5348, and #5364.

Place as much as possible of the TT implementation in tt.cpp, rather than in the
header.  Some commentary is added to better document the public interface.

Fix the search read-TT races, or at least contain them to within TT methods only.

Passed SMP STC: https://tests.stockfishchess.org/tests/view/666134ab91e372763104b443
LLR: 2.94 (-2.94,2.94) <-1.75,0.25>
Total: 512552 W: 132387 L: 132676 D: 247489
Ptnml(0-2): 469, 58429, 138771, 58136, 471

The unmerged version has bench identical to the other PR (see also #5348) and
therefore those same-functionality tests:

SMP LTC: https://tests.stockfishchess.org/tests/view/665c7021fd45fb0f907c214a
SMP LTC: https://tests.stockfishchess.org/tests/view/665d28a7fd45fb0f907c5495

closes https://github.com/official-stockfish/Stockfish/pull/5369

bench 1205675
This commit is contained in:
Dubslow
2024-06-10 18:03:36 -05:00
committed by Joost VandeVondele
parent 7e890fd048
commit c8213ba0d0
4 changed files with 265 additions and 208 deletions
+46 -73
View File
@@ -21,103 +21,76 @@
#include <cstddef>
#include <cstdint>
#include <tuple>
#include "memory.h"
#include "misc.h"
#include "types.h"
namespace Stockfish {
// TTEntry struct is the 10 bytes transposition table entry, defined as below:
//
// key 16 bit
// depth 8 bit
// generation 5 bit
// pv node 1 bit
// bound type 2 bit
// move 16 bit
// value 16 bit
// eval value 16 bit
//
// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
// Equally, the store order in save() matches this order.
struct TTEntry {
class ThreadPool;
struct TTEntry;
struct Cluster;
Move move() const { return Move(move16); }
Value value() const { return Value(value16); }
Value eval() const { return Value(eval16); }
Depth depth() const { return Depth(depth8 + DEPTH_ENTRY_OFFSET); }
bool is_pv() const { return bool(genBound8 & 0x4); }
Bound bound() const { return Bound(genBound8 & 0x3); }
void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
// The returned age is a multiple of TranspositionTable::GENERATION_DELTA
uint8_t relative_age(const uint8_t generation8) const;
// There is only one global hash table for the engine and all its threads. For chess in particular, we even allow racy
// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and
// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate
// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size.
//
// `probe` is the primary method: given a board position, we lookup its entry in the table, and return a tuple of:
// 1) whether the entry already has this position
// 2) a copy of the prior data (if any) (may be inconsistent due to read races)
// 3) a writer object to this entry
// The copied data and the writer are separated to maintain clear boundaries between local vs global objects.
// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data.
struct TTData {
Move move;
Value value, eval;
Depth depth;
Bound bound;
bool is_pv;
};
// This is used to make racy writes to the global TT.
struct TTWriter {
public:
void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
private:
friend class TranspositionTable;
uint16_t key16;
uint8_t depth8;
uint8_t genBound8;
Move move16;
int16_t value16;
int16_t eval16;
TTEntry* entry;
TTWriter(TTEntry* tte);
};
class ThreadPool;
// A TranspositionTable is an array of Cluster, of size clusterCount. Each
// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry
// contains information on exactly one position. The size of a Cluster should
// divide the size of a cache line for best performance, as the cacheline is
// prefetched when possible.
class TranspositionTable {
static constexpr int ClusterSize = 3;
struct Cluster {
TTEntry entry[ClusterSize];
char padding[2]; // Pad to 32 bytes
};
static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
// Constants used to refresh the hash table periodically
// We have 8 bits available where the lowest 3 bits are
// reserved for other things.
static constexpr unsigned GENERATION_BITS = 3;
// increment for generation field
static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
// cycle length
static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
// mask to pull out generation number
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
public:
~TranspositionTable() { aligned_large_pages_free(table); }
void new_search() {
// increment by delta to keep lower bits as is
generation8 += GENERATION_DELTA;
}
TTEntry* probe(const Key key, bool& found) const;
int hashfull() const;
void resize(size_t mbSize, ThreadPool& threads);
void clear(ThreadPool& threads);
void resize(size_t mbSize, ThreadPool& threads); // Set TT size
void clear(ThreadPool& threads); // Re-initialize memory, multithreaded
int hashfull()
const; // Approximate what fraction of entries (permille) have been written to during this root search
TTEntry* first_entry(const Key key) const {
return &table[mul_hi64(key, clusterCount)].entry[0];
}
uint8_t generation() const { return generation8; }
void
new_search(); // This must be called at the beginning of each root search to track entry aging
uint8_t generation() const; // The current age, used when writing new data to the TT
std::tuple<bool, TTData, TTWriter>
probe(const Key key) const; // The main method, whose retvals separate local vs global objects
TTEntry* first_entry(const Key key)
const; // This is the hash function; its only external use is memory prefetching.
private:
friend struct TTEntry;
size_t clusterCount;
Cluster* table = nullptr;
uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
Cluster* table = nullptr;
uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
};
} // namespace Stockfish