Merge remote-tracking branch 'remotes/origin/master' into trainer

This commit is contained in:
noobpwnftw
2020-09-19 02:26:03 +08:00
7 changed files with 196 additions and 147 deletions
+1
View File
@@ -63,6 +63,7 @@ Gary Heckman (gheckman)
George Sobala (gsobala) George Sobala (gsobala)
gguliash gguliash
Gian-Carlo Pascutto (gcp) Gian-Carlo Pascutto (gcp)
Deshawn Mohan-Smith (GoldenRare)
Gontran Lemaire (gonlem) Gontran Lemaire (gonlem)
Goodkov Vasiliy Aleksandrovich (goodkov) Goodkov Vasiliy Aleksandrovich (goodkov)
Gregor Cramer Gregor Cramer
+13 -19
View File
@@ -410,19 +410,6 @@ ifeq ($(COMP),clang)
endif endif
endif endif
ifeq ($(comp),icc)
profile_make = icc-profile-make
profile_use = icc-profile-use
else
ifeq ($(comp),clang)
profile_make = clang-profile-make
profile_use = clang-profile-use
else
profile_make = gcc-profile-make
profile_use = gcc-profile-use
endif
endif
ifeq ($(KERNEL),Darwin) ifeq ($(KERNEL),Darwin)
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
@@ -434,20 +421,30 @@ endif
# We don't yet know how to make PGO builds with the NDK. # We don't yet know how to make PGO builds with the NDK.
ifeq ($(COMP),ndk) ifeq ($(COMP),ndk)
CXXFLAGS += -stdlib=libc++ -fPIE CXXFLAGS += -stdlib=libc++ -fPIE
comp=clang
ifeq ($(arch),armv7) ifeq ($(arch),armv7)
comp=armv7a-linux-androideabi16-clang
CXX=armv7a-linux-androideabi16-clang++ CXX=armv7a-linux-androideabi16-clang++
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
STRIP=arm-linux-androideabi-strip STRIP=arm-linux-androideabi-strip
endif endif
ifeq ($(arch),armv8) ifeq ($(arch),armv8)
comp=aarch64-linux-android21-clang
CXX=aarch64-linux-android21-clang++ CXX=aarch64-linux-android21-clang++
STRIP=aarch64-linux-android-strip STRIP=aarch64-linux-android-strip
endif endif
LDFLAGS += -static-libstdc++ -pie -lm -latomic LDFLAGS += -static-libstdc++ -pie -lm -latomic
endif endif
ifeq ($(comp),icc)
profile_make = icc-profile-make
profile_use = icc-profile-use
else ifeq ($(comp),clang)
profile_make = clang-profile-make
profile_use = clang-profile-use
else
profile_make = gcc-profile-make
profile_use = gcc-profile-use
endif
### Travis CI script uses COMPILER to overwrite CXX ### Travis CI script uses COMPILER to overwrite CXX
ifdef COMPILER ifdef COMPILER
COMPCXX=$(COMPILER) COMPCXX=$(COMPILER)
@@ -619,10 +616,7 @@ endif
### needs access to the optimization flags. ### needs access to the optimization flags.
ifeq ($(optimize),yes) ifeq ($(optimize),yes)
ifeq ($(debug), no) ifeq ($(debug), no)
ifeq ($(COMP),ndk) ifeq ($(comp),clang)
CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS)
else ifeq ($(comp),clang)
CXXFLAGS += -flto=thin CXXFLAGS += -flto=thin
ifneq ($(findstring MINGW,$(KERNEL)),) ifneq ($(findstring MINGW,$(KERNEL)),)
CXXFLAGS += -fuse-ld=lld CXXFLAGS += -fuse-ld=lld
+125 -108
View File
@@ -29,6 +29,56 @@
namespace Eval::NNUE { namespace Eval::NNUE {
// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
#define TILING
#ifdef USE_AVX512
typedef __m512i vec_t;
#define vec_load(a) _mm512_loadA_si512(a)
#define vec_store(a,b) _mm512_storeA_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
static constexpr IndexType kNumRegs = 8; // only 8 are needed
#elif USE_AVX2
typedef __m256i vec_t;
#define vec_load(a) _mm256_loadA_si256(a)
#define vec_store(a,b) _mm256_storeA_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
static constexpr IndexType kNumRegs = 16;
#elif USE_SSE2
typedef __m128i vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
#elif USE_MMX
typedef __m64 vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
static constexpr IndexType kNumRegs = 8;
#elif USE_NEON
typedef int16x8_t vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
static constexpr IndexType kNumRegs = 16;
#else
#undef TILING
#endif
// Input feature converter // Input feature converter
class FeatureTransformer { class FeatureTransformer {
@@ -36,6 +86,11 @@ namespace Eval::NNUE {
// Number of output dimensions for one side // Number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
#ifdef TILING
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
#endif
public: public:
// Output type // Output type
using OutputType = TransformedFeatureType; using OutputType = TransformedFeatureType;
@@ -205,57 +260,41 @@ namespace Eval::NNUE {
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices); active_indices);
for (Color perspective : { WHITE, BLACK }) { for (Color perspective : { WHITE, BLACK }) {
#ifdef TILING
for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (unsigned k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
std::memcpy(accumulator.accumulation[perspective][i], biases_, std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType)); kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) { for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index; const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX512)
auto accumulation = reinterpret_cast<__m512i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
for (IndexType j = 0; j < kNumChunks; ++j)
_mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
#elif defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
_mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
#elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
#elif defined(USE_MMX)
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
#elif defined(USE_NEON)
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j]; accumulator.accumulation[perspective][i][j] += weights_[offset + j];
#endif
} }
#endif
} }
#if defined(USE_MMX) #if defined(USE_MMX)
_mm_empty(); _mm_empty();
#endif #endif
@@ -273,29 +312,55 @@ namespace Eval::NNUE {
bool reset[2]; bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset); removed_indices, added_indices, reset);
for (Color perspective : { WHITE, BLACK }) {
#if defined(USE_AVX2) #ifdef TILING
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto accumulation = reinterpret_cast<__m256i*>( for (Color perspective : { WHITE, BLACK }) {
&accumulator.accumulation[perspective][i][0]); auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
vec_t acc[kNumRegs];
#elif defined(USE_SSE2) if (reset[perspective]) {
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); auto biasesTile = reinterpret_cast<const vec_t*>(
auto accumulation = reinterpret_cast<__m128i*>( &biases_[j * kTileHeight]);
&accumulator.accumulation[perspective][i][0]); for (unsigned k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
#elif defined(USE_MMX) // Difference calculation for the deactivated features
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); for (const auto index : removed_indices[perspective]) {
auto accumulation = reinterpret_cast<__m64*>( const IndexType offset = kHalfDimensions * index + j * kTileHeight;
&accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
#elif defined(USE_NEON) for (IndexType k = 0; k < kNumRegs; ++k)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); acc[k] = vec_sub_16(acc[k], column[k]);
auto accumulation = reinterpret_cast<int16x8_t*>( }
&accumulator.accumulation[perspective][i][0]); }
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
}
#if defined(USE_MMX)
_mm_empty();
#endif #endif
#else
for (Color perspective : { WHITE, BLACK }) {
if (reset[perspective]) { if (reset[perspective]) {
std::memcpy(accumulator.accumulation[perspective][i], biases_, std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType)); kHalfDimensions * sizeof(BiasType));
@@ -307,67 +372,19 @@ namespace Eval::NNUE {
for (const auto index : removed_indices[perspective]) { for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index; const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
#endif
} }
} }
{ // Difference calculation for the activated features { // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) { for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index; const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j]; accumulator.accumulation[perspective][i][j] += weights_[offset + j];
#endif
} }
} }
} }
#if defined(USE_MMX)
_mm_empty();
#endif #endif
accumulator.computed_accumulation = true; accumulator.computed_accumulation = true;
+1
View File
@@ -194,6 +194,7 @@ public:
// Returns the square of the king of color c. // Returns the square of the king of color c.
Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; } Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; }
#endif // EVAL_LEARN #endif // EVAL_LEARN
bool RootInTB;
private: private:
// Initialization helpers (used while setting up a position) // Initialization helpers (used while setting up a position)
+9 -18
View File
@@ -43,7 +43,6 @@ namespace Search {
namespace Tablebases { namespace Tablebases {
int Cardinality; int Cardinality;
bool RootInTB;
bool UseRule50; bool UseRule50;
Depth ProbeDepth; Depth ProbeDepth;
} }
@@ -520,7 +519,7 @@ void Thread::search() {
totBestMoveChanges += th->bestMoveChanges; totBestMoveChanges += th->bestMoveChanges;
th->bestMoveChanges = 0; th->bestMoveChanges = 0;
} }
double bestMoveInstability = 1 + totBestMoveChanges / Threads.size(); double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size();
double totalTime = rootMoves.size() == 1 ? 0 : double totalTime = rootMoves.size() == 1 ? 0 :
Time.optimum() * fallingEval * reduction * bestMoveInstability; Time.optimum() * fallingEval * reduction * bestMoveInstability;
@@ -654,9 +653,7 @@ namespace {
// starts with statScore = 0. Later grandchildren start with the last calculated // starts with statScore = 0. Later grandchildren start with the last calculated
// statScore of the previous grandchild. This influences the reduction rules in // statScore of the previous grandchild. This influences the reduction rules in
// LMR which are based on the statScore of the parent position. // LMR which are based on the statScore of the parent position.
if (rootNode) if (!rootNode)
(ss+4)->statScore = 0;
else
(ss+2)->statScore = 0; (ss+2)->statScore = 0;
// Step 4. Transposition table lookup. We don't want the score of a partial // Step 4. Transposition table lookup. We don't want the score of a partial
@@ -1062,7 +1059,6 @@ moves_loop: // When in check, search starts from here
if ( !givesCheck if ( !givesCheck
&& lmrDepth < 6 && lmrDepth < 6
&& !(PvNode && abs(bestValue) < 2) && !(PvNode && abs(bestValue) < 2)
&& PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
&& !ss->inCheck && !ss->inCheck
&& ss->staticEval + 169 + 244 * lmrDepth && ss->staticEval + 169 + 244 * lmrDepth
+ PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
@@ -1133,11 +1129,6 @@ moves_loop: // When in check, search starts from here
&& pos.non_pawn_material() <= 2 * RookValueMg) && pos.non_pawn_material() <= 2 * RookValueMg)
extension = 1; extension = 1;
// Castling extension
if ( type_of(move) == CASTLING
&& popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
extension = 1;
// Late irreversible move extension // Late irreversible move extension
if ( move == ttMove if ( move == ttMove
&& pos.rule50_count() > 80 && pos.rule50_count() > 80
@@ -1853,7 +1844,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
size_t pvIdx = pos.this_thread()->pvIdx; size_t pvIdx = pos.this_thread()->pvIdx;
size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size()); size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size());
uint64_t nodesSearched = Threads.nodes_searched(); uint64_t nodesSearched = Threads.nodes_searched();
uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); uint64_t tbHits = Threads.tb_hits() + (pos.RootInTB ? rootMoves.size() : 0);
for (size_t i = 0; i < multiPV; ++i) for (size_t i = 0; i < multiPV; ++i)
{ {
@@ -1868,7 +1859,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
if (v == -VALUE_INFINITE) if (v == -VALUE_INFINITE)
v = VALUE_ZERO; v = VALUE_ZERO;
bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; bool tb = pos.RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
v = tb ? rootMoves[i].tbScore : v; v = tb ? rootMoves[i].tbScore : v;
if (ss.rdbuf()->in_avail()) // Not at first line if (ss.rdbuf()->in_avail()) // Not at first line
@@ -1935,7 +1926,7 @@ bool RootMove::extract_ponder_from_tt(Position& pos) {
void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
RootInTB = false; pos.RootInTB = false;
UseRule50 = bool(Options["Syzygy50MoveRule"]); UseRule50 = bool(Options["Syzygy50MoveRule"]);
ProbeDepth = int(Options["SyzygyProbeDepth"]); ProbeDepth = int(Options["SyzygyProbeDepth"]);
Cardinality = int(Options["SyzygyProbeLimit"]); Cardinality = int(Options["SyzygyProbeLimit"]);
@@ -1952,17 +1943,17 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
{ {
// Rank moves using DTZ tables // Rank moves using DTZ tables
RootInTB = root_probe(pos, rootMoves); pos.RootInTB = root_probe(pos, rootMoves);
if (!RootInTB) if (!pos.RootInTB)
{ {
// DTZ tables are missing; try to rank moves using WDL tables // DTZ tables are missing; try to rank moves using WDL tables
dtz_available = false; dtz_available = false;
RootInTB = root_probe_wdl(pos, rootMoves); pos.RootInTB = root_probe_wdl(pos, rootMoves);
} }
} }
if (RootInTB) if (pos.RootInTB)
{ {
// Sort moves according to TB rank // Sort moves according to TB rank
std::stable_sort(rootMoves.begin(), rootMoves.end(), std::stable_sort(rootMoves.begin(), rootMoves.end(),
+46 -1
View File
@@ -32,7 +32,27 @@ TranspositionTable TT; // Our global transposition table
/// overwriting an old position. Update is not atomic and can be racy. /// overwriting an old position. Update is not atomic and can be racy.
void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) { void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
if (Options["Training"])
return;
// Preserve any existing move for the same position
if (m || (uint16_t)k != key16)
move16 = (uint16_t)m;
// Overwrite less valuable entries (cheapest checks first)
if (b == BOUND_EXACT
|| (uint16_t)k != key16
|| d - DEPTH_OFFSET > depth8 - 4)
{
assert(d > DEPTH_OFFSET);
assert(d < 256 + DEPTH_OFFSET);
key16 = (uint16_t)k;
depth8 = (uint8_t)(d - DEPTH_OFFSET);
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
value16 = (int16_t)v;
eval16 = (int16_t)ev;
}
} }
@@ -97,7 +117,32 @@ void TranspositionTable::clear() {
/// TTEntry t2 if its replace value is greater than that of t2. /// TTEntry t2 if its replace value is greater than that of t2.
TTEntry* TranspositionTable::probe(const Key key, bool& found) const { TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
return found = false, first_entry(0); if (Options["Training"])
return found = false, first_entry(0);
TTEntry* const tte = first_entry(key);
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i)
if (tte[i].key16 == key16 || !tte[i].depth8)
{
tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh
return found = (bool)tte[i].depth8, &tte[i];
}
// Find an entry to be replaced according to the replacement strategy
TTEntry* replace = tte;
for (int i = 1; i < ClusterSize; ++i)
// Due to our packed storage format for generation and its cyclic
// nature we add 263 (256 is the modulus, plus 7 to keep the unrelated
// lowest three bits from affecting the result) to calculate the entry
// age correctly even after generation8 overflows into the next cycle.
if ( replace->depth8 - ((263 + generation8 - replace->genBound8) & 0xF8)
> tte[i].depth8 - ((263 + generation8 - tte[i].genBound8) & 0xF8))
replace = &tte[i];
return found = false, replace;
} }
+1 -1
View File
@@ -200,7 +200,7 @@ namespace UCI {
if (token == "go" || token == "eval") if (token == "go" || token == "eval")
{ {
cerr << "\nPosition: " << cnt++ << '/' << num << endl; cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl;
if (token == "go") if (token == "go")
{ {
go(pos, is, states); go(pos, is, states);