Cleanup and simplify NNUE code.

A lot of optimizations happend since the NNUE was introduced
and since then some parts of the code were left unused. This
got to the point where asserts were have to be made just to
let people know that modifying something will not have any
effects or may even break everything due to the assumptions
being made. Removing these parts removes those inexisting
"false dependencies". Additionally:

 * append_changed_indices now takes the king pos and stateinfo
   explicitly, no more misleading pos parameter
 * IndexList is removed in favor of a generic ValueList.
   Feature transformer just instantiates the type it needs.
 * The update cost and refresh requirement is deferred to the
   feature set once again, but now doesn't go through the whole
   FeatureSet machinery and just calls HalfKP directly.
 * accumulator no longer has a singular dimension.
 * The PS constants and the PieceSquareIndex array are made local
   to the HalfKP feature set because they are specific to it and
   DO differ for other feature sets.
 * A few names are changed to more descriptive

Passed STC non-regression:
https://tests.stockfishchess.org/tests/view/608421dd95e7f1852abd2790
LLR: 2.95 (-2.94,2.94) <-2.50,0.50>
Total: 180008 W: 16186 L: 16258 D: 147564
Ptnml(0-2): 587, 12593, 63725, 12503, 596

closes https://github.com/official-stockfish/Stockfish/pull/3441

No functional change
This commit is contained in:
Tomasz Sobczyk
2021-04-24 15:08:11 +02:00
committed by Joost VandeVondele
parent 32d781769d
commit b748b46714
11 changed files with 219 additions and 363 deletions
+55 -55
View File
@@ -23,7 +23,8 @@
#include "nnue_common.h"
#include "nnue_architecture.h"
#include "features/index_list.h"
#include "../misc.h"
#include <cstring> // std::memset()
@@ -96,7 +97,7 @@ namespace Stockfish::Eval::NNUE {
using OutputType = TransformedFeatureType;
// Number of input/output dimensions
static constexpr IndexType InputDimensions = RawFeatures::Dimensions;
static constexpr IndexType InputDimensions = FeatureSet::Dimensions;
static constexpr IndexType OutputDimensions = HalfDimensions * 2;
// Size of forward propagation buffer
@@ -105,7 +106,7 @@ namespace Stockfish::Eval::NNUE {
// Hash value embedded in the evaluation file
static constexpr std::uint32_t get_hash_value() {
return RawFeatures::HashValue ^ OutputDimensions;
return FeatureSet::HashValue ^ OutputDimensions;
}
// Read network parameters
@@ -161,9 +162,9 @@ namespace Stockfish::Eval::NNUE {
auto out = reinterpret_cast<__m512i*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m512i sum0 = _mm512_load_si512(
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]])[j * 2 + 0]);
__m512i sum1 = _mm512_load_si512(
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]])[j * 2 + 1]);
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(Control,
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), Zero)));
}
@@ -172,9 +173,9 @@ namespace Stockfish::Eval::NNUE {
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m256i sum0 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]])[j * 2 + 0]);
__m256i sum1 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]])[j * 2 + 1]);
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), Zero), Control));
}
@@ -183,9 +184,9 @@ namespace Stockfish::Eval::NNUE {
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
accumulation[perspectives[p]])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
accumulation[perspectives[p]])[j * 2 + 1]);
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
@@ -203,9 +204,9 @@ namespace Stockfish::Eval::NNUE {
auto out = reinterpret_cast<__m64*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
accumulation[perspectives[p]])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
accumulation[perspectives[p]])[j * 2 + 1]);
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
@@ -214,13 +215,13 @@ namespace Stockfish::Eval::NNUE {
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < NumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
accumulation[perspectives[p]])[j];
out[j] = vmax_s8(vqmovn_s16(sum), Zero);
}
#else
for (IndexType j = 0; j < HalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
BiasType sum = accumulation[static_cast<int>(perspectives[p])][j];
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
@@ -233,7 +234,13 @@ namespace Stockfish::Eval::NNUE {
}
private:
void update_accumulator(const Position& pos, const Color c) const {
void update_accumulator(const Position& pos, const Color perspective) const {
// The size must be enough to contain the largest possible update.
// That might depend on the feature set and generally relies on the
// feature set's update cost calculation to be correct and never
// allow updates with more added/removed features than MaxActiveDimensions.
using IndexList = ValueList<IndexType, FeatureSet::MaxActiveDimensions>;
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
@@ -244,23 +251,19 @@ namespace Stockfish::Eval::NNUE {
// Look for a usable accumulator of an earlier position. We keep track
// of the estimated gain in terms of features to be added/subtracted.
StateInfo *st = pos.state(), *next = nullptr;
int gain = pos.count<ALL_PIECES>() - 2;
while (st->accumulator.state[c] == EMPTY)
int gain = FeatureSet::refresh_cost(pos);
while (st->accumulator.state[perspective] == EMPTY)
{
auto& dp = st->dirtyPiece;
// The first condition tests whether an incremental update is
// possible at all: if this side's king has moved, it is not possible.
static_assert(std::is_same_v<RawFeatures::SortedTriggerSet,
Features::CompileTimeList<Features::TriggerEvent, Features::TriggerEvent::FriendKingMoved>>,
"Current code assumes that only FriendlyKingMoved refresh trigger is being used.");
if ( dp.piece[0] == make_piece(c, KING)
|| (gain -= dp.dirty_num + 1) < 0)
// This governs when a full feature refresh is needed and how many
// updates are better than just one full refresh.
if ( FeatureSet::requires_refresh(st, perspective)
|| (gain -= FeatureSet::update_cost(st) + 1) < 0)
break;
next = st;
st = st->previous;
}
if (st->accumulator.state[c] == COMPUTED)
if (st->accumulator.state[perspective] == COMPUTED)
{
if (next == nullptr)
return;
@@ -268,34 +271,32 @@ namespace Stockfish::Eval::NNUE {
// Update incrementally in two steps. First, we update the "next"
// accumulator. Then, we update the current accumulator (pos.state()).
// Gather all features to be updated. This code assumes HalfKP features
// only and doesn't support refresh triggers.
static_assert(std::is_same_v<Features::FeatureSet<Features::HalfKP<Features::Side::Friend>>,
RawFeatures>);
Features::IndexList removed[2], added[2];
Features::HalfKP<Features::Side::Friend>::append_changed_indices(pos,
next->dirtyPiece, c, &removed[0], &added[0]);
// Gather all features to be updated.
const Square ksq = pos.square<KING>(perspective);
IndexList removed[2], added[2];
FeatureSet::append_changed_indices(
ksq, next, perspective, removed[0], added[0]);
for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous)
Features::HalfKP<Features::Side::Friend>::append_changed_indices(pos,
st2->dirtyPiece, c, &removed[1], &added[1]);
FeatureSet::append_changed_indices(
ksq, st2, perspective, removed[1], added[1]);
// Mark the accumulators as computed.
next->accumulator.state[c] = COMPUTED;
pos.state()->accumulator.state[c] = COMPUTED;
next->accumulator.state[perspective] = COMPUTED;
pos.state()->accumulator.state[perspective] = COMPUTED;
// Now update the accumulators listed in info[], where the last element is a sentinel.
StateInfo *info[3] =
// Now update the accumulators listed in states_to_update[], where the last element is a sentinel.
StateInfo *states_to_update[3] =
{ next, next == pos.state() ? nullptr : pos.state(), nullptr };
#ifdef VECTOR
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
{
// Load accumulator
auto accTile = reinterpret_cast<vec_t*>(
&st->accumulator.accumulation[c][0][j * TileHeight]);
&st->accumulator.accumulation[perspective][j * TileHeight]);
for (IndexType k = 0; k < NumRegs; ++k)
acc[k] = vec_load(&accTile[k]);
for (IndexType i = 0; info[i]; ++i)
for (IndexType i = 0; states_to_update[i]; ++i)
{
// Difference calculation for the deactivated features
for (const auto index : removed[i])
@@ -317,19 +318,19 @@ namespace Stockfish::Eval::NNUE {
// Store accumulator
accTile = reinterpret_cast<vec_t*>(
&info[i]->accumulator.accumulation[c][0][j * TileHeight]);
&states_to_update[i]->accumulator.accumulation[perspective][j * TileHeight]);
for (IndexType k = 0; k < NumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
}
#else
for (IndexType i = 0; info[i]; ++i)
for (IndexType i = 0; states_to_update[i]; ++i)
{
std::memcpy(info[i]->accumulator.accumulation[c][0],
st->accumulator.accumulation[c][0],
std::memcpy(states_to_update[i]->accumulator.accumulation[perspective],
st->accumulator.accumulation[perspective],
HalfDimensions * sizeof(BiasType));
st = info[i];
st = states_to_update[i];
// Difference calculation for the deactivated features
for (const auto index : removed[i])
@@ -337,7 +338,7 @@ namespace Stockfish::Eval::NNUE {
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
st->accumulator.accumulation[c][0][j] -= weights[offset + j];
st->accumulator.accumulation[perspective][j] -= weights[offset + j];
}
// Difference calculation for the activated features
@@ -346,7 +347,7 @@ namespace Stockfish::Eval::NNUE {
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
st->accumulator.accumulation[c][0][j] += weights[offset + j];
st->accumulator.accumulation[perspective][j] += weights[offset + j];
}
}
#endif
@@ -355,9 +356,9 @@ namespace Stockfish::Eval::NNUE {
{
// Refresh the accumulator
auto& accumulator = pos.state()->accumulator;
accumulator.state[c] = COMPUTED;
Features::IndexList active;
Features::HalfKP<Features::Side::Friend>::append_active_indices(pos, c, &active);
accumulator.state[perspective] = COMPUTED;
IndexList active;
FeatureSet::append_active_indices(pos, perspective, active);
#ifdef VECTOR
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
@@ -377,13 +378,13 @@ namespace Stockfish::Eval::NNUE {
}
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[c][0][j * TileHeight]);
&accumulator.accumulation[perspective][j * TileHeight]);
for (unsigned k = 0; k < NumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
std::memcpy(accumulator.accumulation[c][0], biases,
std::memcpy(accumulator.accumulation[perspective], biases,
HalfDimensions * sizeof(BiasType));
for (const auto index : active)
@@ -391,7 +392,7 @@ namespace Stockfish::Eval::NNUE {
const IndexType offset = HalfDimensions * index;
for (IndexType j = 0; j < HalfDimensions; ++j)
accumulator.accumulation[c][0][j] += weights[offset + j];
accumulator.accumulation[perspective][j] += weights[offset + j];
}
#endif
}
@@ -405,8 +406,7 @@ namespace Stockfish::Eval::NNUE {
using WeightType = std::int16_t;
alignas(CacheLineSize) BiasType biases[HalfDimensions];
alignas(CacheLineSize)
WeightType weights[HalfDimensions * InputDimensions];
alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions];
};
} // namespace Stockfish::Eval::NNUE