Merge branch 'master' of github.com:official-stockfish/Stockfish into nnue-player-merge

This commit is contained in:
nodchip
2020-08-10 08:52:55 +09:00
8 changed files with 35 additions and 117 deletions
+2 -2
View File
@@ -368,8 +368,8 @@ endif
endif endif
ifeq ($(KERNEL),Darwin) ifeq ($(KERNEL),Darwin)
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.15 CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
LDFLAGS += -arch $(arch) -mmacosx-version-min=10.15 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
endif endif
### Travis CI script uses COMPILER to overwrite CXX ### Travis CI script uses COMPILER to overwrite CXX
+9 -5
View File
@@ -51,9 +51,13 @@ namespace Eval {
std::string eval_file = std::string(Options["EvalFile"]); std::string eval_file = std::string(Options["EvalFile"]);
if (useNNUE && eval_file_loaded != eval_file) if (useNNUE && eval_file_loaded != eval_file)
{ {
std::cerr << "Use of NNUE evaluation, but the file " << eval_file << " was not loaded successfully. " UCI::OptionsMap defaults;
<< "These network evaluation parameters must be available, compatible with this version of the code. " UCI::init(defaults);
<< "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << std::endl;
std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. "
<< "These network evaluation parameters must be available, and compatible with this version of the code. "
<< "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. "
<< "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl;
std::exit(EXIT_FAILURE); std::exit(EXIT_FAILURE);
} }
@@ -111,7 +115,7 @@ namespace {
constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300); constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222); constexpr Value SpaceThreshold = Value(12222);
constexpr Value NNUEThreshold = Value(520); constexpr Value NNUEThreshold = Value(460);
// KingAttackWeights[PieceType] contains king attack weights by piece type // KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -942,7 +946,7 @@ Value Eval::evaluate(const Position& pos) {
{ {
Value v = eg_value(pos.psq_score()); Value v = eg_value(pos.psq_score());
// Take NNUE eval only on balanced positions // Take NNUE eval only on balanced positions
if (abs(v) < NNUEThreshold) if (abs(v) < NNUEThreshold + 20 * pos.count<PAWN>())
return NNUE::evaluate(pos) + Tempo; return NNUE::evaluate(pos) + Tempo;
} }
return Evaluation<NO_TRACE>(pos).value(); return Evaluation<NO_TRACE>(pos).value();
+4 -4
View File
@@ -321,9 +321,9 @@ void prefetch(void* addr) {
/// ///
void* std_aligned_alloc(size_t alignment, size_t size) { void* std_aligned_alloc(size_t alignment, size_t size) {
#if defined(__APPLE__) #if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
return aligned_alloc(alignment, size); return aligned_alloc(alignment, size);
#elif defined(_WIN32) #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
return _mm_malloc(size, alignment); return _mm_malloc(size, alignment);
#else #else
return std::aligned_alloc(alignment, size); return std::aligned_alloc(alignment, size);
@@ -331,9 +331,9 @@ void* std_aligned_alloc(size_t alignment, size_t size) {
} }
void std_aligned_free(void* ptr) { void std_aligned_free(void* ptr) {
#if defined(__APPLE__) #if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
free(ptr); free(ptr);
#elif defined(_WIN32) #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
_mm_free(ptr); _mm_free(ptr);
#else #else
free(ptr); free(ptr);
+5 -24
View File
@@ -123,13 +123,8 @@ namespace Eval::NNUE::Layers {
__m512i sum = _mm512_setzero_si512(); __m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m512i product = _mm512_maddubs_epi16(
#if defined(__MINGW32__) || defined(__MINGW64__) _mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
__m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
__m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#endif
product = _mm512_madd_epi16(product, kOnes); product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product); sum = _mm512_add_epi32(sum, product);
} }
@@ -144,12 +139,8 @@ namespace Eval::NNUE::Layers {
const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]); const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
int j = kNumChunks * 2; int j = kNumChunks * 2;
#if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2. __m256i sum256 = _mm256_maddubs_epi16(
__m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); _mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
#else
__m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
#endif
sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
sum256 = _mm256_hadd_epi32(sum256, sum256); sum256 = _mm256_hadd_epi32(sum256, sum256);
sum256 = _mm256_hadd_epi32(sum256, sum256); sum256 = _mm256_hadd_epi32(sum256, sum256);
@@ -163,17 +154,7 @@ namespace Eval::NNUE::Layers {
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16( __m256i product = _mm256_maddubs_epi16(
_mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes); product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product); sum = _mm256_add_epi32(sum, product);
} }
+6 -43
View File
@@ -86,50 +86,13 @@ namespace Eval::NNUE::Layers {
const auto out = reinterpret_cast<__m256i*>(output); const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) { for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 0]),
#if defined(__MINGW32__) || defined(__MINGW64__) _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 0]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 2]),
#if defined(__MINGW32__) || defined(__MINGW64__) _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
_mm256_loadu_si256 _mm256_store_si256(
#else &out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_load_si256
#endif
(&in[i * 4 + 2]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 3])), kWeightScaleBits);
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets)); _mm256_packs_epi16(words0, words1), kZero), kOffsets));
} }
constexpr IndexType kStart = kNumChunks * kSimdWidth; constexpr IndexType kStart = kNumChunks * kSimdWidth;
+3 -31
View File
@@ -126,36 +126,12 @@ namespace Eval::NNUE {
auto out = reinterpret_cast<__m256i*>(&output[offset]); auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 = __m256i sum0 =
_mm256_load_si256(&reinterpret_cast<const __m256i*>(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]); accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 = __m256i sum1 =
_mm256_load_si256(&reinterpret_cast<const __m256i*>(
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]); accumulation[perspectives[p]][0])[j * 2 + 1]);
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl)); _mm256_packs_epi16(sum0, sum1), kZero), kControl));
} }
@@ -218,11 +194,7 @@ namespace Eval::NNUE {
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]); auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
#else
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
#endif
} }
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
+5 -8
View File
@@ -204,21 +204,18 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
// We use Position::set() to set root position across threads. But there are // We use Position::set() to set root position across threads. But there are
// some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
// be deduced from a fen string, so set() clears them and to not lose the info // be deduced from a fen string, so set() clears them and they are set from
// we need to backup and later restore setupStates->back(). Note that setupStates // setupStates->back() later. The rootState is per thread, earlier states are shared
// is shared by threads but is accessed in read-only mode. // since they are read-only.
StateInfo tmp = setupStates->back();
for (Thread* th : *this) for (Thread* th : *this)
{ {
th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
th->rootDepth = th->completedDepth = 0; th->rootDepth = th->completedDepth = 0;
th->rootMoves = rootMoves; th->rootMoves = rootMoves;
th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th); th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
th->rootState = setupStates->back();
} }
setupStates->back() = tmp;
main()->start_searching(); main()->start_searching();
} }
+1
View File
@@ -65,6 +65,7 @@ public:
std::atomic<uint64_t> nodes, tbHits, bestMoveChanges; std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
Position rootPos; Position rootPos;
StateInfo rootState;
Search::RootMoves rootMoves; Search::RootMoves rootMoves;
Depth rootDepth, completedDepth; Depth rootDepth, completedDepth;
CounterMoveHistory counterMoves; CounterMoveHistory counterMoves;