Merge branch 'master' of github.com:official-stockfish/Stockfish into nnue-player-merge

# Conflicts:
#	README.md
#	Readme.md
#	src/Makefile
#	src/evaluate.cpp
#	src/evaluate.h
#	src/misc.cpp
#	src/nnue/architectures/halfkp_256x2-32-32.h
#	src/nnue/evaluate_nnue.cpp
#	src/nnue/evaluate_nnue.h
#	src/nnue/features/feature_set.h
#	src/nnue/features/features_common.h
#	src/nnue/features/half_kp.cpp
#	src/nnue/features/half_kp.h
#	src/nnue/features/index_list.h
#	src/nnue/layers/affine_transform.h
#	src/nnue/layers/clipped_relu.h
#	src/nnue/layers/input_slice.h
#	src/nnue/nnue_accumulator.h
#	src/nnue/nnue_architecture.h
#	src/nnue/nnue_common.h
#	src/nnue/nnue_feature_transformer.h
#	src/position.cpp
#	src/position.h
#	src/types.h
#	src/ucioption.cpp
#	stockfish.md
This commit is contained in:
nodchip
2020-08-08 15:55:42 +09:00
74 changed files with 2527 additions and 2729 deletions
+25 -10
View File
@@ -1,7 +1,25 @@
// Definition of input features and network structure used in NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef HALFKP_256X2_32_32_H
#define HALFKP_256X2_32_32_H
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of input features and network structure used in NNUE evaluation function
#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
@@ -10,9 +28,7 @@
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
@@ -23,7 +39,7 @@ constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// define network structure
// Define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
@@ -33,7 +49,6 @@ using OutputLayer = AffineTransform<HiddenLayer2, 1>;
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval::NNUE
} // namespace Eval
#endif // HALFKP_256X2_32_32_H
#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
+165 -277
View File
@@ -1,9 +1,26 @@
// Code for calculating NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#if defined(EVAL_NNUE)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Code for calculating NNUE evaluation function
#include <fstream>
#include <iostream>
#include <set>
#include "../evaluate.h"
#include "../position.h"
@@ -12,315 +29,186 @@
#include "evaluate_nnue.h"
namespace Eval {
ExtPieceSquare kpp_board_index[PIECE_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE },
{ PS_W_PAWN, PS_B_PAWN },
{ PS_W_KNIGHT, PS_B_KNIGHT },
{ PS_W_BISHOP, PS_B_BISHOP },
{ PS_W_ROOK, PS_B_ROOK },
{ PS_W_QUEEN, PS_B_QUEEN },
{ PS_W_KING, PS_B_KING },
{ PS_NONE, PS_NONE },
{ PS_NONE, PS_NONE },
{ PS_B_PAWN, PS_W_PAWN },
{ PS_B_KNIGHT, PS_W_KNIGHT },
{ PS_B_BISHOP, PS_W_BISHOP },
{ PS_B_ROOK, PS_W_ROOK },
{ PS_B_QUEEN, PS_W_QUEEN },
{ PS_B_KING, PS_W_KING },
{ PS_NONE, PS_NONE }
};
namespace NNUE {
// Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer;
namespace Eval::NNUE {
// Evaluation function
AlignedPtr<Network> network;
// Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer;
// Evaluation function file name
std::string fileName = "nn.bin";
// Evaluation function
AlignedPtr<Network> network;
// Saved evaluation function file name
std::string savedfileName = "nn.bin";
// Evaluation function file name
std::string fileName;
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString() {
return "Features=" + FeatureTransformer::GetStructureString() +
// Saved evaluation function file name
std::string savedfileName = "nn.bin";
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString() {
return "Features=" + FeatureTransformer::GetStructureString() +
",Network=" + Network::GetStructureString();
}
}
namespace {
namespace Detail {
namespace Detail {
// Initialize the evaluation function parameters
template <typename T>
void Initialize(AlignedPtr<T>& pointer) {
// Initialize the evaluation function parameters
template <typename T>
void Initialize(AlignedPtr<T>& pointer) {
pointer.reset(reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
// read evaluation function parameters
template <typename T>
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
std::uint32_t header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header));
if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream);
}
// Read evaluation function parameters
template <typename T>
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
// write evaluation function parameters
template <typename T>
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
constexpr std::uint32_t header = T::GetHashValue();
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
return pointer->WriteParameters(stream);
}
std::uint32_t header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header));
if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream);
}
} // namespace Detail
// write evaluation function parameters
template <typename T>
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
constexpr std::uint32_t header = T::GetHashValue();
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
return pointer->WriteParameters(stream);
}
// Initialize the evaluation function parameters
void Initialize() {
Detail::Initialize(feature_transformer);
Detail::Initialize(network);
}
} // namespace Detail
} // namespace
// Initialize the evaluation function parameters
void Initialize() {
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture) {
std::uint32_t version, size;
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
stream.read(reinterpret_cast<char*>(&size), sizeof(size));
if (!stream || version != kVersion) return false;
architecture->resize(size);
stream.read(&(*architecture)[0], size);
return !stream.fail();
}
Detail::Initialize(feature_transformer);
Detail::Initialize(network);
}
// write the header
bool WriteHeader(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture) {
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
stream.write(architecture.data(), size);
return !stream.fail();
}
// Read network header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture) {
// read evaluation function parameters
bool ReadParameters(std::istream& stream) {
std::uint32_t hash_value;
std::string architecture;
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
if (hash_value != kHashValue) return false;
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
if (!Detail::ReadParameters(stream, network)) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
std::uint32_t version, size;
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
stream.read(reinterpret_cast<char*>(&size), sizeof(size));
if (!stream || version != kVersion) return false;
architecture->resize(size);
stream.read(&(*architecture)[0], size);
return !stream.fail();
}
// write evaluation function parameters
bool WriteParameters(std::ostream& stream) {
if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
if (!Detail::WriteParameters(stream, feature_transformer)) return false;
if (!Detail::WriteParameters(stream, network)) return false;
return !stream.fail();
}
// write the header
bool WriteHeader(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture) {
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
stream.write(architecture.data(), size);
return !stream.fail();
}
// proceed if you can calculate the difference
static void UpdateAccumulatorIfPossible(const Position& pos) {
feature_transformer->UpdateAccumulatorIfPossible(pos);
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
// Calculate the evaluation value
static Value ComputeScore(const Position& pos, bool refresh = false) {
auto& accumulator = pos.state()->accumulator;
if (!refresh && accumulator.computed_score) {
std::uint32_t hash_value;
std::string architecture;
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
if (hash_value != kHashValue) return false;
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
if (!Detail::ReadParameters(stream, network)) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
// write evaluation function parameters
bool WriteParameters(std::ostream& stream) {
if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
if (!Detail::WriteParameters(stream, feature_transformer)) return false;
if (!Detail::WriteParameters(stream, network)) return false;
return !stream.fail();
}
// Proceed with the difference calculation if possible
static void UpdateAccumulatorIfPossible(const Position& pos) {
feature_transformer->UpdateAccumulatorIfPossible(pos);
}
// Calculate the evaluation value
static Value ComputeScore(const Position& pos, bool refresh) {
auto& accumulator = pos.state()->accumulator;
if (!refresh && accumulator.computed_score) {
return accumulator.score;
}
alignas(kCacheLineSize) TransformedFeatureType
transformed_features[FeatureTransformer::kBufferSize];
feature_transformer->Transform(pos, transformed_features, refresh);
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
const auto output = network->Propagate(transformed_features, buffer);
auto score = static_cast<Value>(output[0] / FV_SCALE);
accumulator.score = score;
accumulator.computed_score = true;
return accumulator.score;
}
alignas(kCacheLineSize) TransformedFeatureType
transformed_features[FeatureTransformer::kBufferSize];
feature_transformer->Transform(pos, transformed_features, refresh);
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
const auto output = network->Propagate(transformed_features, buffer);
// Load the evaluation function file
bool load_eval_file(const std::string& evalFile) {
// When a value larger than VALUE_MAX_EVAL is returned, aspiration search fails high
// It should be guaranteed that it is less than VALUE_MAX_EVAL because the search will not end.
Initialize();
fileName = evalFile;
// Even if this phenomenon occurs, if the seconds are fixed when playing, the search will be aborted there, so
// The best move in the previous iteration is pointed to as bestmove, so apparently
// no problem. The situation in which this VALUE_MAX_EVAL is returned is almost at a dead end,
// Since such a jamming phase often appears at the end, there is a big difference in the situation
// Doesn't really affect the outcome.
std::ifstream stream(evalFile, std::ios::binary);
// However, when searching with a fixed depth such as when creating a teacher, it will not return from the search
// Waste the computation time for that thread. Also, it will be timed out with fixed depth game.
const bool result = ReadParameters(stream);
auto score = static_cast<Value>(output[0] / FV_SCALE);
// 1) I feel that if I clip too poorly, it will have an effect on my learning...
// 2) Since accumulator.score is not used at the time of difference calculation, it can be rewritten without any problem.
score = Math::clamp(score , -VALUE_MAX_EVAL , VALUE_MAX_EVAL);
accumulator.score = score;
accumulator.computed_score = true;
return accumulator.score;
}
} // namespace NNUE
#if defined(USE_EVAL_HASH)
// Class used to store evaluation values in HashTable
struct alignas(16) ScoreKeyValue {
#if defined(USE_SSE2)
ScoreKeyValue() = default;
ScoreKeyValue(const ScoreKeyValue& other) {
static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i),
"sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)");
_mm_store_si128(&as_m128i, other.as_m128i);
}
ScoreKeyValue& operator=(const ScoreKeyValue& other) {
_mm_store_si128(&as_m128i, other.as_m128i);
return *this;
}
#endif
// It is necessary to be able to operate atomically with evaluate hash, so the manipulator for that
void encode() {
#if defined(USE_SSE2)
// ScoreKeyValue is copied to atomic, so if the key matches, the data matches.
#else
key ^= score;
#endif
}
// decode() is the reverse conversion of encode(), but since it is xor, the reverse conversion is the same.
void decode() { encode(); }
union {
struct {
std::uint64_t key;
std::uint64_t score;
};
#if defined(USE_SSE2)
__m128i as_m128i;
#endif
};
};
// Simple HashTable implementation.
// Size is a power of 2.
template <typename T, size_t Size>
struct HashTable {
HashTable() { clear(); }
T* operator [] (const Key k) { return entries_ + (static_cast<size_t>(k) & (Size - 1)); }
void clear() { memset(entries_, 0, sizeof(T)*Size); }
// Check that Size is a power of 2
static_assert((Size & (Size - 1)) == 0, "");
private:
T entries_[Size];
};
//HashTable to save the evaluated ones (following ehash)
#if !defined(USE_LARGE_EVAL_HASH)
// 134MB (setting other than witch's AVX2)
struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x800000> {};
#else
// If you have prefetch, it's better to have a big one...
// → It doesn't change much and the memory is wasteful, so is it okay to set ↑ by default?
// 1GB (setting for witch's AVX2)
struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x4000000> {};
#endif
EvaluateHashTable g_evalTable;
// Prepare a function to prefetch.
void prefetch_evalhash(const Key key) {
constexpr auto mask = ~((uint64_t)0x1f);
prefetch((void*)((uint64_t)g_evalTable[key] & mask));
}
#endif
// read the evaluation function file
// Save and restore Options with bench command etc., so EvalDir is changed at this time,
// This function may be called twice to flag that the evaluation function needs to be reloaded.
void load_eval() {
// Must be done!
NNUE::Initialize();
if (Options["SkipLoadingEval"])
{
std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
return;
return result;
}
const std::string file_name = Options["EvalFile"];
NNUE::fileName = file_name;
// Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) {
Value v = ComputeScore(pos, false);
v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
std::ifstream stream(file_name, std::ios::binary);
const bool result = NNUE::ReadParameters(stream);
if (!result)
// It's a problem if it doesn't finish when there is a read error.
std::cout << "Error! " << NNUE::fileName << " not found or wrong format" << std::endl;
else
std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl;
}
// Initialization
void init() {
}
// Evaluation function. Perform full calculation instead of difference calculation.
// Called only once with Position::set(). (The difference calculation after that)
// Note that the evaluation value seen from the turn side is returned. (Design differs from other evaluation functions in this respect)
// Since, we will not try to optimize this function.
Value compute_eval(const Position& pos) {
return NNUE::ComputeScore(pos, true);
}
// Evaluation function
Value evaluate(const Position& pos) {
const auto& accumulator = pos.state()->accumulator;
if (accumulator.computed_score) {
return accumulator.score;
return v;
}
#if defined(USE_GLOBAL_OPTIONS)
// If Global Options is set not to use eval hash
// Skip the query to the eval hash.
if (!GlobalOptions.use_eval_hash) {
ASSERT_LV5(pos.state()->materialValue == Eval::material(pos));
return NNUE::ComputeScore(pos);
// Evaluation function. Perform full calculation.
Value compute_eval(const Position& pos) {
return ComputeScore(pos, true);
}
#endif
#if defined(USE_EVAL_HASH)
// May be in the evaluate hash table.
const Key key = pos.key();
ScoreKeyValue entry = *g_evalTable[key];
entry.decode();
if (entry.key == key) {
// there were!
return Value(entry.score);
// Proceed with the difference calculation if possible
void update_eval(const Position& pos) {
UpdateAccumulatorIfPossible(pos);
}
#endif
Value score = NNUE::ComputeScore(pos);
#if defined(USE_EVAL_HASH)
// Since it was calculated carefully, save it in the evaluate hash table.
entry.key = key;
entry.score = score;
entry.encode();
*g_evalTable[key] = entry;
#endif
return score;
}
// proceed if you can calculate the difference
void evaluate_with_no_return(const Position& pos) {
NNUE::UpdateAccumulatorIfPossible(pos);
}
// display the breakdown of the evaluation value of the current phase
void print_eval_stat(Position& /*pos*/) {
std::cout << "--- EVAL STAT: not implemented" << std::endl;
}
} // namespace Eval
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE
+54 -44
View File
@@ -1,67 +1,77 @@
// header used in NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _EVALUATE_NNUE_H_
#define _EVALUATE_NNUE_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// header used in NNUE evaluation function
#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
#define NNUE_EVALUATE_NNUE_H_INCLUDED
#include "nnue_feature_transformer.h"
#include "nnue_architecture.h"
#include <memory>
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
// Hash value of evaluation function structure
constexpr std::uint32_t kHashValue =
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
// hash value of evaluation function structure
constexpr std::uint32_t kHashValue =
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
// Deleter for automating release of memory area
template <typename T>
struct AlignedDeleter {
void operator()(T* ptr) const {
ptr->~T();
std_aligned_free(ptr);
}
};
// Deleter for automating release of memory area
template <typename T>
struct AlignedDeleter {
void operator()(T* ptr) const {
ptr->~T();
aligned_free(ptr);
}
};
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
// Input feature converter
extern AlignedPtr<FeatureTransformer> feature_transformer;
// Input feature converter
extern AlignedPtr<FeatureTransformer> feature_transformer;
// Evaluation function
extern AlignedPtr<Network> network;
// Evaluation function
extern AlignedPtr<Network> network;
// Evaluation function file name
extern std::string fileName;
// Evaluation function file name
extern std::string fileName;
// Saved evaluation function file name
extern std::string savedfileName;
// Saved evaluation function file name
extern std::string savedfileName;
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString();
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString();
// read the header
bool ReadHeader(std::istream& stream,
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture);
// write the header
bool WriteHeader(std::ostream& stream,
// write the header
bool WriteHeader(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture);
// read evaluation function parameters
bool ReadParameters(std::istream& stream);
// read evaluation function parameters
bool ReadParameters(std::istream& stream);
// write evaluation function parameters
bool WriteParameters(std::ostream& stream);
// write evaluation function parameters
bool WriteParameters(std::ostream& stream);
} // namespace NNUE
} // namespace Eval::NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
+1 -1
View File
@@ -23,7 +23,7 @@ namespace Eval {
}
if (perspective == BLACK) {
epSquare = Inv(epSquare);
epSquare = rotate180(epSquare);
}
auto file = file_of(epSquare);
+199 -199
View File
@@ -1,249 +1,249 @@
// A class template that represents the input feature set of the NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_FEATURE_SET_H_
#define _NNUE_FEATURE_SET_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// A class template that represents the input feature set of the NNUE evaluation function
#ifndef NNUE_FEATURE_SET_H_INCLUDED
#define NNUE_FEATURE_SET_H_INCLUDED
#include "features_common.h"
#include <array>
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
// Class template that represents a list of values
template <typename T, T... Values>
struct CompileTimeList;
namespace Features {
template <typename T, T First, T... Remaining>
struct CompileTimeList<T, First, Remaining...> {
static constexpr bool Contains(T value) {
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
}
static constexpr std::array<T, sizeof...(Remaining) + 1>
kValues = {{First, Remaining...}};
};
// A class template that represents a list of values
template <typename T, T... Values>
struct CompileTimeList;
template <typename T, T First, T... Remaining>
struct CompileTimeList<T, First, Remaining...> {
static constexpr bool Contains(T value) {
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
}
static constexpr std::array<T, sizeof...(Remaining) + 1>
kValues = {{First, Remaining...}};
};
template <typename T, T First, T... Remaining>
constexpr std::array<T, sizeof...(Remaining) + 1>
template <typename T, T First, T... Remaining>
constexpr std::array<T, sizeof...(Remaining) + 1>
CompileTimeList<T, First, Remaining...>::kValues;
template <typename T>
struct CompileTimeList<T> {
static constexpr bool Contains(T /*value*/) {
return false;
}
static constexpr std::array<T, 0> kValues = {{}};
};
template <typename T>
struct CompileTimeList<T> {
static constexpr bool Contains(T /*value*/) {
return false;
}
static constexpr std::array<T, 0> kValues = { {} };
};
// Class template that adds to the beginning of the list
template <typename T, typename ListType, T Value>
struct AppendToList;
template <typename T, T... Values, T AnotherValue>
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
using Result = CompileTimeList<T, AnotherValue, Values...>;
};
// Class template that adds to the beginning of the list
template <typename T, typename ListType, T Value>
struct AppendToList;
template <typename T, T... Values, T AnotherValue>
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
using Result = CompileTimeList<T, AnotherValue, Values...>;
};
// Class template for adding to a sorted, unique list
template <typename T, typename ListType, T Value>
struct InsertToSet;
template <typename T, T First, T... Remaining, T AnotherValue>
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
using Result = std::conditional_t<
// Class template for adding to a sorted, unique list
template <typename T, typename ListType, T Value>
struct InsertToSet;
template <typename T, T First, T... Remaining, T AnotherValue>
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
using Result = std::conditional_t<
CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
CompileTimeList<T, First, Remaining...>,
std::conditional_t<(AnotherValue <First),
CompileTimeList<T, AnotherValue, First, Remaining...>,
typename AppendToList<T, typename InsertToSet<
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
First>::Result>>;
};
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
using Result = CompileTimeList<T, Value>;
};
std::conditional_t<(AnotherValue < First),
CompileTimeList<T, AnotherValue, First, Remaining...>,
typename AppendToList<T, typename InsertToSet<
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
First>::Result>>;
};
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
using Result = CompileTimeList<T, Value>;
};
// Base class of feature set
template <typename Derived>
class FeatureSetBase {
public:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void AppendActiveIndices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (const auto perspective :Colors) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &active[perspective]);
}
}
// Base class of feature set
template <typename Derived>
class FeatureSetBase {
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename PositionType, typename IndexListType>
static void AppendChangedIndices(
const PositionType& pos, TriggerEvent trigger,
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
public:
// Get a list of indices for active features
template <typename IndexListType>
static void AppendActiveIndices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (const auto perspective :Colors) {
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kNone:
break;
case TriggerEvent::kFriendKingMoved:
reset[perspective] =
dp.pieceNo[0] == PIECE_NUMBER_KING + perspective;
break;
case TriggerEvent::kEnemyKingMoved:
reset[perspective] =
dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective;
break;
case TriggerEvent::kAnyKingMoved:
reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING;
break;
case TriggerEvent::kAnyPieceMoved:
reset[perspective] = true;
break;
default:
assert(false);
break;
}
if (reset[perspective]) {
for (Color perspective : { WHITE, BLACK }) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, trigger, perspective,
&removed[perspective], &added[perspective]);
pos, trigger, perspective, &active[perspective]);
}
}
}
};
// Class template that represents the feature set
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
// Get a list of indices for recently changed features
template <typename PositionType, typename IndexListType>
static void AppendChangedIndices(
const PositionType& pos, TriggerEvent trigger,
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
for (Color perspective : { WHITE, BLACK }) {
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kFriendKingMoved:
reset[perspective] =
dp.pieceId[0] == PIECE_ID_KING + perspective;
break;
default:
assert(false);
break;
}
if (reset[perspective]) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, trigger, perspective,
&removed[perspective], &added[perspective]);
}
}
}
};
// Class template that represents the feature set
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
public FeatureSetBase<
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = FirstFeatureType;
using Tail = FeatureSet<RemainingFeatureTypes...>;
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = FirstFeatureType;
using Tail = FeatureSet<RemainingFeatureTypes...>;
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
// number of feature dimensions
static constexpr IndexType kDimensions =
// number of feature dimensions
static constexpr IndexType kDimensions =
Head::kDimensions + Tail::kDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Get the feature quantity name
static std::string GetName() {
return std::string(Head::kName) + "+" + Tail::GetName();
}
// Get the feature quantity name
static std::string GetName() {
return std::string(Head::kName) + "+" + Tail::GetName();
}
private:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void CollectActiveIndices(
private:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const active) {
Tail::CollectActiveIndices(pos, trigger, perspective, active);
if (Head::kRefreshTrigger == trigger) {
const auto start = active->size();
Head::AppendActiveIndices(pos, perspective, active);
for (auto i = start; i < active->size(); ++i) {
(*active)[i] += Tail::kDimensions;
Tail::CollectActiveIndices(pos, trigger, perspective, active);
if (Head::kRefreshTrigger == trigger) {
const auto start = active->size();
Head::AppendActiveIndices(pos, perspective, active);
for (auto i = start; i < active->size(); ++i) {
(*active)[i] += Tail::kDimensions;
}
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename IndexListType>
static void CollectChangedIndices(
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename IndexListType>
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const removed, IndexListType* const added) {
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
if (Head::kRefreshTrigger == trigger) {
const auto start_removed = removed->size();
const auto start_added = added->size();
Head::AppendChangedIndices(pos, perspective, removed, added);
for (auto i = start_removed; i < removed->size(); ++i) {
(*removed)[i] += Tail::kDimensions;
}
for (auto i = start_added; i < added->size(); ++i) {
(*added)[i] += Tail::kDimensions;
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
if (Head::kRefreshTrigger == trigger) {
const auto start_removed = removed->size();
const auto start_added = added->size();
Head::AppendChangedIndices(pos, perspective, removed, added);
for (auto i = start_removed; i < removed->size(); ++i) {
(*removed)[i] += Tail::kDimensions;
}
for (auto i = start_added; i < added->size(); ++i) {
(*added)[i] += Tail::kDimensions;
}
}
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Class template that represents the feature set
// Specialization with one template argument
template <typename FeatureType>
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
// number of feature dimensions
static constexpr IndexType kDimensions = FeatureType::kDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet =
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Class template that represents the feature set
template <typename FeatureType>
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
// Get the feature quantity name
static std::string GetName() {
return FeatureType::kName;
}
public:
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
// Number of feature dimensions
static constexpr IndexType kDimensions = FeatureType::kDimensions;
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// Trigger for full calculation instead of difference calculation
using SortedTriggerSet =
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
private:
// Get a list of indices with a value of 1 among the features
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const active) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendActiveIndices(pos, perspective, active);
// Get the feature quantity name
static std::string GetName() {
return FeatureType::kName;
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const removed, IndexList* const added) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
private:
// Get a list of indices for active features
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const active) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendActiveIndices(pos, perspective, active);
}
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Get a list of indices for recently changed features
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const removed, IndexList* const added) {
} // namespace Features
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
}
}
} // namespace NNUE
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
} // namespace Eval
} // namespace Eval::NNUE::Features
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
+38 -35
View File
@@ -1,47 +1,50 @@
//Common header of input features of NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_FEATURES_COMMON_H_
#define _NNUE_FEATURES_COMMON_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Common header of input features of NNUE evaluation function
#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
#define NNUE_FEATURES_COMMON_H_INCLUDED
#include "../../evaluate.h"
#include "../nnue_common.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
class IndexList;
namespace Features {
template <typename... FeatureTypes>
class FeatureSet;
// Index list type
class IndexList;
// Trigger to perform full calculations instead of difference only
enum class TriggerEvent {
kNone, // Calculate the difference whenever possible
kFriendKingMoved, // calculate all when own ball moves
kEnemyKingMoved, // do all calculations when enemy balls move
kAnyKingMoved, // do all calculations if either ball moves
kAnyPieceMoved, // always do all calculations
};
// Class template that represents the feature set
template <typename... FeatureTypes>
class FeatureSet;
enum class Side {
kFriend, // side to move
kEnemy, // opponent
};
// Type of timing to perform all calculations instead of difference calculation
enum class TriggerEvent {
kNone, // Calculate the difference whenever possible
kFriendKingMoved, // calculate all when own ball moves
kEnemyKingMoved, // do all calculations when enemy balls move
kAnyKingMoved, // do all calculations if either ball moves
kAnyPieceMoved, // always do all calculations
};
} // namespace Eval::NNUE::Features
// turn side or other side
enum class Side {
kFriend, // turn side
kEnemy, // opponent
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
+74 -66
View File
@@ -1,84 +1,92 @@
//Definition of input features HalfKP of NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#if defined(EVAL_NNUE)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKP of NNUE evaluation function
#include "half_kp.h"
#include "index_list.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
// Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
}
namespace Features {
// Get pieces information
template <Side AssociatedKing>
inline void HalfKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
PieceSquare** pieces, Square* sq_target_k) {
// Find the index of the feature quantity from the ball position and BonaPiece
template <Side AssociatedKing>
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, BonaPiece p) {
return static_cast<IndexType>(fe_end) * static_cast<IndexType>(sq_k) + p;
}
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceId target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceId>(PIECE_ID_KING + perspective) :
static_cast<PieceId>(PIECE_ID_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
}
// Get the piece information
template <Side AssociatedKing>
inline void HalfKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceNumber target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
}
// Get a list of indices for active features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// Get a list of indices with a value of 1 among the features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
// Do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
BonaPiece* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
PieceSquare* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
if (pieces[i] != PS_NONE) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
BonaPiece* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
const auto old_p = static_cast<BonaPiece>(
dp.changed_piece[i].old_piece.from[perspective]);
if (old_p != Eval::BONA_PIECE_ZERO) {
removed->push_back(MakeIndex(sq_target_k, old_p));
}
const auto new_p = static_cast<BonaPiece>(
dp.changed_piece[i].new_piece.from[perspective]);
if (new_p != Eval::BONA_PIECE_ZERO) {
added->push_back(MakeIndex(sq_target_k, new_p));
// Get a list of indices for recently changed features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
PieceSquare* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceId[i] >= PIECE_ID_KING) continue;
const auto old_p = static_cast<PieceSquare>(
dp.old_piece[i].from[perspective]);
if (old_p != PS_NONE) {
removed->push_back(MakeIndex(sq_target_k, old_p));
}
const auto new_p = static_cast<PieceSquare>(
dp.new_piece[i].from[perspective]);
if (new_p != PS_NONE) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
}
}
}
template class HalfKP<Side::kFriend>;
template class HalfKP<Side::kEnemy>;
template class HalfKP<Side::kFriend>;
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
+53 -48
View File
@@ -1,62 +1,67 @@
//Definition of input features HalfKP of NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_FEATURES_HALF_KP_H_
#define _NNUE_FEATURES_HALF_KP_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKP of NNUE evaluation function
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
// Feature HalfKP: Combination of the position of own king
// and the position of pieces other than kings
template <Side AssociatedKing>
class HalfKP {
namespace Features {
public:
// Feature name
static constexpr const char* kName = "HalfKP(Friend)";
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
// Number of feature dimensions
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
// Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
// Feature HalfKP: Combination of the position of own ball or enemy ball and the position of pieces other than balls
template <Side AssociatedKing>
class HalfKP {
public:
// feature quantity name
static constexpr const char* kName =
(AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
// number of feature dimensions
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(fe_end);
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of indices for active features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices for recently changed features
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Index of a feature for a given king position and another piece on some square
static IndexType MakeIndex(Square sq_k, PieceSquare p);
// Find the index of the feature quantity from the ball position and BonaPiece
static IndexType MakeIndex(Square sq_k, BonaPiece p);
private:
// Get pieces information
static void GetPieces(const Position& pos, Color perspective,
PieceSquare** pieces, Square* sq_target_k);
};
private:
// Get the piece information
static void GetPieces(const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k);
};
} // namespace Eval::NNUE::Features
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
+23 -23
View File
@@ -11,14 +11,14 @@ namespace NNUE {
namespace Features {
// Find the index of the feature quantity from the ball position and BonaPiece
// Find the index of the feature quantity from the ball position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
Square sq_k, BonaPiece p) {
Square sq_k, PieceSquare p) {
constexpr IndexType W = kBoardWidth;
constexpr IndexType H = kBoardHeight;
const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB;
const Square sq_p = static_cast<Square>((p - fe_hand_end) % SQUARE_NB);
const IndexType piece_index = (p - PieceSquare::PS_W_PAWN) / SQUARE_NB;
const Square sq_p = static_cast<Square>((p - PieceSquare::PS_W_PAWN) % SQUARE_NB);
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
return H * W * piece_index + H * relative_file + relative_rank;
@@ -28,14 +28,14 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
template <Side AssociatedKing>
inline void HalfRelativeKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k) {
PieceSquare** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceNumber target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
const PieceId target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceId>(PieceId::PIECE_ID_KING + perspective) :
static_cast<PieceId>(PieceId::PIECE_ID_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - PieceSquare::PS_W_KING) % SQUARE_NB);
}
// Get a list of indices with a value of 1 among the features
@@ -45,12 +45,12 @@ void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
BonaPiece* pieces;
PieceSquare* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
if (pieces[i] >= fe_hand_end) {
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
if (pieces[i] >= PieceSquare::PS_W_PAWN) {
if (pieces[i] != PieceSquare::PS_NONE) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
}
@@ -62,23 +62,23 @@ template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
BonaPiece* pieces;
PieceSquare* pieces;
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
const auto old_p = static_cast<BonaPiece>(
dp.changed_piece[i].old_piece.from[perspective]);
if (old_p >= fe_hand_end) {
if (old_p != Eval::BONA_PIECE_ZERO) {
if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
const auto old_p = static_cast<PieceSquare>(
dp.old_piece[i].from[perspective]);
if (old_p >= PieceSquare::PS_W_PAWN) {
if (old_p != PieceSquare::PS_NONE) {
removed->push_back(MakeIndex(sq_target_k, old_p));
}
}
const auto new_p = static_cast<BonaPiece>(
dp.changed_piece[i].new_piece.from[perspective]);
if (new_p >= fe_hand_end) {
if (new_p != Eval::BONA_PIECE_ZERO) {
const auto new_p = static_cast<PieceSquare>(
dp.new_piece[i].from[perspective]);
if (new_p >= PieceSquare::PS_W_PAWN) {
if (new_p != PieceSquare::PS_NONE) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
}
+5 -5
View File
@@ -25,7 +25,7 @@ class HalfRelativeKP {
static constexpr std::uint32_t kHashValue =
0xF9180919u ^ (AssociatedKing == Side::kFriend);
// Piece type excluding balls
static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB;
static constexpr IndexType kNumPieceKinds = (PieceSquare::PS_END - PieceSquare::PS_W_PAWN) / SQUARE_NB;
// width of the virtual board with the ball in the center
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
// height of a virtual board with balls in the center
@@ -34,7 +34,7 @@ class HalfRelativeKP {
static constexpr IndexType kDimensions =
kNumPieceKinds * kBoardHeight * kBoardWidth;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
@@ -48,13 +48,13 @@ class HalfRelativeKP {
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Find the index of the feature quantity from the ball position and BonaPiece
static IndexType MakeIndex(Square sq_k, BonaPiece p);
// Find the index of the feature quantity from the ball position and PieceSquare
static IndexType MakeIndex(Square sq_k, PieceSquare p);
private:
// Get the piece information
static void GetPieces(const Position& pos, Color perspective,
BonaPiece** pieces, Square* sq_target_k);
PieceSquare** pieces, Square* sq_target_k);
};
} // namespace Features
+50 -41
View File
@@ -1,55 +1,64 @@
// Definition of index list of input features
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_FEATURES_INDEX_LIST_H_
#define _NNUE_FEATURES_INDEX_LIST_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of index list of input features
#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
#include "../../position.h"
#include "../nnue_architecture.h"
namespace Eval {
namespace Eval::NNUE::Features {
namespace NNUE {
// Class template used for feature index list
template <typename T, std::size_t MaxSize>
class ValueList {
namespace Features {
public:
std::size_t size() const { return size_; }
void resize(std::size_t size) { size_ = size; }
void push_back(const T& value) { values_[size_++] = value; }
T& operator[](std::size_t index) { return values_[index]; }
T* begin() { return values_; }
T* end() { return values_ + size_; }
const T& operator[](std::size_t index) const { return values_[index]; }
const T* begin() const { return values_; }
const T* end() const { return values_ + size_; }
// Class template used for feature index list
template <typename T, std::size_t MaxSize>
class ValueList {
public:
std::size_t size() const { return size_; }
void resize(std::size_t size) { size_ = size; }
void push_back(const T& value) { values_[size_++] = value; }
T& operator[](std::size_t index) { return values_[index]; }
T* begin() { return values_; }
T* end() { return values_ + size_; }
const T& operator[](std::size_t index) const { return values_[index]; }
const T* begin() const { return values_; }
const T* end() const { return values_ + size_; }
void swap(ValueList& other) {
const std::size_t max_size = std::max(size_, other.size_);
for (std::size_t i = 0; i < max_size; ++i) {
std::swap(values_[i], other.values_[i]);
void swap(ValueList& other) {
const std::size_t max_size = std::max(size_, other.size_);
for (std::size_t i = 0; i < max_size; ++i) {
std::swap(values_[i], other.values_[i]);
}
std::swap(size_, other.size_);
}
std::swap(size_, other.size_);
}
private:
T values_[MaxSize];
std::size_t size_ = 0;
};
//Type of feature index list
class IndexList
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
};
private:
T values_[MaxSize];
std::size_t size_ = 0;
};
} // namespace Features
//Type of feature index list
class IndexList
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
};
} // namespace NNUE
} // namespace Eval::NNUE::Features
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+8 -8
View File
@@ -17,13 +17,13 @@ void K::AppendActiveIndices(
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
const BonaPiece* pieces = (perspective == BLACK) ?
const PieceSquare* pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO);
assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO);
for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) {
active->push_back(pieces[i] - fe_end);
assert(pieces[PieceId::PIECE_ID_BKING] != PieceSquare::PS_NONE);
assert(pieces[PieceId::PIECE_ID_WKING] != PieceSquare::PS_NONE);
for (PieceId i = PieceId::PIECE_ID_KING; i < PieceId::PIECE_ID_NONE; ++i) {
active->push_back(pieces[i] - PieceSquare::PS_END);
}
}
@@ -32,11 +32,11 @@ void K::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.pieceNo[0] >= PIECE_NUMBER_KING) {
if (dp.pieceId[0] >= PieceId::PIECE_ID_KING) {
removed->push_back(
dp.changed_piece[0].old_piece.from[perspective] - fe_end);
dp.old_piece[0].from[perspective] - PieceSquare::PS_END);
added->push_back(
dp.changed_piece[0].new_piece.from[perspective] - fe_end);
dp.new_piece[0].from[perspective] - PieceSquare::PS_END);
}
}
+8 -8
View File
@@ -17,11 +17,11 @@ void P::AppendActiveIndices(
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
const BonaPiece* pieces = (perspective == BLACK) ?
const PieceSquare* pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
if (pieces[i] != PieceSquare::PS_NONE) {
active->push_back(pieces[i]);
}
}
@@ -33,12 +33,12 @@ void P::AppendChangedIndices(
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
removed->push_back(dp.changed_piece[i].old_piece.from[perspective]);
if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
if (dp.old_piece[i].from[perspective] != PieceSquare::PS_NONE) {
removed->push_back(dp.old_piece[i].from[perspective]);
}
if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
added->push_back(dp.changed_piece[i].new_piece.from[perspective]);
if (dp.new_piece[i].from[perspective] != PieceSquare::PS_NONE) {
added->push_back(dp.new_piece[i].from[perspective]);
}
}
}
+3 -3
View File
@@ -14,7 +14,7 @@ namespace NNUE {
namespace Features {
// Feature P: BonaPiece of pieces other than balls
// Feature P: PieceSquare of pieces other than balls
class P {
public:
// feature quantity name
@@ -22,9 +22,9 @@ class P {
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
// number of feature dimensions
static constexpr IndexType kDimensions = fe_end;
static constexpr IndexType kDimensions = PieceSquare::PS_END;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
+213 -193
View File
@@ -1,217 +1,237 @@
// Definition of layer AffineTransform of NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_
#define _NNUE_LAYERS_AFFINE_TRANSFORM_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of layer AffineTransform of NNUE evaluation function
#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
#include <iostream>
#include "../nnue_common.h"
namespace Eval {
namespace Eval::NNUE::Layers {
namespace NNUE {
// Affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class AffineTransform {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::int32_t;
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
namespace Layers {
// Number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static constexpr IndexType kPaddedInputDimensions =
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
// affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class AffineTransform {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::int32_t;
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static constexpr IndexType kPaddedInputDimensions =
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xCC03DAE4u;
hash_value += kOutputDimensions;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xCC03DAE4u;
hash_value += kOutputDimensions;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "AffineTransform[" +
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "AffineTransform[" +
std::to_string(kOutputDimensions) + "<-" +
std::to_string(kInputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false;
stream.read(reinterpret_cast<char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!previous_layer_.WriteParameters(stream)) return false;
stream.write(reinterpret_cast<const char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
const __m512i kOnes = _mm512_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m512i*>(input);
#elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input);
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m128i kOnes = _mm_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
#endif
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType offset = i * kPaddedInputDimensions;
#if defined(USE_AVX512)
__m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(__MINGW32__) || defined(__MINGW64__)
__m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
__m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#endif
product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product);
}
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
// and we have to do one more 256bit chunk.
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
{
const auto iv_256 = reinterpret_cast<const __m256i*>(input);
const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
int j = kNumChunks * 2;
#if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2.
__m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
#else
__m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
#endif
sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
sum256 = _mm256_hadd_epi32(sum256, sum256);
sum256 = _mm256_hadd_epi32(sum256, sum256);
const __m128i lo = _mm256_extracti128_si256(sum256, 0);
const __m128i hi = _mm256_extracti128_si256(sum256, 1);
output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
}
#elif defined(USE_AVX2)
__m256i sum = _mm256_setzero_si256();
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product);
}
sum = _mm256_hadd_epi32(sum, sum);
sum = _mm256_hadd_epi32(sum, sum);
const __m128i lo = _mm256_extracti128_si256(sum, 0);
const __m128i hi = _mm256_extracti128_si256(sum, 1);
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
#elif defined(USE_SSSE3)
__m128i sum = _mm_cvtsi32_si128(biases_[i]);
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i product = _mm_maddubs_epi16(
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product);
}
sum = _mm_hadd_epi32(sum, sum);
sum = _mm_hadd_epi32(sum, sum);
output[i] = _mm_cvtsi128_si32(sum);
#elif defined(IS_ARM)
int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
sum = vpadalq_s16(sum, product);
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
#else
OutputType sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
sum += weights_[offset + j] * input[j];
}
output[i] = sum;
#endif
}
return output;
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false;
stream.read(reinterpret_cast<char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
private:
// parameter type
using BiasType = OutputType;
using WeightType = std::int8_t;
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!previous_layer_.WriteParameters(stream)) return false;
stream.write(reinterpret_cast<const char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// Make the learning class a friend
friend class Trainer<AffineTransform>;
// Forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
// the layer immediately before this layer
PreviousLayer previous_layer_;
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
const __m512i kOnes = _mm512_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m512i*>(input);
// parameter
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
alignas(kCacheLineSize)
WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
};
#elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input);
} // namespace Layers
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m128i kOnes = _mm_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m128i*>(input);
} // namespace NNUE
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
#endif
} // namespace Eval
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType offset = i * kPaddedInputDimensions;
#endif // defined(EVAL_NNUE)
#if defined(USE_AVX512)
__m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
__m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
__m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#endif
product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product);
}
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
// and we have to do one more 256bit chunk.
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
{
const auto iv_256 = reinterpret_cast<const __m256i*>(input);
const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
int j = kNumChunks * 2;
#if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2.
__m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
#else
__m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
#endif
sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
sum256 = _mm256_hadd_epi32(sum256, sum256);
sum256 = _mm256_hadd_epi32(sum256, sum256);
const __m128i lo = _mm256_extracti128_si256(sum256, 0);
const __m128i hi = _mm256_extracti128_si256(sum256, 1);
output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
}
#elif defined(USE_AVX2)
__m256i sum = _mm256_setzero_si256();
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product);
}
sum = _mm256_hadd_epi32(sum, sum);
sum = _mm256_hadd_epi32(sum, sum);
const __m128i lo = _mm256_extracti128_si256(sum, 0);
const __m128i hi = _mm256_extracti128_si256(sum, 1);
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
#elif defined(USE_SSSE3)
__m128i sum = _mm_cvtsi32_si128(biases_[i]);
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i product = _mm_maddubs_epi16(
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product);
}
sum = _mm_hadd_epi32(sum, sum);
sum = _mm_hadd_epi32(sum, sum);
output[i] = _mm_cvtsi128_si32(sum);
#elif defined(USE_NEON)
int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
sum = vpadalq_s16(sum, product);
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
#else
OutputType sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
sum += weights_[offset + j] * input[j];
}
output[i] = sum;
#endif
}
return output;
}
private:
using BiasType = OutputType;
using WeightType = std::int8_t;
// Make the learning class a friend
friend class Trainer<AffineTransform>;
PreviousLayer previous_layer_;
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
alignas(kCacheLineSize)
WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
};
} // namespace Eval::NNUE::Layers
#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+181 -157
View File
@@ -1,177 +1,201 @@
// Definition of layer ClippedReLU of NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_LAYERS_CLIPPED_RELU_H_
#define _NNUE_LAYERS_CLIPPED_RELU_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of layer ClippedReLU of NNUE evaluation function
#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
#include "../nnue_common.h"
namespace Eval {
namespace Eval::NNUE::Layers {
namespace NNUE {
// Clipped ReLU
template <typename PreviousLayer>
class ClippedReLU {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::uint8_t;
static_assert(std::is_same<InputType, std::int32_t>::value, "");
namespace Layers {
// Number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Clipped ReLU
template <typename PreviousLayer>
class ClippedReLU {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = std::uint8_t;
static_assert(std::is_same<InputType, std::int32_t>::value, "");
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0x538D24C7u;
hash_value += PreviousLayer::GetHashValue();
return hash_value;
}
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0x538D24C7u;
hash_value += PreviousLayer::GetHashValue();
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "ClippedReLU[" +
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "ClippedReLU[" +
std::to_string(kOutputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
}
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
return previous_layer_.WriteParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m256i kZero = _mm256_setzero_si256();
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 0]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 2]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 3])), kWeightScaleBits);
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m128i kZero = _mm_setzero_si128();
#ifndef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
const auto in = reinterpret_cast<const __m128i*>(input);
const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 0]),
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 2]),
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
_mm_store_si128(&out[i],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
// Read network parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
const auto in = reinterpret_cast<const int32x4_t*>(input);
const auto out = reinterpret_cast<int8x8_t*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
int16x8_t shifted;
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
// write parameters
bool WriteParameters(std::ostream& stream) const {
return previous_layer_.WriteParameters(stream);
}
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
#else
constexpr IndexType kStart = 0;
#endif
for (IndexType i = kStart; i < kInputDimensions; ++i) {
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
// Forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m256i kZero = _mm256_setzero_si256();
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
const auto in = reinterpret_cast<const __m256i*>(input);
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 0]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 2]),
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&in[i * 4 + 3])), kWeightScaleBits);
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
#ifdef USE_SSE41
const __m128i kZero = _mm_setzero_si128();
#else
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
const auto in = reinterpret_cast<const __m128i*>(input);
const auto out = reinterpret_cast<__m128i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 0]),
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
_mm_load_si128(&in[i * 4 + 2]),
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
_mm_store_si128(&out[i],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
const auto in = reinterpret_cast<const int32x4_t*>(input);
const auto out = reinterpret_cast<int8x8_t*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
int16x8_t shifted;
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
}
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
#else
constexpr IndexType kStart = 0;
#endif
for (IndexType i = kStart; i < kInputDimensions; ++i) {
output[i] = static_cast<OutputType>(
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
}
return output;
}
return output;
}
private:
// Make the learning class a friend
friend class Trainer<ClippedReLU>;
private:
// Make the learning class a friend
friend class Trainer<ClippedReLU>;
PreviousLayer previous_layer_;
};
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
} // namespace Eval::NNUE::Layers
} // namespace Layers
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+32 -26
View File
@@ -1,35 +1,47 @@
// NNUE evaluation function layer InputSlice definition
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_LAYERS_INPUT_SLICE_H_
#define _NNUE_LAYERS_INPUT_SLICE_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// NNUE evaluation function layer InputSlice definition
#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
#include "../nnue_common.h"
namespace Eval {
namespace Eval::NNUE::Layers {
namespace NNUE {
namespace Layers {
// input layer
// Input layer
template <IndexType OutputDimensions, IndexType Offset = 0>
class InputSlice {
public:
// need to maintain alignment
// Need to maintain alignment
static_assert(Offset % kMaxSimdWidth == 0, "");
// output type
// Output type
using OutputType = TransformedFeatureType;
// output dimensionality
// Output dimensionality
static constexpr IndexType kOutputDimensions = OutputDimensions;
// Size of the forward propagation buffer used from the input layer to this layer
// Size of forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize = 0;
// Hash value embedded in the evaluation function file
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xEC42E90Du;
hash_value ^= kOutputDimensions ^ (Offset << 10);
@@ -39,11 +51,11 @@ class InputSlice {
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
std::to_string(Offset) + ":" +
std::to_string(Offset + kOutputDimensions) + ")]";
std::to_string(Offset) + ":" +
std::to_string(Offset + kOutputDimensions) + ")]";
}
// read parameters
// Read network parameters
bool ReadParameters(std::istream& /*stream*/) {
return true;
}
@@ -53,7 +65,7 @@ class InputSlice {
return true;
}
// forward propagation
// Forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features,
char* /*buffer*/) const {
@@ -65,10 +77,4 @@ class InputSlice {
} // namespace Layers
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+31 -22
View File
@@ -1,30 +1,39 @@
// Class for difference calculation of NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_ACCUMULATOR_H_
#define _NNUE_ACCUMULATOR_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Class for difference calculation of NNUE evaluation function
#ifndef NNUE_ACCUMULATOR_H_INCLUDED
#define NNUE_ACCUMULATOR_H_INCLUDED
#include "nnue_architecture.h"
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
// Class that holds the result of affine transformation of input features
struct alignas(32) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score;
bool computed_accumulation;
bool computed_score;
};
// Class that holds the result of affine transformation of input features
// Keep the evaluation value that is the final output together
struct alignas(32) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score = VALUE_ZERO;
bool computed_accumulation = false;
bool computed_score = false;
};
} // namespace Eval::NNUE
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // NNUE_ACCUMULATOR_H_INCLUDED
+29 -24
View File
@@ -1,33 +1,38 @@
// Input features and network structure used in NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_ARCHITECTURE_H_
#define _NNUE_ARCHITECTURE_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
// include a header that defines the input features and network structure
//#include "architectures/k-p_256x2-32-32.h"
//#include "architectures/k-p-cr_256x2-32-32.h"
//#include "architectures/k-p-cr-ep_256x2-32-32.h"
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Input features and network structure used in NNUE evaluation function
#ifndef NNUE_ARCHITECTURE_H_INCLUDED
#define NNUE_ARCHITECTURE_H_INCLUDED
// Defines the network structure
#include "architectures/halfkp_256x2-32-32.h"
//#include "architectures/halfkp-cr-ep_256x2-32-32.h"
//#include "architectures/halfkp_384x2-32-32.h"
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
static_assert(Network::kOutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
static_assert(Network::kOutputDimensions == 1, "");
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
// Trigger for full calculation instead of difference calculation
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
// List of timings to perform all calculations instead of difference calculation
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
} // namespace Eval::NNUE
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
+56 -39
View File
@@ -1,64 +1,81 @@
// Constants used in NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_COMMON_H_
#define _NNUE_COMMON_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Constants used in NNUE evaluation function
#ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED
#if defined(USE_AVX2)
#include <immintrin.h>
#elif defined(USE_SSE41)
#include <smmintrin.h>
#elif defined(USE_SSSE3)
#include <tmmintrin.h>
#elif defined(USE_SSE2)
#include <emmintrin.h>
#elif defined(USE_NEON)
#include <arm_neon.h>
#endif
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
// Version of the evaluation file
constexpr std::uint32_t kVersion = 0x7AF32F16u;
// A constant that represents the version of the evaluation function file
constexpr std::uint32_t kVersion = 0x7AF32F16u;
// Constant used in evaluation value calculation
constexpr int FV_SCALE = 16;
constexpr int kWeightScaleBits = 6;
// Constant used in evaluation value calculation
constexpr int FV_SCALE = 16;
constexpr int kWeightScaleBits = 6;
// Size of cache line (in bytes)
constexpr std::size_t kCacheLineSize = 64;
// Size of cache line (in bytes)
constexpr std::size_t kCacheLineSize = 64;
// SIMD width (in bytes)
#if defined(USE_AVX2)
constexpr std::size_t kSimdWidth = 32;
// SIMD width (in bytes)
#if defined(USE_AVX2)
constexpr std::size_t kSimdWidth = 32;
#elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16;
#elif defined(IS_ARM)
constexpr std::size_t kSimdWidth = 16;
#endif
constexpr std::size_t kMaxSimdWidth = 32;
#elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16;
// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;
#elif defined(USE_NEON)
constexpr std::size_t kSimdWidth = 16;
#endif
// index type
using IndexType = std::uint32_t;
constexpr std::size_t kMaxSimdWidth = 32;
// Forward declaration of learning class template
template <typename Layer>
class Trainer;
// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;
using IndexType = std::uint32_t;
// find the smallest multiple of n and above
template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
return (n + base - 1) / base * base;
}
// Forward declaration of learning class template
template <typename Layer>
class Trainer;
} // namespace NNUE
// Round n up to be a multiple of base
template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
return (n + base - 1) / base * base;
}
} // namespace Eval
} // namespace Eval::NNUE
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_COMMON_H_INCLUDED
+254 -237
View File
@@ -1,9 +1,25 @@
// A class that converts the input features of the NNUE evaluation function
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
#ifndef _NNUE_FEATURE_TRANSFORMER_H_
#define _NNUE_FEATURE_TRANSFORMER_H_
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
#if defined(EVAL_NNUE)
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// A class that converts the input features of the NNUE evaluation function
#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
#include "nnue_common.h"
#include "nnue_architecture.h"
@@ -11,209 +27,205 @@
#include <cstring> // std::memset()
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
// Input feature converter
class FeatureTransformer {
// Input feature converter
class FeatureTransformer {
private:
// number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
private:
// Number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
public:
// output type
using OutputType = TransformedFeatureType;
public:
// Output type
using OutputType = TransformedFeatureType;
// number of input/output dimensions
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
// Number of input/output dimensions
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
// size of forward propagation buffer
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
// Size of forward propagation buffer
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
// a string representing the structure
static std::string GetStructureString() {
return RawFeatures::GetName() + "[" +
// a string representing the structure
static std::string GetStructureString() {
return RawFeatures::GetName() + "[" +
std::to_string(kInputDimensions) + "->" +
std::to_string(kHalfDimensions) + "x2]";
}
// read parameters
bool ReadParameters(std::istream& stream) {
stream.read(reinterpret_cast<char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// proceed with the difference calculation if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation) {
return true;
}
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
}
return false;
}
// convert input features
void Transform(const Position& pos, OutputType* output, bool refresh) const {
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
RefreshAccumulator(pos);
// Read network parameters
bool ReadParameters(std::istream& stream) {
stream.read(reinterpret_cast<char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.read(reinterpret_cast<char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m128i kZero = _mm_setzero_si128();
#ifndef USE_SSE41
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
#elif defined(IS_ARM)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 =
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 =
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
#elif defined(USE_SSSE3)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
#elif defined(IS_ARM)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
#endif
}
}
private:
// Calculate cumulative value without using difference calculation
void RefreshAccumulator(const Position& pos) const {
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
// write parameters
bool WriteParameters(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(biases_),
kHalfDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// Proceed with the difference calculation if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation) {
return true;
}
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
return true;
}
return false;
}
// Convert input features
void Transform(const Position& pos, OutputType* output, bool refresh) const {
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
RefreshAccumulator(pos);
}
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSSE3)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#ifdef USE_SSE41
const __m128i kZero = _mm_setzero_si128();
#else
const __m128i k0x80s = _mm_set1_epi8(-128);
#endif
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0};
#endif
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 =
#if defined(__MINGW32__) || defined(__MINGW64__)
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
// even though alignas is specified.
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 =
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_loadu_si256
#else
_mm256_load_si256
#endif
(&reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256
#else
_mm256_store_si256
#endif
(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
#elif defined(USE_SSSE3)
auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
}
#elif defined(USE_NEON)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
#endif
}
}
private:
// Calculate cumulative value without using difference calculation
void RefreshAccumulator(const Position& pos) const {
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (const auto perspective : Colors) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
for (Color perspective : { WHITE, BLACK }) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
#if defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(__MINGW32__) || defined(__MINGW64__)
#if defined(__MINGW32__) || defined(__MINGW64__)
_mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
#else
#else
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
#endif
#endif
}
#elif defined(USE_SSE2)
#elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
@@ -221,7 +233,8 @@ class FeatureTransformer {
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
#elif defined(USE_NEON)
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
@@ -229,129 +242,133 @@ class FeatureTransformer {
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
#endif
}
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
// Calculate cumulative value using difference calculation
void UpdateAccumulator(const Position& pos) const {
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
// Calculate cumulative value using difference calculation
void UpdateAccumulator(const Position& pos) const {
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
for (const auto perspective : Colors) {
#if defined(USE_AVX2)
for (Color perspective : { WHITE, BLACK }) {
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_SSE2)
#elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(IS_ARM)
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
#endif
#endif
if (reset[perspective]) {
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
} else {// Difference calculation for the feature amount changed from 1 to 0
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
kHalfDimensions * sizeof(BiasType));
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
}
#else
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] -=
weights_[offset + j];
}
#endif
#endif
}
}
{// Difference calculation for features that changed from 0 to 1
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(IS_ARM)
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] +=
weights_[offset + j];
}
#endif
#endif
}
}
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
using BiasType = std::int16_t;
using WeightType = std::int16_t;
// parameter type
using BiasType = std::int16_t;
using WeightType = std::int16_t;
// Make the learning class a friend
friend class Trainer<FeatureTransformer>;
// Make the learning class a friend
friend class Trainer<FeatureTransformer>;
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
alignas(kCacheLineSize)
WeightType weights_[kHalfDimensions * kInputDimensions];
};
// parameter
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
alignas(kCacheLineSize)
WeightType weights_[kHalfDimensions * kInputDimensions];
};
} // namespace Eval::NNUE
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
#endif
#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
@@ -62,8 +62,8 @@ class Factorizer<HalfKP<AssociatedKing>> {
IndexType index_offset = AppendBaseFeature<FeatureType>(
kProperties[kFeaturesHalfKP], base_index, training_features);
const auto sq_k = static_cast<Square>(base_index / fe_end);
const auto p = static_cast<BonaPiece>(base_index % fe_end);
const auto sq_k = static_cast<Square>(base_index / PieceSquare::PS_END);
const auto p = static_cast<PieceSquare>(base_index % PieceSquare::PS_END);
// kFeaturesHalfK
{
const auto& properties = kProperties[kFeaturesHalfK];
@@ -76,7 +76,7 @@ class Factorizer<HalfKP<AssociatedKing>> {
index_offset += InheritFeaturesIfRequired<P>(
index_offset, kProperties[kFeaturesP], p, training_features);
// kFeaturesHalfRelativeKP
if (p >= fe_hand_end) {
if (p >= PieceSquare::PS_W_PAWN) {
index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
index_offset, kProperties[kFeaturesHalfRelativeKP],
HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
+1 -1
View File
@@ -111,7 +111,7 @@ IntType Round(double value) {
// make_shared with alignment
template <typename T, typename... ArgumentTypes>
std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
const auto ptr = new(aligned_malloc(sizeof(T), alignof(T)))
const auto ptr = new(std_aligned_alloc(sizeof(T), alignof(T)))
T(std::forward<ArgumentTypes>(arguments)...);
return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
}