mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 15:37:47 +00:00
Moved the nnue folder.
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef HALFKP_CR_EP_256X2_32_32_H
|
||||
#define HALFKP_CR_EP_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
#include "../features/castling_right.h"
|
||||
#include "../features/enpassant.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
|
||||
Features::EnPassant>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // HALFKP_CR_EP_256X2_32_32_H
|
||||
@@ -0,0 +1,39 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef HALFKP_256X2_32_32_H
|
||||
#define HALFKP_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // HALFKP_256X2_32_32_H
|
||||
@@ -0,0 +1,39 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef HALFKP_384X2_32_32_H
|
||||
#define HALFKP_384X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 384;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // HALFKP_384X2_32_32_H
|
||||
@@ -0,0 +1,42 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef K_P_CR_EP_256X2_32_32_H
|
||||
#define K_P_CR_EP_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/k.h"
|
||||
#include "../features/p.h"
|
||||
#include "../features/castling_right.h"
|
||||
#include "../features/enpassant.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<Features::K, Features::P,
|
||||
Features::CastlingRight, Features::EnPassant>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // K_P_CR_EP_256X2_32_32_H
|
||||
@@ -0,0 +1,41 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef K_P_CR_256X2_32_32_H
|
||||
#define K_P_CR_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/k.h"
|
||||
#include "../features/p.h"
|
||||
#include "../features/castling_right.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<Features::K, Features::P,
|
||||
Features::CastlingRight>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // K_P_CR_256X2_32_32_H
|
||||
@@ -0,0 +1,38 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
#ifndef K_P_256X2_32_32_H
|
||||
#define K_P_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/k.h"
|
||||
#include "../features/p.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<Features::K, Features::P>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // K_P_256X2_32_32_H
|
||||
@@ -0,0 +1,326 @@
|
||||
// Code for calculating NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "../../position.h"
|
||||
#include "../../misc.h"
|
||||
#include "../../uci.h"
|
||||
|
||||
#include "evaluate_nnue.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input feature converter
|
||||
AlignedPtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
|
||||
// Evaluation function file name
|
||||
std::string fileName = "nn.bin";
|
||||
|
||||
// Saved evaluation function file name
|
||||
std::string savedfileName = "nn.bin";
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string GetArchitectureString() {
|
||||
return "Features=" + FeatureTransformer::GetStructureString() +
|
||||
",Network=" + Network::GetStructureString();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void Initialize(AlignedPtr<T>& pointer) {
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
// read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
|
||||
std::uint32_t header;
|
||||
stream.read(reinterpret_cast<char*>(&header), sizeof(header));
|
||||
if (!stream || header != T::GetHashValue()) return false;
|
||||
return pointer->ReadParameters(stream);
|
||||
}
|
||||
|
||||
// write evaluation function parameters
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
|
||||
} // namespace Detail
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
void Initialize() {
|
||||
Detail::Initialize(feature_transformer);
|
||||
Detail::Initialize(network);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// read the header
|
||||
bool ReadHeader(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture) {
|
||||
std::uint32_t version, size;
|
||||
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
|
||||
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
|
||||
stream.read(reinterpret_cast<char*>(&size), sizeof(size));
|
||||
if (!stream || version != kVersion) return false;
|
||||
architecture->resize(size);
|
||||
stream.read(&(*architecture)[0], size);
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write the header
|
||||
// Writes the file header: kVersion, the structure hash, the length of the
// architecture string as 32 bits, and the string itself (no terminator).
// Returns false if the stream is in a failed state afterwards.
bool WriteHeader(std::ostream& stream,
                 std::uint32_t hash_value, const std::string& architecture) {
  const auto size = static_cast<std::uint32_t>(architecture.size());

  stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));

  const char* const hash_bytes = reinterpret_cast<const char*>(&hash_value);
  stream.write(hash_bytes, sizeof(hash_value));

  const char* const size_bytes = reinterpret_cast<const char*>(&size);
  stream.write(size_bytes, sizeof(size));

  stream.write(architecture.data(), size);
  return !stream.fail();
}
|
||||
|
||||
// read evaluation function parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
|
||||
if (hash_value != kHashValue) return false;
|
||||
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, network)) return false;
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream) {
|
||||
if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
|
||||
if (!Detail::WriteParameters(stream, feature_transformer)) return false;
|
||||
if (!Detail::WriteParameters(stream, network)) return false;
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// proceed if you can calculate the difference
|
||||
static void UpdateAccumulatorIfPossible(const Position& pos) {
|
||||
feature_transformer->UpdateAccumulatorIfPossible(pos);
|
||||
}
|
||||
|
||||
// Calculate the evaluation value
|
||||
static Value ComputeScore(const Position& pos, bool refresh = false) {
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
if (!refresh && accumulator.computed_score) {
|
||||
return accumulator.score;
|
||||
}
|
||||
|
||||
alignas(kCacheLineSize) TransformedFeatureType
|
||||
transformed_features[FeatureTransformer::kBufferSize];
|
||||
feature_transformer->Transform(pos, transformed_features, refresh);
|
||||
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
|
||||
// When a value larger than VALUE_MAX_EVAL is returned, aspiration search fails high
|
||||
// It should be guaranteed that it is less than VALUE_MAX_EVAL because the search will not end.
|
||||
|
||||
// Even if this phenomenon occurs, if the seconds are fixed when playing, the search will be aborted there, so
|
||||
// The best move in the previous iteration is pointed to as bestmove, so apparently
|
||||
// no problem. The situation in which this VALUE_MAX_EVAL is returned is almost at a dead end,
|
||||
// Since such a jamming phase often appears at the end, there is a big difference in the situation
|
||||
// Doesn't really affect the outcome.
|
||||
|
||||
// However, when searching with a fixed depth such as when creating a teacher, it will not return from the search
|
||||
// Waste the computation time for that thread. Also, it will be timed out with fixed depth game.
|
||||
|
||||
auto score = static_cast<Value>(output[0] / FV_SCALE);
|
||||
|
||||
// 1) I feel that if I clip too poorly, it will have an effect on my learning...
|
||||
// 2) Since accumulator.score is not used at the time of difference calculation, it can be rewritten without any problem.
|
||||
score = Math::clamp(score , -VALUE_MAX_EVAL , VALUE_MAX_EVAL);
|
||||
|
||||
accumulator.score = score;
|
||||
accumulator.computed_score = true;
|
||||
return accumulator.score;
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
#if defined(USE_EVAL_HASH)
|
||||
// Class used to store evaluation values in HashTable
|
||||
// Entry of the evaluation hash table: a 64-bit key plus a 64-bit score.
struct alignas(16) ScoreKeyValue {
  // Storage. With USE_SSE2 the two fields can also be accessed as one
  // 128-bit value through as_m128i.
  union {
    struct {
      std::uint64_t key;
      std::uint64_t score;
    };
#if defined(USE_SSE2)
    __m128i as_m128i;
#endif
  };

#if defined(USE_SSE2)
  ScoreKeyValue() = default;

  // Copies the whole entry with a single 128-bit store.
  ScoreKeyValue(const ScoreKeyValue& other) {
    static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i),
                  "sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)");
    _mm_store_si128(&as_m128i, other.as_m128i);
  }

  // Assigns the whole entry with a single 128-bit store.
  ScoreKeyValue& operator=(const ScoreKeyValue& other) {
    _mm_store_si128(&as_m128i, other.as_m128i);
    return *this;
  }
#endif

  // Entries of the evaluate hash must be usable as if updated atomically.
  // With USE_SSE2 nothing is done here: per the original note the entry is
  // copied as a whole, so a matching key implies matching data.
  // Without USE_SSE2 the key is XOR-ed with the score.
  void encode() {
#if !defined(USE_SSE2)
    key ^= score;
#endif
  }

  // Inverse of encode(); XOR is its own inverse, so the same code is reused.
  void decode() { encode(); }
};
|
||||
|
||||
// Simple HashTable implementation.
|
||||
// Size is a power of 2.
|
||||
template <typename T, size_t Size>
|
||||
struct HashTable {
|
||||
HashTable() { clear(); }
|
||||
T* operator [] (const Key k) { return entries_ + (static_cast<size_t>(k) & (Size - 1)); }
|
||||
void clear() { memset(entries_, 0, sizeof(T)*Size); }
|
||||
|
||||
// Check that Size is a power of 2
|
||||
static_assert((Size & (Size - 1)) == 0, "");
|
||||
|
||||
private:
|
||||
T entries_[Size];
|
||||
};
|
||||
|
||||
//HashTable to save the evaluated ones (following ehash)
|
||||
|
||||
#if !defined(USE_LARGE_EVAL_HASH)
|
||||
// 134MB (setting other than witch's AVX2)
|
||||
struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x800000> {};
|
||||
#else
|
||||
// If you have prefetch, it's better to have a big one...
|
||||
// → It doesn't change much and the memory is wasteful, so is it okay to set ↑ by default?
|
||||
// 1GB (setting for witch's AVX2)
|
||||
struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x4000000> {};
|
||||
#endif
|
||||
|
||||
EvaluateHashTable g_evalTable;
|
||||
|
||||
// Prepare a function to prefetch.
|
||||
// Prefetches the eval-hash slot for `key`. The slot address is rounded
// down to a multiple of 32 (low five bits cleared) before prefetching.
void prefetch_evalhash(const Key key) {
  constexpr auto mask = ~static_cast<uint64_t>(0x1f);
  const auto slot_address = reinterpret_cast<uint64_t>(g_evalTable[key]);
  prefetch(reinterpret_cast<void*>(slot_address & mask));
}
|
||||
#endif
|
||||
|
||||
// read the evaluation function file
|
||||
// Save and restore Options with bench command etc., so EvalDir is changed at this time,
|
||||
// This function may be called twice to flag that the evaluation function needs to be reloaded.
|
||||
void load_eval() {
|
||||
|
||||
// Must be done!
|
||||
NNUE::Initialize();
|
||||
|
||||
if (Options["SkipLoadingEval"])
|
||||
{
|
||||
std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string file_name = Options["EvalFile"];
|
||||
NNUE::fileName = file_name;
|
||||
|
||||
std::ifstream stream(file_name, std::ios::binary);
|
||||
const bool result = NNUE::ReadParameters(stream);
|
||||
|
||||
if (!result)
|
||||
// It's a problem if it doesn't finish when there is a read error.
|
||||
std::cout << "Error! " << NNUE::fileName << " not found or wrong format" << std::endl;
|
||||
|
||||
else
|
||||
std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl;
|
||||
}
|
||||
|
||||
// Initialization
|
||||
void init() {
|
||||
}
|
||||
|
||||
// Evaluation function. Perform full calculation instead of difference calculation.
|
||||
// Called only once with Position::set(). (The difference calculation after that)
|
||||
// Note that the evaluation value seen from the turn side is returned. (Design differs from other evaluation functions in this respect)
|
||||
// Since, we will not try to optimize this function.
|
||||
Value compute_eval(const Position& pos) {
|
||||
return NNUE::ComputeScore(pos, true);
|
||||
}
|
||||
|
||||
// Evaluation function
|
||||
// Evaluation function. Returns the score cached in the accumulator when
// present; otherwise consults the evaluation hash table (if compiled in)
// and computes the score only on a miss.
Value evaluate(const Position& pos) {
  const auto& accumulator = pos.state()->accumulator;
  if (accumulator.computed_score) return accumulator.score;

#if defined(USE_GLOBAL_OPTIONS)
  // When GlobalOptions disables the eval hash, skip the hash query entirely.
  if (!GlobalOptions.use_eval_hash) {
    ASSERT_LV5(pos.state()->materialValue == Eval::material(pos));
    return NNUE::ComputeScore(pos);
  }
#endif

#if defined(USE_EVAL_HASH)
  // The position may already be stored in the evaluation hash table.
  const Key key = pos.key();
  ScoreKeyValue entry = *g_evalTable[key];
  entry.decode();
  if (entry.key == key) return Value(entry.score);  // hit

  // Miss: compute the score and store it for later lookups.
  const Value score = NNUE::ComputeScore(pos);
  entry.key = key;
  entry.score = score;
  entry.encode();
  *g_evalTable[key] = entry;
  return score;
#else
  return NNUE::ComputeScore(pos);
#endif
}
|
||||
|
||||
// proceed if you can calculate the difference
|
||||
void evaluate_with_no_return(const Position& pos) {
|
||||
NNUE::UpdateAccumulatorIfPossible(pos);
|
||||
}
|
||||
|
||||
// display the breakdown of the evaluation value of the current phase
|
||||
void print_eval_stat(Position& /*pos*/) {
|
||||
std::cout << "--- EVAL STAT: not implemented" << std::endl;
|
||||
}
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,67 @@
|
||||
// header used in NNUE evaluation function
|
||||
|
||||
// Include guard. The previous name started with an underscore followed by
// an uppercase letter; such identifiers are reserved for the implementation.
#ifndef EVALUATE_NNUE_H_INCLUDED
#define EVALUATE_NNUE_H_INCLUDED

#if defined(EVAL_NNUE)

#include "nnue_feature_transformer.h"
#include "nnue_architecture.h"

// Standard headers for the names used directly in this header
#include <cstdint>  // std::uint32_t
#include <iosfwd>   // std::istream, std::ostream (used by reference only)
#include <memory>   // std::unique_ptr
#include <string>   // std::string

namespace Eval {

namespace NNUE {

// Hash value of the evaluation function structure:
// feature transformer hash XOR network hash
constexpr std::uint32_t kHashValue =
    FeatureTransformer::GetHashValue() ^ Network::GetHashValue();

// Deleter for automating release of memory: runs the destructor explicitly,
// then releases the storage with aligned_free().
template <typename T>
struct AlignedDeleter {
  void operator()(T* ptr) const {
    ptr->~T();
    aligned_free(ptr);
  }
};

// Owning pointer whose storage is released through AlignedDeleter
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;

// Input feature converter
extern AlignedPtr<FeatureTransformer> feature_transformer;

// Evaluation function
extern AlignedPtr<Network> network;

// Evaluation function file name
extern std::string fileName;

// Saved evaluation function file name
extern std::string savedfileName;

// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString();

// Read the header; outputs go to *hash_value and *architecture
bool ReadHeader(std::istream& stream,
                std::uint32_t* hash_value, std::string* architecture);

// Write the header
bool WriteHeader(std::ostream& stream,
                 std::uint32_t hash_value, const std::string& architecture);

// Read evaluation function parameters
bool ReadParameters(std::istream& stream);

// Write evaluation function parameters
bool WriteParameters(std::ostream& stream);

}  // namespace NNUE

}  // namespace Eval

#endif  // defined(EVAL_NNUE)

#endif  // EVALUATE_NNUE_H_INCLUDED
|
||||
@@ -0,0 +1,231 @@
|
||||
// Code for learning NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include <random>
|
||||
#include <fstream>
|
||||
|
||||
#include "../../learn/learn.h"
|
||||
#include "../../learn/learning_tools.h"
|
||||
|
||||
#include "../../position.h"
|
||||
#include "../../uci.h"
|
||||
#include "../../misc.h"
|
||||
#include "../../thread_win32_osx.h"
|
||||
|
||||
#include "../evaluate_common.h"
|
||||
|
||||
#include "evaluate_nnue.h"
|
||||
#include "evaluate_nnue_learner.h"
|
||||
#include "trainer/features/factorizer_feature_set.h"
|
||||
#include "trainer/features/factorizer_half_kp.h"
|
||||
#include "trainer/trainer_feature_transformer.h"
|
||||
#include "trainer/trainer_input_slice.h"
|
||||
#include "trainer/trainer_affine_transform.h"
|
||||
#include "trainer/trainer_clipped_relu.h"
|
||||
#include "trainer/trainer_sum.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace {
|
||||
|
||||
// learning data
|
||||
std::vector<Example> examples;
|
||||
|
||||
// Mutex for exclusive control of examples
|
||||
std::mutex examples_mutex;
|
||||
|
||||
// number of samples in mini-batch
|
||||
uint64_t batch_size;
|
||||
|
||||
// random number generator
|
||||
std::mt19937 rng;
|
||||
|
||||
// learner
|
||||
std::shared_ptr<Trainer<Network>> trainer;
|
||||
|
||||
// Learning rate scale
|
||||
double global_learning_rate_scale;
|
||||
|
||||
// Get the learning rate scale
|
||||
double GetGlobalLearningRateScale() {
|
||||
return global_learning_rate_scale;
|
||||
}
|
||||
|
||||
// Tell the learner options such as hyperparameters
|
||||
void SendMessages(std::vector<Message> messages) {
|
||||
for (auto& message : messages) {
|
||||
trainer->SendMessage(&message);
|
||||
assert(message.num_receivers > 0);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Initialize learning
|
||||
void InitializeTraining(double eta1, uint64_t eta1_epoch,
|
||||
double eta2, uint64_t eta2_epoch, double eta3) {
|
||||
std::cout << "Initializing NN training for "
|
||||
<< GetArchitectureString() << std::endl;
|
||||
|
||||
assert(feature_transformer);
|
||||
assert(network);
|
||||
trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
|
||||
|
||||
if (Options["SkipLoadingEval"]) {
|
||||
trainer->Initialize(rng);
|
||||
}
|
||||
|
||||
global_learning_rate_scale = 1.0;
|
||||
EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
|
||||
}
|
||||
|
||||
// set the number of samples in the mini-batch
|
||||
void SetBatchSize(uint64_t size) {
|
||||
assert(size > 0);
|
||||
batch_size = size;
|
||||
}
|
||||
|
||||
// set the learning rate scale
|
||||
void SetGlobalLearningRateScale(double scale) {
|
||||
global_learning_rate_scale = scale;
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SetOptions(const std::string& options) {
|
||||
std::vector<Message> messages;
|
||||
for (const auto& option : Split(options, ',')) {
|
||||
const auto fields = Split(option, '=');
|
||||
assert(fields.size() == 1 || fields.size() == 2);
|
||||
if (fields.size() == 1) {
|
||||
messages.emplace_back(fields[0]);
|
||||
} else {
|
||||
messages.emplace_back(fields[0], fields[1]);
|
||||
}
|
||||
}
|
||||
SendMessages(std::move(messages));
|
||||
}
|
||||
|
||||
// Reread the evaluation function parameters for learning from the file
|
||||
void RestoreParameters(const std::string& dir_name) {
|
||||
const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
|
||||
std::ifstream stream(file_name, std::ios::binary);
|
||||
bool result = ReadParameters(stream);
|
||||
assert(result);
|
||||
|
||||
SendMessages({{"reset"}});
|
||||
}
|
||||
|
||||
// Add 1 sample of learning data
|
||||
// Adds one training sample built from `pos`.
// sign is +1 when rootColor is the side to move and -1 otherwise. For each
// color the active feature indices are expanded into training features,
// sorted, and features with equal index are merged with operator+=.
void AddExample(Position& pos, Color rootColor,
                const Learner::PackedSfenValue& psv, double weight) {
  Example example;
  example.sign = (rootColor == pos.side_to_move()) ? 1 : -1;
  example.psv = psv;
  example.weight = weight;

  // Collect active indices for every refresh trigger
  Features::IndexList active_indices[2];
  for (const auto trigger : kRefreshTriggers) {
    RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
  }
  // The two lists are exchanged when WHITE is not the side to move
  if (pos.side_to_move() != WHITE) {
    active_indices[0].swap(active_indices[1]);
  }

  for (const auto color : Colors) {
    std::vector<TrainingFeature> training_features;
    for (const auto base_index : active_indices[color]) {
      static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
                    (1 << TrainingFeature::kIndexBits), "");
      Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
          base_index, &training_features);
    }
    std::sort(training_features.begin(), training_features.end());

    // After sorting, equal indices are adjacent: merge them into one entry
    auto& unique_features = example.training_features[color];
    for (const auto& feature : training_features) {
      const bool same_as_last =
          !unique_features.empty() &&
          feature.GetIndex() == unique_features.back().GetIndex();
      if (same_as_last) {
        unique_features.back() += feature;
      } else {
        unique_features.push_back(feature);
      }
    }
  }

  // Only the insertion into the shared list happens under the mutex
  std::lock_guard<std::mutex> lock(examples_mutex);
  examples.push_back(std::move(example));
}
|
||||
|
||||
// update the evaluation function parameters
|
||||
void UpdateParameters(uint64_t epoch) {
|
||||
assert(batch_size > 0);
|
||||
|
||||
EvalLearningTools::Weight::calc_eta(epoch);
|
||||
const auto learning_rate = static_cast<LearnFloatType>(
|
||||
get_eta() / batch_size);
|
||||
|
||||
std::lock_guard<std::mutex> lock(examples_mutex);
|
||||
std::shuffle(examples.begin(), examples.end(), rng);
|
||||
while (examples.size() >= batch_size) {
|
||||
std::vector<Example> batch(examples.end() - batch_size, examples.end());
|
||||
examples.resize(examples.size() - batch_size);
|
||||
|
||||
const auto network_output = trainer->Propagate(batch);
|
||||
|
||||
std::vector<LearnFloatType> gradients(batch.size());
|
||||
for (std::size_t b = 0; b < batch.size(); ++b) {
|
||||
const auto shallow = static_cast<Value>(Round<std::int32_t>(
|
||||
batch[b].sign * network_output[b] * kPonanzaConstant));
|
||||
const auto& psv = batch[b].psv;
|
||||
const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
|
||||
gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
|
||||
}
|
||||
|
||||
trainer->Backpropagate(gradients.data(), learning_rate);
|
||||
}
|
||||
SendMessages({{"quantize_parameters"}});
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void CheckHealth() {
|
||||
SendMessages({{"check_health"}});
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
// save evaluation function parameters to a file
|
||||
void save_eval(std::string dir_name) {
|
||||
auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
|
||||
std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
|
||||
|
||||
// mkdir() will fail if this folder already exists, but
|
||||
// Apart from that. If not, I just want you to make it.
|
||||
// Also, assume that the folders up to EvalSaveDir have been dug.
|
||||
Dependency::mkdir(eval_dir);
|
||||
|
||||
if (Options["SkipLoadingEval"] && NNUE::trainer) {
|
||||
NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
|
||||
}
|
||||
|
||||
const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
|
||||
std::ofstream stream(file_name, std::ios::binary);
|
||||
const bool result = NNUE::WriteParameters(stream);
|
||||
assert(result);
|
||||
|
||||
std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
|
||||
}
|
||||
|
||||
// get the current eta
|
||||
double get_eta() {
|
||||
return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
|
||||
}
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,46 @@
|
||||
// Interface used for learning NNUE evaluation function
|
||||
|
||||
// Include guard. The previous name started with an underscore followed by
// an uppercase letter; such identifiers are reserved for the implementation.
#ifndef EVALUATE_NNUE_LEARNER_H_INCLUDED
#define EVALUATE_NNUE_LEARNER_H_INCLUDED

#if defined(EVAL_LEARN) && defined(EVAL_NNUE)

#include "../../learn/learn.h"

// Standard headers for the names used directly in this header
#include <cstdint>  // uint64_t
#include <string>   // std::string

namespace Eval {

namespace NNUE {

// Initialize learning
void InitializeTraining(double eta1, uint64_t eta1_epoch,
                        double eta2, uint64_t eta2_epoch, double eta3);

// Set the number of samples in the mini-batch
void SetBatchSize(uint64_t size);

// Set the learning rate scale
void SetGlobalLearningRateScale(double scale);

// Set options such as hyperparameters
void SetOptions(const std::string& options);

// Reread the evaluation function parameters for learning from the file
void RestoreParameters(const std::string& dir_name);

// Add 1 sample of learning data
void AddExample(Position& pos, Color rootColor,
                const Learner::PackedSfenValue& psv, double weight);

// Update the evaluation function parameters
void UpdateParameters(uint64_t epoch);

// Check if there are any problems with learning
void CheckHealth();

}  // namespace NNUE

}  // namespace Eval

#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)

#endif  // EVALUATE_NNUE_LEARNER_H_INCLUDED
|
||||
@@ -0,0 +1,73 @@
|
||||
// Definition of the input feature CastlingRight of the NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "castling_right.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void CastlingRight::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
int castling_rights = pos.state()->castlingRights;
|
||||
int relative_castling_rights;
|
||||
if (perspective == WHITE) {
|
||||
relative_castling_rights = castling_rights;
|
||||
}
|
||||
else {
|
||||
// Invert the perspective.
|
||||
relative_castling_rights = ((castling_rights & 3) << 2)
|
||||
& ((castling_rights >> 2) & 3);
|
||||
}
|
||||
|
||||
for (int i = 0; i <kDimensions; ++i) {
|
||||
if (relative_castling_rights & (i << 1)) {
|
||||
active->push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void CastlingRight::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
|
||||
int previous_castling_rights = pos.state()->previous->castlingRights;
|
||||
int current_castling_rights = pos.state()->castlingRights;
|
||||
int relative_previous_castling_rights;
|
||||
int relative_current_castling_rights;
|
||||
if (perspective == WHITE) {
|
||||
relative_previous_castling_rights = previous_castling_rights;
|
||||
relative_current_castling_rights = current_castling_rights;
|
||||
}
|
||||
else {
|
||||
// Invert the perspective.
|
||||
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
|
||||
& ((previous_castling_rights >> 2) & 3);
|
||||
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
|
||||
& ((current_castling_rights >> 2) & 3);
|
||||
}
|
||||
|
||||
for (int i = 0; i < kDimensions; ++i) {
|
||||
if ((relative_previous_castling_rights & (i << 1)) &&
|
||||
(relative_current_castling_rights & (i << 1)) == 0) {
|
||||
removed->push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,48 @@
|
||||
// Definition of the input feature CastlingRight of the NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
|
||||
#define _NNUE_FEATURES_CASTLING_RIGHT_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature CastlingRight: the castling rights of the position
|
||||
class CastlingRight {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "CastlingRight";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x913968AAu;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = 4;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 4;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,47 @@
|
||||
// Definition of the input feature EnPassant of the NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "enpassant.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void EnPassant::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
auto epSquare = pos.state()->epSquare;
|
||||
if (epSquare == SQ_NONE) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (perspective == BLACK) {
|
||||
epSquare = Inv(epSquare);
|
||||
}
|
||||
|
||||
auto file = file_of(epSquare);
|
||||
active->push_back(file);
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void EnPassant::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
// Not implemented.
|
||||
assert(false);
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,48 @@
|
||||
// Definition of the input feature EnPassant of the NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_ENPASSANT_H_
|
||||
#define _NNUE_FEATURES_ENPASSANT_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature EnPassant: the file of the en-passant square, if any
|
||||
class EnPassant {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "EnPassant";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x02924F91u;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = 8;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 1;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,249 @@
|
||||
// A class template that represents the input feature set of the NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURE_SET_H_
|
||||
#define _NNUE_FEATURE_SET_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "features_common.h"
|
||||
#include <array>
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// A class template that represents a list of values
|
||||
// Compile-time list of values of type T.
template <typename T, T... Values>
struct CompileTimeList;

// Non-empty list: First followed by Remaining...
template <typename T, T First, T... Remaining>
struct CompileTimeList<T, First, Remaining...> {
  // True if |value| equals one of the elements of the list.
  static constexpr bool Contains(T value) {
    return (value == First)
        ? true
        : CompileTimeList<T, Remaining...>::Contains(value);
  }
  // The elements of the list, in order.
  static constexpr std::array<T, 1 + sizeof...(Remaining)>
      kValues = {{First, Remaining...}};
};

// Out-of-class definition of kValues for the non-empty list.
template <typename T, T First, T... Remaining>
constexpr std::array<T, 1 + sizeof...(Remaining)>
    CompileTimeList<T, First, Remaining...>::kValues;

// Empty list: contains nothing.
template <typename T>
struct CompileTimeList<T> {
  static constexpr bool Contains(T /*value*/) { return false; }
  static constexpr std::array<T, 0> kValues = {{}};
};

// Puts Value in front of the elements of ListType
// (despite the name, the new value goes at the beginning).
template <typename T, typename ListType, T Value>
struct AppendToList;

template <typename T, T... Values, T AnotherValue>
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
  using Result = CompileTimeList<T, AnotherValue, Values...>;
};

// Inserts Value into a sorted list without duplicates, keeping it sorted
// and duplicate-free.
template <typename T, typename ListType, T Value>
struct InsertToSet;

template <typename T, T First, T... Remaining, T AnotherValue>
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
 private:
  // The list as it was passed in.
  using Unchanged = CompileTimeList<T, First, Remaining...>;
  // The new value placed in front of the whole list.
  using InsertedInFront = CompileTimeList<T, AnotherValue, First, Remaining...>;
  // The new value inserted somewhere after First.
  using InsertedIntoRest = typename AppendToList<
      T,
      typename InsertToSet<
          T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
      First>::Result;

 public:
  // Already present -> unchanged; smaller than First -> goes in front;
  // otherwise keep First and insert into the remaining elements.
  using Result = std::conditional_t<
      Unchanged::Contains(AnotherValue),
      Unchanged,
      std::conditional_t<(AnotherValue < First),
                         InsertedInFront,
                         InsertedIntoRest>>;
};

// Inserting into the empty list yields a single-element list.
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
  using Result = CompileTimeList<T, Value>;
};
|
||||
|
||||
// Base class of feature set
|
||||
template <typename Derived>
|
||||
class FeatureSetBase {
|
||||
public:
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <typename IndexListType>
|
||||
static void AppendActiveIndices(
|
||||
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
|
||||
for (const auto perspective :Colors) {
|
||||
Derived::CollectActiveIndices(
|
||||
pos, trigger, perspective, &active[perspective]);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <typename PositionType, typename IndexListType>
|
||||
static void AppendChangedIndices(
|
||||
const PositionType& pos, TriggerEvent trigger,
|
||||
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
if (dp.dirty_num == 0) return;
|
||||
|
||||
for (const auto perspective :Colors) {
|
||||
reset[perspective] = false;
|
||||
switch (trigger) {
|
||||
case TriggerEvent::kNone:
|
||||
break;
|
||||
case TriggerEvent::kFriendKingMoved:
|
||||
reset[perspective] =
|
||||
dp.pieceNo[0] == PIECE_NUMBER_KING + perspective;
|
||||
break;
|
||||
case TriggerEvent::kEnemyKingMoved:
|
||||
reset[perspective] =
|
||||
dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective;
|
||||
break;
|
||||
case TriggerEvent::kAnyKingMoved:
|
||||
reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING;
|
||||
break;
|
||||
case TriggerEvent::kAnyPieceMoved:
|
||||
reset[perspective] = true;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
if (reset[perspective]) {
|
||||
Derived::CollectActiveIndices(
|
||||
pos, trigger, perspective, &added[perspective]);
|
||||
} else {
|
||||
Derived::CollectChangedIndices(
|
||||
pos, trigger, perspective,
|
||||
&removed[perspective], &added[perspective]);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
|
||||
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
|
||||
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
|
||||
public FeatureSetBase<
|
||||
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
|
||||
private:
|
||||
using Head = FirstFeatureType;
|
||||
using Tail = FeatureSet<RemainingFeatureTypes...>;
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
Head::kDimensions + Tail::kDimensions;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
|
||||
// List of timings to perform all calculations instead of difference calculation
|
||||
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
|
||||
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
|
||||
// Get the feature quantity name
|
||||
static std::string GetName() {
|
||||
return std::string(Head::kName) + "+" + Tail::GetName();
|
||||
}
|
||||
|
||||
private:
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <typename IndexListType>
|
||||
static void CollectActiveIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexListType* const active) {
|
||||
Tail::CollectActiveIndices(pos, trigger, perspective, active);
|
||||
if (Head::kRefreshTrigger == trigger) {
|
||||
const auto start = active->size();
|
||||
Head::AppendActiveIndices(pos, perspective, active);
|
||||
for (auto i = start; i < active->size(); ++i) {
|
||||
(*active)[i] += Tail::kDimensions;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <typename IndexListType>
|
||||
static void CollectChangedIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexListType* const removed, IndexListType* const added) {
|
||||
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
|
||||
if (Head::kRefreshTrigger == trigger) {
|
||||
const auto start_removed = removed->size();
|
||||
const auto start_added = added->size();
|
||||
Head::AppendChangedIndices(pos, perspective, removed, added);
|
||||
for (auto i = start_removed; i < removed->size(); ++i) {
|
||||
(*removed)[i] += Tail::kDimensions;
|
||||
}
|
||||
for (auto i = start_added; i < added->size(); ++i) {
|
||||
(*added)[i] += Tail::kDimensions;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make the base class and the class template that recursively uses itself a friend
|
||||
friend class FeatureSetBase<FeatureSet>;
|
||||
template <typename... FeatureTypes>
|
||||
friend class FeatureSet;
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
// Specialization with one template argument
|
||||
template <typename FeatureType>
|
||||
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
|
||||
public:
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = FeatureType::kDimensions;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
// List of timings to perform all calculations instead of difference calculation
|
||||
using SortedTriggerSet =
|
||||
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
|
||||
// Get the feature quantity name
|
||||
static std::string GetName() {
|
||||
return FeatureType::kName;
|
||||
}
|
||||
|
||||
private:
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void CollectActiveIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexList* const active) {
|
||||
if (FeatureType::kRefreshTrigger == trigger) {
|
||||
FeatureType::AppendActiveIndices(pos, perspective, active);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void CollectChangedIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexList* const removed, IndexList* const added) {
|
||||
if (FeatureType::kRefreshTrigger == trigger) {
|
||||
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
|
||||
}
|
||||
}
|
||||
|
||||
// Make the base class and the class template that recursively uses itself a friend
|
||||
friend class FeatureSetBase<FeatureSet>;
|
||||
template <typename... FeatureTypes>
|
||||
friend class FeatureSet;
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,47 @@
|
||||
//Common header of input features of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_COMMON_H_
|
||||
#define _NNUE_FEATURES_COMMON_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Index list type
|
||||
class IndexList;
|
||||
|
||||
// Class template that represents the feature set
|
||||
template <typename... FeatureTypes>
|
||||
class FeatureSet;
|
||||
|
||||
// Type of timing to perform all calculations instead of difference calculation
|
||||
enum class TriggerEvent {
  // Never force a full recalculation: use the difference whenever possible
  kNone,
  // Recalculate everything when the own king moves
  kFriendKingMoved,
  // Recalculate everything when the enemy king moves
  kEnemyKingMoved,
  // Recalculate everything when either king moves
  kAnyKingMoved,
  // Always recalculate everything
  kAnyPieceMoved,
};
|
||||
|
||||
// turn side or other side
|
||||
enum class Side {
  // Own side
  kFriend,
  // Opposing side
  kEnemy,
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,84 @@
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "half_kp.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Find the index of the feature quantity from the king position and BonaPiece
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, BonaPiece p) {
|
||||
return static_cast<IndexType>(fe_end) * static_cast<IndexType>(sq_k) + p;
|
||||
}
|
||||
|
||||
// Get the piece information
|
||||
template <Side AssociatedKing>
|
||||
inline void HalfKP<AssociatedKing>::GetPieces(
|
||||
const Position& pos, Color perspective,
|
||||
BonaPiece** pieces, Square* sq_target_k) {
|
||||
*pieces = (perspective == BLACK) ?
|
||||
pos.eval_list()->piece_list_fb() :
|
||||
pos.eval_list()->piece_list_fw();
|
||||
const PieceNumber target = (AssociatedKing == Side::kFriend) ?
|
||||
static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
|
||||
static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
|
||||
*sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
BonaPiece* pieces;
|
||||
Square sq_target_k;
|
||||
GetPieces(pos, perspective, &pieces, &sq_target_k);
|
||||
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
|
||||
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
|
||||
active->push_back(MakeIndex(sq_target_k, pieces[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
BonaPiece* pieces;
|
||||
Square sq_target_k;
|
||||
GetPieces(pos, perspective, &pieces, &sq_target_k);
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
|
||||
const auto old_p = static_cast<BonaPiece>(
|
||||
dp.changed_piece[i].old_piece.from[perspective]);
|
||||
if (old_p != Eval::BONA_PIECE_ZERO) {
|
||||
removed->push_back(MakeIndex(sq_target_k, old_p));
|
||||
}
|
||||
const auto new_p = static_cast<BonaPiece>(
|
||||
dp.changed_piece[i].new_piece.from[perspective]);
|
||||
if (new_p != Eval::BONA_PIECE_ZERO) {
|
||||
added->push_back(MakeIndex(sq_target_k, new_p));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfKP<Side::kFriend>;
|
||||
template class HalfKP<Side::kEnemy>;
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,62 @@
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_HALF_KP_H_
|
||||
#define _NNUE_FEATURES_HALF_KP_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature HalfKP: Combination of the position of the own king or enemy king and the position of pieces other than kings
|
||||
template <Side AssociatedKing>
|
||||
class HalfKP {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName =
|
||||
(AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(fe_end);
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
|
||||
// Find the index of the feature quantity from the ball position and BonaPiece
|
||||
static IndexType MakeIndex(Square sq_k, BonaPiece p);
|
||||
|
||||
private:
|
||||
// Get the piece information
|
||||
static void GetPieces(const Position& pos, Color perspective,
|
||||
BonaPiece** pieces, Square* sq_target_k);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,97 @@
|
||||
//Definition of input features HalfRelativeKP of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "half_relative_kp.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Find the index of the feature quantity from the king position and BonaPiece
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
|
||||
Square sq_k, BonaPiece p) {
|
||||
constexpr IndexType W = kBoardWidth;
|
||||
constexpr IndexType H = kBoardHeight;
|
||||
const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB;
|
||||
const Square sq_p = static_cast<Square>((p - fe_hand_end) % SQUARE_NB);
|
||||
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
|
||||
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
|
||||
return H * W * piece_index + H * relative_file + relative_rank;
|
||||
}
|
||||
|
||||
// Get the piece information
|
||||
template <Side AssociatedKing>
|
||||
inline void HalfRelativeKP<AssociatedKing>::GetPieces(
|
||||
const Position& pos, Color perspective,
|
||||
BonaPiece** pieces, Square* sq_target_k) {
|
||||
*pieces = (perspective == BLACK) ?
|
||||
pos.eval_list()->piece_list_fb() :
|
||||
pos.eval_list()->piece_list_fw();
|
||||
const PieceNumber target = (AssociatedKing == Side::kFriend) ?
|
||||
static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
|
||||
static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
|
||||
*sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
BonaPiece* pieces;
|
||||
Square sq_target_k;
|
||||
GetPieces(pos, perspective, &pieces, &sq_target_k);
|
||||
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
|
||||
if (pieces[i] >= fe_hand_end) {
|
||||
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
|
||||
active->push_back(MakeIndex(sq_target_k, pieces[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
BonaPiece* pieces;
|
||||
Square sq_target_k;
|
||||
GetPieces(pos, perspective, &pieces, &sq_target_k);
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
|
||||
const auto old_p = static_cast<BonaPiece>(
|
||||
dp.changed_piece[i].old_piece.from[perspective]);
|
||||
if (old_p >= fe_hand_end) {
|
||||
if (old_p != Eval::BONA_PIECE_ZERO) {
|
||||
removed->push_back(MakeIndex(sq_target_k, old_p));
|
||||
}
|
||||
}
|
||||
const auto new_p = static_cast<BonaPiece>(
|
||||
dp.changed_piece[i].new_piece.from[perspective]);
|
||||
if (new_p >= fe_hand_end) {
|
||||
if (new_p != Eval::BONA_PIECE_ZERO) {
|
||||
added->push_back(MakeIndex(sq_target_k, new_p));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfRelativeKP<Side::kFriend>;
|
||||
template class HalfRelativeKP<Side::kEnemy>;
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,68 @@
|
||||
//Definition of input features HalfRelativeKP of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
|
||||
#define _NNUE_FEATURES_HALF_RELATIVE_KP_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature HalfRelativeKP: Relative position of each piece other than the kings, based on the own king or the enemy king
|
||||
template <Side AssociatedKing>
|
||||
class HalfRelativeKP {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
|
||||
"HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0xF9180919u ^ (AssociatedKing == Side::kFriend);
|
||||
// Piece type excluding balls
|
||||
static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB;
|
||||
// width of the virtual board with the ball in the center
|
||||
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
|
||||
// height of a virtual board with balls in the center
|
||||
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
kNumPieceKinds * kBoardHeight * kBoardWidth;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
|
||||
// Find the index of the feature quantity from the ball position and BonaPiece
|
||||
static IndexType MakeIndex(Square sq_k, BonaPiece p);
|
||||
|
||||
private:
|
||||
// Get the piece information
|
||||
static void GetPieces(const Position& pos, Color perspective,
|
||||
BonaPiece** pieces, Square* sq_target_k);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,55 @@
|
||||
// Definition of index list of input features
|
||||
|
||||
#ifndef _NNUE_FEATURES_INDEX_LIST_H_
|
||||
#define _NNUE_FEATURES_INDEX_LIST_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../position.h"
|
||||
#include "../nnue_architecture.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Fixed-capacity list used for feature indices.
// Elements live in an inline array of MaxSize slots; only the first size()
// slots are considered valid. No bounds checking is performed, so callers
// must never let size() exceed MaxSize.
template <typename T, std::size_t MaxSize>
class ValueList {
 public:
  // Number of valid elements
  std::size_t size() const { return size_; }

  // Sets the number of valid elements; no slot is initialized or cleared
  void resize(std::size_t size) { size_ = size; }

  // Appends a copy of value; the caller guarantees size() < MaxSize
  void push_back(const T& value) { values_[size_++] = value; }

  // Unchecked element access and iteration over the valid range
  T& operator[](std::size_t index) { return values_[index]; }
  T* begin() { return values_; }
  T* end() { return values_ + size_; }
  const T& operator[](std::size_t index) const { return values_[index]; }
  const T* begin() const { return values_; }
  const T* end() const { return values_ + size_; }

  // Exchanges the valid elements and the sizes of the two lists.
  // Only slots below each list's size are read: the shared prefix is
  // swapped and the surplus of the longer list is copied across. Slots past
  // a list's size may never have been written, so they are not touched.
  void swap(ValueList& other) {
    const std::size_t common = size_ < other.size_ ? size_ : other.size_;
    for (std::size_t i = 0; i < common; ++i) {
      std::swap(values_[i], other.values_[i]);
    }
    // At most one of the two loops below runs.
    for (std::size_t i = common; i < size_; ++i) {
      other.values_[i] = values_[i];
    }
    for (std::size_t i = common; i < other.size_; ++i) {
      values_[i] = other.values_[i];
    }
    std::swap(size_, other.size_);
  }

 private:
  T values_[MaxSize];
  std::size_t size_ = 0;
};
|
||||
|
||||
//Type of feature index list
|
||||
class IndexList
|
||||
: public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,49 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "k.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void K::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
const BonaPiece* pieces = (perspective == BLACK) ?
|
||||
pos.eval_list()->piece_list_fb() :
|
||||
pos.eval_list()->piece_list_fw();
|
||||
assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO);
|
||||
assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO);
|
||||
for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) {
|
||||
active->push_back(pieces[i] - fe_end);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void K::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
if (dp.pieceNo[0] >= PIECE_NUMBER_KING) {
|
||||
removed->push_back(
|
||||
dp.changed_piece[0].old_piece.from[perspective] - fe_end);
|
||||
added->push_back(
|
||||
dp.changed_piece[0].new_piece.from[perspective] - fe_end);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,48 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_K_H_
|
||||
#define _NNUE_FEATURES_K_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature K: Ball position
|
||||
class K {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "K";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0xD3CEE169u;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = SQUARE_NB * 2;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 2;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,52 @@
|
||||
//Definition of input feature P of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "p.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void P::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
const BonaPiece* pieces = (perspective == BLACK) ?
|
||||
pos.eval_list()->piece_list_fb() :
|
||||
pos.eval_list()->piece_list_fw();
|
||||
for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
|
||||
if (pieces[i] != Eval::BONA_PIECE_ZERO) {
|
||||
active->push_back(pieces[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void P::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
|
||||
if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
|
||||
removed->push_back(dp.changed_piece[i].old_piece.from[perspective]);
|
||||
}
|
||||
if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
|
||||
added->push_back(dp.changed_piece[i].new_piece.from[perspective]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,48 @@
|
||||
//Definition of input feature P of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_P_H_
|
||||
#define _NNUE_FEATURES_P_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature P: BonaPiece of pieces other than balls
|
||||
class P {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "P";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = fe_end;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,217 @@
|
||||
// Definition of layer AffineTransform of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_
|
||||
#define _NNUE_LAYERS_AFFINE_TRANSFORM_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutputDimensions>
|
||||
class AffineTransform {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
using OutputType = std::int32_t;
|
||||
static_assert(std::is_same<InputType, std::uint8_t>::value, "");
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static constexpr IndexType kPaddedInputDimensions =
|
||||
CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xCC03DAE4u;
|
||||
hash_value += kOutputDimensions;
|
||||
hash_value ^= PreviousLayer::GetHashValue() >> 1;
|
||||
hash_value ^= PreviousLayer::GetHashValue() << 31;
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "AffineTransform[" +
|
||||
std::to_string(kOutputDimensions) + "<-" +
|
||||
std::to_string(kInputDimensions) + "](" +
|
||||
PreviousLayer::GetStructureString() + ")";
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!previous_layer_.ReadParameters(stream)) return false;
|
||||
stream.read(reinterpret_cast<char*>(biases_),
|
||||
kOutputDimensions * sizeof(BiasType));
|
||||
stream.read(reinterpret_cast<char*>(weights_),
|
||||
kOutputDimensions * kPaddedInputDimensions *
|
||||
sizeof(WeightType));
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
if (!previous_layer_.WriteParameters(stream)) return false;
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kOutputDimensions * sizeof(BiasType));
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kOutputDimensions * kPaddedInputDimensions *
|
||||
sizeof(WeightType));
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
|
||||
const __m512i kOnes = _mm512_set1_epi16(1);
|
||||
const auto input_vector = reinterpret_cast<const __m512i*>(input);
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m256i kOnes = _mm256_set1_epi16(1);
|
||||
const auto input_vector = reinterpret_cast<const __m256i*>(input);
|
||||
#elif defined(USE_SSSE3)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m128i kOnes = _mm_set1_epi16(1);
|
||||
const auto input_vector = reinterpret_cast<const __m128i*>(input);
|
||||
#elif defined(IS_ARM)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
|
||||
#endif
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType offset = i * kPaddedInputDimensions;
|
||||
#if defined(USE_AVX512)
|
||||
__m512i sum = _mm512_setzero_si512();
|
||||
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
__m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
|
||||
#else
|
||||
__m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
|
||||
#endif
|
||||
product = _mm512_madd_epi16(product, kOnes);
|
||||
sum = _mm512_add_epi32(sum, product);
|
||||
}
|
||||
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
|
||||
|
||||
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
|
||||
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
|
||||
// and we have to do one more 256bit chunk.
|
||||
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
|
||||
{
|
||||
const auto iv_256 = reinterpret_cast<const __m256i*>(input);
|
||||
const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
int j = kNumChunks * 2;
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2.
|
||||
__m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
|
||||
#else
|
||||
__m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
|
||||
#endif
|
||||
sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
|
||||
|
||||
sum256 = _mm256_hadd_epi32(sum256, sum256);
|
||||
sum256 = _mm256_hadd_epi32(sum256, sum256);
|
||||
const __m128i lo = _mm256_extracti128_si256(sum256, 0);
|
||||
const __m128i hi = _mm256_extracti128_si256(sum256, 1);
|
||||
output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
|
||||
}
|
||||
#elif defined(USE_AVX2)
|
||||
__m256i sum = _mm256_setzero_si256();
|
||||
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i product = _mm256_maddubs_epi16(
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
|
||||
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
|
||||
// even though alignas is specified.
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&input_vector[j]), _mm256_load_si256(&row[j]));
|
||||
product = _mm256_madd_epi16(product, kOnes);
|
||||
sum = _mm256_add_epi32(sum, product);
|
||||
}
|
||||
sum = _mm256_hadd_epi32(sum, sum);
|
||||
sum = _mm256_hadd_epi32(sum, sum);
|
||||
const __m128i lo = _mm256_extracti128_si256(sum, 0);
|
||||
const __m128i hi = _mm256_extracti128_si256(sum, 1);
|
||||
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
|
||||
#elif defined(USE_SSSE3)
|
||||
__m128i sum = _mm_cvtsi32_si128(biases_[i]);
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i product = _mm_maddubs_epi16(
|
||||
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
|
||||
product = _mm_madd_epi16(product, kOnes);
|
||||
sum = _mm_add_epi32(sum, product);
|
||||
}
|
||||
sum = _mm_hadd_epi32(sum, sum);
|
||||
sum = _mm_hadd_epi32(sum, sum);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
#elif defined(IS_ARM)
|
||||
int32x4_t sum = {biases_[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
|
||||
product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
|
||||
sum = vpadalq_s16(sum, product);
|
||||
}
|
||||
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
|
||||
#else
|
||||
OutputType sum = biases_[i];
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
sum += weights_[offset + j] * input[j];
|
||||
}
|
||||
output[i] = sum;
|
||||
#endif
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
// parameter type
|
||||
using BiasType = OutputType;
|
||||
using WeightType = std::int8_t;
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<AffineTransform>;
|
||||
|
||||
// the layer immediately before this layer
|
||||
PreviousLayer previous_layer_;
|
||||
|
||||
// parameter
|
||||
alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
|
||||
};
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,177 @@
|
||||
// Definition of layer ClippedReLU of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_LAYERS_CLIPPED_RELU_H_
|
||||
#define _NNUE_LAYERS_CLIPPED_RELU_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Clipped ReLU
|
||||
template <typename PreviousLayer>
|
||||
class ClippedReLU {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
using OutputType = std::uint8_t;
|
||||
static_assert(std::is_same<InputType, std::int32_t>::value, "");
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0x538D24C7u;
|
||||
hash_value += PreviousLayer::GetHashValue();
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "ClippedReLU[" +
|
||||
std::to_string(kOutputDimensions) + "](" +
|
||||
PreviousLayer::GetStructureString() + ")";
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
|
||||
const auto in = reinterpret_cast<const __m256i*>(input);
|
||||
const auto out = reinterpret_cast<__m256i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
|
||||
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
|
||||
// even though alignas is specified.
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&in[i * 4 + 0]),
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&in[i * 4 + 2]),
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_storeu_si256
|
||||
#else
|
||||
_mm256_store_si256
|
||||
#endif
|
||||
(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
#elif defined(USE_SSSE3)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#ifndef USE_SSE41
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
const auto in = reinterpret_cast<const __m128i*>(input);
|
||||
const auto out = reinterpret_cast<__m128i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 0]),
|
||||
_mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
|
||||
_mm_load_si128(&in[i * 4 + 2]),
|
||||
_mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
const __m128i packedbytes = _mm_packs_epi16(words0, words1);
|
||||
_mm_store_si128(&out[i],
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
);
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
#elif defined(IS_ARM)
|
||||
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
const auto in = reinterpret_cast<const int32x4_t*>(input);
|
||||
const auto out = reinterpret_cast<int8x8_t*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
int16x8_t shifted;
|
||||
const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
|
||||
pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
|
||||
pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
|
||||
out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
|
||||
#else
|
||||
constexpr IndexType kStart = 0;
|
||||
#endif
|
||||
for (IndexType i = kStart; i < kInputDimensions; ++i) {
|
||||
output[i] = static_cast<OutputType>(
|
||||
std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
private:
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<ClippedReLU>;
|
||||
|
||||
// the layer immediately before this layer
|
||||
PreviousLayer previous_layer_;
|
||||
};
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,74 @@
|
||||
// NNUE evaluation function layer InputSlice definition
|
||||
|
||||
#ifndef _NNUE_LAYERS_INPUT_SLICE_H_
|
||||
#define _NNUE_LAYERS_INPUT_SLICE_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// input layer
|
||||
template <IndexType OutputDimensions, IndexType Offset = 0>
|
||||
class InputSlice {
|
||||
public:
|
||||
// need to maintain alignment
|
||||
static_assert(Offset % kMaxSimdWidth == 0, "");
|
||||
|
||||
// output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// output dimensionality
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = 0;
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xEC42E90Du;
|
||||
hash_value ^= kOutputDimensions ^ (Offset << 10);
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
|
||||
std::to_string(Offset) + ":" +
|
||||
std::to_string(Offset + kOutputDimensions) + ")]";
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& /*stream*/) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& /*stream*/) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features,
|
||||
char* /*buffer*/) const {
|
||||
return transformed_features + Offset;
|
||||
}
|
||||
|
||||
private:
|
||||
};
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,163 @@
|
||||
// Definition of layer Sum of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_LAYERS_SUM_H_
|
||||
#define _NNUE_LAYERS_SUM_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Layer that sums the output of multiple layers
|
||||
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
|
||||
class Sum : public Sum<RemainingPreviousLayers...> {
|
||||
private:
|
||||
using Head = FirstPreviousLayer;
|
||||
using Tail = Sum<RemainingPreviousLayers...>;
|
||||
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename Head::OutputType;
|
||||
using OutputType = InputType;
|
||||
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
static_assert(kInputDimensions == Tail::kInputDimensions ,"");
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xBCE400B4u;
|
||||
hash_value ^= Head::GetHashValue() >> 1;
|
||||
hash_value ^= Head::GetHashValue() << 31;
|
||||
hash_value ^= Tail::GetHashValue() >> 2;
|
||||
hash_value ^= Tail::GetHashValue() << 30;
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "Sum[" +
|
||||
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!Tail::ReadParameters(stream)) return false;
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
if (!Tail::WriteParameters(stream)) return false;
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
Tail::Propagate(transformed_features, buffer);
|
||||
const auto head_output = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
for (IndexType i = 0; i <kOutputDimensions; ++i) {
|
||||
output[i] += head_output[i];
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
protected:
|
||||
// A string that represents the list of layers to be summed
|
||||
static std::string GetSummandsString() {
|
||||
return Head::GetStructureString() + "," + Tail::GetSummandsString();
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<Sum>;
|
||||
|
||||
// the layer immediately before this layer
|
||||
FirstPreviousLayer previous_layer_;
|
||||
};
|
||||
|
||||
// Layer that sums the output of multiple layers (when there is one template argument)
|
||||
template <typename PreviousLayer>
|
||||
class Sum<PreviousLayer> {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
using OutputType = InputType;
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xBCE400B4u;
|
||||
hash_value ^= PreviousLayer::GetHashValue() >> 1;
|
||||
hash_value ^= PreviousLayer::GetHashValue() << 31;
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "Sum[" +
|
||||
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
return previous_layer_.Propagate(transformed_features, buffer);
|
||||
}
|
||||
|
||||
protected:
|
||||
// A string that represents the list of layers to be summed
|
||||
static std::string GetSummandsString() {
|
||||
return PreviousLayer::GetStructureString();
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<Sum>;
|
||||
|
||||
// the layer immediately before this layer
|
||||
PreviousLayer previous_layer_;
|
||||
};
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,30 @@
|
||||
// Class for difference calculation of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_ACCUMULATOR_H_
|
||||
#define _NNUE_ACCUMULATOR_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "nnue_architecture.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
// Keep the evaluation value that is the final output together
|
||||
struct alignas(32) Accumulator {
|
||||
std::int16_t
|
||||
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
Value score = VALUE_ZERO;
|
||||
bool computed_accumulation = false;
|
||||
bool computed_score = false;
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,33 @@
|
||||
// Input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_ARCHITECTURE_H_
|
||||
#define _NNUE_ARCHITECTURE_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
// include a header that defines the input features and network structure
|
||||
//#include "architectures/k-p_256x2-32-32.h"
|
||||
//#include "architectures/k-p-cr_256x2-32-32.h"
|
||||
//#include "architectures/k-p-cr-ep_256x2-32-32.h"
|
||||
#include "architectures/halfkp_256x2-32-32.h"
|
||||
//#include "architectures/halfkp-cr-ep_256x2-32-32.h"
|
||||
//#include "architectures/halfkp_384x2-32-32.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
|
||||
static_assert(Network::kOutputDimensions == 1, "");
|
||||
static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
|
||||
|
||||
// List of timings to perform all calculations instead of difference calculation
|
||||
constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,64 @@
|
||||
// Constants used in NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_COMMON_H_
|
||||
#define _NNUE_COMMON_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#include <immintrin.h>
|
||||
#elif defined(USE_SSE41)
|
||||
#include <smmintrin.h>
|
||||
#elif defined(USE_SSSE3)
|
||||
#include <tmmintrin.h>
|
||||
#elif defined(USE_SSE2)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Version constant of the evaluation function file
constexpr std::uint32_t kVersion = 0x7AF32F16u;

// Constants used in the evaluation value calculation
constexpr int FV_SCALE = 16;
constexpr int kWeightScaleBits = 6;

// Size of a cache line (in bytes)
constexpr std::size_t kCacheLineSize = 64;

// SIMD width (in bytes) for the instruction set selected at build time;
// left undefined when none of the listed instruction sets is enabled
#if defined(USE_AVX2)
constexpr std::size_t kSimdWidth = 32;
#elif defined(USE_SSE2) || defined(IS_ARM)
constexpr std::size_t kSimdWidth = 16;
#endif
// Maximum SIMD width (in bytes)
constexpr std::size_t kMaxSimdWidth = 32;

// Type of an input feature after conversion
using TransformedFeatureType = std::uint8_t;

// Type used for indices and dimension counts
using IndexType = std::uint32_t;

// Forward declaration of the learning class template
template <typename Layer>
class Trainer;
|
||||
|
||||
// Round n up to a multiple of base: for non-negative n and positive base this
// is the smallest multiple of base that is greater than or equal to n
template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) {
  // Number of base-sized blocks needed to hold n (integer division rounds
  // down, hence the base - 1 bias)
  const auto num_blocks = (n + base - 1) / base;
  return num_blocks * base;
}
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,357 @@
|
||||
// A class that converts the input features of the NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURE_TRANSFORMER_H_
|
||||
#define _NNUE_FEATURE_TRANSFORMER_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "nnue_common.h"
|
||||
#include "nnue_architecture.h"
|
||||
#include "features/index_list.h"
|
||||
|
||||
#include <cstring> // std::memset()
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
private:
|
||||
// number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
|
||||
public:
|
||||
// output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
|
||||
|
||||
// size of forward propagation buffer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
}
|
||||
|
||||
// a string representing the structure
|
||||
static std::string GetStructureString() {
|
||||
return RawFeatures::GetName() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
stream.read(reinterpret_cast<char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
stream.read(reinterpret_cast<char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// proceed with the difference calculation if possible
|
||||
bool UpdateAccumulatorIfPossible(const Position& pos) const {
|
||||
const auto now = pos.state();
|
||||
if (now->accumulator.computed_accumulation) {
|
||||
return true;
|
||||
}
|
||||
const auto prev = now->previous;
|
||||
if (prev && prev->accumulator.computed_accumulation) {
|
||||
UpdateAccumulator(pos);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// convert input features
|
||||
void Transform(const Position& pos, OutputType* output, bool refresh) const {
|
||||
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
|
||||
RefreshAccumulator(pos);
|
||||
}
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
#elif defined(USE_SSSE3)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
const __m128i kZero = _mm_setzero_si128();
|
||||
#ifndef USE_SSE41
|
||||
const __m128i k0x80s = _mm_set1_epi8(-128);
|
||||
#endif
|
||||
#elif defined(IS_ARM)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
const int8x8_t kZero = {0};
|
||||
#endif
|
||||
const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
#if defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 =
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
|
||||
// compiled with g++ in MSYS2 crashes here because the output memory is not aligned
|
||||
// even though alignas is specified.
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 =
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_loadu_si256
|
||||
#else
|
||||
_mm256_load_si256
|
||||
#endif
|
||||
(&reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_storeu_si256
|
||||
#else
|
||||
_mm256_store_si256
|
||||
#endif
|
||||
(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
#elif defined(USE_SSSE3)
|
||||
auto out = reinterpret_cast<__m128i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
);
|
||||
}
|
||||
#elif defined(IS_ARM)
|
||||
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][i])[j]);
|
||||
}
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
|
||||
}
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Calculate cumulative value without using difference calculation
|
||||
void RefreshAccumulator(const Position& pos) const {
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (const auto perspective : Colors) {
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
#if defined(USE_AVX2)
|
||||
auto accumulation = reinterpret_cast<__m256i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
_mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
|
||||
#else
|
||||
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
|
||||
#endif
|
||||
}
|
||||
#elif defined(USE_SSE2)
|
||||
auto accumulation = reinterpret_cast<__m128i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
#elif defined(IS_ARM)
|
||||
auto accumulation = reinterpret_cast<int16x8_t*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
|
||||
}
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
accumulator.computed_accumulation = true;
|
||||
accumulator.computed_score = false;
|
||||
}
|
||||
|
||||
// Calculate cumulative value using difference calculation
|
||||
void UpdateAccumulator(const Position& pos) const {
|
||||
const auto prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2];
|
||||
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
for (const auto perspective : Colors) {
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<__m256i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<__m128i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
#elif defined(IS_ARM)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<int16x8_t*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
#endif
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
} else {// Difference calculation for the feature amount changed from 1 to 0
|
||||
std::memcpy(accumulator.accumulation[perspective][i],
|
||||
prev_accumulator.accumulation[perspective][i],
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
#if defined(USE_AVX2)
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
#elif defined(USE_SSE2)
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
#elif defined(IS_ARM)
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
|
||||
}
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
accumulator.accumulation[perspective][i][j] -=
|
||||
weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
{// Difference calculation for features that changed from 0 to 1
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
#if defined(USE_AVX2)
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
#elif defined(USE_SSE2)
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
#elif defined(IS_ARM)
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
|
||||
}
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
accumulator.accumulation[perspective][i][j] +=
|
||||
weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
accumulator.computed_accumulation = true;
|
||||
accumulator.computed_score = false;
|
||||
}
|
||||
|
||||
// parameter type
|
||||
using BiasType = std::int16_t;
|
||||
using WeightType = std::int16_t;
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<FeatureTransformer>;
|
||||
|
||||
// parameter
|
||||
alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
WeightType weights_[kHalfDimensions * kInputDimensions];
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,201 @@
|
||||
// USI extended command for NNUE evaluation function
|
||||
|
||||
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../thread.h"
|
||||
#include "../../uci.h"
|
||||
#include "evaluate_nnue.h"
|
||||
#include "nnue_test_command.h"
|
||||
|
||||
#include <set>
|
||||
#include <fstream>
|
||||
|
||||
// Test-only assertion. When X is false it prints the failed expression with
// file, line and function, sleeps briefly, and then deliberately crashes the
// process by writing through an invalid pointer.
// The body is wrapped in do { } while (0) so that `ASSERT(x);` is a single
// statement and stays valid inside an unbraced if/else.
#define ASSERT(X) do { if (!(X)) { \
    std::cout << "\nError : ASSERT(" << #X << "), " << __FILE__ << "(" << __LINE__ << "): " << __func__ << std::endl; \
    std::this_thread::sleep_for(std::chrono::microseconds(3000)); \
    *(int*)1 =0; } } while (0)
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace {
|
||||
|
||||
// Testing RawFeatures mainly for difference calculation
|
||||
void TestFeatures(Position& pos) {
|
||||
const std::uint64_t num_games = 1000;
|
||||
StateInfo si;
|
||||
pos.set(StartFEN, false, &si, Threads.main());
|
||||
const int MAX_PLY = 256; // test up to 256 hands
|
||||
|
||||
StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps
|
||||
int ply; // Trouble from the initial phase
|
||||
|
||||
PRNG prng(20171128);
|
||||
|
||||
std::uint64_t num_moves = 0;
|
||||
std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1);
|
||||
std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
|
||||
constexpr IndexType kUnknown = -1;
|
||||
std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
|
||||
auto make_index_sets = [&](const Position& pos) {
|
||||
std::vector<std::vector<std::set<IndexType>>> index_sets(
|
||||
kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (const auto perspective : Colors) {
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT(index_sets[i][perspective].count(index) == 0);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
index_sets[i][perspective].insert(index);
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return index_sets;
|
||||
};
|
||||
auto update_index_sets = [&](const Position& pos, auto* index_sets) {
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2];
|
||||
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
for (const auto perspective : Colors) {
|
||||
if (reset[perspective]) {
|
||||
(*index_sets)[i][perspective].clear();
|
||||
++num_resets[i];
|
||||
} else {
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT((*index_sets)[i][perspective].count(index) == 1);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
(*index_sets)[i][perspective].erase(index);
|
||||
++num_updates.back();
|
||||
++num_updates[i];
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT((*index_sets)[i][perspective].count(index) == 0);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
(*index_sets)[i][perspective].insert(index);
|
||||
++num_updates.back();
|
||||
++num_updates[i];
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::cout << "feature set: " << RawFeatures::GetName()
|
||||
<< "[" << RawFeatures::kDimensions << "]" << std::endl;
|
||||
std::cout << "start testing with random games";
|
||||
|
||||
for (std::uint64_t i = 0; i < num_games; ++i) {
|
||||
auto index_sets = make_index_sets(pos);
|
||||
for (ply = 0; ply < MAX_PLY; ++ply) {
|
||||
MoveList<LEGAL> mg(pos); // Generate all legal hands
|
||||
|
||||
// There was no legal move == Clog
|
||||
if (mg.size() == 0)
|
||||
break;
|
||||
|
||||
// Randomly choose from the generated moves and advance the phase with the moves.
|
||||
Move m = mg.begin()[prng.rand(mg.size())];
|
||||
pos.do_move(m, state[ply]);
|
||||
|
||||
++num_moves;
|
||||
update_index_sets(pos, &index_sets);
|
||||
ASSERT(index_sets == make_index_sets(pos));
|
||||
}
|
||||
|
||||
pos.set(StartFEN, false, &si, Threads.main());
|
||||
|
||||
// Output'.' every 100 times (so you can see that it's progressing)
|
||||
if ((i % 100) == 0)
|
||||
std::cout << "." << std::flush;
|
||||
}
|
||||
std::cout << "passed." << std::endl;
|
||||
std::cout << num_games << " games, " << num_moves << " moves, "
|
||||
<< num_updates.back() << " updates, "
|
||||
<< (1.0 * num_updates.back() / num_moves)
|
||||
<< " updates per move" << std::endl;
|
||||
std::size_t num_observed_indices = 0;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
const auto count = std::count(trigger_map.begin(), trigger_map.end(), i);
|
||||
num_observed_indices += count;
|
||||
std::cout << "TriggerEvent(" << static_cast<int>(kRefreshTriggers[i])
|
||||
<< "): " << count << " features ("
|
||||
<< (100.0 * count / RawFeatures::kDimensions) << "%), "
|
||||
<< num_updates[i] << " updates ("
|
||||
<< (1.0 * num_updates[i] / num_moves) << " per move), "
|
||||
<< num_resets[i] << " resets ("
|
||||
<< (100.0 * num_resets[i] / num_moves) << "%)"
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << "observed " << num_observed_indices << " ("
|
||||
<< (100.0 * num_observed_indices / RawFeatures::kDimensions)
|
||||
<< "% of " << RawFeatures::kDimensions
|
||||
<< ") features" << std::endl;
|
||||
}
|
||||
|
||||
// Output a string that represents the structure of the evaluation function
|
||||
void PrintInfo(std::istream& stream) {
|
||||
std::cout << "network architecture: " << GetArchitectureString() << std::endl;
|
||||
|
||||
while (true) {
|
||||
std::string file_name;
|
||||
stream >> file_name;
|
||||
if (file_name.empty()) break;
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
const bool success = [&]() {
|
||||
std::ifstream file_stream(file_name, std::ios::binary);
|
||||
if (!file_stream) return false;
|
||||
if (!ReadHeader(file_stream, &hash_value, &architecture)) return false;
|
||||
return true;
|
||||
}();
|
||||
|
||||
std::cout << file_name << ": ";
|
||||
if (success) {
|
||||
if (hash_value == kHashValue) {
|
||||
std::cout << "matches with this binary";
|
||||
if (architecture != GetArchitectureString()) {
|
||||
std::cout << ", but architecture string differs: " << architecture;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
} else {
|
||||
std::cout << architecture << std::endl;
|
||||
}
|
||||
} else {
|
||||
std::cout << "failed to read header" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// USI extended command for NNUE evaluation function
|
||||
void TestCommand(Position& pos, std::istream& stream) {
|
||||
std::string sub_command;
|
||||
stream >> sub_command;
|
||||
|
||||
if (sub_command == "test_features") {
|
||||
TestFeatures(pos);
|
||||
} else if (sub_command == "info") {
|
||||
PrintInfo(stream);
|
||||
} else {
|
||||
std::cout << "usage:" << std::endl;
|
||||
std::cout << " test nnue test_features" << std::endl;
|
||||
std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
@@ -0,0 +1,21 @@
|
||||
// USI extended command interface for NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TEST_COMMAND_H_
|
||||
#define _NNUE_TEST_COMMAND_H_
|
||||
|
||||
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// USI extended command for NNUE evaluation function
|
||||
void TestCommand(Position& pos, std::istream& stream);
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,110 @@
|
||||
// NNUE evaluation function feature conversion class template
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../nnue_common.h"
|
||||
#include "../trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// By default, the learning feature is the same as the original input feature, and specialized as necessary
|
||||
template <typename FeatureType>
|
||||
class Factorizer {
|
||||
public:
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return FeatureType::kDimensions;
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
assert(base_index <FeatureType::kDimensions);
|
||||
training_features->emplace_back(base_index);
|
||||
}
|
||||
};
|
||||
|
||||
// Learning feature information
|
||||
struct FeatureProperties {
|
||||
bool active;
|
||||
IndexType dimensions;
|
||||
};
|
||||
|
||||
// Add the original input features to the learning features
|
||||
template <typename FeatureType>
|
||||
IndexType AppendBaseFeature(
|
||||
FeatureProperties properties, IndexType base_index,
|
||||
std::vector<TrainingFeature>* training_features) {
|
||||
assert(properties.dimensions == FeatureType::kDimensions);
|
||||
assert(base_index < FeatureType::kDimensions);
|
||||
training_features->emplace_back(base_index);
|
||||
return properties.dimensions;
|
||||
}
|
||||
|
||||
// If the learning rate scale is not 0, inherit other types of learning features
|
||||
template <typename FeatureType>
|
||||
IndexType InheritFeaturesIfRequired(
|
||||
IndexType index_offset, FeatureProperties properties, IndexType base_index,
|
||||
std::vector<TrainingFeature>* training_features) {
|
||||
if (!properties.active) {
|
||||
return 0;
|
||||
}
|
||||
assert(properties.dimensions == Factorizer<FeatureType>::GetDimensions());
|
||||
assert(base_index < FeatureType::kDimensions);
|
||||
const auto start = training_features->size();
|
||||
Factorizer<FeatureType>::AppendTrainingFeatures(
|
||||
base_index, training_features);
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
|
||||
feature.ShiftIndex(index_offset);
|
||||
}
|
||||
return properties.dimensions;
|
||||
}
|
||||
|
||||
// Return the index difference as needed, without adding learning features
|
||||
// Call instead of InheritFeaturesIfRequired() if there are no corresponding features
|
||||
IndexType SkipFeatures(FeatureProperties properties) {
|
||||
if (!properties.active) {
|
||||
return 0;
|
||||
}
|
||||
return properties.dimensions;
|
||||
}
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
template <std::size_t N>
|
||||
constexpr IndexType GetActiveDimensions(
|
||||
const FeatureProperties (&properties)[N]) {
|
||||
static_assert(N > 0, "");
|
||||
IndexType dimensions = properties[0].dimensions;
|
||||
for (std::size_t i = 1; i < N; ++i) {
|
||||
if (properties[i].active) {
|
||||
dimensions += properties[i].dimensions;
|
||||
}
|
||||
}
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
// Get the number of elements in a built-in array. The length is deduced from
// the array type, so the argument itself is never read.
template <typename T, std::size_t N>
constexpr std::size_t GetArrayLength(const T (&)[N]) {
  return N;
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,104 @@
|
||||
// Specialization for feature set of feature conversion class template of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../features/feature_set.h"
|
||||
#include "factorizer.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for FeatureSet
|
||||
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
|
||||
class Factorizer<FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
|
||||
private:
|
||||
using Head = Factorizer<FeatureSet<FirstFeatureType>>;
|
||||
using Tail = Factorizer<FeatureSet<RemainingFeatureTypes...>>;
|
||||
|
||||
public:
|
||||
// number of dimensions of original input features
|
||||
static constexpr IndexType kBaseDimensions =
|
||||
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return Head::GetDimensions() + Tail::GetDimensions();
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features,
|
||||
IndexType base_dimensions = kBaseDimensions) {
|
||||
assert(base_index < kBaseDimensions);
|
||||
constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;
|
||||
if (base_index < boundary) {
|
||||
Tail::AppendTrainingFeatures(
|
||||
base_index, training_features, base_dimensions);
|
||||
} else {
|
||||
const auto start = training_features->size();
|
||||
Head::AppendTrainingFeatures(
|
||||
base_index - boundary, training_features, base_dimensions);
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
const auto index = feature.GetIndex();
|
||||
assert(index < Head::GetDimensions() ||
|
||||
(index >= base_dimensions &&
|
||||
index < base_dimensions +
|
||||
Head::GetDimensions() - Head::kBaseDimensions));
|
||||
if (index < Head::kBaseDimensions) {
|
||||
feature.ShiftIndex(Tail::kBaseDimensions);
|
||||
} else {
|
||||
feature.ShiftIndex(Tail::GetDimensions() - Tail::kBaseDimensions);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization when FeatureSet has one template argument
|
||||
template <typename FeatureType>
|
||||
class Factorizer<FeatureSet<FeatureType>> {
|
||||
public:
|
||||
// number of dimensions of original input features
|
||||
static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return Factorizer<FeatureType>::GetDimensions();
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features,
|
||||
IndexType base_dimensions = kBaseDimensions) {
|
||||
assert(base_index < kBaseDimensions);
|
||||
const auto start = training_features->size();
|
||||
Factorizer<FeatureType>::AppendTrainingFeatures(
|
||||
base_index, training_features);
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
|
||||
if (feature.GetIndex() >= kBaseDimensions) {
|
||||
feature.ShiftIndex(base_dimensions - kBaseDimensions);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,103 @@
|
||||
// Specialization of NNUE evaluation function feature conversion class template for HalfKP
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../features/half_kp.h"
|
||||
#include "../../features/p.h"
|
||||
#include "../../features/half_relative_kp.h"
|
||||
#include "factorizer.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for HalfKP
|
||||
template <Side AssociatedKing>
|
||||
class Factorizer<HalfKP<AssociatedKing>> {
|
||||
private:
|
||||
using FeatureType = HalfKP<AssociatedKing>;
|
||||
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
|
||||
// Type of learning feature
|
||||
enum TrainingFeatureType {
|
||||
kFeaturesHalfKP,
|
||||
kFeaturesHalfK,
|
||||
kFeaturesP,
|
||||
kFeaturesHalfRelativeKP,
|
||||
kNumTrainingFeatureTypes,
|
||||
};
|
||||
|
||||
// Learning feature information
|
||||
static constexpr FeatureProperties kProperties[] = {
|
||||
// kFeaturesHalfKP
|
||||
{true, FeatureType::kDimensions},
|
||||
// kFeaturesHalfK
|
||||
{true, SQUARE_NB},
|
||||
// kFeaturesP
|
||||
{true, Factorizer<P>::GetDimensions()},
|
||||
// kFeaturesHalfRelativeKP
|
||||
{true, Factorizer<HalfRelativeKP<AssociatedKing>>::GetDimensions()},
|
||||
};
|
||||
static_assert(GetArrayLength(kProperties) == kNumTrainingFeatureTypes, "");
|
||||
|
||||
public:
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return GetActiveDimensions(kProperties);
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
// kFeaturesHalfKP
|
||||
IndexType index_offset = AppendBaseFeature<FeatureType>(
|
||||
kProperties[kFeaturesHalfKP], base_index, training_features);
|
||||
|
||||
const auto sq_k = static_cast<Square>(base_index / fe_end);
|
||||
const auto p = static_cast<BonaPiece>(base_index % fe_end);
|
||||
// kFeaturesHalfK
|
||||
{
|
||||
const auto& properties = kProperties[kFeaturesHalfK];
|
||||
if (properties.active) {
|
||||
training_features->emplace_back(index_offset + sq_k);
|
||||
index_offset += properties.dimensions;
|
||||
}
|
||||
}
|
||||
// kFeaturesP
|
||||
index_offset += InheritFeaturesIfRequired<P>(
|
||||
index_offset, kProperties[kFeaturesP], p, training_features);
|
||||
// kFeaturesHalfRelativeKP
|
||||
if (p >= fe_hand_end) {
|
||||
index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
|
||||
index_offset, kProperties[kFeaturesHalfRelativeKP],
|
||||
HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
|
||||
training_features);
|
||||
} else {
|
||||
index_offset += SkipFeatures(kProperties[kFeaturesHalfRelativeKP]);
|
||||
}
|
||||
|
||||
assert(index_offset == GetDimensions());
|
||||
}
|
||||
};
|
||||
|
||||
template <Side AssociatedKing>
|
||||
constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,125 @@
|
||||
// Common header of class template for learning NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TRAINER_H_
|
||||
#define _NNUE_TRAINER_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../nnue_common.h"
|
||||
#include "../features/index_list.h"
|
||||
|
||||
#include <sstream>
|
||||
#if defined(USE_BLAS)
|
||||
static_assert(std::is_same<LearnFloatType, float>::value, "");
|
||||
#include <cblas.h>
|
||||
#endif
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Ponanza constant used in the relation between evaluation value and winning percentage
|
||||
constexpr double kPonanzaConstant = 600.0;
|
||||
|
||||
// Class that represents one index of learning feature
|
||||
class TrainingFeature {
|
||||
using StorageType = std::uint32_t;
|
||||
static_assert(std::is_unsigned<StorageType>::value, "");
|
||||
|
||||
public:
|
||||
static constexpr std::uint32_t kIndexBits = 24;
|
||||
static_assert(kIndexBits < std::numeric_limits<StorageType>::digits, "");
|
||||
static constexpr std::uint32_t kCountBits =
|
||||
std::numeric_limits<StorageType>::digits - kIndexBits;
|
||||
|
||||
explicit TrainingFeature(IndexType index) :
|
||||
index_and_count_((index << kCountBits) | 1) {
|
||||
assert(index < (1 << kIndexBits));
|
||||
}
|
||||
TrainingFeature& operator+=(const TrainingFeature& other) {
|
||||
assert(other.GetIndex() == GetIndex());
|
||||
assert(other.GetCount() + GetCount() < (1 << kCountBits));
|
||||
index_and_count_ += other.GetCount();
|
||||
return *this;
|
||||
}
|
||||
IndexType GetIndex() const {
|
||||
return static_cast<IndexType>(index_and_count_ >> kCountBits);
|
||||
}
|
||||
void ShiftIndex(IndexType offset) {
|
||||
assert(GetIndex() + offset < (1 << kIndexBits));
|
||||
index_and_count_ += offset << kCountBits;
|
||||
}
|
||||
IndexType GetCount() const {
|
||||
return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
|
||||
}
|
||||
bool operator<(const TrainingFeature& other) const {
|
||||
return index_and_count_ < other.index_and_count_;
|
||||
}
|
||||
|
||||
private:
|
||||
StorageType index_and_count_;
|
||||
};
|
||||
|
||||
// Structure that represents one sample of training data
|
||||
struct Example {
|
||||
std::vector<TrainingFeature> training_features[2];
|
||||
Learner::PackedSfenValue psv;
|
||||
int sign;
|
||||
double weight;
|
||||
};
|
||||
|
||||
// Message passed to the trainers, e.g. to set a hyperparameter.
// `name` and `value` are fixed at construction; the two counters start at
// zero and are advanced by whoever inspects or accepts the message.
struct Message {
  Message(const std::string& name, const std::string& value = "")
      : name(name),
        value(value),
        num_peekers(0),
        num_receivers(0) {
  }

  const std::string name;       // what the message is about
  const std::string value;      // payload, empty by default
  std::uint32_t num_peekers;    // how many receivers looked at the message
  std::uint32_t num_receivers;  // how many receivers accepted the message
};
|
||||
|
||||
// determine whether to accept the message
|
||||
bool ReceiveMessage(const std::string& name, Message* message) {
|
||||
const auto subscript = "[" + std::to_string(message->num_peekers) + "]";
|
||||
if (message->name.substr(0, name.size() + 1) == name + "[") {
|
||||
++message->num_peekers;
|
||||
}
|
||||
if (message->name == name || message->name == name + subscript) {
|
||||
++message->num_receivers;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Splits `input` at every occurrence of `delimiter` and returns the fields
// in order. Empty fields between two delimiters are kept; an empty input
// yields no fields, and a delimiter at the very end does not add a trailing
// empty field (std::getline reports end of input instead).
//
// Declared inline because this definition lives in a header; a non-inline
// definition would be emitted by every including translation unit and
// violate the one-definition rule.
inline std::vector<std::string> Split(const std::string& input,
                                      char delimiter) {
  std::istringstream stream(input);
  std::vector<std::string> fields;
  std::string field;
  while (std::getline(stream, field, delimiter)) {
    fields.push_back(field);
  }
  return fields;
}
|
||||
|
||||
// Rounds `value` to the nearest integer and converts it to IntType.
// Implemented as floor(value + 0.5), so exact halves go towards positive
// infinity (2.5 -> 3, -2.5 -> -2).
template <typename IntType>
IntType Round(double value) {
  const double shifted = value + 0.5;
  const double floored = std::floor(shifted);
  return static_cast<IntType>(floored);
}
|
||||
|
||||
// make_shared with alignment
|
||||
template <typename T, typename... ArgumentTypes>
|
||||
std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
|
||||
const auto ptr = new(aligned_malloc(sizeof(T), alignof(T)))
|
||||
T(std::forward<ArgumentTypes>(arguments)...);
|
||||
return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,301 @@
|
||||
// Specialization of NNUE evaluation function learning class template for AffineTransform
|
||||
|
||||
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
|
||||
#define _NNUE_TRAINER_AFFINE_TRANSFORM_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../learn/learn.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "trainer.h"
|
||||
|
||||
#include <random>
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Learning: Affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutputDimensions>
|
||||
class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::AffineTransform<PreviousLayer, OutputDimensions>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
if (ReceiveMessage("momentum", message)) {
|
||||
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
if (ReceiveMessage("learning_rate_scale", message)) {
|
||||
learning_rate_scale_ =
|
||||
static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
if (ReceiveMessage("reset", message)) {
|
||||
DequantizeParameters();
|
||||
}
|
||||
if (ReceiveMessage("quantize_parameters", message)) {
|
||||
QuantizeParameters();
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
if (kIsOutputLayer) {
|
||||
// Initialize output layer with 0
|
||||
std::fill(std::begin(biases_), std::end(biases_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
std::fill(std::begin(weights_), std::end(weights_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
} else {
|
||||
// Assuming that the input distribution is unit-mean 0.5, equal variance,
|
||||
// Initialize the output distribution so that each unit has a mean of 0.5 and the same variance as the input
|
||||
const double kSigma = 1.0 / std::sqrt(kInputDimensions);
|
||||
auto distribution = std::normal_distribution<double>(0.0, kSigma);
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
double sum = 0.0;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const auto weight = static_cast<LearnFloatType>(distribution(rng));
|
||||
weights_[kInputDimensions * i + j] = weight;
|
||||
sum += weight;
|
||||
}
|
||||
biases_[i] = static_cast<LearnFloatType>(0.5 - 0.5 * sum);
|
||||
}
|
||||
}
|
||||
QuantizeParameters();
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
batch_input_ = previous_layer_trainer_->Propagate(batch);
|
||||
#if defined(USE_BLAS)
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
cblas_scopy(kOutputDimensions, biases_, 1, &output_[batch_offset], 1);
|
||||
}
|
||||
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
|
||||
kOutputDimensions, batch_size_, kInputDimensions, 1.0,
|
||||
weights_, kInputDimensions,
|
||||
batch_input_, kInputDimensions,
|
||||
1.0, &output_[0], kOutputDimensions);
|
||||
#else
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_batch_offset = kInputDimensions * b;
|
||||
const IndexType output_batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
double sum = biases_[i];
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const IndexType index = kInputDimensions * i + j;
|
||||
sum += weights_[index] * batch_input_[input_batch_offset + j];
|
||||
}
|
||||
output_[output_batch_offset + i] = static_cast<LearnFloatType>(sum);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
const LearnFloatType local_learning_rate =
|
||||
learning_rate * learning_rate_scale_;
|
||||
#if defined(USE_BLAS)
|
||||
// backpropagate
|
||||
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
|
||||
kInputDimensions, batch_size_, kOutputDimensions, 1.0,
|
||||
weights_, kInputDimensions,
|
||||
gradients, kOutputDimensions,
|
||||
0.0, &gradients_[0], kInputDimensions);
|
||||
// update
|
||||
cblas_sscal(kOutputDimensions, momentum_, biases_diff_, 1);
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
cblas_saxpy(kOutputDimensions, 1.0,
|
||||
&gradients[batch_offset], 1, biases_diff_, 1);
|
||||
}
|
||||
cblas_saxpy(kOutputDimensions, -local_learning_rate,
|
||||
biases_diff_, 1, biases_, 1);
|
||||
cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
|
||||
kOutputDimensions, kInputDimensions, batch_size_, 1.0,
|
||||
gradients, kOutputDimensions,
|
||||
batch_input_, kInputDimensions,
|
||||
momentum_, weights_diff_, kInputDimensions);
|
||||
cblas_saxpy(kOutputDimensions * kInputDimensions, -local_learning_rate,
|
||||
weights_diff_, 1, weights_, 1);
|
||||
#else
|
||||
// backpropagate
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_batch_offset = kInputDimensions * b;
|
||||
const IndexType output_batch_offset = kOutputDimensions * b;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
double sum = 0.0;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = kInputDimensions * i + j;
|
||||
sum += weights_[index] * gradients[output_batch_offset + i];
|
||||
}
|
||||
gradients_[input_batch_offset + j] = static_cast<LearnFloatType>(sum);
|
||||
}
|
||||
}
|
||||
// update
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_diff_[i] *= momentum_;
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_diff_[i] *= momentum_;
|
||||
}
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_batch_offset = kInputDimensions * b;
|
||||
const IndexType output_batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_diff_[i] += gradients[output_batch_offset + i];
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const IndexType index = kInputDimensions * i + j;
|
||||
weights_diff_[index] += gradients[output_batch_offset + i] *
|
||||
batch_input_[input_batch_offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_[i] -= local_learning_rate * biases_diff_[i];
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_[i] -= local_learning_rate * weights_diff_[i];
|
||||
}
|
||||
#endif
|
||||
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
batch_input_(nullptr),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer),
|
||||
biases_(),
|
||||
weights_(),
|
||||
biases_diff_(),
|
||||
weights_diff_(),
|
||||
momentum_(0.0),
|
||||
learning_rate_scale_(1.0) {
|
||||
DequantizeParameters();
|
||||
}
|
||||
|
||||
// Weight saturation and parameterization
|
||||
void QuantizeParameters() {
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_[i] = std::max(-kMaxWeightMagnitude,
|
||||
std::min(+kMaxWeightMagnitude, weights_[i]));
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
target_layer_->biases_[i] =
|
||||
Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const auto offset = kInputDimensions * i;
|
||||
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
target_layer_->weights_[padded_offset + j] =
|
||||
Round<typename LayerType::WeightType>(
|
||||
weights_[offset + j] * kWeightScale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read parameterized integer
|
||||
void DequantizeParameters() {
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_[i] = static_cast<LearnFloatType>(
|
||||
target_layer_->biases_[i] / kBiasScale);
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const auto offset = kInputDimensions * i;
|
||||
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
weights_[offset + j] = static_cast<LearnFloatType>(
|
||||
target_layer_->weights_[padded_offset + j] / kWeightScale);
|
||||
}
|
||||
}
|
||||
std::fill(std::begin(biases_diff_), std::end(biases_diff_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
std::fill(std::begin(weights_diff_), std::end(weights_diff_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = LayerType::kInputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// If the output dimensionality is 1, the output layer
|
||||
static constexpr bool kIsOutputLayer = kOutputDimensions == 1;
|
||||
|
||||
// Coefficient used for parameterization
|
||||
static constexpr LearnFloatType kActivationScale =
|
||||
std::numeric_limits<std::int8_t>::max();
|
||||
static constexpr LearnFloatType kBiasScale = kIsOutputLayer ?
|
||||
(kPonanzaConstant * FV_SCALE) :
|
||||
((1 << kWeightScaleBits) * kActivationScale);
|
||||
static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale;
|
||||
|
||||
// Upper limit of absolute value of weight used to prevent overflow when parameterizing integers
|
||||
static constexpr LearnFloatType kMaxWeightMagnitude =
|
||||
std::numeric_limits<typename LayerType::WeightType>::max() / kWeightScale;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Input mini batch
|
||||
const LearnFloatType* batch_input_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// parameter
|
||||
LearnFloatType biases_[kOutputDimensions];
|
||||
LearnFloatType weights_[kOutputDimensions * kInputDimensions];
|
||||
|
||||
// Buffer used for updating parameters
|
||||
LearnFloatType biases_diff_[kOutputDimensions];
|
||||
LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions];
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
|
||||
// hyper parameter
|
||||
LearnFloatType momentum_;
|
||||
LearnFloatType learning_rate_scale_;
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,142 @@
|
||||
// Specialization of NNUE evaluation function learning class template for ClippedReLU
|
||||
|
||||
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
|
||||
#define _NNUE_TRAINER_CLIPPED_RELU_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../learn/learn.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
#include "trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Learning: Affine transformation layer
|
||||
template <typename PreviousLayer>
|
||||
class Trainer<Layers::ClippedReLU<PreviousLayer>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::ClippedReLU<PreviousLayer>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
if (ReceiveMessage("check_health", message)) {
|
||||
CheckHealth();
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
const auto input = previous_layer_trainer_->Propagate(batch);
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
output_[index] = std::max(+kZero, std::min(+kOne, input[index]));
|
||||
min_activations_[i] = std::min(min_activations_[i], output_[index]);
|
||||
max_activations_[i] = std::max(max_activations_[i], output_[index]);
|
||||
}
|
||||
}
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
gradients_[index] = gradients[index] *
|
||||
(output_[index] > kZero) * (output_[index] < kOne);
|
||||
}
|
||||
}
|
||||
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer) {
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_),
|
||||
std::numeric_limits<LearnFloatType>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_),
|
||||
std::numeric_limits<LearnFloatType>::lowest());
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void CheckHealth() {
|
||||
const auto largest_min_activation = *std::max_element(
|
||||
std::begin(min_activations_), std::end(min_activations_));
|
||||
const auto smallest_max_activation = *std::min_element(
|
||||
std::begin(max_activations_), std::end(max_activations_));
|
||||
std::cout << "INFO: largest min activation = " << largest_min_activation
|
||||
<< ", smallest max activation = " << smallest_max_activation
|
||||
<< std::endl;
|
||||
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_),
|
||||
std::numeric_limits<LearnFloatType>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_),
|
||||
std::numeric_limits<LearnFloatType>::lowest());
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// LearnFloatType constant
|
||||
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
|
||||
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
|
||||
// Health check statistics
|
||||
LearnFloatType min_activations_[kOutputDimensions];
|
||||
LearnFloatType max_activations_[kOutputDimensions];
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,377 @@
|
||||
// Specialization for feature transformer of learning class template of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
|
||||
#define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../learn/learn.h"
|
||||
#include "../nnue_feature_transformer.h"
|
||||
#include "trainer.h"
|
||||
#include "features/factorizer_feature_set.h"
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <set>
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Learning: Input feature converter
|
||||
template <>
|
||||
class Trainer<FeatureTransformer> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = FeatureTransformer;
|
||||
|
||||
public:
|
||||
template <typename T>
|
||||
friend struct AlignedDeleter;
|
||||
template <typename T, typename... ArgumentTypes>
|
||||
friend std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments);
|
||||
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(LayerType* target_layer) {
|
||||
return MakeAlignedSharedPtr<Trainer>(target_layer);
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
if (ReceiveMessage("momentum", message)) {
|
||||
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
if (ReceiveMessage("learning_rate_scale", message)) {
|
||||
learning_rate_scale_ =
|
||||
static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
if (ReceiveMessage("reset", message)) {
|
||||
DequantizeParameters();
|
||||
}
|
||||
if (ReceiveMessage("quantize_parameters", message)) {
|
||||
QuantizeParameters();
|
||||
}
|
||||
if (ReceiveMessage("clear_unobserved_feature_weights", message)) {
|
||||
ClearUnobservedFeatureWeights();
|
||||
}
|
||||
if (ReceiveMessage("check_health", message)) {
|
||||
CheckHealth();
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
std::fill(std::begin(weights_), std::end(weights_), +kZero);
|
||||
const double kSigma = 0.1 / std::sqrt(RawFeatures::kMaxActiveDimensions);
|
||||
auto distribution = std::normal_distribution<double>(0.0, kSigma);
|
||||
for (IndexType i = 0; i < kHalfDimensions * RawFeatures::kDimensions; ++i) {
|
||||
const auto weight = static_cast<LearnFloatType>(distribution(rng));
|
||||
weights_[i] = weight;
|
||||
}
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
biases_[i] = static_cast<LearnFloatType>(0.5);
|
||||
}
|
||||
QuantizeParameters();
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kOutputDimensions * batch.size());
|
||||
}
|
||||
batch_ = &batch;
|
||||
// affine transform
|
||||
#pragma omp parallel for
|
||||
for (IndexType b = 0; b < batch.size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType c = 0; c < 2; ++c) {
|
||||
const IndexType output_offset = batch_offset + kHalfDimensions * c;
|
||||
#if defined(USE_BLAS)
|
||||
cblas_scopy(kHalfDimensions, biases_, 1, &output_[output_offset], 1);
|
||||
for (const auto& feature : batch[b].training_features[c]) {
|
||||
const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
|
||||
cblas_saxpy(kHalfDimensions, (float)feature.GetCount(),
|
||||
&weights_[weights_offset], 1, &output_[output_offset], 1);
|
||||
}
|
||||
#else
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
output_[output_offset + i] = biases_[i];
|
||||
}
|
||||
for (const auto& feature : batch[b].training_features[c]) {
|
||||
const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
output_[output_offset + i] +=
|
||||
feature.GetCount() * weights_[weights_offset + i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
// clipped ReLU
|
||||
for (IndexType b = 0; b < batch.size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
min_pre_activation_ = std::min(min_pre_activation_, output_[index]);
|
||||
max_pre_activation_ = std::max(max_pre_activation_, output_[index]);
|
||||
output_[index] = std::max(+kZero, std::min(+kOne, output_[index]));
|
||||
const IndexType t = i % kHalfDimensions;
|
||||
min_activations_[t] = std::min(min_activations_[t], output_[index]);
|
||||
max_activations_[t] = std::max(max_activations_[t], output_[index]);
|
||||
}
|
||||
}
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
const LearnFloatType local_learning_rate =
|
||||
learning_rate * learning_rate_scale_;
|
||||
for (IndexType b = 0; b < batch_->size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
gradients_[index] = gradients[index] *
|
||||
((output_[index] > kZero) * (output_[index] < kOne));
|
||||
}
|
||||
}
|
||||
// Since the weight matrix updates only the columns corresponding to the features that appeared in the input,
|
||||
// Correct the learning rate and adjust the scale without using momentum
|
||||
const LearnFloatType effective_learning_rate =
|
||||
static_cast<LearnFloatType>(local_learning_rate / (1.0 - momentum_));
|
||||
#if defined(USE_BLAS)
|
||||
cblas_sscal(kHalfDimensions, momentum_, biases_diff_, 1);
|
||||
for (IndexType b = 0; b < batch_->size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType c = 0; c < 2; ++c) {
|
||||
const IndexType output_offset = batch_offset + kHalfDimensions * c;
|
||||
cblas_saxpy(kHalfDimensions, 1.0,
|
||||
&gradients_[output_offset], 1, biases_diff_, 1);
|
||||
}
|
||||
}
|
||||
cblas_saxpy(kHalfDimensions, -local_learning_rate,
|
||||
biases_diff_, 1, biases_, 1);
|
||||
#pragma omp parallel
|
||||
{
|
||||
#if defined(_OPENMP)
|
||||
const IndexType num_threads = omp_get_num_threads();
|
||||
const IndexType thread_index = omp_get_thread_num();
|
||||
#endif
|
||||
for (IndexType b = 0; b < batch_->size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType c = 0; c < 2; ++c) {
|
||||
const IndexType output_offset = batch_offset + kHalfDimensions * c;
|
||||
for (const auto& feature : (*batch_)[b].training_features[c]) {
|
||||
#if defined(_OPENMP)
|
||||
if (feature.GetIndex() % num_threads != thread_index) continue;
|
||||
#endif
|
||||
const IndexType weights_offset =
|
||||
kHalfDimensions * feature.GetIndex();
|
||||
const auto scale = static_cast<LearnFloatType>(
|
||||
effective_learning_rate / feature.GetCount());
|
||||
cblas_saxpy(kHalfDimensions, -scale,
|
||||
&gradients_[output_offset], 1,
|
||||
&weights_[weights_offset], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
biases_diff_[i] *= momentum_;
|
||||
}
|
||||
for (IndexType b = 0; b < batch_->size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType c = 0; c < 2; ++c) {
|
||||
const IndexType output_offset = batch_offset + kHalfDimensions * c;
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
biases_diff_[i] += gradients_[output_offset + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
biases_[i] -= local_learning_rate * biases_diff_[i];
|
||||
}
|
||||
for (IndexType b = 0; b < batch_->size(); ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType c = 0; c < 2; ++c) {
|
||||
const IndexType output_offset = batch_offset + kHalfDimensions * c;
|
||||
for (const auto& feature : (*batch_)[b].training_features[c]) {
|
||||
const IndexType weights_offset = kHalfDimensions * feature.GetIndex();
|
||||
const auto scale = static_cast<LearnFloatType>(
|
||||
effective_learning_rate / feature.GetCount());
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
weights_[weights_offset + i] -=
|
||||
scale * gradients_[output_offset + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (IndexType b = 0; b < batch_->size(); ++b) {
|
||||
for (IndexType c = 0; c < 2; ++c) {
|
||||
for (const auto& feature : (*batch_)[b].training_features[c]) {
|
||||
observed_features.set(feature.GetIndex());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer) :
|
||||
batch_(nullptr),
|
||||
target_layer_(target_layer),
|
||||
biases_(),
|
||||
weights_(),
|
||||
biases_diff_(),
|
||||
momentum_(0.0),
|
||||
learning_rate_scale_(1.0) {
|
||||
min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
|
||||
max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_),
|
||||
std::numeric_limits<LearnFloatType>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_),
|
||||
std::numeric_limits<LearnFloatType>::lowest());
|
||||
DequantizeParameters();
|
||||
}
|
||||
|
||||
// Weight saturation and parameterization
|
||||
void QuantizeParameters() {
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
target_layer_->biases_[i] =
|
||||
Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
|
||||
}
|
||||
std::vector<TrainingFeature> training_features;
|
||||
#pragma omp parallel for private(training_features)
|
||||
for (IndexType j = 0; j < RawFeatures::kDimensions; ++j) {
|
||||
training_features.clear();
|
||||
Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
|
||||
j, &training_features);
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
double sum = 0.0;
|
||||
for (const auto& feature : training_features) {
|
||||
sum += weights_[kHalfDimensions * feature.GetIndex() + i];
|
||||
}
|
||||
target_layer_->weights_[kHalfDimensions * j + i] =
|
||||
Round<typename LayerType::WeightType>(sum * kWeightScale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read parameterized integer
|
||||
void DequantizeParameters() {
|
||||
for (IndexType i = 0; i < kHalfDimensions; ++i) {
|
||||
biases_[i] = static_cast<LearnFloatType>(
|
||||
target_layer_->biases_[i] / kBiasScale);
|
||||
}
|
||||
std::fill(std::begin(weights_), std::end(weights_), +kZero);
|
||||
for (IndexType i = 0; i < kHalfDimensions * RawFeatures::kDimensions; ++i) {
|
||||
weights_[i] = static_cast<LearnFloatType>(
|
||||
target_layer_->weights_[i] / kWeightScale);
|
||||
}
|
||||
std::fill(std::begin(biases_diff_), std::end(biases_diff_), +kZero);
|
||||
}
|
||||
|
||||
// Set the weight corresponding to the feature that does not appear in the learning data to 0
|
||||
void ClearUnobservedFeatureWeights() {
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
if (!observed_features.test(i)) {
|
||||
std::fill(std::begin(weights_) + kHalfDimensions * i,
|
||||
std::begin(weights_) + kHalfDimensions * (i + 1), +kZero);
|
||||
}
|
||||
}
|
||||
QuantizeParameters();
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void CheckHealth() {
|
||||
std::cout << "INFO: observed " << observed_features.count()
|
||||
<< " (out of " << kInputDimensions << ") features" << std::endl;
|
||||
|
||||
constexpr LearnFloatType kPreActivationLimit =
|
||||
std::numeric_limits<typename LayerType::WeightType>::max() /
|
||||
kWeightScale;
|
||||
std::cout << "INFO: (min, max) of pre-activations = "
|
||||
<< min_pre_activation_ << ", "
|
||||
<< max_pre_activation_ << " (limit = "
|
||||
<< kPreActivationLimit << ")" << std::endl;
|
||||
|
||||
const auto largest_min_activation = *std::max_element(
|
||||
std::begin(min_activations_), std::end(min_activations_));
|
||||
const auto smallest_max_activation = *std::min_element(
|
||||
std::begin(max_activations_), std::end(max_activations_));
|
||||
std::cout << "INFO: largest min activation = " << largest_min_activation
|
||||
<< ", smallest max activation = " << smallest_max_activation
|
||||
<< std::endl;
|
||||
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_),
|
||||
std::numeric_limits<LearnFloatType>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_),
|
||||
std::numeric_limits<LearnFloatType>::lowest());
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
Features::Factorizer<RawFeatures>::GetDimensions();
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
static constexpr IndexType kHalfDimensions = LayerType::kHalfDimensions;
|
||||
|
||||
// Coefficient used for parameterization
|
||||
static constexpr LearnFloatType kActivationScale =
|
||||
std::numeric_limits<std::int8_t>::max();
|
||||
static constexpr LearnFloatType kBiasScale = kActivationScale;
|
||||
static constexpr LearnFloatType kWeightScale = kActivationScale;
|
||||
|
||||
// LearnFloatType constant
|
||||
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
|
||||
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
|
||||
|
||||
// mini batch
|
||||
const std::vector<Example>* batch_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// parameter
|
||||
alignas(kCacheLineSize) LearnFloatType biases_[kHalfDimensions];
|
||||
alignas(kCacheLineSize)
|
||||
LearnFloatType weights_[kHalfDimensions * kInputDimensions];
|
||||
|
||||
// Buffer used for updating parameters
|
||||
LearnFloatType biases_diff_[kHalfDimensions];
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
|
||||
// Features that appeared in the training data
|
||||
std::bitset<kInputDimensions> observed_features;
|
||||
|
||||
// hyper parameter
|
||||
LearnFloatType momentum_;
|
||||
LearnFloatType learning_rate_scale_;
|
||||
|
||||
// Health check statistics
|
||||
LearnFloatType min_pre_activation_;
|
||||
LearnFloatType max_pre_activation_;
|
||||
LearnFloatType min_activations_[kHalfDimensions];
|
||||
LearnFloatType max_activations_[kHalfDimensions];
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,251 @@
|
||||
// Specialization of NNUE evaluation function learning class template for InputSlice
|
||||
|
||||
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
|
||||
#define _NNUE_TRAINER_INPUT_SLICE_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../learn/learn.h"
|
||||
#include "../layers/input_slice.h"
|
||||
#include "trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Learning: Input layer
|
||||
class SharedInputTrainer {
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<SharedInputTrainer> Create(
|
||||
FeatureTransformer* feature_transformer) {
|
||||
static std::shared_ptr<SharedInputTrainer> instance;
|
||||
if (!instance) {
|
||||
instance.reset(new SharedInputTrainer(feature_transformer));
|
||||
}
|
||||
++instance->num_referrers_;
|
||||
return instance;
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kSendMessage;
|
||||
feature_transformer_trainer_->SendMessage(message);
|
||||
}
|
||||
assert(current_operation_ == Operation::kSendMessage);
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kInitialize;
|
||||
feature_transformer_trainer_->Initialize(rng);
|
||||
}
|
||||
assert(current_operation_ == Operation::kInitialize);
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (gradients_.size() < kInputDimensions * batch.size()) {
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kPropagate;
|
||||
output_ = feature_transformer_trainer_->Propagate(batch);
|
||||
}
|
||||
assert(current_operation_ == Operation::kPropagate);
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
return output_;
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
if (num_referrers_ == 1) {
|
||||
feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
|
||||
return;
|
||||
}
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kBackPropagate;
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kInputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(current_operation_ == Operation::kBackPropagate);
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kInputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
gradients_[batch_offset + i] += gradients[batch_offset + i];
|
||||
}
|
||||
}
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
feature_transformer_trainer_->Backpropagate(
|
||||
gradients_.data(), learning_rate);
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
SharedInputTrainer(FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
num_referrers_(0),
|
||||
num_calls_(0),
|
||||
current_operation_(Operation::kNone),
|
||||
feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
|
||||
feature_transformer)),
|
||||
output_(nullptr) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
FeatureTransformer::kOutputDimensions;
|
||||
|
||||
// type of processing
|
||||
enum class Operation {
|
||||
kNone,
|
||||
kSendMessage,
|
||||
kInitialize,
|
||||
kPropagate,
|
||||
kBackPropagate,
|
||||
};
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// number of layers sharing this layer as input
|
||||
std::uint32_t num_referrers_;
|
||||
|
||||
// Number of times the current process has been called
|
||||
std::uint32_t num_calls_;
|
||||
|
||||
// current processing type
|
||||
Operation current_operation_;
|
||||
|
||||
// Trainer of input feature converter
|
||||
const std::shared_ptr<Trainer<FeatureTransformer>>
|
||||
feature_transformer_trainer_;
|
||||
|
||||
// pointer to output shared for forward propagation
|
||||
const LearnFloatType* output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
};
|
||||
|
||||
// Learning: Input layer
|
||||
template <IndexType OutputDimensions, IndexType Offset>
|
||||
class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* /*target_layer*/, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(new Trainer(feature_transformer));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
shared_input_trainer_->SendMessage(message);
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
shared_input_trainer_->Initialize(rng);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
const auto input = shared_input_trainer_->Propagate(batch);
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_offset = kInputDimensions * b;
|
||||
const IndexType output_offset = kOutputDimensions * b;
|
||||
#if defined(USE_BLAS)
|
||||
cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
|
||||
&output_[output_offset], 1);
|
||||
#else
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output_[output_offset + i] = input[input_offset + Offset + i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_offset = kInputDimensions * b;
|
||||
const IndexType output_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
if (i < Offset || i >= Offset + kOutputDimensions) {
|
||||
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
} else {
|
||||
gradients_[input_offset + i] = gradients[output_offset + i - Offset];
|
||||
}
|
||||
}
|
||||
}
|
||||
shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(FeatureTransformer* feature_transformer):
|
||||
batch_size_(0),
|
||||
shared_input_trainer_(SharedInputTrainer::Create(feature_transformer)) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
FeatureTransformer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Trainer of shared input layer
|
||||
const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,190 @@
|
||||
// Specialization of NNUE evaluation function learning class template for Sum
|
||||
|
||||
#ifndef _NNUE_TRAINER_SUM_H_
|
||||
#define _NNUE_TRAINER_SUM_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../../learn/learn.h"
|
||||
#include "../layers/sum.h"
|
||||
#include "trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Learning: A layer that sums the outputs of multiple layers
|
||||
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
|
||||
class Trainer<Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>> :
|
||||
Trainer<Layers::Sum<RemainingPreviousLayers...>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>;
|
||||
using Tail = Trainer<Layers::Sum<RemainingPreviousLayers...>>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
// The results of other member functions do not depend on the processing order, so
|
||||
// Tail is processed first for the purpose of simplifying the implementation, but
|
||||
// SendMessage processes Head first to make it easier to understand subscript correspondence
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
Tail::SendMessage(message);
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
Tail::Initialize(rng);
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
auto output = Tail::Propagate(batch);
|
||||
const auto head_output = previous_layer_trainer_->Propagate(batch);
|
||||
#if defined(USE_BLAS)
|
||||
cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
|
||||
head_output, 1, output, 1);
|
||||
#else
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output[batch_offset + i] += head_output[batch_offset + i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return output;
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
Tail::Backpropagate(gradients, learning_rate);
|
||||
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer):
|
||||
Tail(target_layer, feature_transformer),
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<FirstPreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// make subclass friend
|
||||
template <typename SumLayer>
|
||||
friend class Trainer;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<FirstPreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
};
|
||||
|
||||
|
||||
// Learning: Layer that takes the sum of the outputs of multiple layers (when there is one template argument)
|
||||
template <typename PreviousLayer>
|
||||
class Trainer<Layers::Sum<PreviousLayer>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::Sum<PreviousLayer>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
const auto output = previous_layer_trainer_->Propagate(batch);
|
||||
#if defined(USE_BLAS)
|
||||
cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
|
||||
#else
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output_[batch_offset + i] = output[batch_offset + i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// make subclass friend
|
||||
template <typename SumLayer>
|
||||
friend class Trainer;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user