Merge branch 'master' into stockfish-nnue-2020-08-30-macos

This commit is contained in:
Kenn Costales
2020-12-08 22:49:11 +08:00
committed by GitHub
121 changed files with 23203 additions and 9127 deletions
+101 -67
View File
@@ -28,43 +28,49 @@ else
EXE = stockfish
endif
### Installation dir definitions
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
### Built-in benchmark for pgo-builds
PGOBENCH = ./$(EXE) bench
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 100000
### Source and object files
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
nnue/evaluate_nnue.cpp \
nnue/evaluate_nnue_learner.cpp \
nnue/features/half_kp.cpp \
nnue/features/half_relative_kp.cpp \
nnue/features/k.cpp \
nnue/features/p.cpp \
nnue/features/castling_right.cpp \
nnue/features/enpassant.cpp \
nnue/nnue_test_command.cpp \
extra/sfen_packer.cpp \
learn/gensfen2019.cpp \
learn/learner.cpp \
learn/learning_tools.cpp \
learn/multi_think.cpp
OBJS = $(notdir $(SRCS:.cpp=.o))
VPATH = syzygy:nnue:nnue/features:eval:extra:learn
### Establish the operating system name
KERNEL = $(shell uname -s)
ifeq ($(KERNEL),Linux)
OS = $(shell uname -o)
endif
### Installation dir definitions
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
### Built-in benchmark for pgo-builds
PGO_TRAINING_DATA_FILE = pgo_training_data.bin
PGOBENCH = ./$(EXE) bench
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 sfen_format bin output_file_name $(PGO_TRAINING_DATA_FILE)
### Source and object files
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
extra/stockfish_blas.cpp \
nnue/evaluate_nnue.cpp \
nnue/evaluate_nnue_learner.cpp \
nnue/features/half_kp.cpp \
nnue/features/half_ka.cpp \
nnue/features/half_relative_kp.cpp \
nnue/features/half_relative_ka.cpp \
nnue/features/k.cpp \
nnue/features/p.cpp \
nnue/features/a.cpp \
nnue/features/castling_right.cpp \
nnue/features/enpassant.cpp \
nnue/nnue_test_command.cpp \
learn/sfen_packer.cpp \
learn/learn.cpp \
learn/gensfen.cpp \
learn/opening_book.cpp \
learn/convert.cpp \
learn/transform.cpp
OBJS = $(notdir $(SRCS:.cpp=.o))
VPATH = syzygy:nnue:nnue/features:eval:extra:learn
### ==========================================================================
### Section 2. High-level Configuration
### ==========================================================================
@@ -99,17 +105,23 @@ endif
### 2.1. General and architecture defaults
ifeq ($(ARCH),)
ARCH = x86-64-modern
help_skip_sanity = yes
endif
# explicitly check for the list of supported architectures (as listed with make help),
# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
ifeq ($(ARCH), $(filter $(ARCH), \
x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
SUPPORTED_ARCH=true
else
SUPPORTED_ARCH=false
endif
blas = no
optimize = yes
debug = no
sanitize = no
@@ -127,7 +139,6 @@ avx512 = no
vnni256 = no
vnni512 = no
neon = no
ARCH = x86-64-modern
STRIP = strip
### 2.2 Architecture specific
@@ -306,9 +317,9 @@ endif
### ==========================================================================
### 3.1 Selecting compiler (default = gcc)
CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(LEARNCXXFLAGS)
DEPENDFLAGS += -std=c++17
LDFLAGS += $(EXTRALDFLAGS) $(LEARNLDFLAGS)
CXXFLAGS += -g -Wall -Wcast-qual -fno-exceptions -std=c++17 -fopenmp -I. $(EXTRACXXFLAGS)
LDFLAGS += -fopenmp $(EXTRALDFLAGS)
DEPENDFLAGS += -std=c++17 -I.
ifeq ($(COMP),)
COMP=gcc
@@ -391,19 +402,6 @@ ifeq ($(COMP),clang)
endif
endif
ifeq ($(comp),icc)
profile_make = icc-profile-make
profile_use = icc-profile-use
else
ifeq ($(comp),clang)
profile_make = clang-profile-make
profile_use = clang-profile-use
else
profile_make = gcc-profile-make
profile_use = gcc-profile-use
endif
endif
ifeq ($(KERNEL),Darwin)
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
@@ -415,20 +413,30 @@ endif
# Currently we don't know how to make PGO builds with the NDK yet.
ifeq ($(COMP),ndk)
CXXFLAGS += -stdlib=libc++ -fPIE
comp=clang
ifeq ($(arch),armv7)
comp=armv7a-linux-androideabi16-clang
CXX=armv7a-linux-androideabi16-clang++
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
STRIP=arm-linux-androideabi-strip
endif
ifeq ($(arch),armv8)
comp=aarch64-linux-android21-clang
CXX=aarch64-linux-android21-clang++
STRIP=aarch64-linux-android-strip
endif
LDFLAGS += -static-libstdc++ -pie -lm -latomic
endif
ifeq ($(comp),icc)
profile_make = icc-profile-make
profile_use = icc-profile-use
else ifeq ($(comp),clang)
profile_make = clang-profile-make
profile_use = clang-profile-use
else
profile_make = gcc-profile-make
profile_use = gcc-profile-use
endif
### Travis CI script uses COMPILER to overwrite CXX
ifdef COMPILER
COMPCXX=$(COMPILER)
@@ -463,14 +471,33 @@ ifneq ($(comp),mingw)
endif
endif
### 3.2.1 Debugging
### 3.2.1. BLAS libraries
ifeq ($(blas), yes)
LDFLAGS += -lopenblas
ifeq ($(KERNEL),Linux)
LDFLAGS +=
else
CXXFLAGS += -I/mingw64/include/OpenBLAS
ifeq ($(debug),yes)
LDFLAGS += -Wl,-static
else
LDFLAGS += -Wl,-s -static
endif
endif
CXXFLAGS += -DUSE_BLAS
endif
### 3.2.2 Debugging
ifeq ($(debug),no)
CXXFLAGS += -DNDEBUG
else
CXXFLAGS += -g
endif
### 3.2.2 Debugging with undefined behavior sanitizers
### 3.2.3 Debugging with undefined behavior sanitizers
ifneq ($(sanitize),no)
CXXFLAGS += -g3 -fsanitize=$(sanitize)
LDFLAGS += -fsanitize=$(sanitize)
@@ -600,11 +627,13 @@ endif
### needs access to the optimization flags.
ifeq ($(optimize),yes)
ifeq ($(debug), no)
ifeq ($(COMP),ndk)
CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS)
else ifeq ($(comp),clang)
ifeq ($(comp),clang)
CXXFLAGS += -flto=thin
ifneq ($(findstring MINGW,$(KERNEL)),)
CXXFLAGS += -fuse-ld=lld
else ifneq ($(findstring MSYS,$(KERNEL)),)
CXXFLAGS += -fuse-ld=lld
endif
LDFLAGS += $(CXXFLAGS)
# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
@@ -628,10 +657,12 @@ ifeq ($(debug), no)
# So, only enable it for a cross from Linux by default.
else ifeq ($(comp),mingw)
ifeq ($(KERNEL),Linux)
ifneq ($(arch),i386)
CXXFLAGS += -flto
LDFLAGS += $(CXXFLAGS) -flto=jobserver
endif
endif
endif
endif
endif
@@ -707,11 +738,12 @@ help:
@echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
@echo ""
@echo "-------------------------------"
ifeq ($(SUPPORTED_ARCH), true)
ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true)
@echo "The selected architecture $(ARCH) will enable the following configuration: "
@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
else
@echo "Specify a supported architecture with the ARCH option for more details"
@echo ""
endif
@@ -719,7 +751,7 @@ endif
config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
clang-profile-use clang-profile-make
build: config-sanity
build: net config-sanity
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
profile-build: net config-sanity objclean profileclean
@@ -729,6 +761,7 @@ profile-build: net config-sanity objclean profileclean
@echo ""
@echo "Step 2/4. Running benchmark for pgo-build ..."
$(PGOBENCH) > /dev/null
$(PGOGENSFEN) > /dev/null
@echo ""
@echo "Step 3/4. Building optimized executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
@@ -745,12 +778,13 @@ install:
-cp $(EXE) $(BINDIR)
-strip $(BINDIR)/$(EXE)
#clean all
# clean all
clean: objclean profileclean
@rm -f .depend *~ core
# evaluation network (nnue)
net:
$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
$(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
@echo "Default net: $(nnuenet)"
$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
@@ -772,7 +806,6 @@ net:
echo "shasum / sha256sum not found, skipping net validation"; \
fi
# clean binaries and objects
objclean:
@rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o
@@ -782,6 +815,7 @@ profileclean:
@rm -rf profdir
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda
@rm -f stockfish.profdata *.profraw
@rm -f $(PGO_TRAINING_DATA_FILE)
default:
help
@@ -792,7 +826,7 @@ default:
all: $(EXE) .depend
config-sanity:
config-sanity: net
@echo ""
@echo "Config:"
@echo "debug: '$(debug)'"
@@ -913,6 +947,6 @@ profile-learn: config-sanity objclean profileclean
rm generated_kifu.bin
.depend:
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@
-include .depend
+2
View File
@@ -164,5 +164,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
++posCounter;
}
list.emplace_back("setoption name Use NNUE value true");
return list;
}
-82
View File
@@ -1,82 +0,0 @@
#ifndef _EVALUATE_COMMON_H_
#define _EVALUATE_COMMON_H_

// Declarations shared by the modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT).
// NOTE(review): Position, Color and Key are not declared in this header; presumably
// every includer pulls in the headers that declare them first - confirm.

#if defined(EVAL_NNUE) || defined(EVAL_LEARN)

#include <array>      // std::array
#include <cstdint>    // int32_t, uint64_t
#include <functional> // std::function
#include <string>     // std::string

// KK file name
#define KK_BIN "KK_synthesized.bin"

// KKP file name
#define KKP_BIN "KKP_synthesized.bin"

// KPP file name
#define KPP_BIN "KPP_synthesized.bin"

namespace Eval
{

#if defined(USE_EVAL_HASH)
    // Prefetch function for the evaluation hash.
    void prefetch_evalhash(const Key key);
#endif

    // Applies the function f to each parameter of the evaluation function.
    // Used for parameter analysis etc.
    // type selects which parameters are visited:
    //   type = -1: KK, KKP and KPP (all)
    //   type =  0: KK only
    //   type =  1: KKP only
    //   type =  2: KPP only
    void foreach_eval_param(std::function<void(int32_t, int32_t)>f, int type = -1);

    // --------------------------
    //        for learning
    // --------------------------

#if defined(EVAL_LEARN)
    // Initializes the gradient array used during learning.
    // The learning rates are passed as arguments; if 0.0 is passed, the default value is used.
    // As the epoch given to update_weights() advances, eta changes gradually from
    // eta1 to eta2 until eta_epoch; after eta2_epoch it changes gradually from eta2 to eta3.
    void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3);

    // Adds the gradient delta to the gradient array for every feature that appears
    // in the current position.
    //   freeze[0]: if set, kk is not learned
    //   freeze[1]: if set, kkp is not learned
    //   freeze[2]: if set, kpp is not learned
    //   freeze[3]: if set, kppp is not learned
    void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4>& freeze);

    // Performs the weight update (SGD, AdaGrad or similar) based on the current gradient.
    //   epoch    : generation counter (starting from 0)
    //   freeze[0]: if set, kk is not learned
    //   freeze[1]: if set, kkp is not learned
    //   freeze[2]: if set, kpp is not learned
    //   freeze[3]: if set, kppp is not learned
    void update_weights(uint64_t epoch, const std::array<bool, 4>& freeze);

    // Saves the evaluation function parameters to a file.
    // suffix is the extension appended to the end of the file name.
    void save_eval(std::string suffix);

    // Returns the current eta.
    double get_eta();

    // -- learning related commands

    // Normalizes KK. Note that the result is not completely equivalent to the
    // original evaluation function.
    // The idea: by keeping the kkp and kpp values as close to zero as possible,
    // the value (zero) of feature factors that never appeared during learning
    // remains a reasonable one.
    void regularize_kk();
#endif

}

#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)

#endif // _EVALUATE_COMMON_H_
+93 -99
View File
@@ -20,61 +20,25 @@
#include <cassert>
#include <cstdlib>
#include <cstring> // For std::memset
#include <fstream>
#include <iomanip>
#include <sstream>
#include <iostream>
#include <set>
#include <streambuf>
#include <vector>
#include "nnue/evaluate_nnue.h"
#include "bitboard.h"
#include "evaluate.h"
#include "material.h"
#include "misc.h"
#include "pawns.h"
#include "thread.h"
#include "uci.h"
#include "incbin/incbin.h"
#ifdef EVAL_LEARN
namespace Learner
{
extern bool use_raw_nnue_eval;
}
#endif
namespace Eval {
bool useNNUE;
std::string eval_file_loaded="None";
void init_NNUE() {
useNNUE = Options["Use NNUE"];
std::string eval_file = std::string(Options["EvalFile"]);
if (useNNUE && eval_file_loaded != eval_file)
if (Eval::NNUE::load_eval_file(eval_file))
eval_file_loaded = eval_file;
}
void verify_NNUE() {
std::string eval_file = std::string(Options["EvalFile"]);
if (useNNUE && eval_file_loaded != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);
sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl;
sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl;
sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl;
sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl;
sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl;
std::exit(EXIT_FAILURE);
}
if (useNNUE)
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
else
sync_cout << "info string classical evaluation enabled." << sync_endl;
}
}
using namespace std;
namespace Trace {
@@ -120,11 +84,11 @@ using namespace Trace;
namespace {
// Threshold for lazy and space evaluation
constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222);
constexpr Value NNUEThreshold1 = Value(550);
constexpr Value NNUEThreshold2 = Value(150);
constexpr Value LazyThreshold1 = Value(1565);
constexpr Value LazyThreshold2 = Value(1102);
constexpr Value SpaceThreshold = Value(11551);
constexpr Value NNUEThreshold1 = Value(682);
constexpr Value NNUEThreshold2 = Value(176);
// KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -132,7 +96,7 @@ namespace {
// SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type,
// higher if multiple safe checks are possible for that piece type.
constexpr int SafeCheck[][2] = {
{}, {}, {792, 1283}, {645, 967}, {1084, 1897}, {772, 1119}
{}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132}
};
#define S(mg, eg) make_score(mg, eg)
@@ -140,19 +104,25 @@ namespace {
// MobilityBonus[PieceType-2][attacked] contains bonuses for middle and end game,
// indexed by piece type and number of attacked squares in the mobility area.
constexpr Score MobilityBonus[][32] = {
{ S(-62,-81), S(-53,-56), S(-12,-31), S( -4,-16), S( 3, 5), S( 13, 11), // Knight
S( 22, 17), S( 28, 20), S( 33, 25) },
{ S(-48,-59), S(-20,-23), S( 16, -3), S( 26, 13), S( 38, 24), S( 51, 42), // Bishop
S( 55, 54), S( 63, 57), S( 63, 65), S( 68, 73), S( 81, 78), S( 81, 86),
S( 91, 88), S( 98, 97) },
{ S(-60,-78), S(-20,-17), S( 2, 23), S( 3, 39), S( 3, 70), S( 11, 99), // Rook
S( 22,103), S( 31,121), S( 40,134), S( 40,139), S( 41,158), S( 48,164),
S( 57,168), S( 57,169), S( 62,172) },
{ S(-30,-48), S(-12,-30), S( -8, -7), S( -9, 19), S( 20, 40), S( 23, 55), // Queen
S( 23, 59), S( 35, 75), S( 38, 78), S( 53, 96), S( 64, 96), S( 65,100),
S( 65,121), S( 66,127), S( 67,131), S( 67,133), S( 72,136), S( 72,141),
S( 77,147), S( 79,150), S( 93,151), S(108,168), S(108,168), S(108,171),
S(110,182), S(114,182), S(114,192), S(116,219) }
{ S(-62,-79), S(-53,-57), S(-12,-31), S( -3,-17), S( 3, 7), S( 12, 13), // Knight
S( 21, 16), S( 28, 21), S( 37, 26) },
{ S(-47,-59), S(-20,-25), S( 14, -8), S( 29, 12), S( 39, 21), S( 53, 40), // Bishop
S( 53, 56), S( 60, 58), S( 62, 65), S( 69, 72), S( 78, 78), S( 83, 87),
S( 91, 88), S( 96, 98) },
{ S(-60,-82), S(-24,-15), S( 0, 17) ,S( 3, 43), S( 4, 72), S( 14,100), // Rook
S( 20,102), S( 30,122), S( 41,133), S(41 ,139), S( 41,153), S( 45,160),
S( 57,165), S( 58,170), S( 67,175) },
{ S(-29,-49), S(-16,-29), S( -8, -8), S( -8, 17), S( 18, 39), S( 25, 54), // Queen
S( 23, 59), S( 37, 73), S( 41, 76), S( 54, 95), S( 65, 95) ,S( 68,101),
S( 69,124), S( 70,128), S( 70,132), S( 70,133) ,S( 71,136), S( 72,140),
S( 74,147), S( 76,149), S( 90,153), S(104,169), S(105,171), S(106,171),
S(112,178), S(114,185), S(114,187), S(119,221) }
};
// BishopPawns[distance from edge] contains a file-dependent penalty for pawns on
// squares of the same color as our bishop.
constexpr Score BishopPawns[int(FILE_NB) / 2] = {
S(3, 8), S(3, 9), S(1, 8), S(3, 7)
};
// KingProtector[knight/bishop] contains penalty for each distance unit to own king
@@ -160,32 +130,31 @@ namespace {
// Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
// pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
constexpr Score Outpost[] = { S(56, 36), S(30, 23) };
constexpr Score Outpost[] = { S(56, 34), S(31, 23) };
// PassedRank[Rank] contains a bonus according to the rank of a passed pawn
constexpr Score PassedRank[RANK_NB] = {
S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260)
};
// RookOnFile[semiopen/open] contains bonuses for each rook when there is
// no (friendly) pawn on the rook file.
constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) };
constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) };
// ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
// which piece type attacks which one. Attacks on lesser pieces which are
// pawn-defended are not considered.
constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161)
S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
};
constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41)
S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
};
// Assorted bonuses and penalties
constexpr Score BadOutpost = S( -7, 36);
constexpr Score BishopOnKingRing = S( 24, 0);
constexpr Score BishopPawns = S( 3, 7);
constexpr Score BishopXRayPawns = S( 4, 5);
constexpr Score CorneredBishop = S( 50, 50);
constexpr Score FlankAttacks = S( 8, 0);
@@ -198,7 +167,6 @@ namespace {
constexpr Score ReachableOutpost = S( 31, 22);
constexpr Score RestrictedPiece = S( 7, 7);
constexpr Score RookOnKingRing = S( 16, 0);
constexpr Score RookOnQueenFile = S( 6, 11);
constexpr Score SliderOnQueen = S( 60, 18);
constexpr Score ThreatByKing = S( 24, 89);
constexpr Score ThreatByPawnPush = S( 48, 39);
@@ -387,7 +355,7 @@ namespace {
// when the bishop is outside the pawn chain.
Bitboard blocked = pos.pieces(Us, PAWN) & shift<Down>(pos.pieces());
score -= BishopPawns * pos.pawns_on_same_color_squares(Us, s)
score -= BishopPawns[edge_distance(file_of(s))] * pos.pawns_on_same_color_squares(Us, s)
* (!(attackedBy[Us][PAWN] & s) + popcount(blocked & CenterFiles));
// Penalty for all enemy pawns x-rayed
@@ -414,10 +382,6 @@ namespace {
if (Pt == ROOK)
{
// Bonus for rook on the same file as a queen
if (file_bb(s) & pos.pieces(QUEEN))
score += RookOnQueenFile;
// Bonus for rook on an open or semi-open file
if (pos.is_on_semiopen_file(Us, s))
score += RookOnFile[pos.is_on_semiopen_file(Them, s)];
@@ -515,18 +479,18 @@ namespace {
int kingFlankAttack = popcount(b1) + popcount(b2);
int kingFlankDefense = popcount(b3);
kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them]
+ 185 * popcount(kingRing[Us] & weak)
+ 148 * popcount(unsafeChecks)
+ 98 * popcount(pos.blockers_for_king(Us))
+ 69 * kingAttacksCount[Them]
+ 3 * kingFlankAttack * kingFlankAttack / 8
+ mg_value(mobility[Them] - mobility[Us])
- 873 * !pos.count<QUEEN>(Them)
- 100 * bool(attackedBy[Us][KNIGHT] & attackedBy[Us][KING])
- 6 * mg_value(score) / 8
- 4 * kingFlankDefense
+ 37;
kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them] // (~10 Elo)
+ 185 * popcount(kingRing[Us] & weak) // (~15 Elo)
+ 148 * popcount(unsafeChecks) // (~4 Elo)
+ 98 * popcount(pos.blockers_for_king(Us)) // (~2 Elo)
+ 69 * kingAttacksCount[Them] // (~0.5 Elo)
+ 3 * kingFlankAttack * kingFlankAttack / 8 // (~0.5 Elo)
+ mg_value(mobility[Them] - mobility[Us]) // (~0.5 Elo)
- 873 * !pos.count<QUEEN>(Them) // (~24 Elo)
- 100 * bool(attackedBy[Us][KNIGHT] & attackedBy[Us][KING]) // (~5 Elo)
- 6 * mg_value(score) / 8 // (~8 Elo)
- 4 * kingFlankDefense // (~5 Elo)
+ 37; // (~0.5 Elo)
// Transform the kingDanger units into a Score, and subtract it from the evaluation
if (kingDanger > 100)
@@ -843,7 +807,9 @@ namespace {
sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
: pos.count<BISHOP>(WHITE) + pos.count<KNIGHT>(WHITE));
else
sf = std::min(sf, 36 + 7 * pos.count<PAWN>(strongSide));
sf = std::min(sf, 36 + 7 * pos.count<PAWN>(strongSide)) - 4 * !pawnsOnBothFlanks;
sf -= 4 * !pawnsOnBothFlanks;
}
// Interpolate between the middlegame and (scaled by 'sf') endgame score
@@ -947,19 +913,47 @@ make_v:
/// evaluation of the position from the point of view of the side to move.
Value Eval::evaluate(const Position& pos) {
#ifdef EVAL_LEARN
if (Learner::use_raw_nnue_eval) {
return NNUE::evaluate(pos);
Value v;
if (NNUE::useNNUE == NNUE::UseNNUEMode::Pure) {
v = NNUE::evaluate(pos);
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
}
#endif
else if (NNUE::useNNUE == NNUE::UseNNUEMode::False)
v = Evaluation<NO_TRACE>(pos).value();
else
{
// Scale and shift NNUE for compatibility with search and classical evaluation
auto adjusted_NNUE = [&](){
int mat = pos.non_pawn_material() + PawnValueMg * pos.count<PAWN>();
return NNUE::evaluate(pos) * (720 + mat / 32) / 1024 + Tempo;
};
bool classical = !Eval::useNNUE
|| abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
Value v = classical ? Evaluation<NO_TRACE>(pos).value()
: NNUE::evaluate(pos) * 5 / 4 + Tempo;
// If there is PSQ imbalance use classical eval, with small probability if it is small
Value psq = Value(abs(eg_value(pos.psq_score())));
int r50 = 16 + pos.rule50_count();
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count<PAWN>() < 2;
v = classical || strongClassical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
// If the classical eval is small and imbalance large, use NNUE nevertheless.
// For the case of opposite colored bishops, switch to NNUE eval with
// small probability if the classical eval is less than the threshold.
if ( largePsq && !strongClassical
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|| ( pos.opposite_bishops()
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
&& !(pos.this_thread()->nodes & 0xB))))
v = adjusted_NNUE();
}
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
@@ -1015,7 +1009,7 @@ std::string Eval::trace(const Position& pos) {
ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
if (Eval::useNNUE)
if (NNUE::useNNUE != NNUE::UseNNUEMode::False)
{
v = NNUE::evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;
+4 -14
View File
@@ -26,23 +26,13 @@
class Position;
namespace Eval {
std::string trace(const Position& pos);
Value evaluate(const Position& pos);
extern bool useNNUE;
extern std::string eval_file_loaded;
void init_NNUE();
void verify_NNUE();
namespace NNUE {
Value evaluate(const Position& pos);
Value compute_eval(const Position& pos);
void update_eval(const Position& pos);
bool load_eval_file(const std::string& evalFile);
} // namespace NNUE
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
#define EvalFileDefaultName "nn-c3ca321c51c9.nnue"
} // namespace Eval
File diff suppressed because it is too large Load Diff
-429
View File
@@ -1,429 +0,0 @@
#if defined (EVAL_LEARN)
#include "../misc.h"
#include "../position.h"
#include <sstream>
#include <fstream>
#include <cstring> // std::memset()
using namespace std;
// -----------------------------------
// stage compression/decompression
// -----------------------------------
// Bit-level reader/writer over a caller-owned byte buffer.
// Bits are addressed in increasing order: bit k lives in byte k / 8 at bit
// position k & 7, so multi-bit values are stored lowest-order bit first.
// The stream never allocates and never checks bounds; the caller must provide
// a buffer large enough for everything that is read or written.
struct BitStream
{
    // Attach the stream to data_ and rewind the cursor.
    // When the buffer is going to be written, it must have been cleared to 0
    // beforehand: write_one_bit() only ever sets bits, it never clears them.
    void set_data(uint8_t* data_) { data = data_; reset(); }

    // Get the pointer passed in set_data() (nullptr if none was set yet).
    uint8_t* get_data() const { return data; }

    // Get the cursor, i.e. the index of the next bit to read or write.
    int get_cursor() const { return bit_cursor; }

    // Rewind the cursor to the first bit.
    void reset() { bit_cursor = 0; }

    // Write 1 bit to the stream.
    // If b is non-zero, write out 1. If 0, write 0.
    void write_one_bit(int b)
    {
        if (b)
            data[bit_cursor / 8] |= 1 << (bit_cursor & 7);

        ++bit_cursor;
    }

    // Read 1 bit from the stream and advance the cursor.
    int read_one_bit()
    {
        const int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
        ++bit_cursor;

        return b;
    }

    // Write the n lowest-order bits of d, lowest-order bit first.
    void write_n_bit(int d, int n)
    {
        // Work on the unsigned representation so that selecting bit 31 does
        // not shift a 1 into the sign bit of a signed int.
        const unsigned value = static_cast<unsigned>(d);

        for (int i = 0; i < n; ++i)
            write_one_bit(static_cast<int>((value >> i) & 1u));
    }

    // Read n bits of data.
    // Reverse conversion of write_n_bit().
    int read_n_bit(int n)
    {
        unsigned result = 0;

        for (int i = 0; i < n; ++i)
            if (read_one_bit())
                result |= 1u << i;

        return static_cast<int>(result);
    }

private:
    // Next bit position to read/write.
    int bit_cursor = 0;

    // Buffer the stream operates on; not owned.
    uint8_t* data = nullptr;
};
// Huffman coding (notes on the shogi encoding, 81-square board with pieces in hand,
// that this scheme is derived from)
// * Simplified from the "mini" encoding to make conversion easier.
//
// One square on the board (other than NO_PIECE) = 2 to 6 bits (+ 1-bit promotion flag + 1 bit for the side)
// One piece in hand = 1 to 5 bits (+ 1-bit promotion flag + 1 bit for the side)
//
//          on board      in hand
// Empty    xxxxx0 + 0    (none)
// Pawn     xxxx01 + 2    xxxx0 + 2
// Lance    xx0011 + 2    xx001 + 2
// Knight   xx1011 + 2    xx101 + 2
// Silver   xx0111 + 2    xx011 + 2
// Gold     x01111 + 1    x0111 + 1 // Gold cannot promote, so it has no promotion flag.
// Bishop   011111 + 2    01111 + 2
// Rook     111111 + 2    11111 + 2
//
// Assuming all pieces are on the board:
// Empty   81 - 40 pieces = 41 squares = 41 bits
// Pawn    4 bits * 18 pieces = 72 bits
// Lance   6 bits *  4 pieces = 24 bits
// Knight  6 bits *  4 pieces = 24 bits
// Silver  6 bits *  4 pieces = 24 bits
// Gold    6 bits *  4 pieces = 24 bits
// Bishop  8 bits *  2 pieces = 16 bits
// Rook    8 bits *  2 pieces = 16 bits
// -------
// 241 bits + 1 bit (turn) + 7 bits x 2 (king position of each side) = 256 bits
//
// When a piece moves from the board into a hand, its square becomes empty and can be expressed with 1 bit,
// and the piece in hand needs 1 bit less than the same piece on the board, so the total number of bits does not change.
// Therefore, with this encoding any position can be expressed in this number of bits.
// A piece in hand does not need the promotion flag, but when the flag is included a piece in hand takes exactly
// one bit less than the same piece on the board, which keeps the total number of bits fixed, so it is included as well.
// Huffman encoding of one board square, matching huffman_table below.
// Codes are shown with the lowest-order bit on the right; write_n_bit()
// emits them starting from the lowest-order bit.
//
// Empty  xxxxxxx0 (1 bit)
// Pawn   xxxx0001 (4 bits) + 1 bit (piece color)
// Knight xxxx0011 (4 bits) + 1 bit (piece color)
// Bishop xxxx0101 (4 bits) + 1 bit (piece color)
// Rook   xxxx0111 (4 bits) + 1 bit (piece color)
// Queen  xxxx1001 (4 bits) + 1 bit (piece color)
//
// Kings have no code here: SfenPacker::pack() stores the king squares
// separately and skips kings when writing the board.
struct HuffmanedPiece
{
int code; // code value written to the stream for this piece type
int bits; // number of low-order bits of `code` that are written
};
// Indexed by piece type (NO_PIECE through QUEEN); there is no entry for the king.
HuffmanedPiece huffman_table[] =
{
{0b0000,1}, // NO_PIECE
{0b0001,4}, // PAWN
{0b0011,4}, // KNIGHT
{0b0101,4}, // BISHOP
{0b0111,4}, // ROOK
{0b1001,4}, // QUEEN
};
// Class for compressing/decompressing sfen
// sfen can be packed to 256bit (32bytes) by Huffman coding.
// This is proven by mini. The above is Huffman coding.
//
// Internal format = 1-bit turn + 7-bit king position *2 + piece on board (Huffman coding) + hand piece (Huffman coding)
// Side to move (White = 0, Black = 1) (1bit)
// White King Position (6 bits)
// Black King Position (6 bits)
// Huffman Encoding of the board
// Castling availability (1 bit x 4)
// En passant square (1 or 1 + 6 bits)
// Rule 50 (6 bits)
// Game play (8 bits)
//
// TODO(someone): Rename SFEN to FEN.
//
struct SfenPacker
{
// Pack sfen and store in data[32].
void pack(const Position& pos)
{
// cout << pos;
memset(data, 0, 32 /* 256bit */);
stream.set_data(data);
// turn
// Side to move.
stream.write_one_bit((int)(pos.side_to_move()));
// 7-bit positions for leading and trailing balls
// White king and black king, 6 bits for each.
for(auto c: Colors)
stream.write_n_bit(pos.king_square(c), 6);
// Write the pieces on the board other than the kings.
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
Piece pc = pos.piece_on(make_square(f, r));
if (type_of(pc) == KING)
continue;
write_board_piece_to_stream(pc);
}
}
// TODO(someone): Support chess960.
stream.write_one_bit(pos.can_castle(WHITE_OO));
stream.write_one_bit(pos.can_castle(WHITE_OOO));
stream.write_one_bit(pos.can_castle(BLACK_OO));
stream.write_one_bit(pos.can_castle(BLACK_OOO));
if (pos.ep_square() == SQ_NONE) {
stream.write_one_bit(0);
}
else {
stream.write_one_bit(1);
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
}
stream.write_n_bit(pos.state()->rule50, 6);
stream.write_n_bit(1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2, 8);
assert(stream.get_cursor() <= 256);
}
// sfen packed by pack() (256bit = 32bytes)
// Or sfen to decode with unpack()
uint8_t *data; // uint8_t[32];
//private:
// Position::set_from_packed_sfen(uint8_t data[32]) I want to use these functions, so the line is bad, but I want to keep it public.
BitStream stream;
// Output the board pieces to stream.
void write_board_piece_to_stream(Piece pc)
{
// piece type
PieceType pr = type_of(pc);
auto c = huffman_table[pr];
stream.write_n_bit(c.code, c.bits);
if (pc == NO_PIECE)
return;
// first and second flag
stream.write_one_bit(color_of(pc));
}
// Read one board piece from stream
Piece read_board_piece_from_stream()
{
PieceType pr = NO_PIECE_TYPE;
int code = 0, bits = 0;
while (true)
{
code |= stream.read_one_bit() << bits;
++bits;
assert(bits <= 6);
for (pr = NO_PIECE_TYPE; pr <KING; ++pr)
if (huffman_table[pr].code == code
&& huffman_table[pr].bits == bits)
goto Found;
}
Found:;
if (pr == NO_PIECE_TYPE)
return NO_PIECE;
// first and second flag
Color c = (Color)stream.read_one_bit();
return make_piece(c, pr);
}
};
// -----------------------------------
// Add to Position class
// -----------------------------------
// Add a function that directly unpacks for speed. It's pretty tough.
// Write it by combining packer::unpack() and Position::set().
// If there is a problem with the passed phase and there is an error, non-zero is returned.
int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thread* th, bool mirror)
{
SfenPacker packer;
auto& stream = packer.stream;
stream.set_data((uint8_t*)&sfen);
std::memset(this, 0, sizeof(Position));
std::memset(si, 0, sizeof(StateInfo));
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si;
// Active color
sideToMove = (Color)stream.read_one_bit();
pieceList[W_KING][0] = SQUARE_NB;
pieceList[B_KING][0] = SQUARE_NB;
// First the position of the ball
if (mirror)
{
for (auto c : Colors)
board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
}
else
{
for (auto c : Colors)
board[stream.read_n_bit(6)] = make_piece(c, KING);
}
// Piece placement
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
auto sq = make_square(f, r);
if (mirror) {
sq = flip_file(sq);
}
// it seems there are already balls
Piece pc;
if (type_of(board[sq]) != KING)
{
assert(board[sq] == NO_PIECE);
pc = packer.read_board_piece_from_stream();
}
else
{
pc = board[sq];
board[sq] = NO_PIECE; // put_piece() will catch ASSERT unless you remove it all.
}
// There may be no pieces, so skip in that case.
if (pc == NO_PIECE)
continue;
put_piece(Piece(pc), sq);
//cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl;
if (stream.get_cursor()> 256)
return 1;
//assert(stream.get_cursor() <= 256);
}
}
// Castling availability.
// TODO(someone): Support chess960.
st->castlingRights = 0;
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_H1); piece_on(rsq) != W_ROOK; --rsq) {}
set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_A1); piece_on(rsq) != W_ROOK; ++rsq) {}
set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_H1); piece_on(rsq) != B_ROOK; --rsq) {}
set_castling_right(BLACK, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_A1); piece_on(rsq) != B_ROOK; ++rsq) {}
set_castling_right(BLACK, rsq);
}
// En passant square. Ignore if no pawn capture is possible
if (stream.read_one_bit()) {
Square ep_square = static_cast<Square>(stream.read_n_bit(6));
if (mirror) {
ep_square = flip_file(ep_square);
}
st->epSquare = ep_square;
if (!(attackers_to(st->epSquare) & pieces(sideToMove, PAWN))
|| !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))))
st->epSquare = SQ_NONE;
}
else {
st->epSquare = SQ_NONE;
}
// Halfmove clock
st->rule50 = static_cast<Square>(stream.read_n_bit(6));
// Fullmove number
gamePly = static_cast<Square>(stream.read_n_bit(8));
// Convert from fullmove starting from 1 to gamePly starting from 0,
// handle also common incorrect FEN with fullmove = 0.
gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK);
assert(stream.get_cursor() <= 256);
chess960 = false;
thisThread = th;
set_state(st);
//std::cout << *this << std::endl;
assert(pos_is_ok());
return 0;
}
// Give the board, hand piece, and turn, and return the sfen.
//std::string Position::sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly_)
//{
// // Copy it to an internal structure and call sfen() if the conversion process depends only on it
// // Maybe it will be converted normally...
// Position pos;
//
// memcpy(pos.board, board, sizeof(Piece) * 81);
// memcpy(pos.hand, hands, sizeof(Hand) * 2);
// pos.sideToMove = turn;
// pos.gamePly = gamePly_;
//
// return pos.sfen();
//
// // Implementation of ↑ is beautiful, but slow.
// // This is a bottleneck when learning a large amount of game records, so write a function to unpack directly.
//}
// Pack the current position into the caller-supplied buffer `sfen`.
// The packer writes straight into that buffer through its `data` pointer.
void Position::sfen_pack(PackedSfen& sfen)
{
    SfenPacker packer;
    packer.data = reinterpret_cast<uint8_t*>(&sfen);
    packer.pack(*this);
}
//// Unpack the packed sfen. Returns an sfen string.
//std::string Position::sfen_unpack(const PackedSfen& sfen)
//{
// SfenPacker sp;
// sp.data = (uint8_t*)&sfen;
// return sp.unpack();
//}
#endif // USE_SFEN_PACKER
File diff suppressed because it is too large Load Diff
+140
View File
@@ -0,0 +1,140 @@
#ifndef _STOCKFISH_BLAS_H_
#define _STOCKFISH_BLAS_H_
struct ThreadPool;
// SF_BLAS_RESTRICT expands to the compiler-specific spelling of the
// `restrict` pointer qualifier (which is not part of standard C++).
// It is purely an optimisation hint, so on a compiler that is not
// recognised below it expands to nothing instead of being left undefined,
// which would break every declaration that uses it.
#if defined (_MSC_VER)
#define SF_BLAS_RESTRICT __restrict
#elif defined (__INTEL_COMPILER)
#define SF_BLAS_RESTRICT restrict
#elif defined (__clang__)
#define SF_BLAS_RESTRICT __restrict__
#elif defined (__GNUC__)
#define SF_BLAS_RESTRICT __restrict__
#else
#define SF_BLAS_RESTRICT
#endif
// Declarations of a small set of single-precision BLAS-style routines.
// Each routine comes in up to four flavours: contiguous or strided
// (incX / incY), and with or without a ThreadPool argument.
// NOTE(review): only the declarations are visible here. Names and argument
// order follow the BLAS/CBLAS convention; confirm the exact semantics
// against the implementation file.
namespace Blas {
// Storage order of a matrix. The numeric values are the same as CBLAS's
// CblasRowMajor / CblasColMajor.
enum struct MatrixLayout {
RowMajor = 101,
ColMajor = 102
};
// Whether a matrix operand is used transposed. The numeric values are the
// same as CBLAS's CblasNoTrans / CblasTrans.
enum struct MatrixTranspose {
NoTrans = 111,
Trans = 112
};
// scopy: presumably Y := X for N floats (BLAS scopy) - TODO confirm.
// X and Y are declared restrict, so they must not overlap.
void scopy(
const int N,
const float * SF_BLAS_RESTRICT X,
float * SF_BLAS_RESTRICT Y
);
// Strided variant: incX / incY are the element strides of X and Y.
void scopy(
const int N,
const float * SF_BLAS_RESTRICT X, const int incX,
float * SF_BLAS_RESTRICT Y, const int incY
);
// Variant taking a ThreadPool (presumably used to split the work - TODO confirm).
void scopy(
ThreadPool& thread_pool,
const int N,
const float * SF_BLAS_RESTRICT X,
float * SF_BLAS_RESTRICT Y
);
// Strided variant taking a ThreadPool.
void scopy(
ThreadPool& thread_pool,
const int N,
const float * SF_BLAS_RESTRICT X, const int incX,
float * SF_BLAS_RESTRICT Y, const int incY
);
// sscal: presumably X := alpha * X for N floats (BLAS sscal) - TODO confirm.
void sscal(
const int N,
const float alpha,
float * SF_BLAS_RESTRICT X
);
// Strided variant: incX is the element stride of X.
void sscal(
const int N,
const float alpha,
float * SF_BLAS_RESTRICT X, const int incX
);
// Variant taking a ThreadPool.
void sscal(
ThreadPool& thread_pool,
const int N,
const float alpha,
float * SF_BLAS_RESTRICT X
);
// Strided variant taking a ThreadPool.
void sscal(
ThreadPool& thread_pool,
const int N,
const float alpha,
float * SF_BLAS_RESTRICT X, const int incX
);
// saxpy: presumably Y := alpha * X + Y for N floats (BLAS saxpy) - TODO confirm.
// X and Y are declared restrict, so they must not overlap.
void saxpy(
const int N,
const float alpha,
const float * SF_BLAS_RESTRICT X,
float * SF_BLAS_RESTRICT Y
);
// Strided variant: incX / incY are the element strides of X and Y.
void saxpy(
const int N,
const float alpha,
const float * SF_BLAS_RESTRICT X, const int incX,
float * SF_BLAS_RESTRICT Y, const int incY
);
// Variant taking a ThreadPool.
void saxpy(
ThreadPool& thread_pool,
const int N,
const float alpha,
const float * SF_BLAS_RESTRICT X,
float * SF_BLAS_RESTRICT Y
);
// Strided variant taking a ThreadPool.
void saxpy(
ThreadPool& thread_pool,
const int N,
const float alpha,
const float * SF_BLAS_RESTRICT X, const int incX,
float * SF_BLAS_RESTRICT Y, const int incY
);
// sgemm: presumably C := alpha * op(A) * op(B) + beta * C (BLAS sgemm),
// where op() is selected by TransA / TransB, `layout` gives the storage
// order and lda / ldb / ldc are the leading dimensions - TODO confirm.
// A, B and C are declared restrict, so they must not overlap.
void sgemm(
ThreadPool& thread_pool,
MatrixLayout layout, MatrixTranspose TransA, MatrixTranspose TransB,
const int M, const int N, const int K,
const float alpha,
const float * SF_BLAS_RESTRICT A, const int lda,
const float * SF_BLAS_RESTRICT B, const int ldb,
const float beta,
float * SF_BLAS_RESTRICT C, const int ldc
);
// Same as above without a ThreadPool argument.
void sgemm(
MatrixLayout layout, MatrixTranspose TransA, MatrixTranspose TransB,
const int M, const int N, const int K,
const float alpha,
const float * SF_BLAS_RESTRICT A, const int lda,
const float * SF_BLAS_RESTRICT B, const int ldb,
const float beta,
float * SF_BLAS_RESTRICT C, const int ldc
);
// Self-test entry point (behaviour not visible from this header).
void test(
ThreadPool& thread_pool
);
// Benchmark entry point (behaviour not visible from this header).
void bench(
ThreadPool& thread_pool
);
}
#endif
+26
View File
@@ -0,0 +1,26 @@
The file "incbin.h" is free and unencumbered software released into
the public domain by Dale Weiler, see:
<https://github.com/graphitemaster/incbin>
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>
+368
View File
@@ -0,0 +1,368 @@
/**
* @file incbin.h
* @author Dale Weiler
* @brief Utility for including binary files
*
* Facilities for including binary files into the current translation unit and
* making use from them externally in other translation units.
*/
#ifndef INCBIN_HDR
#define INCBIN_HDR
#include <limits.h>
/*
 * Select INCBIN_ALIGNMENT_INDEX, the log2 of the data alignment in bytes,
 * from the widest vector extension the compiler advertises:
 *   any AVX-512 feature macro -> 6 (64 bytes)
 *   AVX / AVX2                -> 5 (32 bytes)
 *   SSE family or NEON        -> 4 (16 bytes)
 *   otherwise                 -> 3 (8 bytes) when ULONG_MAX is not 32-bit,
 *                                else 2 (4 bytes)
 */
#if defined(__AVX512BW__) || \
defined(__AVX512CD__) || \
defined(__AVX512DQ__) || \
defined(__AVX512ER__) || \
defined(__AVX512PF__) || \
defined(__AVX512VL__) || \
defined(__AVX512F__)
# define INCBIN_ALIGNMENT_INDEX 6
#elif defined(__AVX__) || \
defined(__AVX2__)
# define INCBIN_ALIGNMENT_INDEX 5
#elif defined(__SSE__) || \
defined(__SSE2__) || \
defined(__SSE3__) || \
defined(__SSSE3__) || \
defined(__SSE4_1__) || \
defined(__SSE4_2__) || \
defined(__neon__)
# define INCBIN_ALIGNMENT_INDEX 4
#elif ULONG_MAX != 0xffffffffu
# define INCBIN_ALIGNMENT_INDEX 3
# else
# define INCBIN_ALIGNMENT_INDEX 2
#endif
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
/* A table is used because the result must be a plain literal token that
 * can later be stringized into an assembler directive. */
#define INCBIN_ALIGN_SHIFT_0 1
#define INCBIN_ALIGN_SHIFT_1 2
#define INCBIN_ALIGN_SHIFT_2 4
#define INCBIN_ALIGN_SHIFT_3 8
#define INCBIN_ALIGN_SHIFT_4 16
#define INCBIN_ALIGN_SHIFT_5 32
#define INCBIN_ALIGN_SHIFT_6 64
/* Actual alignment value */
/* Pastes INCBIN_ALIGN_SHIFT, `_' and the index together to name one of the
 * table entries above. INCBIN_CONCATENATE is defined further down; that is
 * fine because macros are only expanded at the point of use. */
#define INCBIN_ALIGNMENT \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
INCBIN_ALIGNMENT_INDEX)
/* Stringize */
/* Two levels, so that a macro argument is expanded before `#' is applied:
 * INCBIN_STRINGIZE(X) yields the text of X's expansion, INCBIN_STR(X) the
 * text of X itself. */
#define INCBIN_STR(X) \
#X
#define INCBIN_STRINGIZE(X) \
INCBIN_STR(X)
/* Concatenate */
/* Same two-level scheme for `##': INCBIN_CONCATENATE expands its arguments
 * first, INCBIN_CAT pastes them verbatim. */
#define INCBIN_CAT(X, Y) \
X ## Y
#define INCBIN_CONCATENATE(X, Y) \
INCBIN_CAT(X, Y)
/* Deferred macro expansion */
/* INCBIN_INVOKE(N, args...) forms the call N(args...) and passes it through
 * INCBIN_EVAL so that it is scanned again and N is invoked. */
#define INCBIN_EVAL(X) \
X
#define INCBIN_INVOKE(N, ...) \
INCBIN_EVAL(N(__VA_ARGS__))
/* Green Hills uses a different directive for including binary data */
#if defined(__ghs__)
# if (__ghs_asm == 2)
# define INCBIN_MACRO ".file"
/* Or consider the ".myrawdata" entry in the ld file */
# else
# define INCBIN_MACRO "\tINCBIN"
# endif
#else
# define INCBIN_MACRO ".incbin"
#endif
#ifndef _MSC_VER
# define INCBIN_ALIGN \
__attribute__((aligned(INCBIN_ALIGNMENT)))
#else
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
#endif
#if defined(__arm__) || /* GNU C and RealView */ \
defined(__arm) || /* Diab */ \
defined(_ARM) /* ImageCraft */
# define INCBIN_ARM
#endif
#ifdef __GNUC__
/* Utilize .balign where supported */
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".balign 1\n"
#elif defined(INCBIN_ARM)
/*
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
* the shift count. This is the value passed to `.align'
*/
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
# define INCBIN_ALIGN_BYTE ".align 0\n"
#else
/* We assume other inline assembler's treat `.align' as `.balign' */
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
# define INCBIN_ALIGN_BYTE ".align 1\n"
#endif
/* INCBIN_CONST is used by incbin.c generated files */
#if defined(__cplusplus)
# define INCBIN_EXTERNAL extern "C"
# define INCBIN_CONST extern const
#else
# define INCBIN_EXTERNAL extern
# define INCBIN_CONST const
#endif
/**
* @brief Optionally override the linker section into which data is emitted.
*
* @warning If you use this facility, you'll have to deal with platform-specific linker output
* section naming on your own
*
* Overriding the default linker output section, e.g for esp8266/Arduino:
* @code
* #define INCBIN_OUTPUT_SECTION ".irom.text"
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
* // Data is emitted into program memory that never gets copied to RAM
* @endcode
*/
/* Default output section, used only when the includer has not chosen one:
 * ".const_data" on Apple targets, ".rodata" elsewhere. */
#if !defined(INCBIN_OUTPUT_SECTION)
# if defined(__APPLE__)
# define INCBIN_OUTPUT_SECTION ".const_data"
# else
# define INCBIN_OUTPUT_SECTION ".rodata"
# endif
#endif
/*
 * Assembler building blocks used by INCBIN():
 *   INCBIN_SECTION      switch to the output section
 *   INCBIN_GLOBAL(NAME) export the symbol <prefix>NAME
 *   INCBIN_INT          directive that emits the integer size value
 *   INCBIN_MANGLE       string prepended to C symbol names by the platform
 *   INCBIN_BYTE         directive that emits a single byte
 *   INCBIN_TYPE(NAME)   marks the symbol as a data object (expands to
 *                       nothing where the directive is not supported)
 */
#if defined(__APPLE__)
/* The directives are different for Apple branded compilers */
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# define INCBIN_INT ".long "
# define INCBIN_MANGLE "_"
# define INCBIN_BYTE ".byte "
# define INCBIN_TYPE(...)
#else
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
# if defined(__ghs__)
# define INCBIN_INT ".word "
# else
# define INCBIN_INT ".int "
# endif
# if defined(__USER_LABEL_PREFIX__)
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
# else
# define INCBIN_MANGLE ""
# endif
# if defined(INCBIN_ARM)
/* On arm assemblers, `@' is used as a line comment token */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
# elif defined(__MINGW32__) || defined(__MINGW64__)
/* Mingw doesn't support this directive either */
# define INCBIN_TYPE(NAME)
# else
/* It's safe to use `@' on other architectures */
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
# endif
# define INCBIN_BYTE ".byte "
#endif
/* List of style types used for symbol names */
/* The numeric values are pasted into the names of the INCBIN_STYLE_<n>_DATA,
 * _END and _SIZE lookup macros, so they must stay plain 0 and 1. */
#define INCBIN_STYLE_CAMEL 0
#define INCBIN_STYLE_SNAKE 1
/**
* @brief Specify the prefix to use for symbol names.
*
* By default this is `g', producing symbols of the form:
* @code
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char gFooData[];
* // const unsigned char *const gFooEnd;
* // const unsigned int gFooSize;
* @endcode
*
* If however you specify a prefix before including: e.g:
* @code
* #define INCBIN_PREFIX incbin
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols instead:
* // const unsigned char incbinFooData[];
* // const unsigned char *const incbinFooEnd;
* // const unsigned int incbinFooSize;
* @endcode
*/
/* Default symbol prefix `g'; applied only when the includer has not defined
 * INCBIN_PREFIX before including this header. */
#if !defined(INCBIN_PREFIX)
# define INCBIN_PREFIX g
#endif
/**
* @brief Specify the style used for symbol names.
*
* Possible options are
* - INCBIN_STYLE_CAMEL "CamelCase"
* - INCBIN_STYLE_SNAKE "snake_case"
*
* Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
* @code
* #include "incbin.h"
* INCBIN(Foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>FooData[];
* // const unsigned char *const <prefix>FooEnd;
* // const unsigned int <prefix>FooSize;
* @endcode
*
* If however you specify a style before including: e.g:
* @code
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
* #include "incbin.h"
* INCBIN(foo, "foo.txt");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>foo_data[];
* // const unsigned char *const <prefix>foo_end;
* // const unsigned int <prefix>foo_size;
* @endcode
*/
/* Default naming style is CamelCase; applied only when the includer has not
 * defined INCBIN_STYLE before including this header. */
#if !defined(INCBIN_STYLE)
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
#endif
/* Style lookup tables */
/* Indexed by the numeric INCBIN_STYLE value (0 = camel, 1 = snake): the
 * suffix appended to the symbol name for the data, end and size symbols. */
#define INCBIN_STYLE_0_DATA Data
#define INCBIN_STYLE_0_END End
#define INCBIN_STYLE_0_SIZE Size
#define INCBIN_STYLE_1_DATA _data
#define INCBIN_STYLE_1_END _end
#define INCBIN_STYLE_1_SIZE _size
/* Style lookup: returning identifier */
/* Builds the name INCBIN_STYLE_<style>_<TYPE> from the pieces, which then
 * expands to the matching table entry above. */
#define INCBIN_STYLE_IDENT(TYPE) \
INCBIN_CONCATENATE( \
INCBIN_STYLE_, \
INCBIN_CONCATENATE( \
INCBIN_EVAL(INCBIN_STYLE), \
INCBIN_CONCATENATE(_, TYPE)))
/* Style lookup: returning string literal */
/* The definition ends on the last line below without a trailing line
 * continuation, so that whatever follows it can never be spliced into the
 * macro body. */
#define INCBIN_STYLE_STRING(TYPE) \
INCBIN_STRINGIZE( \
INCBIN_STYLE_IDENT(TYPE))
/* Generate the global labels by indirectly invoking the macro with our style
* type and concatenating the name against them.
*
* For a given NAME and TYPE (DATA, END or SIZE) this expands to
* INCBIN_GLOBAL(<NAME><suffix>) followed by INCBIN_TYPE(<NAME><suffix>),
* i.e. the directive that exports the symbol and the one that marks it as
* a data object. INCBIN_INVOKE is used so that the suffix is fully expanded
* before INCBIN_GLOBAL / INCBIN_TYPE stringize it. */
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
INCBIN_INVOKE( \
INCBIN_GLOBAL, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE))) \
INCBIN_INVOKE( \
INCBIN_TYPE, \
INCBIN_CONCATENATE( \
NAME, \
INCBIN_INVOKE( \
INCBIN_STYLE_IDENT, \
TYPE)))
/**
* @brief Externally reference binary data included in another translation unit.
*
* Produces three external symbols that reference the binary data included in
* another translation unit.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name given for the binary data
*
* @code
* INCBIN_EXTERN(Foo);
*
* // Now you have the following symbols:
* // extern const unsigned char <prefix>FooData[];
* // extern const unsigned char *const <prefix>FooEnd;
* // extern const unsigned int <prefix>FooSize;
* @endcode
*/
/* Expands to three declarations, named <prefix><NAME><suffix>:
 *   - an aligned `const unsigned char []' for the data,
 *   - an aligned `const unsigned char *const' for the end,
 *   - a `const unsigned int' for the size.
 * The last declaration has no trailing semicolon; the user of the macro
 * supplies it. */
#define INCBIN_EXTERN(NAME) \
INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(DATA))[]; \
INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(END)); \
INCBIN_EXTERNAL const unsigned int \
INCBIN_CONCATENATE( \
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
INCBIN_STYLE_IDENT(SIZE))
/**
* @brief Include a binary file into the current translation unit.
*
* Includes a binary file into the current translation unit, producing three symbols
* for objects that encode the data and size respectively.
*
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
* "Data", as well as "End" and "Size" after. An example is provided below.
*
* @param NAME The name to associate with this binary data (as an identifier.)
* @param FILENAME The file to include (as a string literal.)
*
* @code
* INCBIN(Icon, "icon.png");
*
* // Now you have the following symbols:
* // const unsigned char <prefix>IconData[];
* // const unsigned char *const <prefix>IconEnd;
* // const unsigned int <prefix>IconSize;
* @endcode
*
* @warning This must be used in global scope
* @warning The identifiers may be different if INCBIN_STYLE is not default
*
* To externally reference the data included by this in another translation unit
* please @see INCBIN_EXTERN.
*/
/*
 * With MSVC the macro only declares the three symbols; FILENAME is not used
 * here, so the definitions have to come from somewhere else (presumably a
 * source file generated by the incbin tool - TODO confirm).
 *
 * On every other compiler a file-scope asm block is emitted that, in order:
 *   1. switches to the output section,
 *   2. exports and aligns the data label and includes FILENAME after it,
 *   3. exports the end label (byte aligned) directly after the file
 *      contents, followed by one byte with the value 1,
 *   4. exports and aligns the size label, which holds the integer
 *      `end - data',
 *   5. re-aligns and switches back to `.text',
 * and then declares the three symbols via INCBIN_EXTERN.
 */
#ifdef _MSC_VER
#define INCBIN(NAME, FILENAME) \
INCBIN_EXTERN(NAME)
#else
#define INCBIN(NAME, FILENAME) \
__asm__(INCBIN_SECTION \
INCBIN_GLOBAL_LABELS(NAME, DATA) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
INCBIN_MACRO " \"" FILENAME "\"\n" \
INCBIN_GLOBAL_LABELS(NAME, END) \
INCBIN_ALIGN_BYTE \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
INCBIN_BYTE "1\n" \
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
INCBIN_ALIGN_HOST \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
INCBIN_ALIGN_HOST \
".text\n" \
); \
INCBIN_EXTERN(NAME)
#endif
#endif
+667
View File
@@ -0,0 +1,667 @@
#ifndef LEARNER_AUTOGRAD_H
#define LEARNER_AUTOGRAD_H
#include <cmath>
#include <utility>
#include <type_traits>
#include <memory>
#include <tuple>
#include <optional>
#include <algorithm>
#include <cstdint>
namespace Learner
{
    // A value together with its gradient. Addition and subtraction combine
    // two pairs member-wise; multiplication and division scale both members
    // by a plain scalar.
    template <typename T>
    struct ValueWithGrad
    {
        T value;
        T grad;

        // Member-wise accumulate.
        ValueWithGrad& operator+=(const ValueWithGrad<T>& rhs)
        {
            this->value += rhs.value;
            this->grad += rhs.grad;
            return *this;
        }

        // Member-wise subtract.
        ValueWithGrad& operator-=(const ValueWithGrad<T>& rhs)
        {
            this->value -= rhs.value;
            this->grad -= rhs.grad;
            return *this;
        }

        // Scale both members by rhs.
        ValueWithGrad& operator*=(T rhs)
        {
            this->value *= rhs;
            this->grad *= rhs;
            return *this;
        }

        // Divide both members by rhs.
        ValueWithGrad& operator/=(T rhs)
        {
            this->value /= rhs;
            this->grad /= rhs;
            return *this;
        }

        // Copy with the absolute value taken of each member independently.
        [[nodiscard]] ValueWithGrad abs() const
        {
            ValueWithGrad result{ std::abs(value), std::abs(grad) };
            return result;
        }

        // Copy whose gradient is limited to the range [-max, max];
        // the value is left untouched.
        [[nodiscard]] ValueWithGrad clamp_grad(T max) const
        {
            ValueWithGrad result{ value, std::clamp(grad, -max, max) };
            return result;
        }
    };
}
namespace Learner::Autograd::UnivariateStatic
{
// Identity<T>::type is simply T. Reaching T through a nested name makes the
// parameter a non-deduced context, so Id<T> can be used for arguments whose
// type must be taken from elsewhere instead of being deduced.
template <typename T>
struct Identity
{
    using type = T;
};

template <typename T>
using Id = typename Identity<T>::type;

// How an expression node keeps a child it was built from:
//  - T is an rvalue reference  -> the child is stored by value,
//  - anything else             -> a const lvalue reference to it is kept.
template <typename T>
using StoreValueOrRef = std::conditional_t<
    !std::is_rvalue_reference_v<T>,
    const std::remove_reference_t<T>&,
    std::remove_reference_t<T>
>;
namespace Detail
{
    using CallIdType = std::uint32_t;

    // Tag identifying one evaluation pass. It is a distinct type so that it
    // can be looked up by type inside the argument tuple.
    struct CallId
    {
        CallIdType call_id{};

        constexpr CallId() : call_id(0) {}

        constexpr CallId(CallIdType id) : call_id(id) {}

        [[nodiscard]] bool operator==(CallId rhs) const noexcept
        {
            return call_id == rhs.call_id;
        }

        [[nodiscard]] bool operator!=(CallId rhs) const noexcept
        {
            return !(*this == rhs);
        }
    };

    // Hands out consecutive ids, starting at 0, from a per-thread counter.
    [[nodiscard]] inline CallId next_call_id()
    {
        static thread_local CallIdType s_call_id = 0;

        const CallIdType issued = s_call_id;
        ++s_call_id;
        return CallId{ issued };
    }

    // TupleContains<T, std::tuple<Us...>>::value is true when T is exactly
    // one of Us.
    template <typename T, typename Tuple>
    struct TupleContains;

    template <typename T, typename... Us>
    struct TupleContains<T, std::tuple<Us...>>
        : std::bool_constant<(std::is_same_v<T, Us> || ...)>
    {
    };

    template <typename T, typename Tuple>
    constexpr bool TupleContainsV = TupleContains<T, Tuple>::value;

    // True when every listed node type (references stripped) reports
    // is_constant == true.
    template <typename... Ts>
    constexpr bool AreAllConstantV = (std::remove_reference_t<Ts>::is_constant && ...);
}
// CRTP base of every expression node.
//   T      : scalar type produced by value() and grad()
//   ChildT : the deriving node type; it must provide calculate_value(args),
//            calculate_grad(args) and a static `is_constant` flag.
// The base adds per-node caching: results are stored together with the
// CallId they were computed for and are reused while the same CallId is
// seen again, so a node referenced several times inside one expression is
// only computed once per evaluation.
// NOTE(review): the caches are `mutable` members written from const
// methods without any synchronisation, and the ids come from a per-thread
// counter; sharing one expression object between threads looks unsafe -
// confirm with the callers.
template <typename T, typename ChildT>
struct Evaluable
{
constexpr Evaluable() = default;
// We append a unique call id so that we can invalidate the cache when
// the next computation starts. A single evaluation should see
// the same call_id at every node.
// Returns both the value and the gradient for the given arguments.
template <typename... ArgsTs>
[[nodiscard]] auto eval(const std::tuple<ArgsTs...>& args) const
{
const auto call_id = Detail::next_call_id();
const auto new_args = std::tuple_cat(args, std::tuple(call_id));
return ValueWithGrad<T>{ value(new_args), grad(new_args) };
}
// value(), overload for argument tuples that already carry a CallId:
// returns the cached value when it was computed for this CallId,
// otherwise asks the child to calculate it and caches the result.
template <typename... ArgsTs,
typename SFINAE = std::enable_if_t<Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
[[nodiscard]] auto value(const std::tuple<ArgsTs...>& args) const
{
const ChildT* this_ = static_cast<const ChildT*>(this);
const auto call_id = std::get<Detail::CallId>(args);
if (!value_cache.has_value() || value_cache_call_id != call_id)
{
value_cache_call_id = call_id;
value_cache = this_->calculate_value(args);
}
return *value_cache;
}
// value(), overload for argument tuples without a CallId: appends a fresh
// one and forwards to the overload above. The trailing `...` parameter only
// makes this signature different from the other overload, since the two
// otherwise differ in a default template argument alone.
template <typename... ArgsTs,
typename SFINAE = std::enable_if_t<!Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
[[nodiscard]] auto value(const std::tuple<ArgsTs...>& args, ...) const
{
const auto call_id = Detail::next_call_id();
const auto new_args = std::tuple_cat(args, std::tuple(call_id));
return value(new_args);
}
// grad(), overload for argument tuples that already carry a CallId.
// Nodes flagged is_constant return zero without calling the child or
// touching the cache; other nodes use the same cache scheme as value().
template <typename... ArgsTs,
typename SFINAE = std::enable_if_t<Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
[[nodiscard]] auto grad(const std::tuple<ArgsTs...>& args) const
{
if constexpr (ChildT::is_constant)
{
return T(0.0);
}
else
{
const ChildT* this_ = static_cast<const ChildT*>(this);
const auto call_id = std::get<Detail::CallId>(args);
if (!grad_cache.has_value() || grad_cache_call_id != call_id)
{
grad_cache_call_id = call_id;
grad_cache = this_->calculate_grad(args);
}
return *grad_cache;
}
}
// grad(), overload for argument tuples without a CallId: appends a fresh
// one and forwards to the overload above.
template <typename... ArgsTs,
typename SFINAE = std::enable_if_t<!Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
[[nodiscard]] auto grad(const std::tuple<ArgsTs...>& args, ...) const
{
const auto call_id = Detail::next_call_id();
const auto new_args = std::tuple_cat(args, std::tuple(call_id));
return grad(new_args);
}
private:
// Last computed results and the CallId each of them belongs to. An empty
// optional means nothing has been computed yet, which is what tells a
// never-evaluated node apart from one evaluated with CallId 0.
mutable std::optional<T> value_cache;
mutable std::optional<T> grad_cache;
mutable Detail::CallId value_cache_call_id{};
mutable Detail::CallId grad_cache_call_id{};
};
// Leaf node: the I-th element of the argument tuple, treated as the
// variable that is differentiated against (its derivative is 1).
template <typename T, int I>
struct VariableParameter : Evaluable<T, VariableParameter<T, I>>
{
    static constexpr bool is_constant = false;
    using ValueType = T;

    constexpr VariableParameter()
    {
    }

    // The value is whatever was supplied at position I.
    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto& selected = std::get<I>(args);
        return selected;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>&) const
    {
        return static_cast<T>(1.0);
    }
};
// Leaf node: the I-th element of the argument tuple, treated as a constant
// with respect to differentiation (its derivative is 0).
template <typename T, int I>
struct ConstantParameter : Evaluable<T, ConstantParameter<T, I>>
{
    static constexpr bool is_constant = true;
    using ValueType = T;

    constexpr ConstantParameter()
    {
    }

    // The value is whatever was supplied at position I.
    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto& selected = std::get<I>(args);
        return selected;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>&) const
    {
        return static_cast<T>(0.0);
    }
};
// Leaf node holding its own copy of a fixed value; its derivative is 0.
template <typename T>
struct Constant : Evaluable<T, Constant<T>>
{
    static constexpr bool is_constant = true;
    using ValueType = T;

    constexpr Constant(T x) : m_x(std::move(x))
    {
    }

    // The stored value, independent of the evaluation arguments.
    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>&) const
    {
        return m_x;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>&) const
    {
        return static_cast<T>(0.0);
    }

private:
    T m_x;
};
// Leaf node that refers to a value owned elsewhere. The referenced value
// may be changed between evaluations, but it is assumed to stay fixed
// while a single evaluation is running. Only a reference is kept, so the
// referenced object has to outlive this node.
template <typename T>
struct ConstantRef : Evaluable<T, ConstantRef<T>>
{
    static constexpr bool is_constant = true;
    using ValueType = T;

    constexpr ConstantRef(const T& x) : m_x(x)
    {
    }

    // Current content of the referenced value.
    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>&) const
    {
        return m_x;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>&) const
    {
        return static_cast<T>(0.0);
    }

private:
    const T& m_x;
};
// Node for lhs + rhs. The derivative is the sum of the operands' derivatives.
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
struct Sum : Evaluable<T, Sum<LhsT, RhsT, T>>
{
    static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
    using ValueType = T;

    constexpr Sum(LhsT&& lhs, RhsT&& rhs) :
        m_lhs(std::forward<LhsT>(lhs)),
        m_rhs(std::forward<RhsT>(rhs))
    {
    }

    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto left = m_lhs.value(args);
        const auto right = m_rhs.value(args);
        return left + right;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>& args) const
    {
        const auto left_grad = m_lhs.grad(args);
        const auto right_grad = m_rhs.grad(args);
        return left_grad + right_grad;
    }

private:
    // Operands: owned when built from a temporary, referenced otherwise.
    StoreValueOrRef<LhsT> m_lhs;
    StoreValueOrRef<RhsT> m_rhs;
};
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator+(LhsT&& lhs, RhsT&& rhs)
{
return Sum<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
}
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator+(LhsT&& lhs, Id<T> rhs)
{
return Sum<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
}
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
[[nodiscard]] constexpr auto operator+(Id<T> lhs, RhsT&& rhs)
{
return Sum<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
}
// Node for lhs - rhs. The derivative is the difference of the operands'
// derivatives.
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
struct Difference : Evaluable<T, Difference<LhsT, RhsT, T>>
{
    static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
    using ValueType = T;

    constexpr Difference(LhsT&& lhs, RhsT&& rhs) :
        m_lhs(std::forward<LhsT>(lhs)),
        m_rhs(std::forward<RhsT>(rhs))
    {
    }

    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto left = m_lhs.value(args);
        const auto right = m_rhs.value(args);
        return left - right;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>& args) const
    {
        const auto left_grad = m_lhs.grad(args);
        const auto right_grad = m_rhs.grad(args);
        return left_grad - right_grad;
    }

private:
    // Operands: owned when built from a temporary, referenced otherwise.
    StoreValueOrRef<LhsT> m_lhs;
    StoreValueOrRef<RhsT> m_rhs;
};
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator-(LhsT&& lhs, RhsT&& rhs)
{
return Difference<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
}
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator-(LhsT&& lhs, Id<T> rhs)
{
return Difference<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
}
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
[[nodiscard]] constexpr auto operator-(Id<T> lhs, RhsT&& rhs)
{
return Difference<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
}
// Node for lhs * rhs. The derivative follows the product rule:
// lhs' * rhs + lhs * rhs'.
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
struct Product : Evaluable<T, Product<LhsT, RhsT, T>>
{
    static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
    using ValueType = T;

    constexpr Product(LhsT&& lhs, RhsT&& rhs) :
        m_lhs(std::forward<LhsT>(lhs)),
        m_rhs(std::forward<RhsT>(rhs))
    {
    }

    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto left = m_lhs.value(args);
        const auto right = m_rhs.value(args);
        return left * right;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>& args) const
    {
        const auto first_term = m_lhs.grad(args) * m_rhs.value(args);
        const auto second_term = m_lhs.value(args) * m_rhs.grad(args);
        return first_term + second_term;
    }

private:
    // Operands: owned when built from a temporary, referenced otherwise.
    StoreValueOrRef<LhsT> m_lhs;
    StoreValueOrRef<RhsT> m_rhs;
};
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator*(LhsT&& lhs, RhsT&& rhs)
{
return Product<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
}
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator*(LhsT&& lhs, Id<T> rhs)
{
return Product<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
}
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
[[nodiscard]] constexpr auto operator*(Id<T> lhs, RhsT&& rhs)
{
return Product<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
}
// Node for lhs / rhs. The derivative follows the quotient rule:
// (lhs' * rhs - lhs * rhs') / rhs^2.
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
struct Quotient : Evaluable<T, Quotient<LhsT, RhsT, T>>
{
    static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
    using ValueType = T;

    constexpr Quotient(LhsT&& lhs, RhsT&& rhs) :
        m_lhs(std::forward<LhsT>(lhs)),
        m_rhs(std::forward<RhsT>(rhs))
    {
    }

    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto numerator = m_lhs.value(args);
        const auto denominator = m_rhs.value(args);
        return numerator / denominator;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>& args) const
    {
        auto denominator = m_rhs.value(args);
        const auto numerator =
            m_lhs.grad(args) * denominator - m_lhs.value(args) * m_rhs.grad(args);
        return numerator / (denominator * denominator);
    }

private:
    // Operands: owned when built from a temporary, referenced otherwise.
    StoreValueOrRef<LhsT> m_lhs;
    StoreValueOrRef<RhsT> m_rhs;
};
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator/(LhsT&& lhs, RhsT&& rhs)
{
return Quotient<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
}
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
[[nodiscard]] constexpr auto operator/(LhsT&& lhs, Id<T> rhs)
{
return Quotient<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
}
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
[[nodiscard]] constexpr auto operator/(Id<T> lhs, RhsT&& rhs)
{
return Quotient<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
}
// Node for -x. Both the value and the derivative are the negated
// counterparts of the operand's.
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
struct Negation : Evaluable<T, Negation<ArgT, T>>
{
    static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;
    using ValueType = T;

    constexpr explicit Negation(ArgT&& x) : m_x(std::forward<ArgT>(x))
    {
    }

    template <typename... Args>
    [[nodiscard]] T calculate_value(const std::tuple<Args...>& args) const
    {
        const auto inner = m_x.value(args);
        return -inner;
    }

    template <typename... Args>
    [[nodiscard]] T calculate_grad(const std::tuple<Args...>& args) const
    {
        const auto inner_grad = m_x.grad(args);
        return -inner_grad;
    }

private:
    // Operand: owned when built from a temporary, referenced otherwise.
    StoreValueOrRef<ArgT> m_x;
};
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
[[nodiscard]] constexpr auto operator-(ArgT&& x)
{
return Negation<ArgT&&>(std::forward<ArgT>(x));
}
// Node for the logistic function s(x) = 1 / (1 + exp(-x)).
// By the chain rule its derivative is x' * s(x) * (1 - s(x)).
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
struct Sigmoid : Evaluable<T, Sigmoid<ArgT, T>>
{
    using ValueType = T;
    static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;

    constexpr explicit Sigmoid(ArgT&& x) :
        m_x(std::forward<ArgT>(x))
    {
    }

    template <typename... ArgsTs>
    [[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
    {
        return value_(m_x.value(args));
    }

    template <typename... ArgsTs>
    [[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
    {
        return m_x.grad(args) * grad_(m_x.value(args));
    }

private:
    // Operand: owned when built from a temporary, referenced otherwise.
    StoreValueOrRef<ArgT> m_x;

    // s(x)
    [[nodiscard]] T value_(T x) const
    {
        return 1.0 / (1.0 + std::exp(-x));
    }

    // s(x) * (1 - s(x)). s(x) is evaluated once and reused, which avoids a
    // second std::exp call; the result is the same as evaluating it twice.
    [[nodiscard]] T grad_(T x) const
    {
        const T s = value_(x);
        return s * (1.0 - s);
    }
};
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
[[nodiscard]] constexpr auto sigmoid(ArgT&& x)
{
return Sigmoid<ArgT&&>(std::forward<ArgT>(x));
}
// Expression node raising its operand to a fixed exponent.
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
struct Pow : Evaluable<T, Pow<ArgT, T>>
{
    using ValueType = T;

    // Taken from Detail::AreAllConstantV applied to the operand type.
    static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;

    constexpr explicit Pow(ArgT&& base, Id<T> exponent) :
        m_x(std::forward<ArgT>(base)),
        m_exponent(std::move(exponent))
    {
    }

    // Value: operand value raised to the stored exponent.
    template <typename... ArgsTs>
    [[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
    {
        const auto base_value = m_x.value(args);
        return std::pow(base_value, m_exponent);
    }

    // Gradient (power rule): exponent * base^(exponent - 1) * operand gradient.
    template <typename... ArgsTs>
    [[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
    {
        const auto base_value = m_x.value(args);
        return m_exponent * std::pow(base_value, m_exponent - T(1.0)) * m_x.grad(args);
    }

private:
    StoreValueOrRef<ArgT> m_x;
    T m_exponent;
};
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
[[nodiscard]] constexpr auto pow(ArgT&& x, Id<T> exp)
{
return Pow<ArgT&&>(std::forward<ArgT>(x), std::move(exp));
}
// Expression node applying the natural logarithm to its operand.
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
struct Log : Evaluable<T, Log<ArgT, T>>
{
    using ValueType = T;

    // Taken from Detail::AreAllConstantV applied to the operand type.
    static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;

    constexpr explicit Log(ArgT&& operand) :
        m_x(std::forward<ArgT>(operand))
    {
    }

    // Value: std::log of the operand's value.
    template <typename... ArgsTs>
    [[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
    {
        return value_(m_x.value(args));
    }

    // Gradient (chain rule): operand gradient times 1 / operand value.
    template <typename... ArgsTs>
    [[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
    {
        return m_x.grad(args) * grad_(m_x.value(args));
    }

private:
    StoreValueOrRef<ArgT> m_x;

    // Natural logarithm.
    T value_(T x) const
    {
        return std::log(x);
    }

    // Derivative of the natural logarithm.
    T grad_(T x) const
    {
        return 1.0 / x;
    }
};
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
[[nodiscard]] constexpr auto log(ArgT&& x)
{
return Log<ArgT&&>(std::forward<ArgT>(x));
}
}
#endif
+815
View File
@@ -0,0 +1,815 @@
#include "convert.h"
#include "uci.h"
#include "misc.h"
#include "thread.h"
#include "position.h"
#include "tt.h"
#include "extra/nnue_data_binpack_format.h"
#include "nnue/evaluate_nnue.h"
#include "syzygy/tbprobe.h"
#include <sstream>
#include <fstream>
#include <unordered_set>
#include <iomanip>
#include <list>
#include <cmath> // std::exp(),std::pow(),std::log()
#include <cstring> // memcpy()
#include <memory>
#include <limits>
#include <optional>
#include <chrono>
#include <random>
#include <regex>
#include <filesystem>
using namespace std;
namespace Learner
{
// Returns true when the piece-placement field (the first whitespace-delimited
// token) of `input_fen` is identical to that of the FEN produced by `pos`.
// The remaining FEN fields are not compared.
bool fen_is_ok(Position& pos, std::string input_fen) {
    // example : "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3 w - h6 0 24"
    //       --> "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3"
    const auto piece_placement = [](const std::string& fen) {
        std::istringstream fields(fen);
        std::string first_field;
        fields >> first_field;
        return first_field;
    };

    const std::string from_position = piece_placement(pos.fen());
    const std::string from_input = piece_placement(input_fen);
    return from_input == from_position;
}
// Converts plain-text training records into packed PackedSfenValue records and
// appends them to `output_file_name`.
//
// Every input file is read line by line. A record is a group of lines whose
// first token is one of "fen", "move", "score", "ply" or "result"; a line whose
// first token is "e" ends the record and triggers the write.
//
//   ply_minimum / ply_maximum  : records whose ply is outside this range are dropped.
//   interpolate_eval           : when non-zero, a "ply" line replaces the score by
//                                min(3000, interpolate_eval * ply) and a "result"
//                                line multiplies the score by the game result.
//   src_score_* / dest_score_* : a parsed score is mapped linearly from the source
//                                range onto the destination range, then clamped to
//                                [-VALUE_MATE, VALUE_MATE].
//   check_invalid_fen          : drop records whose FEN fails fen_is_ok().
//   check_illegal_move         : drop records whose move resolves to MOVE_NONE.
void convert_bin(
    const vector<string>& filenames,
    const string& output_file_name,
    const int ply_minimum,
    const int ply_maximum,
    const int interpolate_eval,
    const int src_score_min_value,
    const int src_score_max_value,
    const int dest_score_min_value,
    const int dest_score_max_value,
    const bool check_invalid_fen,
    const bool check_illegal_move)
{
    std::cout << "check_invalid_fen=" << check_invalid_fen << std::endl;
    std::cout << "check_illegal_move=" << check_illegal_move << std::endl;

    auto th = Threads.main();
    auto& tpos = th->rootPos;

    // convert plain rag to packed sfenvalue for Yaneura king
    std::fstream fs;
    fs.open(output_file_name, ios::app | ios::binary);
    if (!fs) {
        std::cerr << "Failed to open output file: " << output_file_name << std::endl;
        return;
    }

    // Owns the StateInfo handed to tpos.set(); replaced for every "fen" line.
    StateListPtr states;

    for (const auto& filename : filenames) {
        std::cout << "convert " << filename << " ... ";

        ifstream ifs;
        ifs.open(filename);
        if (!ifs) {
            std::cerr << "Failed to open input file: " << filename << std::endl;
            continue;
        }

        // Per-file statistics.
        uint64_t data_size = 0;
        uint64_t filtered_size = 0;
        uint64_t filtered_size_fen = 0;
        uint64_t filtered_size_move = 0;
        uint64_t filtered_size_ply = 0;

        // Zero the record once so that fields a record never sets are written
        // as zeros instead of indeterminate bytes.
        PackedSfenValue p;
        memset((char*)&p, 0, sizeof(PackedSfenValue));
        p.gamePly = 1; // Not included in apery format. Should be initialized

        bool ignore_flag_fen = false;
        bool ignore_flag_move = false;
        bool ignore_flag_ply = false;

        std::string line;
        while (std::getline(ifs, line)) {
            std::stringstream ss(line);
            std::string token;
            std::string value;
            ss >> token;

            if (token == "fen") {
                states = StateListPtr(new std::deque<StateInfo>(1)); // Drop old and create a new one

                // Everything after the "fen" token is the FEN text.
                std::string input_fen;
                std::getline(ss >> std::ws, input_fen);
                if (input_fen.empty()) {
                    // A "fen" line without a position cannot be converted.
                    ignore_flag_fen = true;
                    filtered_size_fen++;
                    continue;
                }

                tpos.set(input_fen, false, &states->back(), Threads.main());
                if (check_invalid_fen && !fen_is_ok(tpos, input_fen)) {
                    ignore_flag_fen = true;
                    filtered_size_fen++;
                }
                else {
                    tpos.sfen_pack(p.sfen);
                }
            }
            else if (token == "move") {
                ss >> value;
                Move move = UCI::to_move(tpos, value);
                if (check_illegal_move && move == MOVE_NONE) {
                    ignore_flag_move = true;
                    filtered_size_move++;
                }
                else {
                    p.move = move;
                }
            }
            else if (token == "score") {
                double score = 0.0;
                ss >> score;
                // Training Formula ?Issue #71 ?nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
                // Normalize to [0.0, 1.0].
                // NOTE(review): src_score_max_value == src_score_min_value makes this a division by zero.
                score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value);
                // Scale to [dest_score_min_value, dest_score_max_value].
                score = score * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
                p.score = Math::clamp((int32_t)std::round(score), -(int32_t)VALUE_MATE, (int32_t)VALUE_MATE);
            }
            else if (token == "ply") {
                int temp = 0;
                ss >> temp;
                if (temp < ply_minimum || temp > ply_maximum) {
                    ignore_flag_ply = true;
                    filtered_size_ply++;
                }
                p.gamePly = uint16_t(temp); // No cast here?
                if (interpolate_eval != 0) {
                    p.score = min(3000, interpolate_eval * temp);
                }
            }
            else if (token == "result") {
                int temp = 0;
                ss >> temp;
                p.game_result = int8_t(temp); // Do you need a cast here?
                if (interpolate_eval) {
                    p.score = p.score * p.game_result;
                }
            }
            else if (token == "e") {
                // End of record: write it unless one of its lines was rejected.
                if (!(ignore_flag_fen || ignore_flag_move || ignore_flag_ply)) {
                    fs.write((char*)&p, sizeof(PackedSfenValue));
                    data_size += 1;
                    // debug
                    // std::cout<<tpos<<std::endl;
                    // std::cout<<p.score<<","<<int(p.gamePly)<<","<<int(p.game_result)<<std::endl;
                }
                else {
                    filtered_size++;
                }
                ignore_flag_fen = false;
                ignore_flag_move = false;
                ignore_flag_ply = false;
            }
        }

        std::cout << "done " << data_size << " parsed " << filtered_size << " is filtered"
            << " (invalid fen:" << filtered_size_fen << ", illegal move:" << filtered_size_move << ", invalid ply:" << filtered_size_ply << ")" << std::endl;
        ifs.close();
    }

    std::cout << "all done" << std::endl;
    fs.close();
}
// Removes leading whitespace from `s` in place.
static inline void ltrim(std::string& s) {
    // std::isspace requires a value representable as unsigned char (or EOF);
    // taking the character as unsigned char keeps non-ASCII bytes in range.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
        return !std::isspace(ch);
    }));
}
// Removes trailing whitespace from `s` in place.
static inline void rtrim(std::string& s) {
    // std::isspace requires a value representable as unsigned char (or EOF);
    // taking the character as unsigned char keeps non-ASCII bytes in range.
    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
        return !std::isspace(ch);
    }).base(), s.end());
}
// Removes whitespace from both ends of `s` in place.
static inline void trim(std::string& s) {
    rtrim(s);
    ltrim(s);
}
// Maps the quoted value of a PGN Result tag to a score from White's point of
// view: 1 for "1-0", -1 for "0-1" and 0 for anything else.
int parse_game_result_from_pgn_extract(std::string result) {
    const bool white_wins = (result == "\"1-0\"");
    if (white_wins)
        return 1;

    const bool black_wins = (result == "\"0-1\"");
    return black_wins ? -1 : 0;
}
// Parses an evaluation string taken from a PGN comment.
//
// 0.25 --> 0.25 * PawnValueEg
// #-4 --> -mate_in(4)
// #3 --> mate_in(3)
// -M4 --> -mate_in(4)
// +M3 --> mate_in(3)
//
// `success` is set to false (and VALUE_ZERO is returned) when the text cannot
// be parsed: no number where one is expected, a mate distance that does not
// fit in an int, or trailing characters after a plain numeric score.
Value parse_score_from_pgn_extract(std::string eval, bool& success) {
    success = true;

    // Parses the distance that follows a mate marker. Like std::stoi it skips
    // leading whitespace and tolerates trailing characters, but it reports
    // failure through `success` instead of throwing.
    const auto parse_mate = [&success](const std::string& digits, bool negate) -> Value {
        char* end = nullptr;
        const long distance = strtol(digits.c_str(), &end, 10);
        const bool nothing_parsed = (end == digits.c_str());
        if (nothing_parsed
            || distance < std::numeric_limits<int>::min()
            || distance > std::numeric_limits<int>::max()) {
            success = false;
            return VALUE_ZERO;
        }
        const Value mate = mate_in(static_cast<int>(distance));
        return negate ? -mate : mate;
    };

    if (eval.substr(0, 1) == "#") {
        if (eval.substr(1, 1) == "-") {
            return parse_mate(eval.substr(2), true);
        }
        else {
            return parse_mate(eval.substr(1), false);
        }
    }
    else if (eval.substr(0, 2) == "-M") {
        //std::cout << "eval=" << eval << std::endl;
        return parse_mate(eval.substr(2), true);
    }
    else if (eval.substr(0, 2) == "+M") {
        //std::cout << "eval=" << eval << std::endl;
        return parse_mate(eval.substr(2), false);
    }
    else {
        char* endptr = nullptr;
        const double value = strtod(eval.c_str(), &endptr);

        // Reject both "nothing converted" (e.g. an empty string) and
        // trailing characters after the number.
        if (endptr == eval.c_str() || *endptr != '\0') {
            success = false;
            return VALUE_ZERO;
        }
        else {
            return Value(value * static_cast<double>(PawnValueEg));
        }
    }
}
// for Debug
//#define DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT
// Cheap shape test for a FEN string: exactly five spaces (six fields) and
// exactly seven slashes (eight ranks). The content itself is not validated.
bool is_like_fen(std::string fen) {
    int spaces = 0;
    int slashes = 0;
    for (const char c : fen) {
        if (c == ' ') {
            ++spaces;
        }
        else if (c == '/') {
            ++slashes;
        }
    }
    return spaces == 5 && slashes == 7;
}
// Converts annotated PGN move text into packed PackedSfenValue records written
// to `output_file_name`.
//
// The move text is expected to carry, for every move, a "{ <fen> }" comment,
// the move itself and optionally an evaluation comment (see the examples in
// the body). Tag lines starting with '[' are scanned for the Result tag only.
//
//   pgn_eval_side_to_move              : when false, scores of positions with
//                                        Black to move are negated.
//   convert_no_eval_fens_as_score_zero : when true, positions without a usable
//                                        evaluation are written with score 0
//                                        instead of being skipped.
void convert_bin_from_pgn_extract(
    const vector<string>& filenames,
    const string& output_file_name,
    const bool pgn_eval_side_to_move,
    const bool convert_no_eval_fens_as_score_zero)
{
    std::cout << "pgn_eval_side_to_move=" << pgn_eval_side_to_move << std::endl;
    std::cout << "convert_no_eval_fens_as_score_zero=" << convert_no_eval_fens_as_score_zero << std::endl;

    // Compiled once up front: building a std::regex is expensive and none of
    // the patterns depends on the input.
    const std::regex pattern_result(R"(\[Result (.+?)\])");
    const std::regex pattern_bracket(R"(\{(.+?)\})");
    const std::regex pattern_move(R"(\}(.+?)\{)");
    const std::regex pattern_eval1(R"(\[\%eval (.+?)\])");
    const std::regex pattern_eval2(R"((.+?)\/)");

    auto th = Threads.main();
    auto& pos = th->rootPos;

    std::fstream ofs;
    ofs.open(output_file_name, ios::out | ios::binary);
    if (!ofs) {
        std::cerr << "Failed to open output file: " << output_file_name << std::endl;
        return;
    }

    int game_count = 0;
    int fen_count = 0;

    for (const auto& filename : filenames) {
        std::cout << now_string() << " convert " << filename << std::endl;

        ifstream ifs;
        ifs.open(filename);
        if (!ifs) {
            std::cerr << "Failed to open input file: " << filename << std::endl;
            continue;
        }

        int game_result = 0;
        std::string line;
        while (std::getline(ifs, line)) {
            if (line.empty()) {
                continue;
            }
            else if (line.substr(0, 1) == "[") {
                std::smatch match;

                // example: [Result "1-0"]
                if (std::regex_search(line, match, pattern_result)) {
                    game_result = parse_game_result_from_pgn_extract(match.str(1));
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
                    std::cout << "game_result=" << game_result << std::endl;
#endif
                    game_count++;
                    if (game_count % 10000 == 0) {
                        std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
                    }
                }
                continue;
            }
            else {
                int gamePly = 1;
                auto itr = line.cbegin();

                while (true) {
                    gamePly++;

                    PackedSfenValue psv;
                    memset((char*)&psv, 0, sizeof(PackedSfenValue));

                    // Handed to pos.set() below. It is declared at iteration
                    // scope so that it is still alive when `pos` is used for
                    // the move lookup and the side-to-move query further down.
                    StateInfo si;

                    // fen
                    {
                        bool fen_found = false;

                        // Skip bracketed comments until one looks like a FEN.
                        while (!fen_found) {
                            std::smatch match;
                            if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
                                break;
                            }

                            // Continue the next search from the closing brace.
                            itr += match.position(0) + match.length(0) - 1;
                            std::string str_fen = match.str(1);
                            trim(str_fen);

                            if (is_like_fen(str_fen)) {
                                fen_found = true;
                                pos.set(str_fen, false, &si, th);
                                pos.sfen_pack(psv.sfen);
                            }
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
                            std::cout << "str_fen=" << str_fen << std::endl;
                            std::cout << "fen_found=" << fen_found << std::endl;
#endif
                        }

                        if (!fen_found) {
                            break;
                        }
                    }

                    // move
                    {
                        std::smatch match;
                        if (!std::regex_search(itr, line.cend(), match, pattern_move)) {
                            break;
                        }

                        // Continue the next search from the opening brace.
                        itr += match.position(0) + match.length(0) - 1;
                        std::string str_move = match.str(1);
                        trim(str_move);
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
                        std::cout << "str_move=" << str_move << std::endl;
#endif
                        psv.move = UCI::to_move(pos, str_move);
                    }

                    // eval
                    bool eval_found = false;
                    {
                        std::smatch match;
                        if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
                            break;
                        }

                        std::string str_eval_clk = match.str(1);
                        trim(str_eval_clk);
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
                        std::cout << "str_eval_clk=" << str_eval_clk << std::endl;
#endif
                        // example: { [%eval 0.25] [%clk 0:10:00] }
                        // example: { [%eval #-4] [%clk 0:10:00] }
                        // example: { [%eval #3] [%clk 0:10:00] }
                        // example: { +0.71/22 1.2s }
                        // example: { -M4/7 0.003s }
                        // example: { M3/245 0.017s }
                        // example: { +M1/245 0.010s, White mates }
                        // example: { 0.60 }
                        // example: { book }
                        // example: { rnbqkb1r/pp3ppp/2p1pn2/3p4/2PP4/2N2N2/PP2PPPP/R1BQKB1R w KQkq - 0 5 }

                        // Considering the absence of eval: when the comment is the
                        // next position's FEN, it is left for the next iteration.
                        if (!is_like_fen(str_eval_clk)) {
                            // Must happen before `match` is reused below.
                            itr += match.position(0) + match.length(0) - 1;

                            if (str_eval_clk != "book") {
                                std::string str_eval;
                                if (std::regex_search(str_eval_clk, match, pattern_eval1) ||
                                    std::regex_search(str_eval_clk, match, pattern_eval2)) {
                                    str_eval = match.str(1);
                                    trim(str_eval);
                                }
                                else {
                                    str_eval = str_eval_clk;
                                }

                                bool success = false;
                                Value value = parse_score_from_pgn_extract(str_eval, success);
                                if (success) {
                                    eval_found = true;
                                    psv.score = Math::clamp(value, -VALUE_MATE, VALUE_MATE);
                                }
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
                                std::cout << "str_eval=" << str_eval << std::endl;
                                std::cout << "success=" << success << ", psv.score=" << psv.score << std::endl;
#endif
                            }
                        }
                    }

                    // write
                    if (eval_found || convert_no_eval_fens_as_score_zero) {
                        if (!eval_found && convert_no_eval_fens_as_score_zero) {
                            psv.score = 0;
                        }

                        psv.gamePly = gamePly;
                        psv.game_result = game_result;

                        // Flip the result (and, unless the evaluation is already
                        // from the side to move, the score) for Black to move.
                        if (pos.side_to_move() == BLACK) {
                            if (!pgn_eval_side_to_move) {
                                psv.score *= -1;
                            }
                            psv.game_result *= -1;
                        }

                        ofs.write((char*)&psv, sizeof(PackedSfenValue));
                        fen_count++;
                    }
                }

                game_result = 0;
            }
        }
    }

    std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
    std::cout << now_string() << " all done" << std::endl;
    ofs.close();
}
// Converts packed PackedSfenValue files into the plain-text record format and
// appends the text to `output_file_name`.
//
// Each record is written as six lines: "fen", "move", "score", "ply", "result"
// and a terminating "e".
void convert_plain(
    const vector<string>& filenames,
    const string& output_file_name)
{
    Position tpos;

    std::ofstream ofs;
    ofs.open(output_file_name, ios::app);
    if (!ofs) {
        std::cerr << "Failed to open output file: " << output_file_name << std::endl;
        return;
    }

    auto th = Threads.main();

    for (const auto& filename : filenames) {
        std::cout << "convert " << filename << " ... ";

        // Just convert packedsfenvalue to text
        std::fstream fs;
        fs.open(filename, ios::in | ios::binary);

        PackedSfenValue p;
        // Stops at the first short or failed read (end of file or unopened file).
        while (fs.read((char*)&p, sizeof(PackedSfenValue))) {
            StateInfo si;
            tpos.set_from_packed_sfen(p.sfen, &si, th);

            // write as plain text; '\n' instead of std::endl avoids flushing
            // the stream six times per record.
            ofs << "fen " << tpos.fen() << '\n';
            ofs << "move " << UCI::move(Move(p.move), false) << '\n';
            ofs << "score " << p.score << '\n';
            ofs << "ply " << int(p.gamePly) << '\n';
            ofs << "result " << int(p.game_result) << '\n';
            ofs << "e" << '\n';
        }

        fs.close();
        std::cout << "done" << std::endl;
    }

    ofs.close();
    std::cout << "all done" << std::endl;
}
// File-name suffixes by which the generic `convert` command recognises the
// training-data formats it can translate between.
static inline const std::string plain_extension = ".plain";
static inline const std::string bin_extension = ".bin";
static inline const std::string binpack_extension = ".binpack";
// Returns true when `name` can be opened for reading.
static bool file_exists(const std::string& name)
{
    return std::ifstream(name).good();
}
// Returns true when `lhs` ends with the suffix `end` (an empty suffix always matches).
static bool ends_with(const std::string& lhs, const std::string& end)
{
    const std::size_t suffix_length = end.size();
    if (suffix_length > lhs.size())
    {
        return false;
    }
    return lhs.compare(lhs.size() - suffix_length, suffix_length, end) == 0;
}
// Returns true when the input path carries the expected input extension and
// the output path carries the expected output extension.
static bool is_convert_of_type(
    const std::string& input_path,
    const std::string& output_path,
    const std::string& expected_input_extension,
    const std::string& expected_output_extension)
{
    if (!ends_with(input_path, expected_input_extension))
    {
        return false;
    }
    return ends_with(output_path, expected_output_extension);
}
// Signature shared by all binpack conversion routines.
using ConvertFunctionType = void(std::string inputPath, std::string outputPath, std::ios_base::openmode om, bool validate);

// Picks the conversion routine matching the extensions of the two paths.
// Returns nullptr when the combination is not supported.
static ConvertFunctionType* get_convert_function(const std::string& input_path, const std::string& output_path)
{
    struct Route
    {
        const std::string& from;
        const std::string& to;
        ConvertFunctionType* handler;
    };

    // Checked in this order; the first matching pair wins.
    const Route routes[] = {
        { plain_extension,   bin_extension,     binpack::convertPlainToBin },
        { plain_extension,   binpack_extension, binpack::convertPlainToBinpack },
        { bin_extension,     plain_extension,   binpack::convertBinToPlain },
        { bin_extension,     binpack_extension, binpack::convertBinToBinpack },
        { binpack_extension, plain_extension,   binpack::convertBinpackToPlain },
        { binpack_extension, bin_extension,     binpack::convertBinpackToBin },
    };

    for (const Route& route : routes)
    {
        if (is_convert_of_type(input_path, output_path, route.from, route.to))
        {
            return route.handler;
        }
    }

    return nullptr;
}
static void convert(const std::string& input_path, const std::string& output_path, std::ios_base::openmode om, bool validate)
{
if(!file_exists(input_path))
{
std::cerr << "Input file does not exist.\n";
return;
}
auto func = get_convert_function(input_path, output_path);
if (func != nullptr)
{
func(input_path, output_path, om, validate);
}
else
{
std::cerr << "Conversion between files of these types is not supported.\n";
}
}
// Interprets the tokenised arguments of the `convert` command:
//   args[0] = input path, args[1] = output path,
//   followed by the optional flags "append" and "validate" in any order.
static void convert(const std::vector<std::string>& args)
{
    const std::size_t arg_count = args.size();
    if (arg_count < 2 || arg_count > 4)
    {
        std::cerr << "Invalid arguments.\n";
        std::cerr << "Usage: convert from_path to_path [append] [validate]\n";
        return;
    }

    // Flags are looked up only among the tokens after the two paths.
    const auto flags_begin = args.begin() + 2;
    const auto has_flag = [&](const char* flag) {
        return std::find(flags_begin, args.end(), flag) != args.end();
    };

    const bool append = has_flag("append");
    const bool validate = has_flag("validate");

    std::ios_base::openmode openmode = std::ios_base::trunc;
    if (append)
    {
        openmode = std::ios_base::app;
    }

    convert(args[0], args[1], openmode, validate);
}
// Entry point of the `convert` command: splits the remaining command line
// into whitespace-separated tokens and forwards them.
void convert(istringstream& is)
{
    std::vector<std::string> args;
    for (std::string token; is >> token; )
    {
        args.push_back(token);
    }

    convert(args);
}
static void append_files_from_dir(
std::vector<std::string>& filenames,
const std::string& base_dir,
const std::string& target_dir)
{
string kif_base_dir = Path::combine(base_dir, target_dir);
namespace sys = std::filesystem;
sys::path p(kif_base_dir); // Origin of enumeration
std::for_each(sys::directory_iterator(p), sys::directory_iterator(),
[&](const sys::path& path) {
if (sys::is_regular_file(path))
filenames.push_back(Path::combine(target_dir, path.filename().generic_string()));
});
}
// Prefixes every entry of `filenames` with `base_dir`, in place.
static void rebase_files(
    std::vector<std::string>& filenames,
    const std::string& base_dir)
{
    for (std::size_t i = 0; i < filenames.size(); ++i)
    {
        filenames[i] = Path::combine(base_dir, filenames[i]);
    }
}
// Command entry point: reads "<option> <value>" pairs from `is`, collects the
// input files and runs the PGN to packed-sfen conversion.
//
// Recognised options: targetdir, targetfile (repeatable), basedir,
// pgn_eval_side_to_move, convert_no_eval_fens_as_score_zero, output_file_name.
// Unknown options are reported and skipped.
void convert_bin_from_pgn_extract(std::istringstream& is)
{
    std::vector<std::string> filenames;
    string base_dir;
    string target_dir;
    bool pgn_eval_side_to_move = false;
    bool convert_no_eval_fens_as_score_zero = false;
    string output_file_name = "shuffled_sfen.bin";

    for (string option; is >> option; )
    {
        if (option == "targetdir")
        {
            is >> target_dir;
        }
        else if (option == "targetfile")
        {
            std::string filename;
            is >> filename;
            filenames.push_back(filename);
        }
        else if (option == "basedir")
        {
            is >> base_dir;
        }
        else if (option == "pgn_eval_side_to_move")
        {
            is >> pgn_eval_side_to_move;
        }
        else if (option == "convert_no_eval_fens_as_score_zero")
        {
            is >> convert_no_eval_fens_as_score_zero;
        }
        else if (option == "output_file_name")
        {
            is >> output_file_name;
        }
        else
        {
            cout << "Unknown option: " << option << ". Ignoring.\n";
        }
    }

    // Files named explicitly come first, then the contents of targetdir;
    // all of them are finally resolved against basedir.
    const bool scan_directory = !target_dir.empty();
    if (scan_directory)
    {
        append_files_from_dir(filenames, base_dir, target_dir);
    }
    rebase_files(filenames, base_dir);

    Eval::NNUE::init();

    cout << "convert_bin_from_pgn-extract.." << endl;
    convert_bin_from_pgn_extract(
        filenames,
        output_file_name,
        pgn_eval_side_to_move,
        convert_no_eval_fens_as_score_zero);
}
// Command entry point: reads "<option> <value>" pairs from `is`, collects the
// input files and runs the plain-text to packed-sfen conversion.
//
// Recognised options: targetdir, targetfile (repeatable), basedir,
// ply_minimum, ply_maximum, interpolate_eval, check_invalid_fen,
// check_illegal_move, pgn_eval_side_to_move,
// convert_no_eval_fens_as_score_zero, src_score_min_value,
// src_score_max_value, dest_score_min_value, dest_score_max_value,
// output_file_name. Unknown options are reported and skipped.
void convert_bin(std::istringstream& is)
{
    std::vector<std::string> filenames;
    string base_dir;
    string target_dir;
    int ply_minimum = 0;
    int ply_maximum = 114514;
    // Integer multiplier (0 disables it). Reading it as bool would put the
    // stream into a failed state for any value other than 0 or 1, which
    // silently drops every option that follows.
    int interpolate_eval = 0;
    bool check_invalid_fen = false;
    bool check_illegal_move = false;
    // Accepted so the option is not reported as unknown; not used by this command.
    bool pgn_eval_side_to_move = false;
    bool convert_no_eval_fens_as_score_zero = false;
    // NOTE(review): the conversion routine takes these four bounds as int, so
    // fractional values are truncated at the call below.
    double src_score_min_value = 0.0;
    double src_score_max_value = 1.0;
    double dest_score_min_value = 0.0;
    double dest_score_max_value = 1.0;
    string output_file_name = "shuffled_sfen.bin";

    while (true)
    {
        string option;
        is >> option;

        if (option == "")
            break;

        if (option == "targetdir") is >> target_dir;
        else if (option == "targetfile")
        {
            std::string filename;
            is >> filename;
            filenames.push_back(filename);
        }
        else if (option == "basedir") is >> base_dir;
        else if (option == "ply_minimum") is >> ply_minimum;
        else if (option == "ply_maximum") is >> ply_maximum;
        else if (option == "interpolate_eval") is >> interpolate_eval;
        else if (option == "check_invalid_fen") is >> check_invalid_fen;
        else if (option == "check_illegal_move") is >> check_illegal_move;
        else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move;
        else if (option == "convert_no_eval_fens_as_score_zero") is >> convert_no_eval_fens_as_score_zero;
        else if (option == "src_score_min_value") is >> src_score_min_value;
        else if (option == "src_score_max_value") is >> src_score_max_value;
        else if (option == "dest_score_min_value") is >> dest_score_min_value;
        else if (option == "dest_score_max_value") is >> dest_score_max_value;
        else if (option == "output_file_name") is >> output_file_name;
        else
        {
            cout << "Unknown option: " << option << ". Ignoring.\n";
        }
    }

    // Files named explicitly come first, then the contents of targetdir;
    // all of them are finally resolved against basedir.
    if (!target_dir.empty())
    {
        append_files_from_dir(filenames, base_dir, target_dir);
    }
    rebase_files(filenames, base_dir);

    Eval::NNUE::init();

    cout << "convert_bin.." << endl;
    convert_bin(
        filenames,
        output_file_name,
        ply_minimum,
        ply_maximum,
        interpolate_eval,
        src_score_min_value,
        src_score_max_value,
        dest_score_min_value,
        dest_score_max_value,
        check_invalid_fen,
        check_illegal_move
    );
}
// Command entry point: reads "<option> <value>" pairs from `is`, collects the
// input files and runs the packed-sfen to plain-text conversion.
//
// Recognised options: targetdir, targetfile (repeatable), basedir,
// output_file_name. Unknown options are reported and skipped.
void convert_plain(std::istringstream& is)
{
    std::vector<std::string> filenames;
    string base_dir;
    string target_dir;
    string output_file_name = "shuffled_sfen.bin";

    for (string option; is >> option; )
    {
        if (option == "targetdir")
        {
            is >> target_dir;
        }
        else if (option == "targetfile")
        {
            std::string filename;
            is >> filename;
            filenames.push_back(filename);
        }
        else if (option == "basedir")
        {
            is >> base_dir;
        }
        else if (option == "output_file_name")
        {
            is >> output_file_name;
        }
        else
        {
            cout << "Unknown option: " << option << ". Ignoring.\n";
        }
    }

    // Files named explicitly come first, then the contents of targetdir;
    // all of them are finally resolved against basedir.
    const bool scan_directory = !target_dir.empty();
    if (scan_directory)
    {
        append_files_from_dir(filenames, base_dir, target_dir);
    }
    rebase_files(filenames, base_dir);

    Eval::NNUE::init();

    cout << "convert_plain.." << endl;
    convert_plain(filenames, output_file_name);
}
}
+18
View File
@@ -0,0 +1,18 @@
// Entry points of the training-data conversion commands.
// Every function takes the remainder of the command line as a stream and
// parses its own arguments from it.
#ifndef _CONVERT_H_
#define _CONVERT_H_
#include <vector>
#include <string>
#include <sstream>
namespace Learner {
// Generic converter: "from_path to_path [append] [validate]"; the formats are
// chosen from the file extensions.
void convert(std::istringstream& is);
// Converts annotated PGN move text into packed sfen records.
void convert_bin_from_pgn_extract(std::istringstream& is);
// Converts plain-text records into packed sfen records.
void convert_bin(std::istringstream& is);
// Converts packed sfen records into plain-text records.
void convert_plain(std::istringstream& is);
}
#endif
+962
View File
@@ -0,0 +1,962 @@
#include "gensfen.h"
#include "sfen_writer.h"
#include "packed_sfen.h"
#include "opening_book.h"
#include "misc.h"
#include "position.h"
#include "thread.h"
#include "tt.h"
#include "uci.h"
#include "extra/nnue_data_binpack_format.h"
#include "nnue/evaluate_nnue.h"
#include "nnue/evaluate_nnue_learner.h"
#include "syzygy/tbprobe.h"
#include <atomic>
#include <chrono>
#include <climits>
#include <cmath>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <limits>
#include <list>
#include <memory>
#include <optional>
#include <random>
#include <shared_mutex>
#include <sstream>
#include <unordered_set>
using namespace std;
namespace Learner
{
// Class to generate sfen with multiple threads.
// Construct it from a Params object and call generate(); the per-thread work
// is done by generate_worker().
struct Gensfen
{
// User-tunable settings of one generation run.
struct Params
{
// Min and max depths for search during gensfen
int search_depth_min = 3;
int search_depth_max = -1;
// Number of the nodes to be searched.
// 0 represents no limits.
uint64_t nodes = 0;
// Upper limit of evaluation value of generated situation
int eval_limit = 3000;
// minimum ply with random move
// maximum ply with random move
// Number of random moves in one game
int random_move_minply = 1;
int random_move_maxply = 24;
int random_move_count = 5;
// Move kings with a probability of 1/N when randomly moving like Apery software.
// When you move the king again, there is a 1/N chance that it will randomly moved
// once in the opponent's turn.
// Apery has N=2. Specifying 0 here disables this function.
int random_move_like_apery = 0;
// For when using multi pv instead of random move.
// random_multi_pv is the number of candidates for MultiPV.
// When adopting the move of the candidate move, the difference
// between the evaluation value of the move of the 1st place
// and the evaluation value of the move of the Nth place
// must be in the range random_multi_pv_diff.
// random_multi_pv_depth is the search depth for MultiPV.
int random_multi_pv = 0;
int random_multi_pv_diff = 32000;
int random_multi_pv_depth = -1;
// The minimum and maximum ply (number of steps from
// the initial phase) of the sfens to write out.
int write_minply = 16;
int write_maxply = 400;
// Passed to the SfenWriter; enforce_constraints() raises it to at least
// REPORT_STATS_EVERY.
uint64_t save_every = std::numeric_limits<uint64_t>::max();
std::string output_file_name = "generated_kifu";
SfenOutputType sfen_format = SfenOutputType::Binpack;
// Seed handed to the PRNG.
std::string seed;
bool write_out_draw_game_in_training_data_generation = true;
bool detect_draw_by_consecutive_low_score = true;
bool detect_draw_by_insufficient_mating_material = true;
// When set, generate_worker() records the position reached after the
// qsearch PV instead of the current one (see the ensure_quiet branch there).
bool ensure_quiet = false;
// NOTE(review): has no default; it is only assigned in enforce_constraints().
uint64_t num_threads;
// Opening book to start games from; empty means the start position.
std::string book;
// Normalises the settings after they were parsed and fills in num_threads
// from the "Threads" option.
void enforce_constraints()
{
search_depth_max = std::max(search_depth_min, search_depth_max);
random_multi_pv_depth = std::max(search_depth_min, random_multi_pv_depth);
// Limit the maximum to a one-stop score. (Otherwise you might not end the loop)
eval_limit = std::min(eval_limit, (int)mate_in(2));
save_every = std::max(save_every, REPORT_STATS_EVERY);
num_threads = Options["Threads"];
}
};
// Hash to limit the export of identical sfens
static constexpr uint64_t GENSFEN_HASH_SIZE = 64 * 1024 * 1024;
// It must be 2**N because it will be used as the mask to calculate hash_index.
static_assert((GENSFEN_HASH_SIZE& (GENSFEN_HASH_SIZE - 1)) == 0);
// Reporting intervals; the statistics interval must be a multiple of the dot interval.
static constexpr uint64_t REPORT_DOT_EVERY = 5000;
static constexpr uint64_t REPORT_STATS_EVERY = 200000;
static_assert(REPORT_STATS_EVERY % REPORT_DOT_EVERY == 0);
// Copies the parameters, seeds the PRNG, creates the writer, allocates the
// duplicate-detection table and, if a book is given, tries to open it.
Gensfen(
const Params& prm
) :
params(prm),
prng(prm.seed),
sfen_writer(prm.output_file_name, prm.num_threads, prm.save_every, prm.sfen_format)
{
hash.resize(GENSFEN_HASH_SIZE);
if (!prm.book.empty())
{
opening_book = open_opening_book(prm.book, prng);
if (opening_book == nullptr)
{
std::cout << "WARNING: Failed to open opening book " << prm.book << ". Falling back to startpos.\n";
}
}
// Output the seed so the user can verify that it is not identical by chance.
std::cout << prng << std::endl;
}
// Runs the generation on the worker threads; `limit` is forwarded to the workers.
void generate(uint64_t limit);
private:
// NOTE: the declaration order below is the initialisation order used by the
// constructor's initialiser list.
Params params;
PRNG prng;
std::mutex stats_mutex;
TimePoint last_stats_report_time;
// sfen exporter
SfenWriter sfen_writer;
SynchronizedRegionLogger::Region out;
vector<Key> hash; // 64MB*sizeof(HASH_KEY) = 512MB
// Null when no book was requested or it could not be opened.
std::unique_ptr<OpeningBook> opening_book;
static void set_gensfen_search_limits();
void generate_worker(
Thread& th,
std::atomic<uint64_t>& counter,
uint64_t limit);
bool was_seen_before(const Position& pos);
optional<int8_t> get_current_game_result(
Position& pos,
const vector<int>& move_hist_scores) const;
vector<uint8_t> generate_random_move_flags();
optional<Move> choose_random_move(
Position& pos,
std::vector<uint8_t>& random_move_flag,
int ply,
int& random_move_c);
bool commit_psv(
Thread& th,
PSVector& sfens,
int8_t lastTurnIsWin,
std::atomic<uint64_t>& counter,
uint64_t limit,
Color result_color);
void report(uint64_t done, uint64_t new_done);
void maybe_report(uint64_t done);
};
// Configures the global Search::Limits for sfen generation.
// Search::Limits is global state, so these settings affect every thread.
void Gensfen::set_gensfen_search_limits()
{
    // Behave like "go infinite" so that no time management takes place.
    Search::Limits.infinite = true;

    // Suppress PV output during generation.
    Search::Limits.silent = true;

    // This limit would be compared with the accumulated nodes of each thread,
    // so it is left disabled here.
    Search::Limits.nodes = 0;

    // The depth is supplied per call as an argument of the search instead.
    Search::Limits.depth = 0;
}
// Runs generate_worker() on the worker threads, waits for them to finish,
// flushes the writer and prints a final report for the part of `limit` that
// does not fill a whole REPORT_STATS_EVERY interval.
void Gensfen::generate(uint64_t limit)
{
    last_stats_report_time = 0;

    set_gensfen_search_limits();

    // Shared between all workers.
    std::atomic<uint64_t> counter{0};

    Threads.execute_with_workers([&counter, limit, this](Thread& worker) {
        generate_worker(worker, counter, limit);
    });
    Threads.wait_for_workers_finished();

    sfen_writer.flush();

    const uint64_t unreported = limit % REPORT_STATS_EVERY;
    if (unreported != 0)
    {
        report(limit, unreported);
    }

    std::cout << std::endl;
}
// Body of one worker thread. Plays games on th.rootPos, starting from an
// opening-book position when a book is loaded and from StartFEN otherwise.
// The positions of each game are collected in a vector and handed to
// commit_psv() when the game is adjudicated; its return value ends the loop.
//
// th      : the worker thread whose root position is used.
// counter : counter shared by all workers, forwarded to commit_psv().
// limit   : forwarded to commit_psv().
void Gensfen::generate_worker(
Thread& th,
std::atomic<uint64_t>& counter,
uint64_t limit)
{
// For the time being, it will be treated as a draw
// at the maximum number of steps to write.
// Maximum StateInfo + Search PV to advance to leaf buffer
std::vector<StateInfo, AlignedAllocator<StateInfo>> states(
params.write_maxply + MAX_PLY /* == search_depth_min + α */);
// State for the root position set at the start of every game.
StateInfo si;
// end flag
bool quit = false;
// repeat until the specified number of times
while (!quit)
{
// It is necessary to set a dependent thread for Position.
// When parallelizing, Threads (since this is a vector<Thread*>,
// Do the same for up to Threads[0]...Threads[thread_num-1].
auto& pos = th.rootPos;
if (opening_book != nullptr)
{
auto& fen = opening_book->next_fen();
pos.set(fen, false, &si, &th);
}
else
{
pos.set(StartFEN, false, &si, &th);
}
// Number of consecutive plies at or beyond eval_limit, and whether this
// game may be ended by that counter at all (decided once per game).
int resign_counter = 0;
bool should_resign = prng.rand(10) > 1;
// Vector for holding the sfens in the current simulated game.
PSVector packed_sfens;
packed_sfens.reserve(params.write_maxply + MAX_PLY);
// Precomputed flags. Used internally by choose_random_move.
vector<uint8_t> random_move_flag = generate_random_move_flags();
// A counter that keeps track of the number of random moves
// When random_move_minply == -1, random moves are
// performed continuously, so use it at this time.
// Used internally by choose_random_move.
int actual_random_move_count = 0;
// Save history of move scores for adjudication
vector<int> move_hist_scores;
// Hands the collected positions and the game result to commit_psv();
// its return value becomes the end flag of the outer loop.
auto flush_psv = [&](int8_t result) {
quit = commit_psv(th, packed_sfens, result, counter, limit, pos.side_to_move());
};
// One iteration per ply of the game; left via break when the game ends
// or has to be discarded.
for (int ply = 0; ; ++ply)
{
// Current search depth
const int depth = params.search_depth_min + (int)prng.rand(params.search_depth_max - params.search_depth_min + 1);
// Starting search calls init_for_search
auto [search_value, search_pv] = Search::search(pos, depth, 1, params.nodes);
// This has to be performed after search because it needs to know
// rootMoves which are filled in init_for_search.
const auto result = get_current_game_result(pos, move_hist_scores);
if (result.has_value())
{
flush_psv(result.value());
break;
}
// Always adjudicate by eval limit.
// Also because of this we don't have to check for TB/MATE scores
if (abs(search_value) >= params.eval_limit)
{
resign_counter++;
// End the game after four such plies in a row (if resigning is enabled
// for this game) or immediately on a known-win score.
if ((should_resign && resign_counter >= 4) || abs(search_value) >= VALUE_KNOWN_WIN) {
flush_psv((search_value >= params.eval_limit) ? 1 : -1);
break;
}
}
else
{
resign_counter = 0;
}
// In case there is no PV and the game was not ended here
// there is nothing we can do, we can't continue the game,
// we don't know the result, so discard this game.
if (search_pv.empty())
{
break;
}
// Save the move score for adjudication.
move_hist_scores.push_back(search_value);
// Discard stuff before write_minply is reached
// because it can harm training due to overfitting.
// Initial positions would be too common.
if (ply >= params.write_minply)
{
// The entry is appended first and removed again (pop_back) on every
// path that decides not to record this position.
packed_sfens.emplace_back(PackedSfenValue());
auto& psv = packed_sfens.back();
if (params.ensure_quiet)
{
auto [qsearch_value, qsearch_pv] = Search::qsearch(pos);
if (qsearch_pv.empty())
{
// Here we only write the position data.
// Result is added after the whole game is done.
pos.sfen_pack(psv.sfen);
// Already a quiet position
psv.score = search_value;
psv.move = search_pv[0];
psv.gamePly = ply;
}
else
{
// Navigate to a quiet position by playing the qsearch PV.
// ply is advanced temporarily and restored from old_ply below.
int old_ply = ply;
for (auto m : qsearch_pv)
{
pos.do_move(m, states[ply++]);
}
if (was_seen_before(pos))
{
// Just skip the move.
packed_sfens.pop_back();
}
else
{
// Reevaluate
auto [quiet_search_value, quiet_search_pv] = Search::search(pos, depth, 1, params.nodes);
if (quiet_search_pv.empty())
{
// Just skip the move.
packed_sfens.pop_back();
}
else
{
// Here we only write the position data.
// Result is added after the whole game is done.
pos.sfen_pack(psv.sfen);
psv.score = quiet_search_value;
psv.move = quiet_search_pv[0];
psv.gamePly = ply;
}
}
// Get back to the game: undo the qsearch PV in reverse order.
for (auto it = qsearch_pv.rbegin(); it != qsearch_pv.rend(); ++it)
{
pos.undo_move(*it);
}
ply = old_ply;
}
}
else
{
if (was_seen_before(pos))
{
packed_sfens.pop_back();
}
else
{
// Here we only write the position data.
// Result is added after the whole game is done.
pos.sfen_pack(psv.sfen);
psv.score = search_value;
psv.move = search_pv[0];
psv.gamePly = ply;
}
}
}
// Update the next move according to best search result or random move.
auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
const Move next_move = random_move.has_value() ? *random_move : search_pv[0];
// We don't have the whole game yet, but it ended,
// so the writing process ends and the next game starts.
// This shouldn't really happen.
if (!is_ok(next_move))
{
break;
}
// Do move.
pos.do_move(next_move, states[ply]);
}
}
}
bool Gensfen::was_seen_before(const Position& pos)
{
    // Probe the position hash table: the slot is selected by masking the
    // position key with GENSFEN_HASH_SIZE - 1. Finding the same key there
    // means the position was seen before. This is a good heuristic for
    // excluding already seen positions without many false positives.
    const auto position_key = pos.key();
    const auto slot = (size_t)(position_key & (GENSFEN_HASH_SIZE - 1));

    if (hash[slot] == position_key)
        return true;

    // Different key in the slot: overwrite it with the current one.
    hash[slot] = position_key;
    return false;
}
optional<int8_t> Gensfen::get_current_game_result(
    Position& pos,
    const vector<int>& move_hist_scores) const
{
    // Decide whether the game at `pos` is already over.
    // Returns 0 for a draw, -1 when the side to move is mated,
    // and nullopt when none of the checks below ends the game.

    // Thresholds for the draw adjudication by low scores.
    // Todo: Make these options.
    constexpr int adj_draw_ply = 80;   // adjudication starts at this ply
    constexpr int adj_draw_cnt = 8;    // 4 move scores for each side
    constexpr int adj_draw_score = 0;  // move score bound in CP

    // The number of recorded move scores is used as the current ply.
    const int ply = move_hist_scores.size();

    // Maximum number of plies to write reached (treated as a draw for the
    // time being), or the position itself is a draw.
    if (ply >= params.write_maxply || pos.is_draw(ply))
        return 0;

    // No legal root move: mate when in check, stalemate otherwise.
    if (pos.this_thread()->rootMoves.empty())
    {
        if (pos.checkers())
            return -1; /* mate */
        return 0;      /* stalemate */
    }

    // Adjudicate a draw when the most recent adj_draw_cnt scores are all
    // within adj_draw_score. The low scores must be on consecutive plies,
    // so the backwards scan stops at the first score outside the bound.
    if (params.detect_draw_by_consecutive_low_score && ply >= adj_draw_ply)
    {
        int trailing_low_scores = 0;
        for (auto score = move_hist_scores.rbegin();
             score != move_hist_scores.rend() && abs(*score) <= adj_draw_score;
             ++score)
        {
            if (++trailing_low_scores >= adj_draw_cnt)
                return 0;
        }
    }

    // Draw by insufficient mating material.
    if (params.detect_draw_by_insufficient_mating_material)
    {
        switch (pos.count<ALL_PIECES>())
        {
        // (1) KvK
        case 2:
            return 0;

        // (2) KvK + 1 minor piece
        case 3:
        {
            const int minor_pieces =
                  pos.count<BISHOP>(WHITE) + pos.count<KNIGHT>(WHITE)
                + pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK);
            if (minor_pieces == 1)
                return 0;
            break;
        }

        // (3) KBvKB, bishops on squares of the same color
        case 4:
        {
            if (pos.count<BISHOP>(WHITE) != 1 || pos.count<BISHOP>(BLACK) != 1)
                break;

            const auto white_bishops = pos.pieces(WHITE, BISHOP);
            const auto black_bishops = pos.pieces(BLACK, BISHOP);

            // Both bishops on dark squares.
            if ((white_bishops & DarkSquares) && (black_bishops & DarkSquares))
                return 0;

            // Both bishops on light squares.
            if ((white_bishops & ~DarkSquares) && (black_bishops & ~DarkSquares))
                return 0;
            break;
        }

        default:
            break;
        }
    }

    return nullopt;
}
vector<uint8_t> Gensfen::generate_random_move_flags()
{
    // Build the per-ply flag array telling whether a random move is to be
    // taken at a given ply, according to the random move parameters.
    //
    // Idea: list the candidate plies, Fisher-Yates shuffle them and take
    // the first N. Only N entries are needed, so only the first N
    // positions are actually shuffled.

    // random_move_minply and random_move_maxply are specified 1-origin,
    // while the plies handled here are 0-origin.
    vector<int> candidate_plies;
    candidate_plies.reserve((size_t)params.random_move_maxply);
    const int first_ply = std::max(params.random_move_minply - 1, 0);
    for (int p = first_ply; p < params.random_move_maxply; ++p)
    {
        candidate_plies.push_back(p);
    }

    // With Apery-style random moves, insert() may be called up to
    // random_move_count times on this array, so size it with that in mind.
    vector<uint8_t> flags((size_t)params.random_move_maxply + params.random_move_count);

    // No more random moves than there are candidate plies can be applied.
    const int picks = std::min(params.random_move_count, (int)candidate_plies.size());
    for (int i = 0; i < picks; ++i)
    {
        const auto j = prng.rand((uint64_t)candidate_plies.size() - i) + i;
        swap(candidate_plies[i], candidate_plies[j]);
        flags[candidate_plies[i]] = true;
    }

    return flags;
}
optional<Move> Gensfen::choose_random_move(
    Position& pos,
    std::vector<uint8_t>& random_move_flag,
    int ply,
    int& random_move_c)
{
    // Returns a randomly chosen move when this ply calls for one,
    // and an empty optional otherwise.
    optional<Move> random_move;

    // 1. Random move of random_move_count times from random_move_minply to random_move_maxply
    const bool flagged_ply =
           params.random_move_minply != -1
        && ply < (int)random_move_flag.size()
        && random_move_flag[ply];

    // 2. A mode to perform random move of random_move_count times after leaving the startpos
    const bool leaving_startpos =
           params.random_move_minply == -1
        && random_move_c < params.random_move_count;

    if (!flagged_ply && !leaving_startpos)
        return random_move;

    ++random_move_c;

    // It's not a mate, so there should be one legal move...
    if (params.random_multi_pv != 0)
    {
        Search::search(pos, params.random_multi_pv_depth, params.random_multi_pv);

        // Pick among the top N root moves.
        auto& root_moves = pos.this_thread()->rootMoves;
        uint64_t candidates = min((uint64_t)root_moves.size(), (uint64_t)params.random_multi_pv);
        for (uint64_t i = 1; i < candidates; ++i)
        {
            // Only moves whose score is within random_multi_pv_diff of the
            // best one qualify. root_moves[x].score can be assumed to be
            // arranged in descending order, so cut at the first failure.
            if (root_moves[0].score > root_moves[i].score + params.random_multi_pv_diff)
            {
                candidates = i;
                break;
            }
        }

        random_move = root_moves[prng.rand(candidates)].pv[0];
        return random_move;
    }

    // Normal random move.
    MoveList<LEGAL> legal_moves(pos);

    // Apery method: with probability 1 / random_move_like_apery prefer a
    // king move. (Whether this is good or bad is not really known.)
    const bool prefer_king_move =
           params.random_move_like_apery != 0
        && prng.rand(params.random_move_like_apery) == 0;

    if (prefer_king_move)
    {
        // If the king can move, move the king.
        Move king_moves[8]; // Near 8
        size_t king_move_count = 0;
        for (auto& m : legal_moves)
        {
            if (type_of(pos.moved_piece(m)) == KING)
            {
                king_moves[king_move_count++] = m;
            }
        }

        if (king_move_count != 0)
        {
            random_move = king_moves[prng.rand(king_move_count)];

            // In the Apery method there is a 1/2 chance that the opponent
            // also moves randomly; done by inserting a set flag right
            // after the current ply.
            if (prng.rand(2) == 0)
            {
                random_move_flag.insert(random_move_flag.begin() + ply + 1, 1, true);
            }
            return random_move;
        }
    }

    // Plain case (also the fallback when there is no king move):
    // one move picked uniformly from the legal moves.
    random_move = legal_moves.at((size_t)prng.rand((uint64_t)legal_moves.size()));
    return random_move;
}
// Stamp the game result on every position in sfens and hand them to the writer.
// result: outcome from the point of view of result_color.
// 1 when winning. -1 when losing. Pass 0 for a draw.
// Return value: true if the requested number of sfens (limit)
// has already been reached, false otherwise.
bool Gensfen::commit_psv(
    Thread& th,
    PSVector& sfens,
    int8_t result,
    std::atomic<uint64_t>& counter,
    uint64_t limit,
    Color result_color)
{
    const bool is_draw = (result == 0);
    if (is_draw && !params.write_out_draw_game_in_training_data_generation)
    {
        // We didn't write anything so why quit.
        return false;
    }

    // Give every stored position the outcome of the game as seen by its own
    // side to move. The side to move is packed as the lowest bit of the
    // first byte of the packed sfen.
    for (auto& entry : sfens)
    {
        const Color side_to_move = (Color)(entry.sfen.data[0] & 1);
        entry.game_result = (side_to_move == result_color) ? result : -result;
    }

    // Write sfens in move order to make potential compression easier.
    for (auto& entry : sfens)
    {
        // Stop as soon as enough data has already been generated.
        const auto already_done = counter.fetch_add(1);
        if (already_done >= limit)
        {
            return true;
        }

        // `already_done` were finished before, this one makes one more.
        maybe_report(already_done + 1);

        // Write out one sfen.
        sfen_writer.write(th.thread_idx(), entry);
    }

    return false;
}
void Gensfen::report(uint64_t done, uint64_t new_done)
{
const auto now_time = now();
const TimePoint elapsed = now_time - last_stats_report_time + 1;
out
<< endl
<< done << " sfens, "
<< new_done * 1000 / elapsed << " sfens/second, "
<< "at " << now_string() << sync_endl;
last_stats_report_time = now_time;
out = sync_region_cout.new_region();
}
void Gensfen::maybe_report(uint64_t done)
{
    // Progress output: a dot every REPORT_DOT_EVERY sfens and a full
    // statistics line every REPORT_STATS_EVERY sfens.
    if (done % REPORT_DOT_EVERY != 0)
        return;

    std::lock_guard lock(stats_mutex);

    // First call: start the clock and open an output region.
    if (last_stats_report_time == 0)
    {
        last_stats_report_time = now();
        out = sync_region_cout.new_region();
    }

    if (done == 0)
        return;

    out << '.';

    if (done % REPORT_STATS_EVERY == 0)
    {
        report(done, REPORT_STATS_EVERY);
    }
}
// Command to generate a game record.
// Reads whitespace-separated "name value" options from `is` into a
// Gensfen::Params, prints the resulting parameters and then runs
// Gensfen::generate() for `loop` sfens.
// Unknown option names are reported on stdout and ignored.
void gensfen(istringstream& is)
{
// Number of generated game records default = 8 billion phases (Ponanza specification)
uint64_t loop_max = 8000000000UL;
Gensfen::Params params;
// Add a random number to the end of the file name.
bool random_file_name = false;
// Output format name as given on the command line; mapped to
// params.sfen_format after parsing.
std::string sfen_format = "binpack";
string token;
// Consume tokens until the stream yields nothing more.
while (true)
{
token = "";
is >> token;
if (token == "")
break;
if (token == "depth")
is >> params.search_depth_min;
else if (token == "depth2")
is >> params.search_depth_max;
else if (token == "nodes")
is >> params.nodes;
else if (token == "loop")
is >> loop_max;
else if (token == "output_file_name")
is >> params.output_file_name;
else if (token == "eval_limit")
is >> params.eval_limit;
else if (token == "random_move_minply")
is >> params.random_move_minply;
else if (token == "random_move_maxply")
is >> params.random_move_maxply;
else if (token == "random_move_count")
is >> params.random_move_count;
else if (token == "random_move_like_apery")
is >> params.random_move_like_apery;
else if (token == "random_multi_pv")
is >> params.random_multi_pv;
else if (token == "random_multi_pv_diff")
is >> params.random_multi_pv_diff;
else if (token == "random_multi_pv_depth")
is >> params.random_multi_pv_depth;
else if (token == "write_minply")
is >> params.write_minply;
else if (token == "write_maxply")
is >> params.write_maxply;
else if (token == "save_every")
is >> params.save_every;
else if (token == "book")
is >> params.book;
else if (token == "random_file_name")
is >> random_file_name;
// Accept also the old option name.
else if (token == "use_draw_in_training_data_generation" || token == "write_out_draw_game_in_training_data_generation")
is >> params.write_out_draw_game_in_training_data_generation;
// Accept also the old option name.
else if (token == "use_game_draw_adjudication" || token == "detect_draw_by_consecutive_low_score")
is >> params.detect_draw_by_consecutive_low_score;
else if (token == "detect_draw_by_insufficient_mating_material")
is >> params.detect_draw_by_insufficient_mating_material;
else if (token == "sfen_format")
is >> sfen_format;
else if (token == "seed")
is >> params.seed;
// Flag-style option (takes no value): apply a fixed set of UCI options.
else if (token == "set_recommended_uci_options")
{
UCI::setoption("Contempt", "0");
UCI::setoption("Skill Level", "20");
UCI::setoption("UCI_Chess960", "false");
UCI::setoption("UCI_AnalyseMode", "false");
UCI::setoption("UCI_LimitStrength", "false");
UCI::setoption("PruneAtShallowDepth", "false");
UCI::setoption("EnableTranspositionTable", "true");
}
// Flag-style option (takes no value).
else if (token == "ensure_quiet")
{
params.ensure_quiet = true;
}
else
cout << "ERROR: Ignoring unknown option " << token << endl;
}
// Map the format name to the output type.
// NOTE(review): for an unknown name params.sfen_format is left untouched,
// while the warning says "Using bin" - confirm that this matches the
// default value of Params::sfen_format.
if (!sfen_format.empty())
{
if (sfen_format == "bin")
params.sfen_format = SfenOutputType::Bin;
else if (sfen_format == "binpack")
params.sfen_format = SfenOutputType::Binpack;
else
cout << "WARNING: Unknown sfen format `" << sfen_format << "`. Using bin\n";
}
// Done after parsing, so it overrides the "true" that
// set_recommended_uci_options may have set.
if (params.ensure_quiet)
{
// Otherwise we can't ensure quiet positions...
UCI::setoption("EnableTranspositionTable", "false");
}
if (random_file_name)
{
// Give a random number to output_file_name at this point.
// Do not use std::random_device() because it always yields the same integers on MinGW.
PRNG r(params.seed);
// Just in case, advance the generator a few steps before using it.
for (int i = 0; i < 10; ++i)
r.rand(1);
auto to_hex = [](uint64_t u) {
std::stringstream ss;
ss << std::hex << u;
return ss.str();
};
// To make an accidental collision of the suffix unlikely,
// concatenate two 64-bit random numbers just in case.
params.output_file_name += "_" + to_hex(r.rand<uint64_t>()) + to_hex(r.rand<uint64_t>());
}
params.enforce_constraints();
std::cout << "INFO: Executing gensfen command\n";
std::cout << "INFO: Parameters:\n";
std::cout
<< " - search_depth_min = " << params.search_depth_min << endl
<< " - search_depth_max = " << params.search_depth_max << endl
<< " - nodes = " << params.nodes << endl
<< " - num sfens to generate = " << loop_max << endl
<< " - eval_limit = " << params.eval_limit << endl
<< " - num threads (UCI) = " << params.num_threads << endl
<< " - random_move_minply = " << params.random_move_minply << endl
<< " - random_move_maxply = " << params.random_move_maxply << endl
<< " - random_move_count = " << params.random_move_count << endl
<< " - random_move_like_apery = " << params.random_move_like_apery << endl
<< " - random_multi_pv = " << params.random_multi_pv << endl
<< " - random_multi_pv_diff = " << params.random_multi_pv_diff << endl
<< " - random_multi_pv_depth = " << params.random_multi_pv_depth << endl
<< " - write_minply = " << params.write_minply << endl
<< " - write_maxply = " << params.write_maxply << endl
<< " - book = " << params.book << endl
<< " - output_file_name = " << params.output_file_name << endl
<< " - save_every = " << params.save_every << endl
<< " - random_file_name = " << random_file_name << endl
<< " - write_drawn_games = " << params.write_out_draw_game_in_training_data_generation << endl
<< " - draw by low score = " << params.detect_draw_by_consecutive_low_score << endl
<< " - draw by insuff. mat. = " << params.detect_draw_by_insufficient_mating_material << endl;
// Show if the training data generator uses NNUE.
Eval::NNUE::verify_eval_file_loaded();
Threads.main()->ponder = false;
Gensfen gensfen(params);
gensfen.generate(loop_max);
std::cout << "INFO: Gensfen finished." << endl;
}
}
+14
View File
@@ -0,0 +1,14 @@
// Public interface of the gensfen command.
#ifndef _GENSFEN_H_
#define _GENSFEN_H_
#include "position.h"
#include <sstream>
namespace Learner {
// Automatic generation of teacher positions.
// `is` holds the remaining command-line tokens of the gensfen command,
// i.e. its options.
void gensfen(std::istringstream& is);
}
#endif
-1
View File
@@ -1 +0,0 @@
// just a place holder
+90 -90
View File
@@ -7,126 +7,126 @@
// Floating point operation by 16bit type
// Assume that the float type code generated by the compiler is in IEEE 754 format and use it.
#include "../types.h"
#include "types.h"
namespace HalfFloat
{
// IEEE 754 float 32 format is :
// sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
//
// Our float16 format is :
// sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits
union float32_converter
{
int32_t n;
float f;
};
// IEEE 754 float 32 format is :
// sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
//
// Our float16 format is :
// sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits
union float32_converter
{
int32_t n;
float f;
};
// 16-bit float
struct float16
{
// --- constructors
// 16-bit float
struct float16
{
// --- constructors
float16() {}
float16(int16_t n) { from_float((float)n); }
float16(int32_t n) { from_float((float)n); }
float16(float n) { from_float(n); }
float16(double n) { from_float((float)n); }
float16() {}
float16(int16_t n) { from_float((float)n); }
float16(int32_t n) { from_float((float)n); }
float16(float n) { from_float(n); }
float16(double n) { from_float((float)n); }
// build from a float
void from_float(float f) { *this = to_float16(f); }
// build from a float
void from_float(float f) { *this = to_float16(f); }
// --- implicit converters
// --- implicit converters
operator int32_t() const { return (int32_t)to_float(*this); }
operator float() const { return to_float(*this); }
operator double() const { return double(to_float(*this)); }
operator int32_t() const { return (int32_t)to_float(*this); }
operator float() const { return to_float(*this); }
operator double() const { return double(to_float(*this)); }
// --- operators
// --- operators
float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }
float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
float16 operator - () const { return float16(-to_float(*this)); }
bool operator == (float16 rhs) const { return this->v_ == rhs.v_; }
bool operator != (float16 rhs) const { return !(*this == rhs); }
float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }
float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
float16 operator - () const { return float16(-to_float(*this)); }
bool operator == (float16 rhs) const { return this->v_ == rhs.v_; }
bool operator != (float16 rhs) const { return !(*this == rhs); }
static void UnitTest() { unit_test(); }
static void UnitTest() { unit_test(); }
private:
private:
// --- entity
// --- entity
uint16_t v_;
uint16_t v_;
// --- conversion between float and float16
// --- conversion between float and float16
static float16 to_float16(float f)
{
float32_converter c;
c.f = f;
u32 n = c.n;
static float16 to_float16(float f)
{
float32_converter c;
c.f = f;
u32 n = c.n;
// The sign bit is MSB in common.
uint16_t sign_bit = (n >> 16) & 0x8000;
// The sign bit is MSB in common.
uint16_t sign_bit = (n >> 16) & 0x8000;
// The exponent of IEEE 754's float 32 is biased +127 , so we change this bias into +15 and limited to 5-bit.
uint16_t exponent = (((n >> 23) - 127 + 15) & 0x1f) << 10;
// The exponent of IEEE 754's float 32 is biased +127 , so we change this bias into +15 and limited to 5-bit.
uint16_t exponent = (((n >> 23) - 127 + 15) & 0x1f) << 10;
// The fraction is limited to 10-bit.
uint16_t fraction = (n >> (23-10)) & 0x3ff;
// The fraction is limited to 10-bit.
uint16_t fraction = (n >> (23-10)) & 0x3ff;
float16 f_;
f_.v_ = sign_bit | exponent | fraction;
float16 f_;
f_.v_ = sign_bit | exponent | fraction;
return f_;
}
return f_;
}
static float to_float(float16 v)
{
u32 sign_bit = (v.v_ & 0x8000) << 16;
u32 exponent = ((((v.v_ >> 10) & 0x1f) - 15 + 127) & 0xff) << 23;
u32 fraction = (v.v_ & 0x3ff) << (23 - 10);
static float to_float(float16 v)
{
u32 sign_bit = (v.v_ & 0x8000) << 16;
u32 exponent = ((((v.v_ >> 10) & 0x1f) - 15 + 127) & 0xff) << 23;
u32 fraction = (v.v_ & 0x3ff) << (23 - 10);
float32_converter c;
c.n = sign_bit | exponent | fraction;
return c.f;
}
float32_converter c;
c.n = sign_bit | exponent | fraction;
return c.f;
}
// It is not a unit test, but I confirmed that it can be calculated. I'll fix the code later (maybe).
static void unit_test()
{
float16 a, b, c, d;
a = 1;
std::cout << (float)a << std::endl;
b = -118.625;
std::cout << (float)b << std::endl;
c = 2.5;
std::cout << (float)c << std::endl;
d = a + c;
std::cout << (float)d << std::endl;
// It is not a unit test, but I confirmed that it can be calculated. I'll fix the code later (maybe).
static void unit_test()
{
float16 a, b, c, d;
a = 1;
std::cout << (float)a << std::endl;
b = -118.625;
std::cout << (float)b << std::endl;
c = 2.5;
std::cout << (float)c << std::endl;
d = a + c;
std::cout << (float)d << std::endl;
c *= 1.5;
std::cout << (float)c << std::endl;
c *= 1.5;
std::cout << (float)c << std::endl;
b /= 3;
std::cout << (float)b << std::endl;
b /= 3;
std::cout << (float)b << std::endl;
float f1 = 1.5;
a += f1;
std::cout << (float)a << std::endl;
float f1 = 1.5;
a += f1;
std::cout << (float)a << std::endl;
a += f1 * (float)a;
std::cout << (float)a << std::endl;
}
a += f1 * (float)a;
std::cout << (float)a << std::endl;
}
};
};
}
+1335
View File
File diff suppressed because it is too large Load Diff
+100 -189
View File
@@ -1,101 +1,6 @@
#ifndef _LEARN_H_
#define _LEARN_H_
#if defined(EVAL_LEARN)
#include <vector>
// =====================
// Settings for learning
// =====================
// If you select one of the following, the details after that will be automatically selected.
// If you don't select any of them, you need to set the subsequent details one by one.
// Learning setting by elmo method. This is the default setting.
// To make a standard squeeze diaphragm, specify "lambda 1" with the learn command.
#define LEARN_ELMO_METHOD
// ----------------------
// update formula
// ----------------------
// Ada Grad. Recommended because it is stable.
// #define ADA_GRAD_UPDATE
// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
// #define SGD_UPDATE
// ----------------------
// Settings for learning
// ----------------------
// mini-batch size.
// Calculate the gradient by combining this number of phases.
// If you make it smaller, the number of update_weights() will increase and the convergence will be faster. The gradient is incorrect.
// If you increase it, the number of update_weights() decreases, so the convergence will be slow. The slope will come out accurately.
// I don't think you need to change this value in most cases.
#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1)
// The number of phases to read from the file at one time. After reading this much, shuffle.
// It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed in the 10M phase.
// Must be a multiple of THREAD_BUFFER_SIZE(=10000).
#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10)
// Saving interval of evaluation function at learning. Save each time you learn this number of phases.
// Needless to say, the longer the saving interval, the shorter the learning time.
// Folder name is incremented for each save like 0/, 1/, 2/...
// By default, once every 1 billion phases.
#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL)
// ----------------------
// Select the objective function
// ----------------------
// The objective function is the sum of squares of the difference in winning percentage
// See learner.cpp for more information.
//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE
// Objective function is cross entropy
// See learner.cpp for more information.
// So-called ordinary "rag cloth squeezer"
//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY
// A version in which the objective function is cross entropy, but the win rate function is not passed
// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE
// elmo (WCSC27) method
// #define LOSS_FUNCTION_IS_ELMO_METHOD
// ※ Other things may be added.
// ----------------------
// debug settings for learning
// ----------------------
// Reduce the output of rmse during learning to 1 for this number of times.
// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
#define LEARN_RMSE_OUTPUT_INTERVAL 1
// ----------------------
// learning from zero vector
// ----------------------
// Start learning the evaluation function parameters from the zero vector.
// Initialize to zero, generate a game, learn from zero vector,
// Game generation → If you repeat learning, you will get parameters that do not depend on the professional game. (maybe)
// (very time consuming)
//#define RESET_TO_ZERO_VECTOR
// ----------------------
// Floating point for learning
// ----------------------
@@ -105,7 +10,7 @@
// Even if it is a double type, there is almost no difference in the way of convergence, so fix it to float.
// when using float
typedef float LearnFloatType;
using LearnFloatType = float;
// when using double
//typedef double LearnFloatType;
@@ -114,59 +19,6 @@ typedef float LearnFloatType;
//#include "half_float.h"
//typedef HalfFloat::float16 LearnFloatType;
// ----------------------
// save memory
// ----------------------
// Use a triangular array for the Weight array (of which is KPP) to save memory.
// If this is used, the weight array for learning will be about 3 times as large as the evaluation function file.
#define USE_TRIANGLE_WEIGHT_ARRAY
// ----------------------
// dimension down
// ----------------------
// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry).
// All on by default.
// Dimension reduction using mirror and inverse for KK. (Unclear effect)
// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on.
#define USE_KK_MIRROR_WRITE
#define USE_KK_INVERSE_WRITE
// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective)
// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on.
#define USE_KKP_MIRROR_WRITE
#define USE_KKP_INVERSE_WRITE
// Perform dimension reduction using a mirror for KPP. (Turning this off requires double the teacher position)
// KPP has no inverse. (Because there is only K on the front side)
#define USE_KPP_MIRROR_WRITE
// Perform a dimension reduction using a mirror for KPPP. (Turning this off requires double the teacher position)
// KPPP has no inverse. (Because there is only K on the front side)
#define USE_KPPP_MIRROR_WRITE
// Reduce the dimension by KPP for learning the KKPP component.
// Learning is very slow.
// Do not use as it is not debugged.
//#define USE_KKPP_LOWER_DIM
// ======================
// Settings for creating teacher phases
// ======================
// ----------------------
// write out the draw
// ----------------------
// When you reach a draw, write it out as a teacher position
// It's subtle whether it's better to do this.
// #define LEARN_GENSFEN_USE_DRAW_RESULT
// ======================
// configure
// ======================
@@ -175,63 +27,122 @@ typedef float LearnFloatType;
// Learning with the method of elmo (WCSC27)
// ----------------------
#if defined( LEARN_ELMO_METHOD )
#define LOSS_FUNCTION_IS_ELMO_METHOD
#define ADA_GRAD_UPDATE
#endif
#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
// ----------------------
// Definition of struct used in Learner
// ----------------------
#include "../position.h"
#include "autograd.h"
#include "packed_sfen.h"
#include "position.h"
#include <sstream>
#include <vector>
#include <mutex>
#include <string>
namespace Learner
{
//Structure in which PackedSfen and evaluation value are integrated
// If you write different contents for each option, it will be a problem when reusing the teacher game
// For the time being, write all the following members regardless of the options.
struct PackedSfenValue
{
// phase
PackedSfen sfen;
// ----------------------
// Settings for learning
// ----------------------
// Evaluation value returned from Learner::search()
int16_t score;
// mini-batch size.
// Calculate the gradient by combining this number of phases.
// If you make it smaller, the number of update_weights() will increase and the convergence will be faster. The gradient is incorrect.
// If you increase it, the number of update_weights() decreases, so the convergence will be slow. The slope will come out accurately.
// I don't think you need to change this value in most cases.
// PV first move
// Used when finding the match rate with the teacher
uint16_t move;
constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
// Trouble of the phase from the initial phase.
uint16_t gamePly;
// Saving interval of evaluation function at learning. Save each time you learn this number of phases.
// Needless to say, the longer the saving interval, the shorter the learning time.
// Folder name is incremented for each save like 0/, 1/, 2/...
// By default, once every 1 billion phases.
constexpr std::size_t LEARN_EVAL_SAVE_INTERVAL = 100'000'000ULL;
// 1 if the player on this side ultimately wins the game. -1 if you are losing.
// 0 if a draw is reached.
// The draw is in the teacher position generation command gensfen,
// Only write if LEARN_GENSFEN_DRAW_RESULT is enabled.
int8_t game_result;
// Reduce the output of rmse during learning to 1 for this number of times.
// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
constexpr std::size_t LEARN_RMSE_OUTPUT_INTERVAL = 1;
// When exchanging the file that wrote the teacher aspect with other people
//Because this structure size is not fixed, pad it so that it is 40 bytes in any environment.
uint8_t padding;
// Learning from the generated game record
void learn(std::istringstream& is);
// 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
};
using CalcLossFunc = ValueWithGrad<double>(Value, Value, int, int);
// Type that returns the reading line and the evaluation value at that time
// Used in Learner::search(), Learner::qsearch().
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
struct Loss
{
double value() const
{
return m_loss.value;
}
// So far, only Yaneura King 2018 Otafuku has this stub
// This stub is required if EVAL_LEARN is defined.
extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0);
extern Learner::ValueAndPV qsearch(Position& pos);
double grad() const
{
return m_loss.grad;
}
double calc_grad(Value shallow, const PackedSfenValue& psv);
uint64_t count() const
{
return m_count;
}
Loss() = default;
Loss(const Loss& other) :
m_loss(other.m_loss),
m_count(other.m_count)
{
}
Loss& operator += (const ValueWithGrad<double>& rhs)
{
std::unique_lock lock(m_mutex);
m_loss += rhs.abs();
m_count += 1;
return *this;
}
Loss& operator += (const Loss& rhs)
{
std::unique_lock lock(m_mutex);
m_loss += rhs.m_loss.abs();
m_count += rhs.m_count;
return *this;
}
void reset()
{
std::unique_lock lock(m_mutex);
m_loss = ValueWithGrad<double>{ 0.0, 0.0 };
m_count = 0;
}
template <typename StreamT>
void print_with_grad(const std::string& prefix, StreamT& s) const
{
s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << std::endl;
s << " - " << prefix << "_grad_norm = " << m_loss.grad / (double)m_count << std::endl;
}
template <typename StreamT>
void print_only_loss(const std::string& prefix, StreamT& s) const
{
s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << std::endl;
}
private:
ValueWithGrad<double> m_loss{ 0.0, 0.0 };
uint64_t m_count{0};
std::mutex m_mutex;
};
}
#endif
#endif // ifndef _LEARN_H_
File diff suppressed because it is too large Load Diff
-25
View File
@@ -1,25 +0,0 @@
#include "learning_tools.h"
#if defined (EVAL_LEARN)
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "../misc.h"
using namespace Eval;
namespace EvalLearningTools
{
// --- static variables
// Out-of-class definitions of the static learning-rate members
// (eta values and the epochs at which they change) declared in struct Weight.
double Weight::eta;
double Weight::eta1;
double Weight::eta2;
double Weight::eta3;
uint64_t Weight::eta1_epoch;
uint64_t Weight::eta2_epoch;
}
#endif
-200
View File
@@ -1,200 +0,0 @@
#ifndef __LEARN_WEIGHT_H__
#define __LEARN_WEIGHT_H__
// A set of machine learning tools related to the weight array used for machine learning of evaluation functions
#include "learn.h"
#if defined (EVAL_LEARN)
#include <array>
#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE)
#include "../misc.h" // PRNG , my_insertion_sort
#endif
#include <cmath> // std::sqrt()
namespace EvalLearningTools
{
// -------------------------------------------------
// Array for learning that stores gradients etc.
// -------------------------------------------------
#if defined(_MSC_VER)
#pragma pack(push,2)
#elif defined(__GNUC__)
#pragma pack(2)
#endif
// Per-parameter training state: the gradient accumulated over one mini-batch
// plus, depending on the configured update rule (ADA_GRAD_UPDATE or SGD_UPDATE),
// the extra state that rule needs. The learning-rate schedule is shared by all
// instances through the static eta* members.
struct Weight
{
// Gradient accumulated over the current mini-batch.
LearnFloatType g = LearnFloatType(0);
// Size note from the original author, for ADA_GRAD_UPDATE with
// LearnFloatType == float: "4*2 + 4*2 + 1*2 = 18 bytes", i.e. a Weight array
// about 4.5 times the size of a 1GB evaluation-function parameter set.
// With 4-byte structure alignment sizeof(Weight) would become 20, which is why
// the struct is wrapped in pragma pack(2).
// For SGD_UPDATE the structure is 10 bytes smaller (8 bytes).
// Learning rate eta, as used by AdaGrad and similar update rules.
// eta1, eta2, eta3, eta1_epoch and eta2_epoch are expected to have been set
// (see init_eta()) before updateFV() is called.
// Schedule implemented by calc_eta():
//   - before eta1_epoch: interpolate from eta1 towards eta2,
//   - between eta1_epoch and eta2_epoch: interpolate from eta2 towards eta3,
//   - from eta2_epoch on: eta3.
static double eta;
static double eta1;
static double eta2;
static double eta3;
static uint64_t eta1_epoch;
static uint64_t eta2_epoch;
// Sets the whole schedule in one call.
// A rate passed as 0 is replaced by the default 30.0; an epoch passed as 0
// stays 0, which calc_eta() treats as "this stage is disabled".
static void init_eta(double eta1, double eta2, double eta3, uint64_t eta1_epoch, uint64_t eta2_epoch)
{
Weight::eta1 = (eta1 != 0) ? eta1 : 30.0;
Weight::eta2 = (eta2 != 0) ? eta2 : 30.0;
Weight::eta3 = (eta3 != 0) ? eta3 : 30.0;
Weight::eta1_epoch = (eta1_epoch != 0) ? eta1_epoch : 0;
Weight::eta2_epoch = (eta2_epoch != 0) ? eta2_epoch : 0;
}
// Recomputes the shared learning rate `eta` for the given epoch.
static void calc_eta(uint64_t epoch)
{
if (Weight::eta1_epoch == 0) // No schedule at all: always eta1
Weight::eta = Weight::eta1;
else if (epoch < Weight::eta1_epoch)
// Linear interpolation eta1 -> eta2 over [0, eta1_epoch)
Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch;
else if (Weight::eta2_epoch == 0) // Second stage disabled: stay at eta2
Weight::eta = Weight::eta2;
else if (epoch < Weight::eta2_epoch)
// Linear interpolation eta2 -> eta3 over [eta1_epoch, eta2_epoch)
Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch);
else
Weight::eta = Weight::eta3;
}
// Convenience overload: update `v` with the eta coefficient k = 1.0.
// Relies on the two-argument updateFV() that exists only when ADA_GRAD_UPDATE
// or SGD_UPDATE is defined.
template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
#if defined (ADA_GRAD_UPDATE)
// Marker meaning "v0 has not been initialised from the parameter yet".
// Original author's rationale: the largest value a float can represent exactly
// is INT16_MAX*256-1, so a smaller value is used as the marker.
const LearnFloatType V0_NOT_INIT = (INT16_MAX * 128);
// Floating-point shadow of the parameter value. An earlier implementation kept
// only a fixed-point fractional part to save memory; it was dropped because of
// doubtful accuracy and poor readability.
LearnFloatType v0 = LearnFloatType(V0_NOT_INIT);
// AdaGrad accumulator: running sum of squared gradients.
LearnFloatType g2 = LearnFloatType(0);
// AdaGrad update of the parameter `v`.
// The caller guarantees that g and the other members are not modified
// concurrently while this runs, so no atomic operations are used.
// k scales eta; 1.0 is the usual value, and a smaller value (e.g. 1/8.0)
// lowers the effective learning rate for that parameter.
template <typename T>
void updateFV(T& v,double k)
{
// AdaGrad update formula, with gradient g, parameter v and constant eta:
// g2 = g2 + g^2
// v = v - eta * g / sqrt(g2)
// epsilon keeps the square root away from zero.
constexpr double epsilon = 0.000001;
if (g == LearnFloatType(0))
return;
g2 += g * g;
// If v0 still holds the V0_NOT_INIT marker it has not been seeded from the
// parameter array yet, so start from the value passed in by the caller.
double V = (v0 == V0_NOT_INIT) ? v : v0;
V -= k * eta * (double)g / sqrt((double)g2 + epsilon);
// Clamp V to the range representable by T.
// windows.h defines min and max as macros; wrapping the function names in
// parentheses prevents them from being expanded as function-like macros.
V = (std::min)((double)(std::numeric_limits<T>::max)() , V);
V = (std::max)((double)(std::numeric_limits<T>::min)() , V);
v0 = (LearnFloatType)V;
v = (T)round(V);
// g is deliberately NOT cleared here:
// g[i] = 0;
// Because of dimension reduction that has to be done by the caller.
}
#elif defined(SGD_UPDATE)
// Sign-only SGD update of the parameter `v`.
// The caller guarantees that g and the other members are not modified
// concurrently while this runs, so no atomic operations are used.
// k is accepted for interface parity but is not used by this variant.
template <typename T>
void updateFV(T & v , double k)
{
if (g == 0)
return;
// Only the sign of g is used:
// g < 0 : increase v slightly,
// g > 0 : decrease v slightly.
// Only integers are added, so no fractional part is needed.
// Original design note: a step in the range 0-5 with a roughly Gaussian
// distribution would be nice, e.g. the popcount of a 5-bit random number
// (binomial distribution):
//int16_t diff = (int16_t)POPCNT32((u32)prng.rand(31));
// That was abandoned because AsyncPRNG::rand() takes a lock and slowed
// things down with 80 threads, so a fixed step of 1 is used instead.
int16_t diff = 1;
double V = v;
if (g > 0.0)
V-= diff;
else
V+= diff;
// Clamp to the range representable by T.
V = (std::min)((double)(std::numeric_limits<T>::max)(), V);
V = (std::max)((double)(std::numeric_limits<T>::min)(), V);
v = (T)V;
}
#endif
// Overwrite the accumulated gradient.
template <typename T> void set_grad(const T& g_) { g = g_; }
// Add to the accumulated gradient.
template <typename T> void add_grad(const T& g_) { g += g_; }
// Read the accumulated gradient.
LearnFloatType get_grad() const { return g; }
};
#if defined(_MSC_VER)
#pragma pack(pop)
#elif defined(__GNUC__)
#pragma pack(0)
#endif
// Turned weight array
// In order to be able to handle it transparently, let's have the same member as Weight.
struct Weight2
{
Weight w[2];
//Evaluate your turn, eta 1/8.
template <typename T> void updateFV(std::array<T, 2>& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); }
template <typename T> void set_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); }
template <typename T> void add_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); }
std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
};
}
#endif // defined (EVAL_LEARN)
#endif
-123
View File
@@ -1,123 +0,0 @@
#include "../types.h"
#if defined(EVAL_LEARN)
#include "multi_think.h"
#include "../tt.h"
#include "../uci.h"
#include <thread>
// Runs thread_worker() on Options["Threads"] threads and blocks until all of
// them have finished. While waiting, callback_func (if set) is invoked every
// callback_seconds iterations of the polling loop. The UCI options are
// snapshotted on entry and re-assigned on exit.
void MultiThink::go_think()
{
// Keep a copy to restore the Options settings later.
auto oldOptions = Options;
// Reading the book on the fly is slow and the file access is not thread safe,
// so force the book to be read completely into memory.
Options["BookOnTheFly"] = std::string("false");
// Load the evaluation function.
// (Original note: for the learn command the evaluation values may be modified
// after loading, so the memory corruption check is skipped.)
Eval::init_NNUE();
// Call the derived class's init().
init();
// Reset the counters; the upper limit is the one set with set_loop_max().
loop_count = 0;
done_count = 0;
// Create threads as many as Options["Threads"] and start thinking.
std::vector<std::thread> threads;
auto thread_num = (size_t)Options["Threads"];
// One "finished" flag per worker thread.
thread_finished.resize(thread_num);
// start worker thread
for (size_t i = 0; i < thread_num; ++i)
{
thread_finished[i] = 0;
threads.push_back(std::thread([i, this]
{
// Spread the workers over the available processors.
WinProcGroup::bindThisThread(i);
// execute the overridden process
this->thread_worker(i);
// Set the end flag because the thread has ended
// NOTE(review): this is a plain byte written here and polled by the
// launching thread below without any synchronisation - confirm this is
// acceptable on all targets.
this->thread_finished[i] = 1;
}));
}
// Why the threads are not simply joined here:
// for (auto& th :threads)
// th.join();
// would block until the workers are done, and during that time
// callback_func() could not be called (so nothing could be saved).
// Therefore the end flags are polled instead.
// Returns true only when every worker has set its end flag.
auto threads_done = [&]()
{
// returns false as soon as one unfinished worker is found
for (auto& f : thread_finished)
if (!f)
return false;
return true;
};
// Call back if the callback function is set.
auto do_a_callback = [&]()
{
if (callback_func)
callback_func();
};
for (uint64_t i = 0 ; ; )
{
// If all threads have finished, exit the loop.
if (threads_done())
break;
// Presumably sleeps for one second (argument in milliseconds) - see the
// project's sleep() helper.
sleep(1000);
// callback_func() is called every callback_seconds iterations.
if (++i == callback_seconds)
{
do_a_callback();
// The counter is reset only after the callback has returned, so however
// long the callback (e.g. a save) takes, the next call is a full interval
// away.
i = 0;
}
}
// Final stage.
std::cout << std::endl << "finalize..";
// do_a_callback();
// Not called here: saving is left to the caller.
// A worker may have set its end flag while its thread function is still
// returning, so wait for the real end of every thread with join().
for (auto& th : threads)
th.join();
// Other work (e.g. a file-writing thread) may still be in progress, so only
// report that the worker threads have been joined.
std::cout << "all threads are joined." << std::endl;
// Restore the options that were overwritten above.
// Every option is re-assigned because an option's handler only runs when a
// value is assigned.
for (auto& s : oldOptions)
Options[s.first] = std::string(s.second);
}
#endif // defined(EVAL_LEARN)
-152
View File
@@ -1,152 +0,0 @@
#ifndef _MULTI_THINK_
#define _MULTI_THINK_
#if defined(EVAL_LEARN)
#include <functional>
#include <mutex>
#include "../misc.h"
#include "../learn/learn.h"
#include "../thread_win32_osx.h"
#include <atomic>
// Helper base class for jobs in which several threads each run their own
// search (learning from game records, generating positions by self-play, ...).
// Derive from this class and override thread_worker().
struct MultiThink
{
    // Seeds the random number generator from the current system clock.
    // The loop counters are zero-initialised by their member initialisers.
    MultiThink() : prng(std::chrono::system_clock::now().time_since_epoch().count())
    {
    }

    // The class has virtual functions and is meant to be derived from, so make
    // destruction through a MultiThink pointer/reference well defined.
    virtual ~MultiThink() = default;

    // Call this from the master thread: every worker thread runs
    // thread_worker(), and control returns once all of them have finished.
    // - The opening book is not thread safe in on-the-fly mode, so that mode is
    //   switched off for the duration of the call.
    // [Requirements]
    // 1) Override thread_worker()
    // 2) Set the loop count with set_loop_max()
    // 3) Optionally set callback_func and callback_seconds to get a periodic
    //    callback
    void go_think();

    // Override to perform initialisation in the derived class.
    // Called from go_think() once its own initialisation is complete.
    virtual void init() {}

    // Body of each worker thread started by go_think(). Must be overridden.
    virtual void thread_worker(size_t thread_id) = 0;

    // Called back every callback_seconds [seconds] while go_think() is waiting.
    std::function<void()> callback_func;
    uint64_t callback_seconds = 600;

    // Set the number of iterations the workers should process in total.
    void set_loop_max(uint64_t loop_max_) { loop_max = loop_max_; }

    // Get the value set by set_loop_max() (0 until it has been set).
    uint64_t get_loop_max() const { return loop_max; }

    // [ASYNC] Returns the current loop counter and then increments it.
    // Returns UINT64_MAX once the counter has reached loop_max.
    // Call this at the moment a position is generated; otherwise the number of
    // generated positions and the counter value will not match.
    uint64_t get_next_loop_count() {
        std::unique_lock<std::mutex> lk(loop_mutex);
        if (loop_count >= loop_max)
            return UINT64_MAX;
        return loop_count++;
    }

    // [ASYNC] Returns the number of processed items; every call increments the
    // counter first and returns the new value.
    uint64_t get_done_count() {
        std::unique_lock<std::mutex> lk(loop_mutex);
        return ++done_count;
    }

    // Mutex for worker threads to take while doing I/O.
    std::mutex io_mutex;

protected:
    // Random number generator shared by the workers.
    AsyncPRNG prng;

private:
    // Total number of iterations the workers should process.
    // Zero-initialised so that it never holds an indeterminate value before
    // set_loop_max() is called.
    std::atomic<uint64_t> loop_max{0};
    // Number of iterations handed out so far.
    std::atomic<uint64_t> loop_count{0};
    // Number of iterations reported as done so far.
    std::atomic<uint64_t> done_count{0};

    // Mutex guarding the counters above.
    std::mutex loop_mutex;

    // Per-thread end flags.
    // A byte per flag is used because vector<bool> packs bits and is not safe
    // to write from several threads.
    typedef uint8_t Flag;
    std::vector<Flag> thread_finished;
};
// Small task queue for running work during idle time.
// The master queues tasks with push_task_async() whenever it likes; a slave
// calls on_idle() when it has nothing to do and drains the queue.
// Handy for writing a MultiThink thread worker in master-slave style.
struct TaskDispatcher
{
    typedef std::function<void(size_t /* thread_id */)> Task;

    // Called by a slave while idle: runs queued tasks until none is left,
    // then sleeps briefly.
    void on_idle(size_t thread_id)
    {
        for (Task pending = get_task_async(); pending != nullptr; pending = get_task_async())
            pending(thread_id);

        sleep(1);
    }

    // [ASYNC] Queue a task.
    void push_task_async(Task task)
    {
        std::lock_guard<std::mutex> guard(task_mutex);
        tasks.push_back(task);
    }

    // Reserve room for `size` tasks in advance (does not take the mutex).
    void task_reserve(size_t size)
    {
        tasks.reserve(size);
    }

protected:
    // Queued tasks; the most recently pushed one is taken first.
    std::vector<Task> tasks;

    // [ASYNC] Remove and return one task, or an empty Task when the queue is
    // empty. Called from on_idle().
    Task get_task_async()
    {
        std::lock_guard<std::mutex> guard(task_mutex);

        if (tasks.empty())
            return nullptr;

        Task newest = tasks.back();
        tasks.pop_back();
        return newest;
    }

    // Mutex guarding `tasks`.
    std::mutex task_mutex;
};
#endif // defined(EVAL_LEARN)
#endif
+43
View File
@@ -0,0 +1,43 @@
#include "opening_book.h"
#include <fstream>
namespace Learner {
// Loads an EPD opening book: every non-empty line of `file` becomes one entry,
// and the entries are then shuffled with `prng`.
// If the file cannot be opened the book is left empty (and is not shuffled).
EpdOpeningBook::EpdOpeningBook(const std::string& file, PRNG& prng) :
    OpeningBook(file)
{
    std::ifstream epd_stream(file);
    if (!epd_stream)
        return;

    for (std::string entry; std::getline(epd_stream, entry); )
    {
        if (!entry.empty())
            fens.emplace_back(entry);
    }

    Algo::shuffle(fens, prng);
}
// Returns true when `lhs` finishes with the character sequence `end`.
// An empty `end` matches every string; the comparison is case sensitive.
static bool ends_with(const std::string& lhs, const std::string& end)
{
    const std::size_t suffix_len = end.size();
    const std::size_t total_len = lhs.size();

    return suffix_len <= total_len
        && lhs.compare(total_len - suffix_len, suffix_len, end) == 0;
}
// Opens the opening book stored in `filename`.
// The format is chosen from the file extension; only ".epd" is recognised.
// Returns nullptr for any other extension.
std::unique_ptr<OpeningBook> open_opening_book(const std::string& filename, PRNG& prng)
{
    std::unique_ptr<OpeningBook> book;

    if (ends_with(filename, ".epd"))
        book = std::make_unique<EpdOpeningBook>(filename, prng);

    return book;
}
}
+56
View File
@@ -0,0 +1,56 @@
#ifndef LEARN_OPENING_BOOK_H
#define LEARN_OPENING_BOOK_H
#include "misc.h"
#include "position.h"
#include "thread.h"
#include <vector>
#include <random>
#include <optional>
#include <string>
#include <cstdint>
#include <memory>
namespace Learner {
// Base class of an opening book: an ordered list of FEN strings that is handed
// out one entry at a time, wrapping around at the end.
// Concrete books fill `fens` in their constructor.
struct OpeningBook {

    // Books are created as a derived type and owned through
    // std::unique_ptr<OpeningBook> (see open_opening_book()), so destruction
    // through the base pointer must be well defined.
    virtual ~OpeningBook() = default;

    // Returns the next FEN and advances the cursor, wrapping to the first
    // entry after the last one. The book must not be empty.
    const std::string& next_fen()
    {
        assert(fens.size() > 0);

        auto& fen = fens[current_index++];
        if (current_index >= fens.size())
            current_index = 0;

        return fen;
    }

    // Number of entries in the book.
    std::size_t size() const { return fens.size(); }

    // Name of the file the book was created from.
    const std::string& get_filename() const { return filename; }

protected:

    // Only derived classes can create a book; they are responsible for
    // filling `fens`.
    OpeningBook(const std::string& file) :
        filename(file),
        current_index(0)
    {
    }

    std::string filename;
    std::vector<std::string> fens;
    // Index of the entry the next call to next_fen() returns.
    std::size_t current_index;
};
// Opening book read from a text file with one entry per line.
struct EpdOpeningBook : OpeningBook {
// Reads the entries from `file` and shuffles them with `prng`.
EpdOpeningBook(const std::string& file, PRNG& prng);
};
// Creates the opening book matching the extension of `filename`
// (result is a null pointer when the extension is not recognised).
std::unique_ptr<OpeningBook> open_opening_book(const std::string& filename, PRNG& prng);
}
#endif
+46
View File
@@ -0,0 +1,46 @@
#ifndef _PACKED_SFEN_H_
#define _PACKED_SFEN_H_
#include <vector>
#include <cstdint>
namespace Learner {
// A position packed into 256 bits.
struct PackedSfen { std::uint8_t data[32]; };

static_assert(sizeof(PackedSfen) == 32,
    "PackedSfen must be exactly 32 bytes");

// One training record: a packed position together with its evaluation and
// game information.
// All members are always written, regardless of options, so that files remain
// usable when training data is reused or exchanged.
struct PackedSfenValue
{
    // The position.
    PackedSfen sfen;

    // Evaluation value returned from Learner::search()
    std::int16_t score;

    // First move of the PV.
    // Used when measuring the match rate against the teacher.
    std::uint16_t move;

    // Ply count of this position counted from the initial position.
    std::uint16_t gamePly;

    // 1 if the side to move in this position ultimately wins the game,
    // -1 if it loses, 0 if the game is drawn.
    // The teacher position generation command gensfen writes drawn games only
    // if LEARN_GENSFEN_DRAW_RESULT is enabled.
    std::int8_t game_result;

    // Explicit padding so that the record is 40 bytes in every environment,
    // which matters when files are exchanged between machines.
    std::uint8_t padding;

    // 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
};

// Records are copied to and from files as raw bytes, so the size is part of
// the file format.
static_assert(sizeof(PackedSfenValue) == 40,
    "PackedSfenValue must be exactly 40 bytes");

// Position array: PSVector stands for packed sfen vector.
using PSVector = std::vector<PackedSfenValue>;
}
#endif
+386
View File
@@ -0,0 +1,386 @@
#include "sfen_packer.h"
#include "packed_sfen.h"
#include "misc.h"
#include "position.h"
#include <sstream>
#include <fstream>
#include <cstring> // std::memset()
using namespace std;
namespace Learner {
// Bit-level reader/writer over a caller-supplied byte buffer.
// Bits are addressed least-significant-bit first within each byte.
// The stream does not own the buffer and performs no bounds checking; the
// caller checks get_cursor() against the buffer size.
struct BitStream
{
    // Attach the buffer to read from / write to and rewind the cursor.
    // For writing, the buffer must already be cleared to 0 because bits are
    // only ever OR-ed in.
    void set_data(std::uint8_t* data_) { data = data_; reset(); }

    // Get the pointer passed to set_data() (nullptr before the first call).
    std::uint8_t* get_data() const { return data; }

    // Get the cursor, i.e. the number of bits read or written so far.
    int get_cursor() const { return bit_cursor; }

    // Rewind the cursor to the first bit.
    void reset() { bit_cursor = 0; }

    // Write one bit to the stream: 1 if b is non-zero, otherwise 0.
    void write_one_bit(int b)
    {
        if (b)
            data[bit_cursor / 8] |= 1 << (bit_cursor & 7);

        ++bit_cursor;
    }

    // Read one bit from the stream.
    int read_one_bit()
    {
        int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
        ++bit_cursor;

        return b;
    }

    // Write the n low-order bits of d, lowest bit first.
    void write_n_bit(int d, int n)
    {
        for (int i = 0; i < n; ++i)
            write_one_bit(d & (1 << i));
    }

    // Read n bits; inverse of write_n_bit().
    int read_n_bit(int n)
    {
        int result = 0;
        for (int i = 0; i < n; ++i)
            result |= read_one_bit() ? (1 << i) : 0;

        return result;
    }

private:
    // Next bit position to read/write.
    // Initialised so that a stream is in a defined state before set_data().
    int bit_cursor = 0;

    // Buffer being read/written (not owned).
    std::uint8_t* data = nullptr;
};
// Class for compressing/decompressing a position ("sfen").
// A position is packed into 256 bits (32 bytes) using Huffman coding for the
// board contents.
//
// Layout, in the order written by pack():
// Side to move (1 bit)
// King square, one per colour in the order of Colors (6 bits each)
// Huffman encoding of the board, kings excluded
// Castling availability (1 bit x 4)
// En passant square (1 or 1 + 6 bits)
// Rule 50 counter, low 6 bits (6 bits)
// Fullmove number, low 8 bits (8 bits)
// Fullmove number, high 8 bits (8 bits)
// Rule 50 counter, bit 6 (1 bit)
//
// TODO(someone): Rename SFEN to FEN.
//
struct SfenPacker
{
// Encodes `pos` into the 32 bytes pointed to by `data`.
void pack(const Position& pos);
// Buffer that pack() writes the packed position (256bit = 32bytes) into.
// Must be set by the caller before pack() is called.
uint8_t *data; // uint8_t[32];
// Bit cursor over the buffer being written or read.
BitStream stream;
// Writes the Huffman code of one board square to the stream.
void write_board_piece_to_stream(Piece pc);
// Reads the Huffman code of one board square from the stream.
Piece read_board_piece_from_stream();
};
// Huffman coding
// * simplified to make conversion easier.
//
// Huffman Encoding (bits are written lowest bit first)
//
// Empty xxxxxxx0
// Pawn xxxx0001 + 1 bit (Color)
// Knight xxxx0011 + 1 bit (Color)
// Bishop xxxx0101 + 1 bit (Color)
// Rook xxxx0111 + 1 bit (Color)
// Queen xxxx1001 + 1 bit (Color)
//
// Worst case:
// - side to move 1 bit
// - 32 empty squares 32 bits
// - 30 pieces 150 bits
// - 2 kings 12 bits
// - castling rights 4 bits
// - ep square 7 bits
// - rule50 7 bits
// - game ply 16 bits
// - TOTAL 229 bits < 256 bits
// One entry of the Huffman table.
struct HuffmanedPiece
{
int code; // bit pattern of the code
int bits; // number of bits in the code
};
// Indexed by piece type, NO_PIECE_TYPE through QUEEN. Kings have no entry:
// their squares are stored separately.
constexpr HuffmanedPiece huffman_table[] =
{
{0b0000,1}, // NO_PIECE
{0b0001,4}, // PAWN
{0b0011,4}, // KNIGHT
{0b0101,4}, // BISHOP
{0b0111,4}, // ROOK
{0b1001,4}, // QUEEN
};
// Packs `pos` into the 32-byte buffer that `data` points to.
// The order of the writes below defines the binary format and must match the
// order of the reads in set_from_packed_sfen().
void SfenPacker::pack(const Position& pos)
{
// The bit stream only ORs bits in, so the buffer has to start out cleared.
memset(data, 0, 32 /* 256bit */);
stream.set_data(data);
// Side to move.
stream.write_one_bit((int)(pos.side_to_move()));
// King squares, 6 bits each, one per colour in the order of Colors.
for(auto c: Colors)
stream.write_n_bit(pos.king_square(c), 6);
// Write the pieces on the board other than the kings,
// rank 8 down to rank 1, file A to file H within each rank.
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
Piece pc = pos.piece_on(make_square(f, r));
if (type_of(pc) == KING)
continue;
write_board_piece_to_stream(pc);
}
}
// Castling rights, one bit each.
// TODO(someone): Support chess960.
stream.write_one_bit(pos.can_castle(WHITE_OO));
stream.write_one_bit(pos.can_castle(WHITE_OOO));
stream.write_one_bit(pos.can_castle(BLACK_OO));
stream.write_one_bit(pos.can_castle(BLACK_OOO));
// En passant: a presence bit, followed by the 6-bit square when present.
if (pos.ep_square() == SQ_NONE) {
stream.write_one_bit(0);
}
else {
stream.write_one_bit(1);
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
}
// Low 6 bits of the rule50 counter.
stream.write_n_bit(pos.state()->rule50, 6);
// Fullmove number (starting from 1), derived from the game ply; low 8 bits.
const int fm = 1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2;
stream.write_n_bit(fm, 8);
// Write the high 8 bits of the fullmove number. This is a fix for the
// limited range of the 8-bit field above.
// This is backwards compatible.
stream.write_n_bit(fm >> 8, 8);
// Write the highest bit of rule50 at the end. This is a backwards
// compatible fix for rule50 having only 6 bits stored.
// This bit is just ignored by the old parsers.
stream.write_n_bit(pos.state()->rule50 >> 6, 1);
// Everything must fit into the 256-bit buffer.
assert(stream.get_cursor() <= 256);
}
// Writes the encoding of one board square to the stream: the Huffman code of
// the piece type, followed by one colour bit unless the square is empty.
// Must not be called for kings; they have no entry in huffman_table.
void SfenPacker::write_board_piece_to_stream(Piece pc)
{
    const HuffmanedPiece& entry = huffman_table[type_of(pc)];
    stream.write_n_bit(entry.code, entry.bits);

    // An empty square carries no colour bit.
    if (pc != NO_PIECE)
        stream.write_one_bit(color_of(pc));
}
// Reads the encoding of one board square from the stream.
// Bits are consumed one at a time until the accumulated pattern matches an
// entry of huffman_table; a real piece is then followed by one colour bit.
Piece SfenPacker::read_board_piece_from_stream()
{
    int code = 0;

    for (int bits = 1; ; ++bits)
    {
        code |= stream.read_one_bit() << (bits - 1);

        assert(bits <= 6);

        for (PieceType pr = NO_PIECE_TYPE; pr < KING; ++pr)
        {
            const bool matches = huffman_table[pr].code == code
                              && huffman_table[pr].bits == bits;
            if (!matches)
                continue;

            if (pr == NO_PIECE_TYPE)
                return NO_PIECE;

            // The colour bit follows the piece code.
            Color c = (Color)stream.read_one_bit();
            return make_piece(c, pr);
        }
    }
}
// Sets up `pos` from the packed position `sfen`.
// `si` is zeroed and becomes the state of the position; `th` is stored as the
// position's owning thread.
// Returns 1 if more than 256 bits have been consumed while placing the
// pieces, 0 otherwise.
// The reads below mirror, in the same order, the writes in SfenPacker::pack().
int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th)
{
SfenPacker packer;
auto& stream = packer.stream;
// TODO: separate streams for writing and reading. Here we actually have to
// const_cast which is not safe in the long run.
stream.set_data(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(&sfen)));
// Start from an empty position with a zeroed state.
pos.clear();
std::memset(si, 0, sizeof(StateInfo));
std::fill_n(&pos.pieceList[0][0], sizeof(pos.pieceList) / sizeof(Square), SQ_NONE);
pos.st = si;
// Active color
pos.sideToMove = (Color)stream.read_one_bit();
pos.pieceList[W_KING][0] = SQUARE_NB;
pos.pieceList[B_KING][0] = SQUARE_NB;
// First the king squares: the kings are written straight into the board
// array and are properly placed with put_piece() in the loop below.
for (auto c : Colors)
pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
// Piece placement
for (Rank r = RANK_8; r >= RANK_1; --r)
{
for (File f = FILE_A; f <= FILE_H; ++f)
{
auto sq = make_square(f, r);
// Squares already holding a king have no entry in the stream.
Piece pc;
if (type_of(pos.board[sq]) != KING)
{
assert(pos.board[sq] == NO_PIECE);
pc = packer.read_board_piece_from_stream();
}
else
{
pc = pos.board[sq];
// Clear the square first; put_piece() would otherwise trip an assert
// on the square already being occupied.
pos.board[sq] = NO_PIECE;
}
// There may be no pieces, so skip in that case.
if (pc == NO_PIECE)
continue;
pos.put_piece(Piece(pc), sq);
// Reading past the 256-bit buffer means the data is invalid.
// (Only checked after a piece has been placed, not for empty squares.)
if (stream.get_cursor()> 256)
return 1;
}
}
// Castling availability.
// For each set bit, scan along the back rank from the corner towards the
// centre for the first rook of that colour and register the right with it.
// NOTE(review): the scans have no bound; they rely on such a rook existing.
// TODO(someone): Support chess960.
pos.st->castlingRights = 0;
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_H1); pos.piece_on(rsq) != W_ROOK; --rsq) {}
pos.set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(WHITE, SQ_A1); pos.piece_on(rsq) != W_ROOK; ++rsq) {}
pos.set_castling_right(WHITE, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_H1); pos.piece_on(rsq) != B_ROOK; --rsq) {}
pos.set_castling_right(BLACK, rsq);
}
if (stream.read_one_bit()) {
Square rsq;
for (rsq = relative_square(BLACK, SQ_A1); pos.piece_on(rsq) != B_ROOK; ++rsq) {}
pos.set_castling_right(BLACK, rsq);
}
// En passant square. Ignore if no pawn capture is possible
if (stream.read_one_bit()) {
Square ep_square = static_cast<Square>(stream.read_n_bit(6));
pos.st->epSquare = ep_square;
if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
|| !(pos.pieces(~pos.sideToMove, PAWN) & (pos.st->epSquare + pawn_push(~pos.sideToMove))))
pos.st->epSquare = SQ_NONE;
}
else {
pos.st->epSquare = SQ_NONE;
}
// Halfmove clock (low 6 bits)
pos.st->rule50 = stream.read_n_bit(6);
// Fullmove number (low 8 bits); temporarily kept in gamePly.
pos.gamePly = stream.read_n_bit(8);
// Read the high 8 bits of the fullmove number. This was added as a fix for
// the fullmove number having only 8 bits stored.
// In older entries these will just be zero bits.
pos.gamePly |= stream.read_n_bit(8) << 8;
// Read the highest bit of rule50. This was added as a fix for rule50
// counter having only 6 bits stored.
// In older entries this will just be a zero bit.
pos.st->rule50 |= stream.read_n_bit(1) << 6;
// Convert from fullmove starting from 1 to gamePly starting from 0,
// handle also common incorrect FEN with fullmove = 0.
pos.gamePly = std::max(2 * (pos.gamePly - 1), 0) + (pos.sideToMove == BLACK);
assert(stream.get_cursor() <= 256);
pos.chess960 = false;
pos.thisThread = th;
pos.set_state(pos.st);
assert(pos.pos_is_ok());
return 0;
}
// Packs `pos` into a freshly created PackedSfen and returns it.
PackedSfen sfen_pack(Position& pos)
{
    PackedSfen packed;

    // Let the packer write directly into the bytes of the result.
    SfenPacker packer;
    packer.data = reinterpret_cast<uint8_t*>(&packed);
    packer.pack(pos);

    return packed;
}
}
+20
View File
@@ -0,0 +1,20 @@
#ifndef _SFEN_PACKER_H_
#define _SFEN_PACKER_H_
#include "types.h"
#include "learn/packed_sfen.h"
#include <cstdint>
class Position;
struct StateInfo;
class Thread;
namespace Learner {
// Sets up `pos` from the packed position `sfen`, using `si` as its state and
// `th` as its owning thread. Returns 0 on success and a non-zero value when
// the packed data is found to be invalid.
int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th);
// Returns `pos` packed into 32 bytes.
PackedSfen sfen_pack(Position& pos);
}
#endif
+365
View File
@@ -0,0 +1,365 @@
#include "sfen_stream.h"
#include "packed_sfen.h"
#include "misc.h"
#include <string>
#include <vector>
#include <deque>
#include <memory>
#include <mutex>
#include <list>
#include <atomic>
#include <optional>
#include <iostream>
#include <cstdint>
#include <thread>
namespace Learner{
// How SfenReader treats a file once it has been read to the end.
enum struct SfenReaderMode
{
// Each file is read once; reading ends after the last file.
Sequential,
// A file that yielded at least one entry is put back at the end of the
// file queue and read again later.
Cyclic
};
// Sfen reader
struct SfenReader
{
// Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT
static constexpr size_t DEFAULT_THREAD_BUFFER_SIZE = 10 * 1000;
// Buffer for reading files (If this is made larger,
// the shuffle becomes larger and the phases may vary.
// If it is too large, the memory consumption will increase.
// SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
static constexpr const size_t DEFAULT_SFEN_READ_SIZE = 1000 * 1000 * 10;
// Do not use std::random_device().
// Because it always the same integers on MinGW.
SfenReader(
const std::vector<std::string>& filenames_,
bool do_shuffle,
SfenReaderMode mode_,
int thread_num,
const std::string& seed,
size_t read_size = DEFAULT_SFEN_READ_SIZE,
size_t buffer_size = DEFAULT_THREAD_BUFFER_SIZE
) :
filenames(filenames_.begin(), filenames_.end()),
mode(mode_),
sfen_read_size(read_size),
thread_buffer_size(buffer_size),
prng(seed)
{
packed_sfens.resize(thread_num);
total_read = 0;
end_of_files = false;
shuffle = do_shuffle;
stop_flag = false;
file_worker_thread = std::thread([&] {
this->file_read_worker();
});
}
// Asks the background file reader to stop and waits for it to exit.
~SfenReader()
{
// The worker checks this flag in its wait loop; it must be set before joining.
stop_flag = true;
if (file_worker_thread.joinable())
file_worker_thread.join();
}
// Reads up to `count` positions, using the buffer of thread 0, for
// calculations such as mse.
// If reading fails an error is printed and the positions read so far are
// returned.
PSVector read_for_mse(uint64_t count)
{
    PSVector positions;
    positions.reserve(count);

    while (positions.size() < count)
    {
        PackedSfenValue entry;
        if (!read_to_thread_buffer(0, entry))
        {
            std::cout << "ERROR (sfen_reader): Reading failed." << std::endl;
            break;
        }

        positions.push_back(entry);
    }

    return positions;
}
// Reads a whole validation file into memory.
// Entries whose absolute score exceeds `eval_limit` are skipped, and so are
// drawn games unless `use_draw_games` is set.
// If the file cannot be opened an error is printed and an empty set is
// returned.
PSVector read_validation_set(const std::string& file_name, int eval_limit, bool use_draw_games)
{
    PSVector sfen_for_mse;

    auto input = open_sfen_input_file(file_name);

    // open_sfen_input_file() yields a null pointer when the file cannot be
    // opened; do not dereference it.
    if (input == nullptr)
    {
        std::cout << "ERROR (sfen_reader): Could not open validation file: " << file_name << std::endl;
        return sfen_for_mse;
    }

    while (!input->eof())
    {
        std::optional<PackedSfenValue> p_opt = input->next();
        if (!p_opt.has_value())
            break;

        auto& p = *p_opt;

        if (eval_limit < abs(p.score))
            continue;

        if (!use_draw_games && p.game_result == 0)
            continue;

        sfen_for_mse.push_back(p);
    }

    return sfen_for_mse;
}
// [ASYNC] Hands one position to the calling thread through `ps`.
// Returns false when the thread's buffer is exhausted and cannot be refilled.
bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps)
{
    auto& local_buffer = packed_sfens[thread_id];

    // Refill the thread's buffer when it is missing or used up; give up if
    // there is nothing left to refill it with.
    const bool needs_refill = (local_buffer == nullptr) || local_buffer->empty();
    if (needs_refill && !read_to_thread_buffer_impl(thread_id))
        return false;

    // At this point the buffer exists and holds at least one position.
    ps = local_buffer->back();
    local_buffer->pop_back();

    // Release the buffer as soon as its last position has been taken.
    if (local_buffer->empty())
        local_buffer.reset();

    return true;
}
// [ASYNC] Moves one filled buffer from the shared pool into the slot of
// `thread_id`, waiting for the file worker if the pool is currently empty.
// Returns false once the worker has reached the end of its input and the pool
// holds nothing more.
bool read_to_thread_buffer_impl(size_t thread_id)
{
    while (true)
    {
        // Sample the end-of-input flag BEFORE looking at the pool. The worker
        // raises the flag only after its last push, so if the flag is already
        // set here, the pool check below sees everything that will ever be
        // queued. Checking the flag after the pool could miss buffers pushed
        // in between and report the end while data is still available.
        const bool no_more_input = end_of_files;

        {
            std::unique_lock<std::mutex> lk(mutex);

            // Take a buffer from the pool if one is available.
            if (!packed_sfens_pool.empty())
            {
                packed_sfens[thread_id] = std::move(packed_sfens_pool.front());
                packed_sfens_pool.pop_front();

                total_read += thread_buffer_size;

                return true;
            }
        }

        // Nothing queued and the worker is done: no more positions.
        if (no_more_input)
            return false;

        // Wait for the file worker to fill packed_sfens_pool.
        // The mutex is not held while waiting, so the worker can make progress.
        // Poor man's condition variable.
        sleep(1);
    }
}
// Body of the background thread: reads the input files chunk by chunk
// (sfen_read_size positions per chunk), optionally shuffles each chunk, splits
// it into buffers of thread_buffer_size positions and appends those to
// packed_sfens_pool for the consumer threads.
// Ends when stop_flag is set or when there is no further file to read, in
// which case end_of_files is set.
void file_read_worker()
{
std::string currentFilename;
uint64_t numEntriesReadFromCurrentFile = 0;
// Opens the next usable file from the queue, skipping files that cannot be
// opened or are empty. Returns false when the queue is exhausted.
auto open_next_file = [&]() {
for(;;)
{
sfen_input_stream.reset();
// no more files
if (filenames.empty())
return false;
// Get the next file name.
currentFilename = filenames.front();
filenames.pop_front();
numEntriesReadFromCurrentFile = 0;
sfen_input_stream = open_sfen_input_file(currentFilename);
auto out = sync_region_cout.new_region();
if (sfen_input_stream == nullptr)
{
out << "INFO (sfen_reader): File does not exist: " << currentFilename << '\n';
}
else
{
out << "INFO (sfen_reader): Opened file for reading: " << currentFilename << '\n';
// in case the file is empty or was deleted.
if (sfen_input_stream->eof())
{
out << " - File empty, nothing to read.\n";
}
else
{
return true;
}
}
}
};
// Open the first file; without any usable file there is nothing to do.
if (sfen_input_stream == nullptr && !open_next_file())
{
auto out = sync_region_cout.new_region();
out << "INFO (sfen_reader): End of files." << std::endl;
end_of_files = true;
return;
}
while (true)
{
// Wait until the pool holds less than one chunk's worth of buffers.
// NOTE(review): size() is read here without holding the mutex while the
// consumers pop from the pool under it - confirm this is acceptable.
while (!stop_flag && packed_sfens_pool.size() >= sfen_read_size / thread_buffer_size)
sleep(100);
if (stop_flag)
return;
PSVector sfens;
sfens.reserve(sfen_read_size);
// Read from the file into the file buffer.
while (sfens.size() < sfen_read_size)
{
std::optional<PackedSfenValue> p = sfen_input_stream->next();
if (p.has_value())
{
sfens.push_back(*p);
++numEntriesReadFromCurrentFile;
}
else
{
if (mode == SfenReaderMode::Cyclic
&& numEntriesReadFromCurrentFile > 0)
{
// The file contained data so we add it again to the end of the queue.
filenames.emplace_back(currentFilename);
}
if(!open_next_file())
{
// There was no next file. Abort.
// Positions already collected in `sfens` for this partial chunk are
// not handed to the pool.
auto out = sync_region_cout.new_region();
out << "INFO (sfen_reader): End of files." << std::endl;
end_of_files = true;
return;
}
}
}
// Shuffle the chunk that was just read.
if (shuffle)
{
Algo::shuffle(sfens, prng);
}
// Split the chunk into buffers of thread_buffer_size positions each.
// sfen_read_size shall be a multiple of thread_buffer_size.
assert((sfen_read_size % thread_buffer_size) == 0);
auto size = size_t(sfen_read_size / thread_buffer_size);
std::vector<std::unique_ptr<PSVector>> buffers;
buffers.reserve(size);
for (size_t i = 0; i < size; ++i)
{
// Ownership of each buffer passes to the pool and then to a consumer.
auto buf = std::make_unique<PSVector>();
buf->resize(thread_buffer_size);
memcpy(
buf->data(),
&sfens[i * thread_buffer_size],
sizeof(PackedSfenValue) * thread_buffer_size);
buffers.emplace_back(std::move(buf));
}
{
std::unique_lock<std::mutex> lk(mutex);
// The mutex lock is required because the
// contents of packed_sfens_pool are changed.
for (auto& buf : buffers)
packed_sfens_pool.emplace_back(std::move(buf));
}
}
}
protected:
// worker thread reading file in background
// (presumably runs file_read_worker(); the thread is started outside this view)
std::thread file_worker_thread;
// Queue of sfen files still to be read. file_read_worker() pops from the
// front and, in Cyclic mode, re-appends a file that yielded data.
std::deque<std::string> filenames;
// When set, file_read_worker() returns instead of reading the next batch.
std::atomic<bool> stop_flag;
// Number of entries handed to reader threads so far; increased by
// thread_buffer_size every time a chunk is taken from packed_sfens_pool.
std::atomic<uint64_t> total_read;
// When true, each batch read from file is shuffled before it is split
// into chunks; when false the file order is kept.
bool shuffle;
// Reading mode; SfenReaderMode::Cyclic makes exhausted files get queued again.
SfenReaderMode mode;
// Number of entries file_read_worker() reads per batch.
// Asserted to be a multiple of thread_buffer_size.
size_t sfen_read_size;
// Number of entries in each chunk handed to a reader thread.
size_t thread_buffer_size;
// Random number generator passed to Algo::shuffle for each batch.
PRNG prng;
// Set by file_read_worker() once no further input file can be opened.
std::atomic<bool> end_of_files;
// Stream over the file currently being read (nullptr when none is open).
std::unique_ptr<BasicSfenInputStream> sfen_input_stream;
// Current chunk for each reader thread, indexed by thread id.
// Owned through unique_ptr, so no manual delete is needed; a slot is
// refilled from packed_sfens_pool by read_to_thread_buffer_impl().
std::vector<std::unique_ptr<PSVector>> packed_sfens;
// Mutex guarding packed_sfens_pool
std::mutex mutex;
// Pool of ready chunks. file_read_worker() appends to the back,
// each reader thread takes its next packed_sfens[thread_id] from the front.
// * Access only while holding `mutex`.
std::list<std::unique_ptr<PSVector>> packed_sfens_pool;
};
}
+222
View File
@@ -0,0 +1,222 @@
#ifndef _SFEN_STREAM_H_
#define _SFEN_STREAM_H_
#include "packed_sfen.h"
#include "extra/nnue_data_binpack_format.h"
#include <optional>
#include <fstream>
#include <string>
#include <memory>
namespace Learner {
// On-disk format selected when a new sfen output file is created.
enum class SfenOutputType
{
    Bin,      // written through BinSfenOutputStream
    Binpack   // written through BinpackSfenOutputStream
};
// Returns true when `lhs` finishes with the character sequence `end`.
// An empty `end` matches every string.
static bool ends_with(const std::string& lhs, const std::string& end)
{
    if (end.size() > lhs.size()) return false;

    // Compare the trailing end.size() characters of lhs against end.
    return lhs.compare(lhs.size() - end.size(), end.size(), end) == 0;
}

// Returns true when `filename` ends with "." followed by `extension`.
// `extension` is given without the leading dot (e.g. "bin").
static bool has_extension(const std::string& filename, const std::string& extension)
{
    return ends_with(filename, "." + extension);
}

// Returns `filename` unchanged when it already carries the extension `ext`
// (given without the leading dot), otherwise `filename + "." + ext`.
//
// The check includes the dot, in agreement with has_extension(): a name
// such as "cabin" merely ends in the letters "bin" and still gets ".bin"
// appended.
static std::string filename_with_extension(const std::string& filename, const std::string& ext)
{
    if (has_extension(filename, ext))
    {
        return filename;
    }

    return filename + "." + ext;
}
// Abstract source of PackedSfenValue entries.
struct BasicSfenInputStream
{
    // Yields the next entry, or std::nullopt when none could be read.
    virtual std::optional<PackedSfenValue> next() = 0;

    // Tells whether the stream has been found to be exhausted/unreadable.
    virtual bool eof() const = 0;

    virtual ~BasicSfenInputStream() = default;
};
struct BinSfenInputStream : BasicSfenInputStream
{
static constexpr auto openmode = std::ios::in | std::ios::binary;
static inline const std::string extension = "bin";
BinSfenInputStream(std::string filename) :
m_stream(filename, openmode),
m_eof(!m_stream)
{
}
std::optional<PackedSfenValue> next() override
{
PackedSfenValue e;
if(m_stream.read(reinterpret_cast<char*>(&e), sizeof(PackedSfenValue)))
{
return e;
}
else
{
m_eof = true;
return std::nullopt;
}
}
bool eof() const override
{
return m_eof;
}
~BinSfenInputStream() override {}
private:
std::fstream m_stream;
bool m_eof;
};
struct BinpackSfenInputStream : BasicSfenInputStream
{
static constexpr auto openmode = std::ios::in | std::ios::binary;
static inline const std::string extension = "binpack";
BinpackSfenInputStream(std::string filename) :
m_stream(filename, openmode),
m_eof(!m_stream.hasNext())
{
}
std::optional<PackedSfenValue> next() override
{
static_assert(sizeof(binpack::nodchip::PackedSfenValue) == sizeof(PackedSfenValue));
if (!m_stream.hasNext())
{
m_eof = true;
return std::nullopt;
}
auto training_data_entry = m_stream.next();
auto v = binpack::trainingDataEntryToPackedSfenValue(training_data_entry);
PackedSfenValue psv;
// same layout, different types. One is from generic library.
std::memcpy(&psv, &v, sizeof(PackedSfenValue));
return psv;
}
bool eof() const override
{
return m_eof;
}
~BinpackSfenInputStream() override {}
private:
binpack::CompressedTrainingDataEntryReader m_stream;
bool m_eof;
};
// Abstract sink for batches of PackedSfenValue entries.
struct BasicSfenOutputStream
{
    // Writes every entry of `sfens` to the underlying output.
    virtual void write(const PSVector& sfens) = 0;

    virtual ~BasicSfenOutputStream() = default;
};
// Output stream producing a ".bin" file: entries are written as raw
// PackedSfenValue records.
struct BinSfenOutputStream : BasicSfenOutputStream
{
    // std::ios::app: an existing file is extended, not truncated.
    static constexpr auto openmode = std::ios::out | std::ios::binary | std::ios::app;
    static inline const std::string extension = "bin";

    // Opens `filename` for appending, adding the ".bin" extension
    // through filename_with_extension() when it is missing.
    BinSfenOutputStream(std::string filename) :
        m_file(filename_with_extension(filename, extension), openmode)
    {
    }

    // Appends the whole batch as one contiguous block of bytes.
    void write(const PSVector& sfens) override
    {
        const char* const bytes = reinterpret_cast<const char*>(sfens.data());
        const auto byte_count = sizeof(PackedSfenValue) * sfens.size();
        m_file.write(bytes, byte_count);
    }

    ~BinSfenOutputStream() override = default;

private:
    std::fstream m_file;
};
struct BinpackSfenOutputStream : BasicSfenOutputStream
{
static constexpr auto openmode = std::ios::out | std::ios::binary | std::ios::app;
static inline const std::string extension = "binpack";
BinpackSfenOutputStream(std::string filename) :
m_stream(filename_with_extension(filename, extension), openmode)
{
}
void write(const PSVector& sfens) override
{
static_assert(sizeof(binpack::nodchip::PackedSfenValue) == sizeof(PackedSfenValue));
for(auto& sfen : sfens)
{
// The library uses a type that's different but layout-compatibile.
binpack::nodchip::PackedSfenValue e;
std::memcpy(&e, &sfen, sizeof(binpack::nodchip::PackedSfenValue));
m_stream.addTrainingDataEntry(binpack::packedSfenValueToTrainingDataEntry(e));
}
}
~BinpackSfenOutputStream() override {}
private:
binpack::CompressedTrainingDataEntryWriter m_stream;
};
// Creates the input stream matching the extension of `filename`
// (".bin" or ".binpack"). Returns nullptr for any other extension.
inline std::unique_ptr<BasicSfenInputStream> open_sfen_input_file(const std::string& filename)
{
    const bool is_bin = has_extension(filename, BinSfenInputStream::extension);
    if (is_bin)
        return std::make_unique<BinSfenInputStream>(filename);

    const bool is_binpack = has_extension(filename, BinpackSfenInputStream::extension);
    if (is_binpack)
        return std::make_unique<BinpackSfenInputStream>(filename);

    // Unknown extension: no reader available.
    return nullptr;
}
// Creates an output stream of the explicitly requested format.
// The stream classes append their own extension to `filename` when needed.
inline std::unique_ptr<BasicSfenOutputStream> create_new_sfen_output(const std::string& filename, SfenOutputType sfen_output_type)
{
    if (sfen_output_type == SfenOutputType::Bin)
        return std::make_unique<BinSfenOutputStream>(filename);

    if (sfen_output_type == SfenOutputType::Binpack)
        return std::make_unique<BinpackSfenOutputStream>(filename);

    // Every enumerator is handled above; reaching this point means the
    // argument held a value outside the enumeration.
    assert(false);
    return nullptr;
}
// Creates an output stream whose format is deduced from the extension of
// `filename` (".bin" or ".binpack"). Returns nullptr for any other extension.
inline std::unique_ptr<BasicSfenOutputStream> create_new_sfen_output(const std::string& filename)
{
    const bool wants_bin = has_extension(filename, BinSfenOutputStream::extension);
    if (wants_bin)
        return std::make_unique<BinSfenOutputStream>(filename);

    const bool wants_binpack = has_extension(filename, BinpackSfenOutputStream::extension);
    if (wants_binpack)
        return std::make_unique<BinpackSfenOutputStream>(filename);

    // Unknown extension: the format cannot be deduced.
    return nullptr;
}
}
#endif
+206
View File
@@ -0,0 +1,206 @@
#include "packed_sfen.h"
#include "sfen_stream.h"
#include "misc.h"
#include "extra/nnue_data_binpack_format.h"
#include "syzygy/tbprobe.h"
#include <cstring>
#include <filesystem>
#include <fstream>
#include <limits>
#include <list>
#include <memory>
#include <optional>
#include <shared_mutex>
#include <thread>
#include <atomic>
using namespace std;
namespace Learner {
// Collects packed sfens produced by several threads and writes them to
// disk from a dedicated background thread.
struct SfenWriter
{
    // Number of sfens a per-thread buffer collects before it is queued
    // for writing.
    static constexpr size_t SFEN_WRITE_SIZE = 5000;

    // filename_        : output file name
    // thread_num       : number of producer threads (one buffer each)
    // save_count       : number of sfens after which a new output file is started
    // sfen_output_type : format of the output file(s)
    SfenWriter(string filename_, int thread_num, uint64_t save_count, SfenOutputType sfen_output_type)
    {
        sfen_buffers_pool.reserve(static_cast<size_t>(thread_num) * 10);
        sfen_buffers.resize(thread_num);

        auto out = sync_region_cout.new_region();
        out << "INFO (sfen_writer): Creating new data file at " << filename_ << std::endl;

        sfen_format = sfen_output_type;
        output_file_stream = create_new_sfen_output(filename_, sfen_format);
        filename = filename_;
        save_every = save_count;

        // The flag must be initialised before the worker starts polling it.
        finished = false;

        file_worker_thread = std::thread([this] { this->file_write_worker(); });
    }

    // Queues whatever is left in the per-thread buffers, lets the worker
    // drain the queue and then closes the output.
    ~SfenWriter()
    {
        flush();

        finished = true;
        file_worker_thread.join();
        output_file_stream.reset();

#if !defined(NDEBUG)
        {
            // The worker writes everything before it exits, so nothing
            // may be left in any buffer at this point.
            for (const auto& leftover : sfen_buffers) { assert(leftover == nullptr); (void)leftover; }
            assert(sfen_buffers_pool.empty());
        }
#endif
    }

    // Appends `psv` to the buffer owned by thread `thread_id`. Once that
    // buffer holds SFEN_WRITE_SIZE entries it is handed over to the
    // writer thread.
    void write(size_t thread_id, const PackedSfenValue& psv)
    {
        auto& slot = sfen_buffers[thread_id];

        // The slot is empty on first use and right after a hand-over.
        if (slot == nullptr)
        {
            slot = std::make_unique<PSVector>();
            slot->reserve(SFEN_WRITE_SIZE);
        }

        // The slot is exclusive to this thread: no lock needed to append.
        slot->push_back(psv);

        if (slot->size() < SFEN_WRITE_SIZE)
            return;

        // sfen_buffers_pool is shared among threads: critical section.
        // The worker takes care of the rest.
        std::lock_guard<std::mutex> guard(mutex);
        sfen_buffers_pool.emplace_back(std::move(slot));
    }

    // Queues the remaining content of every per-thread buffer.
    void flush()
    {
        const size_t slot_count = sfen_buffers.size();
        for (size_t id = 0; id != slot_count; ++id)
        {
            flush(id);
        }
    }

    // Queues what remains in the buffer of thread `thread_id` for writing.
    void flush(size_t thread_id)
    {
        std::lock_guard<std::mutex> guard(mutex);

        auto& slot = sfen_buffers[thread_id];

        // The slot may be null (nothing written since the last hand-over).
        if (slot != nullptr && !slot->empty())
        {
            sfen_buffers_pool.emplace_back(std::move(slot));
        }
    }

    // Body of the writer thread: drains sfen_buffers_pool into the output
    // file until `finished` is set and the pool is empty.
    void file_write_worker()
    {
        while (!finished || sfen_buffers_pool.size())
        {
            std::vector<std::unique_ptr<PSVector>> ready;
            {
                // Take all queued buffers at once and leave an empty
                // pool behind for the producers.
                std::lock_guard<std::mutex> guard(mutex);
                ready.swap(sfen_buffers_pool);
            }

            if (ready.empty())
            {
                // Poor man's condition variable.
                sleep(100);
                continue;
            }

            for (auto& chunk : ready)
            {
                output_file_stream->write(*chunk);

                sfen_write_count += chunk->size();

                // Count what went into the current file; once save_every
                // is reached, continue in a new file and restart the count.
                sfen_write_count_current_file += chunk->size();
                if (sfen_write_count_current_file >= save_every)
                {
                    sfen_write_count_current_file = 0;

                    // Sequence number appended to the file name.
                    const int n = static_cast<int>(sfen_write_count / save_every);

                    // Replacing the stream closes the previous file. The
                    // output streams open in append mode, so an existing
                    // file of the same name is extended.
                    std::string new_filename = filename + "_" + std::to_string(n);
                    output_file_stream = create_new_sfen_output(new_filename, sfen_format);

                    auto out = sync_region_cout.new_region();
                    out << "INFO (sfen_writer): Creating new data file at " << new_filename << std::endl;
                }
            }
        }
    }

private:

    std::unique_ptr<BasicSfenOutputStream> output_file_stream;

    // A new output file is started after every save_every sfens.
    uint64_t save_every = std::numeric_limits<uint64_t>::max();

    // File name passed in the constructor
    std::string filename;

    // Thread that writes to the file
    std::thread file_worker_thread;

    // Set by the destructor to tell the writer thread to finish
    std::atomic<bool> finished;

    SfenOutputType sfen_format;

    // sfen_buffers holds one buffer per producer thread;
    // sfen_buffers_pool holds buffers waiting to be written.
    // A per-thread buffer moves to the pool once it has collected
    // SFEN_WRITE_SIZE entries (or on flush).
    std::vector<std::unique_ptr<PSVector>> sfen_buffers;
    std::vector<std::unique_ptr<PSVector>> sfen_buffers_pool;

    // Mutex required to access sfen_buffers_pool
    std::mutex mutex;

    // Number of sfens written in total, and the
    // number of sfens written to the current file.
    uint64_t sfen_write_count = 0;
    uint64_t sfen_write_count_current_file = 0;
};
}
+242
View File
@@ -0,0 +1,242 @@
#include "transform.h"
#include "sfen_stream.h"
#include "packed_sfen.h"
#include "thread.h"
#include "position.h"
#include "evaluate.h"
#include "nnue/evaluate_nnue.h"
#include <string>
#include <map>
#include <iostream>
#include <cmath>
#include <algorithm>
#include <cstdint>
#include <limits>
namespace Learner
{
using CommandFunc = void(*)(std::istringstream&);
// Selects how nudge() limits the step from the static evaluation
// towards the deep evaluation.
enum struct NudgedStaticMode
{
    Absolute,    // move by at most absolute_nudge points
    Relative,    // scale the static eval by a factor within 1 +/- relative_nudge
    Interpolate  // linear blend weighted by interpolate_nudge
};

// Parameters of the nudged_static transform.
struct NudgedStaticParams
{
    std::string input_filename = "in.binpack";
    std::string output_filename = "out.binpack";
    NudgedStaticMode mode = NudgedStaticMode::Absolute;
    int absolute_nudge = 5;
    float relative_nudge = 0.1f;
    float interpolate_nudge = 0.1f;

    // Forces the absolute and relative nudge to be non-negative, which
    // nudge() relies on for its clamp bounds to be ordered.
    void enforce_constraints()
    {
        relative_nudge = std::max(relative_nudge, 0.0f);
        absolute_nudge = std::max(absolute_nudge, 0);
    }
};

// Returns the static evaluation moved towards the deep evaluation as
// configured by params.mode, saturated to the std::int16_t range.
//
//   Absolute    : static + clamp(deep - static, -absolute_nudge, +absolute_nudge)
//   Relative    : static * clamp(deep / static, 1 - relative_nudge, 1 + relative_nudge)
//                 (0 when static is 0)
//   Interpolate : static * (1 - interpolate_nudge) + deep * interpolate_nudge
[[nodiscard]] std::int16_t nudge(NudgedStaticParams& params, std::int16_t static_eval_i16, std::int16_t deep_eval_i16)
{
    auto saturate_i32_to_i16 = [](int v) {
        return static_cast<std::int16_t>(
            std::clamp(
                v,
                (int)std::numeric_limits<std::int16_t>::min(),
                (int)std::numeric_limits<std::int16_t>::max()
            )
        );
    };

    // Converting a float that is NaN or outside the range of int is
    // undefined behaviour, so saturate while still in the float domain.
    // Both int16 limits are exactly representable as float, therefore
    // in-range values truncate exactly as a direct conversion would.
    auto saturate_f32_to_i16 = [](float v) {
        if (std::isnan(v))
            return static_cast<std::int16_t>(0);

        const float bounded = std::clamp(
            v,
            (float)std::numeric_limits<std::int16_t>::min(),
            (float)std::numeric_limits<std::int16_t>::max()
        );
        return static_cast<std::int16_t>((int)bounded);
    };

    int static_eval = static_eval_i16;
    int deep_eval = deep_eval_i16;

    switch(params.mode)
    {
        case NudgedStaticMode::Absolute:
            return saturate_i32_to_i16(
                static_eval + std::clamp(
                    deep_eval - static_eval,
                    -params.absolute_nudge,
                    params.absolute_nudge
                )
            );

        case NudgedStaticMode::Relative:
            // Scaling zero yields zero whatever the factor is. Handling it
            // here avoids dividing by zero (0/0 would produce NaN).
            if (static_eval == 0)
                return 0;

            return saturate_f32_to_i16(
                (float)static_eval * std::clamp(
                    (float)deep_eval / (float)static_eval,
                    (1.0f - params.relative_nudge),
                    (1.0f + params.relative_nudge)
                )
            );

        case NudgedStaticMode::Interpolate:
            return saturate_f32_to_i16(
                (float)static_eval * (1.0f - params.interpolate_nudge)
                + (float)deep_eval * params.interpolate_nudge
            );

        default:
            assert(false);
            return 0;
    }
}
// Runs the nudged_static transform: every position of the input file is
// re-scored with nudge(static eval, stored score) and written to the
// output file in batches.
void do_nudged_static(NudgedStaticParams& params)
{
    Thread* th = Threads.main();
    Position& pos = th->rootPos;
    StateInfo si;

    auto in = Learner::open_sfen_input_file(params.input_filename);
    auto out = Learner::create_new_sfen_output(params.output_filename);

    // Both factories return nullptr when the file extension is not recognised.
    if (in == nullptr)
    {
        std::cerr << "Invalid input file type.\n";
        return;
    }

    if (out == nullptr)
    {
        std::cerr << "Invalid output file type.\n";
        return;
    }

    PSVector buffer;
    uint64_t batch_size = 1'000'000;

    buffer.reserve(batch_size);

    uint64_t num_processed = 0;

    // Writes the collected batch, empties the buffer and reports progress.
    auto write_batch = [&]() {
        num_processed += buffer.size();

        out->write(buffer);
        buffer.clear();

        std::cout << "Processed " << num_processed << " positions.\n";
    };

    // next() yields an empty optional once the input is exhausted.
    while (auto entry = in->next())
    {
        auto& ps = *entry;

        pos.set_from_packed_sfen(ps.sfen, &si, th);
        auto static_eval = Eval::evaluate(pos);
        auto deep_eval = ps.score;
        ps.score = nudge(params, static_eval, deep_eval);

        buffer.emplace_back(ps);
        if (buffer.size() >= batch_size)
            write_batch();
    }

    // Final, partially filled batch.
    if (!buffer.empty())
        write_batch();

    std::cout << "Finished.\n";
}
// Parses the arguments of the nudged_static subcommand from `is`, prints
// the resulting configuration and runs the transform.
//
// Recognised tokens: "absolute <int>", "relative <float>",
// "interpolate <float>" (each also selects the mode), "input_file <name>"
// and "output_file <name>". Other tokens are ignored.
void nudged_static(std::istringstream& is)
{
    NudgedStaticParams params{};

    // Extraction fails - ending the loop - once the stream is exhausted
    // or has entered a failed state.
    for (std::string token; is >> token; )
    {
        if (token == "absolute")
        {
            params.mode = NudgedStaticMode::Absolute;
            is >> params.absolute_nudge;
        }
        else if (token == "relative")
        {
            params.mode = NudgedStaticMode::Relative;
            is >> params.relative_nudge;
        }
        else if (token == "interpolate")
        {
            params.mode = NudgedStaticMode::Interpolate;
            is >> params.interpolate_nudge;
        }
        else if (token == "input_file")
        {
            is >> params.input_filename;
        }
        else if (token == "output_file")
        {
            is >> params.output_filename;
        }
    }

    std::cout << "Performing transform nudged_static with parameters:\n";
    std::cout << "input_file : " << params.input_filename << '\n';
    std::cout << "output_file : " << params.output_filename << '\n';
    std::cout << "\n";

    // The values are reported as given, before enforce_constraints().
    switch (params.mode)
    {
        case NudgedStaticMode::Absolute:
            std::cout << "mode : absolute\n";
            std::cout << "absolute_nudge : " << params.absolute_nudge << '\n';
            break;

        case NudgedStaticMode::Relative:
            std::cout << "mode : relative\n";
            std::cout << "relative_nudge : " << params.relative_nudge << '\n';
            break;

        case NudgedStaticMode::Interpolate:
            std::cout << "mode : interpolate\n";
            std::cout << "interpolate_nudge : " << params.interpolate_nudge << '\n';
            break;
    }

    std::cout << '\n';

    params.enforce_constraints();
    do_nudged_static(params);
}
// Entry point of the "transform" command: initialises NNUE, reads the
// subcommand name from `is` and runs the matching handler with the
// remainder of the stream. An unknown name is reported on stdout.
void transform(std::istringstream& is)
{
    const std::map<std::string, CommandFunc> subcommands = {
        { "nudged_static", &nudged_static }
    };

    Eval::NNUE::init();

    std::string subcommand;
    is >> subcommand;

    if (const auto entry = subcommands.find(subcommand); entry != subcommands.end())
    {
        entry->second(is);
    }
    else
    {
        std::cout << "Invalid subcommand " << subcommand << ". Exiting...\n";
    }
}
}
+12
View File
@@ -0,0 +1,12 @@
#ifndef _TRANSFORM_H_
#define _TRANSFORM_H_
#include <sstream>
namespace Learner {
// Runs a data-transform subcommand. The first token read from `is` names
// the subcommand; the rest of the stream holds that subcommand's arguments.
void transform(std::istringstream& is);
}
#endif
+4 -1
View File
@@ -18,6 +18,8 @@
#include <iostream>
#include "nnue/evaluate_nnue.h"
#include "bitboard.h"
#include "endgame.h"
#include "position.h"
@@ -35,6 +37,7 @@ int main(int argc, char* argv[]) {
std::cout << engine_info() << std::endl;
CommandLine::init(argc, argv);
UCI::init(Options);
Tune::init();
PSQT::init();
@@ -44,7 +47,7 @@ int main(int argc, char* argv[]) {
Endgames::init();
Threads.set(size_t(Options["Threads"]));
Search::clear(); // After threads are up
Eval::init_NNUE();
Eval::NNUE::init();
UCI::loop(argc, argv);
+98 -114
View File
@@ -61,6 +61,8 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
using namespace std;
SynchronizedRegionLogger sync_region_cout(std::cout);
namespace {
/// Version number. If Version is left empty, then compile date in the format
@@ -132,6 +134,7 @@ public:
} // namespace
/// engine_info() returns the full name of the current Stockfish version. This
/// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
/// the program was compiled) or "Stockfish <Version>", depending on whether
@@ -356,27 +359,11 @@ void std_aligned_free(void* ptr) {
#endif
}
/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
/// The returned pointer is the aligned one, while the mem argument is the one that needs
/// to be passed to free. With c++17 some of this functionality could be simplified.
/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
#if defined(__linux__) && !defined(__ANDROID__)
#if defined(_WIN32)
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
if (posix_memalign(&mem, alignment, size))
mem = nullptr;
#if defined(MADV_HUGEPAGE)
madvise(mem, allocSize, MADV_HUGEPAGE);
#endif
return mem;
}
#elif defined(_WIN64)
static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
static void* aligned_large_pages_alloc_win(size_t allocSize) {
HANDLE hProcessToken { };
LUID luid { };
@@ -421,23 +408,10 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
return mem;
}
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
static bool firstCall = true;
void* aligned_large_pages_alloc(size_t allocSize) {
// Try to allocate large pages
mem = aligned_ttmem_alloc_large_pages(allocSize);
// Suppress info strings on the first call. The first call occurs before 'uci'
// is received and in that case this output confuses some GUIs.
if (!firstCall)
{
if (mem)
sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
else
sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
}
firstCall = false;
void* mem = aligned_large_pages_alloc_win(allocSize);
// Fall back to regular, page aligned, allocation if necessary
if (!mem)
@@ -448,23 +422,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
#else
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
void* aligned_large_pages_alloc(size_t allocSize) {
constexpr size_t alignment = 64; // assumed cache line size
size_t size = allocSize + alignment - 1; // allocate some extra space
mem = malloc(size);
void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
return ret;
#if defined(__linux__)
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
#else
constexpr size_t alignment = 4096; // assumed small page size
#endif
// round up to multiples of alignment
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
void *mem = std_aligned_alloc(alignment, size);
#if defined(MADV_HUGEPAGE)
madvise(mem, size, MADV_HUGEPAGE);
#endif
return mem;
}
#endif
/// aligned_ttmem_free() will free the previously allocated ttmem
/// aligned_large_pages_free() will free the previously allocated ttmem
#if defined(_WIN64)
#if defined(_WIN32)
void aligned_ttmem_free(void* mem) {
void aligned_large_pages_free(void* mem) {
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
{
@@ -477,8 +459,8 @@ void aligned_ttmem_free(void* mem) {
#else
void aligned_ttmem_free(void *mem) {
free(mem);
void aligned_large_pages_free(void *mem) {
std_aligned_free(mem);
}
#endif
@@ -590,6 +572,62 @@ void bindThisThread(size_t idx) {
} // namespace WinProcGroup
#ifdef _WIN32
#include <direct.h>
#define GETCWD _getcwd
#else
#include <unistd.h>
#define GETCWD getcwd
#endif
namespace CommandLine {

string argv0;            // path+name of the executable binary, as given by argv[0]
string binaryDirectory;  // path of the executable directory
string workingDirectory; // path of the working directory

// Fills argv0, workingDirectory and binaryDirectory from argv[0] and the
// current working directory of the process.
void init(int argc, char* argv[]) {
    (void)argc;

#ifdef _WIN32
    const string pathSeparator = "\\";
#else
    const string pathSeparator = "/";
#endif

    // extract the path+name of the executable binary
    argv0 = argv[0];

#if defined(_WIN32) && defined(_MSC_VER)
    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
    // issues in some windows 10 versions, so check returned values carefully.
    char* pgmptr = nullptr;
    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
        argv0 = pgmptr;
#endif

    // extract the working directory (left empty when it cannot be queried)
    char buff[40000];
    const char* cwd = GETCWD(buff, 40000);
    workingDirectory = cwd ? cwd : "";

    // extract the binary directory path from argv0: everything up to and
    // including the last separator, or "." + separator when there is none
    const size_t lastSeparator = argv0.find_last_of("\\/");
    if (lastSeparator == std::string::npos)
        binaryDirectory = "." + pathSeparator;
    else
        binaryDirectory = argv0.substr(0, lastSeparator + 1);

    // pattern replacement: "./" at the start of path is replaced by the working directory
    const string currentDirPrefix = "." + pathSeparator;
    if (binaryDirectory.compare(0, currentDirPrefix.size(), currentDirPrefix) == 0)
        binaryDirectory.replace(0, 1, workingDirectory);
}

} // namespace CommandLine
// Returns a string that represents the current time. (Used when learning evaluation functions)
std::string now_string()
{
@@ -627,18 +665,27 @@ void* aligned_malloc(size_t size, size_t align)
return p;
}
// Returns the size of the file behind `fs` in bytes, computed as the
// distance between the end and the beginning of the stream. The read
// position is restored afterwards; note that the stream's error flags
// are cleared along the way.
std::uint64_t get_file_size(std::fstream& fs)
{
    const auto original_pos = fs.tellg();

    // Moves the read position to `where` and reports the resulting offset.
    auto offset_at = [&fs](std::ios_base::seekdir where) {
        fs.seekg(0, where);
        return static_cast<std::uint64_t>(fs.tellg());
    };

    const std::uint64_t end_offset = offset_at(std::ios_base::end);
    fs.clear(); // Otherwise, the next seek may fail.
    const std::uint64_t begin_offset = offset_at(std::ios_base::beg);

    fs.seekg(original_pos);

    return end_offset - begin_offset;
}
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
{
fstream fs(filename, ios::in | ios::binary);
if (fs.fail())
return 1;
fs.seekg(0, fstream::end);
uint64_t eofPos = (uint64_t)fs.tellg();
fs.clear(); // Otherwise the next seek may fail.
fs.seekg(0, fstream::beg);
uint64_t begPos = (uint64_t)fs.tellg();
uint64_t file_size = eofPos - begPos;
const uint64_t file_size = get_file_size(fs);
//std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
// I know the file size, so call callback_func to get a buffer for this,
@@ -687,66 +734,3 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
fs.close();
return 0;
}
// ----------------------------
// mkdir wrapper
// ----------------------------
// Specify relative to the current folder. Returns 0 on success, non-zero on failure.
// Create a folder. Japanese is not used.
// In case of gcc under msys2 environment, folder creation fails with _wmkdir(). Cause unknown.
// Use _mkdir() because there is no help for it.
#if defined(_WIN32)
// for Windows
#if defined(_MSC_VER)
#include <codecvt> // I need this because I want wstring to mkdir
#include <locale> // This is required for wstring_convert.
namespace Dependency {
int mkdir(std::string dir_name)
{
std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv;
return _wmkdir(cv.from_bytes(dir_name).c_str());
// ::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
}
}
#elif defined(__GNUC__)
#include <direct.h>
namespace Dependency {
int mkdir(std::string dir_name)
{
return _mkdir(dir_name.c_str());
}
}
#endif
#elif defined(__linux__)
// In the linux environment, this symbol _LINUX is defined in the makefile.
// mkdir implementation for Linux.
#include "sys/stat.h"
namespace Dependency {
int mkdir(std::string dir_name)
{
return ::mkdir(dir_name.c_str(), 0777);
}
}
#else
// In order to judge whether it is a Linux environment, we have to divide the makefile..
// The function to dig a folder on linux is good for the time being... Only used to save the evaluation function file...
namespace Dependency {
int mkdir(std::string dir_name)
{
return 0;
}
}
#endif
+418 -17
View File
@@ -19,6 +19,7 @@
#ifndef MISC_H_INCLUDED
#define MISC_H_INCLUDED
#include <algorithm>
#include <cassert>
#include <chrono>
#include <functional>
@@ -27,6 +28,12 @@
#include <string>
#include <vector>
#include <cstdint>
#include <cmath>
#include <cctype>
#include <sstream>
#include <deque>
#include "types.h"
const std::string engine_info(bool to_uci = false);
@@ -35,8 +42,8 @@ void prefetch(void* addr);
void start_logger(const std::string& fname);
void* std_aligned_alloc(size_t alignment, size_t size);
void std_aligned_free(void* ptr);
void* aligned_ttmem_alloc(size_t size, void*& mem);
void aligned_ttmem_free(void* mem); // nop if mem == nullptr
void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
void aligned_large_pages_free(void* mem); // nop if mem == nullptr
void dbg_hit_on(bool b);
void dbg_hit_on(bool c, bool b);
@@ -44,9 +51,7 @@ void dbg_mean_of(int v);
void dbg_print();
typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
inline TimePoint now() {
return std::chrono::duration_cast<std::chrono::milliseconds>
(std::chrono::steady_clock::now().time_since_epoch()).count();
@@ -67,6 +72,232 @@ std::ostream& operator<<(std::ostream&, SyncCout);
#define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK
// Rounds `ptr` up to the next address that is a multiple of `Alignment`
// (returning `ptr` itself when it is already aligned).
// The caller must make sure the underlying storage is large enough for the
// shifted array, i.e. at least `sizeof(T) * N + alignment` bytes for
// `N` elements.
template <uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr)
{
    static_assert(alignof(T) < Alignment);

    const uintptr_t address = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
    const uintptr_t aligned_address = (address + (Alignment - 1)) / Alignment * Alignment;

    return reinterpret_cast<T*>(reinterpret_cast<char*>(aligned_address));
}
// This logger allows printing many parts in a region atomically
// but doesn't block the threads trying to append to other regions.
// Instead, if some region tries to print while another region holds
// the lock, the messages are queued and printed as soon as all
// earlier regions have released the lock.
//
// Regions are served in creation order: the oldest live region writes
// straight to the stream, every later region buffers its parts until it
// reaches the front of the queue.
struct SynchronizedRegionLogger
{
    using RegionId = std::uint64_t;

    // Move-only handle for one output region. The region is released by
    // unlock() or by the destructor, whichever comes first.
    struct Region
    {
        friend struct SynchronizedRegionLogger;

        // Detached region: writes are ignored.
        Region() :
            logger(nullptr), region_id(0), is_held(false)
        {
        }

        Region(const Region&) = delete;
        Region& operator=(const Region&) = delete;

        Region(Region&& other) :
            logger(other.logger), region_id(other.region_id), is_held(other.is_held)
        {
            other.logger = nullptr;
            other.is_held = false;
        }

        Region& operator=(Region&& other) {
            // Self-assignment must not release the region we keep holding.
            if (this == &other)
                return *this;

            // Release whatever this handle held before taking over.
            unlock();

            logger = other.logger;
            region_id = other.region_id;
            is_held = other.is_held;

            // Detach the source, exactly like the move constructor does.
            other.logger = nullptr;
            other.is_held = false;
            return *this;
        }

        ~Region() { unlock(); }

        // Releases the region; later regions may then be printed.
        // Calling it more than once is harmless.
        void unlock() {
            if (is_held) {
                is_held = false;
                if (logger != nullptr)
                    logger->release_region(region_id);
            }
        }

        // Stream manipulators such as std::endl.
        Region& operator << (std::ostream&(*pManip)(std::ostream&)) {
            if (logger != nullptr)
                logger->write(region_id, pManip);

            return *this;
        }

        template <typename T>
        Region& operator << (const T& value) {
            if (logger != nullptr)
                logger->write(region_id, value);

            return *this;
        }

    private:
        SynchronizedRegionLogger* logger;
        RegionId region_id;
        bool is_held;

        Region(SynchronizedRegionLogger& log, RegionId id) :
            logger(&log), region_id(id), is_held(true)
        {
        }
    };

private:
    struct RegionBookkeeping
    {
        RegionBookkeeping(RegionId rid) : id(rid), is_held(true) {}

        // Output produced while the region was not at the front of the queue.
        std::vector<std::string> pending_parts;
        RegionId id;
        bool is_held;
    };

    // Registers a new region at the back of the queue and returns its id.
    RegionId init_next_region()
    {
        static RegionId next_id = 0;

        std::lock_guard lock(mutex);

        const auto id = next_id++;
        regions.emplace_back(id);
        return id;
    }

    // Emits `value` for region `id`: directly when that region is at the
    // front of the queue, otherwise as a pending part to be printed later.
    // Output for an unknown (already released) region is dropped.
    // The caller must hold `mutex`.
    template <typename T>
    void write_nolock(RegionId id, const T& value) {
        if (regions.empty())
            return;

        if (id == regions.front().id) {
            // We can just directly print to the output because
            // we are at the front of the region queue.
            out << value;
            return;
        }

        // We have to postpone the print until previous regions are
        // processed.
        auto* region = find_region_nolock(id);
        if (region == nullptr)
            return;

        std::stringstream ss;
        ss << value;
        region->pending_parts.emplace_back(std::move(ss).str());
    }

    void write(RegionId id, std::ostream&(*pManip)(std::ostream&)) {
        std::lock_guard lock(mutex);
        write_nolock(id, pManip);
    }

    template <typename T>
    void write(RegionId id, const T& value) {
        std::lock_guard lock(mutex);
        write_nolock(id, value);
    }

    std::ostream& out;
    std::deque<RegionBookkeeping> regions;
    std::mutex mutex;

    RegionBookkeeping* find_region_nolock(RegionId id) {
        // Linear search because the amount of concurrent regions should be small.
        auto it = std::find_if(
            regions.begin(),
            regions.end(),
            [id](const RegionBookkeeping& r) { return r.id == id; });

        if (it == regions.end())
            return nullptr;
        else
            return &*it;
    }

    void release_region(RegionId id) {
        std::lock_guard lock(mutex);

        auto* region = find_region_nolock(id);
        if (region == nullptr)
            return;

        region->is_held = false;

        process_backlog_nolock();
    }

    // Prints the backlog of the front region and drops regions that have
    // been released, until a region that is still held is at the front.
    void process_backlog_nolock()
    {
        while(!regions.empty()) {
            auto& region = regions.front();

            for(auto& part : region.pending_parts) {
                out << part;
            }

            // The parts are on the stream now. Forget them, otherwise a
            // front region that is still held would get its backlog
            // printed again each time some later region is released.
            region.pending_parts.clear();

            // If the region is still held then we don't
            // want to start printing stuff from the next region.
            if (region.is_held)
                break;

            regions.pop_front();
        }
    }

public:
    SynchronizedRegionLogger(std::ostream& s) :
        out(s)
    {
    }

    // Opens a new region. Output written through the returned handle
    // appears after the output of all regions opened before it.
    [[nodiscard]] Region new_region() {
        const auto id = init_next_region();
        return Region(*this, id);
    }
};
extern SynchronizedRegionLogger sync_region_cout;
/// xorshift64star Pseudo-Random Number Generator
/// This class is based on original code written and dedicated
@@ -83,6 +314,19 @@ std::ostream& operator<<(std::ostream&, SyncCout);
/// For further analysis see
/// <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>
// Hash a string into 64 bits: starting from a fixed initial value, each
// character is xor-ed in, the state is multiplied by a fixed constant and
// then folded with its own upper bits.
static uint64_t string_hash(const std::string& str)
{
    constexpr uint64_t initial_state = 525201411107845655ull;
    constexpr uint64_t multiplier = 0x5bd1e9955bd1e995ull;

    uint64_t state = initial_state;
    for (auto it = str.begin(); it != str.end(); ++it) {
        state ^= static_cast<uint64_t>(*it);
        state *= multiplier;
        state ^= state >> 47;
    }
    return state;
}
class PRNG {
uint64_t s;
@@ -94,7 +338,9 @@ class PRNG {
}
public:
// Default: seed from the current time.
PRNG() { set_seed_from_time(); }
// Seed with an explicit value; a zero seed trips the assert.
PRNG(uint64_t seed) : s(seed) { assert(seed); }
// Seed from a string; see set_seed(const std::string&) for how it is interpreted.
PRNG(const std::string& seed) { set_seed(seed); }
template<typename T> T rand() { return T(rand64()); }
@@ -107,6 +353,40 @@ public:
// Return the current internal state `s` (the value last stored as the seed).
uint64_t get_seed() const { return s; }
// Overwrite the internal state with `seed`. No check for zero is made here.
void set_seed(uint64_t seed) { s = seed; }
// Build a fresh 64-bit seed out of this generator's output.
// Each of the 64 result bits is taken from a randomly chosen bit position
// of a separate rand64() draw (two draws per bit: one for the position,
// one for the value).
uint64_t next_random_seed()
{
    uint64_t seed = 0;
    for (int i = 0; i < 64; ++i)
    {
        const auto off = rand64() % 64;
        const uint64_t bit = (rand64() >> off) & uint64_t(1);
        // Shift first, then insert. Shifting after the insert would push
        // the first sampled bit out of the word and leave bit 0 always zero.
        seed = (seed << 1) | bit;
    }
    return seed;
}
// Seed the generator with the system clock's tick count since its epoch.
void set_seed_from_time()
{
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    set_seed(since_epoch.count());
}
// Seed the generator from a string:
//   - empty string      -> seed from the current time
//   - only digits       -> parsed as an unsigned 64-bit number
//   - anything else     -> string_hash() of the text
// Note: std::stoull throws std::out_of_range if the digit string does not
// fit into an unsigned long long; that exception is not handled here.
void set_seed(const std::string& str)
{
    if (str.empty())
    {
        set_seed_from_time();
        return;
    }

    // std::isdigit has undefined behaviour for negative arguments, so the
    // characters must be seen as unsigned char (matters for non-ASCII bytes
    // on platforms where char is signed).
    const bool digits_only = std::all_of(str.begin(), str.end(),
        [](unsigned char c) { return std::isdigit(c) != 0; });

    if (digits_only)
    {
        set_seed(std::stoull(str));
    }
    else
    {
        set_seed(string_hash(str));
    }
}
};
// Display a random seed. (For debugging)
@@ -130,6 +410,74 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
#endif
}
// This bitset can be accessed concurrently, provided
// the concurrent accesses are performed on distinct
// instances of the underlying type. That means the concurrent
// accesses need to be spaced by at least
// bits_per_bucket bits.
// But at least best_concurrent_access_stride bits
// is recommended to prevent false sharing.
template <uint64_t N>
struct LargeBitset
{
private:
    constexpr static uint64_t cache_line_size = 64;

public:
    using UnderlyingType = uint64_t;

    constexpr static uint64_t num_bits = N;
    constexpr static uint64_t bits_per_bucket = 8 * sizeof(uint64_t);
    // Number of buckets needed to hold num_bits bits (rounded up).
    constexpr static uint64_t num_buckets = (num_bits + bits_per_bucket - 1) / bits_per_bucket;
    constexpr static uint64_t best_concurrent_access_stride = 8 * cache_line_size;

    // All bits start cleared.
    LargeBitset()
    {
        std::fill(std::begin(bits), std::end(bits), 0);
    }

    // Set bit `idx`. No range check is performed.
    void set(uint64_t idx)
    {
        const uint64_t bucket = idx / bits_per_bucket;
        const uint64_t bit = uint64_t(1) << (idx % bits_per_bucket);
        bits[bucket] |= bit;
    }

    // Return whether bit `idx` is set. No range check is performed.
    bool test(uint64_t idx) const
    {
        const uint64_t bucket = idx / bits_per_bucket;
        const uint64_t bit = uint64_t(1) << (idx % bits_per_bucket);
        return (bits[bucket] & bit) != 0;
    }

    // Return the number of set bits.
    uint64_t count() const
    {
        uint64_t c = 0;
        uint64_t i = 0;

        // Process four buckets per iteration. The bound is written as
        // `i + 3 < num_buckets` because `num_buckets - 3` is an unsigned
        // subtraction that wraps around when there are fewer than three
        // buckets, which would make the loop read past the array.
        for (; i + 3 < num_buckets; i += 4)
        {
            uint64_t c0 = popcount(bits[i+0]);
            uint64_t c1 = popcount(bits[i+1]);
            uint64_t c2 = popcount(bits[i+2]);
            uint64_t c3 = popcount(bits[i+3]);
            c0 += c1;
            c2 += c3;
            c += c0 + c2;
        }

        // Remaining (at most three) buckets.
        for (; i < num_buckets; ++i)
        {
            c += popcount(bits[i]);
        }

        return c;
    }

private:
    alignas(cache_line_size) UnderlyingType bits[num_buckets];
};
/// Under Windows it is not possible for a process to run on more than one
/// logical processor group. This usually means to be limited to use max 64
/// cores. To overcome this, some special platform specific API should be
@@ -155,6 +503,7 @@ std::string now_string();
// Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
// Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
std::uint64_t get_file_size(std::fstream& fs);
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func);
int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
@@ -165,7 +514,9 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
// async version of PRNG
struct AsyncPRNG
{
// Default: the wrapped PRNG is default-constructed.
AsyncPRNG() : prng() { }
// Seed the wrapped PRNG with an explicit value; a zero seed trips the assert.
AsyncPRNG(uint64_t seed) : prng(seed) { assert(seed); }
// Seed the wrapped PRNG from a string.
AsyncPRNG(const std::string& seed) : prng(seed) { }
// [ASYNC] Extract one random number.
template<typename T> T rand() {
std::unique_lock<std::mutex> lk(mutex);
@@ -199,20 +550,51 @@ inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
// Mathematical function used for progress calculation and learning
namespace Math {
// Sigmoid function
// = 1.0 / (1.0 + std::exp(-x))
double sigmoid(double x);
inline double sigmoid(double x)
{
    // f(x) = 1 / (1 + exp(-x))
    const double denominator = 1.0 + std::exp(-x);
    return 1.0 / denominator;
}
// Differentiation of sigmoid function
// = sigmoid(x) * (1.0-sigmoid(x))
double dsigmoid(double x);
inline double dsigmoid(double x)
{
    // With f(x) = 1 / (1 + exp(-x)) the first derivative is
    //   f'(x) = f(x) * (1 - f(x))
    const double fx = sigmoid(x);
    return fx * (1.0 - fx);
}
// Restrict v to the closed interval [lo, hi]: returns lo when v < lo,
// hi when v > hi, and v itself otherwise.
// * In Stockfish, this function is written in bitboard.h.
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
    if (v < lo)
        return lo;
    if (v > hi)
        return hi;
    return v;
}
}
namespace Algo {
// Fisher-Yates shuffle of `buf` in place.
// For every position, the element is swapped with one chosen from the
// not-yet-fixed tail, using prng.rand(k) to pick the offset into the tail.
template <typename Rng, typename T>
void shuffle(std::vector<T>& buf, Rng&& prng)
{
    const auto count = buf.size();
    uint64_t pos = 0;
    while (pos < count)
    {
        const auto pick = prng.rand(count - pos) + pos;
        std::swap(buf[pos], buf[pick]);
        ++pos;
    }
}
// Split `input` at every occurrence of `delimiter`.
// Empty fields between two delimiters are kept; an empty input yields no
// fields, and a delimiter at the very end does not add a trailing empty field.
inline std::vector<std::string> split(const std::string& input, char delimiter) {
    std::vector<std::string> fields;

    std::string::size_type start = 0;
    while (start < input.size()) {
        const auto stop = input.find(delimiter, start);
        if (stop == std::string::npos) {
            // Last field: everything up to the end of the input.
            fields.push_back(input.substr(start));
            break;
        }
        fields.push_back(input.substr(start, stop - start));
        start = stop + 1;
    }

    return fields;
}
}
// --------------------
@@ -225,7 +607,7 @@ struct Path
{
// Combine the path name and file name and return it.
// If the folder name is not an empty string, append it if there is no'/' or'\\' at the end.
static std::string Combine(const std::string& folder, const std::string& filename)
static std::string combine(const std::string& folder, const std::string& filename)
{
if (folder.length() >= 1 && *folder.rbegin() != '/' && *folder.rbegin() != '\\')
return folder + "/" + filename;
@@ -234,7 +616,7 @@ struct Path
}
// Get the file name part (excluding the folder name) from the full path expression.
static std::string GetFileName(const std::string& path)
static std::string get_file_name(const std::string& path)
{
// I don't know which "\" or "/" is used.
auto path_index1 = path.find_last_of("\\") + 1;
@@ -259,7 +641,24 @@ public:
template <typename U> AlignedAllocator(const AlignedAllocator<U>&) {}
T* allocate(std::size_t n) { return (T*)std_aligned_alloc(alignof(T), n * sizeof(T)); }
void deallocate(T* p, std::size_t n) { std_aligned_free(p); }
void deallocate(T* p, std::size_t ) { std_aligned_free(p); }
};
// Allocator that obtains storage through std_aligned_alloc() with a fixed
// alignment of cache_line_size bytes and returns it through std_aligned_free().
// It carries no state.
template <typename T>
class CacheLineAlignedAllocator {
public:
    using value_type = T;

    constexpr static uint64_t cache_line_size = 64;

    CacheLineAlignedAllocator() {}
    CacheLineAlignedAllocator(const CacheLineAlignedAllocator&) {}
    CacheLineAlignedAllocator(CacheLineAlignedAllocator&&) {}

    // Converting constructor so the allocator can be rebound to another type.
    template <typename U> CacheLineAlignedAllocator(const CacheLineAlignedAllocator<U>&) {}

    // Allocate storage for n objects of type T.
    T* allocate(std::size_t n) {
        const std::size_t bytes = n * sizeof(T);
        return (T*)std_aligned_alloc(cache_line_size, bytes);
    }

    // Release storage obtained from allocate(); the element count is unused.
    void deallocate(T* p, std::size_t) {
        std_aligned_free(p);
    }
};
// --------------------
@@ -273,11 +672,13 @@ namespace Dependency
// So when calling getline() on fstream,
// just write getline() instead of std::getline() and use this function.
extern bool getline(std::ifstream& fs, std::string& s);
}
// Create a folder.
// Specify relative to the current folder. Japanese is not used for dir_name.
// Returns 0 on success, non-zero on failure.
extern int mkdir(std::string dir_name);
namespace CommandLine {
void init(int argc, char* argv[]);
extern std::string binaryDirectory; // path of the executable directory
extern std::string workingDirectory; // path of the working directory
}
#endif // #ifndef MISC_H_INCLUDED
+3 -2
View File
@@ -73,8 +73,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
assert(d <= 0);
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
!(ttm && (depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare)
&& pos.pseudo_legal(ttm));
!( ttm
&& (pos.checkers() || depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare)
&& pos.pseudo_legal(ttm));
}
/// MovePicker constructor for ProbCut: we generate captures with SEE greater
@@ -0,0 +1,54 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of input features and network structure used in NNUE evaluation function
#ifndef NNUE_HALFKA_256X2_32_32_H_INCLUDED
#define NNUE_HALFKA_256X2_32_32_H_INCLUDED
#include "nnue/features/feature_set.h"
#include "nnue/features/half_ka.h"
#include "nnue/layers/input_slice.h"
#include "nnue/layers/affine_transform.h"
#include "nnue/layers/clipped_relu.h"
// Network architecture definition built on the HalfKA feature set.
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKA<Features::Side::kFriend>>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// Define network structure
// The input slice is twice the transformed feature dimension
// (presumably one half per perspective -- confirm against the feature transformer).
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
// Two hidden layers, each an AffineTransform parameterised with 32 and wrapped in ClippedReLU.
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
// Final AffineTransform parameterised with 1, without a ClippedReLU wrapper.
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
// The complete network type is the output layer, which nests all previous layers.
using Network = Layers::OutputLayer;
} // namespace Eval::NNUE
#endif // #ifndef NNUE_HALFKA_256X2_32_32_H_INCLUDED
@@ -1,42 +1,57 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of input features and network structure used in NNUE evaluation function
#ifndef HALFKP_CR_EP_256X2_32_32_H
#define HALFKP_CR_EP_256X2_32_32_H
#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
#include "../features/castling_right.h"
#include "../features/enpassant.h"
#include "nnue/features/feature_set.h"
#include "nnue/features/half_kp.h"
#include "nnue/features/castling_right.h"
#include "nnue/features/enpassant.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
#include "nnue/layers/input_slice.h"
#include "nnue/layers/affine_transform.h"
#include "nnue/layers/clipped_relu.h"
namespace Eval {
namespace NNUE {
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
Features::EnPassant>;
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
Features::EnPassant>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
// Define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval::NNUE
} // namespace Eval
#endif // HALFKP_CR_EP_256X2_32_32_H
#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
@@ -0,0 +1,37 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
#include "nnue/features/feature_set.h"
#include "nnue/features/half_kp.h"
#include "nnue/features/castling_right.h"
#include "nnue/layers/input_slice.h"
#include "nnue/layers/affine_transform.h"
#include "nnue/layers/clipped_relu.h"
// Network architecture definition built on the HalfKP and CastlingRight feature sets.
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
// Define network structure
// The input slice is twice the transformed feature dimension
// (presumably one half per perspective -- confirm against the feature transformer).
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
// Two hidden layers, each an AffineTransform parameterised with 32 and wrapped in ClippedReLU.
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
// Final AffineTransform parameterised with 1, without a ClippedReLU wrapper.
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
// The complete network type is the output layer, which nests all previous layers.
using Network = Layers::OutputLayer;
} // namespace Eval::NNUE
#endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
+30 -30
View File
@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Definition of input features and network structure used in NNUE evaluation function
@@ -21,33 +21,33 @@
#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
#include "../features/feature_set.h"
#include "../features/half_kp.h"
#include "nnue/features/feature_set.h"
#include "nnue/features/half_kp.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
#include "nnue/layers/input_slice.h"
#include "nnue/layers/affine_transform.h"
#include "nnue/layers/clipped_relu.h"
namespace Eval::NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>>;
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 256;
namespace Layers {
namespace Layers {
// Define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
// Define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
} // namespace Layers
using Network = Layers::OutputLayer;
using Network = Layers::OutputLayer;
} // namespace Eval::NNUE
+20 -24
View File
@@ -3,37 +3,33 @@
#ifndef HALFKP_384X2_32_32_H
#define HALFKP_384X2_32_32_H
#include "../features/feature_set.h"
#include "../features/half_kp.h"
#include "nnue/features/feature_set.h"
#include "nnue/features/half_kp.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
#include "nnue/layers/input_slice.h"
#include "nnue/layers/affine_transform.h"
#include "nnue/layers/clipped_relu.h"
namespace Eval {
namespace Eval::NNUE {
namespace NNUE {
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>>;
// Input features used in evaluation function
using RawFeatures = Features::FeatureSet<
Features::HalfKP<Features::Side::kFriend>>;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 384;
// Number of input feature dimensions after conversion
constexpr IndexType kTransformedFeatureDimensions = 384;
namespace Layers {
namespace Layers {
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
// define network structure
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
} // namespace Layers
} // namespace Layers
using Network = Layers::OutputLayer;
using Network = Layers::OutputLayer;
} // namespace NNUE
} // namespace Eval
} // namespace Eval::NNUE
#endif // HALFKP_384X2_32_32_H
@@ -1,42 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_CR_EP_256X2_32_32_H
#define K_P_CR_EP_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/k.h"
#include "../features/p.h"
#include "../features/castling_right.h"
#include "../features/enpassant.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval::NNUE {

    // Feature sets fed into the evaluation function
    using RawFeatures = Features::FeatureSet<
        Features::K,
        Features::P,
        Features::CastlingRight,
        Features::EnPassant>;

    // Size of the feature vector once it has been transformed
    constexpr IndexType kTransformedFeatureDimensions = 256;

    namespace Layers {

        // Layer stack of the network, from input to output
        using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
        using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
        using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
        using OutputLayer = AffineTransform<HiddenLayer2, 1>;

    } // namespace Layers

    using Network = Layers::OutputLayer;

} // namespace Eval::NNUE
#endif // K_P_CR_EP_256X2_32_32_H
@@ -1,41 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_CR_256X2_32_32_H
#define K_P_CR_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/k.h"
#include "../features/p.h"
#include "../features/castling_right.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval::NNUE {

    // Feature sets fed into the evaluation function
    using RawFeatures = Features::FeatureSet<
        Features::K,
        Features::P,
        Features::CastlingRight>;

    // Size of the feature vector once it has been transformed
    constexpr IndexType kTransformedFeatureDimensions = 256;

    namespace Layers {

        // Layer stack of the network, from input to output
        using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
        using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
        using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
        using OutputLayer = AffineTransform<HiddenLayer2, 1>;

    } // namespace Layers

    using Network = Layers::OutputLayer;

} // namespace Eval::NNUE
#endif // K_P_CR_256X2_32_32_H
-38
View File
@@ -1,38 +0,0 @@
// Definition of input features and network structure used in NNUE evaluation function
#ifndef K_P_256X2_32_32_H
#define K_P_256X2_32_32_H
#include "../features/feature_set.h"
#include "../features/k.h"
#include "../features/p.h"
#include "../layers/input_slice.h"
#include "../layers/affine_transform.h"
#include "../layers/clipped_relu.h"
namespace Eval::NNUE {

    // Feature sets fed into the evaluation function
    using RawFeatures = Features::FeatureSet<
        Features::K,
        Features::P>;

    // Size of the feature vector once it has been transformed
    constexpr IndexType kTransformedFeatureDimensions = 256;

    namespace Layers {

        // Layer stack of the network, from input to output
        using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
        using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
        using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
        using OutputLayer = AffineTransform<HiddenLayer2, 1>;

    } // namespace Layers

    using Network = Layers::OutputLayer;

} // namespace Eval::NNUE
#endif // K_P_256X2_32_32_H
+210 -75
View File
@@ -18,20 +18,29 @@
// Code for calculating NNUE evaluation function
#include <fstream>
#include "evaluate_nnue.h"
#include "position.h"
#include "misc.h"
#include "uci.h"
#include "types.h"
#include <iostream>
#include <string>
#include <fstream>
#include <set>
#include "../evaluate.h"
#include "../position.h"
#include "../misc.h"
#include "../uci.h"
#include "../types.h"
#include "evaluate_nnue.h"
namespace Eval::NNUE {
uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE },
@@ -53,7 +62,7 @@ namespace Eval::NNUE {
};
// Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer;
LargePagePtr<FeatureTransformer> feature_transformer;
// Evaluation function
AlignedPtr<Network> network;
@@ -65,50 +74,77 @@ namespace Eval::NNUE {
std::string savedfileName = "nn.bin";
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString() {
return "Features=" + FeatureTransformer::GetStructureString() +
",Network=" + Network::GetStructureString();
std::string get_architecture_string() {
return "Features=" + FeatureTransformer::get_structure_string() +
",Network=" + Network::get_structure_string();
}
// Return the layer description of the feature transformer followed, on a
// new line, by the layer description of the network.
std::string get_layers_info() {
    std::string info = FeatureTransformer::get_layers_info();
    info += '\n';
    info += Network::get_layers_info();
    return info;
}
UseNNUEMode useNNUE;
std::string eval_file_loaded = "None";
namespace Detail {
// Initialize the evaluation function parameters
template <typename T>
void Initialize(AlignedPtr<T>& pointer) {
void initialize(AlignedPtr<T>& pointer) {
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
std::memset(pointer.get(), 0, sizeof(T));
}
// Allocate zero-filled storage for a T through aligned_large_pages_alloc()
// and hand ownership to `pointer`.
template <typename T>
void initialize(LargePagePtr<T>& pointer) {
    static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");

    auto* const storage = aligned_large_pages_alloc(sizeof(T));
    pointer.reset(reinterpret_cast<T*>(storage));
    std::memset(pointer.get(), 0, sizeof(T));
}
// Read evaluation function parameters
template <typename T>
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
bool ReadParameters(std::istream& stream, T& reference) {
std::uint32_t header;
header = read_little_endian<std::uint32_t>(stream);
if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream);
return reference.ReadParameters(stream);
}
// Write the parameters held by `pointer` to `stream`, preceded by the
// 32-bit hash value of T as a header. Returns what T::WriteParameters returns.
template <typename T>
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
    constexpr std::uint32_t hash = T::GetHashValue();

    const char* const hash_bytes = reinterpret_cast<const char*>(&hash);
    stream.write(hash_bytes, sizeof(hash));

    return pointer->WriteParameters(stream);
}
// Overload for large-page storage: write the 32-bit hash value of T as a
// header, then the parameters held by `pointer`.
template <typename T>
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
    constexpr std::uint32_t hash = T::GetHashValue();

    const char* const hash_bytes = reinterpret_cast<const char*>(&hash);
    stream.write(hash_bytes, sizeof(hash));

    return pointer->WriteParameters(stream);
}
} // namespace Detail
// Initialize the evaluation function parameters
void Initialize() {
void initialize() {
Detail::Initialize(feature_transformer);
Detail::Initialize(network);
Detail::initialize(feature_transformer);
Detail::initialize(network);
}
// Read network header
bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
{
std::uint32_t version, size;
@@ -122,13 +158,17 @@ namespace Eval::NNUE {
}
// write the header
bool WriteHeader(std::ostream& stream,
bool write_header(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture) {
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
stream.write(architecture.data(), size);
return !stream.fail();
}
@@ -137,81 +177,176 @@ namespace Eval::NNUE {
std::uint32_t hash_value;
std::string architecture;
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
if (!read_header(stream, &hash_value, &architecture)) return false;
if (hash_value != kHashValue) return false;
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
if (!Detail::ReadParameters(stream, network)) return false;
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
if (!Detail::ReadParameters(stream, *network)) return false;
return stream && stream.peek() == std::ios::traits_type::eof();
}
// write evaluation function parameters
bool WriteParameters(std::ostream& stream) {
if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
if (!Detail::WriteParameters(stream, feature_transformer)) return false;
if (!Detail::WriteParameters(stream, network)) return false;
if (!write_header(stream, kHashValue, get_architecture_string()))
return false;
if (!Detail::WriteParameters(stream, feature_transformer))
return false;
if (!Detail::WriteParameters(stream, network))
return false;
return !stream.fail();
}
// Proceed with the difference calculation if possible
static void UpdateAccumulatorIfPossible(const Position& pos) {
feature_transformer->UpdateAccumulatorIfPossible(pos);
}
// Calculate the evaluation value
static Value ComputeScore(const Position& pos, bool refresh) {
auto& accumulator = pos.state()->accumulator;
if (!refresh && accumulator.computed_score) {
return accumulator.score;
}
alignas(kCacheLineSize) TransformedFeatureType
transformed_features[FeatureTransformer::kBufferSize];
feature_transformer->Transform(pos, transformed_features, refresh);
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
const auto output = network->Propagate(transformed_features, buffer);
auto score = static_cast<Value>(output[0] / FV_SCALE);
accumulator.score = score;
accumulator.computed_score = true;
return accumulator.score;
}
// Load the evaluation function file
bool load_eval_file(const std::string& evalFile) {
Initialize();
if (Options["SkipLoadingEval"])
{
std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
return true;
}
fileName = evalFile;
std::ifstream stream(evalFile, std::ios::binary);
const bool result = ReadParameters(stream);
return result;
}
}
// Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) {
return ComputeScore(pos, false);
// We manually align the arrays on the stack because with gcc < 9.3
// overaligning stack variables with alignas() doesn't work correctly.
constexpr uint64_t alignment = kCacheLineSize;
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
TransformedFeatureType transformed_features_unaligned[
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
char buffer_unaligned[Network::kBufferSize + alignment];
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
#else
alignas(alignment)
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
alignas(alignment) char buffer[Network::kBufferSize];
#endif
ASSERT_ALIGNED(transformed_features, alignment);
ASSERT_ALIGNED(buffer, alignment);
feature_transformer->Transform(pos, transformed_features);
const auto output = network->Propagate(transformed_features, buffer);
return static_cast<Value>(output[0] / FV_SCALE);
}
// Evaluation function. Perform full calculation.
Value compute_eval(const Position& pos) {
return ComputeScore(pos, true);
// Load the evaluation parameters from `stream` (a file stream or a memory
// stream). The storage is re-initialized first and `name` is remembered in
// fileName. Returns whether reading the parameters succeeded.
bool load_eval(std::string name, std::istream& stream) {
    initialize();
    fileName = name;

    const bool loaded = ReadParameters(stream);
    return loaded;
}
// Translate the value of a UCI option into a UseNNUEMode.
// "false", "true" and "pure" map to the corresponding mode; any other
// value falls back to UseNNUEMode::False.
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
{
    return mode == "false" ? UseNNUEMode::False
         : mode == "true"  ? UseNNUEMode::True
         : mode == "pure"  ? UseNNUEMode::Pure
         :                   UseNNUEMode::False;
}
void init() {
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
{
eval_file_loaded.clear();
return;
}
// Proceed with the difference calculation if possible
void update_eval(const Position& pos) {
UpdateAccumulatorIfPossible(pos);
std::string eval_file = std::string(Options["EvalFile"]);
#if defined(DEFAULT_NNUE_DIRECTORY)
#define stringify2(x) #x
#define stringify(x) stringify2(x)
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
#else
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
#endif
for (std::string directory : dirs)
{
if (eval_file_loaded != eval_file)
{
std::ifstream stream(directory + eval_file, std::ios::binary);
if (load_eval(eval_file, stream))
{
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
eval_file_loaded = eval_file;
}
else
{
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
eval_file_loaded.clear();
}
}
}
#undef stringify2
#undef stringify
}
/// NNUE::verify() verifies that the last net used was loaded successfully
void verify_eval_file_loaded() {
std::string eval_file = std::string(Options["EvalFile"]);
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
{
UCI::OptionsMap defaults;
UCI::init(defaults);
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
std::string msg5 = "The engine will be terminated now.";
sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg4 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;
std::exit(EXIT_FAILURE);
}
if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
}
/// In training we override eval file so this is useful.
void verify_any_net_loaded() {
if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
{
UCI::OptionsMap defaults;
UCI::init(defaults);
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
std::string msg5 = "The engine will be terminated now.";
sync_cout << "info string ERROR: " << msg1 << sync_endl;
sync_cout << "info string ERROR: " << msg2 << sync_endl;
sync_cout << "info string ERROR: " << msg3 << sync_endl;
sync_cout << "info string ERROR: " << msg5 << sync_endl;
std::exit(EXIT_FAILURE);
}
if (useNNUE != UseNNUEMode::False)
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
else
sync_cout << "info string classical evaluation enabled" << sync_endl;
}
} // namespace Eval::NNUE
+39 -6
View File
@@ -23,10 +23,19 @@
#include "nnue_feature_transformer.h"
#include "misc.h"
#include <memory>
namespace Eval::NNUE {
// Evaluation mode, stored in the global `useNNUE`.
// NOTE(review): presumably False = classical evaluation only, True = NNUE
// enabled, Pure = NNUE only; confirm against the evaluation code.
enum struct UseNNUEMode
{
False,
True,
Pure
};
// Hash value of evaluation function structure
constexpr std::uint32_t kHashValue =
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
@@ -40,11 +49,22 @@ namespace Eval::NNUE {
}
};
// Deleter used by LargePagePtr: destroys the object in place and then
// releases its storage through aligned_large_pages_free().
template <typename T>
struct LargePageDeleter {
    void operator()(T* ptr) const {
        // Only the destructor runs here; the storage is freed separately below.
        std::destroy_at(ptr);
        aligned_large_pages_free(ptr);
    }
};
template <typename T>
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
template <typename T>
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
// Input feature converter
extern AlignedPtr<FeatureTransformer> feature_transformer;
extern LargePagePtr<FeatureTransformer> feature_transformer;
// Evaluation function
extern AlignedPtr<Network> network;
@@ -55,16 +75,22 @@ namespace Eval::NNUE {
// Saved evaluation function file name
extern std::string savedfileName;
extern UseNNUEMode useNNUE;
extern std::string eval_file_loaded;
// Get a string that represents the structure of the evaluation function
std::string GetArchitectureString();
std::string get_architecture_string();
std::string get_layers_info();
// read the header
bool ReadHeader(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture);
bool read_header(std::istream& stream,
std::uint32_t* hash_value, std::string* architecture);
// write the header
bool WriteHeader(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture);
bool write_header(std::ostream& stream,
std::uint32_t hash_value, const std::string& architecture);
// read evaluation function parameters
bool ReadParameters(std::istream& stream);
@@ -72,6 +98,13 @@ namespace Eval::NNUE {
// write evaluation function parameters
bool WriteParameters(std::ostream& stream);
Value evaluate(const Position& pos);
bool load_eval(std::string name, std::istream& stream);
void init();
void verify_eval_file_loaded();
void verify_any_net_loaded();
} // namespace Eval::NNUE
#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
+300 -189
View File
@@ -1,231 +1,342 @@
// Code for learning NNUE evaluation function
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include <random>
#include <random>
#include <fstream>
#include "../learn/learn.h"
#include "../learn/learning_tools.h"
#include "../position.h"
#include "../uci.h"
#include "../misc.h"
#include "../thread_win32_osx.h"
#include "../eval/evaluate_common.h"
#include <filesystem>
#include "evaluate_nnue.h"
#include "evaluate_nnue_learner.h"
#include "trainer/features/factorizer_feature_set.h"
#include "trainer/features/factorizer_half_kp.h"
#include "trainer/features/all_factorizers.h"
#include "trainer/trainer_feature_transformer.h"
#include "trainer/trainer_input_slice.h"
#include "trainer/trainer_affine_transform.h"
#include "trainer/trainer_clipped_relu.h"
#include "trainer/trainer_sum.h"
namespace Eval {
#include "position.h"
#include "uci.h"
#include "misc.h"
#include "thread_win32_osx.h"
#include "thread.h"
namespace NNUE {
// Code for learning NNUE evaluation function
namespace Eval::NNUE {
namespace {
namespace {
// learning data
std::vector<Example> examples;
// learning data
std::vector<Example> examples;
// Mutex for exclusive control of examples
std::mutex examples_mutex;
// Mutex for exclusive control of examples
std::mutex examples_mutex;
// number of samples in mini-batch
uint64_t batch_size;
// number of samples in mini-batch
uint64_t batch_size;
// random number generator
std::mt19937 rng;
// random number generator
std::mt19937 rng;
// learner
std::shared_ptr<Trainer<Network>> trainer;
// learner
std::shared_ptr<Trainer<Network>> trainer;
// Learning rate scale
double global_learning_rate_scale;
// Tell the learner options such as hyperparameters
void send_messages(std::vector<Message> messages) {
for (auto& message : messages) {
trainer->send_message(&message);
assert(message.num_receivers > 0);
}
}
// Get the learning rate scale
double GetGlobalLearningRateScale() {
return global_learning_rate_scale;
}
} // namespace
// Tell the learner options such as hyperparameters
void SendMessages(std::vector<Message> messages) {
for (auto& message : messages) {
trainer->SendMessage(&message);
assert(message.num_receivers > 0);
}
}
// Initialize learning
void initialize_training(
const std::string& seed,
SynchronizedRegionLogger::Region& out) {
} // namespace
#if defined (OPENBLAS_VERSION)
openblas_set_num_threads(1);
#elif defined (INTEL_MKL_VERSION)
mkl_set_num_threads(1);
#endif
// Initialize learning
void InitializeTraining(double eta1, uint64_t eta1_epoch,
double eta2, uint64_t eta2_epoch, double eta3) {
std::cout << "Initializing NN training for "
<< GetArchitectureString() << std::endl;
out << "INFO (initialize_training): Initializing NN training for "
<< get_architecture_string() << std::endl;
assert(feature_transformer);
assert(network);
trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
out << std::endl;
if (Options["SkipLoadingEval"]) {
trainer->Initialize(rng);
}
out << "Layers:\n"
<< get_layers_info() << std::endl;
global_learning_rate_scale = 1.0;
EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
}
out << std::endl;
// set the number of samples in the mini-batch
void SetBatchSize(uint64_t size) {
assert(size > 0);
batch_size = size;
}
out << "Factorizers:\n"
<< Features::Factorizer<RawFeatures>::get_factorizers_string() << std::endl;
// set the learning rate scale
void SetGlobalLearningRateScale(double scale) {
global_learning_rate_scale = scale;
}
out << std::endl;
// Set options such as hyperparameters
void SetOptions(const std::string& options) {
std::vector<Message> messages;
for (const auto& option : Split(options, ',')) {
const auto fields = Split(option, '=');
assert(fields.size() == 1 || fields.size() == 2);
if (fields.size() == 1) {
messages.emplace_back(fields[0]);
} else {
messages.emplace_back(fields[0], fields[1]);
}
}
SendMessages(std::move(messages));
}
assert(feature_transformer);
assert(network);
// Reread the evaluation function parameters for learning from the file
void RestoreParameters(const std::string& dir_name) {
const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
std::ifstream stream(file_name, std::ios::binary);
bool result = ReadParameters(stream);
assert(result);
trainer = Trainer<Network>::create(network.get(), feature_transformer.get());
rng.seed(PRNG(seed).rand<uint64_t>());
SendMessages({{"reset"}});
}
// Add 1 sample of learning data
void AddExample(Position& pos, Color rootColor,
const Learner::PackedSfenValue& psv, double weight) {
Example example;
if (rootColor == pos.side_to_move()) {
example.sign = 1;
} else {
example.sign = -1;
}
example.psv = psv;
example.weight = weight;
Features::IndexList active_indices[2];
for (const auto trigger : kRefreshTriggers) {
RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
}
if (pos.side_to_move() != WHITE) {
active_indices[0].swap(active_indices[1]);
}
for (const auto color : Colors) {
std::vector<TrainingFeature> training_features;
for (const auto base_index : active_indices[color]) {
static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
(1 << TrainingFeature::kIndexBits), "");
Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
base_index, &training_features);
}
std::sort(training_features.begin(), training_features.end());
auto& unique_features = example.training_features[color];
for (const auto& feature : training_features) {
if (!unique_features.empty() &&
feature.GetIndex() == unique_features.back().GetIndex()) {
unique_features.back() += feature;
} else {
unique_features.push_back(feature);
}
}
}
std::lock_guard<std::mutex> lock(examples_mutex);
examples.push_back(std::move(example));
}
// update the evaluation function parameters
void UpdateParameters(uint64_t epoch) {
assert(batch_size > 0);
EvalLearningTools::Weight::calc_eta(epoch);
const auto learning_rate = static_cast<LearnFloatType>(
get_eta() / batch_size);
std::lock_guard<std::mutex> lock(examples_mutex);
std::shuffle(examples.begin(), examples.end(), rng);
while (examples.size() >= batch_size) {
std::vector<Example> batch(examples.end() - batch_size, examples.end());
examples.resize(examples.size() - batch_size);
const auto network_output = trainer->Propagate(batch);
std::vector<LearnFloatType> gradients(batch.size());
for (std::size_t b = 0; b < batch.size(); ++b) {
const auto shallow = static_cast<Value>(Round<std::int32_t>(
batch[b].sign * network_output[b] * kPonanzaConstant));
const auto& psv = batch[b].psv;
const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
if (Options["SkipLoadingEval"]) {
out << "INFO (initialize_training): Performing random net initialization.\n";
trainer->initialize(rng);
}
}
trainer->Backpropagate(gradients.data(), learning_rate);
}
SendMessages({{"quantize_parameters"}});
}
// set the number of samples in the mini-batch
void set_batch_size(uint64_t size) {
assert(size > 0);
batch_size = size;
}
// Check if there are any problems with learning
void CheckHealth() {
SendMessages({{"check_health"}});
}
// Set options such as hyperparameters
void set_options(const std::string& options) {
std::vector<Message> messages;
for (const auto& option : Algo::split(options, ',')) {
const auto fields = Algo::split(option, '=');
assert(fields.size() == 1 || fields.size() == 2);
} // namespace NNUE
if (fields.size() == 1) {
messages.emplace_back(fields[0]);
} else {
messages.emplace_back(fields[0], fields[1]);
}
}
// save merit function parameters to a file
void save_eval(std::string dir_name) {
auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
send_messages(std::move(messages));
}
// mkdir() will fail if this folder already exists, but
// Apart from that. If not, I just want you to make it.
// Also, assume that the folders up to EvalSaveDir have been dug.
Dependency::mkdir(eval_dir);
// Reread the evaluation function parameters for learning from the file
void restore_parameters(const std::string& dir_name) {
const std::string file_name = Path::combine(dir_name, NNUE::savedfileName);
std::ifstream stream(file_name, std::ios::binary);
#ifndef NDEBUG
bool result =
#endif
ReadParameters(stream);
#ifndef NDEBUG
assert(result);
#endif
if (Options["SkipLoadingEval"] && NNUE::trainer) {
NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
}
send_messages({{"reset"}});
}
const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
std::ofstream stream(file_name, std::ios::binary);
const bool result = NNUE::WriteParameters(stream);
assert(result);
// Sends the "clear_unobserved_feature_weights" message through send_messages().
void finalize_net() {
send_messages({{"clear_unobserved_feature_weights"}});
}
std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
}
// Adds one training sample built from `pos` to the shared `examples` queue.
//
// pos              - position the features are extracted from
// rootColor        - compared with the side to move to choose the sample's sign
// discrete_nn_eval - stored unchanged in the sample
// psv              - packed training record, stored unchanged in the sample
// weight           - stored unchanged in the sample
//
// The push onto `examples` is done under examples_mutex.
void add_example(
Position& pos,
Color rootColor,
Value discrete_nn_eval,
const Learner::PackedSfenValue& psv,
double weight) {
// get the current eta
double get_eta() {
return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
}
Example example;
// sign is +1 when rootColor is the side to move, -1 otherwise.
if (rootColor == pos.side_to_move()) {
example.sign = 1;
} else {
example.sign = -1;
}
} // namespace Eval
example.discrete_nn_eval = discrete_nn_eval;
example.psv = psv;
example.weight = weight;
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
// Collect the active raw feature indices for every refresh trigger.
Features::IndexList active_indices[2];
for (const auto trigger : kRefreshTriggers) {
RawFeatures::append_active_indices(pos, trigger, active_indices);
}
// Swap the two lists when WHITE is not the side to move.
if (pos.side_to_move() != WHITE) {
active_indices[0].swap(active_indices[1]);
}
// Per-thread scratch buffer, cleared and reused for each color.
static thread_local std::vector<TrainingFeature> s_training_features;
auto& training_features = s_training_features;
for (const auto color : Colors) {
training_features.clear();
for (const auto base_index : active_indices[color]) {
static_assert(Features::Factorizer<RawFeatures>::get_dimensions() <
(1 << TrainingFeature::kIndexBits), "");
Features::Factorizer<RawFeatures>::append_training_features(
base_index, &training_features);
}
// Sorting makes features with equal index adjacent so they can be merged.
std::sort(training_features.begin(), training_features.end());
auto& unique_features = example.training_features[color];
unique_features.reserve(training_features.size());
for (const auto& feature : training_features) {
// Same index as the previous entry: accumulate into it instead of appending.
if (!unique_features.empty() &&
feature.get_index() == unique_features.back().get_index()) {
unique_features.back() += feature;
} else {
unique_features.push_back(feature);
}
}
}
std::lock_guard<std::mutex> lock(examples_mutex);
examples.push_back(std::move(example));
}
// Updates the evaluation function parameters from the queued examples and
// returns the loss accumulated over all processed examples.
//
// While at least batch_size examples remain, the last batch_size examples
// form a mini-batch. Each worker propagates its chunk, computes a gradient
// per example from calc_loss, and backpropagates. Consumed examples are
// erased from the tail of `examples`.
//
// thread_pool   - workers that process the chunks of each mini-batch
// epoch         - only used in the verbose log line
// verbose       - true: log statistics gathered on the first processed batch;
//                 false: print a single '.' as a progress marker
// learning_rate - divided by batch_size before being passed to step_end()
// max_grad      - gradients are clamped to [-max_grad, max_grad]
// calc_loss     - called with (shallow eval, psv.score, psv.game_result, psv.gamePly)
//
// examples_mutex is held for the whole call.
Learner::Loss update_parameters(
ThreadPool& thread_pool,
uint64_t epoch,
bool verbose,
double learning_rate,
double max_grad,
Learner::CalcLossFunc calc_loss)
{
using namespace Learner::Autograd::UnivariateStatic;
assert(batch_size > 0);
learning_rate /= batch_size;
std::lock_guard<std::mutex> lock(examples_mutex);
double abs_eval_diff_sum = 0.0;
double abs_discrete_eval_sum = 0.0;
double gradient_norm = 0.0;
bool collect_stats = verbose;
Learner::Loss loss_sum{};
// One accumulator slot per worker thread, indexed by thread_idx(); the
// slots are summed after all batches are done.
std::vector<double> abs_eval_diff_sum_local(thread_pool.size(), 0.0);
std::vector<double> abs_discrete_eval_sum_local(thread_pool.size(), 0.0);
std::vector<double> gradient_norm_local(thread_pool.size(), 0.0);
std::vector<Learner::Loss> loss_sum_local(thread_pool.size());
// Batches are taken from the back of `examples`; prev_batch_begin marks
// the start of the batch handled in the previous iteration.
auto prev_batch_begin = examples.end();
while ((long)(prev_batch_begin - examples.begin()) >= (long)batch_size) {
auto batch_begin = prev_batch_begin - batch_size;
auto batch_end = prev_batch_begin;
auto size = batch_end - batch_begin;
const auto network_output = trainer->step_start(thread_pool, batch_begin, batch_end);
std::vector<LearnFloatType> gradients(size);
thread_pool.for_each_index_chunk_with_workers(
std::size_t(0), size,
[&](Thread& th, std::size_t offset, std::size_t count) {
const auto thread_id = th.thread_idx();
trainer->propagate(th, offset, count);
for (std::size_t b = offset; b < offset + count; ++b) {
const auto& e = *(batch_begin + b);
const auto shallow = static_cast<Value>(round<std::int32_t>(
e.sign * network_output[b] * kPonanzaConstant));
const auto discrete = e.sign * e.discrete_nn_eval;
const auto& psv = e.psv;
auto loss = calc_loss(shallow, (Value)psv.score, psv.game_result, psv.gamePly);
// Scale the gradient by sign, kPonanzaConstant and the example
// weight, then clamp it to [-max_grad, max_grad].
loss.grad = std::clamp(
loss.grad * e.sign * kPonanzaConstant * e.weight, -max_grad, max_grad);
gradients[b] = static_cast<LearnFloatType>(loss.grad);
loss_sum_local[thread_id] += loss;
// The discrete eval will only be valid before the first backpropagation,
// that is, only for the first batch.
// Similarly, we only want gradients from one batch.
if (collect_stats)
{
abs_eval_diff_sum_local[thread_id] += std::abs(discrete - shallow);
abs_discrete_eval_sum_local[thread_id] += std::abs(discrete);
gradient_norm_local[thread_id] += std::abs(loss.grad);
}
}
trainer->backpropagate(th, gradients.data(), offset, count);
}
);
// We can asynchronously erase the examples that we used in the previous
// step. This can be done safely because we're no longer using these
// examples and erasing from the tail won't invalidate iterators that
// point before the erased range.
examples.erase(prev_batch_begin, examples.end());
prev_batch_begin = batch_begin;
thread_pool.wait_for_workers_finished();
trainer->step_end(thread_pool, learning_rate);
// Statistics are gathered for the first processed batch only.
collect_stats = false;
}
// Drop the examples of the last processed batch (no-op if none was processed).
examples.erase(prev_batch_begin, examples.end());
if (verbose)
{
abs_eval_diff_sum = std::accumulate(abs_eval_diff_sum_local.begin(), abs_eval_diff_sum_local.end(), 0.0);
abs_discrete_eval_sum = std::accumulate(abs_discrete_eval_sum_local.begin(), abs_discrete_eval_sum_local.end(), 0.0);
gradient_norm = std::accumulate(gradient_norm_local.begin(), gradient_norm_local.end(), 0.0);
const double avg_abs_eval_diff = abs_eval_diff_sum / batch_size;
const double avg_abs_discrete_eval = abs_discrete_eval_sum / batch_size;
// NOTE(review): if no batch was processed both averages are 0, so
// avg_relative_error below is 0/0 (NaN).
auto out = sync_region_cout.new_region();
out << "INFO (update_parameters):"
<< " epoch = " << epoch
<< " , avg_abs(trainer_eval-nnue_eval) = " << avg_abs_eval_diff
<< " , avg_abs(nnue_eval) = " << avg_abs_discrete_eval
<< " , avg_relative_error = " << avg_abs_eval_diff / avg_abs_discrete_eval
<< " , batch_size = " << batch_size
<< " , grad_norm = " << gradient_norm
<< std::endl;
} else {
// Display some progress but don't synchronize as
// we can't really decide when to release the output lock here
std::cout << '.';
}
send_messages({{"quantize_parameters"}});
// Combine the per-thread losses into the returned total.
for(auto& loss : loss_sum_local)
{
loss_sum += loss;
}
return loss_sum;
}
// Check if there are any problems with learning.
// Sends the "check_health" message through send_messages().
void check_health() {
send_messages({{"check_health"}});
}
// Saves the current evaluation function parameters to a file.
//
// The target directory is the "EvalSaveDir" option combined with `dir_name`.
// The parameters are written to NNUE::savedfileName inside that directory.
// Progress is logged to a synchronized output region. A failed write is
// reported with an ERROR line instead of being silently ignored in release
// builds.
void save_eval(std::string dir_name) {
    auto eval_dir = Path::combine(Options["EvalSaveDir"], dir_name);

    auto out = sync_region_cout.new_region();

    out << "INFO (save_eval): Saving current evaluation file in " << eval_dir << std::endl;

    // create_directories() also creates missing parent directories and does
    // not treat an already existing directory as an error.
    std::filesystem::create_directories(eval_dir);

    const std::string file_name = Path::combine(eval_dir, NNUE::savedfileName);
    std::ofstream stream(file_name, std::ios::binary);

    const bool result = WriteParameters(stream);
    assert(result);

    // Only reachable with NDEBUG (the assert above fires otherwise): do not
    // claim success when nothing usable was written.
    if (!result) {
        out << "ERROR (save_eval): Failed to write evaluation file " << file_name << std::endl;
        return;
    }

    out << "INFO (save_eval): Finished saving evaluation file in " << eval_dir << std::endl;
}
} // namespace Eval::NNUE
+36 -30
View File
@@ -1,46 +1,52 @@
// Interface used for learning NNUE evaluation function
#ifndef _EVALUATE_NNUE_LEARNER_H_
#ifndef _EVALUATE_NNUE_LEARNER_H_
#define _EVALUATE_NNUE_LEARNER_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "learn/learn.h"
#include "../learn/learn.h"
#include "misc.h"
namespace Eval {
struct ThreadPool;
namespace NNUE {
// Interface used for learning NNUE evaluation function
namespace Eval::NNUE {
// Initialize learning
void InitializeTraining(double eta1, uint64_t eta1_epoch,
double eta2, uint64_t eta2_epoch, double eta3);
// Initialize learning
void initialize_training(
const std::string& seed,
SynchronizedRegionLogger::Region& out);
// set the number of samples in the mini-batch
void SetBatchSize(uint64_t size);
// set the number of samples in the mini-batch
void set_batch_size(uint64_t size);
// set the learning rate scale
void SetGlobalLearningRateScale(double scale);
// Set options such as hyperparameters
void set_options(const std::string& options);
// Set options such as hyperparameters
void SetOptions(const std::string& options);
// Reread the evaluation function parameters for learning from the file
void restore_parameters(const std::string& dir_name);
// Reread the evaluation function parameters for learning from the file
void RestoreParameters(const std::string& dir_name);
// Add 1 sample of learning data
void add_example(
Position& pos,
Color rootColor,
Value discrete_nn_eval,
const Learner::PackedSfenValue& psv,
double weight);
// Add 1 sample of learning data
void AddExample(Position& pos, Color rootColor,
const Learner::PackedSfenValue& psv, double weight);
// update the evaluation function parameters
Learner::Loss update_parameters(
ThreadPool& thread_pool,
uint64_t epoch,
bool verbose,
double learning_rate,
double max_grad,
Learner::CalcLossFunc calc_loss);
// update the evaluation function parameters
void UpdateParameters(uint64_t epoch);
// Check if there are any problems with learning
void check_health();
// Check if there are any problems with learning
void CheckHealth();
void finalize_net();
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
void save_eval(std::string suffix);
} // namespace Eval::NNUE
#endif
+54
View File
@@ -0,0 +1,54 @@
#include "a.h"
#include "index_list.h"
// Definition of input feature A of NNUE evaluation function
namespace Eval::NNUE::Features {
// Orient a square according to perspective: the square index is XOR-ed with
// 63 (a 180° rotation of the board) when `perspective` is non-zero, i.e. for
// black, and returned unchanged otherwise.
//
// Note for "halfka": that architecture was designed with "flip" in mind, and
// it is still untested which approach is better. The rotation has to stay
// until a better architecture that works with "flip" is found; it also lets
// the current master net be used for gensfen (mainly needed for higher
// quality data).
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
// Computes the feature index of piece `pc` on square `s` as seen from
// `perspective`: the oriented square plus the base offset that
// kpp_board_index holds for this piece and perspective.
inline IndexType A::make_index(
    Color perspective, Square s, Piece pc) {
    const auto oriented_square = orient(perspective, s);
    const auto piece_offset = kpp_board_index[pc][perspective];
    return IndexType(oriented_square + piece_offset);
}
// Get a list of indices with a value of 1 among the features
void A::append_active_indices(
const Position& pos,
Color perspective,
IndexList* active) {
Bitboard bb = pos.pieces();
while (bb) {
Square s = pop_lsb(&bb);
active->push_back(make_index(perspective, s, pos.piece_on(s)));
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void A::append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
if (dp.from[i] != SQ_NONE)
removed->push_back(make_index(perspective, dp.from[i], pc));
if (dp.to[i] != SQ_NONE)
added->push_back(make_index(perspective, dp.to[i], pc));
}
}
} // namespace Eval::NNUE::Features
+54
View File
@@ -0,0 +1,54 @@
#ifndef _NNUE_FEATURES_A_H_
#define _NNUE_FEATURES_A_H_
#include "features_common.h"
#include "evaluate.h"
// Definition of input feature A of NNUE evaluation function
// A is a union of P features and K features, so technically the
// same effect can be achieved by including both P and K features
// but it would result in slower index appending because
// P would conditionally exclude K features and vice versa,
// where A doesn't have any conditionals.
namespace Eval::NNUE::Features {
// Feature A: PieceSquare of all pieces (the union of the P and K features).
// All members are static; the class only groups the feature's constants and
// index-generation functions.
class A {
public:
// Feature name
static constexpr const char* kName = "A";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x7A4C414Cu;
// Number of feature dimensions
static constexpr IndexType kDimensions = PS_END2;
// Maximum number of feature indices that can be active (value 1) at the same time
static constexpr IndexType kMaxActiveDimensions = 32;
// Event that triggers a full recalculation instead of an incremental update
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Appends to `active` the indices of the features whose value is 1
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
// Appends to `removed` / `added` the indices of the features whose value
// changed compared with the previous position
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
private:
// Index of a feature for a given piece on some square
static IndexType make_index(Color perspective, Square s, Piece pc);
};
} // namespace Eval::NNUE::Features
#endif // #ifndef _NNUE_FEATURES_A_H_
+36 -44
View File
@@ -1,73 +1,65 @@
//Definition of input feature quantity K of NNUE evaluation function
#if defined(EVAL_NNUE)
#include "castling_right.h"
#include "index_list.h"
namespace Eval {
//Definition of input feature quantity CastlingRight of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace NNUE {
// Get a list of indices with a value of 1 among the features
void CastlingRight::append_active_indices(
const Position& pos,
Color perspective,
IndexList* active) {
namespace Features {
// Get a list of indices with a value of 1 among the features
void CastlingRight::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
int castling_rights = pos.state()->castlingRights;
int relative_castling_rights;
if (perspective == WHITE) {
relative_castling_rights = castling_rights;
relative_castling_rights = castling_rights;
}
else {
// Invert the perspective.
relative_castling_rights = ((castling_rights & 3) << 2)
& ((castling_rights >> 2) & 3);
// Invert the perspective.
relative_castling_rights = ((castling_rights & 3) << 2)
& ((castling_rights >> 2) & 3);
}
for (int i = 0; i <kDimensions; ++i) {
if (relative_castling_rights & (i << 1)) {
active->push_back(i);
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if (relative_castling_rights & (1 << i)) {
active->push_back(i);
}
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* /* added */) {
int previous_castling_rights = pos.state()->previous->castlingRights;
int current_castling_rights = pos.state()->castlingRights;
int relative_previous_castling_rights;
int relative_current_castling_rights;
if (perspective == WHITE) {
relative_previous_castling_rights = previous_castling_rights;
relative_current_castling_rights = current_castling_rights;
relative_previous_castling_rights = previous_castling_rights;
relative_current_castling_rights = current_castling_rights;
}
else {
// Invert the perspective.
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
& ((previous_castling_rights >> 2) & 3);
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
& ((current_castling_rights >> 2) & 3);
// Invert the perspective.
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
& ((previous_castling_rights >> 2) & 3);
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
& ((current_castling_rights >> 2) & 3);
}
for (int i = 0; i < kDimensions; ++i) {
if ((relative_previous_castling_rights & (i << 1)) &&
(relative_current_castling_rights & (i << 1)) == 0) {
removed->push_back(i);
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if ((relative_previous_castling_rights & (1 << i)) &&
(relative_current_castling_rights & (1 << i)) == 0) {
removed->push_back(i);
}
}
}
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
+21 -25
View File
@@ -1,48 +1,44 @@
//Definition of input feature quantity K of NNUE evaluation function
#ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
#define _NNUE_FEATURES_CASTLING_RIGHT_H_
#if defined(EVAL_NNUE)
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
#include "evaluate.h"
namespace NNUE {
//Definition of input feature quantity CastlingRight of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace Features {
// Feature K: Ball position
class CastlingRight {
public:
class CastlingRight {
public:
// feature quantity name
static constexpr const char* kName = "CastlingRight";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x913968AAu;
// number of feature dimensions
static constexpr IndexType kDimensions = 4;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 4;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
#endif
+33 -31
View File
@@ -1,47 +1,49 @@
//Definition of input feature quantity K of NNUE evaluation function
#if defined(EVAL_NNUE)
#include "enpassant.h"
#include "index_list.h"
namespace Eval {
//Definition of input feature quantity EnPassant of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace NNUE {
// Get a list of indices with a value of 1 among the features
void EnPassant::append_active_indices(
const Position& pos,
Color /* perspective */,
IndexList* active) {
namespace Features {
// Get a list of indices with a value of 1 among the features
void EnPassant::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions)
return;
auto epSquare = pos.state()->epSquare;
if (epSquare == SQ_NONE) {
return;
}
if (perspective == BLACK) {
epSquare = rotate180(epSquare);
}
if (epSquare == SQ_NONE)
return;
auto file = file_of(epSquare);
active->push_back(file);
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
// Not implemented.
assert(false);
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::append_changed_indices(
const Position& pos,
Color /* perspective */,
IndexList* removed,
IndexList* added) {
} // namespace Features
auto previous_epSquare = pos.state()->previous->epSquare;
auto epSquare = pos.state()->epSquare;
} // namespace NNUE
if (previous_epSquare != SQ_NONE) {
if (epSquare != SQ_NONE && file_of(epSquare) == file_of(previous_epSquare))
return;
} // namespace Eval
auto file = file_of(previous_epSquare);
removed->push_back(file);
}
#endif // defined(EVAL_NNUE)
if (epSquare != SQ_NONE) {
auto file = file_of(epSquare);
added->push_back(file);
}
}
} // namespace Eval::NNUE::Features
+18 -26
View File
@@ -1,22 +1,15 @@
//Definition of input feature quantity K of NNUE evaluation function
#ifndef _NNUE_FEATURES_ENPASSANT_H_
#define _NNUE_FEATURES_ENPASSANT_H_
#if defined(EVAL_NNUE)
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
#include "evaluate.h"
namespace NNUE {
//Definition of input feature quantity EnPassant of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace Features {
// Feature K: Ball position
class EnPassant {
public:
class EnPassant {
public:
// feature quantity name
static constexpr const char* kName = "EnPassant";
// Hash value embedded in the evaluation function file
@@ -26,23 +19,22 @@ namespace Eval {
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 1;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
};
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
#endif
+251 -197
View File
@@ -26,222 +26,276 @@
namespace Eval::NNUE::Features {
// Class template that represents a list of values
template <typename T, T... Values>
struct CompileTimeList;
// Compile-time list of values of type T.
//   Contains(v) : true when v compares equal to one of the listed values.
//   kValues     : the listed values as a std::array, in the order given.
template <typename T, T... Values>
struct CompileTimeList;

// Non-empty list: one leading value followed by zero or more further values.
template <typename T, T Head, T... Tail>
struct CompileTimeList<T, Head, Tail...> {
    // Checks the leading value first, then defers to the list of the rest.
    static constexpr bool Contains(T value) {
        if (value == Head)
            return true;
        return CompileTimeList<T, Tail...>::Contains(value);
    }

    static constexpr std::array<T, 1 + sizeof...(Tail)> kValues = {{Head, Tail...}};
};

// Namespace-scope definition of the static data member declared above.
template <typename T, T Head, T... Tail>
constexpr std::array<T, 1 + sizeof...(Tail)>
    CompileTimeList<T, Head, Tail...>::kValues;

// Empty list: contains nothing and exposes a zero-length array.
template <typename T>
struct CompileTimeList<T> {
    static constexpr bool Contains(T /*value*/) {
        return false;
    }

    static constexpr std::array<T, 0> kValues = { {} };
};
// Produces a new CompileTimeList with one extra value placed in front of the
// existing values (despite the "Append" name, the value becomes the first element).
template <typename T, typename ListType, T Value>
struct AppendToList;

// Result is the list { Added, Existing... }.
template <typename T, T... Existing, T Added>
struct AppendToList<T, CompileTimeList<T, Existing...>, Added> {
    using Result = CompileTimeList<T, Added, Existing...>;
};
// Inserts a value into a sorted CompileTimeList without creating duplicates.
// Result names the resulting list type.
template <typename T, typename ListType, T Value>
struct InsertToSet;

// Non-empty list. Three outcomes, tried in this order:
//   1. the value is already present      -> list unchanged
//   2. the value sorts before the head   -> value placed in front
//   3. otherwise                         -> insert into the tail, then put the head back
template <typename T, T Head, T... Tail, T NewValue>
struct InsertToSet<T, CompileTimeList<T, Head, Tail...>, NewValue> {
private:
    using Unchanged = CompileTimeList<T, Head, Tail...>;
    using InFront   = CompileTimeList<T, NewValue, Head, Tail...>;
    using TailWithValue =
        typename InsertToSet<T, CompileTimeList<T, Tail...>, NewValue>::Result;
    using HeadRestored =
        typename AppendToList<T, TailWithValue, Head>::Result;

public:
    using Result =
        std::conditional_t<
            Unchanged::Contains(NewValue),
            Unchanged,
            std::conditional_t<(NewValue < Head), InFront, HeadRestored>>;
};

// Empty list: the result is the single-element list.
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
    using Result = CompileTimeList<T, Value>;
};
// Base class of feature set
template <typename Derived>
class FeatureSetBase {
public:
// Get a list of indices for active features
template <typename IndexListType>
static void AppendActiveIndices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (Color perspective : { WHITE, BLACK }) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &active[perspective]);
}
}
// Get a list of indices for recently changed features
template <typename PositionType, typename IndexListType>
static void AppendChangedIndices(
const PositionType& pos, TriggerEvent trigger,
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
for (Color perspective : { WHITE, BLACK }) {
reset[perspective] = false;
switch (trigger) {
case TriggerEvent::kFriendKingMoved:
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
break;
default:
assert(false);
break;
template <typename T, T First, T... Remaining>
struct CompileTimeList<T, First, Remaining...> {
static constexpr bool contains(T value) {
return value == First || CompileTimeList<T, Remaining...>::contains(value);
}
if (reset[perspective]) {
Derived::CollectActiveIndices(
pos, trigger, perspective, &added[perspective]);
} else {
Derived::CollectChangedIndices(
pos, trigger, perspective,
&removed[perspective], &added[perspective]);
static constexpr std::array<T, sizeof...(Remaining) + 1>
kValues = {{First, Remaining...}};
};
template <typename T, T First, T... Remaining>
constexpr std::array<T, sizeof...(Remaining) + 1>
CompileTimeList<T, First, Remaining...>::kValues;
template <typename T>
struct CompileTimeList<T> {
static constexpr bool contains(T /*value*/) {
return false;
}
}
}
};
static constexpr std::array<T, 0> kValues = { {} };
};
// Class template that represents the feature set
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
public FeatureSetBase<
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = FirstFeatureType;
using Tail = FeatureSet<RemainingFeatureTypes...>;
// Class template that adds to the beginning of the list
template <typename T, typename ListType, T Value>
struct AppendToList;
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
// number of feature dimensions
static constexpr IndexType kDimensions =
Head::kDimensions + Tail::kDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
template <typename T, T... Values, T AnotherValue>
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
using Result = CompileTimeList<T, AnotherValue, Values...>;
};
// Get the feature quantity name
static std::string GetName() {
return std::string(Head::kName) + "+" + Tail::GetName();
}
// Class template for adding to a sorted, unique list
template <typename T, typename ListType, T Value>
struct InsertToSet;
private:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const active) {
Tail::CollectActiveIndices(pos, trigger, perspective, active);
if (Head::kRefreshTrigger == trigger) {
const auto start = active->size();
Head::AppendActiveIndices(pos, perspective, active);
for (auto i = start; i < active->size(); ++i) {
(*active)[i] += Tail::kDimensions;
template <typename T, T First, T... Remaining, T AnotherValue>
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
using Result =
std::conditional_t<
CompileTimeList<T, First, Remaining...>::contains(AnotherValue),
CompileTimeList<T, First, Remaining...>,
std::conditional_t<
(AnotherValue < First),
CompileTimeList<T, AnotherValue, First, Remaining...>,
typename AppendToList<T, typename InsertToSet<
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
First
>::Result
>
>;
};
template <typename T, T Value>
struct InsertToSet<T, CompileTimeList<T>, Value> {
using Result = CompileTimeList<T, Value>;
};
// Base class of feature set
template <typename Derived>
class FeatureSetBase {
public:
// Get a list of indices for active features
template <typename IndexListType>
static void append_active_indices(
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
for (Color perspective : { WHITE, BLACK }) {
Derived::collect_active_indices(
pos, trigger, perspective, &active[perspective]);
}
}
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename IndexListType>
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexListType* const removed, IndexListType* const added) {
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
if (Head::kRefreshTrigger == trigger) {
const auto start_removed = removed->size();
const auto start_added = added->size();
Head::AppendChangedIndices(pos, perspective, removed, added);
for (auto i = start_removed; i < removed->size(); ++i) {
(*removed)[i] += Tail::kDimensions;
// Collect, for both perspectives, the feature indices that changed with the
// last move.
//   removed[c] / added[c] : output lists for perspective c
//   reset[c]              : set here (for the king/any-piece triggers) to tell
//                           the caller that perspective c needs a full refresh;
//                           in that case added[c] receives the complete active
//                           set and removed[c] is left untouched.
// NOTE(review): for TriggerEvent::kNone, and on the early `continue` paths,
// reset[c] is not written by this function, yet it is read below for kNone.
// This presumably relies on the caller initialising reset[] - confirm.
template <typename PositionType, typename IndexListType>
static void append_changed_indices(
    const PositionType& pos,
    TriggerEvent trigger,
    IndexListType removed[2],
    IndexListType added[2],
    bool reset[2]) {

    const auto& dirty = pos.state()->dirtyPiece;
    const bool nothing_moved = (dirty.dirty_num == 0);

    for (Color perspective : { WHITE, BLACK }) {
        bool& full_refresh = reset[perspective];

        switch (trigger) {
        case TriggerEvent::kNone:
            // Never forces a refresh; keep whatever the caller passed in.
            break;

        case TriggerEvent::kFriendKingMoved:
            if (nothing_moved)
                continue;
            full_refresh = dirty.piece[0] == make_piece(perspective, KING);
            break;

        case TriggerEvent::kEnemyKingMoved:
            if (nothing_moved)
                continue;
            full_refresh = dirty.piece[0] == make_piece(~perspective, KING);
            break;

        case TriggerEvent::kAnyKingMoved:
            if (nothing_moved)
                continue;
            full_refresh = type_of(dirty.piece[0]) == KING;
            break;

        case TriggerEvent::kAnyPieceMoved:
            full_refresh = true;
            break;

        default:
            assert(false);
            break;
        }

        if (full_refresh)
            Derived::collect_active_indices(
                pos, trigger, perspective, &added[perspective]);
        else
            Derived::collect_changed_indices(
                pos, trigger, perspective,
                &removed[perspective], &added[perspective]);
    }
}
for (auto i = start_added; i < added->size(); ++i) {
(*added)[i] += Tail::kDimensions;
};
// Class template that represents the feature set
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
public FeatureSetBase<
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>
> {
private:
using Head = FirstFeatureType;
using Tail = FeatureSet<RemainingFeatureTypes...>;
public:
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
// number of feature dimensions
static constexpr IndexType kDimensions =
Head::kDimensions + Tail::kDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
// List of timings to perform all calculations instead of difference calculation
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
// Get the feature quantity name
static std::string get_name() {
return std::string(Head::kName) + "+" + Tail::get_name();
}
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
private:
// Get a list of indices with a value of 1 among the features
template <typename IndexListType>
static void collect_active_indices(
const Position& pos,
const TriggerEvent trigger,
const Color perspective,
IndexListType* const active) {
// Class template that represents the feature set
template <typename FeatureType>
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
Tail::collect_active_indices(pos, trigger, perspective, active);
if (Head::kRefreshTrigger == trigger) {
const auto start = active->size();
Head::append_active_indices(pos, perspective, active);
public:
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
// Number of feature dimensions
static constexpr IndexType kDimensions = FeatureType::kDimensions;
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// Trigger for full calculation instead of difference calculation
using SortedTriggerSet =
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
for (auto i = start; i < active->size(); ++i) {
(*active)[i] += Tail::kDimensions;
}
}
}
// Get the feature quantity name
static std::string GetName() {
return FeatureType::kName;
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <typename IndexListType>
static void collect_changed_indices(
const Position& pos,
const TriggerEvent trigger,
const Color perspective,
IndexListType* const removed,
IndexListType* const added) {
private:
// Get a list of indices for active features
static void CollectActiveIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const active) {
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendActiveIndices(pos, perspective, active);
}
}
Tail::collect_changed_indices(pos, trigger, perspective, removed, added);
if (Head::kRefreshTrigger == trigger) {
const auto start_removed = removed->size();
const auto start_added = added->size();
Head::append_changed_indices(pos, perspective, removed, added);
// Get a list of indices for recently changed features
static void CollectChangedIndices(
const Position& pos, const TriggerEvent trigger, const Color perspective,
IndexList* const removed, IndexList* const added) {
for (auto i = start_removed; i < removed->size(); ++i) {
(*removed)[i] += Tail::kDimensions;
}
if (FeatureType::kRefreshTrigger == trigger) {
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
}
}
for (auto i = start_added; i < added->size(); ++i) {
(*added)[i] += Tail::kDimensions;
}
}
}
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Make the base class and the class template that recursively uses itself a friend
friend class FeatureSetBase<FeatureSet>;
template <typename... FeatureTypes>
friend class FeatureSet;
};
// Feature set holding exactly one feature type.
// Every constant is forwarded unchanged from FeatureType, and the two collect_*
// helpers call into FeatureType only when the requested trigger is that
// feature's own refresh trigger.
template <typename FeatureType>
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {

public:
    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;

    // Number of feature dimensions
    static constexpr IndexType kDimensions = FeatureType::kDimensions;

    // Upper bound on how many features can be active at once
    static constexpr IndexType kMaxActiveDimensions =
        FeatureType::kMaxActiveDimensions;

    // Set of triggers (here a single one) that force a full recalculation
    // instead of an incremental update
    using SortedTriggerSet =
        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;

    // Name of the feature set: simply the name of its only feature
    static std::string get_name() {
        return FeatureType::kName;
    }

private:
    // Append the active indices of the feature, but only for its own trigger
    static void collect_active_indices(
        const Position& pos,
        const TriggerEvent trigger,
        const Color perspective,
        IndexList* const active) {

        if (trigger != FeatureType::kRefreshTrigger)
            return;

        FeatureType::append_active_indices(pos, perspective, active);
    }

    // Append the changed indices of the feature, but only for its own trigger
    static void collect_changed_indices(
        const Position& pos,
        const TriggerEvent trigger,
        const Color perspective,
        IndexList* const removed,
        IndexList* const added) {

        if (trigger != FeatureType::kRefreshTrigger)
            return;

        FeatureType::append_changed_indices(pos, perspective, removed, added);
    }

    // The CRTP base and every other FeatureSet instantiation need access to
    // the private collect_* helpers
    friend class FeatureSetBase<FeatureSet>;

    template <typename... FeatureTypes>
    friend class FeatureSet;
};
} // namespace Eval::NNUE::Features
+4 -4
View File
@@ -34,10 +34,10 @@ namespace Eval::NNUE::Features {
// Trigger to perform full calculations instead of difference only
enum class TriggerEvent {
kNone, // Calculate the difference whenever possible
kFriendKingMoved, // calculate all when own ball moves
kEnemyKingMoved, // do all calculations when enemy balls move
kAnyKingMoved, // do all calculations if either ball moves
kAnyPieceMoved, // always do all calculations
kFriendKingMoved, // calculate full evaluation when own king moves
kEnemyKingMoved, // calculate full evaluation when opponent king moves
kAnyKingMoved, // calculate full evaluation when any king moves
kAnyPieceMoved, // always calculate full evaluation
};
enum class Side {
+93
View File
@@ -0,0 +1,93 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKA of NNUE evaluation function
#include "half_ka.h"
#include "index_list.h"
namespace Eval::NNUE::Features {
// Map a square into the given perspective's frame of reference.
// When `perspective` converts to true the square index is XOR-ed with 63,
// i.e. the board is rotated by 180 degrees (the black point of view,
// assuming BLACK is the non-zero Color - TODO confirm); otherwise the
// square is returned unchanged.
// Note for "halfka": the architecture was designed with "flip" in mind, but it
// is still untested which approach is better. The rotation has to stay until a
// better architecture that works with "flip" is found; it allows the current
// master net to be used for gensfen (primarily needed for higher quality data).
inline Square orient(Color perspective, Square s) {
    const int rotate_mask = static_cast<bool>(perspective) ? 63 : 0;
    return static_cast<Square>(static_cast<int>(s) ^ rotate_mask);
}
// Compute the feature index for piece `pc` on square `s`, seen from
// `perspective`, with the associated king on `ksq`.
// The index is the sum of three parts: the oriented square, the per-piece
// offset from kpp_board_index, and a block of PS_END2 entries per king square.
// `ksq` is used as given; the callers in this file pass an already oriented
// king square.
template <Side AssociatedKing>
inline IndexType HalfKA<AssociatedKing>::make_index(
    Color perspective,
    Square s,
    Piece pc,
    Square ksq) {

    const auto oriented_sq  = orient(perspective, s);
    const auto piece_offset = kpp_board_index[pc][perspective];
    const auto king_block   = PS_END2 * ksq;

    return IndexType(oriented_sq + piece_offset + king_block);
}
// Append to `active` one feature index for every piece currently on the board
// (kings are not filtered out), as seen from `perspective`.
// The king used for indexing is the perspective's own king for Side::kFriend
// and the opposing king otherwise.
template <Side AssociatedKing>
void HalfKA<AssociatedKing>::append_active_indices(
    const Position& pos,
    Color perspective,
    IndexList* active) {

    const auto king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    const Square ksq = orient(perspective, pos.square<KING>(king_color));

    // Consume the occupancy bitboard one square at a time
    for (Bitboard occupied = pos.pieces(); occupied; ) {
        const Square s = pop_lsb(&occupied);
        active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
    }
}
// Translate the dirty-piece record of the current state into feature index
// changes: every entry with a valid `from` square yields an index in `removed`,
// every entry with a valid `to` square yields an index in `added`.
// Entries whose square is SQ_NONE on one side contribute only to the other list.
template <Side AssociatedKing>
void HalfKA<AssociatedKing>::append_changed_indices(
    const Position& pos,
    Color perspective,
    IndexList* removed,
    IndexList* added) {

    const auto king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    const Square ksq = orient(perspective, pos.square<KING>(king_color));

    const auto& dirty = pos.state()->dirtyPiece;
    for (int i = 0; i < dirty.dirty_num; ++i) {
        const Piece pc  = dirty.piece[i];
        const auto from = dirty.from[i];
        const auto to   = dirty.to[i];

        if (from != SQ_NONE)
            removed->push_back(make_index(perspective, from, pc, ksq));

        if (to != SQ_NONE)
            added->push_back(make_index(perspective, to, pc, ksq));
    }
}
// Explicit instantiations for both king associations. The member functions of
// HalfKA are defined in this source file, so the two variants are instantiated
// here.
template class HalfKA<Side::kFriend>;
template class HalfKA<Side::kEnemy>;
} // namespace Eval::NNUE::Features
+75
View File
@@ -0,0 +1,75 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NNUE_FEATURES_HALF_KA_H_INCLUDED
#define NNUE_FEATURES_HALF_KA_H_INCLUDED
#include "features_common.h"
#include "evaluate.h"
//Definition of input features HalfKA of NNUE evaluation function
namespace Eval::NNUE::Features {
// Feature HalfKA: Combination of the position of a king (own or enemy,
// selected by the AssociatedKing template argument) and the position of pieces.
// NOTE(review): with kMaxActiveDimensions = 32 this presumably counts the
// kings themselves as pieces - confirm against the implementation.
template <Side AssociatedKing>
class HalfKA {
public:
// Feature name; differs between the friend-king and enemy-king variants
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfKA(Friend)" : "HalfKA(Enemy)";
// Hash value embedded in the evaluation file.
// The lowest bit is flipped for the friend-king variant, so the two variants
// have different hashes.
static constexpr std::uint32_t kHashValue =
0x5F134CB9u ^ (AssociatedKing == Side::kFriend);
// Number of feature dimensions: PS_END2 entries for each of the SQUARE_NB
// king squares
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END2);
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = 32;
// Trigger for full calculation instead of difference calculation:
// a move of the king this feature is associated with
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of indices for active features, seen from `perspective`;
// the indices are appended to `active`
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
// Get a list of indices for recently changed features, seen from
// `perspective`; indices are appended to `removed` and `added`
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
private:
// Index of a feature for a given king position and another piece on some square
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
};
} // namespace Eval::NNUE::Features
#endif // #ifndef NNUE_FEATURES_HALF_KA_H_INCLUDED
+74 -52
View File
@@ -1,19 +1,19 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKP of NNUE evaluation function
@@ -23,50 +23,72 @@
namespace Eval::NNUE::Features {
// Map a square into the given perspective's frame of reference.
// When `perspective` converts to true the square index is XOR-ed with 63,
// which rotates the board by 180 degrees (used for black); otherwise the
// square is returned unchanged.
inline Square orient(Color perspective, Square s) {
    const int rotate_mask = static_cast<bool>(perspective) ? 63 : 0;
    return static_cast<Square>(static_cast<int>(s) ^ rotate_mask);
}
// Compute the feature index for piece `pc` on square `s`, seen from
// `perspective`, with the king on `ksq`.
// The index is the sum of the oriented square, the per-piece offset from
// kpp_board_index, and a block of PS_END entries per king square.
template <Side AssociatedKing>
inline IndexType HalfKP<AssociatedKing>::MakeIndex(
    Color perspective, Square s, Piece pc, Square ksq) {

    const auto oriented_sq  = orient(perspective, s);
    const auto piece_offset = kpp_board_index[pc][perspective];
    const auto king_block   = PS_END * ksq;

    return IndexType(oriented_sq + piece_offset + king_block);
}
// Get a list of indices for active features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
// Map a square into the given perspective's frame of reference.
// When `perspective` converts to true the square index is XOR-ed with 63,
// i.e. the board is rotated by 180 degrees (the black point of view,
// assuming BLACK is the non-zero Color - TODO confirm); otherwise the
// square is returned unchanged.
// The rotation has to stay until a better architecture that works with "flip"
// is found; it allows the current master net to be used for gensfen
// (primarily needed for higher quality data).
inline Square orient(Color perspective, Square s) {
    const int rotate_mask = static_cast<bool>(perspective) ? 63 : 0;
    return static_cast<Square>(static_cast<int>(s) ^ rotate_mask);
}
}
// Get a list of indices for recently changed features
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
// Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfKP<AssociatedKing>::make_index(
Color perspective,
Square s,
Piece pc,
Square ksq) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
if (type_of(pc) == KING) continue;
if (dp.from[i] != SQ_NONE)
removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
if (dp.to[i] != SQ_NONE)
added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq);
}
}
template class HalfKP<Side::kFriend>;
// Append to `active` one feature index for every non-king piece on the board,
// as seen from `perspective`.
// The king used for indexing is the perspective's own king for Side::kFriend
// and the opposing king otherwise.
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::append_active_indices(
    const Position& pos,
    Color perspective,
    IndexList* active) {

    const auto king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    const Square ksq = orient(perspective, pos.square<KING>(king_color));

    // Occupancy with both kings masked out, consumed one square at a time
    for (Bitboard others = pos.pieces() & ~pos.pieces(KING); others; ) {
        const Square s = pop_lsb(&others);
        active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
    }
}
// Translate the dirty-piece record of the current state into feature index
// changes. King entries are ignored; for every other entry a valid `from`
// square yields an index in `removed` and a valid `to` square yields an index
// in `added`.
template <Side AssociatedKing>
void HalfKP<AssociatedKing>::append_changed_indices(
    const Position& pos,
    Color perspective,
    IndexList* removed,
    IndexList* added) {

    const auto king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    const Square ksq = orient(perspective, pos.square<KING>(king_color));

    const auto& dirty = pos.state()->dirtyPiece;
    for (int i = 0; i < dirty.dirty_num; ++i) {
        const Piece pc = dirty.piece[i];

        // Kings are not part of this feature set
        if (type_of(pc) != KING) {
            const auto from = dirty.from[i];
            const auto to   = dirty.to[i];

            if (from != SQ_NONE)
                removed->push_back(make_index(perspective, from, pc, ksq));

            if (to != SQ_NONE)
                added->push_back(make_index(perspective, to, pc, ksq));
        }
    }
}
// Explicit instantiations for both king associations (friend and enemy king).
template class HalfKP<Side::kFriend>;
template class HalfKP<Side::kEnemy>;
} // namespace Eval::NNUE::Features
+54 -42
View File
@@ -1,62 +1,74 @@
/*
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Stockfish is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//Definition of input features HalfKP of NNUE evaluation function
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
#include "../../evaluate.h"
#include "features_common.h"
#include "evaluate.h"
//Definition of input features HalfKP of NNUE evaluation function
namespace Eval::NNUE::Features {
// Feature HalfKP: Combination of the position of own king
// and the position of pieces other than kings
template <Side AssociatedKing>
class HalfKP {
// Feature HalfKP: Combination of the position of own king
// and the position of pieces other than kings
template <Side AssociatedKing>
class HalfKP {
public:
// Feature name
static constexpr const char* kName = "HalfKP(Friend)";
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
// Number of feature dimensions
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
public:
// Feature name
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfKP(Friend)" : "HalfKP(Enemy)";
// Get a list of indices for active features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
// Get a list of indices for recently changed features
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// Number of feature dimensions
static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
private:
// Index of a feature for a given king position and another piece on some square
static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
};
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of indices for active features
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
// Get a list of indices for recently changed features
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
private:
// Index of a feature for a given king position and another piece on some square
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
};
} // namespace Eval::NNUE::Features
+90
View File
@@ -0,0 +1,90 @@
#include "half_relative_ka.h"
#include "index_list.h"
//Definition of input features HalfRelativeKA of NNUE evaluation function
namespace Eval::NNUE::Features {
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
// Important note for "halfka": this architecture was designed with "flip" in
// mind, although it is still untested which approach is better.
// The rotation has to stay until a better architecture that works with "flip"
// is found; it also allows the current master net to be used for gensfen
// (primarily needed for higher quality data).
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
// Compute the feature index of piece `pc` standing on square `s`, seen from
// `perspective`, relative to the king square `sq_k`.
// The square is oriented first and combined with the per-piece base offset
// from kpp_board_index; the two-argument overload does the rest.
template <Side AssociatedKing>
inline IndexType HalfRelativeKA<AssociatedKing>::make_index(
    Color perspective, Square s, Piece pc, Square sq_k) {

    const Square oriented_sq = orient(perspective, s);
    return make_index(
        sq_k,
        IndexType(oriented_sq + kpp_board_index[pc][perspective]));
}
// Compute the feature index from the king square `sq_k` and a piece-square
// index `p`.
// `p` is split into a piece kind and a square; the square's file and rank are
// then taken relative to the king and shifted by half the virtual board size,
// so that the king sits in the centre of the virtual board.
template <Side AssociatedKing>
inline IndexType HalfRelativeKA<AssociatedKing>::make_index(
    Square sq_k, IndexType p) {

    constexpr IndexType kWidth = kBoardWidth;
    constexpr IndexType kHeight = kBoardHeight;

    // Piece-square indices start at PS_W_PAWN; remove that base first.
    const IndexType offset = p - PS_W_PAWN;
    const IndexType piece_kind = offset / SQUARE_NB;
    const Square piece_sq = static_cast<Square>(offset % SQUARE_NB);

    const IndexType file_delta = file_of(piece_sq) - file_of(sq_k) + (kWidth / 2);
    const IndexType rank_delta = rank_of(piece_sq) - rank_of(sq_k) + (kHeight / 2);

    return kHeight * kWidth * piece_kind + kHeight * file_delta + rank_delta;
}
// Append to `active` the index of every feature that is currently set.
// One index is produced per piece on the board (kings are not filtered out),
// each relative to the oriented square of the associated king: the
// perspective's own king for Side::kFriend, the opposing king otherwise.
template <Side AssociatedKing>
void HalfRelativeKA<AssociatedKing>::append_active_indices(
    const Position& pos, Color perspective, IndexList* active) {

    const Color king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    const Square king_sq = orient(perspective, pos.square<KING>(king_color));

    for (Bitboard remaining = pos.pieces(); remaining; ) {
        const Square sq = pop_lsb(&remaining);
        active->push_back(make_index(perspective, sq, pos.piece_on(sq), king_sq));
    }
}
// Append the indices of features that changed with the last move.
// Every entry of the position's dirty-piece record contributes its origin
// square to `removed` and its destination square to `added`; SQ_NONE on
// either side means there is nothing to record for that side.
template <Side AssociatedKing>
void HalfRelativeKA<AssociatedKing>::append_changed_indices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {

    const Color king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    const Square king_sq = orient(perspective, pos.square<KING>(king_color));

    const auto& dirty = pos.state()->dirtyPiece;
    for (int idx = 0; idx < dirty.dirty_num; ++idx) {
        const Piece moved = dirty.piece[idx];
        const auto origin = dirty.from[idx];
        const auto target = dirty.to[idx];

        if (origin != SQ_NONE)
            removed->push_back(make_index(perspective, origin, moved, king_sq));

        if (target != SQ_NONE)
            added->push_back(make_index(perspective, target, moved, king_sq));
    }
}
// Explicit instantiations for both king associations. The member functions of
// HalfRelativeKA are defined in this source file, so the two variants are
// instantiated here.
template class HalfRelativeKA<Side::kFriend>;
template class HalfRelativeKA<Side::kEnemy>;
} // namespace Eval::NNUE::Features
+68
View File
@@ -0,0 +1,68 @@
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KA_H_
#define _NNUE_FEATURES_HALF_RELATIVE_KA_H_
#include "features_common.h"
#include "evaluate.h"
// Definition of input features HalfRelativeKA of NNUE evaluation function
// K - King
// A - Any piece
// KA - product of K and A
namespace Eval::NNUE::Features {
// Feature HalfRelativeKA: position of each piece relative to the associated
// king, which is either the own king or the enemy king depending on
// AssociatedKing.
template <Side AssociatedKing>
class HalfRelativeKA {
public:
    // Feature name
    static constexpr const char* kName =
        (AssociatedKing == Side::kFriend) ? "HalfRelativeKA(Friend)"
                                          : "HalfRelativeKA(Enemy)";

    // Hash value embedded in the evaluation function file
    static constexpr std::uint32_t kHashValue =
        0xA123051Fu ^ (AssociatedKing == Side::kFriend);

    // Number of piece kinds (presumably 6 piece types for each of 2 colours)
    static constexpr IndexType kNumPieceKinds = 6 * 2;

    // Width of the virtual board that has the king in its centre
    static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;

    // Height of the virtual board that has the king in its centre
    static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;

    // Number of feature dimensions
    static constexpr IndexType kDimensions =
        kNumPieceKinds * kBoardHeight * kBoardWidth;

    // Maximum number of features that can be active (value 1) at the same time
    static constexpr IndexType kMaxActiveDimensions = 32;

    // Event that triggers a full recalculation instead of a difference update
    static constexpr TriggerEvent kRefreshTrigger =
        (AssociatedKing == Side::kFriend) ? TriggerEvent::kFriendKingMoved
                                          : TriggerEvent::kEnemyKingMoved;

    // Append the indices of all currently active features to `active`
    static void append_active_indices(
        const Position& pos, Color perspective, IndexList* active);

    // Append the indices of features that changed since the previous position
    // to `removed` and `added`
    static void append_changed_indices(
        const Position& pos, Color perspective,
        IndexList* removed, IndexList* added);

    // Feature index from the king square `s` and a piece-square index `p`
    static IndexType make_index(Square s, IndexType p);

    // Feature index for piece `pc` on square `s`, seen from `perspective`,
    // relative to the king square `sq_k`
    static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
};
} // namespace Eval::NNUE::Features
#endif // #ifndef _NNUE_FEATURES_HALF_RELATIVE_KA_H_
+77 -64
View File
@@ -1,78 +1,91 @@
//Definition of input features HalfRelativeKP of NNUE evaluation function
#if defined(EVAL_NNUE)
#include "half_relative_kp.h"
#include "half_relative_kp.h"
#include "index_list.h"
namespace Eval {
//Definition of input features HalfRelativeKP of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace NNUE {
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
// The rotation has to stay until a better architecture that works with "flip"
// is found; it also allows the current master net to be used for gensfen
// (primarily needed for higher quality data).
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
namespace Features {
// Find the index of the feature quantity from the ball position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::make_index(
Color perspective,
Square s,
Piece pc,
Square sq_k) {
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
const IndexType p = IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
return make_index(sq_k, p);
}
// Compute the feature index of piece `pc` standing on square `s`, seen from
// `perspective`, relative to the king square `sq_k`.
// The square is oriented first and combined with the per-piece base offset
// from kpp_board_index; the two-argument overload does the rest.
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
    Color perspective, Square s, Piece pc, Square sq_k) {

    const Square oriented_sq = orient(perspective, s);
    return MakeIndex(
        sq_k,
        IndexType(oriented_sq + kpp_board_index[pc][perspective]));
}
// Find the index of the feature quantity from the ball position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::make_index(
Square sq_k,
IndexType p) {
// Find the index of the feature quantity from the ball position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
Square sq_k, IndexType p) {
constexpr IndexType W = kBoardWidth;
constexpr IndexType H = kBoardHeight;
const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB;
const Square sq_p = static_cast<Square>((p - PS_W_PAWN) % SQUARE_NB);
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
return H * W * piece_index + H * relative_file + relative_rank;
}
constexpr IndexType W = kBoardWidth;
constexpr IndexType H = kBoardHeight;
const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB;
const Square sq_p = static_cast<Square>((p - PS_W_PAWN) % SQUARE_NB);
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
return H * W * piece_index + H * relative_file + relative_rank;
}
// Append to `active` the index of every feature that is currently set.
// One index is produced per non-king piece, each relative to the oriented
// square of the associated king.
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
    const Position& pos, Color perspective, IndexList* active) {
    // Honour the AssociatedKing template parameter: the Friend variant is
    // anchored to the perspective's own king, the Enemy variant to the
    // opposing king. Using the own king unconditionally would make both
    // instantiations produce identical features.
    const Color king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    Square ksq = orient(perspective, pos.square<KING>(king_color));

    // Kings themselves are not part of this feature set.
    Bitboard bb = pos.pieces() & ~pos.pieces(KING);
    while (bb) {
        Square s = pop_lsb(&bb);
        active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
    }
}
// Get a list of indices with a value of 1 among the features
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::append_active_indices(
const Position& pos,
Color perspective,
IndexList* active) {
// Append the indices of features that changed with the last move.
// Every non-king entry of the position's dirty-piece record contributes its
// origin square to `removed` and its destination square to `added`; SQ_NONE
// on either side means there is nothing to record for that side.
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
    // Honour the AssociatedKing template parameter: the Friend variant is
    // anchored to the perspective's own king, the Enemy variant to the
    // opposing king. Using the own king unconditionally would make both
    // instantiations produce identical features.
    const Color king_color =
        AssociatedKing == Side::kFriend ? perspective : ~perspective;
    Square ksq = orient(perspective, pos.square<KING>(king_color));

    const auto& dp = pos.state()->dirtyPiece;
    for (int i = 0; i < dp.dirty_num; ++i) {
        Piece pc = dp.piece[i];

        // Kings themselves are not part of this feature set.
        if (type_of(pc) == KING)
            continue;

        if (dp.from[i] != SQ_NONE)
            removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));

        if (dp.to[i] != SQ_NONE)
            added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
    }
}
Square ksq = orient(
perspective,
pos.square<KING>(
AssociatedKing == Side::kFriend ? perspective : ~perspective));
template class HalfRelativeKP<Side::kFriend>;
template class HalfRelativeKP<Side::kEnemy>;
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
}
}
} // namespace Features
// Get a list of indices whose values have changed from the previous one in the feature quantity
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added) {
} // namespace NNUE
Square ksq = orient(
perspective,
pos.square<KING>(
AssociatedKing == Side::kFriend ? perspective : ~perspective));
} // namespace Eval
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
#endif // defined(EVAL_NNUE)
if (type_of(pc) == KING)
continue;
if (dp.from[i] != SQ_NONE)
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
if (dp.to[i] != SQ_NONE)
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
}
}
// Explicit instantiations for both king associations. The member functions of
// HalfRelativeKP are defined in this source file, so the two variants are
// instantiated here.
template class HalfRelativeKP<Side::kFriend>;
template class HalfRelativeKP<Side::kEnemy>;
} // namespace Eval::NNUE::Features
+50 -49
View File
@@ -1,65 +1,66 @@
//Definition of input features HalfRelativeKP of NNUE evaluation function
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
#define _NNUE_FEATURES_HALF_RELATIVE_KP_H_
#if defined(EVAL_NNUE)
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
#include "evaluate.h"
namespace NNUE {
//Definition of input features HalfRelativeKP of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace Features {
// Feature HalfRelativeKP: Relative position of each piece other than the ball based on own ball or enemy ball
template <Side AssociatedKing>
class HalfRelativeKP {
public:
// feature quantity name
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)";
// Feature HalfRelativeKP: Relative position of each piece other than the ball based on own ball or enemy ball
template <Side AssociatedKing>
class HalfRelativeKP {
public:
// feature quantity name
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
0xF9180919u ^ (AssociatedKing == Side::kFriend);
// Piece type excluding balls
static constexpr IndexType kNumPieceKinds = 5 * 2;
// width of the virtual board with the ball in the center
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
// height of a virtual board with balls in the center
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
// number of feature dimensions
static constexpr IndexType kDimensions =
kNumPieceKinds * kBoardHeight * kBoardWidth;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue =
0xF9180919u ^ (AssociatedKing == Side::kFriend);
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Piece type excluding balls
static constexpr IndexType kNumPieceKinds = 5 * 2;
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// width of the virtual board with the ball in the center
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
// Find the index of the feature quantity from the ball position and PieceSquare
static IndexType MakeIndex(Square s, IndexType p);
// Find the index of the feature quantity from the ball position and PieceSquare
static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
};
// height of a virtual board with balls in the center
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
} // namespace Features
// number of feature dimensions
static constexpr IndexType kDimensions =
kNumPieceKinds * kBoardHeight * kBoardWidth;
} // namespace NNUE
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
} // namespace Eval
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
#endif // defined(EVAL_NNUE)
// Get a list of indices with a value of 1 among the features
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
// Find the index of the feature quantity from the ball position and PieceSquare
static IndexType make_index(Square s, IndexType p);
// Find the index of the feature quantity from the ball position and PieceSquare
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
};
} // namespace Eval::NNUE::Features
#endif
+36 -49
View File
@@ -1,58 +1,45 @@
//Definition of input feature quantity K of NNUE evaluation function
#if defined(EVAL_NNUE)
#include "k.h"
#include "k.h"
#include "index_list.h"
namespace Eval {
//Definition of input feature quantity K of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace NNUE {
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
// The rotation has to stay until a better architecture that works with "flip"
// is found; it also allows the current master net to be used for gensfen
// (primarily needed for higher quality data).
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
namespace Features {
// Feature index for a king of colour `king_color` on square `s`, seen from
// `perspective`. The oriented square is offset by 64 when the king's colour
// differs from the perspective.
IndexType K::make_index(Color perspective, Square s, Color king_color) {
    const bool other_side = bool(perspective ^ king_color);
    return IndexType(orient(perspective, s) + other_side * 64);
}
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
// Get a list of indices with a value of 1 among the features
void K::append_active_indices(
const Position& pos,
Color perspective,
IndexList* active) {
// Feature index for a king of colour `king_color` on square `s`, seen from
// `perspective`. The oriented square is offset by 64 when the king's colour
// differs from the perspective.
IndexType K::MakeIndex(Color perspective, Square s, Color king_color) {
    const bool other_side = bool(perspective ^ king_color);
    return IndexType(orient(perspective, s) + other_side * 64);
}
for (auto color : Colors) {
active->push_back(make_index(perspective, pos.square<KING>(color), color));
}
}
// Get a list of indices with a value of 1 among the features
void K::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
for (auto color : Colors) {
active->push_back(MakeIndex(perspective, pos.square<KING>(color), color));
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void K::append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added) {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void K::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
Color king_color;
if (dp.piece[0] == Piece::W_KING) {
king_color = WHITE;
}
else if (dp.piece[0] == Piece::B_KING) {
king_color = BLACK;
}
else {
return;
}
const auto& dp = pos.state()->dirtyPiece;
if (type_of(dp.piece[0]) == KING)
{
removed->push_back(make_index(perspective, dp.from[0], color_of(dp.piece[0])));
added->push_back(make_index(perspective, dp.to[0], color_of(dp.piece[0])));
}
}
removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
added->push_back(MakeIndex(perspective, dp.to[0], king_color));
}
} // namespace Features
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
+33 -36
View File
@@ -1,52 +1,49 @@
//Definition of input feature quantity K of NNUE evaluation function
#ifndef _NNUE_FEATURES_K_H_
#ifndef _NNUE_FEATURES_K_H_
#define _NNUE_FEATURES_K_H_
#if defined(EVAL_NNUE)
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
#include "evaluate.h"
namespace NNUE {
//Definition of input feature quantity K of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace Features {
// Feature K: Ball position
class K {
public:
// feature quantity name
static constexpr const char* kName = "K";
// Feature K: Ball position
class K {
public:
// feature quantity name
static constexpr const char* kName = "K";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0xD3CEE169u;
// number of feature dimensions
static constexpr IndexType kDimensions = SQUARE_NB * 2;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 2;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0xD3CEE169u;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// number of feature dimensions
static constexpr IndexType kDimensions = SQUARE_NB * 2;
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 2;
private:
// Index of a feature for a given king position.
static IndexType MakeIndex(Color perspective, Square s, Color king_color);
};
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
} // namespace Features
// Get a list of indices with a value of 1 among the features
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
} // namespace NNUE
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
} // namespace Eval
private:
// Index of a feature for a given king position.
static IndexType make_index(Color perspective, Square s, Color king_color);
};
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
#endif
+43 -44
View File
@@ -1,56 +1,55 @@
//Definition of input feature P of NNUE evaluation function
#if defined(EVAL_NNUE)
#include "p.h"
#include "p.h"
#include "index_list.h"
namespace Eval {
//Definition of input feature P of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace NNUE {
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
// The rotation has to stay until a better architecture that works with "flip"
// is found; it also allows the current master net to be used for gensfen
// (primarily needed for higher quality data).
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
namespace Features {
// Feature index for piece `pc` on square `s`, seen from `perspective`:
// the oriented square plus the per-piece base offset from kpp_board_index.
inline IndexType P::make_index(
    Color perspective, Square s, Piece pc) {
    const Square oriented_sq = orient(perspective, s);
    return IndexType(oriented_sq + kpp_board_index[pc][perspective]);
}
// Map a square into the frame of reference of `perspective`: the square index
// is XOR-ed with 63 (board rotated by 180 degrees) for black and left
// untouched otherwise.
inline Square orient(Color perspective, Square s) {
    const int rotation_mask = bool(perspective) ? 63 : 0;
    return Square(int(s) ^ rotation_mask);
}
// Get a list of indices with a value of 1 among the features
void P::append_active_indices(
const Position& pos,
Color perspective,
IndexList* active) {
// Feature index for piece `pc` on square `s`, seen from `perspective`:
// the oriented square plus the per-piece base offset from kpp_board_index.
inline IndexType P::MakeIndex(
    Color perspective, Square s, Piece pc) {
    const Square oriented_sq = orient(perspective, s);
    return IndexType(oriented_sq + kpp_board_index[pc][perspective]);
}
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
active->push_back(make_index(perspective, s, pos.piece_on(s)));
}
}
// Get a list of indices with a value of 1 among the features
void P::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
active->push_back(MakeIndex(perspective, s, pos.piece_on(s)));
}
}
// Get a list of indices whose values have changed from the previous one in the feature quantity
void P::append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added) {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void P::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
if (type_of(pc) == KING) continue;
if (dp.from[i] != SQ_NONE)
removed->push_back(MakeIndex(perspective, dp.from[i], pc));
if (dp.to[i] != SQ_NONE)
added->push_back(MakeIndex(perspective, dp.to[i], pc));
}
}
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
} // namespace Features
if (type_of(pc) == KING)
continue;
} // namespace NNUE
if (dp.from[i] != SQ_NONE)
removed->push_back(make_index(perspective, dp.from[i], pc));
} // namespace Eval
if (dp.to[i] != SQ_NONE)
added->push_back(make_index(perspective, dp.to[i], pc));
}
}
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
+33 -36
View File
@@ -1,52 +1,49 @@
//Definition of input feature P of NNUE evaluation function
#ifndef _NNUE_FEATURES_P_H_
#ifndef _NNUE_FEATURES_P_H_
#define _NNUE_FEATURES_P_H_
#if defined(EVAL_NNUE)
#include "../../evaluate.h"
#include "features_common.h"
namespace Eval {
#include "evaluate.h"
namespace NNUE {
//Definition of input feature P of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace Features {
// Feature P: PieceSquare of pieces other than balls
class P {
public:
// feature quantity name
static constexpr const char* kName = "P";
// Feature P: PieceSquare of pieces other than balls
class P {
public:
// feature quantity name
static constexpr const char* kName = "P";
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
// number of feature dimensions
static constexpr IndexType kDimensions = PS_END;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// number of feature dimensions
static constexpr IndexType kDimensions = PS_END;
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
private:
// Index of a feature for a given piece on some square
static IndexType MakeIndex(Color perspective, Square s, Piece pc);
};
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
} // namespace Features
// Get a list of indices with a value of 1 among the features
static void append_active_indices(
const Position& pos,
Color perspective,
IndexList* active);
} // namespace NNUE
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void append_changed_indices(
const Position& pos,
Color perspective,
IndexList* removed,
IndexList* added);
} // namespace Eval
private:
// Index of a feature for a given piece on some square
static IndexType make_index(Color perspective, Square s, Piece pc);
};
#endif // defined(EVAL_NNUE)
} // namespace Eval::NNUE::Features
#endif
+627 -108
View File
@@ -24,6 +24,10 @@
#include <iostream>
#include "../nnue_common.h"
#include <string>
#include <type_traits>
#include <cstdint>
namespace Eval::NNUE::Layers {
// Affine transformation layer
@@ -50,6 +54,8 @@ namespace Eval::NNUE::Layers {
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
static constexpr int kLayerIndex = PreviousLayer::kLayerIndex + 1;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xCC03DAE4u;
@@ -59,14 +65,27 @@ namespace Eval::NNUE::Layers {
return hash_value;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "AffineTransform[" +
std::to_string(kOutputDimensions) + "<-" +
std::to_string(kInputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
// Name of this layer, including its output and input dimensions,
// in the form "AffineTransform[<outputs><-<inputs>]".
static std::string get_name() {
    std::string name = "AffineTransform[";
    name += std::to_string(kOutputDimensions);
    name += "<-";
    name += std::to_string(kInputDimensions);
    name += "]";
    return name;
}
// String describing the network structure from the input layer up to this
// layer: this layer's name followed by the previous layer's structure string
// in parentheses.
static std::string get_structure_string() {
    std::string structure = get_name();
    structure += "(";
    structure += PreviousLayer::get_structure_string();
    structure += ")";
    return structure;
}
// Layer listing: the previous layer's listing followed by one more line that
// holds this layer's index and name.
static std::string get_layers_info() {
    const std::string previous = PreviousLayer::get_layers_info();
    return previous + "\n - " + std::to_string(kLayerIndex) + " - " + get_name();
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false;
@@ -79,13 +98,17 @@ namespace Eval::NNUE::Layers {
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!previous_layer_.WriteParameters(stream)) return false;
stream.write(reinterpret_cast<const char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
if (!previous_layer_.WriteParameters(stream))
return false;
stream.write(reinterpret_cast<const char*>(biases_),
kOutputDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kOutputDimensions * kPaddedInputDimensions *
sizeof(WeightType));
return !stream.fail();
}
// Forward propagation
@@ -93,113 +116,606 @@ namespace Eval::NNUE::Layers {
const TransformedFeatureType* transformed_features, char* buffer) const {
const auto input = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
#if defined (USE_AVX512)
[[maybe_unused]] const __m512i kOnes512 = _mm512_set1_epi16(1);
[[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
return _mm512_reduce_add_epi32(sum) + bias;
};
// This function takes
// sum0 = [xmm0a, xmm0b, xmm0c, xmm0d]
// sum1 = [xmm1a, xmm1b, xmm1c, xmm1d]
// sum2 = [xmm2a, xmm2b, xmm2c, xmm2d]
// sum3 = [xmm3a, xmm3b, xmm3c, xmm3d]
// and returns
// ret = [
// reduce_add_epi32(xmm0a), reduce_add_epi32(xmm1a), reduce_add_epi32(xmm2a), reduce_add_epi32(xmm3a),
// reduce_add_epi32(xmm0b), reduce_add_epi32(xmm1b), reduce_add_epi32(xmm2b), reduce_add_epi32(xmm3b),
// reduce_add_epi32(xmm0c), reduce_add_epi32(xmm1c), reduce_add_epi32(xmm2c), reduce_add_epi32(xmm3c),
// reduce_add_epi32(xmm0d), reduce_add_epi32(xmm1d), reduce_add_epi32(xmm2d), reduce_add_epi32(xmm3d)
// ]
[[maybe_unused]] auto m512_hadd128x16_interleave = [](
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) -> __m512i {
__m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
__m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
__m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
__m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
__m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
__m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
__m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
__m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
return _mm512_add_epi32(sum0123a, sum0123b);
};
// Reduces four 512-bit accumulators at once. The result is a 128-bit vector
// whose 32-bit lanes 0..3 hold the complete horizontal sums of sum0..sum3,
// each increased by the corresponding lane of `bias`.
[[maybe_unused]] auto m512_haddx4 = [m512_hadd128x16_interleave](
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, __m128i bias) -> __m128i {
// After this step every 128-bit chunk holds one partial sum per input
// accumulator, in accumulator order.
__m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
// Fold the upper 256 bits onto the lower 256 bits...
__m256i sum256lo = _mm512_castsi512_si256(sum);
__m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
sum256lo = _mm256_add_epi32(sum256lo, sum256hi);
// ...then the upper 128 bits onto the lower 128 bits, and add the bias.
__m128i sum128lo = _mm256_castsi256_si128(sum256lo);
__m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1);
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
};
[[maybe_unused]] auto m512_haddx8 = [m512_hadd128x16_interleave](
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
__m512i sum4, __m512i sum5, __m512i sum6, __m512i sum7, __m256i bias) -> __m256i {
__m512i suma = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
__m512i sumb = m512_hadd128x16_interleave(sum4, sum5, sum6, sum7);
__m512i indices0 = _mm512_setr_epi64(0, 1, 8, 9, 4, 5, 12, 13);
__m512i indices1 = _mm512_setr_epi64(2, 3, 10, 11, 6, 7, 14, 15);
__m512i x = _mm512_add_epi32(
_mm512_permutex2var_epi64(suma, indices0, sumb),
_mm512_permutex2var_epi64(suma, indices1, sumb));
__m256i sum256lo = _mm512_castsi512_si256(x);
__m256i sum256hi = _mm512_extracti64x4_epi64(x, 1);
return _mm256_add_epi32(_mm256_add_epi32(sum256lo, sum256hi), bias);
};
[[maybe_unused]] auto m512_hadd256x8 =[m512_hadd128x16_interleave](
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, __m256i bias) -> __m256i {
__m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
__m512i indices = _mm512_setr_epi32(
0, 4, 8, 12, 2, 6, 10, 14,
1, 5, 9, 13, 3, 7, 11, 15);
sum = _mm512_permutexvar_epi32(indices, sum);
__m256i sum256lo = _mm512_castsi512_si256(sum);
__m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
return _mm256_add_epi32(_mm256_hadd_epi32(sum256lo, sum256hi), bias);
};
[[maybe_unused]] auto m512_hadd256x16 = [m512_hadd128x16_interleave](
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
__m512i sum4, __m512i sum5, __m512i sum6, __m512i sum7, __m512i bias) -> __m512i {
__m512i suma = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
__m512i sumb = m512_hadd128x16_interleave(sum4, sum5, sum6, sum7);
__m512i indices0 = _mm512_setr_epi64(0, 1, 8, 9, 4, 5, 12, 13);
__m512i indices1 = _mm512_setr_epi64(2, 3, 10, 11, 6, 7, 14, 15);
__m512i x = _mm512_add_epi32(
_mm512_permutex2var_epi64(suma, indices0, sumb),
_mm512_permutex2var_epi64(suma, indices1, sumb));
__m512i indices = _mm512_setr_epi32(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15);
return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias);
};
#if defined (USE_VNNI)
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
acc = _mm512_dpbusd_epi32(acc, a, b);
#else
[[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i {
__m512i product0 = _mm512_maddubs_epi16(a, b);
return _mm512_madd_epi16(product0, kOnes512);
#endif
};
#endif
#if defined (USE_AVX2)
[[maybe_unused]] const __m256i kOnes256 = _mm256_set1_epi16(1);
// Horizontal add of a 256-bit accumulator: returns the sum of its eight
// 32-bit lanes plus `bias`.
[[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
// Fold the upper 128 bits onto the lower 128 bits (8 lanes -> 4).
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
// Swap the 64-bit halves and add (4 -> 2), then swap neighbouring
// 32-bit lanes and add (2 -> 1).
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
// Lane 0 now holds the complete total.
return _mm_cvtsi128_si32(sum128) + bias;
};
// Reduces four 256-bit accumulators at once. The result is a 128-bit vector
// whose 32-bit lanes 0..3 hold the complete horizontal sums of sum0..sum3,
// each increased by the corresponding lane of `bias`.
[[maybe_unused]] auto m256_haddx4 = [](__m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, __m128i bias) -> __m128i {
// _mm256_hadd_epi32 adds adjacent pairs within each 128-bit half, so after
// these three steps each half holds one partial sum per accumulator.
sum0 = _mm256_hadd_epi32(sum0, sum1);
sum2 = _mm256_hadd_epi32(sum2, sum3);
sum0 = _mm256_hadd_epi32(sum0, sum2);
// Combine the two halves to obtain the full sums, then add the bias.
__m128i sum128lo = _mm256_castsi256_si128(sum0);
__m128i sum128hi = _mm256_extracti128_si256(sum0, 1);
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
};
#if defined (USE_VNNI)
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
acc = _mm256_dpbusd_epi32(acc, a, b);
#else
[[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i {
__m256i product0 = _mm256_maddubs_epi16(a, b);
return _mm256_madd_epi16(product0, kOnes256);
#endif
};
#endif
#if defined (USE_SSSE3)
[[maybe_unused]] const __m128i kOnes128 = _mm_set1_epi16(1);
// Horizontal add of a 128-bit accumulator: returns the sum of its four
// 32-bit lanes plus `bias`.
[[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
// Swap the 64-bit halves and add, then swap neighbouring 32-bit lanes and
// add; afterwards lane 0 holds the complete total.
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
return _mm_cvtsi128_si32(sum) + bias;
};
// Reduces four 128-bit accumulators at once. The result is a 128-bit vector
// whose 32-bit lanes 0..3 hold the horizontal sums of sum0..sum3, each
// increased by the corresponding lane of `bias`.
[[maybe_unused]] auto m128_haddx4 = [](__m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, __m128i bias) -> __m128i {
// Two rounds of pairwise adjacent adds collapse each accumulator to a
// single lane while packing the four results side by side.
sum0 = _mm_hadd_epi32(sum0, sum1);
sum2 = _mm_hadd_epi32(sum2, sum3);
sum0 = _mm_hadd_epi32(sum0, sum2);
return _mm_add_epi32(sum0, bias);
};
// Byte dot-product step without accumulation: multiplies the bytes of `a`
// and `b` pairwise and returns, per 32-bit lane, the sum of the four
// adjacent products.
[[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i {
// _mm_maddubs_epi16 treats `a` as unsigned and `b` as signed bytes and adds
// adjacent products into 16-bit lanes with signed saturation.
__m128i product0 = _mm_maddubs_epi16(a, b);
// Multiplying by 1 and adding adjacent pairs widens the sums to 32 bits.
return _mm_madd_epi16(product0, kOnes128);
};
#endif
#if defined (USE_AVX512)
constexpr IndexType kNumChunks512 = kPaddedInputDimensions / (kSimdWidth * 2);
constexpr IndexType kNumChunks256 = kPaddedInputDimensions / kSimdWidth;
const auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
const auto input_vector = reinterpret_cast<const __m512i*>(input);
#if !defined(USE_VNNI)
const __m512i kOnes = _mm512_set1_epi16(1);
#endif
// Filling a zmm register takes 64 bytes, so AVX512 cannot be
// used for the smaller affine transforms. Instead we fall back
// to an AVX2 implementation if kInputDimensions isn't a
// multiple of 64.
// Note that this means that, for example, with kInputDimensions
// of 96 we fall back to AVX2 even though the first 64 elements
// could be processed with AVX512. Handling that case better would
// require mixing __m256 and __m512 variables and handling more
// cases statically so as not to lose performance.
// This should be revisited if such input dimensions are to be considered.
[[maybe_unused]] const auto input_vector512 = reinterpret_cast<const __m512i*>(input);
[[maybe_unused]] const auto input_vector256 = reinterpret_cast<const __m256i*>(input);
// kOutputDimensions is either 1 or a multiple of kSimdWidth
// because then it is also an input dimension.
if constexpr (kOutputDimensions % 16 == 0 && kNumChunks256 == 1)
{
for (IndexType i = 0; i < kOutputDimensions; i += 16)
{
const IndexType offset01a = (i + 0) * kPaddedInputDimensions;
const IndexType offset23a = (i + 2) * kPaddedInputDimensions;
const IndexType offset45a = (i + 4) * kPaddedInputDimensions;
const IndexType offset67a = (i + 6) * kPaddedInputDimensions;
const IndexType offset01b = (i + 8) * kPaddedInputDimensions;
const IndexType offset23b = (i + 10) * kPaddedInputDimensions;
const IndexType offset45b = (i + 12) * kPaddedInputDimensions;
const IndexType offset67b = (i + 14) * kPaddedInputDimensions;
const __m512i bias = *reinterpret_cast<const __m512i*>(&biases_[i]);
__m512i* outptr = reinterpret_cast<__m512i*>(&output[i]);
const auto row01a = *reinterpret_cast<const __m512i*>(&weights_[offset01a]);
const auto row23a = *reinterpret_cast<const __m512i*>(&weights_[offset23a]);
const auto row45a = *reinterpret_cast<const __m512i*>(&weights_[offset45a]);
const auto row67a = *reinterpret_cast<const __m512i*>(&weights_[offset67a]);
const auto row01b = *reinterpret_cast<const __m512i*>(&weights_[offset01b]);
const auto row23b = *reinterpret_cast<const __m512i*>(&weights_[offset23b]);
const auto row45b = *reinterpret_cast<const __m512i*>(&weights_[offset45b]);
const auto row67b = *reinterpret_cast<const __m512i*>(&weights_[offset67b]);
const __m256i in256 = input_vector256[0];
const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1);
#if defined (USE_VNNI)
__m512i sum01a = _mm512_setzero_si512();
__m512i sum23a = _mm512_setzero_si512();
__m512i sum45a = _mm512_setzero_si512();
__m512i sum67a = _mm512_setzero_si512();
__m512i sum01b = _mm512_setzero_si512();
__m512i sum23b = _mm512_setzero_si512();
__m512i sum45b = _mm512_setzero_si512();
__m512i sum67b = _mm512_setzero_si512();
m512_add_dpbusd_epi32(sum01a, in, row01a);
m512_add_dpbusd_epi32(sum23a, in, row23a);
m512_add_dpbusd_epi32(sum45a, in, row45a);
m512_add_dpbusd_epi32(sum67a, in, row67a);
m512_add_dpbusd_epi32(sum01b, in, row01b);
m512_add_dpbusd_epi32(sum23b, in, row23b);
m512_add_dpbusd_epi32(sum45b, in, row45b);
m512_add_dpbusd_epi32(sum67b, in, row67b);
#else
__m512i sum01a = m512_dpbusd_epi32(in, row01a);
__m512i sum23a = m512_dpbusd_epi32(in, row23a);
__m512i sum45a = m512_dpbusd_epi32(in, row45a);
__m512i sum67a = m512_dpbusd_epi32(in, row67a);
__m512i sum01b = m512_dpbusd_epi32(in, row01b);
__m512i sum23b = m512_dpbusd_epi32(in, row23b);
__m512i sum45b = m512_dpbusd_epi32(in, row45b);
__m512i sum67b = m512_dpbusd_epi32(in, row67b);
#endif
*outptr = m512_hadd256x16(
sum01a, sum23a, sum45a, sum67a,
sum01b, sum23b, sum45b, sum67b, bias);
}
}
else if constexpr (kOutputDimensions % 4 == 0)
{
for (IndexType i = 0; i < kOutputDimensions; i += 4)
{
const IndexType offset0 = (i + 0) * kPaddedInputDimensions;
const IndexType offset1 = (i + 1) * kPaddedInputDimensions;
const IndexType offset2 = (i + 2) * kPaddedInputDimensions;
const IndexType offset3 = (i + 3) * kPaddedInputDimensions;
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
{
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
#if defined (USE_VNNI)
__m512i sum0 = _mm512_setzero_si512();
__m512i sum1 = _mm512_setzero_si512();
__m512i sum2 = _mm512_setzero_si512();
__m512i sum3 = _mm512_setzero_si512();
const IndexType kStart = 0;
#else
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
__m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
__m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
__m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks512; ++j)
{
const __m512i in = input_vector512[j];
#if defined (USE_VNNI)
m512_add_dpbusd_epi32(sum0, in, row0[j]);
m512_add_dpbusd_epi32(sum1, in, row1[j]);
m512_add_dpbusd_epi32(sum2, in, row2[j]);
m512_add_dpbusd_epi32(sum3, in, row3[j]);
#else
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j]));
sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j]));
sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j]));
#endif
}
*outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias);
}
else
{
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
__m256i sum1 = _mm256_setzero_si256();
__m256i sum2 = _mm256_setzero_si256();
__m256i sum3 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
__m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]);
__m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]);
__m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks256; ++j)
{
const __m256i in = input_vector256[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
m256_add_dpbusd_epi32(sum1, in, row1[j]);
m256_add_dpbusd_epi32(sum2, in, row2[j]);
m256_add_dpbusd_epi32(sum3, in, row3[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
#endif
}
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
}
}
}
else if constexpr (kOutputDimensions == 1)
{
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
{
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
#if defined (USE_VNNI)
__m512i sum0 = _mm512_setzero_si512();
const IndexType kStart = 0;
#else
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks512; ++j)
{
const __m512i in = input_vector512[j];
#if defined (USE_VNNI)
m512_add_dpbusd_epi32(sum0, in, row0[j]);
#else
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
#endif
}
output[0] = m512_hadd(sum0, biases_[0]);
}
else
{
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks256; ++j)
{
const __m256i in = input_vector256[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
#endif
}
output[0] = m256_hadd(sum0, biases_[0]);
}
}
else
{
// This case can never happen because kOutputDimensions
// is always 1 or a multiple of kSimdWidth.
assert(false);
}
#elif defined (USE_AVX2)
#elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto output = reinterpret_cast<OutputType*>(buffer);
const auto input_vector = reinterpret_cast<const __m256i*>(input);
#if !defined(USE_VNNI)
const __m256i kOnes = _mm256_set1_epi16(1);
#endif
#elif defined(USE_SSE2)
// kOutputDimensions is either 1 or a multiple of kSimdWidth
// because then it is also an input dimension.
if constexpr (kOutputDimensions % 4 == 0)
{
for (IndexType i = 0; i < kOutputDimensions; i += 4)
{
const IndexType offset0 = (i + 0) * kPaddedInputDimensions;
const IndexType offset1 = (i + 1) * kPaddedInputDimensions;
const IndexType offset2 = (i + 2) * kPaddedInputDimensions;
const IndexType offset3 = (i + 3) * kPaddedInputDimensions;
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
__m256i sum1 = _mm256_setzero_si256();
__m256i sum2 = _mm256_setzero_si256();
__m256i sum3 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
__m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]);
__m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]);
__m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks; ++j)
{
const __m256i in = input_vector[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
m256_add_dpbusd_epi32(sum1, in, row1[j]);
m256_add_dpbusd_epi32(sum2, in, row2[j]);
m256_add_dpbusd_epi32(sum3, in, row3[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
#endif
}
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
}
}
else if constexpr (kOutputDimensions == 1)
{
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
#if defined (USE_VNNI)
__m256i sum0 = _mm256_setzero_si256();
const IndexType kStart = 0;
#else
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
const IndexType kStart = 1;
#endif
for (IndexType j = kStart; j < kNumChunks; ++j)
{
const __m256i in = input_vector[j];
#if defined (USE_VNNI)
m256_add_dpbusd_epi32(sum0, in, row0[j]);
#else
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
#endif
}
output[0] = m256_hadd(sum0, biases_[0]);
}
else
{
// This case can never happen because kOutputDimensions
// is always 1 or a multiple of kSimdWidth.
assert(false);
}
#elif defined (USE_SSSE3)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
#ifndef USE_SSSE3
const __m128i kZeros = _mm_setzero_si128();
#else
const __m128i kOnes = _mm_set1_epi16(1);
#endif
auto output = reinterpret_cast<OutputType*>(buffer);
const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
// kOutputDimensions is either 1 or a multiple of kSimdWidth
// because then it is also an input dimension.
if constexpr (kOutputDimensions % 4 == 0)
{
for (IndexType i = 0; i < kOutputDimensions; i += 4)
{
const IndexType offset0 = (i + 0) * kPaddedInputDimensions;
const IndexType offset1 = (i + 1) * kPaddedInputDimensions;
const IndexType offset2 = (i + 2) * kPaddedInputDimensions;
const IndexType offset3 = (i + 3) * kPaddedInputDimensions;
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[offset0]);
const auto row1 = reinterpret_cast<const __m128i*>(&weights_[offset1]);
const auto row2 = reinterpret_cast<const __m128i*>(&weights_[offset2]);
const auto row3 = reinterpret_cast<const __m128i*>(&weights_[offset3]);
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
__m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]);
__m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]);
__m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]);
for (int j = 1; j < (int)kNumChunks; ++j)
{
const __m128i in = input_vector[j];
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j]));
sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j]));
sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j]));
sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j]));
}
*outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias);
}
}
else if constexpr (kOutputDimensions == 1)
{
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[0]);
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
for (int j = 1; j < (int)kNumChunks; ++j)
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j]));
output[0] = m128_hadd(sum0, biases_[0]);
}
else
{
// This case can never happen because kOutputDimensions
// is always 1 or a multiple of kSimdWidth.
assert(false);
}
#else
// Use old implementation for the other architectures.
auto output = reinterpret_cast<OutputType*>(buffer);
#if defined(USE_SSE2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
#ifndef USE_SSSE3
const __m128i kZeros = _mm_setzero_si128();
#else
const __m128i kOnes = _mm_set1_epi16(1);
#endif
const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m64 kZeros = _mm_setzero_si64();
const auto input_vector = reinterpret_cast<const __m64*>(input);
#elif defined(USE_NEON)
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
#endif
#endif
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType offset = i * kPaddedInputDimensions;
#if defined(USE_AVX512)
__m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(USE_VNNI)
sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
__m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product);
#endif
}
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
// and we have to do one more 256bit chunk.
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
{
const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
#if defined(USE_VNNI)
__m256i product256 = _mm256_dpbusd_epi32(
_mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
sum = _mm512_inserti32x8(sum, product256, 0);
#else
__m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
#endif
}
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
#elif defined(USE_AVX2)
__m256i sum = _mm256_setzero_si256();
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
#if defined(USE_VNNI)
sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
#else
__m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product);
#endif
}
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
output[i] = _mm_cvtsi128_si32(sum128) + biases_[i];
#elif defined(USE_SSSE3)
__m128i sum = _mm_setzero_si128();
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (int j = 0; j < (int)kNumChunks - 1; j += 2) {
__m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
product0 = _mm_madd_epi16(product0, kOnes);
sum = _mm_add_epi32(sum, product0);
__m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1]));
product1 = _mm_madd_epi16(product1, kOnes);
sum = _mm_add_epi32(sum, product1);
}
if (kNumChunks & 0x1) {
__m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1]));
product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product);
}
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
output[i] = _mm_cvtsi128_si32(sum) + biases_[i];
#elif defined(USE_SSE2)
#if defined(USE_SSE2)
__m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
__m128i sum_hi = kZeros;
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
@@ -223,7 +739,7 @@ namespace Eval::NNUE::Layers {
sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum);
#elif defined(USE_MMX)
#elif defined(USE_MMX)
__m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
__m64 sum_hi = kZeros;
const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
@@ -244,7 +760,7 @@ namespace Eval::NNUE::Layers {
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
output[i] = _mm_cvtsi64_si32(sum);
#elif defined(USE_NEON)
#elif defined(USE_NEON)
int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -254,18 +770,21 @@ namespace Eval::NNUE::Layers {
}
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
#else
#else
OutputType sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
sum += weights_[offset + j] * input[j];
}
output[i] = sum;
#endif
#endif
}
#if defined(USE_MMX)
#if defined(USE_MMX)
_mm_empty();
#endif
#endif
#endif
return output;
}
+32 -13
View File
@@ -23,6 +23,10 @@
#include "../nnue_common.h"
#include <string>
#include <cstdint>
#include <type_traits>
namespace Eval::NNUE::Layers {
// Clipped ReLU
@@ -47,6 +51,8 @@ namespace Eval::NNUE::Layers {
static constexpr std::size_t kBufferSize =
PreviousLayer::kBufferSize + kSelfBufferSize;
static constexpr int kLayerIndex = PreviousLayer::kLayerIndex + 1;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0x538D24C7u;
@@ -54,11 +60,24 @@ namespace Eval::NNUE::Layers {
return hash_value;
}
// Name of this layer, including its number of output dimensions.
static std::string get_name() {
    std::string name = "ClippedReLU[";
    name += std::to_string(kOutputDimensions);
    name += "]";
    return name;
}
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "ClippedReLU[" +
std::to_string(kOutputDimensions) + "](" +
PreviousLayer::GetStructureString() + ")";
static std::string get_structure_string() {
return get_name() + "(" +
PreviousLayer::get_structure_string() + ")";
}
// Returns the listing produced by the previous layers with one more line
// appended for this layer, holding its layer index and its name.
static std::string get_layers_info() {
    std::string listing = PreviousLayer::get_layers_info();
    listing.append("\n - ")
           .append(std::to_string(kLayerIndex))
           .append(" - ")
           .append(get_name());
    return listing;
}
// Read network parameters
@@ -68,7 +87,7 @@ namespace Eval::NNUE::Layers {
// Write network parameters.
// This layer only forwards the request to the previous layer and returns
// that layer's result.
bool WriteParameters(std::ostream& stream) const {
    return previous_layer_.WriteParameters(stream);
}
// Forward propagation
@@ -86,12 +105,12 @@ namespace Eval::NNUE::Layers {
const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_loadA_si256(&in[i * 4 + 0]),
_mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits);
_mm256_load_si256(&in[i * 4 + 0]),
_mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_loadA_si256(&in[i * 4 + 2]),
_mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits);
_mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_load_si256(&in[i * 4 + 2]),
_mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
}
constexpr IndexType kStart = kNumChunks * kSimdWidth;
@@ -170,9 +189,9 @@ namespace Eval::NNUE::Layers {
}
private:
// Make the learning class a friend
friend class Trainer<ClippedReLU>;
// Make the learning class a friend
friend class Trainer<ClippedReLU>;
PreviousLayer previous_layer_;
};
+21 -7
View File
@@ -41,6 +41,8 @@ class InputSlice {
// Size of forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize = 0;
static constexpr int kLayerIndex = 1;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xEC42E90Du;
@@ -48,12 +50,24 @@ class InputSlice {
return hash_value;
}
// A string that represents the structure from the input layer to this layer.
// Format: InputSlice[<size>(<offset>:<offset + size>)]
static std::string GetStructureString() {
    const std::string size = std::to_string(kOutputDimensions);
    const std::string begin = std::to_string(Offset);
    const std::string end = std::to_string(Offset + kOutputDimensions);
    return "InputSlice[" + size + "(" + begin + ":" + end + ")]";
}
// Name of this layer: its size followed by the covered input range.
// Format: InputSlice[<size>(<offset>:<offset + size>)]
static std::string get_name() {
    std::string name = "InputSlice[";
    name += std::to_string(kOutputDimensions);
    name += "(";
    name += std::to_string(Offset);
    name += ":";
    name += std::to_string(Offset + kOutputDimensions);
    name += ")]";
    return name;
}
// A string that represents the structure from the input layer to this layer.
// No previous layer is referenced here, so the structure string is simply
// this layer's own name.
static std::string get_structure_string() {
return get_name();
}
// Returns the first line of the layer listing: this layer's index
// followed by its name.
static std::string get_layers_info() {
    return " - " + std::to_string(kLayerIndex) + " - " + get_name();
}
// Read network parameters
bool ReadParameters(std::istream& /*stream*/) {
@@ -62,7 +76,7 @@ class InputSlice {
// Write network parameters.
// Nothing is written to the stream by this layer, so this always succeeds.
bool WriteParameters(std::ostream& /*stream*/) const {
    return true;
}
// Forward propagation
+160 -127
View File
@@ -1,163 +1,196 @@
// Definition of layer Sum of NNUE evaluation function
#ifndef _NNUE_LAYERS_SUM_H_
#ifndef _NNUE_LAYERS_SUM_H_
#define _NNUE_LAYERS_SUM_H_
#if defined(EVAL_NNUE)
#include "nnue/nnue_common.h"
#include "../nnue_common.h"
// Definition of layer Sum of NNUE evaluation function
namespace Eval::NNUE::Layers {
namespace Eval {
// Layer that sums the output of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Sum : public Sum<RemainingPreviousLayers...> {
private:
using Head = FirstPreviousLayer;
using Tail = Sum<RemainingPreviousLayers...>;
namespace NNUE {
public:
// Input/output type
using InputType = typename Head::OutputType;
namespace Layers {
using OutputType = InputType;
// Layer that sums the output of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Sum : public Sum<RemainingPreviousLayers...> {
private:
using Head = FirstPreviousLayer;
using Tail = Sum<RemainingPreviousLayers...>;
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
public:
// Input/output type
using InputType = typename Head::OutputType;
using OutputType = InputType;
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
// number of input/output dimensions
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
// number of input/output dimensions
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
static_assert(kInputDimensions == Tail::kInputDimensions ,"");
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
static_assert(kInputDimensions == Tail::kInputDimensions ,"");
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
// Size of forward propagation buffer used in this layer
static constexpr std::size_t kSelfBufferSize =
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
// Hash value embedded in the evaluation function file.
// Mixes a fixed seed with the hash of the head layer (rotated right by 1)
// and the hash of the remaining summands (rotated right by 2).
static constexpr std::uint32_t GetHashValue() {
    const auto head_hash = Head::GetHashValue();
    const auto tail_hash = Tail::GetHashValue();
    return 0xBCE400B4u
        ^ (head_hash >> 1) ^ (head_hash << 31)
        ^ (tail_hash >> 2) ^ (tail_hash << 30);
}
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize =
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
// A string that represents the structure from the input layer to this layer.
// Format: Sum[<dimensions>](<summands>)
static std::string GetStructureString() {
    std::string structure = "Sum[";
    structure += std::to_string(kOutputDimensions);
    structure += "](";
    structure += GetSummandsString();
    structure += ")";
    return structure;
}
static constexpr int kLayerIndex = Tail::kLayerIndex + 1;
// read parameters
bool ReadParameters(std::istream& stream) {
if (!Tail::ReadParameters(stream)) return false;
return previous_layer_.ReadParameters(stream);
}
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xBCE400B4u;
hash_value ^= Head::GetHashValue() >> 1;
hash_value ^= Head::GetHashValue() << 31;
hash_value ^= Tail::GetHashValue() >> 2;
hash_value ^= Tail::GetHashValue() << 30;
return hash_value;
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!Tail::WriteParameters(stream)) return false;
return previous_layer_.WriteParameters(stream);
}
static std::string get_name() {
return "Sum[" +
std::to_string(kOutputDimensions) + "]";
}
// Forward propagation.
// Tail::Propagate is run on `buffer` first (its return value is ignored; the
// values are read back from the start of `buffer`). The head layer is then
// evaluated into the region kSelfBufferSize bytes further on, and its output
// is added element by element onto the values at the start of `buffer`.
// Returns `buffer` viewed as an array of OutputType.
const OutputType* Propagate(
    const TransformedFeatureType* transformed_features, char* buffer) const {
  Tail::Propagate(transformed_features, buffer);
  const auto sums = reinterpret_cast<OutputType*>(buffer);
  const auto head_values = previous_layer_.Propagate(
      transformed_features, buffer + kSelfBufferSize);
  for (IndexType dim = 0; dim < kOutputDimensions; ++dim)
    sums[dim] += head_values[dim];
  return sums;
}
// A string that represents the structure from the input layer to this layer
static std::string get_structure_string() {
return get_name() + "(" + get_summands_string() + ")";
}
protected:
// A string that represents the list of layers to be summed
static std::string GetSummandsString() {
return Head::GetStructureString() + "," + Tail::GetSummandsString();
}
// Layer listing: whatever Tail::get_layers_info() returns, followed by a new
// line of the form " - <layer index> - <layer name>" for this layer.
static std::string get_layers_info() {
  std::string listing = Tail::get_layers_info();
  listing.append("\n - ")
         .append(std::to_string(kLayerIndex))
         .append(" - ")
         .append(get_name());
  return listing;
}
// Make the learning class a friend
friend class Trainer<Sum>;
// read parameters
bool ReadParameters(std::istream& stream) {
if (!Tail::ReadParameters(stream))
return false;
// the layer immediately before this layer
FirstPreviousLayer previous_layer_;
};
return previous_layer_.ReadParameters(stream);
}
// Layer that sums the output of multiple layers (when there is one template argument)
template <typename PreviousLayer>
class Sum<PreviousLayer> {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
using OutputType = InputType;
// write parameters
bool WriteParameters(std::ostream& stream) const {
if (!Tail::WriteParameters(stream))
return false;
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = kInputDimensions;
return previous_layer_.WriteParameters(stream);
}
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
// forward propagation
const OutputType* propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
// Hash value embedded in the evaluation function file
static constexpr std::uint32_t GetHashValue() {
std::uint32_t hash_value = 0xBCE400B4u;
hash_value ^= PreviousLayer::GetHashValue() >> 1;
hash_value ^= PreviousLayer::GetHashValue() << 31;
return hash_value;
}
Tail::propagate(transformed_features, buffer);
// A string that represents the structure from the input layer to this layer
static std::string GetStructureString() {
return "Sum[" +
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
}
const auto head_output = previous_layer_.Propagate(
transformed_features, buffer + kSelfBufferSize);
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
const auto output = reinterpret_cast<OutputType*>(buffer);
// write parameters
bool WriteParameters(std::ostream& stream) const {
return previous_layer_.WriteParameters(stream);
}
for (IndexType i = 0; i <kOutputDimensions; ++i) {
output[i] += head_output[i];
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
return previous_layer_.Propagate(transformed_features, buffer);
}
return output;
}
protected:
// A string that represents the list of layers to be summed
static std::string GetSummandsString() {
return PreviousLayer::GetStructureString();
}
protected:
// A string that represents the list of layers to be summed
static std::string get_summands_string() {
return Head::get_structure_string() + "," + Tail::get_summands_string();
}
// Make the learning class a friend
friend class Trainer<Sum>;
// Make the learning class a friend
friend class Trainer<Sum>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
// the layer immediately before this layer
FirstPreviousLayer previous_layer_;
};
} // namespace Layers
// Layer that sums the output of multiple layers (when there is one template argument)
template <typename PreviousLayer>
class Sum<PreviousLayer> {
public:
// Input/output type
using InputType = typename PreviousLayer::OutputType;
} // namespace NNUE
using OutputType = InputType;
} // namespace Eval
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
PreviousLayer::kOutputDimensions;
#endif // defined(EVAL_NNUE)
static constexpr IndexType kOutputDimensions = kInputDimensions;
// Size of the forward propagation buffer used from the input layer to this layer
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
static constexpr int kLayerIndex = PreviousLayer::kLayerIndex + 1;
// Hash value embedded in the evaluation function file.
// Mixes a fixed seed with two shifted copies of PreviousLayer's hash value.
static constexpr std::uint32_t GetHashValue() {
  const auto previous_hash = PreviousLayer::GetHashValue();
  return 0xBCE400B4u ^ (previous_hash >> 1) ^ (previous_hash << 31);
}
static std::string get_name() {
return "Sum[" +
std::to_string(kOutputDimensions) + "]";
}
// A string that represents the structure from the input layer to this layer
static std::string get_structure_string() {
return get_name() + "(" + get_summands_string() + ")";
}
// Layer listing: PreviousLayer's listing followed by one new line describing
// this layer. The line uses the " - <layer index> - <layer name>" format that
// the other layers' get_layers_info() implementations emit, so the combined
// listing is formatted uniformly from the first layer to the last.
static std::string get_layers_info() {
  std::string info = PreviousLayer::get_layers_info();
  info += "\n - ";
  info += std::to_string(kLayerIndex);
  info += " - ";
  info += get_name();
  return info;
}
// read parameters
bool ReadParameters(std::istream& stream) {
return previous_layer_.ReadParameters(stream);
}
// write parameters
bool WriteParameters(std::ostream& stream) const {
return previous_layer_.WriteParameters(stream);
}
// forward propagation
const OutputType* Propagate(
const TransformedFeatureType* transformed_features, char* buffer) const {
return previous_layer_.Propagate(transformed_features, buffer);
}
protected:
// A string that represents the list of layers to be summed
static std::string get_summands_string() {
return PreviousLayer::get_structure_string();
}
// Make the learning class a friend
friend class Trainer<Sum>;
// the layer immediately before this layer
PreviousLayer previous_layer_;
};
} // namespace Eval::NNUE::Layers
#endif
+2 -5
View File
@@ -27,11 +27,8 @@ namespace Eval::NNUE {
// Class that holds the result of affine transformation of input features
struct alignas(kCacheLineSize) Accumulator {
std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score;
bool computed_accumulation;
bool computed_score;
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
bool computed_accumulation;
};
} // namespace Eval::NNUE
+3 -24
View File
@@ -21,6 +21,8 @@
#ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED
#include "types.h"
#include <cstring>
#include <iostream>
@@ -43,29 +45,6 @@
#include <arm_neon.h>
#endif
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
// compiled with older g++ crashes because the output memory is not aligned
// even though alignas is specified.
#if defined(USE_AVX2)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
#define _mm256_loadA_si256 _mm256_loadu_si256
#define _mm256_storeA_si256 _mm256_storeu_si256
#else
#define _mm256_loadA_si256 _mm256_load_si256
#define _mm256_storeA_si256 _mm256_store_si256
#endif
#endif
#if defined(USE_AVX512)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
#define _mm512_loadA_si512 _mm512_loadu_si512
#define _mm512_storeA_si512 _mm512_storeu_si512
#else
#define _mm512_loadA_si512 _mm512_load_si512
#define _mm512_storeA_si512 _mm512_store_si512
#endif
#endif
namespace Eval::NNUE {
// Version of the evaluation file
@@ -113,7 +92,7 @@ namespace Eval::NNUE {
PS_END2 = 12 * SQUARE_NB + 1
};
extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
// Type of input feature after conversion
using TransformedFeatureType = std::uint8_t;
+295 -177
View File
@@ -25,10 +25,66 @@
#include "nnue_architecture.h"
#include "features/index_list.h"
#include <cstring> // std::memset()
#include <cstring>
#include <string>
namespace Eval::NNUE {
// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
//
// Exactly one branch below is selected and provides a small, uniform
// vocabulary for the accumulator code:
//   vec_t            SIMD register type
//   vec_load(a)      read a vec_t through pointer a
//   vec_store(a,b)   write vec_t b through pointer a
//   vec_add_16(a,b)  a + b on packed 16-bit integers
//   vec_sub_16(a,b)  a - b on packed 16-bit integers
//   vec_zero         an all-zero vec_t (a braced initializer on NEON)
//   kNumRegs         number of vec_t values that make up one tile
// VECTOR remains defined only when one of the instruction sets was selected;
// the final #else branch undefines it again.
//
// NOTE(review): the first test uses #ifdef while the others use #elif on the
// macro's value, so the USE_* macros are presumably defined to a non-zero
// value by the build - confirm.
// NOTE(review): where vec_store expands to `*(a)=(b)` the expansion is an
// unparenthesized assignment; use it only as a complete statement.
#define VECTOR
// AVX-512: 512-bit registers.
#ifdef USE_AVX512
typedef __m512i vec_t;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
#define vec_zero _mm512_setzero_si512()
static constexpr IndexType kNumRegs = 8; // only 8 are needed
// AVX2: 256-bit registers.
#elif USE_AVX2
typedef __m256i vec_t;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
#define vec_zero _mm256_setzero_si256()
static constexpr IndexType kNumRegs = 16;
// SSE2: 128-bit registers; load/store are plain pointer dereferences.
#elif USE_SSE2
typedef __m128i vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
#define vec_zero _mm_setzero_si128()
// The tile is twice as large when Is64Bit is true.
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
// MMX: 64-bit registers; load/store are plain pointer dereferences.
#elif USE_MMX
typedef __m64 vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_zero _mm_setzero_si64()
static constexpr IndexType kNumRegs = 8;
// NEON: vectors of eight 16-bit integers; load/store are plain dereferences.
#elif USE_NEON
typedef int16x8_t vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) *(a)=(b)
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
#define vec_zero {0}
static constexpr IndexType kNumRegs = 16;
// No supported vector instruction set: withdraw VECTOR.
#else
#undef VECTOR
#endif
// Input feature converter
class FeatureTransformer {
@@ -36,6 +92,11 @@ namespace Eval::NNUE {
// Number of output dimensions for one side
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
#ifdef VECTOR
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
#endif
public:
// Output type
using OutputType = TransformedFeatureType;
@@ -48,20 +109,36 @@ namespace Eval::NNUE {
static constexpr std::size_t kBufferSize =
kOutputDimensions * sizeof(OutputType);
static constexpr int kLayerIndex = 0;
// Hash value embedded in the evaluation file
static constexpr std::uint32_t GetHashValue() {
return RawFeatures::kHashValue ^ kOutputDimensions;
}
// Name of this layer, in the form
// "<raw feature set name>[<input dimensions>-><half dimensions>x2]".
static std::string get_name() {
  std::string name = RawFeatures::get_name();
  name += "[";
  name += std::to_string(kInputDimensions);
  name += "->";
  name += std::to_string(kHalfDimensions);
  name += "x2]";
  return name;
}
// a string representing the structure
static std::string GetStructureString() {
return RawFeatures::GetName() + "[" +
std::to_string(kInputDimensions) + "->" +
std::to_string(kHalfDimensions) + "x2]";
static std::string get_structure_string() {
return get_name();
}
// Listing entry for this layer: " - <layer index> - <layer name>".
// Unlike the other layers' versions there is no previous layer to prepend
// and no leading newline.
static std::string get_layers_info() {
  return " - " + std::to_string(kLayerIndex) + " - " + get_name();
}
// Read network parameters
bool ReadParameters(std::istream& stream) {
for (std::size_t i = 0; i < kHalfDimensions; ++i)
biases_[i] = read_little_endian<BiasType>(stream);
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
@@ -72,34 +149,45 @@ namespace Eval::NNUE {
// write parameters
bool WriteParameters(std::ostream& stream) const {
stream.write(reinterpret_cast<const char*>(biases_),
kHalfDimensions * sizeof(BiasType));
kHalfDimensions * sizeof(BiasType));
stream.write(reinterpret_cast<const char*>(weights_),
kHalfDimensions * kInputDimensions * sizeof(WeightType));
kHalfDimensions * kInputDimensions * sizeof(WeightType));
return !stream.fail();
}
// Proceed with the difference calculation if possible
bool UpdateAccumulatorIfPossible(const Position& pos) const {
bool update_accumulator_if_possible(const Position& pos) const {
const auto now = pos.state();
if (now->accumulator.computed_accumulation) {
if (now->accumulator.computed_accumulation)
return true;
}
const auto prev = now->previous;
if (prev && prev->accumulator.computed_accumulation) {
UpdateAccumulator(pos);
update_accumulator(pos);
return true;
}
return false;
}
// Convert input features
void Transform(const Position& pos, OutputType* output, bool refresh) const {
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
RefreshAccumulator(pos);
}
void Transform(const Position& pos, OutputType* output) const {
if (!update_accumulator_if_possible(pos))
refresh_accumulator(pos);
const auto& accumulation = pos.state()->accumulator.accumulation;
#if defined(USE_AVX2)
#if defined(USE_AVX512)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
const __m512i kZero = _mm512_setzero_si512();
#elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256();
@@ -126,14 +214,39 @@ namespace Eval::NNUE {
for (IndexType p = 0; p < 2; ++p) {
const IndexType offset = kHalfDimensions * p;
#if defined(USE_AVX2)
#if defined(USE_AVX512)
auto out = reinterpret_cast<__m512i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m512i sum0 = _mm512_load_si512(
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m512i sum1 = _mm512_load_si512(
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm512_add_epi16(sum0, reinterpret_cast<const __m512i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm512_add_epi16(sum1, reinterpret_cast<const __m512i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
}
#elif defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 = _mm256_loadA_si256(
__m256i sum0 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
__m256i sum1 = _mm256_loadA_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
_mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
__m256i sum1 = _mm256_load_si256(
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
}
@@ -144,14 +257,21 @@ namespace Eval::NNUE {
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
_mm_store_si128(&out[j],
#ifdef USE_SSE41
_mm_max_epi8(packedbytes, kZero)
_mm_max_epi8(packedbytes, kZero)
#else
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
#endif
);
@@ -164,6 +284,13 @@ namespace Eval::NNUE {
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 0]);
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][i])[j * 2 + 1]);
}
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
@@ -173,12 +300,22 @@ namespace Eval::NNUE {
for (IndexType j = 0; j < kNumChunks; ++j) {
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][0])[j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
accumulation[perspectives[p]][i])[j]);
}
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
}
output[offset + j] = static_cast<OutputType>(
std::max<int>(0, std::min<int>(127, sum)));
}
@@ -192,108 +329,150 @@ namespace Eval::NNUE {
private:
// Calculate cumulative value without using difference calculation
void RefreshAccumulator(const Position& pos) const {
void refresh_accumulator(const Position& pos) const {
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[kNumRegs];
#endif
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX512)
auto accumulation = reinterpret_cast<__m512i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
for (IndexType j = 0; j < kNumChunks; ++j)
_mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
active_indices);
for (Color perspective : { WHITE, BLACK }) {
#ifdef VECTOR
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
#elif defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
_mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_zero;
}
#elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
#elif defined(USE_MMX)
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
for (IndexType k = 0; k < kNumRegs; k++)
vec_store(&accTile[k], acc[k]);
}
#else
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif
}
#elif defined(USE_NEON)
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
#else
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
#endif
}
}
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true;
}
// Calculate cumulative value using difference calculation
void UpdateAccumulator(const Position& pos) const {
const auto prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
IndexType i = 0;
void update_accumulator(const Position& pos) const {
#ifdef VECTOR
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
// is defined in the VECTOR code below, once in each branch
vec_t acc[kNumRegs];
#endif
const auto& prev_accumulator = pos.state()->previous->accumulator;
auto& accumulator = pos.state()->accumulator;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
bool reset[2];
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
bool reset[2] = { false, false };
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
removed_indices, added_indices, reset);
#ifdef VECTOR
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
for (Color perspective : { WHITE, BLACK }) {
auto accTile = reinterpret_cast<vec_t*>(
&accumulator.accumulation[perspective][i][j * kTileHeight]);
if (reset[perspective]) {
if (i == 0) {
auto biasesTile = reinterpret_cast<const vec_t*>(
&biases_[j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = biasesTile[k];
} else {
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_zero;
}
} else {
auto prevAccTile = reinterpret_cast<const vec_t*>(
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_load(&prevAccTile[k]);
// Difference calculation for the deactivated features
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_sub_16(acc[k], column[k]);
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
for (IndexType k = 0; k < kNumRegs; ++k)
acc[k] = vec_add_16(acc[k], column[k]);
}
}
for (IndexType k = 0; k < kNumRegs; ++k)
vec_store(&accTile[k], acc[k]);
}
}
#if defined(USE_MMX)
_mm_empty();
#endif
#else
for (Color perspective : { WHITE, BLACK }) {
#if defined(USE_AVX2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>(
&accumulator.accumulation[perspective][i][0]);
#endif
if (reset[perspective]) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
if (i == 0) {
std::memcpy(accumulator.accumulation[perspective][i], biases_,
kHalfDimensions * sizeof(BiasType));
} else {
std::memset(accumulator.accumulation[perspective][i], 0,
kHalfDimensions * sizeof(BiasType));
}
} else {
std::memcpy(accumulator.accumulation[perspective][i],
prev_accumulator.accumulation[perspective][i],
@@ -302,83 +481,22 @@ namespace Eval::NNUE {
for (const auto index : removed_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
}
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
}
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] -=
weights_[offset + j];
}
#endif
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
}
}
{ // Difference calculation for the activated features
for (const auto index : added_indices[perspective]) {
const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX2)
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
}
#elif defined(USE_SSE2)
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
}
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
}
#elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else
for (IndexType j = 0; j < kHalfDimensions; ++j) {
accumulator.accumulation[perspective][i][j] +=
weights_[offset + j];
}
#endif
for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
}
}
#if defined(USE_MMX)
_mm_empty();
#endif
#endif
}
accumulator.computed_accumulation = true;
accumulator.computed_score = false;
}
using BiasType = std::int16_t;
+203 -189
View File
@@ -1,201 +1,215 @@
// USI extended command for NNUE evaluation function
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
#include "../thread.h"
#include "../uci.h"
#include "evaluate_nnue.h"
#include "evaluate_nnue.h"
#include "nnue_test_command.h"
#include "thread.h"
#include "uci.h"
#include <set>
#include <fstream>
#define ASSERT(X) { if (!(X)) { std::cout << "\nError : ASSERT(" << #X << "), " << __FILE__ << "(" << __LINE__ << "): " << __func__ << std::endl; \
std::this_thread::sleep_for(std::chrono::microseconds(3000)); *(int*)1 =0;} }
namespace Eval {
namespace NNUE {
namespace {
// Testing RawFeatures mainly for difference calculation
void TestFeatures(Position& pos) {
const std::uint64_t num_games = 1000;
StateInfo si;
pos.set(StartFEN, false, &si, Threads.main());
const int MAX_PLY = 256; // test up to 256 hands
StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps
int ply; // Trouble from the initial phase
PRNG prng(20171128);
std::uint64_t num_moves = 0;
std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1);
std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
constexpr IndexType kUnknown = -1;
std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
// Builds the active feature indices of `pos` from scratch.
// The result is indexed as index_sets[trigger][perspective], with one entry
// per element of kRefreshTriggers and two perspectives per trigger.
// Side effects: records in trigger_map which trigger each seen index belongs
// to, and ASSERTs on any inconsistency.
auto make_index_sets = [&](const Position& pos) {
std::vector<std::vector<std::set<IndexType>>> index_sets(
kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList active_indices[2];
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
active_indices);
for (const auto perspective : Colors) {
for (const auto index : active_indices[perspective]) {
// The index must be inside the feature space ...
ASSERT(index < RawFeatures::kDimensions);
// ... must not be reported twice for the same trigger and perspective ...
ASSERT(index_sets[i][perspective].count(index) == 0);
// ... and must never have been seen under a different trigger.
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
index_sets[i][perspective].insert(index);
trigger_map[index] = i;
}
}
}
return index_sets;
};
// Applies the incremental change reported by RawFeatures::AppendChangedIndices
// for `pos` to `*index_sets` (same layout as the result of make_index_sets:
// (*index_sets)[trigger][perspective]).
// For each trigger and perspective:
//   - if a reset is reported, the set is cleared and num_resets is bumped;
//   - otherwise every removed index is erased;
//   - in both cases every added index is inserted.
// Each erased or inserted index is counted in num_updates (per trigger and
// in the grand total kept in the last element) and recorded in trigger_map.
auto update_index_sets = [&](const Position& pos, auto* index_sets) {
  for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
    Features::IndexList removed_indices[2], added_indices[2];
    // Start from "no reset" so the flags are well defined even if
    // AppendChangedIndices leaves them untouched; update_accumulator
    // initializes its reset flags the same way.
    bool reset[2] = { false, false };
    RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
                                      removed_indices, added_indices, reset);
    for (const auto perspective : Colors) {
      if (reset[perspective]) {
        (*index_sets)[i][perspective].clear();
        ++num_resets[i];
      } else {
        for (const auto index : removed_indices[perspective]) {
          ASSERT(index < RawFeatures::kDimensions);
          // A removed index must currently be present.
          ASSERT((*index_sets)[i][perspective].count(index) == 1);
          ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
          (*index_sets)[i][perspective].erase(index);
          ++num_updates.back();
          ++num_updates[i];
          trigger_map[index] = i;
        }
      }
      for (const auto index : added_indices[perspective]) {
        ASSERT(index < RawFeatures::kDimensions);
        // An added index must not be present yet.
        ASSERT((*index_sets)[i][perspective].count(index) == 0);
        ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
        (*index_sets)[i][perspective].insert(index);
        ++num_updates.back();
        ++num_updates[i];
        trigger_map[index] = i;
      }
    }
  }
};
std::cout << "feature set: " << RawFeatures::GetName()
<< "[" << RawFeatures::kDimensions << "]" << std::endl;
std::cout << "start testing with random games";
// Play num_games random games from the start position. After every move the
// incrementally updated index sets must equal a from-scratch rebuild.
for (std::uint64_t i = 0; i < num_games; ++i) {
auto index_sets = make_index_sets(pos);
for (ply = 0; ply < MAX_PLY; ++ply) {
MoveList<LEGAL> mg(pos); // Generate all legal moves
// No legal move is available, so this game cannot continue
if (mg.size() == 0)
break;
// Pick one of the generated moves at random and play it on the board.
Move m = mg.begin()[prng.rand(mg.size())];
pos.do_move(m, state[ply]);
++num_moves;
update_index_sets(pos, &index_sets);
ASSERT(index_sets == make_index_sets(pos));
}
// Put the board back to the start position before the next game.
pos.set(StartFEN, false, &si, Threads.main());
// Print a '.' every 100 games so that progress is visible
if ((i % 100) == 0)
std::cout << "." << std::flush;
}
std::cout << "passed." << std::endl;
std::cout << num_games << " games, " << num_moves << " moves, "
<< num_updates.back() << " updates, "
<< (1.0 * num_updates.back() / num_moves)
<< " updates per move" << std::endl;
std::size_t num_observed_indices = 0;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
const auto count = std::count(trigger_map.begin(), trigger_map.end(), i);
num_observed_indices += count;
std::cout << "TriggerEvent(" << static_cast<int>(kRefreshTriggers[i])
<< "): " << count << " features ("
<< (100.0 * count / RawFeatures::kDimensions) << "%), "
<< num_updates[i] << " updates ("
<< (1.0 * num_updates[i] / num_moves) << " per move), "
<< num_resets[i] << " resets ("
<< (100.0 * num_resets[i] / num_moves) << "%)"
<< std::endl;
}
std::cout << "observed " << num_observed_indices << " ("
<< (100.0 * num_observed_indices / RawFeatures::kDimensions)
<< "% of " << RawFeatures::kDimensions
<< ") features" << std::endl;
// Test-only assertion. When X is false it prints the failing expression with
// file, line and function, sleeps briefly, and then deliberately crashes the
// process by writing through an invalid pointer.
//
// The body is wrapped in do { ... } while (0) so that `ASSERT(x);` expands to
// exactly one statement; with a bare { ... } block the trailing semicolon
// becomes an extra empty statement and `if (c) ASSERT(x); else ...` does not
// compile.
#define ASSERT(X) do { \
  if (!(X)) { \
    std::cout \
      << "\nError : ASSERT(" << #X << "), " \
      << __FILE__ << "(" << __LINE__ << "): " \
      << __func__ << std::endl; \
    std::this_thread::sleep_for(std::chrono::microseconds(3000)); \
    *(int*)1 =0; \
  } \
} while (0)
// Output a string that represents the structure of the evaluation function
void PrintInfo(std::istream& stream) {
std::cout << "network architecture: " << GetArchitectureString() << std::endl;
while (true) {
std::string file_name;
stream >> file_name;
if (file_name.empty()) break;
std::uint32_t hash_value;
std::string architecture;
const bool success = [&]() {
std::ifstream file_stream(file_name, std::ios::binary);
if (!file_stream) return false;
if (!ReadHeader(file_stream, &hash_value, &architecture)) return false;
return true;
}();
std::cout << file_name << ": ";
if (success) {
if (hash_value == kHashValue) {
std::cout << "matches with this binary";
if (architecture != GetArchitectureString()) {
std::cout << ", but architecture string differs: " << architecture;
}
std::cout << std::endl;
} else {
std::cout << architecture << std::endl;
}
} else {
std::cout << "failed to read header" << std::endl;
}
}
}
} // namespace
// USI extended command for NNUE evaluation function
void TestCommand(Position& pos, std::istream& stream) {
std::string sub_command;
stream >> sub_command;
namespace Eval::NNUE {
if (sub_command == "test_features") {
TestFeatures(pos);
} else if (sub_command == "info") {
PrintInfo(stream);
} else {
std::cout << "usage:" << std::endl;
std::cout << " test nnue test_features" << std::endl;
std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl;
}
}
namespace {
} // namespace NNUE
// Testing RawFeatures mainly for difference calculation
void test_features(Position& pos) {
const std::uint64_t num_games = 1000;
StateInfo si;
pos.set(StartFEN, false, &si, Threads.main());
const int MAX_PLY = 256; // test up to 256 hands
} // namespace Eval
StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps
int ply; // Trouble from the initial phase
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
PRNG prng(20171128);
std::uint64_t num_moves = 0;
std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1);
std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
constexpr IndexType kUnknown = -1;
std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
// Builds from scratch the sets of active feature indices of `position`, one
// set per refresh trigger and perspective ([trigger][perspective]).
// Every visited index is range-checked, must not repeat within its set and
// must belong to a single trigger; trigger_map records that trigger.
auto make_index_sets = [&](const Position& position) {
  using IndexSet = std::set<IndexType>;
  using PerspectiveSets = std::vector<IndexSet>;

  std::vector<PerspectiveSets> index_sets(kRefreshTriggers.size(),
                                          PerspectiveSets(2));
  for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
    Features::IndexList active_indices[2];
    RawFeatures::append_active_indices(position, kRefreshTriggers[i],
                                       active_indices);
    for (const auto perspective : Colors) {
      for (const auto index : active_indices[perspective]) {
        ASSERT(index < RawFeatures::kDimensions);
        ASSERT(index_sets[i][perspective].count(index) == 0);
        ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
        trigger_map[index] = i;
        index_sets[i][perspective].insert(index);
      }
    }
  }
  return index_sets;
};
// Folds the incremental feature changes reported for `position` into
// *index_sets (indexed as [trigger][perspective]) and accumulates the
// per-trigger update / reset counters and the index -> trigger map.
auto update_index_sets = [&](const Position& position, auto* index_sets) {
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
Features::IndexList removed_indices[2], added_indices[2];
// Start from "no reset" so both flags hold a defined value when read below.
bool reset[2] = { false, false };
RawFeatures::append_changed_indices(position, kRefreshTriggers[i],
removed_indices, added_indices, reset);
for (const auto perspective : Colors) {
if (reset[perspective]) {
// A reset drops the whole tracked set; the removed list is ignored.
(*index_sets)[i][perspective].clear();
++num_resets[i];
} else {
for (const auto index : removed_indices[perspective]) {
ASSERT(index < RawFeatures::kDimensions);
// A removed index must currently be present in the tracked set.
ASSERT((*index_sets)[i][perspective].count(index) == 1);
// An index may only ever be reported by one trigger.
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
(*index_sets)[i][perspective].erase(index);
// The last element of num_updates is the total over all triggers.
++num_updates.back();
++num_updates[i];
trigger_map[index] = i;
}
}
// Added indices are applied whether or not a reset happened.
for (const auto index : added_indices[perspective]) {
ASSERT(index < RawFeatures::kDimensions);
// An added index must not already be present in the tracked set.
ASSERT((*index_sets)[i][perspective].count(index) == 0);
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
(*index_sets)[i][perspective].insert(index);
++num_updates.back();
++num_updates[i];
trigger_map[index] = i;
}
}
}
};
std::cout << "feature set: " << RawFeatures::get_name()
<< "[" << RawFeatures::kDimensions << "]" << std::endl;
std::cout << "start testing with random games";
// Play num_games random games from the start position. After every move the
// incrementally updated index sets must equal a from-scratch rebuild.
for (std::uint64_t i = 0; i < num_games; ++i) {
auto index_sets = make_index_sets(pos);
for (ply = 0; ply < MAX_PLY; ++ply) {
MoveList<LEGAL> mg(pos); // Generate all legal moves
// No legal move is available, so this game cannot continue
if (mg.size() == 0)
break;
// Pick one of the generated moves at random and play it on the board.
Move m = mg.begin()[prng.rand(mg.size())];
pos.do_move(m, state[ply]);
++num_moves;
update_index_sets(pos, &index_sets);
ASSERT(index_sets == make_index_sets(pos));
}
// Put the board back to the start position before the next game.
pos.set(StartFEN, false, &si, Threads.main());
// Print a '.' every 100 games so that progress is visible
if ((i % 100) == 0)
std::cout << "." << std::flush;
}
std::cout << "passed." << std::endl;
std::cout << num_games << " games, " << num_moves << " moves, "
<< num_updates.back() << " updates, "
<< (1.0 * num_updates.back() / num_moves)
<< " updates per move" << std::endl;
std::size_t num_observed_indices = 0;
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
const auto count = std::count(trigger_map.begin(), trigger_map.end(), i);
num_observed_indices += count;
std::cout << "TriggerEvent(" << static_cast<int>(kRefreshTriggers[i])
<< "): " << count << " features ("
<< (100.0 * count / RawFeatures::kDimensions) << "%), "
<< num_updates[i] << " updates ("
<< (1.0 * num_updates[i] / num_moves) << " per move), "
<< num_resets[i] << " resets ("
<< (100.0 * num_resets[i] / num_moves) << "%)"
<< std::endl;
}
std::cout << "observed " << num_observed_indices << " ("
<< (100.0 * num_observed_indices / RawFeatures::kDimensions)
<< "% of " << RawFeatures::kDimensions
<< ") features" << std::endl;
}
// Output a string that represents the structure of the evaluation function
void print_info(std::istream& stream) {
std::cout << "network architecture: " << get_architecture_string() << std::endl;
while (true) {
std::string file_name;
stream >> file_name;
if (file_name.empty())
break;
std::uint32_t hash_value;
std::string architecture;
const bool success = [&]() {
std::ifstream file_stream(file_name, std::ios::binary);
if (!file_stream)
return false;
if (!read_header(file_stream, &hash_value, &architecture))
return false;
return true;
}();
std::cout << file_name << ": ";
if (success) {
if (hash_value == kHashValue) {
std::cout << "matches with this binary";
if (architecture != get_architecture_string()) {
std::cout << ", but architecture string differs: " << architecture;
}
std::cout << std::endl;
} else {
std::cout << architecture << std::endl;
}
} else {
std::cout << "failed to read header" << std::endl;
}
}
}
} // namespace
// Entry point of the extended "test nnue" command: reads the sub-command
// from `stream` and dispatches to it, or prints a usage summary when the
// sub-command is not recognized.
void test_command(Position& pos, std::istream& stream) {
  std::string sub_command;
  stream >> sub_command;

  if (sub_command == "test_features") {
    test_features(pos);
    return;
  }
  if (sub_command == "info") {
    print_info(stream);
    return;
  }

  // Unknown or missing sub-command.
  std::cout << "usage:" << std::endl
            << " test nnue test_features" << std::endl
            << " test nnue info [path/to/" << fileName << "...]" << std::endl;
}
} // namespace Eval::NNUE
+6 -15
View File
@@ -1,21 +1,12 @@
// USI extended command interface for NNUE evaluation function
#ifndef _NNUE_TEST_COMMAND_H_
#ifndef _NNUE_TEST_COMMAND_H_
#define _NNUE_TEST_COMMAND_H_
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
// USI extended command interface for NNUE evaluation function
namespace Eval::NNUE {
namespace Eval {
// USI extended command for NNUE evaluation function
void test_command(Position& pos, std::istream& stream);
namespace NNUE {
// USI extended command for NNUE evaluation function
void TestCommand(Position& pos, std::istream& stream);
} // namespace NNUE
} // namespace Eval
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
} // namespace Eval::NNUE
#endif
@@ -0,0 +1,10 @@
#ifndef _NNUE_TRAINER_FEATURES_ALL_FACTORIZERS_H_
#define _NNUE_TRAINER_FEATURES_ALL_FACTORIZERS_H_
#include "factorizer.h"
#include "factorizer_feature_set.h"
#include "factorizer_half_kp.h"
#include "factorizer_half_ka.h"
#endif
+97 -90
View File
@@ -1,110 +1,117 @@
// NNUE evaluation function feature conversion class template
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_H_
#if defined(EVAL_NNUE)
#include "nnue/nnue_common.h"
#include "../../nnue_common.h"
#include "../trainer.h"
#include "nnue/trainer/trainer.h"
namespace Eval {
// NNUE evaluation function feature conversion class template
namespace Eval::NNUE::Features {
namespace NNUE {
// Class template that converts input features into learning features
// By default, the learning feature is the same as the original input feature, and specialized as necessary
template <typename FeatureType>
class Factorizer {
public:
static constexpr std::string get_name() {
return "Factorizer<" + FeatureType::get_name() + "> -> " + std::string("No factorizer");
}
namespace Features {
static constexpr std::string get_factorizers_string() {
return " - " + get_name();
}
// Class template that converts input features into learning features
// By default, the learning feature is the same as the original input feature, and specialized as necessary
template <typename FeatureType>
class Factorizer {
public:
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return FeatureType::kDimensions;
}
// Get the dimensionality of the learning feature
static constexpr IndexType get_dimensions() {
return FeatureType::kDimensions;
}
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features) {
assert(base_index <FeatureType::kDimensions);
training_features->emplace_back(base_index);
}
};
// Get index of learning feature and scale of learning rate
static void append_training_features(
IndexType base_index, std::vector<TrainingFeature>* training_features) {
// Learning feature information: describes one group of training features
// that a Factorizer can emit.
struct FeatureProperties {
// Whether the group is used; inactive groups are skipped by the helpers that take a FeatureProperties.
bool active;
// Number of indices the group occupies in the training feature index space.
IndexType dimensions;
};
assert(base_index <FeatureType::kDimensions);
training_features->emplace_back(base_index);
}
};
// Add the original input feature to the learning features.
// Appends `base_index` unchanged to *training_features and returns
// properties.dimensions, i.e. the offset at which the next feature group starts.
// `properties` must describe FeatureType itself (checked by the first assert).
template <typename FeatureType>
IndexType AppendBaseFeature(
FeatureProperties properties, IndexType base_index,
std::vector<TrainingFeature>* training_features) {
assert(properties.dimensions == FeatureType::kDimensions);
assert(base_index < FeatureType::kDimensions);
training_features->emplace_back(base_index);
return properties.dimensions;
}
// Learning feature information: describes one group of training features
// that a Factorizer can emit.
struct FeatureProperties {
// Whether the group is used; inactive groups are skipped by the helpers that take a FeatureProperties.
bool active;
// Number of indices the group occupies in the training feature index space.
IndexType dimensions;
};
// If the feature group is active, inherit the learning features of another feature type.
// Runs Factorizer<FeatureType> on `base_index`, appends the features it
// produces to *training_features and shifts each of them by `index_offset`.
// Returns the number of indices consumed: 0 when the group is inactive,
// otherwise properties.dimensions.
template <typename FeatureType>
IndexType InheritFeaturesIfRequired(
IndexType index_offset, FeatureProperties properties, IndexType base_index,
std::vector<TrainingFeature>* training_features) {
if (!properties.active) {
return 0;
}
assert(properties.dimensions == Factorizer<FeatureType>::GetDimensions());
assert(base_index < FeatureType::kDimensions);
// Remember where the newly appended features begin so only those are shifted.
const auto start = training_features->size();
Factorizer<FeatureType>::AppendTrainingFeatures(
base_index, training_features);
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
feature.ShiftIndex(index_offset);
}
return properties.dimensions;
}
// Add the original input features to the learning features
template <typename FeatureType>
IndexType append_base_feature(
FeatureProperties properties, IndexType base_index,
std::vector<TrainingFeature>* training_features) {
// Returns how far the training feature index offset advances for a feature
// group without appending any learning features: 0 when the group is
// inactive, otherwise properties.dimensions.
// Call instead of InheritFeaturesIfRequired() if there are no corresponding features.
// Declared inline because this non-template function is defined in a header;
// without it, including the header from more than one translation unit
// produces multiple definitions.
inline IndexType SkipFeatures(FeatureProperties properties) {
  if (!properties.active) {
    return 0;
  }
  return properties.dimensions;
}
// Get the dimensionality of the learning feature
template <std::size_t N>
constexpr IndexType GetActiveDimensions(
const FeatureProperties (&properties)[N]) {
static_assert(N > 0, "");
IndexType dimensions = properties[0].dimensions;
for (std::size_t i = 1; i < N; ++i) {
if (properties[i].active) {
dimensions += properties[i].dimensions;
assert(properties.dimensions == FeatureType::kDimensions);
assert(base_index < FeatureType::kDimensions);
training_features->emplace_back(base_index);
return properties.dimensions;
}
}
return dimensions;
}
// Number of elements of a built-in array, deduced from its type at compile time.
template <typename T, std::size_t N>
constexpr std::size_t GetArrayLength(const T (&)[N]) {
  return sizeof(T[N]) / sizeof(T);
}
// If the learning rate scale is not 0, inherit other types of learning features
template <typename FeatureType>
IndexType inherit_features_if_required(
IndexType index_offset, FeatureProperties properties, IndexType base_index,
std::vector<TrainingFeature>* training_features) {
} // namespace Features
if (!properties.active) {
return 0;
}
} // namespace NNUE
assert(properties.dimensions == Factorizer<FeatureType>::get_dimensions());
assert(base_index < FeatureType::kDimensions);
} // namespace Eval
const auto start = training_features->size();
Factorizer<FeatureType>::append_training_features(
base_index, training_features);
#endif // defined(EVAL_NNUE)
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
assert(feature.get_index() < Factorizer<FeatureType>::get_dimensions());
feature.shift_index(index_offset);
}
return properties.dimensions;
}
// Returns how far the training feature index offset advances for a feature
// group without appending any learning features: 0 when the group is
// inactive, otherwise properties.dimensions.
// Call instead of inherit_features_if_required() if there are no corresponding features.
// Declared inline because this non-template function is defined in a header;
// without it, including the header from more than one translation unit
// produces multiple definitions.
inline IndexType skip_features(FeatureProperties properties) {
    if (!properties.active)
        return 0;

    return properties.dimensions;
}
// Total dimensionality of the learning features described by `properties`.
// The first entry always counts, regardless of its `active` flag; every
// later entry contributes its dimensions only when it is active.
template <std::size_t N>
constexpr IndexType get_active_dimensions(
    const FeatureProperties (&properties)[N]) {

    static_assert(N > 0, "");

    IndexType total = properties[0].dimensions;
    for (const FeatureProperties* entry = properties + 1;
         entry != properties + N; ++entry) {
        if (!entry->active)
            continue;
        total += entry->dimensions;
    }
    return total;
}
// Number of elements of a built-in array, deduced from its type at compile time.
template <typename T, std::size_t N>
constexpr std::size_t get_array_length(const T (&)[N]) {
    return sizeof(T[N]) / sizeof(T);
}
} // namespace Eval::NNUE::Features
#endif
@@ -1,104 +1,121 @@
// Specialization for feature set of feature conversion class template of NNUE evaluation function
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
#if defined(EVAL_NNUE)
#include "../../features/feature_set.h"
#include "factorizer.h"
namespace Eval {
#include "nnue/features/feature_set.h"
namespace NNUE {
// Specialization for feature set of feature conversion class template of NNUE evaluation function
namespace Eval::NNUE::Features {
namespace Features {
// Class template that converts input features into learning features
// Specialization for FeatureSet
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class Factorizer<FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = Factorizer<FeatureSet<FirstFeatureType>>;
using Tail = Factorizer<FeatureSet<RemainingFeatureTypes...>>;
// Class template that converts input features into learning features
// Specialization for FeatureSet
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
class Factorizer<FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
private:
using Head = Factorizer<FeatureSet<FirstFeatureType>>;
using Tail = Factorizer<FeatureSet<RemainingFeatureTypes...>>;
public:
// number of dimensions of original input features
static constexpr IndexType kBaseDimensions =
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;
public:
// number of dimensions of original input features
static constexpr IndexType kBaseDimensions =
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return Head::GetDimensions() + Tail::GetDimensions();
}
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features,
IndexType base_dimensions = kBaseDimensions) {
assert(base_index < kBaseDimensions);
constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;
if (base_index < boundary) {
Tail::AppendTrainingFeatures(
base_index, training_features, base_dimensions);
} else {
const auto start = training_features->size();
Head::AppendTrainingFeatures(
base_index - boundary, training_features, base_dimensions);
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
const auto index = feature.GetIndex();
assert(index < Head::GetDimensions() ||
(index >= base_dimensions &&
index < base_dimensions +
Head::GetDimensions() - Head::kBaseDimensions));
if (index < Head::kBaseDimensions) {
feature.ShiftIndex(Tail::kBaseDimensions);
} else {
feature.ShiftIndex(Tail::GetDimensions() - Tail::kBaseDimensions);
static constexpr std::string get_factorizers_string() {
std::string str = " - ";
str += Head::get_name();
str += '\n';
str += Tail::get_factorizers_string();
return str;
}
}
}
}
};
// Class template that converts input features into learning features
// Specialization when FeatureSet has one template argument
template <typename FeatureType>
class Factorizer<FeatureSet<FeatureType>> {
public:
// number of dimensions of original input features
static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;
// Get the dimensionality of the learning feature
static constexpr IndexType get_dimensions() {
return Head::get_dimensions() + Tail::get_dimensions();
}
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return Factorizer<FeatureType>::GetDimensions();
}
// Get index of learning feature and scale of learning rate
static void append_training_features(
IndexType base_index, std::vector<TrainingFeature>* training_features,
IndexType base_dimensions = kBaseDimensions) {
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features,
IndexType base_dimensions = kBaseDimensions) {
assert(base_index < kBaseDimensions);
const auto start = training_features->size();
Factorizer<FeatureType>::AppendTrainingFeatures(
base_index, training_features);
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
if (feature.GetIndex() >= kBaseDimensions) {
feature.ShiftIndex(base_dimensions - kBaseDimensions);
}
}
}
};
assert(base_index < kBaseDimensions);
} // namespace Features
constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;
} // namespace NNUE
if (base_index < boundary) {
Tail::append_training_features(
base_index, training_features, base_dimensions);
}
else {
const auto start = training_features->size();
} // namespace Eval
Head::append_training_features(
base_index - boundary, training_features, base_dimensions);
#endif // defined(EVAL_NNUE)
for (auto i = start; i < training_features->size(); ++i) {
auto& feature = (*training_features)[i];
const auto index = feature.get_index();
assert(index < Head::get_dimensions() ||
(index >= base_dimensions &&
index < base_dimensions +
Head::get_dimensions() - Head::kBaseDimensions));
if (index < Head::kBaseDimensions) {
feature.shift_index(Tail::kBaseDimensions);
}
else {
feature.shift_index(Tail::get_dimensions() - Tail::kBaseDimensions);
}
}
}
}
};
// Class template that converts input features into learning features
// Specialization when FeatureSet has one template argument
//
// Forwards to Factorizer<FeatureType> and relocates any training feature that
// lies beyond the base feature range so that it starts at `base_dimensions`.
template <typename FeatureType>
class Factorizer<FeatureSet<FeatureType>> {
public:
    // number of dimensions of original input features
    static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;

    // Name of the wrapped factorizer.
    // Not constexpr: std::string is not a literal type in C++17, so a
    // constexpr function returning it can never be constant-evaluated.
    static std::string get_name() {
        return Factorizer<FeatureType>::get_name();
    }

    // One list line (" - <name>") describing this factorizer.
    static std::string get_factorizers_string() {
        return " - " + get_name();
    }

    // Get the dimensionality of the learning feature
    static constexpr IndexType get_dimensions() {
        return Factorizer<FeatureType>::get_dimensions();
    }

    // Get index of learning feature and scale of learning rate
    //   base_index        : index of the original input feature (< kBaseDimensions)
    //   training_features : receives the appended learning features
    //   base_dimensions   : size of the base feature range of the enclosing
    //                       feature set; defaults to this feature's own size
    static void append_training_features(
        IndexType base_index, std::vector<TrainingFeature>* training_features,
        IndexType base_dimensions = kBaseDimensions) {

        assert(base_index < kBaseDimensions);

        // Only the features appended by this call are post-processed.
        const auto start = training_features->size();
        Factorizer<FeatureType>::append_training_features(
            base_index, training_features);

        for (auto i = start; i < training_features->size(); ++i) {
            auto& feature = (*training_features)[i];
            assert(feature.get_index() < Factorizer<FeatureType>::get_dimensions());
            // Indices past the base range are factorized features: move them
            // so that they begin after base_dimensions.
            if (feature.get_index() >= kBaseDimensions) {
                feature.shift_index(base_dimensions - kBaseDimensions);
            }
        }
    }
};
} // namespace Eval::NNUE::Features
#endif
@@ -0,0 +1,93 @@
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KA_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KA_H_
#include "factorizer.h"
#include "nnue/features/half_ka.h"
#include "nnue/features/a.h"
#include "nnue/features/half_relative_ka.h"
// Specialization of NNUE evaluation function feature conversion class template for HalfKA
namespace Eval::NNUE::Features {
// Class template that converts input features into learning features
// Specialization for HalfKA
//
// Each HalfKA index is expanded into up to three learning features placed
// back to back in the index space: the HalfKA index itself, the feature A,
// and the feature HalfRelativeKA.
template <Side AssociatedKing>
class Factorizer<HalfKA<AssociatedKing>> {
private:
    using FeatureType = HalfKA<AssociatedKing>;

    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions =
        FeatureType::kMaxActiveDimensions;

    // Type of learning feature
    enum TrainingFeatureType {
        kFeaturesHalfKA,
        kFeaturesA,
        kFeaturesHalfRelativeKA,
        kNumTrainingFeatureTypes,
    };

    // Learning feature information, one entry per TrainingFeatureType in order
    static constexpr FeatureProperties kProperties[] = {
        // kFeaturesHalfKA
        {true, FeatureType::kDimensions},
        // kFeaturesA
        {true, Factorizer<A>::get_dimensions()},
        // kFeaturesHalfRelativeKA
        {true, Factorizer<HalfRelativeKA<AssociatedKing>>::get_dimensions()},
    };

    static_assert(get_array_length(kProperties) == kNumTrainingFeatureTypes, "");

public:
    // Human-readable description of this factorizer.
    // Not constexpr: std::string is not a literal type in C++17, so a
    // constexpr function returning it can never be constant-evaluated.
    static std::string get_name() {
        return std::string("Factorizer<") + FeatureType::kName + "> -> " + "A, HalfRelativeKA";
    }

    // One list line (" - <name>") describing this factorizer.
    static std::string get_factorizers_string() {
        return " - " + get_name();
    }

    // Get the dimensionality of the learning feature
    static constexpr IndexType get_dimensions() {
        return get_active_dimensions(kProperties);
    }

    // Get index of learning feature and scale of learning rate
    //   base_index        : HalfKA index of the original input feature
    //   training_features : receives the appended learning features
    static void append_training_features(
        IndexType base_index, std::vector<TrainingFeature>* training_features) {

        // kFeaturesHalfKA
        IndexType index_offset = append_base_feature<FeatureType>(
            kProperties[kFeaturesHalfKA], base_index, training_features);

        // Split the HalfKA index into its two components.
        const auto sq_k = static_cast<Square>(base_index / PS_END2);
        const auto a = static_cast<IndexType>(base_index % PS_END2);

        // kFeaturesA
        index_offset += inherit_features_if_required<A>(
            index_offset, kProperties[kFeaturesA], a, training_features);

        // kFeaturesHalfRelativeKA
        if (a >= PS_W_PAWN) {
            index_offset += inherit_features_if_required<HalfRelativeKA<AssociatedKing>>(
                index_offset, kProperties[kFeaturesHalfRelativeKA],
                HalfRelativeKA<AssociatedKing>::make_index(sq_k, a),
                training_features);
        }
        else {
            // No relative feature for this index: advance the offset only.
            index_offset += skip_features(kProperties[kFeaturesHalfRelativeKA]);
        }

        // Every feature group must have been accounted for exactly once.
        assert(index_offset == get_dimensions());
    }
};
// Out-of-class definition of the static constexpr data member kProperties.
// NOTE(review): since C++17 a static constexpr data member is implicitly
// inline, which makes this redeclaration redundant; presumably kept for
// older toolchains - confirm before removing.
template <Side AssociatedKing>
constexpr FeatureProperties Factorizer<HalfKA<AssociatedKing>>::kProperties[];
} // namespace Eval::NNUE::Features
#endif // #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KA_H_
+83 -82
View File
@@ -1,103 +1,104 @@
// Specialization of NNUE evaluation function feature conversion class template for HalfKP
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
#if defined(EVAL_NNUE)
#include "../../features/half_kp.h"
#include "../../features/p.h"
#include "../../features/half_relative_kp.h"
#include "factorizer.h"
namespace Eval {
#include "nnue/features/half_kp.h"
#include "nnue/features/p.h"
#include "nnue/features/half_relative_kp.h"
namespace NNUE {
// Specialization of NNUE evaluation function feature conversion class template for HalfKP
namespace Eval::NNUE::Features {
namespace Features {
// Class template that converts input features into learning features
// Specialization for HalfKP
template <Side AssociatedKing>
class Factorizer<HalfKP<AssociatedKing>> {
private:
using FeatureType = HalfKP<AssociatedKing>;
// Class template that converts input features into learning features
// Specialization for HalfKP
template <Side AssociatedKing>
class Factorizer<HalfKP<AssociatedKing>> {
private:
using FeatureType = HalfKP<AssociatedKing>;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions =
FeatureType::kMaxActiveDimensions;
// Type of learning feature
enum TrainingFeatureType {
kFeaturesHalfKP,
kFeaturesHalfK,
kFeaturesP,
kFeaturesHalfRelativeKP,
kNumTrainingFeatureTypes,
};
// Type of learning feature
enum TrainingFeatureType {
kFeaturesHalfKP,
kFeaturesHalfK,
kFeaturesP,
kFeaturesHalfRelativeKP,
kNumTrainingFeatureTypes,
};
// Learning feature information
static constexpr FeatureProperties kProperties[] = {
// kFeaturesHalfKP
{true, FeatureType::kDimensions},
// kFeaturesHalfK
{true, SQUARE_NB},
// kFeaturesP
{true, Factorizer<P>::get_dimensions()},
// kFeaturesHalfRelativeKP
{true, Factorizer<HalfRelativeKP<AssociatedKing>>::get_dimensions()},
};
// Learning feature information
static constexpr FeatureProperties kProperties[] = {
// kFeaturesHalfKP
{true, FeatureType::kDimensions},
// kFeaturesHalfK
{true, SQUARE_NB},
// kFeaturesP
{true, Factorizer<P>::GetDimensions()},
// kFeaturesHalfRelativeKP
{true, Factorizer<HalfRelativeKP<AssociatedKing>>::GetDimensions()},
};
static_assert(GetArrayLength(kProperties) == kNumTrainingFeatureTypes, "");
static_assert(get_array_length(kProperties) == kNumTrainingFeatureTypes, "");
public:
// Get the dimensionality of the learning feature
static constexpr IndexType GetDimensions() {
return GetActiveDimensions(kProperties);
}
public:
static constexpr std::string get_name() {
return std::string("Factorizer<") + FeatureType::kName + "> -> " + "HalfK, P, HalfRelativeKP";
}
// Get index of learning feature and scale of learning rate
static void AppendTrainingFeatures(
IndexType base_index, std::vector<TrainingFeature>* training_features) {
// kFeaturesHalfKP
IndexType index_offset = AppendBaseFeature<FeatureType>(
kProperties[kFeaturesHalfKP], base_index, training_features);
static constexpr std::string get_factorizers_string() {
return " - " + get_name();
}
const auto sq_k = static_cast<Square>(base_index / PS_END);
const auto p = static_cast<IndexType>(base_index % PS_END);
// kFeaturesHalfK
{
const auto& properties = kProperties[kFeaturesHalfK];
if (properties.active) {
training_features->emplace_back(index_offset + sq_k);
index_offset += properties.dimensions;
}
}
// kFeaturesP
index_offset += InheritFeaturesIfRequired<P>(
index_offset, kProperties[kFeaturesP], p, training_features);
// kFeaturesHalfRelativeKP
if (p >= PS_W_PAWN) {
index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
index_offset, kProperties[kFeaturesHalfRelativeKP],
HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
training_features);
} else {
index_offset += SkipFeatures(kProperties[kFeaturesHalfRelativeKP]);
}
// Get the dimensionality of the learning feature
static constexpr IndexType get_dimensions() {
return get_active_dimensions(kProperties);
}
assert(index_offset == GetDimensions());
}
};
// Get index of learning feature and scale of learning rate
static void append_training_features(
IndexType base_index, std::vector<TrainingFeature>* training_features) {
template <Side AssociatedKing>
constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];
// kFeaturesHalfKP
IndexType index_offset = append_base_feature<FeatureType>(
kProperties[kFeaturesHalfKP], base_index, training_features);
} // namespace Features
const auto sq_k = static_cast<Square>(base_index / PS_END);
const auto p = static_cast<IndexType>(base_index % PS_END);
} // namespace NNUE
// kFeaturesHalfK
{
const auto& properties = kProperties[kFeaturesHalfK];
if (properties.active) {
training_features->emplace_back(index_offset + sq_k);
index_offset += properties.dimensions;
}
}
} // namespace Eval
// kFeaturesP
index_offset += inherit_features_if_required<P>(
index_offset, kProperties[kFeaturesP], p, training_features);
// kFeaturesHalfRelativeKP
if (p >= PS_W_PAWN) {
index_offset += inherit_features_if_required<HalfRelativeKP<AssociatedKing>>(
index_offset, kProperties[kFeaturesHalfRelativeKP],
HalfRelativeKP<AssociatedKing>::make_index(sq_k, p),
training_features);
}
else {
index_offset += skip_features(kProperties[kFeaturesHalfRelativeKP]);
}
#endif // defined(EVAL_NNUE)
assert(index_offset == get_dimensions());
}
};
template <Side AssociatedKing>
constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];
} // namespace Eval::NNUE::Features
#endif
+96 -99
View File
@@ -1,125 +1,122 @@
// Common header of class template for learning NNUE evaluation function
#ifndef _NNUE_TRAINER_H_
#ifndef _NNUE_TRAINER_H_
#define _NNUE_TRAINER_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../nnue_common.h"
#include "../features/index_list.h"
#include "nnue/nnue_common.h"
#include "nnue/features/index_list.h"
#include <sstream>
#if defined(USE_BLAS)
static_assert(std::is_same<LearnFloatType, float>::value, "");
#include <cblas.h>
#endif
namespace Eval {
// Common header of class template for learning NNUE evaluation function
namespace Eval::NNUE {
namespace NNUE {
// Ponanza constant used in the relation between evaluation value and winning percentage
constexpr double kPonanzaConstant = 600.0;
// Ponanza constant used in the relation between evaluation value and winning percentage
constexpr double kPonanzaConstant = 600.0;
// Class that represents one index of learning feature
class TrainingFeature {
using StorageType = std::uint32_t;
static_assert(std::is_unsigned<StorageType>::value, "");
// Class that represents one index of learning feature
class TrainingFeature {
using StorageType = std::uint32_t;
static_assert(std::is_unsigned<StorageType>::value, "");
public:
static constexpr std::uint32_t kIndexBits = 24;
public:
static constexpr std::uint32_t kIndexBits = 24;
static_assert(kIndexBits < std::numeric_limits<StorageType>::digits, "");
static constexpr std::uint32_t kCountBits =
std::numeric_limits<StorageType>::digits - kIndexBits;
static_assert(kIndexBits < std::numeric_limits<StorageType>::digits, "");
explicit TrainingFeature(IndexType index) :
index_and_count_((index << kCountBits) | 1) {
assert(index < (1 << kIndexBits));
}
TrainingFeature& operator+=(const TrainingFeature& other) {
assert(other.GetIndex() == GetIndex());
assert(other.GetCount() + GetCount() < (1 << kCountBits));
index_and_count_ += other.GetCount();
return *this;
}
IndexType GetIndex() const {
return static_cast<IndexType>(index_and_count_ >> kCountBits);
}
void ShiftIndex(IndexType offset) {
assert(GetIndex() + offset < (1 << kIndexBits));
index_and_count_ += offset << kCountBits;
}
IndexType GetCount() const {
return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
}
bool operator<(const TrainingFeature& other) const {
return index_and_count_ < other.index_and_count_;
}
static constexpr std::uint32_t kCountBits =
std::numeric_limits<StorageType>::digits - kIndexBits;
private:
StorageType index_and_count_;
};
explicit TrainingFeature(IndexType index) :
index_and_count_((index << kCountBits) | 1) {
// Structure that represents one sample of training data
struct Example {
std::vector<TrainingFeature> training_features[2];
Learner::PackedSfenValue psv;
int sign;
double weight;
};
assert(index < (1 << kIndexBits));
}
// Message used for setting hyperparameters
struct Message {
Message(const std::string& name, const std::string& value = ""):
name(name), value(value), num_peekers(0), num_receivers(0) {}
const std::string name;
const std::string value;
std::uint32_t num_peekers;
std::uint32_t num_receivers;
};
TrainingFeature& operator+=(const TrainingFeature& other) {
assert(other.get_index() == get_index());
assert(other.get_count() + get_count() < (1 << kCountBits));
index_and_count_ += other.get_count();
return *this;
}
// determine whether to accept the message
bool ReceiveMessage(const std::string& name, Message* message) {
const auto subscript = "[" + std::to_string(message->num_peekers) + "]";
if (message->name.substr(0, name.size() + 1) == name + "[") {
++message->num_peekers;
}
if (message->name == name || message->name == name + subscript) {
++message->num_receivers;
return true;
}
return false;
}
IndexType get_index() const {
return static_cast<IndexType>(index_and_count_ >> kCountBits);
}
// split the string
std::vector<std::string> Split(const std::string& input, char delimiter) {
std::istringstream stream(input);
std::string field;
std::vector<std::string> fields;
while (std::getline(stream, field, delimiter)) {
fields.push_back(field);
}
return fields;
}
void shift_index(IndexType offset) {
assert(get_index() + offset < (1 << kIndexBits));
index_and_count_ += offset << kCountBits;
}
// round a floating point number to an integer
template <typename IntType>
IntType Round(double value) {
return static_cast<IntType>(std::floor(value + 0.5));
}
IndexType get_count() const {
return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
}
// make_shared with alignment
template <typename T, typename... ArgumentTypes>
std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
const auto ptr = new(std_aligned_alloc(alignof(T), sizeof(T)))
T(std::forward<ArgumentTypes>(arguments)...);
return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
}
bool operator<(const TrainingFeature& other) const {
return index_and_count_ < other.index_and_count_;
}
} // namespace NNUE
private:
StorageType index_and_count_;
};
} // namespace Eval
// Structure that represents one sample of training data
struct Example {
// Two independent feature lists for the same sample.
// NOTE(review): presumably one list per perspective (side to move / other side) - confirm against the code that fills it.
std::vector<TrainingFeature> training_features[2];
// Packed training record this example was built from (type declared in the learner code).
Learner::PackedSfenValue psv;
// NOTE(review): presumably the evaluation produced by the quantized network for this position - confirm where it is assigned.
Value discrete_nn_eval;
// NOTE(review): presumably +1/-1 to orient the score for the side to move - confirm with the producer of Example.
int sign;
// Per-sample weight. NOTE(review): how it enters the loss is not visible here - confirm.
double weight;
};
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
// Message used for setting hyperparameters.
// A message carries an immutable (name, value) pair plus two counters that
// receive_message() updates while the message travels through the trainers:
//  - num_peekers:   how many receivers looked at a subscripted form of the name
//  - num_receivers: how many receivers actually accepted the message
struct Message {
    const std::string name;
    const std::string value;
    std::uint32_t num_peekers = 0;
    std::uint32_t num_receivers = 0;

    // message_value defaults to an empty string for messages that are pure commands.
    Message(const std::string& message_name, const std::string& message_value = "")
        : name(message_name), value(message_value)
    {
    }
};
// Determine whether `message` is addressed to the receiver called `name`.
//
// A message is accepted when its name is exactly `name`, or when it is
// `name[k]` and k equals the number of receivers that have already peeked at
// a `name[...]` message (the subscript is built from num_peekers *before* it
// is incremented below). This lets a subscripted message reach only the k-th
// receiver with that base name.
//
// Side effects on *message: num_peekers is incremented whenever the message
// name starts with `name[`; num_receivers is incremented on acceptance.
// Returns true when the message is accepted.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline bool receive_message(const std::string& name, Message* message) {
    const auto subscript = "[" + std::to_string(message->num_peekers) + "]";

    if (message->name.substr(0, name.size() + 1) == name + "[") {
        ++message->num_peekers;
    }

    if (message->name == name || message->name == name + subscript) {
        ++message->num_receivers;
        return true;
    }

    return false;
}
// Round a floating point number to the nearest integer of type IntType.
// Exact halves are rounded towards positive infinity (2.5 -> 3, -2.5 -> -2),
// because the value is shifted by +0.5 and then floored.
template <typename IntType>
IntType round(double value) {
    const double shifted = value + 0.5;
    const double floored = std::floor(shifted);
    return static_cast<IntType>(floored);
}
// make_shared with alignment.
// Obtains storage for one T from std_aligned_alloc (aligned to alignof(T)),
// constructs the object in place from the forwarded arguments, and hands it
// to a shared_ptr that releases it through AlignedDeleter<T>.
template <typename T, typename... ArgumentTypes>
std::shared_ptr<T> make_aligned_shared_ptr(ArgumentTypes&&... arguments) {
    void* const storage = std_aligned_alloc(alignof(T), sizeof(T));
    T* const object = new(storage) T(std::forward<ArgumentTypes>(arguments)...);
    return std::shared_ptr<T>(object, AlignedDeleter<T>());
}
} // namespace Eval::NNUE
#endif
+456 -281
View File
@@ -1,301 +1,476 @@
// Specialization of NNUE evaluation function learning class template for AffineTransform
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#define _NNUE_TRAINER_AFFINE_TRANSFORM_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../learn/learn.h"
#include "../layers/affine_transform.h"
#include "trainer.h"
#include "extra/stockfish_blas.h"
#include "learn/learn.h"
#include "nnue/layers/affine_transform.h"
#include "thread.h"
#include <random>
namespace Eval {
// Specialization of NNUE evaluation function learning class template for AffineTransform
namespace Eval::NNUE {
namespace NNUE {
// Learning: Affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
private:
// Type of layer to learn
using LayerType = Layers::AffineTransform<PreviousLayer, OutputDimensions>;
// Learning: Affine transformation layer
template <typename PreviousLayer, IndexType OutputDimensions>
class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
private:
// Type of layer to learn
using LayerType = Layers::AffineTransform<PreviousLayer, OutputDimensions>;
public:
// factory function
static std::shared_ptr<Trainer> create(
LayerType* target_layer, FeatureTransformer* ft) {
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
// Set options such as hyperparameters
void SendMessage(Message* message) {
previous_layer_trainer_->SendMessage(message);
if (ReceiveMessage("momentum", message)) {
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
}
if (ReceiveMessage("learning_rate_scale", message)) {
learning_rate_scale_ =
static_cast<LearnFloatType>(std::stod(message->value));
}
if (ReceiveMessage("reset", message)) {
DequantizeParameters();
}
if (ReceiveMessage("quantize_parameters", message)) {
QuantizeParameters();
}
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
previous_layer_trainer_->Initialize(rng);
if (kIsOutputLayer) {
// Initialize output layer with 0
std::fill(std::begin(biases_), std::end(biases_),
static_cast<LearnFloatType>(0.0));
std::fill(std::begin(weights_), std::end(weights_),
static_cast<LearnFloatType>(0.0));
} else {
// Assuming that the input distribution is unit-mean 0.5, equal variance,
// Initialize the output distribution so that each unit has a mean of 0.5 and the same variance as the input
const double kSigma = 1.0 / std::sqrt(kInputDimensions);
auto distribution = std::normal_distribution<double>(0.0, kSigma);
for (IndexType i = 0; i < kOutputDimensions; ++i) {
double sum = 0.0;
for (IndexType j = 0; j < kInputDimensions; ++j) {
const auto weight = static_cast<LearnFloatType>(distribution(rng));
weights_[kInputDimensions * i + j] = weight;
sum += weight;
return std::shared_ptr<Trainer>(
new Trainer(target_layer, ft));
}
biases_[i] = static_cast<LearnFloatType>(0.5 - 0.5 * sum);
}
}
QuantizeParameters();
}
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
batch_input_ = previous_layer_trainer_->Propagate(batch);
// Set options such as hyperparameters
void send_message(Message* message) {
previous_layer_trainer_->send_message(message);
if (receive_message("momentum", message)) {
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
}
if (receive_message("learning_rate_scale", message)) {
learning_rate_scale_ =
static_cast<LearnFloatType>(std::stod(message->value));
}
if (receive_message("reset", message)) {
dequantize_parameters();
}
if (receive_message("quantize_parameters", message)) {
quantize_parameters();
}
if (receive_message("check_health", message)) {
check_health();
}
}
// Initialize the parameters with random numbers
template <typename RNG>
void initialize(RNG& rng) {
previous_layer_trainer_->initialize(rng);
if (kIsOutputLayer) {
// Initialize output layer with 0
std::fill(std::begin(biases_), std::end(biases_),
static_cast<LearnFloatType>(0.0));
std::fill(std::begin(weights_), std::end(weights_),
static_cast<LearnFloatType>(0.0));
}
else {
// Assuming that the input distribution is unit-mean 0.5, equal variance,
// Initialize the output distribution so that each unit has a mean of 0.5 and the same variance as the input
const double kSigma = 1.0 / std::sqrt(kInputDimensions);
auto distribution = std::normal_distribution<double>(0.0, kSigma);
for (IndexType i = 0; i < kOutputDimensions; ++i) {
double sum = 0.0;
for (IndexType j = 0; j < kInputDimensions; ++j) {
const auto weight = static_cast<LearnFloatType>(distribution(rng));
weights_[kInputDimensions * i + j] = weight;
sum += weight;
}
biases_[i] = static_cast<LearnFloatType>(0.5 - 0.5 * sum);
}
}
quantize_parameters();
}
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
{
const auto size = batch_end - batch_begin;
if ((long)output_.size() < (long)kOutputDimensions * size) {
output_.resize(kOutputDimensions * size);
gradients_.resize(kInputDimensions * size);
}
if (thread_states_.size() < thread_pool.size())
{
thread_states_.resize(thread_pool.size());
}
combined_batch_size_ = size;
combined_batch_input_ = previous_layer_trainer_->step_start(thread_pool, batch_begin, batch_end);
auto& main_thread_state = thread_states_[0];
#if defined(USE_BLAS)
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
cblas_scopy(kOutputDimensions, biases_, 1, &output_[batch_offset], 1);
}
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
kOutputDimensions, batch_size_, kInputDimensions, 1.0,
weights_, kInputDimensions,
batch_input_, kInputDimensions,
1.0, &output_[0], kOutputDimensions);
#else
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_batch_offset = kInputDimensions * b;
const IndexType output_batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
double sum = biases_[i];
for (IndexType j = 0; j < kInputDimensions; ++j) {
const IndexType index = kInputDimensions * i + j;
sum += weights_[index] * batch_input_[input_batch_offset + j];
}
output_[output_batch_offset + i] = static_cast<LearnFloatType>(sum);
}
}
#endif
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
const LearnFloatType local_learning_rate =
learning_rate * learning_rate_scale_;
// update
cblas_sscal(
kOutputDimensions, momentum_, main_thread_state.biases_diff_, 1
);
#else
Blas::sscal(
kOutputDimensions, momentum_, main_thread_state.biases_diff_, 1
);
#endif
for (IndexType i = 1; i < thread_states_.size(); ++i)
thread_states_[i].reset_biases();
return output_.data();
}
// forward propagation
void propagate(Thread& th, const uint64_t offset, const uint64_t count) {
previous_layer_trainer_->propagate(th, offset, count);
#if defined(USE_BLAS)
// backpropagate
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
kInputDimensions, batch_size_, kOutputDimensions, 1.0,
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
cblas_scopy(
kOutputDimensions, biases_, 1, &output_[batch_offset], 1
);
}
cblas_sgemm(
CblasColMajor, CblasTrans, CblasNoTrans,
kOutputDimensions, count, kInputDimensions,
1.0,
weights_, kInputDimensions,
gradients, kOutputDimensions,
0.0, &gradients_[0], kInputDimensions);
// update
cblas_sscal(kOutputDimensions, momentum_, biases_diff_, 1);
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
cblas_saxpy(kOutputDimensions, 1.0,
&gradients[batch_offset], 1, biases_diff_, 1);
}
cblas_saxpy(kOutputDimensions, -local_learning_rate,
biases_diff_, 1, biases_, 1);
cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
kOutputDimensions, kInputDimensions, batch_size_, 1.0,
gradients, kOutputDimensions,
batch_input_, kInputDimensions,
momentum_, weights_diff_, kInputDimensions);
cblas_saxpy(kOutputDimensions * kInputDimensions, -local_learning_rate,
weights_diff_, 1, weights_, 1);
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
1.0,
&output_[offset * kOutputDimensions], kOutputDimensions
);
#else
// backpropagate
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_batch_offset = kInputDimensions * b;
const IndexType output_batch_offset = kOutputDimensions * b;
for (IndexType j = 0; j < kInputDimensions; ++j) {
double sum = 0.0;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = kInputDimensions * i + j;
sum += weights_[index] * gradients[output_batch_offset + i];
}
gradients_[input_batch_offset + j] = static_cast<LearnFloatType>(sum);
}
}
// update
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_diff_[i] *= momentum_;
}
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_diff_[i] *= momentum_;
}
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_batch_offset = kInputDimensions * b;
const IndexType output_batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_diff_[i] += gradients[output_batch_offset + i];
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
for (IndexType j = 0; j < kInputDimensions; ++j) {
const IndexType index = kInputDimensions * i + j;
weights_diff_[index] += gradients[output_batch_offset + i] *
batch_input_[input_batch_offset + j];
}
}
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_[i] -= local_learning_rate * biases_diff_[i];
}
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_[i] -= local_learning_rate * weights_diff_[i];
}
#endif
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
batch_size_(0),
batch_input_(nullptr),
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer),
biases_(),
weights_(),
biases_diff_(),
weights_diff_(),
momentum_(0.0),
learning_rate_scale_(1.0) {
DequantizeParameters();
}
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
Blas::scopy(
kOutputDimensions, biases_, 1, &output_[batch_offset], 1
);
}
// Weight saturation and parameterization
void QuantizeParameters() {
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_[i] = std::max(-kMaxWeightMagnitude,
std::min(+kMaxWeightMagnitude, weights_[i]));
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
target_layer_->biases_[i] =
Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const auto offset = kInputDimensions * i;
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
for (IndexType j = 0; j < kInputDimensions; ++j) {
target_layer_->weights_[padded_offset + j] =
Round<typename LayerType::WeightType>(
weights_[offset + j] * kWeightScale);
}
}
}
// read parameterized integer
void DequantizeParameters() {
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_[i] = static_cast<LearnFloatType>(
target_layer_->biases_[i] / kBiasScale);
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const auto offset = kInputDimensions * i;
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
for (IndexType j = 0; j < kInputDimensions; ++j) {
weights_[offset + j] = static_cast<LearnFloatType>(
target_layer_->weights_[padded_offset + j] / kWeightScale);
}
}
std::fill(std::begin(biases_diff_), std::end(biases_diff_),
static_cast<LearnFloatType>(0.0));
std::fill(std::begin(weights_diff_), std::end(weights_diff_),
static_cast<LearnFloatType>(0.0));
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions = LayerType::kInputDimensions;
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// If the output dimensionality is 1, the output layer
static constexpr bool kIsOutputLayer = kOutputDimensions == 1;
// Coefficient used for parameterization
static constexpr LearnFloatType kActivationScale =
std::numeric_limits<std::int8_t>::max();
static constexpr LearnFloatType kBiasScale = kIsOutputLayer ?
(kPonanzaConstant * FV_SCALE) :
((1 << kWeightScaleBits) * kActivationScale);
static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale;
// Upper limit of absolute value of weight used to prevent overflow when parameterizing integers
static constexpr LearnFloatType kMaxWeightMagnitude =
std::numeric_limits<typename LayerType::WeightType>::max() / kWeightScale;
// number of samples in mini-batch
IndexType batch_size_;
// Input mini batch
const LearnFloatType* batch_input_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// parameter
LearnFloatType biases_[kOutputDimensions];
LearnFloatType weights_[kOutputDimensions * kInputDimensions];
// Buffer used for updating parameters
LearnFloatType biases_diff_[kOutputDimensions];
LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions];
// Forward propagation buffer
std::vector<LearnFloatType> output_;
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
// hyper parameter
LearnFloatType momentum_;
LearnFloatType learning_rate_scale_;
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
Blas::sgemm(
Blas::MatrixLayout::ColMajor, Blas::MatrixTranspose::Trans, Blas::MatrixTranspose::NoTrans,
kOutputDimensions, count, kInputDimensions,
1.0,
weights_, kInputDimensions,
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
1.0,
&output_[offset * kOutputDimensions], kOutputDimensions
);
#endif
}
// backpropagation
// Handles the samples [offset, offset + count) of the current batch on behalf
// of thread `th`. `gradients` holds kOutputDimensions values per sample (the
// gradient with respect to this layer's output). The function
//   1. writes the gradient with respect to this layer's input into gradients_,
//   2. adds each sample's output gradient to this thread's biases_diff_,
//   3. accumulates the weight gradient into this thread's weights_diff_,
//   4. forwards gradients_ to the previous layer's trainer.
// Parameters themselves are not modified here; that happens in step_end().
// Both preprocessor branches issue the same sequence of calls, once through
// CBLAS and once through the Blas:: wrappers (presumably with the same
// argument semantics as CBLAS - confirm in extra/stockfish_blas.h).
void backpropagate(Thread& th,
const LearnFloatType* gradients,
uint64_t offset,
uint64_t count) {
// Each thread accumulates into its own ThreadState, selected by thread index.
auto& thread_state = thread_states_[th.thread_idx()];
// Only thread 0 passes momentum_ as the scale for the existing weights_diff_
// contents; every other thread passes 0.
// NOTE(review): the scale is applied on every call, so this appears to assume
// one backpropagate() call per thread per step - confirm with the caller.
const auto momentum = th.thread_idx() == 0 ? momentum_ : 0.0f;
#if defined(USE_BLAS)
// Input gradient for the sample range (beta = 0.0, so the destination slice
// of gradients_ is overwritten).
cblas_sgemm(
CblasColMajor, CblasNoTrans, CblasNoTrans,
kInputDimensions, count, kOutputDimensions,
1.0,
weights_, kInputDimensions,
gradients + offset * kOutputDimensions, kOutputDimensions,
0.0,
&gradients_[offset * kInputDimensions], kInputDimensions
);
// Bias gradient: add every sample's output gradient (alpha = 1.0).
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
cblas_saxpy(
kOutputDimensions, 1.0,
&gradients[batch_offset], 1, thread_state.biases_diff_, 1
);
}
// Weight gradient: product of the output gradients and the layer inputs for
// the sample range, added to `momentum` times the previous weights_diff_.
cblas_sgemm(
CblasRowMajor, CblasTrans, CblasNoTrans,
kOutputDimensions, kInputDimensions, count,
1.0,
gradients + offset * kOutputDimensions, kOutputDimensions,
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
momentum,
thread_state.weights_diff_, kInputDimensions
);
#else
// backpropagate
// Same three steps as the CBLAS branch, using the Blas:: wrappers.
Blas::sgemm(
Blas::MatrixLayout::ColMajor, Blas::MatrixTranspose::NoTrans, Blas::MatrixTranspose::NoTrans,
kInputDimensions, count, kOutputDimensions,
1.0,
weights_, kInputDimensions,
gradients + offset * kOutputDimensions, kOutputDimensions,
0.0,
&gradients_[offset * kInputDimensions], kInputDimensions
);
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
Blas::saxpy(kOutputDimensions, 1.0,
&gradients[batch_offset], 1, thread_state.biases_diff_, 1);
}
Blas::sgemm(
Blas::MatrixLayout::RowMajor, Blas::MatrixTranspose::Trans, Blas::MatrixTranspose::NoTrans,
kOutputDimensions, kInputDimensions, count,
1.0,
gradients + offset * kOutputDimensions, kOutputDimensions,
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
momentum,
thread_state.weights_diff_, kInputDimensions
);
#endif
// Continue with the previous layer, passing the whole gradients_ buffer and
// the same sample range.
previous_layer_trainer_->backpropagate(th, gradients_.data(), offset, count);
}
void reduce_thread_state()
{
for (IndexType i = 1; i < thread_states_.size(); ++i)
{
thread_states_[0] += thread_states_[i];
}
}
// Finish one training step: merge the per-thread accumulators, apply the
// resulting update to biases_ and weights_ scaled by the layer-local learning
// rate, record the magnitude of every applied update for check_health(), and
// then let the previous layer finish its step with the unscaled learning rate.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate)
{
    constexpr IndexType kNumBiases = kOutputDimensions;
    constexpr IndexType kNumWeights = kOutputDimensions * kInputDimensions;

    const LearnFloatType scaled_rate = learning_rate * learning_rate_scale_;

    reduce_thread_state();

    // After the reduction, thread 0 holds the sum over all threads.
    auto& totals = thread_states_[0];

    for (IndexType k = 0; k < kNumBiases; ++k) {
        const double delta = scaled_rate * totals.biases_diff_[k];
        biases_[k] -= delta;
        abs_biases_diff_sum_ += std::abs(delta);
    }
    num_biases_diffs_ += kNumBiases;

    for (IndexType k = 0; k < kNumWeights; ++k) {
        const double delta = scaled_rate * totals.weights_diff_[k];
        weights_[k] -= delta;
        abs_weights_diff_sum_ += std::abs(delta);
    }
    num_weights_diffs_ += kNumWeights;

    previous_layer_trainer_->step_end(thread_pool, learning_rate);
}
private:
// constructor
// Private: instances are obtained through the static create() factory.
// Builds the trainer for the previous layer first (recursively, through
// Trainer<PreviousLayer>::create), then starts from the parameters currently
// stored in the target layer.
Trainer(LayerType* target_layer, FeatureTransformer* ft) :
combined_batch_size_(0),
combined_batch_input_(nullptr),
previous_layer_trainer_(Trainer<PreviousLayer>::create(
&target_layer->previous_layer_, ft)),
target_layer_(target_layer),
biases_(),
weights_(),
// Defaults; both can be overridden later through the "momentum" and
// "learning_rate_scale" messages handled in send_message().
momentum_(0.2),
learning_rate_scale_(1.0) {
// Loads the float parameters from the target layer, clears any per-thread
// accumulators and, through reset_stats(), initializes the statistics members
// that are not listed in the initializer list above.
dequantize_parameters();
}
// Clear the update statistics gathered by step_end() and reported by
// check_health(): the summed magnitudes and the number of applied updates.
void reset_stats() {
    abs_biases_diff_sum_ = abs_weights_diff_sum_ = 0.0;
    num_biases_diffs_ = num_weights_diffs_ = 0;
}
void check_health() {
double abs_bias_sum = 0.0;
double abs_weight_sum = 0.0;
for(auto b : biases_)
abs_bias_sum += std::abs(b);
for(auto w : weights_)
abs_weight_sum += std::abs(w);
auto out = sync_region_cout.new_region();
out << "INFO (check_health):"
<< " layer " << LayerType::kLayerIndex
<< " - " << LayerType::get_name()
<< std::endl;
out << " - avg_abs_bias = " << abs_bias_sum / std::size(biases_) << std::endl;
out << " - avg_abs_bias_diff = " << abs_biases_diff_sum_ / num_biases_diffs_ << std::endl;
out << " - avg_abs_weight = " << abs_weight_sum / std::size(weights_) << std::endl;
out << " - avg_abs_weight_diff = " << abs_weights_diff_sum_ / num_weights_diffs_ << std::endl;
out.unlock();
reset_stats();
}
// Weight saturation and parameterization
void quantize_parameters() {
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
weights_[i] = std::max(-kMaxWeightMagnitude,
std::min(+kMaxWeightMagnitude, weights_[i]));
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
target_layer_->biases_[i] =
round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const auto offset = kInputDimensions * i;
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
for (IndexType j = 0; j < kInputDimensions; ++j) {
target_layer_->weights_[padded_offset + j] =
round<typename LayerType::WeightType>(
weights_[offset + j] * kWeightScale);
}
}
}
// read parameterized integer
void dequantize_parameters() {
for (IndexType i = 0; i < kOutputDimensions; ++i) {
biases_[i] = static_cast<LearnFloatType>(
target_layer_->biases_[i] / kBiasScale);
}
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const auto offset = kInputDimensions * i;
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
for (IndexType j = 0; j < kInputDimensions; ++j) {
weights_[offset + j] = static_cast<LearnFloatType>(
target_layer_->weights_[padded_offset + j] / kWeightScale);
}
}
for (auto& state : thread_states_)
{
state.reset_weights();
state.reset_biases();
}
reset_stats();
}
// Number of input/output dimensions, taken from the layer being trained
static constexpr IndexType kInputDimensions = LayerType::kInputDimensions;
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// The layer is treated as the output layer when its output dimensionality is 1
static constexpr bool kIsOutputLayer = kOutputDimensions == 1;
// Coefficients used for parameterization (conversion between the float
// parameters kept here and the integer parameters of the target layer).
// kActivationScale is the largest int8 value.
static constexpr LearnFloatType kActivationScale =
std::numeric_limits<std::int8_t>::max();
// Biases are multiplied by kBiasScale when quantized; the output layer uses a
// different scale than the hidden layers.
static constexpr LearnFloatType kBiasScale = kIsOutputLayer ?
(kPonanzaConstant * FV_SCALE) :
((1 << kWeightScaleBits) * kActivationScale);
// Weights are multiplied by kWeightScale when quantized.
static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale;
// Upper limit of absolute value of weight used to prevent overflow when parameterizing integers
static constexpr LearnFloatType kMaxWeightMagnitude =
std::numeric_limits<typename LayerType::WeightType>::max() / kWeightScale;
// Number of samples in the batch passed to the most recent step_start()
IndexType combined_batch_size_;
// Update statistics: step_end() adds the magnitude of every applied bias/weight
// update to the sums and the number of updated parameters to the counters;
// check_health() prints sum / count and reset_stats() clears all four.
double abs_biases_diff_sum_;
double abs_weights_diff_sum_;
uint64_t num_biases_diffs_;
uint64_t num_weights_diffs_;
// Input of this layer for the current batch: the pointer returned by the
// previous layer's step_start(), kInputDimensions values per sample
const LearnFloatType* combined_batch_input_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// Layer to learn (not owned; the quantized parameters are written into it)
LayerType* const target_layer_;
// parameter
// Accumulators owned by a single thread during one training step. Each thread
// adds its share of the bias and weight gradients here; the states are summed
// with operator+= afterwards. The struct and both arrays are aligned to
// kCacheLineSize.
struct alignas(kCacheLineSize) ThreadState
{
    // Buffer used for updating parameters
    alignas(kCacheLineSize) LearnFloatType biases_diff_[kOutputDimensions];
    alignas(kCacheLineSize) LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions];

    // A new state starts with both buffers zeroed.
    ThreadState() { reset_weights(); reset_biases(); }

    // Element-wise sum of both buffers.
    ThreadState& operator+=(const ThreadState& other)
    {
        IndexType k = 0;
        while (k < kOutputDimensions)
        {
            biases_diff_[k] += other.biases_diff_[k];
            ++k;
        }

        k = 0;
        while (k < kOutputDimensions * kInputDimensions)
        {
            weights_diff_[k] += other.weights_diff_[k];
            ++k;
        }

        return *this;
    }

    // Zero the weight accumulator.
    void reset_weights()
    {
        for (auto& entry : weights_diff_)
            entry = 0.0f;
    }

    // Zero the bias accumulator.
    void reset_biases()
    {
        for (auto& entry : biases_diff_)
            entry = 0.0f;
    }
};
// Float copies of the layer parameters that training updates.
// weights_ is indexed as kInputDimensions * output_index + input_index.
alignas(kCacheLineSize) LearnFloatType biases_[kOutputDimensions];
alignas(kCacheLineSize) LearnFloatType weights_[kOutputDimensions * kInputDimensions];
// Per-thread gradient accumulators, indexed by thread index; grown in
// step_start() to the size of the thread pool.
std::vector<ThreadState, CacheLineAlignedAllocator<ThreadState>> thread_states_;
// Forward propagation buffer (kOutputDimensions values per sample)
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
// Buffer for back propagation (kInputDimensions values per sample)
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
// Hyperparameters, settable through the "momentum" and "learning_rate_scale" messages
LearnFloatType momentum_;
LearnFloatType learning_rate_scale_;
};
} // namespace Eval::NNUE
#endif
+329 -115
View File
@@ -1,142 +1,356 @@
// Specialization of NNUE evaluation function learning class template for ClippedReLU
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
#define _NNUE_TRAINER_CLIPPED_RELU_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../learn/learn.h"
#include "../layers/clipped_relu.h"
#include "trainer.h"
namespace Eval {
#include "learn/learn.h"
namespace NNUE {
#include "nnue/layers/clipped_relu.h"
// Learning: Affine transformation layer
template <typename PreviousLayer>
class Trainer<Layers::ClippedReLU<PreviousLayer>> {
private:
// Type of layer to learn
using LayerType = Layers::ClippedReLU<PreviousLayer>;
#include "thread.h"
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
// Specialization of NNUE evaluation function learning class template for ClippedReLU
namespace Eval::NNUE {
// Set options such as hyperparameters
void SendMessage(Message* message) {
previous_layer_trainer_->SendMessage(message);
if (ReceiveMessage("check_health", message)) {
CheckHealth();
}
}
// Learning: Affine transformation layer
template <typename PreviousLayer>
class Trainer<Layers::ClippedReLU<PreviousLayer>> {
private:
// Type of layer to learn
using LayerType = Layers::ClippedReLU<PreviousLayer>;
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
previous_layer_trainer_->Initialize(rng);
}
public:
// factory function
static std::shared_ptr<Trainer> create(
LayerType* target_layer, FeatureTransformer* ft) {
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size());
}
const auto input = previous_layer_trainer_->Propagate(batch);
batch_size_ = static_cast<IndexType>(batch.size());
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
output_[index] = std::max(+kZero, std::min(+kOne, input[index]));
min_activations_[i] = std::min(min_activations_[i], output_[index]);
max_activations_[i] = std::max(max_activations_[i], output_[index]);
}
}
return output_.data();
}
return std::shared_ptr<Trainer>(
new Trainer(target_layer, ft));
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
gradients_[index] = gradients[index] *
(output_[index] > kZero) * (output_[index] < kOne);
}
}
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
}
// Set options such as hyperparameters
void send_message(Message* message) {
previous_layer_trainer_->send_message(message);
if (receive_message("check_health", message)) {
check_health();
}
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
batch_size_(0),
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer) {
std::fill(std::begin(min_activations_), std::end(min_activations_),
std::numeric_limits<LearnFloatType>::max());
std::fill(std::begin(max_activations_), std::end(max_activations_),
std::numeric_limits<LearnFloatType>::lowest());
}
// Initialize the parameters with random numbers.
// This trainer draws nothing from rng itself; it only hands rng on to the
// trainer of the previous layer.
template <typename RNG>
void initialize(RNG& rng) {
previous_layer_trainer_->initialize(rng);
}
// Check if there are any problems with learning
void CheckHealth() {
const auto largest_min_activation = *std::max_element(
std::begin(min_activations_), std::end(min_activations_));
const auto smallest_max_activation = *std::min_element(
std::begin(max_activations_), std::end(max_activations_));
std::cout << "INFO: largest min activation = " << largest_min_activation
<< ", smallest max activation = " << smallest_max_activation
<< std::endl;
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
{
const auto size = batch_end - batch_begin;
std::fill(std::begin(min_activations_), std::end(min_activations_),
std::numeric_limits<LearnFloatType>::max());
std::fill(std::begin(max_activations_), std::end(max_activations_),
std::numeric_limits<LearnFloatType>::lowest());
}
if ((long)output_.size() < (long)kOutputDimensions * size) {
output_.resize(kOutputDimensions * size);
gradients_.resize(kInputDimensions * size);
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions;
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
if (thread_states_.size() < thread_pool.size())
{
thread_states_.resize(thread_pool.size());
}
// LearnFloatType constant
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
input_ = previous_layer_trainer_->step_start(thread_pool, batch_begin, batch_end);
// number of samples in mini-batch
IndexType batch_size_;
batch_size_ = size;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
return output_.data();
}
// layer to learn
LayerType* const target_layer_;
// forward propagation
void propagate(Thread& th, const uint64_t offset, const uint64_t count) {
// Forward propagation buffer
std::vector<LearnFloatType> output_;
auto& thread_state = thread_states_[th.thread_idx()];
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
previous_layer_trainer_->propagate(th, offset, count);
// Health check statistics
LearnFloatType min_activations_[kOutputDimensions];
LearnFloatType max_activations_[kOutputDimensions];
};
#if defined (USE_SSE2)
} // namespace NNUE
{
static_assert(kOutputDimensions % 16 == 0, "This implementation assumes that it can process 16 floats at a time");
} // namespace Eval
const __m128 kZero4 = _mm_set1_ps(+kZero);
const __m128 kOne4 = _mm_set1_ps(+kOne);
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
for (IndexType b = offset; b < offset + count; ++b)
{
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; i += 16)
{
__m128 out0 = _mm_loadu_ps(&input_[i + 0 + batch_offset]);
__m128 out1 = _mm_loadu_ps(&input_[i + 4 + batch_offset]);
__m128 out2 = _mm_loadu_ps(&input_[i + 8 + batch_offset]);
__m128 out3 = _mm_loadu_ps(&input_[i + 12 + batch_offset]);
out0 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out0));
out1 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out1));
out2 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out2));
out3 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out3));
_mm_storeu_ps(&output_[i + 0 + batch_offset], out0);
_mm_storeu_ps(&output_[i + 4 + batch_offset], out1);
_mm_storeu_ps(&output_[i + 8 + batch_offset], out2);
_mm_storeu_ps(&output_[i + 12 + batch_offset], out3);
__m128 minact0 = _mm_loadu_ps(&thread_state.min_activations_[i + 0]);
__m128 minact1 = _mm_loadu_ps(&thread_state.min_activations_[i + 4]);
__m128 minact2 = _mm_loadu_ps(&thread_state.min_activations_[i + 8]);
__m128 minact3 = _mm_loadu_ps(&thread_state.min_activations_[i + 12]);
__m128 maxact0 = _mm_loadu_ps(&thread_state.max_activations_[i + 0]);
__m128 maxact1 = _mm_loadu_ps(&thread_state.max_activations_[i + 4]);
__m128 maxact2 = _mm_loadu_ps(&thread_state.max_activations_[i + 8]);
__m128 maxact3 = _mm_loadu_ps(&thread_state.max_activations_[i + 12]);
minact0 = _mm_min_ps(out0, minact0);
minact1 = _mm_min_ps(out1, minact1);
minact2 = _mm_min_ps(out2, minact2);
minact3 = _mm_min_ps(out3, minact3);
maxact0 = _mm_max_ps(out0, maxact0);
maxact1 = _mm_max_ps(out1, maxact1);
maxact2 = _mm_max_ps(out2, maxact2);
maxact3 = _mm_max_ps(out3, maxact3);
_mm_storeu_ps(&thread_state.min_activations_[i + 0], minact0);
_mm_storeu_ps(&thread_state.min_activations_[i + 4], minact1);
_mm_storeu_ps(&thread_state.min_activations_[i + 8], minact2);
_mm_storeu_ps(&thread_state.min_activations_[i + 12], minact3);
_mm_storeu_ps(&thread_state.max_activations_[i + 0], maxact0);
_mm_storeu_ps(&thread_state.max_activations_[i + 4], maxact1);
_mm_storeu_ps(&thread_state.max_activations_[i + 8], maxact2);
_mm_storeu_ps(&thread_state.max_activations_[i + 12], maxact3);
}
}
}
#else
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
output_[index] = std::max(+kZero, std::min(+kOne, input_[index]));
thread_state.min_activations_[i] = std::min(thread_state.min_activations_[i], output_[index]);
thread_state.max_activations_[i] = std::max(thread_state.max_activations_[i], output_[index]);
}
}
#endif
}
// Backpropagation for the batch slice [offset, offset + count).
// For every output in the slice the incoming gradient is copied to gradients_
// unless the stored forward output is <= kZero or >= kOne, in which case the
// gradient is zeroed and the output is counted as clipped. Statistics go to
// the ThreadState slot selected by th.thread_idx(). Finally the same slice is
// handed to the previous layer's trainer.
//   th        - calling thread; selects the statistics slot
//   gradients - gradients w.r.t. this layer's outputs, indexed like output_
//   offset    - index of the first sample of the slice
//   count     - number of samples in the slice
void backpropagate(Thread& th,
const LearnFloatType* gradients,
const uint64_t offset,
const uint64_t count) {
auto& thread_state = thread_states_[th.thread_idx()];
#if defined (USE_SSE2)
{
// The loop below handles 4 registers of 4 floats per iteration.
static_assert(kOutputDimensions % 16 == 0, "This implementation assumes that it can process 16 floats at a time");
const __m128 kZero4 = _mm_set1_ps(+kZero);
const __m128 kOne4 = _mm_set1_ps(+kOne);
for (IndexType b = offset; b < offset + count; ++b)
{
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; i += 16)
{
// Forward outputs of the 16 lanes handled in this iteration.
__m128 out0 = _mm_loadu_ps(&output_[batch_offset + i + 0]);
__m128 out1 = _mm_loadu_ps(&output_[batch_offset + i + 4]);
__m128 out2 = _mm_loadu_ps(&output_[batch_offset + i + 8]);
__m128 out3 = _mm_loadu_ps(&output_[batch_offset + i + 12]);
// Per-lane all-ones mask where the output is <= kZero or >= kOne.
__m128 clipped0 = _mm_or_ps(_mm_cmple_ps(out0, kZero4), _mm_cmpge_ps(out0, kOne4));
__m128 clipped1 = _mm_or_ps(_mm_cmple_ps(out1, kZero4), _mm_cmpge_ps(out1, kOne4));
__m128 clipped2 = _mm_or_ps(_mm_cmple_ps(out2, kZero4), _mm_cmpge_ps(out2, kOne4));
__m128 clipped3 = _mm_or_ps(_mm_cmple_ps(out3, kZero4), _mm_cmpge_ps(out3, kOne4));
__m128 grad0 = _mm_loadu_ps(&gradients[batch_offset + i + 0]);
__m128 grad1 = _mm_loadu_ps(&gradients[batch_offset + i + 4]);
__m128 grad2 = _mm_loadu_ps(&gradients[batch_offset + i + 8]);
__m128 grad3 = _mm_loadu_ps(&gradients[batch_offset + i + 12]);
// (~clipped) & grad: clears the gradient bits of every clipped lane.
grad0 = _mm_andnot_ps(clipped0, grad0);
grad1 = _mm_andnot_ps(clipped1, grad1);
grad2 = _mm_andnot_ps(clipped2, grad2);
grad3 = _mm_andnot_ps(clipped3, grad3);
_mm_storeu_ps(&gradients_[batch_offset + i + 0], grad0);
_mm_storeu_ps(&gradients_[batch_offset + i + 4], grad1);
_mm_storeu_ps(&gradients_[batch_offset + i + 8], grad2);
_mm_storeu_ps(&gradients_[batch_offset + i + 12], grad3);
// Pack the four 4-bit lane masks into one 16-bit mask; its set bits
// are the clipped lanes of this iteration.
const int clipped_mask =
(_mm_movemask_ps(clipped0) << 0)
| (_mm_movemask_ps(clipped1) << 4)
| (_mm_movemask_ps(clipped2) << 8)
| (_mm_movemask_ps(clipped3) << 12);
thread_state.num_clipped_ += popcount(clipped_mask);
}
}
}
#else
// Scalar fallback using the same clipping rule as the SSE2 path.
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
const IndexType index = batch_offset + i;
const bool clipped = (output_[index] <= kZero) | (output_[index] >= kOne);
gradients_[index] = gradients[index] * !clipped;
thread_state.num_clipped_ += clipped;
}
}
#endif
// Every output of the slice counts towards the total, clipped or not.
thread_state.num_total_ += count * kOutputDimensions;
previous_layer_trainer_->backpropagate(th, gradients_.data(), offset, count);
}
// Merges the statistics of every slot after the first into slot 0 using
// ThreadState::operator+=. Slots other than 0 are left unchanged.
void reduce_thread_state()
{
    const auto slot_count = thread_states_.size();
    auto slot = decltype(slot_count){1};
    while (slot < slot_count)
    {
        thread_states_[0] += thread_states_[slot];
        ++slot;
    }
}
// End of a training step. This trainer does no work of its own here; the
// call is simply passed on to the previous layer's trainer.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate)
{
previous_layer_trainer_->step_end(thread_pool, learning_rate);
}
private:
// constructor
// Creates the trainer of the previous layer and remembers the layer to learn.
// thread_states_ is not part of the initializer list, so it is still empty
// when reset_stats() runs here and that call has nothing to reset.
Trainer(LayerType* target_layer, FeatureTransformer* ft) :
batch_size_(0),
previous_layer_trainer_(Trainer<PreviousLayer>::create(
&target_layer->previous_layer_, ft)),
target_layer_(target_layer) {
reset_stats();
}
// Puts the health-check statistics of every thread slot back to their
// initial values by calling reset() on each of them.
void reset_stats() {
    std::for_each(thread_states_.begin(), thread_states_.end(),
                  [](auto& slot) { slot.reset(); });
}
// Check if there are any problems with learning.
// Merges the per-thread statistics into slot 0, prints the largest per-output
// minimum, the smallest per-output maximum and the share of clipped outputs,
// then resets the statistics of all slots.
void check_health() {
    // thread_states_ starts out empty and is only sized in step_start().
    // If a health check arrives before the first step there is nothing to
    // report, and indexing slot 0 below would be out of bounds.
    if (thread_states_.empty())
        return;

    reduce_thread_state();

    auto& main_thread_state = thread_states_[0];

    const auto largest_min_activation = *std::max_element(
        std::begin(main_thread_state.min_activations_), std::end(main_thread_state.min_activations_));
    const auto smallest_max_activation = *std::min_element(
        std::begin(main_thread_state.max_activations_), std::end(main_thread_state.max_activations_));

    // With no outputs counted yet the ratio would be 0/0 (NaN); report 0%.
    const double clipped_percent =
        main_thread_state.num_total_ == 0
            ? 0.0
            : static_cast<double>(main_thread_state.num_clipped_) / main_thread_state.num_total_ * 100.0;

    auto out = sync_region_cout.new_region();

    out << "INFO (check_health):"
        << " layer " << LayerType::kLayerIndex
        << " - " << LayerType::get_name()
        << std::endl;

    out << " - largest min activation = " << largest_min_activation
        << " , smallest max activation = " << smallest_max_activation
        << std::endl;

    out << " - clipped " << clipped_percent << "% of outputs"
        << std::endl;

    out.unlock();

    reset_stats();
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions;
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// LearnFloatType constant
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
// number of samples in mini-batch
IndexType batch_size_;
IndexType num_total_;
const LearnFloatType* input_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// Forward propagation buffer
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
// buffer for back propagation
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
// Health-check statistics collected by one thread.
// NOTE(review): the kCacheLineSize alignment presumably keeps slots of
// different threads on separate cache lines - confirm.
struct alignas(kCacheLineSize) ThreadState
{
    // Smallest / largest activation observed for each output dimension.
    LearnFloatType min_activations_[kOutputDimensions];
    LearnFloatType max_activations_[kOutputDimensions];
    // Number of clipped outputs and number of outputs counted in total.
    IndexType num_clipped_;
    IndexType num_total_;

    ThreadState() { reset(); }

    // Merges the statistics of another slot into this one: element-wise
    // min/max of the activation ranges and sums of both counters.
    ThreadState& operator+=(const ThreadState& other)
    {
        for (IndexType i = 0; i < kOutputDimensions; ++i)
        {
            min_activations_[i] = std::min(min_activations_[i], other.min_activations_[i]);
            max_activations_[i] = std::max(max_activations_[i], other.max_activations_[i]);
        }

        num_clipped_ += other.num_clipped_;
        num_total_ += other.num_total_;

        return *this;
    }

    // Returns the slot to its initial state. The sentinels use the limits of
    // the element type itself, so any observed activation replaces them.
    void reset()
    {
        std::fill(std::begin(min_activations_), std::end(min_activations_), std::numeric_limits<LearnFloatType>::max());
        std::fill(std::begin(max_activations_), std::end(max_activations_), std::numeric_limits<LearnFloatType>::lowest());
        num_clipped_ = 0;
        num_total_ = 0;
    }
};
std::vector<ThreadState, CacheLineAlignedAllocator<ThreadState>> thread_states_;
};
} // namespace Eval::NNUE
#endif
File diff suppressed because it is too large Load Diff
+336 -210
View File
@@ -1,251 +1,377 @@
// Specialization of NNUE evaluation function learning class template for InputSlice
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
#define _NNUE_TRAINER_INPUT_SLICE_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../learn/learn.h"
#include "../layers/input_slice.h"
#include "trainer.h"
namespace Eval {
#include "extra/stockfish_blas.h"
namespace NNUE {
#include "learn/learn.h"
// Learning: Input layer
class SharedInputTrainer {
public:
// Factory function.
// All callers share one SharedInputTrainer: it is built on the first call
// and every call, including the first, bumps the referrer count by one.
static std::shared_ptr<SharedInputTrainer> Create(
    FeatureTransformer* feature_transformer) {
    static std::shared_ptr<SharedInputTrainer> instance;
    if (instance == nullptr) {
        instance = std::shared_ptr<SharedInputTrainer>(
            new SharedInputTrainer(feature_transformer));
    }
    instance->num_referrers_ += 1;
    return instance;
}
#include "nnue/layers/input_slice.h"
// Set options such as hyperparameters.
// Only the first of num_referrers_ consecutive calls forwards the message to
// the feature transformer trainer; the last one resets the call counter.
void SendMessage(Message* message) {
    const bool first_call = (num_calls_ == 0);
    if (first_call) {
        current_operation_ = Operation::kSendMessage;
        feature_transformer_trainer_->SendMessage(message);
    }

    assert(current_operation_ == Operation::kSendMessage);

    num_calls_ += 1;
    if (num_calls_ != num_referrers_)
        return;

    num_calls_ = 0;
    current_operation_ = Operation::kNone;
}
#include "thread.h"
// Initialize the parameters with random numbers.
// Only the first of num_referrers_ consecutive calls initializes the feature
// transformer trainer; the last one resets the call counter.
template <typename RNG>
void Initialize(RNG& rng) {
    const bool first_call = (num_calls_ == 0);
    if (first_call) {
        current_operation_ = Operation::kInitialize;
        feature_transformer_trainer_->Initialize(rng);
    }

    assert(current_operation_ == Operation::kInitialize);

    num_calls_ += 1;
    if (num_calls_ != num_referrers_)
        return;

    num_calls_ = 0;
    current_operation_ = Operation::kNone;
}
// Specialization of NNUE evaluation function learning class template for InputSlice
namespace Eval::NNUE {
// Forward propagation.
// Grows the gradient buffer if needed and records the batch size on every
// call. Only the first of num_referrers_ consecutive calls runs the feature
// transformer trainer; every call returns the pointer stored by that run.
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
    const auto sample_count = batch.size();
    if (gradients_.size() < kInputDimensions * sample_count) {
        gradients_.resize(kInputDimensions * sample_count);
    }
    batch_size_ = static_cast<IndexType>(sample_count);

    const bool first_call = (num_calls_ == 0);
    if (first_call) {
        current_operation_ = Operation::kPropagate;
        output_ = feature_transformer_trainer_->Propagate(batch);
    }

    assert(current_operation_ == Operation::kPropagate);

    num_calls_ += 1;
    if (num_calls_ == num_referrers_) {
        num_calls_ = 0;
        current_operation_ = Operation::kNone;
    }

    return output_;
}
// Learning: Input layer
// This is tricky. It exists because when there's more than one trainer
// on top of a single feature transformer we want to only call propagate/backpropagate
// on the feature transformer once. This is straightforward in the old
// multithreading case, because propagate/backpropagate is called just once from the
// main thread. But with the current implementation of coarser multithreading
// we end up calling each method from each thread. Therefore we have to keep
// the num_calls and current_operation per thread basis, each thread must work
// on its designated batch slice, and the only synchronization points are
// step_start and step_end - for which we use state of the first thread.
// Each thread requires their own bookkeeping because it's possible that
// one thread is still in propagate of some batch slice while the other thread
// is doing backpropagate of some other slice. We also ensure the thread state
// isn't susceptible to false sharing by using a full cache line for the state.
class SharedInputTrainer {
public:
// factory function
static std::shared_ptr<SharedInputTrainer> create(
FeatureTransformer* ft) {
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
if (num_referrers_ == 1) {
feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
return;
}
if (num_calls_ == 0) {
current_operation_ = Operation::kBackPropagate;
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kInputDimensions * b;
for (IndexType i = 0; i < kInputDimensions; ++i) {
gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
static std::shared_ptr<SharedInputTrainer> instance;
if (!instance) {
instance.reset(new SharedInputTrainer(ft));
}
++instance->num_referrers_;
return instance;
}
}
}
assert(current_operation_ == Operation::kBackPropagate);
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kInputDimensions * b;
for (IndexType i = 0; i < kInputDimensions; ++i) {
gradients_[batch_offset + i] += gradients[batch_offset + i];
}
}
if (++num_calls_ == num_referrers_) {
feature_transformer_trainer_->Backpropagate(
gradients_.data(), learning_rate);
num_calls_ = 0;
current_operation_ = Operation::kNone;
}
}
private:
// constructor
// Starts with no referrers, no call in progress and no output pointer, and
// creates the trainer of the feature transformer that is wrapped here.
SharedInputTrainer(FeatureTransformer* feature_transformer) :
batch_size_(0),
num_referrers_(0),
num_calls_(0),
current_operation_(Operation::kNone),
feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
feature_transformer)),
output_(nullptr) {
}
// Set options such as hyperparameters
void send_message(Message* message) {
auto& thread_state = thread_states_[0];
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
FeatureTransformer::kOutputDimensions;
if (thread_state.num_calls == 0) {
thread_state.current_operation = Operation::kSendMessage;
feature_transformer_trainer_->send_message(message);
}
// type of processing
enum class Operation {
kNone,
kSendMessage,
kInitialize,
kPropagate,
kBackPropagate,
};
assert(thread_state.current_operation == Operation::kSendMessage);
// number of samples in mini-batch
IndexType batch_size_;
if (++thread_state.num_calls == num_referrers_) {
thread_state.num_calls = 0;
thread_state.current_operation = Operation::kNone;
}
}
// number of layers sharing this layer as input
std::uint32_t num_referrers_;
// Initialize the parameters with random numbers
template <typename RNG>
void initialize(RNG& rng) {
auto& thread_state = thread_states_[0];
// Number of times the current process has been called
std::uint32_t num_calls_;
if (thread_state.num_calls == 0) {
thread_state.current_operation = Operation::kInitialize;
feature_transformer_trainer_->initialize(rng);
}
// current processing type
Operation current_operation_;
assert(thread_state.current_operation == Operation::kInitialize);
// Trainer of input feature converter
const std::shared_ptr<Trainer<FeatureTransformer>>
feature_transformer_trainer_;
if (++thread_state.num_calls == num_referrers_) {
thread_state.num_calls = 0;
thread_state.current_operation = Operation::kNone;
}
}
// pointer to output shared for forward propagation
const LearnFloatType* output_;
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
{
const auto size = batch_end - batch_begin;
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
};
if ((long)gradients_.size() < (long)kInputDimensions * size) {
gradients_.resize(kInputDimensions * size);
}
// Learning: Input layer
template <IndexType OutputDimensions, IndexType Offset>
class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
private:
// Type of layer to learn
using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
if (thread_states_.size() < thread_pool.size())
{
thread_states_.resize(thread_pool.size());
}
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* /*target_layer*/, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(new Trainer(feature_transformer));
}
batch_size_ = size;
// Set options such as hyperparameters
void SendMessage(Message* message) {
shared_input_trainer_->SendMessage(message);
}
auto& thread_state = thread_states_[0];
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
shared_input_trainer_->Initialize(rng);
}
if (thread_state.num_calls == 0) {
thread_state.current_operation = Operation::kStepStart;
output_ = feature_transformer_trainer_->step_start(thread_pool, batch_begin, batch_end);
}
assert(thread_state.current_operation == Operation::kStepStart);
if (++thread_state.num_calls == num_referrers_) {
thread_state.num_calls = 0;
thread_state.current_operation = Operation::kNone;
}
return output_;
}
// Forward propagation for the batch slice [offset, offset + count).
// Bookkeeping is kept in the slot of the calling thread: only the first of
// num_referrers_ consecutive calls on that thread runs the feature
// transformer trainer, and the last one resets the slot.
void propagate(Thread& th, uint64_t offset, uint64_t count) {
    auto& state = thread_states_[th.thread_idx()];

    const bool first_call = (state.num_calls == 0);
    if (first_call) {
        state.current_operation = Operation::kPropagate;
        feature_transformer_trainer_->propagate(th, offset, count);
    }

    assert(state.current_operation == Operation::kPropagate);

    state.num_calls += 1;
    if (state.num_calls != num_referrers_)
        return;

    state.num_calls = 0;
    state.current_operation = Operation::kNone;
}
// Backpropagation for the batch slice [offset, offset + count).
// With a single referrer the gradients go straight to the feature
// transformer trainer. Otherwise the gradients of all referrers are summed
// into gradients_ for this slice; the first call on a thread zeroes the
// slice and the last call passes the sum on and resets the thread's slot.
void backpropagate(Thread& th,
                   const LearnFloatType* gradients,
                   uint64_t offset,
                   uint64_t count) {
    auto& state = thread_states_[th.thread_idx()];

    if (num_referrers_ == 1) {
        feature_transformer_trainer_->backpropagate(th, gradients, offset, count);
        return;
    }

    const bool first_call = (state.num_calls == 0);
    if (first_call) {
        state.current_operation = Operation::kBackPropagate;
        for (IndexType sample = offset; sample < offset + count; ++sample) {
            const IndexType base = kInputDimensions * sample;
            for (IndexType dim = 0; dim < kInputDimensions; ++dim) {
                gradients_[base + dim] = static_cast<LearnFloatType>(0.0);
            }
        }
    }

    assert(state.current_operation == Operation::kBackPropagate);

    // Accumulate this referrer's contribution.
    for (IndexType sample = offset; sample < offset + count; ++sample) {
        const IndexType base = kInputDimensions * sample;
        for (IndexType dim = 0; dim < kInputDimensions; ++dim) {
            gradients_[base + dim] += gradients[base + dim];
        }
    }

    state.num_calls += 1;
    if (state.num_calls != num_referrers_)
        return;

    feature_transformer_trainer_->backpropagate(
        th, gradients_.data(), offset, count);
    state.num_calls = 0;
    state.current_operation = Operation::kNone;
}
// End of a training step. Bookkeeping uses slot 0: only the first of
// num_referrers_ consecutive calls reaches the feature transformer trainer,
// and the last one resets the slot.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate) {
    auto& state = thread_states_[0];

    const bool first_call = (state.num_calls == 0);
    if (first_call) {
        state.current_operation = Operation::kStepEnd;
        feature_transformer_trainer_->step_end(thread_pool, learning_rate);
    }

    assert(state.current_operation == Operation::kStepEnd);

    state.num_calls += 1;
    if (state.num_calls != num_referrers_)
        return;

    state.num_calls = 0;
    state.current_operation = Operation::kNone;
}
private:
// constructor
// Starts with no referrers and no output pointer, and creates the trainer of
// the feature transformer that is wrapped here. thread_states_ begins with a
// single slot, which is the one send_message, initialize, step_start and
// step_end index as thread_states_[0].
SharedInputTrainer(FeatureTransformer* ft) :
batch_size_(0),
num_referrers_(0),
thread_states_(1),
feature_transformer_trainer_(Trainer<FeatureTransformer>::create(
ft)),
output_(nullptr) {
}
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
FeatureTransformer::kOutputDimensions;
// type of processing
// Identifies which member function a ThreadState is currently in the middle
// of; kNone means no call sequence is in progress. The member functions
// assert on this value after the first referrer has set it.
enum class Operation {
kNone,
kSendMessage,
kInitialize,
kStepStart,
kPropagate,
kBackPropagate,
kStepEnd,
};
// number of samples in mini-batch
IndexType batch_size_;
// number of layers sharing this layer as input
std::uint32_t num_referrers_;
// Call bookkeeping for one thread, aligned to kCacheLineSize.
struct alignas(kCacheLineSize) ThreadState
{
// number of calls received so far in the current sequence; reset to 0 once
// it reaches num_referrers_
std::uint32_t num_calls{0};
// current processing type
Operation current_operation = Operation::kNone;
};
// Number of times the current process has been called
std::vector<ThreadState, CacheLineAlignedAllocator<ThreadState>> thread_states_;
// Trainer of input feature converter
const std::shared_ptr<Trainer<FeatureTransformer>>
feature_transformer_trainer_;
// pointer to output shared for forward propagation
const LearnFloatType* output_;
// buffer for back propagation
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
};
// Learning: Input layer
template <IndexType OutputDimensions, IndexType Offset>
class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
private:
// Type of layer to learn
using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
public:
// factory function
// The target layer argument is unused; only the feature transformer is
// passed on to the constructor.
static std::shared_ptr<Trainer> create(
LayerType* /*target_layer*/, FeatureTransformer* ft) {
return std::shared_ptr<Trainer>(new Trainer(ft));
}
// Set options such as hyperparameters
// The message is not inspected here; it is passed on to the shared input
// trainer.
void send_message(Message* message) {
shared_input_trainer_->send_message(message);
}
// Initialize the parameters with random numbers
// Nothing is drawn from rng here; it is passed on to the shared input
// trainer.
template <typename RNG>
void initialize(RNG& rng) {
shared_input_trainer_->initialize(rng);
}
// Start of a training step for the batch [batch_begin, batch_end).
// Grows the output and gradient buffers if the batch does not fit, records
// the batch size, stores the input pointer returned by the shared input
// trainer and returns a pointer to this trainer's output buffer.
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
{
    const auto sample_count = batch_end - batch_begin;

    // The buffers only ever grow; both are enlarged together.
    const bool needs_growth =
        static_cast<long>(output_.size()) < static_cast<long>(kOutputDimensions) * sample_count;
    if (needs_growth) {
        output_.resize(kOutputDimensions * sample_count);
        gradients_.resize(kInputDimensions * sample_count);
    }

    batch_size_ = sample_count;
    input_ = shared_input_trainer_->step_start(thread_pool, batch_begin, batch_end);

    return output_.data();
}
// forward propagation
void propagate(Thread& th, uint64_t offset, uint64_t count) {
shared_input_trainer_->propagate(th, offset, count);
for (IndexType b = offset; b < offset + count; ++b) {
const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b;
// forward propagation
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
const auto input = shared_input_trainer_->Propagate(batch);
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b;
#if defined(USE_BLAS)
cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
&output_[output_offset], 1);
cblas_scopy(
kOutputDimensions, &input_[input_offset + Offset], 1,
&output_[output_offset], 1
);
#else
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output_[output_offset + i] = input[input_offset + Offset + i];
}
#endif
}
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kInputDimensions; ++i) {
if (i < Offset || i >= Offset + kOutputDimensions) {
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
} else {
gradients_[input_offset + i] = gradients[output_offset + i - Offset];
Blas::scopy(
kOutputDimensions, &input_[input_offset + Offset], 1,
&output_[output_offset], 1
);
#endif
}
}
}
}
shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
}
private:
// constructor
// Obtains the shared input trainer through SharedInputTrainer::Create.
Trainer(FeatureTransformer* feature_transformer):
batch_size_(0),
shared_input_trainer_(SharedInputTrainer::Create(feature_transformer)) {
}
// backpropagation
void backpropagate(Thread& th,
const LearnFloatType* gradients,
uint64_t offset,
uint64_t count) {
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
FeatureTransformer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
for (IndexType b = offset; b < offset + count; ++b)
{
const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b;
// number of samples in mini-batch
IndexType batch_size_;
IndexType i = 0;
for (; i < Offset; ++i) {
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
}
// Trainer of shared input layer
const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
for (; i < Offset + kOutputDimensions; ++i) {
gradients_[input_offset + i] = gradients[output_offset + i - Offset];
}
// Forward propagation buffer
std::vector<LearnFloatType> output_;
for (; i < kInputDimensions; ++i)
{
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
}
}
// buffer for back propagation
std::vector<LearnFloatType> gradients_;
};
shared_input_trainer_->backpropagate(th, gradients_.data(), offset, count);
}
} // namespace NNUE
// End of a training step. This trainer does no work of its own here; the
// call is passed on to the shared input trainer.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate) {
shared_input_trainer_->step_end(thread_pool, learning_rate);
}
} // namespace Eval
private:
// constructor
// Obtains the shared input trainer through SharedInputTrainer::create.
// input_ is not set here; it is assigned in step_start.
Trainer(FeatureTransformer* ft) :
batch_size_(0),
shared_input_trainer_(SharedInputTrainer::create(ft)) {
}
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
// number of input/output dimensions
static constexpr IndexType kInputDimensions =
FeatureTransformer::kOutputDimensions;
static constexpr IndexType kOutputDimensions = OutputDimensions;
static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
// number of samples in mini-batch
IndexType batch_size_;
const LearnFloatType* input_;
// Trainer of shared input layer
const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
// Forward propagation buffer
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
// buffer for back propagation
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
};
} // namespace Eval::NNUE
#endif
+171 -160
View File
@@ -1,190 +1,201 @@
// Specialization of NNUE evaluation function learning class template for Sum
#ifndef _NNUE_TRAINER_SUM_H_
#ifndef _NNUE_TRAINER_SUM_H_
#define _NNUE_TRAINER_SUM_H_
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
#include "../../learn/learn.h"
#include "../layers/sum.h"
#include "trainer.h"
namespace Eval {
#include "extra/stockfish_blas.h"
namespace NNUE {
#include "learn/learn.h"
// Learning: A layer that sums the outputs of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Trainer<Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>> :
Trainer<Layers::Sum<RemainingPreviousLayers...>> {
private:
// Type of layer to learn
using LayerType = Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>;
using Tail = Trainer<Layers::Sum<RemainingPreviousLayers...>>;
#include "nnue/layers/sum.h"
public:
// factory function
// Wraps a newly constructed trainer for target_layer in a shared_ptr.
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
#include "thread.h"
// Set options such as hyperparameters
void SendMessage(Message* message) {
// The results of the other member functions do not depend on the processing
// order, so they process Tail first to keep the implementation simple.
// SendMessage instead processes the head (previous_layer_trainer_) first to
// make the subscript correspondence easier to follow.
previous_layer_trainer_->SendMessage(message);
Tail::SendMessage(message);
}
// Specialization of NNUE evaluation function learning class template for Sum
namespace Eval::NNUE {
// Initialize the parameters with random numbers
// The Tail base class is initialized first, then the trainer of the first
// previous layer; both draw from the same rng.
template <typename RNG>
void Initialize(RNG& rng) {
Tail::Initialize(rng);
previous_layer_trainer_->Initialize(rng);
}
// Learning: A layer that sums the outputs of multiple layers
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
class Trainer<Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>> :
Trainer<Layers::Sum<RemainingPreviousLayers...>> {
private:
// Type of layer to learn
using LayerType = Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>;
using Tail = Trainer<Layers::Sum<RemainingPreviousLayers...>>;
public:
// factory function
// Wraps a newly constructed trainer for target_layer in a shared_ptr.
static std::shared_ptr<Trainer> create(
LayerType* target_layer, FeatureTransformer* ft) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, ft));
}
// Set options such as hyperparameters
void send_message(Message* message) {
// The results of the other member functions do not depend on the processing
// order, so they process Tail first to keep the implementation simple.
// send_message instead processes the head (previous_layer_trainer_) first to
// make the subscript correspondence easier to follow.
previous_layer_trainer_->send_message(message);
Tail::send_message(message);
}
// Initialize the parameters with random numbers
// The Tail base class is initialized first, then the trainer of the first
// previous layer; both draw from the same rng.
template <typename RNG>
void initialize(RNG& rng) {
Tail::initialize(rng);
previous_layer_trainer_->initialize(rng);
}
// forward propagation
/*const*/ LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
batch_size_ = static_cast<IndexType>(batch.size());
auto output = Tail::propagate(thread_pool, batch);
const auto head_output = previous_layer_trainer_->propagate(thread_pool, batch);
// forward propagation
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
batch_size_ = static_cast<IndexType>(batch.size());
auto output = Tail::Propagate(batch);
const auto head_output = previous_layer_trainer_->Propagate(batch);
#if defined(USE_BLAS)
cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
head_output, 1, output, 1);
cblas_saxpy(
kOutputDimensions * batch_size_, 1.0,
head_output, 1, output, 1
);
#else
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output[batch_offset + i] += head_output[batch_offset + i];
}
}
Blas::saxpy(
thread_pool,
kOutputDimensions * batch_size_, 1.0,
head_output, 1, output, 1
);
#endif
return output;
}
return output;
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
Tail::Backpropagate(gradients, learning_rate);
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
}
// backpropagation
void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) {
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer):
Tail(target_layer, feature_transformer),
batch_size_(0),
previous_layer_trainer_(Trainer<FirstPreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer) {
}
Tail::backpropagate(thread_pool, gradients, learning_rate);
previous_layer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
}
// number of input/output dimensions
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* ft):
Tail(target_layer, ft),
batch_size_(0),
previous_layer_trainer_(Trainer<FirstPreviousLayer>::create(
&target_layer->previous_layer_, ft)),
target_layer_(target_layer) {
}
// make subclass friend
template <typename SumLayer>
friend class Trainer;
// number of input/output dimensions
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// number of samples in mini-batch
IndexType batch_size_;
// make subclass friend
template <typename SumLayer>
friend class Trainer;
// Trainer of the previous layer
const std::shared_ptr<Trainer<FirstPreviousLayer>> previous_layer_trainer_;
// number of samples in mini-batch
IndexType batch_size_;
// layer to learn
LayerType* const target_layer_;
};
// Trainer of the previous layer
const std::shared_ptr<Trainer<FirstPreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
};
// Learning: Layer that takes the sum of the outputs of multiple layers (when there is one template argument)
template <typename PreviousLayer>
class Trainer<Layers::Sum<PreviousLayer>> {
private:
// Type of layer to learn
using LayerType = Layers::Sum<PreviousLayer>;
// Learning: Layer that takes the sum of the outputs of multiple layers (when there is one template argument)
template <typename PreviousLayer>
class Trainer<Layers::Sum<PreviousLayer>> {
private:
// Type of layer to learn
using LayerType = Layers::Sum<PreviousLayer>;
public:
// factory function
static std::shared_ptr<Trainer> Create(
LayerType* target_layer, FeatureTransformer* feature_transformer) {
return std::shared_ptr<Trainer>(
new Trainer(target_layer, feature_transformer));
}
public:
// factory function
static std::shared_ptr<Trainer> create(
LayerType* target_layer, FeatureTransformer* ft) {
// Set options such as hyperparameters
void SendMessage(Message* message) {
previous_layer_trainer_->SendMessage(message);
}
return std::shared_ptr<Trainer>(
new Trainer(target_layer, ft));
}
// Initialize the parameters with random numbers
template <typename RNG>
void Initialize(RNG& rng) {
previous_layer_trainer_->Initialize(rng);
}
// Set options such as hyperparameters
void send_message(Message* message) {
previous_layer_trainer_->send_message(message);
}
// Initialize the parameters with random numbers
template <typename RNG>
void initialize(RNG& rng) {
previous_layer_trainer_->initialize(rng);
}
// forward propagation
/*const*/ LearnFloatType* propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
const auto output = previous_layer_trainer_->propagate(batch);
// forward propagation
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size());
}
batch_size_ = static_cast<IndexType>(batch.size());
const auto output = previous_layer_trainer_->Propagate(batch);
#if defined(USE_BLAS)
cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
#else
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output_[batch_offset + i] = output[batch_offset + i];
}
}
#endif
return output_.data();
}
// backpropagation
void Backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
batch_size_(0),
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
&target_layer->previous_layer_, feature_transformer)),
target_layer_(target_layer) {
}
// number of input/output dimensions
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// make subclass friend
template <typename SumLayer>
friend class Trainer;
// number of samples in mini-batch
IndexType batch_size_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// Forward propagation buffer
std::vector<LearnFloatType> output_;
};
} // namespace NNUE
} // namespace Eval
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b;
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output_[batch_offset + i] = output[batch_offset + i];
}
}
#endif
return output_.data();
}
// backpropagation
void backpropagate(const LearnFloatType* gradients,
LearnFloatType learning_rate) {
previous_layer_trainer_->backpropagate(gradients, learning_rate);
}
private:
// constructor
Trainer(LayerType* target_layer, FeatureTransformer* ft) :
batch_size_(0),
previous_layer_trainer_(Trainer<PreviousLayer>::create(
&target_layer->previous_layer_, ft)),
target_layer_(target_layer) {
}
// number of input/output dimensions
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
// make subclass friend
template <typename SumLayer>
friend class Trainer;
// number of samples in mini-batch
IndexType batch_size_;
// Trainer of the previous layer
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
// layer to learn
LayerType* const target_layer_;
// Forward propagation buffer
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
};
} // namespace Eval::NNUE
#endif
+26 -18
View File
@@ -30,29 +30,29 @@ namespace {
#define S(mg, eg) make_score(mg, eg)
// Pawn penalties
constexpr Score Backward = S( 8, 27);
constexpr Score Doubled = S(11, 55);
constexpr Score Isolated = S( 5, 17);
constexpr Score WeakLever = S( 2, 54);
constexpr Score WeakUnopposed = S(15, 25);
constexpr Score Backward = S( 8, 25);
constexpr Score Doubled = S(10, 55);
constexpr Score Isolated = S( 3, 15);
constexpr Score WeakLever = S( 3, 55);
constexpr Score WeakUnopposed = S(13, 25);
// Bonus for blocked pawns at 5th or 6th rank
constexpr Score BlockedPawn[2] = { S(-13, -4), S(-4, 3) };
constexpr Score BlockedPawn[2] = { S(-13, -4), S(-5, 2) };
constexpr Score BlockedStorm[RANK_NB] = {
S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)
};
// Connected pawn bonus
constexpr int Connected[RANK_NB] = { 0, 7, 8, 11, 24, 45, 85 };
constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 24, 48, 86 };
// Strength of pawn shelter for our king by [distance from edge][rank].
// RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
{ V( -6), V( 81), V( 93), V( 58), V( 39), V( 18), V( 25) },
{ V(-43), V( 61), V( 35), V(-49), V(-29), V(-11), V( -63) },
{ V(-10), V( 75), V( 23), V( -2), V( 32), V( 3), V( -45) },
{ V(-39), V(-13), V(-29), V(-52), V(-48), V(-67), V(-166) }
{ V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V( 28) },
{ V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) },
{ V(-11), V( 77), V( 22), V( -6), V( 31), V( 8), V( -45) },
{ V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) }
};
// Danger of enemy pawns moving toward our king by [distance from edge][rank].
@@ -60,12 +60,17 @@ namespace {
// is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
// on edge, likely blocked by our king.
constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
{ V( 85), V(-289), V(-166), V(97), V(50), V( 45), V( 50) },
{ V( 46), V( -25), V( 122), V(45), V(37), V(-10), V( 20) },
{ V( -6), V( 51), V( 168), V(34), V(-2), V(-22), V(-14) },
{ V(-15), V( -11), V( 101), V( 4), V(11), V(-15), V(-29) }
{ V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) },
{ V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) },
{ V( -8), V( 51), V( 167), V( 35), V( -4), V(-16), V(-12) },
{ V(-17), V( -13), V( 100), V( 4), V( 9), V(-16), V(-31) }
};
// KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
// for king when the king is on a semi-open or open file.
constexpr Score KingOnFile[2][2] = {{ S(-19,12), S(-6, 7) },
{ S( 0, 2), S( 6,-5) }};
#undef S
#undef V
@@ -147,7 +152,7 @@ namespace {
if (support | phalanx)
{
int v = Connected[r] * (2 + bool(phalanx) - bool(opposed))
+ 21 * popcount(support);
+ 22 * popcount(support);
score += make_score(v, v * (r - 2) / 4);
}
@@ -171,8 +176,8 @@ namespace {
score -= Doubled * doubled
+ WeakLever * more_than_one(lever);
if (blocked && r > RANK_4)
score += BlockedPawn[r-4];
if (blocked && r >= RANK_5)
score += BlockedPawn[r - RANK_5];
}
return score;
@@ -237,6 +242,9 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const {
bonus -= make_score(UnblockedStorm[d][theirRank], 0);
}
// King On File
bonus -= KingOnFile[pos.is_on_semiopen_file(Us, ksq)][pos.is_on_semiopen_file(Them, ksq)];
return bonus;
}
+33 -12
View File
@@ -23,6 +23,8 @@
#include <iomanip>
#include <sstream>
#include "nnue/evaluate_nnue.h"
#include "bitboard.h"
#include "misc.h"
#include "movegen.h"
@@ -32,6 +34,9 @@
#include "uci.h"
#include "syzygy/tbprobe.h"
#include "learn/packed_sfen.h"
#include "learn/sfen_packer.h"
using std::string;
namespace Zobrist {
@@ -77,6 +82,8 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
&& !pos.can_castle(ANY_CASTLING))
{
StateInfo st;
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
Position p;
p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread());
Tablebases::ProbeState s1, s2;
@@ -704,7 +711,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Used by NNUE
st->accumulator.computed_accumulation = false;
st->accumulator.computed_score = false;
auto& dp = st->dirtyPiece;
dp.dirty_num = 1;
@@ -755,7 +761,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
else
st->nonPawnMaterial[them] -= PieceValue[MG][captured];
if (Eval::useNNUE)
if (Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
{
dp.dirty_num = 2; // 1 piece moved, 1 piece captured
dp.piece[1] = captured;
@@ -799,7 +805,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Move the piece. The tricky Chess960 castling is handled earlier
if (type_of(m) != CASTLING)
{
if (Eval::useNNUE)
if (Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
{
dp.piece[0] = pc;
dp.from[0] = from;
@@ -830,7 +836,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
remove_piece(to);
put_piece(promotion, to);
if (Eval::useNNUE)
if (Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
{
// Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
dp.to[0] = SQ_NONE;
@@ -968,7 +974,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
if (Do && Eval::useNNUE)
if (Do && Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
{
auto& dp = st->dirtyPiece;
dp.piece[0] = make_piece(us, KING);
@@ -997,17 +1003,16 @@ void Position::do_null_move(StateInfo& newSt) {
assert(!checkers());
assert(&newSt != st);
if (Eval::useNNUE)
{
std::memcpy(&newSt, st, sizeof(StateInfo));
st->accumulator.computed_score = false;
}
else
std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
newSt.previous = st;
st = &newSt;
// Used by NNUE
st->accumulator.computed_accumulation = false;
auto& dp = st->dirtyPiece;
dp.dirty_num = 0;
if (st->epSquare != SQ_NONE)
{
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
@@ -1317,6 +1322,8 @@ bool Position::pos_is_ok() const {
assert(0 && "pos_is_ok: Bitboards");
StateInfo si = *st;
ASSERT_ALIGNED(&si, Eval::NNUE::kCacheLineSize);
set_state(&si);
if (std::memcmp(&si, st, sizeof(StateInfo)))
assert(0 && "pos_is_ok: State");
@@ -1346,3 +1353,17 @@ bool Position::pos_is_ok() const {
return true;
}
// Sets up this position by directly unpacking a packed sfen, for speed.
// According to the original note, the underlying implementation combines
// packer::unpack() and Position::set().
// This member is a thin wrapper: it forwards *this together with sfen, si and
// th to Learner::set_from_packed_sfen() and returns that call's result as-is.
// Returns non-zero if there is a problem with the passed position (i.e. an
// error occurred).
int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th)
{
return Learner::set_from_packed_sfen(*this, sfen, si, th);
}
// Gets the packed sfen of this position. The result is written to the buffer
// passed in as the `sfen` argument (any previous contents are overwritten).
// Thin wrapper: the packing itself is done by Learner::sfen_pack(*this).
void Position::sfen_pack(Learner::PackedSfen& sfen)
{
sfen = Learner::sfen_pack(*this);
}
+9 -7
View File
@@ -30,6 +30,9 @@
#include "nnue/nnue_accumulator.h"
#include "learn/packed_sfen.h"
#include "learn/sfen_packer.h"
/// StateInfo struct stores information needed to restore a Position object to
/// its previous state when we retract a move. Whenever a move is made on the
@@ -75,9 +78,6 @@ typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
/// traversing the search tree.
class Thread;
// packed sfen
struct PackedSfen { uint8_t data[32]; };
class Position {
public:
static void init();
@@ -175,25 +175,27 @@ public:
// Used by NNUE
StateInfo* state() const;
#if defined(EVAL_LEARN)
// --sfenization helper
friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
// Get the packed sfen. Returns to the buffer specified in the argument.
// Do not include gamePly in pack.
void sfen_pack(PackedSfen& sfen);
void sfen_pack(Learner::PackedSfen& sfen);
// It is slow to go through sfen, so I made a function to set packed sfen directly.
// Equivalent to pos.set(sfen_unpack(data),si,th);.
// If there is a problem with the passed position (i.e. an error occurs), non-zero is returned.
// PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
int set_from_packed_sfen(const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
void clear() { std::memset(this, 0, sizeof(Position)); }
// Give the board, hand piece, and turn, and return the sfen.
//static std::string sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly);
// Returns the square of the king of color c.
Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; }
#endif // EVAL_LEARN
private:
// Initialization helpers (used while setting up a position)
+198 -230
View File
File diff suppressed because it is too large Load Diff
+11 -6
View File
@@ -24,6 +24,7 @@
#include "misc.h"
#include "movepick.h"
#include "types.h"
#include "uci.h"
class Position;
@@ -32,6 +33,7 @@ namespace Search {
/// Threshold used for countermoves based pruning
constexpr int CounterMovePruneThreshold = 0;
extern bool prune_at_shallow_depth;
/// Stack struct keeps track of the information we need to remember from nodes
/// shallower and deeper in the tree during the search. Each search thread has
@@ -48,6 +50,8 @@ struct Stack {
int statScore;
int moveCount;
bool inCheck;
bool ttPv;
bool ttHit;
};
@@ -69,7 +73,6 @@ struct RootMove {
Value previousScore = -VALUE_INFINITE;
int selDepth = 0;
int tbRank = 0;
int bestMoveCount = 0;
Value tbScore;
std::vector<Move> pv;
};
@@ -86,9 +89,7 @@ struct LimitsType {
time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
movestogo = depth = mate = perft = infinite = 0;
nodes = 0;
#if defined (EVAL_LEARN)
silent = false;
#endif
}
bool use_time_management() const {
@@ -99,11 +100,9 @@ struct LimitsType {
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
int movestogo, depth, mate, perft, infinite;
int64_t nodes;
#if defined (EVAL_LEARN)
// Silent mode that does not output to the screen (for continuous self-play in process)
// Do not output PV at this time.
bool silent;
#endif
};
extern LimitsType Limits;
@@ -111,6 +110,12 @@ extern LimitsType Limits;
void init();
void clear();
} // namespace Search
// A pair of evaluation value and principal variation (PV). Returned by Learner::search(), Learner::qsearch().
using ValueAndPV = std::pair<Value, std::vector<Move>>;
ValueAndPV qsearch(Position& pos);
ValueAndPV search(Position& pos, int depth_, size_t multiPV = 1, uint64_t nodesLimit = 0);
}
#endif // #ifndef SEARCH_H_INCLUDED
+11 -9
View File
@@ -28,12 +28,12 @@
#include <type_traits>
#include <mutex>
#include "../bitboard.h"
#include "../movegen.h"
#include "../position.h"
#include "../search.h"
#include "../types.h"
#include "../uci.h"
#include "bitboard.h"
#include "movegen.h"
#include "position.h"
#include "search.h"
#include "types.h"
#include "uci.h"
#include "tbprobe.h"
@@ -52,7 +52,7 @@
using namespace Tablebases;
int Tablebases::MaxCardinality;
int Tablebases::MaxCardinality = 0;
namespace {
@@ -223,7 +223,9 @@ public:
*mapping = statbuf.st_size;
*baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
#if defined(MADV_RANDOM)
madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
#endif
::close(fd);
if (*baseAddress == MAP_FAILED)
@@ -758,7 +760,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
if (entry->hasPawns) {
idx = LeadPawnIdx[leadPawnsCnt][squares[0]];
std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
for (int i = 1; i < leadPawnsCnt; ++i)
idx += Binomial[i][MapPawns[squares[i]]];
@@ -859,7 +861,7 @@ encode_remaining:
while (d->groupLen[++next])
{
std::sort(groupSq, groupSq + d->groupLen[next]);
std::stable_sort(groupSq, groupSq + d->groupLen[next]);
uint64_t n = 0;
// Map down a square if "comes later" than a square in the previous
+1 -1
View File
@@ -21,7 +21,7 @@
#include <ostream>
#include "../search.h"
#include "search.h"
namespace Tablebases {
+68 -25
View File
@@ -35,6 +35,7 @@ ThreadPool Threads; // Global object
Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) {
wait_for_search_finished();
wait_for_worker_finished();
}
@@ -51,17 +52,6 @@ Thread::~Thread() {
}
/// Thread::bestMoveCount(Move move) return best move counter for the given root move
int Thread::best_move_count(Move move) const {
auto rm = std::find(rootMoves.begin() + pvIdx,
rootMoves.begin() + pvLast, move);
return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0;
}
/// Thread::clear() reset histories, usually before a new game
void Thread::clear() {
@@ -91,6 +81,14 @@ void Thread::start_searching() {
cv.notify_one(); // Wake up the thread in idle_loop()
}
/// Thread::execute_with_worker() hands the callable `t` over to this thread
/// and wakes it up. The callable is taken by value and moved into the
/// `worker` member, so the thread owns the task it is about to run.
void Thread::execute_with_worker(std::function<void(Thread&)> t)
{
  const std::lock_guard<std::mutex> guard(mutex);

  // Store the task and mark the thread as busy while holding the mutex
  // that protects both fields.
  worker = std::move(t);
  searching = true;

  // Wake up the thread in idle_loop()
  cv.notify_one();
}
/// Thread::wait_for_search_finished() blocks on the condition variable
/// until the thread has finished searching.
@@ -102,6 +100,12 @@ void Thread::wait_for_search_finished() {
}
/// Thread::wait_for_worker_finished() blocks on the condition variable
/// until the `searching` flag of this thread has been cleared.

void Thread::wait_for_worker_finished() {

  std::unique_lock<std::mutex> guard(mutex);

  // Explicit-loop form of a predicate wait: the flag is re-checked after
  // every wake-up, so spurious wake-ups are harmless.
  while (searching)
      cv.wait(guard);
}
/// Thread::idle_loop() is where the thread is parked, blocked on the
/// condition variable, when it has no work to do.
@@ -119,15 +123,25 @@ void Thread::idle_loop() {
{
std::unique_lock<std::mutex> lk(mutex);
searching = false;
worker = nullptr;
cv.notify_one(); // Wake up anyone waiting for search finished
cv.wait(lk, [&]{ return searching; });
if (exit)
return;
auto wrk = std::move(worker);
lk.unlock();
search();
if (wrk)
{
wrk(*this);
}
else
{
search();
}
}
}
@@ -172,6 +186,13 @@ void ThreadPool::clear() {
main()->previousTimeReduction = 1.0;
}
/// ThreadPool::execute_with_workers() passes `worker` to every thread of the
/// pool via Thread::execute_with_worker(). That function takes its argument
/// by value, so each thread ends up with its own copy of the callable.

void ThreadPool::execute_with_workers(const std::function<void(Thread&)>& worker)
{
  for (auto it = begin(); it != end(); ++it)
      (*it)->execute_with_worker(worker);
}
/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and
/// returns immediately. Main thread will wake up other threads and start the search.
@@ -192,9 +213,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
rootMoves.emplace_back(m);
if (!rootMoves.empty())
Tablebases::rank_root_moves(pos, rootMoves);
// After ownership transfer 'states' becomes empty, so if we stop the search
// and call 'go' again without setting a new position states.get() == NULL.
assert(states.get() || setupStates.get());
@@ -214,6 +232,24 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
th->rootMoves = rootMoves;
th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
th->rootState = setupStates->back();
// This is also set by rank_root_moves but we need to set it
// also when there is no legal moves.
th->rootInTB = false;
th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
th->Cardinality = int(Options["SyzygyProbeLimit"]);
// Tables with fewer pieces than SyzygyProbeLimit are searched with
// ProbeDepth == DEPTH_ZERO
if (th->Cardinality > Tablebases::MaxCardinality)
{
th->Cardinality = Tablebases::MaxCardinality;
th->ProbeDepth = 0;
}
if (!rootMoves.empty())
Tablebases::rank_root_moves(pos, rootMoves);
}
main()->start_searching();
@@ -235,16 +271,16 @@ Thread* ThreadPool::get_best_thread() const {
votes[th->rootMoves[0].pv[0]] +=
(th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);
if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
{
// Make sure we pick the shortest mate / TB conversion or stave off mate the longest
if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
bestThread = th;
}
else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
|| ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
&& votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
bestThread = th;
if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
{
// Make sure we pick the shortest mate / TB conversion or stave off mate the longest
if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
bestThread = th;
}
else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
|| ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
&& votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
bestThread = th;
}
return bestThread;
@@ -269,3 +305,10 @@ void ThreadPool::wait_for_search_finished() const {
if (th != front())
th->wait_for_search_finished();
}
void ThreadPool::wait_for_workers_finished() const {
for (Thread* th : *this)
th->wait_for_worker_finished();
}
+81 -1
View File
@@ -24,6 +24,7 @@
#include <mutex>
#include <thread>
#include <vector>
#include <functional>
#include "material.h"
#include "movepick.h"
@@ -38,23 +39,41 @@
/// pointer to an entry its life time is unlimited and we don't have
/// to care about someone changing the entry under our feet.
namespace Detail {

  // Identity metafunction: TypeIdentity<X>::Type is exactly X.
  // Writing a function parameter as `typename TypeIdentity<X>::Type` places
  // X in a non-deduced context, so the template argument has to be deduced
  // from the other parameters of that function.
  template <typename WrappedT>
  struct TypeIdentity {
    using Type = WrappedT;
  };

}
class Thread {
std::mutex mutex;
std::condition_variable cv;
size_t idx;
bool exit = false, searching = true; // Set before starting std::thread
std::function<void(Thread&)> worker;
NativeThread stdThread;
public:
explicit Thread(size_t);
virtual ~Thread();
virtual void search();
// The function object to be executed is taken by value to remove
// the need for separate lvalue and rvalue overloads.
// The worker thread needs to have ownership of the task
// to be executed because otherwise there's no way to manage its lifetime.
virtual void execute_with_worker(std::function<void(Thread&)> t);
void clear();
void idle_loop();
void start_searching();
void wait_for_search_finished();
int best_move_count(Move move) const;
void wait_for_worker_finished();
size_t thread_idx() const { return idx; }
Pawns::Table pawnsTable;
Material::Table materialTable;
@@ -74,6 +93,11 @@ public:
CapturePieceToHistory captureHistory;
ContinuationHistory continuationHistory[2][2];
Score contempt;
int failedHighCnt;
bool rootInTB;
int Cardinality;
bool UseRule50;
Depth ProbeDepth;
};
@@ -101,6 +125,61 @@ struct MainThread : public Thread {
struct ThreadPool : public std::vector<Thread*> {
// Each thread gets its own copy of the `worker` function object.
// This means that each worker thread will have exclusive access
// to the state of the `worker` function object.
void execute_with_workers(const std::function<void(Thread&)>& worker);
// Calls func(thread, i) for every index i in [begin, end), distributing the
// indices dynamically over the pool: each worker repeatedly claims the next
// unclaimed index from a shared atomic counter until the range is used up.
// `end` is declared through Detail::TypeIdentity, so IndexT is deduced from
// `begin` alone. Every worker receives its own copy of `func`.
// This function only dispatches the work; it does not wait for completion.
template <typename IndexT, typename FuncT>
void for_each_index_with_workers(
    IndexT begin,
    typename Detail::TypeIdentity<IndexT>::Type end,
    FuncT func)
{
    // The counter must outlive this call, because the workers keep using it
    // after we return. Making it static is fairly safe, since
    // for_each_index_with_workers is neither reentrant nor thread safe.
    static std::atomic<IndexT> next_index;
    next_index.store(begin);

    execute_with_workers(
        [end, func](Thread& th) mutable {
            while (true) {
                // Claim one index; stop as soon as the range is exhausted.
                const auto i = next_index.fetch_add(1);
                if (!(i < end))
                    return;

                func(th, i);
            }
        });
}
// Splits the index range [begin, end) into one contiguous chunk per pool
// thread and dispatches func(thread, offset, count) to the workers, where
// `offset` is the first index of the thread's chunk and `count` the number of
// indices in it. Threads whose chunk would start at or past `end` do nothing.
// `end` is declared through Detail::TypeIdentity, so IndexT is deduced from
// `begin` alone. Every worker receives its own copy of `func`.
// This function only dispatches the work; it does not wait for completion.
// Everything the workers need is captured by value, so nothing local to this
// call has to outlive it.
template <typename IndexT, typename FuncT>
void for_each_index_chunk_with_workers(
    IndexT begin,
    typename Detail::TypeIdentity<IndexT>::Type end,
    FuncT func)
{
    // An empty pool has nobody to dispatch to; returning early also avoids
    // a division by zero below.
    const auto thread_count = this->size();
    if (thread_count == 0)
        return;

    // (size + n) / n is never smaller than ceil(size / n), so the chunks of
    // all threads together always cover the whole range.
    const IndexT size = end - begin;
    const IndexT chunk_size = (size + thread_count) / thread_count;

    execute_with_workers(
        [begin, chunk_size, end, func](Thread& th) mutable {
            const IndexT thread_id = th.thread_idx();

            // Chunks are laid out starting at `begin`, not at zero; otherwise
            // a non-zero `begin` would make the workers process the wrong
            // indices and miss the tail of the range.
            const IndexT offset = begin + chunk_size * thread_id;
            if (offset >= end)
                return;

            // Clamp the last chunk to the end of the range. Expressed through
            // the remaining length so that no `offset + chunk_size` sum, which
            // could overflow, has to be formed.
            const IndexT remaining = end - offset;
            const IndexT count = remaining < chunk_size ? remaining : chunk_size;

            func(th, offset, count);
        });
}
void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false);
void clear();
void set(size_t);
@@ -111,6 +190,7 @@ struct ThreadPool : public std::vector<Thread*> {
Thread* get_best_thread() const;
void start_searching();
void wait_for_search_finished() const;
void wait_for_workers_finished() const;
std::atomic_bool stop, increaseDepth;

Some files were not shown because too many files have changed in this diff Show More