mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 16:47:37 +00:00
Merge branch 'master' into stockfish-nnue-2020-08-30-macos
This commit is contained in:
+101
-67
@@ -28,43 +28,49 @@ else
|
||||
EXE = stockfish
|
||||
endif
|
||||
|
||||
### Installation dir definitions
|
||||
PREFIX = /usr/local
|
||||
BINDIR = $(PREFIX)/bin
|
||||
|
||||
### Built-in benchmark for pgo-builds
|
||||
PGOBENCH = ./$(EXE) bench
|
||||
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 100000
|
||||
|
||||
### Source and object files
|
||||
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
|
||||
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
|
||||
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
|
||||
nnue/evaluate_nnue.cpp \
|
||||
nnue/evaluate_nnue_learner.cpp \
|
||||
nnue/features/half_kp.cpp \
|
||||
nnue/features/half_relative_kp.cpp \
|
||||
nnue/features/k.cpp \
|
||||
nnue/features/p.cpp \
|
||||
nnue/features/castling_right.cpp \
|
||||
nnue/features/enpassant.cpp \
|
||||
nnue/nnue_test_command.cpp \
|
||||
extra/sfen_packer.cpp \
|
||||
learn/gensfen2019.cpp \
|
||||
learn/learner.cpp \
|
||||
learn/learning_tools.cpp \
|
||||
learn/multi_think.cpp
|
||||
|
||||
OBJS = $(notdir $(SRCS:.cpp=.o))
|
||||
|
||||
VPATH = syzygy:nnue:nnue/features:eval:extra:learn
|
||||
|
||||
### Establish the operating system name
|
||||
KERNEL = $(shell uname -s)
|
||||
ifeq ($(KERNEL),Linux)
|
||||
OS = $(shell uname -o)
|
||||
endif
|
||||
|
||||
### Installation dir definitions
|
||||
PREFIX = /usr/local
|
||||
BINDIR = $(PREFIX)/bin
|
||||
|
||||
### Built-in benchmark for pgo-builds
|
||||
PGO_TRAINING_DATA_FILE = pgo_training_data.bin
|
||||
PGOBENCH = ./$(EXE) bench
|
||||
PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 sfen_format bin output_file_name $(PGO_TRAINING_DATA_FILE)
|
||||
|
||||
### Source and object files
|
||||
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
|
||||
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
|
||||
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
|
||||
extra/stockfish_blas.cpp \
|
||||
nnue/evaluate_nnue.cpp \
|
||||
nnue/evaluate_nnue_learner.cpp \
|
||||
nnue/features/half_kp.cpp \
|
||||
nnue/features/half_ka.cpp \
|
||||
nnue/features/half_relative_kp.cpp \
|
||||
nnue/features/half_relative_ka.cpp \
|
||||
nnue/features/k.cpp \
|
||||
nnue/features/p.cpp \
|
||||
nnue/features/a.cpp \
|
||||
nnue/features/castling_right.cpp \
|
||||
nnue/features/enpassant.cpp \
|
||||
nnue/nnue_test_command.cpp \
|
||||
learn/sfen_packer.cpp \
|
||||
learn/learn.cpp \
|
||||
learn/gensfen.cpp \
|
||||
learn/opening_book.cpp \
|
||||
learn/convert.cpp \
|
||||
learn/transform.cpp
|
||||
|
||||
OBJS = $(notdir $(SRCS:.cpp=.o))
|
||||
|
||||
VPATH = syzygy:nnue:nnue/features:eval:extra:learn
|
||||
|
||||
### ==========================================================================
|
||||
### Section 2. High-level Configuration
|
||||
### ==========================================================================
|
||||
@@ -99,17 +105,23 @@ endif
|
||||
|
||||
### 2.1. General and architecture defaults
|
||||
|
||||
ifeq ($(ARCH),)
|
||||
ARCH = x86-64-modern
|
||||
help_skip_sanity = yes
|
||||
endif
|
||||
# explicitly check for the list of supported architectures (as listed with make help),
|
||||
# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
|
||||
ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
|
||||
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
|
||||
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
|
||||
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
|
||||
ifeq ($(ARCH), $(filter $(ARCH), \
|
||||
x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
|
||||
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
|
||||
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
|
||||
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
|
||||
SUPPORTED_ARCH=true
|
||||
else
|
||||
SUPPORTED_ARCH=false
|
||||
endif
|
||||
|
||||
blas = no
|
||||
optimize = yes
|
||||
debug = no
|
||||
sanitize = no
|
||||
@@ -127,7 +139,6 @@ avx512 = no
|
||||
vnni256 = no
|
||||
vnni512 = no
|
||||
neon = no
|
||||
ARCH = x86-64-modern
|
||||
STRIP = strip
|
||||
|
||||
### 2.2 Architecture specific
|
||||
@@ -306,9 +317,9 @@ endif
|
||||
### ==========================================================================
|
||||
|
||||
### 3.1 Selecting compiler (default = gcc)
|
||||
CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(LEARNCXXFLAGS)
|
||||
DEPENDFLAGS += -std=c++17
|
||||
LDFLAGS += $(EXTRALDFLAGS) $(LEARNLDFLAGS)
|
||||
CXXFLAGS += -g -Wall -Wcast-qual -fno-exceptions -std=c++17 -fopenmp -I. $(EXTRACXXFLAGS)
|
||||
LDFLAGS += -fopenmp $(EXTRALDFLAGS)
|
||||
DEPENDFLAGS += -std=c++17 -I.
|
||||
|
||||
ifeq ($(COMP),)
|
||||
COMP=gcc
|
||||
@@ -391,19 +402,6 @@ ifeq ($(COMP),clang)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(comp),icc)
|
||||
profile_make = icc-profile-make
|
||||
profile_use = icc-profile-use
|
||||
else
|
||||
ifeq ($(comp),clang)
|
||||
profile_make = clang-profile-make
|
||||
profile_use = clang-profile-use
|
||||
else
|
||||
profile_make = gcc-profile-make
|
||||
profile_use = gcc-profile-use
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KERNEL),Darwin)
|
||||
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
|
||||
LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
|
||||
@@ -415,20 +413,30 @@ endif
|
||||
# Currently we don't know how to make PGO builds with the NDK yet.
|
||||
ifeq ($(COMP),ndk)
|
||||
CXXFLAGS += -stdlib=libc++ -fPIE
|
||||
comp=clang
|
||||
ifeq ($(arch),armv7)
|
||||
comp=armv7a-linux-androideabi16-clang
|
||||
CXX=armv7a-linux-androideabi16-clang++
|
||||
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
|
||||
STRIP=arm-linux-androideabi-strip
|
||||
endif
|
||||
ifeq ($(arch),armv8)
|
||||
comp=aarch64-linux-android21-clang
|
||||
CXX=aarch64-linux-android21-clang++
|
||||
STRIP=aarch64-linux-android-strip
|
||||
endif
|
||||
LDFLAGS += -static-libstdc++ -pie -lm -latomic
|
||||
endif
|
||||
|
||||
ifeq ($(comp),icc)
|
||||
profile_make = icc-profile-make
|
||||
profile_use = icc-profile-use
|
||||
else ifeq ($(comp),clang)
|
||||
profile_make = clang-profile-make
|
||||
profile_use = clang-profile-use
|
||||
else
|
||||
profile_make = gcc-profile-make
|
||||
profile_use = gcc-profile-use
|
||||
endif
|
||||
|
||||
### Travis CI script uses COMPILER to overwrite CXX
|
||||
ifdef COMPILER
|
||||
COMPCXX=$(COMPILER)
|
||||
@@ -463,14 +471,33 @@ ifneq ($(comp),mingw)
|
||||
endif
|
||||
endif
|
||||
|
||||
### 3.2.1 Debugging
|
||||
### 3.2.1. BLAS libraries
|
||||
ifeq ($(blas), yes)
|
||||
LDFLAGS += -lopenblas
|
||||
|
||||
ifeq ($(KERNEL),Linux)
|
||||
LDFLAGS +=
|
||||
else
|
||||
CXXFLAGS += -I/mingw64/include/OpenBLAS
|
||||
|
||||
ifeq ($(debug),yes)
|
||||
LDFLAGS += -Wl,-static
|
||||
else
|
||||
LDFLAGS += -Wl,-s -static
|
||||
endif
|
||||
endif
|
||||
|
||||
CXXFLAGS += -DUSE_BLAS
|
||||
endif
|
||||
|
||||
### 3.2.2 Debugging
|
||||
ifeq ($(debug),no)
|
||||
CXXFLAGS += -DNDEBUG
|
||||
else
|
||||
CXXFLAGS += -g
|
||||
endif
|
||||
|
||||
### 3.2.2 Debugging with undefined behavior sanitizers
|
||||
### 3.2.3 Debugging with undefined behavior sanitizers
|
||||
ifneq ($(sanitize),no)
|
||||
CXXFLAGS += -g3 -fsanitize=$(sanitize)
|
||||
LDFLAGS += -fsanitize=$(sanitize)
|
||||
@@ -600,11 +627,13 @@ endif
|
||||
### needs access to the optimization flags.
|
||||
ifeq ($(optimize),yes)
|
||||
ifeq ($(debug), no)
|
||||
ifeq ($(COMP),ndk)
|
||||
CXXFLAGS += -flto=thin
|
||||
LDFLAGS += $(CXXFLAGS)
|
||||
else ifeq ($(comp),clang)
|
||||
ifeq ($(comp),clang)
|
||||
CXXFLAGS += -flto=thin
|
||||
ifneq ($(findstring MINGW,$(KERNEL)),)
|
||||
CXXFLAGS += -fuse-ld=lld
|
||||
else ifneq ($(findstring MSYS,$(KERNEL)),)
|
||||
CXXFLAGS += -fuse-ld=lld
|
||||
endif
|
||||
LDFLAGS += $(CXXFLAGS)
|
||||
|
||||
# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
|
||||
@@ -628,10 +657,12 @@ ifeq ($(debug), no)
|
||||
# So, only enable it for a cross from Linux by default.
|
||||
else ifeq ($(comp),mingw)
|
||||
ifeq ($(KERNEL),Linux)
|
||||
ifneq ($(arch),i386)
|
||||
CXXFLAGS += -flto
|
||||
LDFLAGS += $(CXXFLAGS) -flto=jobserver
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -707,11 +738,12 @@ help:
|
||||
@echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
|
||||
@echo ""
|
||||
@echo "-------------------------------"
|
||||
ifeq ($(SUPPORTED_ARCH), true)
|
||||
ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true)
|
||||
@echo "The selected architecture $(ARCH) will enable the following configuration: "
|
||||
@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
|
||||
else
|
||||
@echo "Specify a supported architecture with the ARCH option for more details"
|
||||
@echo ""
|
||||
endif
|
||||
|
||||
|
||||
@@ -719,7 +751,7 @@ endif
|
||||
config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
|
||||
clang-profile-use clang-profile-make
|
||||
|
||||
build: config-sanity
|
||||
build: net config-sanity
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
|
||||
|
||||
profile-build: net config-sanity objclean profileclean
|
||||
@@ -729,6 +761,7 @@ profile-build: net config-sanity objclean profileclean
|
||||
@echo ""
|
||||
@echo "Step 2/4. Running benchmark for pgo-build ..."
|
||||
$(PGOBENCH) > /dev/null
|
||||
$(PGOGENSFEN) > /dev/null
|
||||
@echo ""
|
||||
@echo "Step 3/4. Building optimized executable ..."
|
||||
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
|
||||
@@ -745,12 +778,13 @@ install:
|
||||
-cp $(EXE) $(BINDIR)
|
||||
-strip $(BINDIR)/$(EXE)
|
||||
|
||||
#clean all
|
||||
# clean all
|
||||
clean: objclean profileclean
|
||||
@rm -f .depend *~ core
|
||||
|
||||
# evaluation network (nnue)
|
||||
net:
|
||||
$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
|
||||
$(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
|
||||
@echo "Default net: $(nnuenet)"
|
||||
$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
|
||||
$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
|
||||
@@ -772,7 +806,6 @@ net:
|
||||
echo "shasum / sha256sum not found, skipping net validation"; \
|
||||
fi
|
||||
|
||||
|
||||
# clean binaries and objects
|
||||
objclean:
|
||||
@rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o
|
||||
@@ -782,6 +815,7 @@ profileclean:
|
||||
@rm -rf profdir
|
||||
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda
|
||||
@rm -f stockfish.profdata *.profraw
|
||||
@rm -f $(PGO_TRAINING_DATA_FILE)
|
||||
|
||||
default:
|
||||
help
|
||||
@@ -792,7 +826,7 @@ default:
|
||||
|
||||
all: $(EXE) .depend
|
||||
|
||||
config-sanity:
|
||||
config-sanity: net
|
||||
@echo ""
|
||||
@echo "Config:"
|
||||
@echo "debug: '$(debug)'"
|
||||
@@ -913,6 +947,6 @@ profile-learn: config-sanity objclean profileclean
|
||||
rm generated_kifu.bin
|
||||
|
||||
.depend:
|
||||
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
|
||||
-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@
|
||||
|
||||
-include .depend
|
||||
|
||||
@@ -164,5 +164,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
|
||||
++posCounter;
|
||||
}
|
||||
|
||||
list.emplace_back("setoption name Use NNUE value true");
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
#ifndef _EVALUATE_COMMON_H_
|
||||
#define _EVALUATE_COMMON_H_
|
||||
|
||||
// Common declarations shared by the modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT).
|
||||
|
||||
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
|
||||
#include <functional>
|
||||
|
||||
// KK file name
|
||||
#define KK_BIN "KK_synthesized.bin"
|
||||
|
||||
// KKP file name
|
||||
#define KKP_BIN "KKP_synthesized.bin"
|
||||
|
||||
// KPP file name
|
||||
#define KPP_BIN "KPP_synthesized.bin"
|
||||
|
||||
namespace Eval
|
||||
{
|
||||
|
||||
#if defined(USE_EVAL_HASH)
|
||||
// prefetch function
|
||||
void prefetch_evalhash(const Key key);
|
||||
#endif
|
||||
|
||||
// An operator that applies the function f to each parameter of the evaluation function.
|
||||
// Used for parameter analysis etc.
|
||||
// type selects which parameter tables are visited:
|
||||
// type = -1: KK, KKP and KPP (all)
|
||||
// type = 0: KK only
|
||||
// type = 1: KKP only
|
||||
// type = 2: KPP only
|
||||
void foreach_eval_param(std::function<void(int32_t, int32_t)>f, int type = -1);
|
||||
|
||||
// --------------------------
|
||||
// for learning
|
||||
// --------------------------
|
||||
|
||||
#if defined(EVAL_LEARN)
|
||||
// Initialize the gradient array during learning
|
||||
// Pass the learning rate as an argument. If 0.0, the default value is used.
|
||||
// As the epoch passed to update_weights() advances, the rate changes gradually from eta1 to eta2 until eta_epoch is reached.
|
||||
// After eta2_epoch, gradually change from eta2 to eta3.
|
||||
void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3);
|
||||
|
||||
// Add the gradient delta (delt_grad) to the gradient array for all features that appear in the current position.
|
||||
// freeze[0]: Flag that kk does not learn
|
||||
// freeze[1]: Flag that kkp does not learn
|
||||
// freeze[2]: Flag that kpp does not learn
|
||||
// freeze[3]: Flag that kppp does not learn
|
||||
void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4>& freeze);
|
||||
|
||||
// Do SGD or AdaGrad or something based on the current gradient.
|
||||
// epoch: Generation counter (starting from 0)
|
||||
// freeze[0]: Flag that kk does not learn
|
||||
// freeze[1]: Flag that kkp does not learn
|
||||
// freeze[2]: Flag that kpp does not learn
|
||||
// freeze[3]: Flag that kppp does not learn
|
||||
void update_weights(uint64_t epoch, const std::array<bool, 4>& freeze);
|
||||
|
||||
// Save the evaluation function parameters to a file.
|
||||
// You can specify the extension added to the end of the file.
|
||||
void save_eval(std::string suffix);
|
||||
|
||||
// Get the current eta.
|
||||
double get_eta();
|
||||
|
||||
// --learning related commands
|
||||
|
||||
// A function that normalizes KK. Note that it is not completely equivalent to the original evaluation function.
|
||||
// The idea is that by keeping the kkp and kpp values as close to zero as possible, the zero value of any
|
||||
// feature factor that never appeared during learning remains valid.
|
||||
void regularize_kk();
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
|
||||
|
||||
#endif // _EVALUATE_COMMON_H_
|
||||
+93
-99
@@ -20,61 +20,25 @@
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring> // For std::memset
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <streambuf>
|
||||
#include <vector>
|
||||
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
|
||||
#include "bitboard.h"
|
||||
#include "evaluate.h"
|
||||
#include "material.h"
|
||||
#include "misc.h"
|
||||
#include "pawns.h"
|
||||
#include "thread.h"
|
||||
#include "uci.h"
|
||||
#include "incbin/incbin.h"
|
||||
|
||||
#ifdef EVAL_LEARN
|
||||
namespace Learner
|
||||
{
|
||||
extern bool use_raw_nnue_eval;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace Eval {
|
||||
|
||||
bool useNNUE;
|
||||
std::string eval_file_loaded="None";
|
||||
|
||||
void init_NNUE() {
|
||||
|
||||
useNNUE = Options["Use NNUE"];
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
if (useNNUE && eval_file_loaded != eval_file)
|
||||
if (Eval::NNUE::load_eval_file(eval_file))
|
||||
eval_file_loaded = eval_file;
|
||||
}
|
||||
|
||||
void verify_NNUE() {
|
||||
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
if (useNNUE && eval_file_loaded != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl;
|
||||
sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl;
|
||||
sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl;
|
||||
sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl;
|
||||
sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl;
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled." << sync_endl;
|
||||
}
|
||||
}
|
||||
using namespace std;
|
||||
|
||||
namespace Trace {
|
||||
|
||||
@@ -120,11 +84,11 @@ using namespace Trace;
|
||||
namespace {
|
||||
|
||||
// Threshold for lazy and space evaluation
|
||||
constexpr Value LazyThreshold1 = Value(1400);
|
||||
constexpr Value LazyThreshold2 = Value(1300);
|
||||
constexpr Value SpaceThreshold = Value(12222);
|
||||
constexpr Value NNUEThreshold1 = Value(550);
|
||||
constexpr Value NNUEThreshold2 = Value(150);
|
||||
constexpr Value LazyThreshold1 = Value(1565);
|
||||
constexpr Value LazyThreshold2 = Value(1102);
|
||||
constexpr Value SpaceThreshold = Value(11551);
|
||||
constexpr Value NNUEThreshold1 = Value(682);
|
||||
constexpr Value NNUEThreshold2 = Value(176);
|
||||
|
||||
// KingAttackWeights[PieceType] contains king attack weights by piece type
|
||||
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
|
||||
@@ -132,7 +96,7 @@ namespace {
|
||||
// SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type,
|
||||
// higher if multiple safe checks are possible for that piece type.
|
||||
constexpr int SafeCheck[][2] = {
|
||||
{}, {}, {792, 1283}, {645, 967}, {1084, 1897}, {772, 1119}
|
||||
{}, {}, {803, 1292}, {639, 974}, {1087, 1878}, {759, 1132}
|
||||
};
|
||||
|
||||
#define S(mg, eg) make_score(mg, eg)
|
||||
@@ -140,19 +104,25 @@ namespace {
|
||||
// MobilityBonus[PieceType-2][attacked] contains bonuses for middle and end game,
|
||||
// indexed by piece type and number of attacked squares in the mobility area.
|
||||
constexpr Score MobilityBonus[][32] = {
|
||||
{ S(-62,-81), S(-53,-56), S(-12,-31), S( -4,-16), S( 3, 5), S( 13, 11), // Knight
|
||||
S( 22, 17), S( 28, 20), S( 33, 25) },
|
||||
{ S(-48,-59), S(-20,-23), S( 16, -3), S( 26, 13), S( 38, 24), S( 51, 42), // Bishop
|
||||
S( 55, 54), S( 63, 57), S( 63, 65), S( 68, 73), S( 81, 78), S( 81, 86),
|
||||
S( 91, 88), S( 98, 97) },
|
||||
{ S(-60,-78), S(-20,-17), S( 2, 23), S( 3, 39), S( 3, 70), S( 11, 99), // Rook
|
||||
S( 22,103), S( 31,121), S( 40,134), S( 40,139), S( 41,158), S( 48,164),
|
||||
S( 57,168), S( 57,169), S( 62,172) },
|
||||
{ S(-30,-48), S(-12,-30), S( -8, -7), S( -9, 19), S( 20, 40), S( 23, 55), // Queen
|
||||
S( 23, 59), S( 35, 75), S( 38, 78), S( 53, 96), S( 64, 96), S( 65,100),
|
||||
S( 65,121), S( 66,127), S( 67,131), S( 67,133), S( 72,136), S( 72,141),
|
||||
S( 77,147), S( 79,150), S( 93,151), S(108,168), S(108,168), S(108,171),
|
||||
S(110,182), S(114,182), S(114,192), S(116,219) }
|
||||
{ S(-62,-79), S(-53,-57), S(-12,-31), S( -3,-17), S( 3, 7), S( 12, 13), // Knight
|
||||
S( 21, 16), S( 28, 21), S( 37, 26) },
|
||||
{ S(-47,-59), S(-20,-25), S( 14, -8), S( 29, 12), S( 39, 21), S( 53, 40), // Bishop
|
||||
S( 53, 56), S( 60, 58), S( 62, 65), S( 69, 72), S( 78, 78), S( 83, 87),
|
||||
S( 91, 88), S( 96, 98) },
|
||||
{ S(-60,-82), S(-24,-15), S( 0, 17) ,S( 3, 43), S( 4, 72), S( 14,100), // Rook
|
||||
S( 20,102), S( 30,122), S( 41,133), S(41 ,139), S( 41,153), S( 45,160),
|
||||
S( 57,165), S( 58,170), S( 67,175) },
|
||||
{ S(-29,-49), S(-16,-29), S( -8, -8), S( -8, 17), S( 18, 39), S( 25, 54), // Queen
|
||||
S( 23, 59), S( 37, 73), S( 41, 76), S( 54, 95), S( 65, 95) ,S( 68,101),
|
||||
S( 69,124), S( 70,128), S( 70,132), S( 70,133) ,S( 71,136), S( 72,140),
|
||||
S( 74,147), S( 76,149), S( 90,153), S(104,169), S(105,171), S(106,171),
|
||||
S(112,178), S(114,185), S(114,187), S(119,221) }
|
||||
};
|
||||
|
||||
// BishopPawns[distance from edge] contains a file-dependent penalty for pawns on
|
||||
// squares of the same color as our bishop.
|
||||
constexpr Score BishopPawns[int(FILE_NB) / 2] = {
|
||||
S(3, 8), S(3, 9), S(1, 8), S(3, 7)
|
||||
};
|
||||
|
||||
// KingProtector[knight/bishop] contains penalty for each distance unit to own king
|
||||
@@ -160,32 +130,31 @@ namespace {
|
||||
|
||||
// Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
|
||||
// pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
|
||||
constexpr Score Outpost[] = { S(56, 36), S(30, 23) };
|
||||
constexpr Score Outpost[] = { S(56, 34), S(31, 23) };
|
||||
|
||||
// PassedRank[Rank] contains a bonus according to the rank of a passed pawn
|
||||
constexpr Score PassedRank[RANK_NB] = {
|
||||
S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
|
||||
S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260)
|
||||
};
|
||||
|
||||
// RookOnFile[semiopen/open] contains bonuses for each rook when there is
|
||||
// no (friendly) pawn on the rook file.
|
||||
constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) };
|
||||
constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) };
|
||||
|
||||
// ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
|
||||
// which piece type attacks which one. Attacks on lesser pieces which are
|
||||
// pawn-defended are not considered.
|
||||
constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
|
||||
S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161)
|
||||
S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
|
||||
};
|
||||
|
||||
constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
|
||||
S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41)
|
||||
S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
|
||||
};
|
||||
|
||||
// Assorted bonuses and penalties
|
||||
constexpr Score BadOutpost = S( -7, 36);
|
||||
constexpr Score BishopOnKingRing = S( 24, 0);
|
||||
constexpr Score BishopPawns = S( 3, 7);
|
||||
constexpr Score BishopXRayPawns = S( 4, 5);
|
||||
constexpr Score CorneredBishop = S( 50, 50);
|
||||
constexpr Score FlankAttacks = S( 8, 0);
|
||||
@@ -198,7 +167,6 @@ namespace {
|
||||
constexpr Score ReachableOutpost = S( 31, 22);
|
||||
constexpr Score RestrictedPiece = S( 7, 7);
|
||||
constexpr Score RookOnKingRing = S( 16, 0);
|
||||
constexpr Score RookOnQueenFile = S( 6, 11);
|
||||
constexpr Score SliderOnQueen = S( 60, 18);
|
||||
constexpr Score ThreatByKing = S( 24, 89);
|
||||
constexpr Score ThreatByPawnPush = S( 48, 39);
|
||||
@@ -387,7 +355,7 @@ namespace {
|
||||
// when the bishop is outside the pawn chain.
|
||||
Bitboard blocked = pos.pieces(Us, PAWN) & shift<Down>(pos.pieces());
|
||||
|
||||
score -= BishopPawns * pos.pawns_on_same_color_squares(Us, s)
|
||||
score -= BishopPawns[edge_distance(file_of(s))] * pos.pawns_on_same_color_squares(Us, s)
|
||||
* (!(attackedBy[Us][PAWN] & s) + popcount(blocked & CenterFiles));
|
||||
|
||||
// Penalty for all enemy pawns x-rayed
|
||||
@@ -414,10 +382,6 @@ namespace {
|
||||
|
||||
if (Pt == ROOK)
|
||||
{
|
||||
// Bonus for rook on the same file as a queen
|
||||
if (file_bb(s) & pos.pieces(QUEEN))
|
||||
score += RookOnQueenFile;
|
||||
|
||||
// Bonus for rook on an open or semi-open file
|
||||
if (pos.is_on_semiopen_file(Us, s))
|
||||
score += RookOnFile[pos.is_on_semiopen_file(Them, s)];
|
||||
@@ -515,18 +479,18 @@ namespace {
|
||||
int kingFlankAttack = popcount(b1) + popcount(b2);
|
||||
int kingFlankDefense = popcount(b3);
|
||||
|
||||
kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them]
|
||||
+ 185 * popcount(kingRing[Us] & weak)
|
||||
+ 148 * popcount(unsafeChecks)
|
||||
+ 98 * popcount(pos.blockers_for_king(Us))
|
||||
+ 69 * kingAttacksCount[Them]
|
||||
+ 3 * kingFlankAttack * kingFlankAttack / 8
|
||||
+ mg_value(mobility[Them] - mobility[Us])
|
||||
- 873 * !pos.count<QUEEN>(Them)
|
||||
- 100 * bool(attackedBy[Us][KNIGHT] & attackedBy[Us][KING])
|
||||
- 6 * mg_value(score) / 8
|
||||
- 4 * kingFlankDefense
|
||||
+ 37;
|
||||
kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them] // (~10 Elo)
|
||||
+ 185 * popcount(kingRing[Us] & weak) // (~15 Elo)
|
||||
+ 148 * popcount(unsafeChecks) // (~4 Elo)
|
||||
+ 98 * popcount(pos.blockers_for_king(Us)) // (~2 Elo)
|
||||
+ 69 * kingAttacksCount[Them] // (~0.5 Elo)
|
||||
+ 3 * kingFlankAttack * kingFlankAttack / 8 // (~0.5 Elo)
|
||||
+ mg_value(mobility[Them] - mobility[Us]) // (~0.5 Elo)
|
||||
- 873 * !pos.count<QUEEN>(Them) // (~24 Elo)
|
||||
- 100 * bool(attackedBy[Us][KNIGHT] & attackedBy[Us][KING]) // (~5 Elo)
|
||||
- 6 * mg_value(score) / 8 // (~8 Elo)
|
||||
- 4 * kingFlankDefense // (~5 Elo)
|
||||
+ 37; // (~0.5 Elo)
|
||||
|
||||
// Transform the kingDanger units into a Score, and subtract it from the evaluation
|
||||
if (kingDanger > 100)
|
||||
@@ -843,7 +807,9 @@ namespace {
|
||||
sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
|
||||
: pos.count<BISHOP>(WHITE) + pos.count<KNIGHT>(WHITE));
|
||||
else
|
||||
sf = std::min(sf, 36 + 7 * pos.count<PAWN>(strongSide));
|
||||
sf = std::min(sf, 36 + 7 * pos.count<PAWN>(strongSide)) - 4 * !pawnsOnBothFlanks;
|
||||
|
||||
sf -= 4 * !pawnsOnBothFlanks;
|
||||
}
|
||||
|
||||
// Interpolate between the middlegame and (scaled by 'sf') endgame score
|
||||
@@ -947,19 +913,47 @@ make_v:
|
||||
/// evaluation of the position from the point of view of the side to move.
|
||||
|
||||
Value Eval::evaluate(const Position& pos) {
|
||||
#ifdef EVAL_LEARN
|
||||
if (Learner::use_raw_nnue_eval) {
|
||||
return NNUE::evaluate(pos);
|
||||
|
||||
Value v;
|
||||
|
||||
if (NNUE::useNNUE == NNUE::UseNNUEMode::Pure) {
|
||||
v = NNUE::evaluate(pos);
|
||||
|
||||
// Guarantee evaluation does not hit the tablebase range
|
||||
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
|
||||
|
||||
return v;
|
||||
}
|
||||
#endif
|
||||
else if (NNUE::useNNUE == NNUE::UseNNUEMode::False)
|
||||
v = Evaluation<NO_TRACE>(pos).value();
|
||||
else
|
||||
{
|
||||
// Scale and shift NNUE for compatibility with search and classical evaluation
|
||||
auto adjusted_NNUE = [&](){
|
||||
int mat = pos.non_pawn_material() + PawnValueMg * pos.count<PAWN>();
|
||||
return NNUE::evaluate(pos) * (720 + mat / 32) / 1024 + Tempo;
|
||||
};
|
||||
|
||||
bool classical = !Eval::useNNUE
|
||||
|| abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
|
||||
Value v = classical ? Evaluation<NO_TRACE>(pos).value()
|
||||
: NNUE::evaluate(pos) * 5 / 4 + Tempo;
|
||||
// If there is PSQ imbalance use classical eval, with small probability if it is small
|
||||
Value psq = Value(abs(eg_value(pos.psq_score())));
|
||||
int r50 = 16 + pos.rule50_count();
|
||||
bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
|
||||
bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
|
||||
|
||||
if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
|
||||
v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
|
||||
bool strongClassical = pos.non_pawn_material() < 2 * RookValueMg && pos.count<PAWN>() < 2;
|
||||
|
||||
v = classical || strongClassical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
|
||||
|
||||
// If the classical eval is small and imbalance large, use NNUE nevertheless.
|
||||
// For the case of opposite colored bishops, switch to NNUE eval with
|
||||
// small probability if the classical eval is less than the threshold.
|
||||
if ( largePsq && !strongClassical
|
||||
&& ( abs(v) * 16 < NNUEThreshold2 * r50
|
||||
|| ( pos.opposite_bishops()
|
||||
&& abs(v) * 16 < (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50
|
||||
&& !(pos.this_thread()->nodes & 0xB))))
|
||||
v = adjusted_NNUE();
|
||||
}
|
||||
|
||||
// Damp down the evaluation linearly when shuffling
|
||||
v = v * (100 - pos.rule50_count()) / 100;
|
||||
@@ -1015,7 +1009,7 @@ std::string Eval::trace(const Position& pos) {
|
||||
|
||||
ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
|
||||
|
||||
if (Eval::useNNUE)
|
||||
if (NNUE::useNNUE != NNUE::UseNNUEMode::False)
|
||||
{
|
||||
v = NNUE::evaluate(pos);
|
||||
v = pos.side_to_move() == WHITE ? v : -v;
|
||||
|
||||
+4
-14
@@ -26,23 +26,13 @@
|
||||
class Position;
|
||||
|
||||
namespace Eval {
|
||||
|
||||
std::string trace(const Position& pos);
|
||||
Value evaluate(const Position& pos);
|
||||
|
||||
extern bool useNNUE;
|
||||
extern std::string eval_file_loaded;
|
||||
void init_NNUE();
|
||||
void verify_NNUE();
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
Value evaluate(const Position& pos);
|
||||
Value compute_eval(const Position& pos);
|
||||
void update_eval(const Position& pos);
|
||||
bool load_eval_file(const std::string& evalFile);
|
||||
|
||||
} // namespace NNUE
|
||||
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
|
||||
// for the build process (profile-build and fishtest) to work. Do not change the
|
||||
// name of the macro, as it is used in the Makefile.
|
||||
#define EvalFileDefaultName "nn-c3ca321c51c9.nnue"
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,429 +0,0 @@
|
||||
#if defined (EVAL_LEARN)
|
||||
|
||||
#include "../misc.h"
|
||||
#include "../position.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <cstring> // std::memset()
|
||||
|
||||
using namespace std;
|
||||
|
||||
// -----------------------------------
|
||||
// position compression/decompression
|
||||
// -----------------------------------
|
||||
|
||||
// Helper that reads and writes a bit stream;
|
||||
// useful when encoding positions.
|
||||
// Minimal bit-granular reader/writer over a caller-owned byte buffer.
//
// Bits are addressed by a single cursor that is shared by reads and writes;
// bit k lives in byte k / 8 at bit position k % 8 (least-significant first).
// The stream never allocates and never checks bounds: the caller must supply
// a buffer large enough for everything that will be written or read.
struct BitStream
{
    // Attach the stream to `data_` and rewind the cursor.
    // Writing only ever sets bits (it never clears them), so the buffer must
    // already be zero-filled before the first write.
    void set_data(uint8_t* data_) { data = data_; reset(); }

    // Pointer passed to set_data(), or nullptr if none has been set yet.
    uint8_t* get_data() const { return data; }

    // Index of the next bit that will be read or written.
    int get_cursor() const { return bit_cursor; }

    // Rewind the cursor to the first bit.
    void reset() { bit_cursor = 0; }

    // Append one bit: 1 if `b` is non-zero, 0 otherwise.
    void write_one_bit(int b)
    {
        if (b)
            data[bit_cursor / 8] |= static_cast<uint8_t>(1u << (bit_cursor & 7));

        ++bit_cursor;
    }

    // Consume one bit and return it as 0 or 1.
    int read_one_bit()
    {
        const int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
        ++bit_cursor;

        return b;
    }

    // Append the `n` low-order bits of `d`, least-significant bit first.
    // The shift is done on an unsigned copy so that n == 32 is well defined.
    void write_n_bit(int d, int n)
    {
        const unsigned value = static_cast<unsigned>(d);

        for (int i = 0; i < n; ++i)
            write_one_bit(static_cast<int>((value >> i) & 1u));
    }

    // Inverse of write_n_bit(): consume `n` bits and reassemble them with the
    // first bit read as the least-significant bit of the result.
    int read_n_bit(int n)
    {
        unsigned result = 0;

        for (int i = 0; i < n; ++i)
            result |= static_cast<unsigned>(read_one_bit()) << i;

        return static_cast<int>(result);
    }

private:
    // Next bit position to read/write.
    int bit_cursor = 0;

    // Caller-owned backing buffer (not freed by this class).
    uint8_t* data = nullptr;
};
|
||||
|
||||
|
||||
// Huffman coding
|
||||
// * is simplified from mini encoding to make conversion easier.
|
||||
//
|
||||
// 1 box on the board (other than NO_PIECE) = 2 to 6 bits (+ 1-bit flag + 1-bit forward and backward)
|
||||
// 1 piece of hand piece = 1-5bit (+ 1-bit flag + 1bit ahead and behind)
|
||||
//
|
||||
// empty xxxxx0 + 0 (none)
|
||||
// step xxxx01 + 2 xxxx0 + 2
|
||||
// incense xx0011 + 2 xx001 + 2
|
||||
// Katsura xx1011 + 2 xx101 + 2
|
||||
// silver xx0111 + 2 xx011 + 2
|
||||
// Gold x01111 + 1 x0111 + 1 // Gold is valid and has no flags.
|
||||
// corner 011111 + 2 01111 + 2
|
||||
// Fly 111111 + 2 11111 + 2
|
||||
//
|
||||
// Assuming all pieces are on the board,
|
||||
// Sky 81-40 pieces = 41 boxes = 41bit
|
||||
// Walk 4bit*18 pieces = 72bit
|
||||
// Incense 6bit*4 pieces = 24bit
|
||||
// Katsura 6bit*4 pieces = 24bit
|
||||
// Silver 6bit*4 pieces = 24bit
|
||||
// Gold 6bit* 4 pieces = 24bit
|
||||
// corner 8bit* 2 pieces = 16bit
|
||||
// Fly 8bit* 2 pieces = 16bit
|
||||
// -------
|
||||
// 241bit + 1bit (turn) + 7bit × 2 (King's position after) = 256bit
|
||||
//
|
||||
// When the piece on the board moves to the hand piece, the piece on the board becomes empty, so the box on the board can be expressed with 1 bit,
|
||||
// Since the hand piece can be expressed by 1 bit less than the piece on the board, the total number of bits does not change in the end.
|
||||
// Therefore, in this expression, any aspect can be expressed by this bit number.
|
||||
// It is a hand piece and no flag is required, but if you include this, the bit number of the piece on the board will be -1
|
||||
// Since the total number of bits can be fixed, we will include this as well.
|
||||
|
||||
// Huffman Encoding
|
||||
//
|
||||
// Empty xxxxxxx0
|
||||
// Pawn xxxx0001 + 1 bit (piece color)
// Knight xxxx0011 + 1 bit (piece color)
// Bishop xxxx0101 + 1 bit (piece color)
// Rook xxxx0111 + 1 bit (piece color)
// Queen xxxx1001 + 1 bit (piece color)
|
||||
|
||||
// One entry of the board-piece code table.
struct HuffmanedPiece
{
    int code; // bit pattern of the code word
    int bits; // number of low-order bits of `code` that make up the code word
};

// Code words for the piece types that are stored on the board, indexed by
// piece type. Kings have no entry. The table is read-only, so it is declared
// constexpr instead of being a mutable global.
constexpr HuffmanedPiece huffman_table[] =
{
    {0b0000, 1}, // NO_PIECE
    {0b0001, 4}, // PAWN
    {0b0011, 4}, // KNIGHT
    {0b0101, 4}, // BISHOP
    {0b0111, 4}, // ROOK
    {0b1001, 4}, // QUEEN
};
|
||||
|
||||
// Class for compressing/decompressing sfen
|
||||
// sfen can be packed to 256bit (32bytes) by Huffman coding.
|
||||
// This is proven by mini. The above is Huffman coding.
|
||||
//
|
||||
// Internal format = 1-bit turn + 7-bit king position *2 + piece on board (Huffman coding) + hand piece (Huffman coding)
|
||||
// Side to move (White = 0, Black = 1) (1bit)
|
||||
// White King Position (6 bits)
|
||||
// Black King Position (6 bits)
|
||||
// Huffman Encoding of the board
|
||||
// Castling availability (1 bit x 4)
|
||||
// En passant square (1 or 1 + 6 bits)
|
||||
// Rule 50 (6 bits)
|
||||
// Fullmove number (8 bits)
|
||||
//
|
||||
// TODO(someone): Rename SFEN to FEN.
|
||||
//
|
||||
struct SfenPacker
|
||||
{
|
||||
// Pack sfen and store in data[32].
|
||||
void pack(const Position& pos)
|
||||
{
|
||||
// cout << pos;
|
||||
|
||||
memset(data, 0, 32 /* 256bit */);
|
||||
stream.set_data(data);
|
||||
|
||||
// turn
|
||||
// Side to move.
|
||||
stream.write_one_bit((int)(pos.side_to_move()));
|
||||
|
||||
// 7-bit positions for leading and trailing balls
|
||||
// White king and black king, 6 bits for each.
|
||||
for(auto c: Colors)
|
||||
stream.write_n_bit(pos.king_square(c), 6);
|
||||
|
||||
// Write the pieces on the board other than the kings.
|
||||
for (Rank r = RANK_8; r >= RANK_1; --r)
|
||||
{
|
||||
for (File f = FILE_A; f <= FILE_H; ++f)
|
||||
{
|
||||
Piece pc = pos.piece_on(make_square(f, r));
|
||||
if (type_of(pc) == KING)
|
||||
continue;
|
||||
write_board_piece_to_stream(pc);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(someone): Support chess960.
|
||||
stream.write_one_bit(pos.can_castle(WHITE_OO));
|
||||
stream.write_one_bit(pos.can_castle(WHITE_OOO));
|
||||
stream.write_one_bit(pos.can_castle(BLACK_OO));
|
||||
stream.write_one_bit(pos.can_castle(BLACK_OOO));
|
||||
|
||||
if (pos.ep_square() == SQ_NONE) {
|
||||
stream.write_one_bit(0);
|
||||
}
|
||||
else {
|
||||
stream.write_one_bit(1);
|
||||
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
|
||||
}
|
||||
|
||||
stream.write_n_bit(pos.state()->rule50, 6);
|
||||
|
||||
stream.write_n_bit(1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2, 8);
|
||||
|
||||
assert(stream.get_cursor() <= 256);
|
||||
}
|
||||
|
||||
// sfen packed by pack() (256bit = 32bytes)
|
||||
// Or sfen to decode with unpack()
|
||||
uint8_t *data; // uint8_t[32];
|
||||
|
||||
//private:
|
||||
// Position::set_from_packed_sfen(uint8_t data[32]) I want to use these functions, so the line is bad, but I want to keep it public.
|
||||
|
||||
BitStream stream;
|
||||
|
||||
// Output the board pieces to stream.
|
||||
void write_board_piece_to_stream(Piece pc)
|
||||
{
|
||||
// piece type
|
||||
PieceType pr = type_of(pc);
|
||||
auto c = huffman_table[pr];
|
||||
stream.write_n_bit(c.code, c.bits);
|
||||
|
||||
if (pc == NO_PIECE)
|
||||
return;
|
||||
|
||||
// first and second flag
|
||||
stream.write_one_bit(color_of(pc));
|
||||
}
|
||||
|
||||
// Read one board piece from stream
|
||||
Piece read_board_piece_from_stream()
|
||||
{
|
||||
PieceType pr = NO_PIECE_TYPE;
|
||||
int code = 0, bits = 0;
|
||||
while (true)
|
||||
{
|
||||
code |= stream.read_one_bit() << bits;
|
||||
++bits;
|
||||
|
||||
assert(bits <= 6);
|
||||
|
||||
for (pr = NO_PIECE_TYPE; pr <KING; ++pr)
|
||||
if (huffman_table[pr].code == code
|
||||
&& huffman_table[pr].bits == bits)
|
||||
goto Found;
|
||||
}
|
||||
Found:;
|
||||
if (pr == NO_PIECE_TYPE)
|
||||
return NO_PIECE;
|
||||
|
||||
// first and second flag
|
||||
Color c = (Color)stream.read_one_bit();
|
||||
|
||||
return make_piece(c, pr);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// -----------------------------------
|
||||
// Add to Position class
|
||||
// -----------------------------------
|
||||
|
||||
// Add a function that directly unpacks for speed. It's pretty tough.
|
||||
// Write it by combining packer::unpack() and Position::set().
|
||||
// If there is a problem with the passed phase and there is an error, non-zero is returned.
|
||||
int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thread* th, bool mirror)
|
||||
{
|
||||
SfenPacker packer;
|
||||
auto& stream = packer.stream;
|
||||
stream.set_data((uint8_t*)&sfen);
|
||||
|
||||
std::memset(this, 0, sizeof(Position));
|
||||
std::memset(si, 0, sizeof(StateInfo));
|
||||
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
|
||||
st = si;
|
||||
|
||||
// Active color
|
||||
sideToMove = (Color)stream.read_one_bit();
|
||||
|
||||
pieceList[W_KING][0] = SQUARE_NB;
|
||||
pieceList[B_KING][0] = SQUARE_NB;
|
||||
|
||||
// First the position of the ball
|
||||
if (mirror)
|
||||
{
|
||||
for (auto c : Colors)
|
||||
board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto c : Colors)
|
||||
board[stream.read_n_bit(6)] = make_piece(c, KING);
|
||||
}
|
||||
|
||||
// Piece placement
|
||||
for (Rank r = RANK_8; r >= RANK_1; --r)
|
||||
{
|
||||
for (File f = FILE_A; f <= FILE_H; ++f)
|
||||
{
|
||||
auto sq = make_square(f, r);
|
||||
if (mirror) {
|
||||
sq = flip_file(sq);
|
||||
}
|
||||
|
||||
// it seems there are already balls
|
||||
Piece pc;
|
||||
if (type_of(board[sq]) != KING)
|
||||
{
|
||||
assert(board[sq] == NO_PIECE);
|
||||
pc = packer.read_board_piece_from_stream();
|
||||
}
|
||||
else
|
||||
{
|
||||
pc = board[sq];
|
||||
board[sq] = NO_PIECE; // put_piece() will catch ASSERT unless you remove it all.
|
||||
}
|
||||
|
||||
// There may be no pieces, so skip in that case.
|
||||
if (pc == NO_PIECE)
|
||||
continue;
|
||||
|
||||
put_piece(Piece(pc), sq);
|
||||
|
||||
//cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl;
|
||||
|
||||
if (stream.get_cursor()> 256)
|
||||
return 1;
|
||||
//assert(stream.get_cursor() <= 256);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Castling availability.
|
||||
// TODO(someone): Support chess960.
|
||||
st->castlingRights = 0;
|
||||
if (stream.read_one_bit()) {
|
||||
Square rsq;
|
||||
for (rsq = relative_square(WHITE, SQ_H1); piece_on(rsq) != W_ROOK; --rsq) {}
|
||||
set_castling_right(WHITE, rsq);
|
||||
}
|
||||
if (stream.read_one_bit()) {
|
||||
Square rsq;
|
||||
for (rsq = relative_square(WHITE, SQ_A1); piece_on(rsq) != W_ROOK; ++rsq) {}
|
||||
set_castling_right(WHITE, rsq);
|
||||
}
|
||||
if (stream.read_one_bit()) {
|
||||
Square rsq;
|
||||
for (rsq = relative_square(BLACK, SQ_H1); piece_on(rsq) != B_ROOK; --rsq) {}
|
||||
set_castling_right(BLACK, rsq);
|
||||
}
|
||||
if (stream.read_one_bit()) {
|
||||
Square rsq;
|
||||
for (rsq = relative_square(BLACK, SQ_A1); piece_on(rsq) != B_ROOK; ++rsq) {}
|
||||
set_castling_right(BLACK, rsq);
|
||||
}
|
||||
|
||||
// En passant square. Ignore if no pawn capture is possible
|
||||
if (stream.read_one_bit()) {
|
||||
Square ep_square = static_cast<Square>(stream.read_n_bit(6));
|
||||
if (mirror) {
|
||||
ep_square = flip_file(ep_square);
|
||||
}
|
||||
st->epSquare = ep_square;
|
||||
|
||||
if (!(attackers_to(st->epSquare) & pieces(sideToMove, PAWN))
|
||||
|| !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))))
|
||||
st->epSquare = SQ_NONE;
|
||||
}
|
||||
else {
|
||||
st->epSquare = SQ_NONE;
|
||||
}
|
||||
|
||||
// Halfmove clock
|
||||
st->rule50 = static_cast<Square>(stream.read_n_bit(6));
|
||||
|
||||
// Fullmove number
|
||||
gamePly = static_cast<Square>(stream.read_n_bit(8));
|
||||
// Convert from fullmove starting from 1 to gamePly starting from 0,
|
||||
// handle also common incorrect FEN with fullmove = 0.
|
||||
gamePly = std::max(2 * (gamePly - 1), 0) + (sideToMove == BLACK);
|
||||
|
||||
assert(stream.get_cursor() <= 256);
|
||||
|
||||
chess960 = false;
|
||||
thisThread = th;
|
||||
set_state(st);
|
||||
|
||||
//std::cout << *this << std::endl;
|
||||
|
||||
assert(pos_is_ok());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Give the board, hand piece, and turn, and return the sfen.
|
||||
//std::string Position::sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly_)
|
||||
//{
|
||||
// // Copy it to an internal structure and call sfen() if the conversion process depends only on it
|
||||
// // Maybe it will be converted normally...
|
||||
// Position pos;
|
||||
//
|
||||
// memcpy(pos.board, board, sizeof(Piece) * 81);
|
||||
// memcpy(pos.hand, hands, sizeof(Hand) * 2);
|
||||
// pos.sideToMove = turn;
|
||||
// pos.gamePly = gamePly_;
|
||||
//
|
||||
// return pos.sfen();
|
||||
//
|
||||
// // Implementation of ↑ is beautiful, but slow.
|
||||
// // This is a bottleneck when learning a large amount of game records, so write a function to unpack directly.
|
||||
//}
|
||||
|
||||
// Get the packed sfen. Returns to the buffer specified in the argument.
|
||||
// Pack this position into `sfen`: the packer writes straight into the
// caller's buffer, so nothing is returned.
void Position::sfen_pack(PackedSfen& sfen)
{
    SfenPacker packer;
    packer.data = reinterpret_cast<uint8_t*>(&sfen);
    packer.pack(*this);
}
|
||||
|
||||
//// Unpack the packed sfen. Returns an sfen string.
|
||||
//std::string Position::sfen_unpack(const PackedSfen& sfen)
|
||||
//{
|
||||
// SfenPacker sp;
|
||||
// sp.data = (uint8_t*)&sfen;
|
||||
// return sp.unpack();
|
||||
//}
|
||||
|
||||
|
||||
#endif // defined (EVAL_LEARN)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,140 @@
|
||||
#ifndef _STOCKFISH_BLAS_H_
|
||||
#define _STOCKFISH_BLAS_H_
|
||||
|
||||
struct ThreadPool;
|
||||
|
||||
// SF_BLAS_RESTRICT expands to the compiler's spelling of the non-standard
// "restrict" pointer qualifier. On compilers not listed here it expands to
// nothing, so declarations that use it still compile (just without the
// no-aliasing hint) instead of failing on an undefined identifier.
#if defined (_MSC_VER)
#define SF_BLAS_RESTRICT __restrict
#elif defined (__INTEL_COMPILER)
#define SF_BLAS_RESTRICT restrict
#elif defined (__clang__)
#define SF_BLAS_RESTRICT __restrict__
#elif defined (__GNUC__)
#define SF_BLAS_RESTRICT __restrict__
#else
#define SF_BLAS_RESTRICT
#endif
|
||||
|
||||
namespace Blas {
|
||||
|
||||
enum struct MatrixLayout {
|
||||
RowMajor = 101,
|
||||
ColMajor = 102
|
||||
};
|
||||
|
||||
enum struct MatrixTranspose {
|
||||
NoTrans = 111,
|
||||
Trans = 112
|
||||
};
|
||||
|
||||
void scopy(
|
||||
const int N,
|
||||
const float * SF_BLAS_RESTRICT X,
|
||||
float * SF_BLAS_RESTRICT Y
|
||||
);
|
||||
|
||||
void scopy(
|
||||
const int N,
|
||||
const float * SF_BLAS_RESTRICT X, const int incX,
|
||||
float * SF_BLAS_RESTRICT Y, const int incY
|
||||
);
|
||||
|
||||
void scopy(
|
||||
ThreadPool& thread_pool,
|
||||
const int N,
|
||||
const float * SF_BLAS_RESTRICT X,
|
||||
float * SF_BLAS_RESTRICT Y
|
||||
);
|
||||
|
||||
void scopy(
|
||||
ThreadPool& thread_pool,
|
||||
const int N,
|
||||
const float * SF_BLAS_RESTRICT X, const int incX,
|
||||
float * SF_BLAS_RESTRICT Y, const int incY
|
||||
);
|
||||
|
||||
void sscal(
|
||||
const int N,
|
||||
const float alpha,
|
||||
float * SF_BLAS_RESTRICT X
|
||||
);
|
||||
|
||||
void sscal(
|
||||
const int N,
|
||||
const float alpha,
|
||||
float * SF_BLAS_RESTRICT X, const int incX
|
||||
);
|
||||
|
||||
void sscal(
|
||||
ThreadPool& thread_pool,
|
||||
const int N,
|
||||
const float alpha,
|
||||
float * SF_BLAS_RESTRICT X
|
||||
);
|
||||
|
||||
void sscal(
|
||||
ThreadPool& thread_pool,
|
||||
const int N,
|
||||
const float alpha,
|
||||
float * SF_BLAS_RESTRICT X, const int incX
|
||||
);
|
||||
|
||||
void saxpy(
|
||||
const int N,
|
||||
const float alpha,
|
||||
const float * SF_BLAS_RESTRICT X,
|
||||
float * SF_BLAS_RESTRICT Y
|
||||
);
|
||||
|
||||
void saxpy(
|
||||
const int N,
|
||||
const float alpha,
|
||||
const float * SF_BLAS_RESTRICT X, const int incX,
|
||||
float * SF_BLAS_RESTRICT Y, const int incY
|
||||
);
|
||||
|
||||
void saxpy(
|
||||
ThreadPool& thread_pool,
|
||||
const int N,
|
||||
const float alpha,
|
||||
const float * SF_BLAS_RESTRICT X,
|
||||
float * SF_BLAS_RESTRICT Y
|
||||
);
|
||||
|
||||
void saxpy(
|
||||
ThreadPool& thread_pool,
|
||||
const int N,
|
||||
const float alpha,
|
||||
const float * SF_BLAS_RESTRICT X, const int incX,
|
||||
float * SF_BLAS_RESTRICT Y, const int incY
|
||||
);
|
||||
|
||||
void sgemm(
|
||||
ThreadPool& thread_pool,
|
||||
MatrixLayout layout, MatrixTranspose TransA, MatrixTranspose TransB,
|
||||
const int M, const int N, const int K,
|
||||
const float alpha,
|
||||
const float * SF_BLAS_RESTRICT A, const int lda,
|
||||
const float * SF_BLAS_RESTRICT B, const int ldb,
|
||||
const float beta,
|
||||
float * SF_BLAS_RESTRICT C, const int ldc
|
||||
);
|
||||
|
||||
void sgemm(
|
||||
MatrixLayout layout, MatrixTranspose TransA, MatrixTranspose TransB,
|
||||
const int M, const int N, const int K,
|
||||
const float alpha,
|
||||
const float * SF_BLAS_RESTRICT A, const int lda,
|
||||
const float * SF_BLAS_RESTRICT B, const int ldb,
|
||||
const float beta,
|
||||
float * SF_BLAS_RESTRICT C, const int ldc
|
||||
);
|
||||
|
||||
void test(
|
||||
ThreadPool& thread_pool
|
||||
);
|
||||
|
||||
void bench(
|
||||
ThreadPool& thread_pool
|
||||
);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,26 @@
|
||||
The file "incbin.h" is free and unencumbered software released into
|
||||
the public domain by Dale Weiler, see:
|
||||
<https://github.com/graphitemaster/incbin>
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
||||
Executable
+368
@@ -0,0 +1,368 @@
|
||||
/**
|
||||
* @file incbin.h
|
||||
* @author Dale Weiler
|
||||
* @brief Utility for including binary files
|
||||
*
|
||||
* Facilities for including binary files into the current translation unit and
|
||||
* making use from them externally in other translation units.
|
||||
*/
|
||||
#ifndef INCBIN_HDR
|
||||
#define INCBIN_HDR
|
||||
#include <limits.h>
|
||||
#if defined(__AVX512BW__) || \
|
||||
defined(__AVX512CD__) || \
|
||||
defined(__AVX512DQ__) || \
|
||||
defined(__AVX512ER__) || \
|
||||
defined(__AVX512PF__) || \
|
||||
defined(__AVX512VL__) || \
|
||||
defined(__AVX512F__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 6
|
||||
#elif defined(__AVX__) || \
|
||||
defined(__AVX2__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 5
|
||||
#elif defined(__SSE__) || \
|
||||
defined(__SSE2__) || \
|
||||
defined(__SSE3__) || \
|
||||
defined(__SSSE3__) || \
|
||||
defined(__SSE4_1__) || \
|
||||
defined(__SSE4_2__) || \
|
||||
defined(__neon__)
|
||||
# define INCBIN_ALIGNMENT_INDEX 4
|
||||
#elif ULONG_MAX != 0xffffffffu
|
||||
# define INCBIN_ALIGNMENT_INDEX 3
|
||||
# else
|
||||
# define INCBIN_ALIGNMENT_INDEX 2
|
||||
#endif
|
||||
|
||||
/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
|
||||
#define INCBIN_ALIGN_SHIFT_0 1
|
||||
#define INCBIN_ALIGN_SHIFT_1 2
|
||||
#define INCBIN_ALIGN_SHIFT_2 4
|
||||
#define INCBIN_ALIGN_SHIFT_3 8
|
||||
#define INCBIN_ALIGN_SHIFT_4 16
|
||||
#define INCBIN_ALIGN_SHIFT_5 32
|
||||
#define INCBIN_ALIGN_SHIFT_6 64
|
||||
|
||||
/* Actual alignment value */
|
||||
#define INCBIN_ALIGNMENT \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
|
||||
INCBIN_ALIGNMENT_INDEX)
|
||||
|
||||
/* Stringize */
|
||||
#define INCBIN_STR(X) \
|
||||
#X
|
||||
#define INCBIN_STRINGIZE(X) \
|
||||
INCBIN_STR(X)
|
||||
/* Concatenate */
|
||||
#define INCBIN_CAT(X, Y) \
|
||||
X ## Y
|
||||
#define INCBIN_CONCATENATE(X, Y) \
|
||||
INCBIN_CAT(X, Y)
|
||||
/* Deferred macro expansion */
|
||||
#define INCBIN_EVAL(X) \
|
||||
X
|
||||
#define INCBIN_INVOKE(N, ...) \
|
||||
INCBIN_EVAL(N(__VA_ARGS__))
|
||||
|
||||
/* Green Hills uses a different directive for including binary data */
|
||||
#if defined(__ghs__)
|
||||
# if (__ghs_asm == 2)
|
||||
# define INCBIN_MACRO ".file"
|
||||
/* Or consider the ".myrawdata" entry in the ld file */
|
||||
# else
|
||||
# define INCBIN_MACRO "\tINCBIN"
|
||||
# endif
|
||||
#else
|
||||
# define INCBIN_MACRO ".incbin"
|
||||
#endif
|
||||
|
||||
#ifndef _MSC_VER
|
||||
# define INCBIN_ALIGN \
|
||||
__attribute__((aligned(INCBIN_ALIGNMENT)))
|
||||
#else
|
||||
# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || /* GNU C and RealView */ \
|
||||
defined(__arm) || /* Diab */ \
|
||||
defined(_ARM) /* ImageCraft */
|
||||
# define INCBIN_ARM
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Utilize .balign where supported */
|
||||
# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".balign 1\n"
|
||||
#elif defined(INCBIN_ARM)
|
||||
/*
|
||||
* On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
|
||||
* the shift count. This is the value passed to `.align'
|
||||
*/
|
||||
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".align 0\n"
|
||||
#else
|
||||
/* We assume other inline assembler's treat `.align' as `.balign' */
|
||||
# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
|
||||
# define INCBIN_ALIGN_BYTE ".align 1\n"
|
||||
#endif
|
||||
|
||||
/* INCBIN_CONST is used by incbin.c generated files */
|
||||
#if defined(__cplusplus)
|
||||
# define INCBIN_EXTERNAL extern "C"
|
||||
# define INCBIN_CONST extern const
|
||||
#else
|
||||
# define INCBIN_EXTERNAL extern
|
||||
# define INCBIN_CONST const
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Optionally override the linker section into which data is emitted.
|
||||
*
|
||||
* @warning If you use this facility, you'll have to deal with platform-specific linker output
|
||||
* section naming on your own
|
||||
*
|
||||
* Overriding the default linker output section, e.g for esp8266/Arduino:
|
||||
* @code
|
||||
* #define INCBIN_OUTPUT_SECTION ".irom.text"
|
||||
* #include "incbin.h"
|
||||
* INCBIN(Foo, "foo.txt");
|
||||
* // Data is emitted into program memory that never gets copied to RAM
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_OUTPUT_SECTION)
|
||||
# if defined(__APPLE__)
|
||||
# define INCBIN_OUTPUT_SECTION ".const_data"
|
||||
# else
|
||||
# define INCBIN_OUTPUT_SECTION ".rodata"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
/* The directives are different for Apple branded compilers */
|
||||
# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n"
|
||||
# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
||||
# define INCBIN_INT ".long "
|
||||
# define INCBIN_MANGLE "_"
|
||||
# define INCBIN_BYTE ".byte "
|
||||
# define INCBIN_TYPE(...)
|
||||
#else
|
||||
# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n"
|
||||
# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
|
||||
# if defined(__ghs__)
|
||||
# define INCBIN_INT ".word "
|
||||
# else
|
||||
# define INCBIN_INT ".int "
|
||||
# endif
|
||||
# if defined(__USER_LABEL_PREFIX__)
|
||||
# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
|
||||
# else
|
||||
# define INCBIN_MANGLE ""
|
||||
# endif
|
||||
# if defined(INCBIN_ARM)
|
||||
/* On arm assemblers, `@' is used as a line comment token */
|
||||
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
|
||||
# elif defined(__MINGW32__) || defined(__MINGW64__)
|
||||
/* Mingw doesn't support this directive either */
|
||||
# define INCBIN_TYPE(NAME)
|
||||
# else
|
||||
/* It's safe to use `@' on other architectures */
|
||||
# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
|
||||
# endif
|
||||
# define INCBIN_BYTE ".byte "
|
||||
#endif
|
||||
|
||||
/* List of style types used for symbol names */
|
||||
#define INCBIN_STYLE_CAMEL 0
|
||||
#define INCBIN_STYLE_SNAKE 1
|
||||
|
||||
/**
|
||||
* @brief Specify the prefix to use for symbol names.
|
||||
*
|
||||
* By default this is `g', producing symbols of the form:
|
||||
* @code
|
||||
* #include "incbin.h"
|
||||
* INCBIN(Foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char gFooData[];
|
||||
* // const unsigned char *const gFooEnd;
|
||||
* // const unsigned int gFooSize;
|
||||
* @endcode
|
||||
*
|
||||
* If however you specify a prefix before including: e.g:
|
||||
* @code
|
||||
* #define INCBIN_PREFIX incbin
|
||||
* #include "incbin.h"
|
||||
* INCBIN(Foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols instead:
|
||||
* // const unsigned char incbinFooData[];
|
||||
* // const unsigned char *const incbinFooEnd;
|
||||
* // const unsigned int incbinFooSize;
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_PREFIX)
|
||||
# define INCBIN_PREFIX g
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Specify the style used for symbol names.
|
||||
*
|
||||
* Possible options are
|
||||
* - INCBIN_STYLE_CAMEL "CamelCase"
|
||||
* - INCBIN_STYLE_SNAKE "snake_case"
|
||||
*
|
||||
* Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
|
||||
* @code
|
||||
* #include "incbin.h"
|
||||
* INCBIN(Foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>FooData[];
|
||||
* // const unsigned char *const <prefix>FooEnd;
|
||||
* // const unsigned int <prefix>FooSize;
|
||||
* @endcode
|
||||
*
|
||||
* If however you specify a style before including: e.g:
|
||||
* @code
|
||||
* #define INCBIN_STYLE INCBIN_STYLE_SNAKE
|
||||
* #include "incbin.h"
|
||||
* INCBIN(foo, "foo.txt");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>foo_data[];
|
||||
* // const unsigned char *const <prefix>foo_end;
|
||||
* // const unsigned int <prefix>foo_size;
|
||||
* @endcode
|
||||
*/
|
||||
#if !defined(INCBIN_STYLE)
|
||||
# define INCBIN_STYLE INCBIN_STYLE_CAMEL
|
||||
#endif
|
||||
|
||||
/* Style lookup tables */
|
||||
#define INCBIN_STYLE_0_DATA Data
|
||||
#define INCBIN_STYLE_0_END End
|
||||
#define INCBIN_STYLE_0_SIZE Size
|
||||
#define INCBIN_STYLE_1_DATA _data
|
||||
#define INCBIN_STYLE_1_END _end
|
||||
#define INCBIN_STYLE_1_SIZE _size
|
||||
|
||||
/* Style lookup: returning identifier */
|
||||
#define INCBIN_STYLE_IDENT(TYPE) \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_STYLE_, \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_EVAL(INCBIN_STYLE), \
|
||||
INCBIN_CONCATENATE(_, TYPE)))
|
||||
|
||||
/* Style lookup: returning string literal */
|
||||
#define INCBIN_STYLE_STRING(TYPE) \
|
||||
INCBIN_STRINGIZE( \
|
||||
INCBIN_STYLE_IDENT(TYPE)) \
|
||||
|
||||
/* Generate the global labels by indirectly invoking the macro with our style
|
||||
* type and concatenating the name against them. */
|
||||
#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_GLOBAL, \
|
||||
INCBIN_CONCATENATE( \
|
||||
NAME, \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_STYLE_IDENT, \
|
||||
TYPE))) \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_TYPE, \
|
||||
INCBIN_CONCATENATE( \
|
||||
NAME, \
|
||||
INCBIN_INVOKE( \
|
||||
INCBIN_STYLE_IDENT, \
|
||||
TYPE)))
|
||||
|
||||
/**
|
||||
* @brief Externally reference binary data included in another translation unit.
|
||||
*
|
||||
* Produces three external symbols that reference the binary data included in
|
||||
* another translation unit.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param NAME The name given for the binary data
|
||||
*
|
||||
* @code
|
||||
* INCBIN_EXTERN(Foo);
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // extern const unsigned char <prefix>FooData[];
|
||||
* // extern const unsigned char *const <prefix>FooEnd;
|
||||
* // extern const unsigned int <prefix>FooSize;
|
||||
* @endcode
|
||||
*/
|
||||
#define INCBIN_EXTERN(NAME) \
|
||||
INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(DATA))[]; \
|
||||
INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(END)); \
|
||||
INCBIN_EXTERNAL const unsigned int \
|
||||
INCBIN_CONCATENATE( \
|
||||
INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
|
||||
INCBIN_STYLE_IDENT(SIZE))
|
||||
|
||||
/**
|
||||
* @brief Include a binary file into the current translation unit.
|
||||
*
|
||||
* Includes a binary file into the current translation unit, producing three symbols
|
||||
* for objects that encode the data and size respectively.
|
||||
*
|
||||
* The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
|
||||
* "Data", as well as "End" and "Size" after. An example is provided below.
|
||||
*
|
||||
* @param NAME The name to associate with this binary data (as an identifier.)
|
||||
* @param FILENAME The file to include (as a string literal.)
|
||||
*
|
||||
* @code
|
||||
* INCBIN(Icon, "icon.png");
|
||||
*
|
||||
* // Now you have the following symbols:
|
||||
* // const unsigned char <prefix>IconData[];
|
||||
* // const unsigned char *const <prefix>IconEnd;
|
||||
* // const unsigned int <prefix>IconSize;
|
||||
* @endcode
|
||||
*
|
||||
* @warning This must be used in global scope
|
||||
* @warning The identifiers may be different if INCBIN_STYLE is not default
|
||||
*
|
||||
* To externally reference the data included by this in another translation unit
|
||||
* please @see INCBIN_EXTERN.
|
||||
*/
|
||||
#ifdef _MSC_VER
|
||||
#define INCBIN(NAME, FILENAME) \
|
||||
INCBIN_EXTERN(NAME)
|
||||
#else
|
||||
#define INCBIN(NAME, FILENAME) \
|
||||
__asm__(INCBIN_SECTION \
|
||||
INCBIN_GLOBAL_LABELS(NAME, DATA) \
|
||||
INCBIN_ALIGN_HOST \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
|
||||
INCBIN_MACRO " \"" FILENAME "\"\n" \
|
||||
INCBIN_GLOBAL_LABELS(NAME, END) \
|
||||
INCBIN_ALIGN_BYTE \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
|
||||
INCBIN_BYTE "1\n" \
|
||||
INCBIN_GLOBAL_LABELS(NAME, SIZE) \
|
||||
INCBIN_ALIGN_HOST \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
|
||||
INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
|
||||
INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
|
||||
INCBIN_ALIGN_HOST \
|
||||
".text\n" \
|
||||
); \
|
||||
INCBIN_EXTERN(NAME)
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -0,0 +1,667 @@
|
||||
#ifndef LEARNER_AUTOGRAD_H
|
||||
#define LEARNER_AUTOGRAD_H
|
||||
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <type_traits>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <optional>
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
|
||||
namespace Learner
{
    // A value paired with a gradient. All arithmetic is applied to both
    // members component-wise.
    template <typename T>
    struct ValueWithGrad
    {
        T value;
        T grad;

        // Add `rhs` component-wise.
        ValueWithGrad& operator+=(const ValueWithGrad<T>& rhs)
        {
            value += rhs.value;
            grad += rhs.grad;
            return *this;
        }

        // Subtract `rhs` component-wise.
        ValueWithGrad& operator-=(const ValueWithGrad<T>& rhs)
        {
            value -= rhs.value;
            grad -= rhs.grad;
            return *this;
        }

        // Scale both members by `rhs`.
        ValueWithGrad& operator*=(T rhs)
        {
            value *= rhs;
            grad *= rhs;
            return *this;
        }

        // Divide both members by `rhs`.
        ValueWithGrad& operator/=(T rhs)
        {
            value /= rhs;
            grad /= rhs;
            return *this;
        }

        // Copy with the absolute value taken of both members.
        [[nodiscard]] ValueWithGrad abs() const
        {
            return { std::abs(value), std::abs(grad) };
        }

        // Copy whose gradient is limited to [-|max|, |max|]; the value is
        // left untouched. The bound is normalised to be non-negative because
        // std::clamp has undefined behaviour when its lower bound exceeds its
        // upper bound, which is what a negative `max` would produce.
        [[nodiscard]] ValueWithGrad clamp_grad(T max) const
        {
            const T bound = max < T{} ? -max : max;
            return { value, std::clamp(grad, -bound, bound) };
        }
    };
}
|
||||
|
||||
namespace Learner::Autograd::UnivariateStatic
|
||||
{
|
||||
|
||||
// Trivial type-level identity: Identity<T>::type is T.
template <typename T>
struct Identity
{
    typedef T type;
};

// Id<T> is T spelled through a nested member type. The operator overloads in
// this namespace use it for scalar parameters so that T is taken from the
// expression operand rather than deduced from the scalar argument.
template <typename T>
using Id = typename Identity<T>::type;
|
||||
|
||||
// Storage type for an operand of an expression node:
//  - T is an rvalue reference (U&&)  -> store a U by value;
//  - anything else (U& or plain U)   -> store a `const U&`.
template <typename T>
using StoreValueOrRef = typename std::conditional<
    !std::is_rvalue_reference<T>::value,
    const typename std::remove_reference<T>::type&,
    typename std::remove_reference<T>::type
>::type;
|
||||
|
||||
namespace Detail
{
    // Integer type backing a call id.
    using CallIdType = std::uint32_t;

    // Tag identifying one evaluation pass. Two ids compare equal exactly when
    // their underlying integers are equal.
    struct CallId
    {
        CallIdType call_id = 0;

        // The default id is 0.
        constexpr CallId() {}

        // Implicit on purpose: a raw integer can be used where a CallId is expected.
        constexpr CallId(CallIdType id) :
            call_id{ id }
        {
        }

        [[nodiscard]] bool operator==(CallId rhs) const noexcept
        {
            return call_id == rhs.call_id;
        }

        [[nodiscard]] bool operator!=(CallId rhs) const noexcept
        {
            return !(*this == rhs);
        }
    };

    // Hands out consecutive ids starting at 0. The counter is thread_local,
    // so every thread has its own sequence.
    [[nodiscard]] inline CallId next_call_id()
    {
        static thread_local CallIdType s_counter = 0;
        const CallIdType issued = s_counter;
        ++s_counter;
        return CallId{ issued };
    }

    // TupleContains<T, std::tuple<Us...>>::value is true when T is exactly one
    // of Us... (false for an empty tuple).
    template <typename T, typename Tuple>
    struct TupleContains;

    template <typename T, typename... Us>
    struct TupleContains<T, std::tuple<Us...>>
        : std::bool_constant<(std::is_same_v<T, Us> || ...)> {};

    template <typename T, typename Tuple>
    constexpr bool TupleContainsV = TupleContains<T, Tuple>::value;

    // True when every type in Ts (references stripped) reports
    // `is_constant == true`; true for an empty pack.
    template <typename... Ts>
    constexpr bool AreAllConstantV = (... && std::remove_reference_t<Ts>::is_constant);
}
|
||||
|
||||
template <typename T, typename ChildT>
|
||||
struct Evaluable
|
||||
{
|
||||
constexpr Evaluable() = default;
|
||||
|
||||
// We append a unique call id so that we can invalidate the cache when
|
||||
// the next computation starts. A single evaluation should see
|
||||
// the same call_id at every node.
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] auto eval(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
const auto call_id = Detail::next_call_id();
|
||||
const auto new_args = std::tuple_cat(args, std::tuple(call_id));
|
||||
return ValueWithGrad<T>{ value(new_args), grad(new_args) };
|
||||
}
|
||||
|
||||
template <typename... ArgsTs,
|
||||
typename SFINAE = std::enable_if_t<Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
|
||||
[[nodiscard]] auto value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
const ChildT* this_ = static_cast<const ChildT*>(this);
|
||||
|
||||
const auto call_id = std::get<Detail::CallId>(args);
|
||||
if (!value_cache.has_value() || value_cache_call_id != call_id)
|
||||
{
|
||||
value_cache_call_id = call_id;
|
||||
value_cache = this_->calculate_value(args);
|
||||
}
|
||||
|
||||
return *value_cache;
|
||||
}
|
||||
|
||||
template <typename... ArgsTs,
|
||||
typename SFINAE = std::enable_if_t<!Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
|
||||
[[nodiscard]] auto value(const std::tuple<ArgsTs...>& args, ...) const
|
||||
{
|
||||
const auto call_id = Detail::next_call_id();
|
||||
const auto new_args = std::tuple_cat(args, std::tuple(call_id));
|
||||
return value(new_args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs,
|
||||
typename SFINAE = std::enable_if_t<Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
|
||||
[[nodiscard]] auto grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
if constexpr (ChildT::is_constant)
|
||||
{
|
||||
return T(0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
const ChildT* this_ = static_cast<const ChildT*>(this);
|
||||
|
||||
const auto call_id = std::get<Detail::CallId>(args);
|
||||
if (!grad_cache.has_value() || grad_cache_call_id != call_id)
|
||||
{
|
||||
grad_cache_call_id = call_id;
|
||||
grad_cache = this_->calculate_grad(args);
|
||||
}
|
||||
|
||||
return *grad_cache;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... ArgsTs,
|
||||
typename SFINAE = std::enable_if_t<!Detail::TupleContainsV<Detail::CallId, std::tuple<ArgsTs...>>>>
|
||||
[[nodiscard]] auto grad(const std::tuple<ArgsTs...>& args, ...) const
|
||||
{
|
||||
const auto call_id = Detail::next_call_id();
|
||||
const auto new_args = std::tuple_cat(args, std::tuple(call_id));
|
||||
return grad(new_args);
|
||||
}
|
||||
|
||||
private:
|
||||
mutable std::optional<T> value_cache;
|
||||
mutable std::optional<T> grad_cache;
|
||||
mutable Detail::CallId value_cache_call_id{};
|
||||
mutable Detail::CallId grad_cache_call_id{};
|
||||
};
|
||||
|
||||
template <typename T, int I>
|
||||
struct VariableParameter : Evaluable<T, VariableParameter<T, I>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = false;
|
||||
|
||||
constexpr VariableParameter()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return std::get<I>(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>&) const
|
||||
{
|
||||
return T(1.0);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int I>
|
||||
struct ConstantParameter : Evaluable<T, ConstantParameter<T, I>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = true;
|
||||
|
||||
constexpr ConstantParameter()
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return std::get<I>(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>&) const
|
||||
{
|
||||
return T(0.0);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct Constant : Evaluable<T, Constant<T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = true;
|
||||
|
||||
constexpr Constant(T x) :
|
||||
m_x(std::move(x))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>&) const
|
||||
{
|
||||
return m_x;
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>&) const
|
||||
{
|
||||
return T(0.0);
|
||||
}
|
||||
|
||||
private:
|
||||
T m_x;
|
||||
};
|
||||
|
||||
// The "constant" may change between executions, but is assumed to be
|
||||
// constant during a single evaluation.
|
||||
template <typename T>
|
||||
struct ConstantRef : Evaluable<T, ConstantRef<T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = true;
|
||||
|
||||
constexpr ConstantRef(const T& x) :
|
||||
m_x(x)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>&) const
|
||||
{
|
||||
return m_x;
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>&) const
|
||||
{
|
||||
return T(0.0);
|
||||
}
|
||||
|
||||
private:
|
||||
const T& m_x;
|
||||
};
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
struct Sum : Evaluable<T, Sum<LhsT, RhsT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
|
||||
|
||||
constexpr Sum(LhsT&& lhs, RhsT&& rhs) :
|
||||
m_lhs(std::forward<LhsT>(lhs)),
|
||||
m_rhs(std::forward<RhsT>(rhs))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.value(args) + m_rhs.value(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.grad(args) + m_rhs.grad(args);
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<LhsT> m_lhs;
|
||||
StoreValueOrRef<RhsT> m_rhs;
|
||||
};
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator+(LhsT&& lhs, RhsT&& rhs)
|
||||
{
|
||||
return Sum<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator+(LhsT&& lhs, Id<T> rhs)
|
||||
{
|
||||
return Sum<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
|
||||
}
|
||||
|
||||
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator+(Id<T> lhs, RhsT&& rhs)
|
||||
{
|
||||
return Sum<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
struct Difference : Evaluable<T, Difference<LhsT, RhsT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
|
||||
|
||||
constexpr Difference(LhsT&& lhs, RhsT&& rhs) :
|
||||
m_lhs(std::forward<LhsT>(lhs)),
|
||||
m_rhs(std::forward<RhsT>(rhs))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.value(args) - m_rhs.value(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.grad(args) - m_rhs.grad(args);
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<LhsT> m_lhs;
|
||||
StoreValueOrRef<RhsT> m_rhs;
|
||||
};
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator-(LhsT&& lhs, RhsT&& rhs)
|
||||
{
|
||||
return Difference<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator-(LhsT&& lhs, Id<T> rhs)
|
||||
{
|
||||
return Difference<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
|
||||
}
|
||||
|
||||
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator-(Id<T> lhs, RhsT&& rhs)
|
||||
{
|
||||
return Difference<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
struct Product : Evaluable<T, Product<LhsT, RhsT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
|
||||
|
||||
constexpr Product(LhsT&& lhs, RhsT&& rhs) :
|
||||
m_lhs(std::forward<LhsT>(lhs)),
|
||||
m_rhs(std::forward<RhsT>(rhs))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.value(args) * m_rhs.value(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.grad(args) * m_rhs.value(args) + m_lhs.value(args) * m_rhs.grad(args);
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<LhsT> m_lhs;
|
||||
StoreValueOrRef<RhsT> m_rhs;
|
||||
};
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator*(LhsT&& lhs, RhsT&& rhs)
|
||||
{
|
||||
return Product<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator*(LhsT&& lhs, Id<T> rhs)
|
||||
{
|
||||
return Product<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
|
||||
}
|
||||
|
||||
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator*(Id<T> lhs, RhsT&& rhs)
|
||||
{
|
||||
return Product<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
struct Quotient : Evaluable<T, Quotient<LhsT, RhsT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<LhsT, RhsT>;
|
||||
|
||||
constexpr Quotient(LhsT&& lhs, RhsT&& rhs) :
|
||||
m_lhs(std::forward<LhsT>(lhs)),
|
||||
m_rhs(std::forward<RhsT>(rhs))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_lhs.value(args) / m_rhs.value(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
auto g = m_rhs.value(args);
|
||||
return (m_lhs.grad(args) * g - m_lhs.value(args) * m_rhs.grad(args)) / (g * g);
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<LhsT> m_lhs;
|
||||
StoreValueOrRef<RhsT> m_rhs;
|
||||
};
|
||||
|
||||
template <typename LhsT, typename RhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator/(LhsT&& lhs, RhsT&& rhs)
|
||||
{
|
||||
return Quotient<LhsT&&, RhsT&&>(std::forward<LhsT>(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename LhsT, typename T = typename std::remove_reference_t<LhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator/(LhsT&& lhs, Id<T> rhs)
|
||||
{
|
||||
return Quotient<LhsT&&, Constant<T>&&>(std::forward<LhsT>(lhs), Constant(rhs));
|
||||
}
|
||||
|
||||
template <typename RhsT, typename T = typename std::remove_reference_t<RhsT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator/(Id<T> lhs, RhsT&& rhs)
|
||||
{
|
||||
return Quotient<Constant<T>&&, RhsT&&>(Constant(lhs), std::forward<RhsT>(rhs));
|
||||
}
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
struct Negation : Evaluable<T, Negation<ArgT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;
|
||||
|
||||
constexpr explicit Negation(ArgT&& x) :
|
||||
m_x(std::forward<ArgT>(x))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return -m_x.value(args);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return -m_x.grad(args);
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<ArgT> m_x;
|
||||
};
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
[[nodiscard]] constexpr auto operator-(ArgT&& x)
|
||||
{
|
||||
return Negation<ArgT&&>(std::forward<ArgT>(x));
|
||||
}
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
struct Sigmoid : Evaluable<T, Sigmoid<ArgT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;
|
||||
|
||||
constexpr explicit Sigmoid(ArgT&& x) :
|
||||
m_x(std::forward<ArgT>(x))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return value_(m_x.value(args));
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_x.grad(args) * grad_(m_x.value(args));
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<ArgT> m_x;
|
||||
|
||||
[[nodiscard]] T value_(T x) const
|
||||
{
|
||||
return 1.0 / (1.0 + std::exp(-x));
|
||||
}
|
||||
|
||||
[[nodiscard]] T grad_(T x) const
|
||||
{
|
||||
return value_(x) * (1.0 - value_(x));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
[[nodiscard]] constexpr auto sigmoid(ArgT&& x)
|
||||
{
|
||||
return Sigmoid<ArgT&&>(std::forward<ArgT>(x));
|
||||
}
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
struct Pow : Evaluable<T, Pow<ArgT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;
|
||||
|
||||
constexpr explicit Pow(ArgT&& x, Id<T> exponent) :
|
||||
m_x(std::forward<ArgT>(x)),
|
||||
m_exponent(std::move(exponent))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return std::pow(m_x.value(args), m_exponent);
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_exponent * std::pow(m_x.value(args), m_exponent - T(1.0)) * m_x.grad(args);
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<ArgT> m_x;
|
||||
T m_exponent;
|
||||
};
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
[[nodiscard]] constexpr auto pow(ArgT&& x, Id<T> exp)
|
||||
{
|
||||
return Pow<ArgT&&>(std::forward<ArgT>(x), std::move(exp));
|
||||
}
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
struct Log : Evaluable<T, Log<ArgT, T>>
|
||||
{
|
||||
using ValueType = T;
|
||||
|
||||
static constexpr bool is_constant = Detail::AreAllConstantV<ArgT>;
|
||||
|
||||
constexpr explicit Log(ArgT&& x) :
|
||||
m_x(std::forward<ArgT>(x))
|
||||
{
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_value(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return value_(m_x.value(args));
|
||||
}
|
||||
|
||||
template <typename... ArgsTs>
|
||||
[[nodiscard]] T calculate_grad(const std::tuple<ArgsTs...>& args) const
|
||||
{
|
||||
return m_x.grad(args) * grad_(m_x.value(args));
|
||||
}
|
||||
|
||||
private:
|
||||
StoreValueOrRef<ArgT> m_x;
|
||||
|
||||
T value_(T x) const
|
||||
{
|
||||
return std::log(x);
|
||||
}
|
||||
|
||||
T grad_(T x) const
|
||||
{
|
||||
return 1.0 / x;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ArgT, typename T = typename std::remove_reference_t<ArgT>::ValueType>
|
||||
[[nodiscard]] constexpr auto log(ArgT&& x)
|
||||
{
|
||||
return Log<ArgT&&>(std::forward<ArgT>(x));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,815 @@
|
||||
#include "convert.h"
|
||||
|
||||
#include "uci.h"
|
||||
#include "misc.h"
|
||||
#include "thread.h"
|
||||
#include "position.h"
|
||||
#include "tt.h"
|
||||
|
||||
#include "extra/nnue_data_binpack_format.h"
|
||||
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
|
||||
#include "syzygy/tbprobe.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <unordered_set>
|
||||
#include <iomanip>
|
||||
#include <list>
|
||||
#include <cmath> // std::exp(),std::pow(),std::log()
|
||||
#include <cstring> // memcpy()
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <chrono>
|
||||
#include <random>
|
||||
#include <regex>
|
||||
#include <filesystem>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Learner
|
||||
{
|
||||
// Returns true when the first whitespace-delimited field of input_fen (the
// piece placement field) is identical to the first field of pos.fen().
// The remaining FEN fields are deliberately not compared.
//
// example : "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3 w - h6 0 24"
//       --> "2r4r/4kpp1/nb1np3/p2p3p/B2P1BP1/PP6/4NPKP/2R1R3"
bool fen_is_ok(Position& pos, std::string input_fen) {
    // operator>> stops at the first whitespace, which isolates the first field.
    std::string placement_from_position;
    std::istringstream(pos.fen()) >> placement_from_position;

    std::string placement_from_input;
    std::istringstream(input_fen) >> placement_from_input;

    return placement_from_input == placement_from_position;
}
|
||||
|
||||
void convert_bin(
|
||||
const vector<string>& filenames,
|
||||
const string& output_file_name,
|
||||
const int ply_minimum,
|
||||
const int ply_maximum,
|
||||
const int interpolate_eval,
|
||||
const int src_score_min_value,
|
||||
const int src_score_max_value,
|
||||
const int dest_score_min_value,
|
||||
const int dest_score_max_value,
|
||||
const bool check_invalid_fen,
|
||||
const bool check_illegal_move)
|
||||
{
|
||||
std::cout << "check_invalid_fen=" << check_invalid_fen << std::endl;
|
||||
std::cout << "check_illegal_move=" << check_illegal_move << std::endl;
|
||||
|
||||
std::fstream fs;
|
||||
uint64_t data_size = 0;
|
||||
uint64_t filtered_size = 0;
|
||||
uint64_t filtered_size_fen = 0;
|
||||
uint64_t filtered_size_move = 0;
|
||||
uint64_t filtered_size_ply = 0;
|
||||
auto th = Threads.main();
|
||||
auto& tpos = th->rootPos;
|
||||
// convert plain rag to packed sfenvalue for Yaneura king
|
||||
fs.open(output_file_name, ios::app | ios::binary);
|
||||
StateListPtr states;
|
||||
for (auto filename : filenames) {
|
||||
std::cout << "convert " << filename << " ... ";
|
||||
std::string line;
|
||||
ifstream ifs;
|
||||
ifs.open(filename);
|
||||
PackedSfenValue p;
|
||||
data_size = 0;
|
||||
filtered_size = 0;
|
||||
filtered_size_fen = 0;
|
||||
filtered_size_move = 0;
|
||||
filtered_size_ply = 0;
|
||||
p.gamePly = 1; // Not included in apery format. Should be initialized
|
||||
bool ignore_flag_fen = false;
|
||||
bool ignore_flag_move = false;
|
||||
bool ignore_flag_ply = false;
|
||||
while (std::getline(ifs, line)) {
|
||||
std::stringstream ss(line);
|
||||
std::string token;
|
||||
std::string value;
|
||||
ss >> token;
|
||||
if (token == "fen") {
|
||||
states = StateListPtr(new std::deque<StateInfo>(1)); // Drop old and create a new one
|
||||
std::string input_fen = line.substr(4);
|
||||
tpos.set(input_fen, false, &states->back(), Threads.main());
|
||||
if (check_invalid_fen && !fen_is_ok(tpos, input_fen)) {
|
||||
ignore_flag_fen = true;
|
||||
filtered_size_fen++;
|
||||
}
|
||||
else {
|
||||
tpos.sfen_pack(p.sfen);
|
||||
}
|
||||
}
|
||||
else if (token == "move") {
|
||||
ss >> value;
|
||||
Move move = UCI::to_move(tpos, value);
|
||||
if (check_illegal_move && move == MOVE_NONE) {
|
||||
ignore_flag_move = true;
|
||||
filtered_size_move++;
|
||||
}
|
||||
else {
|
||||
p.move = move;
|
||||
}
|
||||
}
|
||||
else if (token == "score") {
|
||||
double score;
|
||||
ss >> score;
|
||||
// Training Formula ?Issue #71 ?nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
|
||||
// Normalize to [0.0, 1.0].
|
||||
score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value);
|
||||
// Scale to [dest_score_min_value, dest_score_max_value].
|
||||
score = score * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
|
||||
p.score = Math::clamp((int32_t)std::round(score), -(int32_t)VALUE_MATE, (int32_t)VALUE_MATE);
|
||||
}
|
||||
else if (token == "ply") {
|
||||
int temp;
|
||||
ss >> temp;
|
||||
if (temp < ply_minimum || temp > ply_maximum) {
|
||||
ignore_flag_ply = true;
|
||||
filtered_size_ply++;
|
||||
}
|
||||
p.gamePly = uint16_t(temp); // No cast here?
|
||||
if (interpolate_eval != 0) {
|
||||
p.score = min(3000, interpolate_eval * temp);
|
||||
}
|
||||
}
|
||||
else if (token == "result") {
|
||||
int temp;
|
||||
ss >> temp;
|
||||
p.game_result = int8_t(temp); // Do you need a cast here?
|
||||
if (interpolate_eval) {
|
||||
p.score = p.score * p.game_result;
|
||||
}
|
||||
}
|
||||
else if (token == "e") {
|
||||
if (!(ignore_flag_fen || ignore_flag_move || ignore_flag_ply)) {
|
||||
fs.write((char*)&p, sizeof(PackedSfenValue));
|
||||
data_size += 1;
|
||||
// debug
|
||||
// std::cout<<tpos<<std::endl;
|
||||
// std::cout<<p.score<<","<<int(p.gamePly)<<","<<int(p.game_result)<<std::endl;
|
||||
}
|
||||
else {
|
||||
filtered_size++;
|
||||
}
|
||||
ignore_flag_fen = false;
|
||||
ignore_flag_move = false;
|
||||
ignore_flag_ply = false;
|
||||
}
|
||||
}
|
||||
std::cout << "done " << data_size << " parsed " << filtered_size << " is filtered"
|
||||
<< " (invalid fen:" << filtered_size_fen << ", illegal move:" << filtered_size_move << ", invalid ply:" << filtered_size_ply << ")" << std::endl;
|
||||
ifs.close();
|
||||
}
|
||||
std::cout << "all done" << std::endl;
|
||||
fs.close();
|
||||
}
|
||||
|
||||
// Removes leading whitespace (as classified by std::isspace) from s, in place.
static inline void ltrim(std::string& s) {
    // The predicate takes unsigned char: passing a negative value (any
    // non-ASCII byte when char is signed) to std::isspace is undefined behaviour.
    const auto first_kept = std::find_if(s.begin(), s.end(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    s.erase(s.begin(), first_kept);
}

// Removes trailing whitespace (as classified by std::isspace) from s, in place.
static inline void rtrim(std::string& s) {
    // Search backwards for the last non-space character; base() converts the
    // reverse iterator into the position just past that character.
    const auto last_kept = std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
        return !std::isspace(ch);
    });
    s.erase(last_kept.base(), s.end());
}

// Removes whitespace from both ends of s, in place.
static inline void trim(std::string& s) {
    ltrim(s);
    rtrim(s);
}
|
||||
|
||||
// Maps the quoted value of a PGN [Result ...] tag to a score from White's
// point of view: 1 for "1-0", -1 for "0-1", and 0 for everything else
// (which includes draws and any unrecognised text).
int parse_game_result_from_pgn_extract(std::string result) {
    const bool white_won = (result == "\"1-0\"");
    if (white_won)
        return 1;

    const bool black_won = (result == "\"0-1\"");
    return black_won ? -1 : 0;
}
|
||||
|
||||
// 0.25 --> 0.25 * PawnValueEg
|
||||
// #-4 --> -mate_in(4)
|
||||
// #3 --> mate_in(3)
|
||||
// -M4 --> -mate_in(4)
|
||||
// +M3 --> mate_in(3)
|
||||
// Parses an evaluation string taken from a PGN comment.
//
// Mate scores ("#N", "#-N", "+MN", "-MN") are turned into +/- mate_in(N);
// anything else must be a complete decimal number of pawns and is scaled by
// PawnValueEg.
//
// success is set to true when eval was understood. On failure it is set to
// false and VALUE_ZERO is returned. A malformed mate distance (e.g. "#" or
// "-Mx") and an empty string are now reported as failures; previously the
// former let a std::stoi exception escape and the latter was accepted as 0.
Value parse_score_from_pgn_extract(std::string eval, bool& success) {
    success = true;

    // Reads the mate distance that starts at offset `from`. Clears `success`
    // and returns 0 when no valid int can be read.
    const auto mate_distance = [&eval, &success](std::size_t from) -> int {
        try {
            return std::stoi(eval.substr(from));
        }
        catch (const std::exception&) {
            // std::invalid_argument / std::out_of_range from stoi or substr.
            success = false;
            return 0;
        }
    };

    if (eval.substr(0, 1) == "#") {
        const bool negative = eval.substr(1, 1) == "-";
        const int distance = mate_distance(negative ? 2 : 1);
        if (!success) {
            return VALUE_ZERO;
        }
        if (negative) {
            return -mate_in(distance);
        }
        else {
            return mate_in(distance);
        }
    }
    else if (eval.substr(0, 2) == "-M") {
        const int distance = mate_distance(2);
        if (!success) {
            return VALUE_ZERO;
        }
        return -mate_in(distance);
    }
    else if (eval.substr(0, 2) == "+M") {
        const int distance = mate_distance(2);
        if (!success) {
            return VALUE_ZERO;
        }
        return mate_in(distance);
    }
    else {
        char* endptr = nullptr;
        const double value = std::strtod(eval.c_str(), &endptr);

        // Reject input with no digits at all (endptr did not move) as well
        // as input with trailing characters after the number.
        if (endptr == eval.c_str() || *endptr != '\0') {
            success = false;
            return VALUE_ZERO;
        }
        else {
            return Value(value * static_cast<double>(PawnValueEg));
        }
    }
}
|
||||
|
||||
// for Debug
|
||||
//#define DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT
|
||||
|
||||
// Cheap plausibility check: a string "looks like" a FEN when it contains
// exactly 5 spaces (six fields) and exactly 7 slashes (eight ranks).
// Nothing else about the string is validated.
bool is_like_fen(std::string fen) {
    int spaces = 0;
    int slashes = 0;

    for (const char c : fen) {
        if (c == ' ') {
            ++spaces;
        }
        else if (c == '/') {
            ++slashes;
        }
    }

    return slashes == 7 && spaces == 5;
}
|
||||
|
||||
void convert_bin_from_pgn_extract(
|
||||
const vector<string>& filenames,
|
||||
const string& output_file_name,
|
||||
const bool pgn_eval_side_to_move,
|
||||
const bool convert_no_eval_fens_as_score_zero)
|
||||
{
|
||||
std::cout << "pgn_eval_side_to_move=" << pgn_eval_side_to_move << std::endl;
|
||||
std::cout << "convert_no_eval_fens_as_score_zero=" << convert_no_eval_fens_as_score_zero << std::endl;
|
||||
|
||||
auto th = Threads.main();
|
||||
auto& pos = th->rootPos;
|
||||
|
||||
std::fstream ofs;
|
||||
ofs.open(output_file_name, ios::out | ios::binary);
|
||||
|
||||
int game_count = 0;
|
||||
int fen_count = 0;
|
||||
|
||||
for (auto filename : filenames) {
|
||||
std::cout << now_string() << " convert " << filename << std::endl;
|
||||
ifstream ifs;
|
||||
ifs.open(filename);
|
||||
|
||||
int game_result = 0;
|
||||
|
||||
std::string line;
|
||||
while (std::getline(ifs, line)) {
|
||||
|
||||
if (line.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
else if (line.substr(0, 1) == "[") {
|
||||
std::regex pattern_result(R"(\[Result (.+?)\])");
|
||||
std::smatch match;
|
||||
|
||||
// example: [Result "1-0"]
|
||||
if (std::regex_search(line, match, pattern_result)) {
|
||||
game_result = parse_game_result_from_pgn_extract(match.str(1));
|
||||
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
|
||||
std::cout << "game_result=" << game_result << std::endl;
|
||||
#endif
|
||||
game_count++;
|
||||
if (game_count % 10000 == 0) {
|
||||
std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
else {
|
||||
int gamePly = 1;
|
||||
auto itr = line.cbegin();
|
||||
|
||||
while (true) {
|
||||
gamePly++;
|
||||
|
||||
PackedSfenValue psv;
|
||||
memset((char*)&psv, 0, sizeof(PackedSfenValue));
|
||||
|
||||
// fen
|
||||
{
|
||||
bool fen_found = false;
|
||||
|
||||
while (!fen_found) {
|
||||
std::regex pattern_bracket(R"(\{(.+?)\})");
|
||||
std::smatch match;
|
||||
if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
|
||||
break;
|
||||
}
|
||||
|
||||
itr += match.position(0) + match.length(0) - 1;
|
||||
std::string str_fen = match.str(1);
|
||||
trim(str_fen);
|
||||
|
||||
if (is_like_fen(str_fen)) {
|
||||
fen_found = true;
|
||||
|
||||
StateInfo si;
|
||||
pos.set(str_fen, false, &si, th);
|
||||
pos.sfen_pack(psv.sfen);
|
||||
}
|
||||
|
||||
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
|
||||
std::cout << "str_fen=" << str_fen << std::endl;
|
||||
std::cout << "fen_found=" << fen_found << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (!fen_found) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// move
|
||||
{
|
||||
std::regex pattern_move(R"(\}(.+?)\{)");
|
||||
std::smatch match;
|
||||
if (!std::regex_search(itr, line.cend(), match, pattern_move)) {
|
||||
break;
|
||||
}
|
||||
|
||||
itr += match.position(0) + match.length(0) - 1;
|
||||
std::string str_move = match.str(1);
|
||||
trim(str_move);
|
||||
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
|
||||
std::cout << "str_move=" << str_move << std::endl;
|
||||
#endif
|
||||
psv.move = UCI::to_move(pos, str_move);
|
||||
}
|
||||
|
||||
// eval
|
||||
bool eval_found = false;
|
||||
{
|
||||
std::regex pattern_bracket(R"(\{(.+?)\})");
|
||||
std::smatch match;
|
||||
if (!std::regex_search(itr, line.cend(), match, pattern_bracket)) {
|
||||
break;
|
||||
}
|
||||
|
||||
std::string str_eval_clk = match.str(1);
|
||||
trim(str_eval_clk);
|
||||
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
|
||||
std::cout << "str_eval_clk=" << str_eval_clk << std::endl;
|
||||
#endif
|
||||
|
||||
// example: { [%eval 0.25] [%clk 0:10:00] }
|
||||
// example: { [%eval #-4] [%clk 0:10:00] }
|
||||
// example: { [%eval #3] [%clk 0:10:00] }
|
||||
// example: { +0.71/22 1.2s }
|
||||
// example: { -M4/7 0.003s }
|
||||
// example: { M3/245 0.017s }
|
||||
// example: { +M1/245 0.010s, White mates }
|
||||
// example: { 0.60 }
|
||||
// example: { book }
|
||||
// example: { rnbqkb1r/pp3ppp/2p1pn2/3p4/2PP4/2N2N2/PP2PPPP/R1BQKB1R w KQkq - 0 5 }
|
||||
|
||||
// Considering the absence of eval
|
||||
if (!is_like_fen(str_eval_clk)) {
|
||||
itr += match.position(0) + match.length(0) - 1;
|
||||
|
||||
if (str_eval_clk != "book") {
|
||||
std::regex pattern_eval1(R"(\[\%eval (.+?)\])");
|
||||
std::regex pattern_eval2(R"((.+?)\/)");
|
||||
|
||||
std::string str_eval;
|
||||
if (std::regex_search(str_eval_clk, match, pattern_eval1) ||
|
||||
std::regex_search(str_eval_clk, match, pattern_eval2)) {
|
||||
str_eval = match.str(1);
|
||||
trim(str_eval);
|
||||
}
|
||||
else {
|
||||
str_eval = str_eval_clk;
|
||||
}
|
||||
|
||||
bool success = false;
|
||||
Value value = parse_score_from_pgn_extract(str_eval, success);
|
||||
if (success) {
|
||||
eval_found = true;
|
||||
psv.score = Math::clamp(value, -VALUE_MATE, VALUE_MATE);
|
||||
}
|
||||
|
||||
#if defined(DEBUG_CONVERT_BIN_FROM_PGN_EXTRACT)
|
||||
std::cout << "str_eval=" << str_eval << std::endl;
|
||||
std::cout << "success=" << success << ", psv.score=" << psv.score << std::endl;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write
|
||||
if (eval_found || convert_no_eval_fens_as_score_zero) {
|
||||
if (!eval_found && convert_no_eval_fens_as_score_zero) {
|
||||
psv.score = 0;
|
||||
}
|
||||
|
||||
psv.gamePly = gamePly;
|
||||
psv.game_result = game_result;
|
||||
|
||||
if (pos.side_to_move() == BLACK) {
|
||||
if (!pgn_eval_side_to_move) {
|
||||
psv.score *= -1;
|
||||
}
|
||||
psv.game_result *= -1;
|
||||
}
|
||||
|
||||
ofs.write((char*)&psv, sizeof(PackedSfenValue));
|
||||
|
||||
fen_count++;
|
||||
}
|
||||
}
|
||||
|
||||
game_result = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << now_string() << " game_count=" << game_count << ", fen_count=" << fen_count << std::endl;
|
||||
std::cout << now_string() << " all done" << std::endl;
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
void convert_plain(
|
||||
const vector<string>& filenames,
|
||||
const string& output_file_name)
|
||||
{
|
||||
Position tpos;
|
||||
std::ofstream ofs;
|
||||
ofs.open(output_file_name, ios::app);
|
||||
auto th = Threads.main();
|
||||
for (auto filename : filenames) {
|
||||
std::cout << "convert " << filename << " ... ";
|
||||
|
||||
// Just convert packedsfenvalue to text
|
||||
std::fstream fs;
|
||||
fs.open(filename, ios::in | ios::binary);
|
||||
PackedSfenValue p;
|
||||
while (true)
|
||||
{
|
||||
if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
|
||||
StateInfo si;
|
||||
tpos.set_from_packed_sfen(p.sfen, &si, th);
|
||||
|
||||
// write as plain text
|
||||
ofs << "fen " << tpos.fen() << std::endl;
|
||||
ofs << "move " << UCI::move(Move(p.move), false) << std::endl;
|
||||
ofs << "score " << p.score << std::endl;
|
||||
ofs << "ply " << int(p.gamePly) << std::endl;
|
||||
ofs << "result " << int(p.game_result) << std::endl;
|
||||
ofs << "e" << std::endl;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fs.close();
|
||||
std::cout << "done" << std::endl;
|
||||
}
|
||||
ofs.close();
|
||||
std::cout << "all done" << std::endl;
|
||||
}
|
||||
|
||||
// File name suffixes used to recognise the supported training data formats.
static inline const std::string plain_extension{".plain"};
static inline const std::string bin_extension{".bin"};
static inline const std::string binpack_extension{".binpack"};
|
||||
|
||||
// Returns true when `name` can be opened for reading.
static bool file_exists(const std::string& name)
{
    // The temporary stream is closed again as soon as the check is done.
    return std::ifstream(name).good();
}
|
||||
|
||||
// Returns true when `lhs` finishes with the character sequence `end`.
// An empty `end` matches every string; an `end` longer than `lhs` never matches.
static bool ends_with(const std::string& lhs, const std::string& end)
{
    const std::size_t suffix_len = end.size();
    const std::size_t total_len = lhs.size();

    return suffix_len <= total_len
        && lhs.compare(total_len - suffix_len, suffix_len, end) == 0;
}
|
||||
|
||||
static bool is_convert_of_type(
|
||||
const std::string& input_path,
|
||||
const std::string& output_path,
|
||||
const std::string& expected_input_extension,
|
||||
const std::string& expected_output_extension)
|
||||
{
|
||||
return ends_with(input_path, expected_input_extension)
|
||||
&& ends_with(output_path, expected_output_extension);
|
||||
}
|
||||
|
||||
// Signature shared by the file-to-file converters selected by get_convert_function():
// input path, output path, an open mode (append or truncate, chosen by the caller)
// and a flag requesting validation.
using ConvertFunctionType = void(std::string inputPath, std::string outputPath, std::ios_base::openmode om, bool validate);
|
||||
|
||||
// Picks the converter that matches the extensions of the two paths.
// Returns nullptr when the (input, output) extension pair is not supported.
static ConvertFunctionType* get_convert_function(const std::string& input_path, const std::string& output_path)
{
    // One row per supported conversion: input suffix, output suffix, converter.
    struct Route
    {
        const std::string* from;
        const std::string* to;
        ConvertFunctionType* fn;
    };

    const Route routes[] = {
        { &plain_extension,   &bin_extension,     binpack::convertPlainToBin },
        { &plain_extension,   &binpack_extension, binpack::convertPlainToBinpack },
        { &bin_extension,     &plain_extension,   binpack::convertBinToPlain },
        { &bin_extension,     &binpack_extension, binpack::convertBinToBinpack },
        { &binpack_extension, &plain_extension,   binpack::convertBinpackToPlain },
        { &binpack_extension, &bin_extension,     binpack::convertBinpackToBin },
    };

    // The first matching row wins.
    for (const Route& route : routes)
    {
        if (is_convert_of_type(input_path, output_path, *route.from, *route.to))
            return route.fn;
    }

    return nullptr;
}
|
||||
|
||||
static void convert(const std::string& input_path, const std::string& output_path, std::ios_base::openmode om, bool validate)
|
||||
{
|
||||
if(!file_exists(input_path))
|
||||
{
|
||||
std::cerr << "Input file does not exist.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
auto func = get_convert_function(input_path, output_path);
|
||||
if (func != nullptr)
|
||||
{
|
||||
func(input_path, output_path, om, validate);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "Conversion between files of these types is not supported.\n";
|
||||
}
|
||||
}
|
||||
|
||||
static void convert(const std::vector<std::string>& args)
|
||||
{
|
||||
if (args.size() < 2 || args.size() > 4)
|
||||
{
|
||||
std::cerr << "Invalid arguments.\n";
|
||||
std::cerr << "Usage: convert from_path to_path [append] [validate]\n";
|
||||
return;
|
||||
}
|
||||
|
||||
const bool append = std::find(args.begin() + 2, args.end(), "append") != args.end();
|
||||
const bool validate = std::find(args.begin() + 2, args.end(), "validate") != args.end();
|
||||
|
||||
const std::ios_base::openmode openmode =
|
||||
append
|
||||
? std::ios_base::app
|
||||
: std::ios_base::trunc;
|
||||
|
||||
convert(args[0], args[1], openmode, validate);
|
||||
}
|
||||
|
||||
// Entry point for the "convert" command: splits the remaining command text
// into whitespace separated tokens and hands them to convert(args).
void convert(istringstream& is)
{
    std::vector<std::string> args;

    // Extraction fails once only whitespace (or nothing) is left.
    for (std::string token; is >> token; )
        args.push_back(token);

    convert(args);
}
|
||||
|
||||
static void append_files_from_dir(
|
||||
std::vector<std::string>& filenames,
|
||||
const std::string& base_dir,
|
||||
const std::string& target_dir)
|
||||
{
|
||||
string kif_base_dir = Path::combine(base_dir, target_dir);
|
||||
|
||||
namespace sys = std::filesystem;
|
||||
sys::path p(kif_base_dir); // Origin of enumeration
|
||||
std::for_each(sys::directory_iterator(p), sys::directory_iterator(),
|
||||
[&](const sys::path& path) {
|
||||
if (sys::is_regular_file(path))
|
||||
filenames.push_back(Path::combine(target_dir, path.filename().generic_string()));
|
||||
});
|
||||
}
|
||||
|
||||
static void rebase_files(
|
||||
std::vector<std::string>& filenames,
|
||||
const std::string& base_dir)
|
||||
{
|
||||
for (auto& file : filenames)
|
||||
{
|
||||
file = Path::combine(base_dir, file);
|
||||
}
|
||||
}
|
||||
|
||||
void convert_bin_from_pgn_extract(std::istringstream& is)
|
||||
{
|
||||
std::vector<std::string> filenames;
|
||||
|
||||
string base_dir;
|
||||
string target_dir;
|
||||
|
||||
bool pgn_eval_side_to_move = false;
|
||||
bool convert_no_eval_fens_as_score_zero = false;
|
||||
|
||||
string output_file_name = "shuffled_sfen.bin";
|
||||
|
||||
while (true)
|
||||
{
|
||||
string option;
|
||||
is >> option;
|
||||
|
||||
if (option == "")
|
||||
break;
|
||||
|
||||
if (option == "targetdir") is >> target_dir;
|
||||
else if (option == "targetfile")
|
||||
{
|
||||
std::string filename;
|
||||
is >> filename;
|
||||
filenames.push_back(filename);
|
||||
}
|
||||
|
||||
else if (option == "basedir") is >> base_dir;
|
||||
|
||||
else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move;
|
||||
else if (option == "convert_no_eval_fens_as_score_zero") is >> convert_no_eval_fens_as_score_zero;
|
||||
else if (option == "output_file_name") is >> output_file_name;
|
||||
else
|
||||
{
|
||||
cout << "Unknown option: " << option << ". Ignoring.\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (!target_dir.empty())
|
||||
{
|
||||
append_files_from_dir(filenames, base_dir, target_dir);
|
||||
}
|
||||
rebase_files(filenames, base_dir);
|
||||
|
||||
Eval::NNUE::init();
|
||||
|
||||
cout << "convert_bin_from_pgn-extract.." << endl;
|
||||
convert_bin_from_pgn_extract(
|
||||
filenames,
|
||||
output_file_name,
|
||||
pgn_eval_side_to_move,
|
||||
convert_no_eval_fens_as_score_zero);
|
||||
}
|
||||
|
||||
void convert_bin(std::istringstream& is)
|
||||
{
|
||||
std::vector<std::string> filenames;
|
||||
|
||||
string base_dir;
|
||||
string target_dir;
|
||||
|
||||
int ply_minimum = 0;
|
||||
int ply_maximum = 114514;
|
||||
bool interpolate_eval = 0;
|
||||
bool check_invalid_fen = false;
|
||||
bool check_illegal_move = false;
|
||||
|
||||
bool pgn_eval_side_to_move = false;
|
||||
bool convert_no_eval_fens_as_score_zero = false;
|
||||
|
||||
double src_score_min_value = 0.0;
|
||||
double src_score_max_value = 1.0;
|
||||
double dest_score_min_value = 0.0;
|
||||
double dest_score_max_value = 1.0;
|
||||
|
||||
string output_file_name = "shuffled_sfen.bin";
|
||||
|
||||
while (true)
|
||||
{
|
||||
string option;
|
||||
is >> option;
|
||||
|
||||
if (option == "")
|
||||
break;
|
||||
|
||||
if (option == "targetdir") is >> target_dir;
|
||||
else if (option == "targetfile")
|
||||
{
|
||||
std::string filename;
|
||||
is >> filename;
|
||||
filenames.push_back(filename);
|
||||
}
|
||||
|
||||
else if (option == "basedir") is >> base_dir;
|
||||
|
||||
else if (option == "ply_minimum") is >> ply_minimum;
|
||||
else if (option == "ply_maximum") is >> ply_maximum;
|
||||
else if (option == "interpolate_eval") is >> interpolate_eval;
|
||||
else if (option == "check_invalid_fen") is >> check_invalid_fen;
|
||||
else if (option == "check_illegal_move") is >> check_illegal_move;
|
||||
else if (option == "pgn_eval_side_to_move") is >> pgn_eval_side_to_move;
|
||||
else if (option == "convert_no_eval_fens_as_score_zero") is >> convert_no_eval_fens_as_score_zero;
|
||||
else if (option == "src_score_min_value") is >> src_score_min_value;
|
||||
else if (option == "src_score_max_value") is >> src_score_max_value;
|
||||
else if (option == "dest_score_min_value") is >> dest_score_min_value;
|
||||
else if (option == "dest_score_max_value") is >> dest_score_max_value;
|
||||
else if (option == "output_file_name") is >> output_file_name;
|
||||
else
|
||||
{
|
||||
cout << "Unknown option: " << option << ". Ignoring.\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (!target_dir.empty())
|
||||
{
|
||||
append_files_from_dir(filenames, base_dir, target_dir);
|
||||
}
|
||||
rebase_files(filenames, base_dir);
|
||||
|
||||
Eval::NNUE::init();
|
||||
|
||||
cout << "convert_bin.." << endl;
|
||||
convert_bin(
|
||||
filenames,
|
||||
output_file_name,
|
||||
ply_minimum,
|
||||
ply_maximum,
|
||||
interpolate_eval,
|
||||
src_score_min_value,
|
||||
src_score_max_value,
|
||||
dest_score_min_value,
|
||||
dest_score_max_value,
|
||||
check_invalid_fen,
|
||||
check_illegal_move
|
||||
);
|
||||
}
|
||||
|
||||
void convert_plain(std::istringstream& is)
|
||||
{
|
||||
std::vector<std::string> filenames;
|
||||
|
||||
string base_dir;
|
||||
string target_dir;
|
||||
|
||||
string output_file_name = "shuffled_sfen.bin";
|
||||
|
||||
while (true)
|
||||
{
|
||||
string option;
|
||||
is >> option;
|
||||
|
||||
if (option == "")
|
||||
break;
|
||||
|
||||
if (option == "targetdir") is >> target_dir;
|
||||
else if (option == "targetfile")
|
||||
{
|
||||
std::string filename;
|
||||
is >> filename;
|
||||
filenames.push_back(filename);
|
||||
}
|
||||
|
||||
else if (option == "basedir") is >> base_dir;
|
||||
|
||||
else if (option == "output_file_name") is >> output_file_name;
|
||||
else
|
||||
{
|
||||
cout << "Unknown option: " << option << ". Ignoring.\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (!target_dir.empty())
|
||||
{
|
||||
append_files_from_dir(filenames, base_dir, target_dir);
|
||||
}
|
||||
rebase_files(filenames, base_dir);
|
||||
|
||||
Eval::NNUE::init();
|
||||
|
||||
cout << "convert_plain.." << endl;
|
||||
convert_plain(filenames, output_file_name);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
// Command entry points for converting training data between file formats.
// Each function receives the remainder of the command text as a stream.
//
// The include guard deliberately avoids a leading underscore followed by an
// upper-case letter: such identifiers are reserved for the implementation.
#ifndef LEARNER_CONVERT_H_INCLUDED
#define LEARNER_CONVERT_H_INCLUDED

#include <vector>
#include <string>
#include <sstream>

namespace Learner {
    // Usage: convert from_path to_path [append] [validate]
    void convert(std::istringstream& is);

    // Option-driven conversion of pgn-extract output to binary records.
    void convert_bin_from_pgn_extract(std::istringstream& is);

    // Option-driven conversion to binary records.
    void convert_bin(std::istringstream& is);

    // Option-driven conversion of binary records to plain text.
    void convert_plain(std::istringstream& is);
}

#endif
|
||||
@@ -0,0 +1,962 @@
|
||||
#include "gensfen.h"
|
||||
|
||||
#include "sfen_writer.h"
|
||||
#include "packed_sfen.h"
|
||||
#include "opening_book.h"
|
||||
|
||||
#include "misc.h"
|
||||
#include "position.h"
|
||||
#include "thread.h"
|
||||
#include "tt.h"
|
||||
#include "uci.h"
|
||||
|
||||
#include "extra/nnue_data_binpack_format.h"
|
||||
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
#include "nnue/evaluate_nnue_learner.h"
|
||||
|
||||
#include "syzygy/tbprobe.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <limits>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <random>
|
||||
#include <shared_mutex>
|
||||
#include <sstream>
|
||||
#include <unordered_set>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Learner
|
||||
{
|
||||
// Class to generate sfen with multiple threads
|
||||
struct Gensfen
|
||||
{
|
||||
struct Params
|
||||
{
|
||||
// Min and max depths for search during gensfen
|
||||
int search_depth_min = 3;
|
||||
int search_depth_max = -1;
|
||||
|
||||
// Number of the nodes to be searched.
|
||||
// 0 represents no limits.
|
||||
uint64_t nodes = 0;
|
||||
|
||||
// Upper limit of evaluation value of generated situation
|
||||
int eval_limit = 3000;
|
||||
|
||||
// minimum ply with random move
|
||||
// maximum ply with random move
|
||||
// Number of random moves in one station
|
||||
int random_move_minply = 1;
|
||||
int random_move_maxply = 24;
|
||||
int random_move_count = 5;
|
||||
|
||||
// Move kings with a probability of 1/N when randomly moving like Apery software.
|
||||
// When you move the king again, there is a 1/N chance that it will randomly moved
|
||||
// once in the opponent's turn.
|
||||
// Apery has N=2. Specifying 0 here disables this function.
|
||||
int random_move_like_apery = 0;
|
||||
|
||||
// For when using multi pv instead of random move.
|
||||
// random_multi_pv is the number of candidates for MultiPV.
|
||||
// When adopting the move of the candidate move, the difference
|
||||
// between the evaluation value of the move of the 1st place
|
||||
// and the evaluation value of the move of the Nth place is.
|
||||
// Must be in the range random_multi_pv_diff.
|
||||
// random_multi_pv_depth is the search depth for MultiPV.
|
||||
int random_multi_pv = 0;
|
||||
int random_multi_pv_diff = 32000;
|
||||
int random_multi_pv_depth = -1;
|
||||
|
||||
// The minimum and maximum ply (number of steps from
|
||||
// the initial phase) of the sfens to write out.
|
||||
int write_minply = 16;
|
||||
int write_maxply = 400;
|
||||
|
||||
uint64_t save_every = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
std::string output_file_name = "generated_kifu";
|
||||
|
||||
SfenOutputType sfen_format = SfenOutputType::Binpack;
|
||||
|
||||
std::string seed;
|
||||
|
||||
bool write_out_draw_game_in_training_data_generation = true;
|
||||
bool detect_draw_by_consecutive_low_score = true;
|
||||
bool detect_draw_by_insufficient_mating_material = true;
|
||||
|
||||
bool ensure_quiet = false;
|
||||
|
||||
uint64_t num_threads;
|
||||
|
||||
std::string book;
|
||||
|
||||
void enforce_constraints()
|
||||
{
|
||||
search_depth_max = std::max(search_depth_min, search_depth_max);
|
||||
random_multi_pv_depth = std::max(search_depth_min, random_multi_pv_depth);
|
||||
|
||||
// Limit the maximum to a one-stop score. (Otherwise you might not end the loop)
|
||||
eval_limit = std::min(eval_limit, (int)mate_in(2));
|
||||
|
||||
save_every = std::max(save_every, REPORT_STATS_EVERY);
|
||||
|
||||
num_threads = Options["Threads"];
|
||||
}
|
||||
};
|
||||
|
||||
// Hash to limit the export of identical sfens
|
||||
static constexpr uint64_t GENSFEN_HASH_SIZE = 64 * 1024 * 1024;
|
||||
// It must be 2**N because it will be used as the mask to calculate hash_index.
|
||||
static_assert((GENSFEN_HASH_SIZE& (GENSFEN_HASH_SIZE - 1)) == 0);
|
||||
|
||||
static constexpr uint64_t REPORT_DOT_EVERY = 5000;
|
||||
static constexpr uint64_t REPORT_STATS_EVERY = 200000;
|
||||
static_assert(REPORT_STATS_EVERY % REPORT_DOT_EVERY == 0);
|
||||
|
||||
Gensfen(
|
||||
const Params& prm
|
||||
) :
|
||||
params(prm),
|
||||
prng(prm.seed),
|
||||
sfen_writer(prm.output_file_name, prm.num_threads, prm.save_every, prm.sfen_format)
|
||||
{
|
||||
hash.resize(GENSFEN_HASH_SIZE);
|
||||
|
||||
if (!prm.book.empty())
|
||||
{
|
||||
opening_book = open_opening_book(prm.book, prng);
|
||||
if (opening_book == nullptr)
|
||||
{
|
||||
std::cout << "WARNING: Failed to open opening book " << prm.book << ". Falling back to startpos.\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Output seed to veryfy by the user if it's not identical by chance.
|
||||
std::cout << prng << std::endl;
|
||||
}
|
||||
|
||||
void generate(uint64_t limit);
|
||||
|
||||
private:
|
||||
Params params;
|
||||
|
||||
PRNG prng;
|
||||
|
||||
std::mutex stats_mutex;
|
||||
TimePoint last_stats_report_time;
|
||||
|
||||
// sfen exporter
|
||||
SfenWriter sfen_writer;
|
||||
|
||||
SynchronizedRegionLogger::Region out;
|
||||
|
||||
vector<Key> hash; // 64MB*sizeof(HASH_KEY) = 512MB
|
||||
|
||||
std::unique_ptr<OpeningBook> opening_book;
|
||||
|
||||
static void set_gensfen_search_limits();
|
||||
|
||||
void generate_worker(
|
||||
Thread& th,
|
||||
std::atomic<uint64_t>& counter,
|
||||
uint64_t limit);
|
||||
|
||||
bool was_seen_before(const Position& pos);
|
||||
|
||||
optional<int8_t> get_current_game_result(
|
||||
Position& pos,
|
||||
const vector<int>& move_hist_scores) const;
|
||||
|
||||
vector<uint8_t> generate_random_move_flags();
|
||||
|
||||
optional<Move> choose_random_move(
|
||||
Position& pos,
|
||||
std::vector<uint8_t>& random_move_flag,
|
||||
int ply,
|
||||
int& random_move_c);
|
||||
|
||||
bool commit_psv(
|
||||
Thread& th,
|
||||
PSVector& sfens,
|
||||
int8_t lastTurnIsWin,
|
||||
std::atomic<uint64_t>& counter,
|
||||
uint64_t limit,
|
||||
Color result_color);
|
||||
|
||||
void report(uint64_t done, uint64_t new_done);
|
||||
|
||||
void maybe_report(uint64_t done);
|
||||
};
|
||||
|
||||
void Gensfen::set_gensfen_search_limits()
|
||||
{
|
||||
// About Search::Limits
|
||||
// Be careful because this member variable is global and affects other threads.
|
||||
auto& limits = Search::Limits;
|
||||
|
||||
// Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
|
||||
limits.infinite = true;
|
||||
|
||||
// Since PV is an obstacle when displayed, erase it.
|
||||
limits.silent = true;
|
||||
|
||||
// If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
|
||||
limits.nodes = 0;
|
||||
|
||||
// depth is also processed by the one passed as an argument of Learner::search().
|
||||
limits.depth = 0;
|
||||
}
|
||||
|
||||
void Gensfen::generate(uint64_t limit)
|
||||
{
|
||||
last_stats_report_time = 0;
|
||||
|
||||
set_gensfen_search_limits();
|
||||
|
||||
std::atomic<uint64_t> counter{0};
|
||||
Threads.execute_with_workers([&counter, limit, this](Thread& th) {
|
||||
generate_worker(th, counter, limit);
|
||||
});
|
||||
Threads.wait_for_workers_finished();
|
||||
|
||||
sfen_writer.flush();
|
||||
|
||||
if (limit % REPORT_STATS_EVERY != 0)
|
||||
{
|
||||
report(limit, limit % REPORT_STATS_EVERY);
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Worker loop executed by one thread: plays self-play games and collects a
// PackedSfenValue for each recorded position. A game ends when
// get_current_game_result() returns a result, when the score is adjudicated
// by eval_limit, or when no move is available; finished games are handed to
// commit_psv(), whose return value decides whether this worker stops.
void Gensfen::generate_worker(
    Thread& th,
    std::atomic<uint64_t>& counter,
    uint64_t limit)
{
    // For the time being, it will be treated as a draw
    // at the maximum number of steps to write.
    // Maximum StateInfo + Search PV to advance to leaf buffer
    std::vector<StateInfo, AlignedAllocator<StateInfo>> states(
        params.write_maxply + MAX_PLY /* == search_depth_min + α */);

    StateInfo si;

    // Set once commit_psv() returns true.
    bool quit = false;

    // One iteration per game.
    while (!quit)
    {
        // The position always lives in the worker's own thread object.
        auto& pos = th.rootPos;
        if (opening_book == nullptr)
        {
            pos.set(StartFEN, false, &si, &th);
        }
        else
        {
            auto& fen = opening_book->next_fen();
            pos.set(fen, false, &si, &th);
        }

        int resign_counter = 0;
        bool should_resign = prng.rand(10) > 1;

        // Vector for holding the sfens in the current simulated game.
        PSVector packed_sfens;
        packed_sfens.reserve(params.write_maxply + MAX_PLY);

        // Precomputed flags. Used internally by choose_random_move.
        vector<uint8_t> random_move_flag = generate_random_move_flags();

        // A counter that keeps track of the number of random moves.
        // When random_move_minply == -1, random moves are
        // performed continuously, so use it at this time.
        // Used internally by choose_random_move.
        int actual_random_move_count = 0;

        // Save history of move scores for adjudication
        vector<int> move_hist_scores;

        // Hands the finished game over and remembers whether to stop.
        auto flush_psv = [&](int8_t result) {
            quit = commit_psv(th, packed_sfens, result, counter, limit, pos.side_to_move());
        };

        for (int ply = 0; ; ++ply)
        {
            // Current search depth
            const int depth = params.search_depth_min + (int)prng.rand(params.search_depth_max - params.search_depth_min + 1);

            // Starting search calls init_for_search
            auto [search_value, search_pv] = Search::search(pos, depth, 1, params.nodes);

            // This has to be performed after search because it needs to know
            // rootMoves which are filled in init_for_search.
            const auto result = get_current_game_result(pos, move_hist_scores);
            if (result.has_value())
            {
                flush_psv(result.value());
                break;
            }

            // Always adjudicate by eval limit.
            // Also because of this we don't have to check for TB/MATE scores
            if (abs(search_value) < params.eval_limit)
            {
                resign_counter = 0;
            }
            else
            {
                resign_counter++;
                if ((should_resign && resign_counter >= 4) || abs(search_value) >= VALUE_KNOWN_WIN) {
                    flush_psv((search_value >= params.eval_limit) ? 1 : -1);
                    break;
                }
            }

            // In case there is no PV and the game was not ended here
            // there is nothing we can do, we can't continue the game,
            // we don't know the result, so discard this game.
            if (search_pv.empty())
            {
                break;
            }

            // Save the move score for adjudication.
            move_hist_scores.push_back(search_value);

            // Discard stuff before write_minply is reached
            // because it can harm training due to overfitting.
            // Initial positions would be too common.
            if (ply >= params.write_minply)
            {
                packed_sfens.emplace_back(PackedSfenValue());

                auto& psv = packed_sfens.back();

                // Fills the freshly added record from the current position.
                // Only the position data is written here; the game result is
                // added after the whole game is done.
                auto record_position = [&pos, &psv, &ply](auto score, auto best_move) {
                    pos.sfen_pack(psv.sfen);

                    psv.score = score;
                    psv.move = best_move;
                    psv.gamePly = ply;
                };

                if (!params.ensure_quiet)
                {
                    if (was_seen_before(pos))
                    {
                        packed_sfens.pop_back();
                    }
                    else
                    {
                        record_position(search_value, search_pv[0]);
                    }
                }
                else
                {
                    auto [qsearch_value, qsearch_pv] = Search::qsearch(pos);
                    if (qsearch_pv.empty())
                    {
                        // Already a quiet position
                        record_position(search_value, search_pv[0]);
                    }
                    else
                    {
                        // Navigate to a quiet
                        const int old_ply = ply;
                        for (auto m : qsearch_pv)
                        {
                            pos.do_move(m, states[ply++]);
                        }

                        if (was_seen_before(pos))
                        {
                            // Just skip the move.
                            packed_sfens.pop_back();
                        }
                        else
                        {
                            // Reevaluate
                            auto [quiet_search_value, quiet_search_pv] = Search::search(pos, depth, 1, params.nodes);
                            if (quiet_search_pv.empty())
                            {
                                // Just skip the move.
                                packed_sfens.pop_back();
                            }
                            else
                            {
                                record_position(quiet_search_value, quiet_search_pv[0]);
                            }
                        }

                        // Get back to the game
                        for (auto it = qsearch_pv.rbegin(); it != qsearch_pv.rend(); ++it)
                        {
                            pos.undo_move(*it);
                        }
                        ply = old_ply;
                    }
                }
            }

            // Update the next move according to best search result or random move.
            auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
            const Move next_move = random_move ? *random_move : search_pv[0];

            // We don't have the whole game yet, but it ended,
            // so the writing process ends and the next game starts.
            // This shouldn't really happen.
            if (!is_ok(next_move))
            {
                break;
            }

            // Do move.
            pos.do_move(next_move, states[ply]);
        }
    }
}
|
||||
|
||||
bool Gensfen::was_seen_before(const Position& pos)
|
||||
{
|
||||
// Look into the position hashtable to see if the same
|
||||
// position was seen before.
|
||||
// This is a good heuristic to exlude already seen
|
||||
// positions without many false positives.
|
||||
auto key = pos.key();
|
||||
auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
|
||||
auto old_key = hash[hash_index];
|
||||
if (key == old_key)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Replace with the current key.
|
||||
hash[hash_index] = key;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether the game is over in the current position.
// Returns 0 for a draw, -1 when there is no legal move and the side to move
// is in check, and nullopt when the game should continue.
optional<int8_t> Gensfen::get_current_game_result(
    Position& pos,
    const vector<int>& move_hist_scores) const
{
    // Variables for draw adjudication.
    // Todo: Make this as an option.

    // start the adjudication when ply reaches this value
    constexpr int adj_draw_ply = 80;
    // 4 move scores for each side have to be checked
    constexpr int adj_draw_cnt = 8;
    // move score in CP
    constexpr int adj_draw_score = 0;

    // One score is stored per played ply, so the history length is the ply.
    const int ply = move_hist_scores.size();

    // For the time being, reaching the maximum number of steps to write
    // is treated as a draw, as is a draw reported by the position.
    if (ply >= params.write_maxply || pos.is_draw(ply))
        return 0;

    // If there is no legal move
    if (pos.this_thread()->rootMoves.empty())
    {
        if (pos.checkers())
            return -1; /* mate */

        return 0; /* stalemate */
    }

    // Adjudicate game to a draw if the last 4 scores of each engine is 0.
    if (params.detect_draw_by_consecutive_low_score && ply >= adj_draw_ply)
    {
        int streak = 0;
        for (auto it = move_hist_scores.rbegin(); it != move_hist_scores.rend(); ++it)
        {
            // Draw scores must happen on consecutive plies
            if (abs(*it) > adj_draw_score)
                break;

            if (++streak >= adj_draw_cnt)
                return 0;
        }
    }

    // Draw by insufficient mating material
    if (params.detect_draw_by_insufficient_mating_material)
    {
        const int num_pieces = pos.count<ALL_PIECES>();

        if (num_pieces == 2)
        {
            // (1) KvK
            return 0;
        }

        if (num_pieces == 3)
        {
            // (2) KvK + 1 minor piece
            const int minor_pc =
                pos.count<BISHOP>(WHITE) + pos.count<KNIGHT>(WHITE) +
                pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK);
            if (minor_pc == 1)
                return 0;
        }
        else if (num_pieces == 4
            && pos.count<BISHOP>(WHITE) == 1
            && pos.count<BISHOP>(BLACK) == 1)
        {
            // (3) KBvKB, bishops of the same color
            const auto white_bishops = pos.pieces(WHITE, BISHOP);
            const auto black_bishops = pos.pieces(BLACK, BISHOP);

            const bool both_on_dark =
                (white_bishops & DarkSquares) && (black_bishops & DarkSquares);
            const bool both_on_light =
                (white_bishops & ~DarkSquares) && (black_bishops & ~DarkSquares);

            if (both_on_dark || both_on_light)
                return 0;
        }
    }

    return nullopt;
}
|
||||
|
||||
// Builds the per-ply flag array that tells whether a random move should be
// played at a given ply (index = 0-origin ply, non-zero = play a random move).
//
// The candidate plies are random_move_minply..random_move_maxply (given as
// 1-origin, handled here as 0-origin). Only the first N positions of a
// Fisher-Yates shuffle are computed, because only N plies are needed.
vector<uint8_t> Gensfen::generate_random_move_flags()
{
    vector<int> candidate_plies;
    candidate_plies.reserve((size_t)params.random_move_maxply);

    for (int ply = std::max(params.random_move_minply - 1, 0); ply < params.random_move_maxply; ++ply)
    {
        candidate_plies.push_back(ply);
    }

    // In case of Apery random move, insert() may be called random_move_count times,
    // so the array is sized with that much room on top.
    vector<uint8_t> flags((size_t)params.random_move_maxply + params.random_move_count);

    // No more random moves can be placed than there are candidate plies.
    const int picks = std::min(params.random_move_count, (int)candidate_plies.size());
    for (int i = 0; i < picks; ++i)
    {
        const auto chosen = prng.rand((uint64_t)candidate_plies.size() - i) + i;
        swap(candidate_plies[i], candidate_plies[chosen]);
        flags[candidate_plies[i]] = true;
    }

    return flags;
}
|
||||
|
||||
optional<Move> Gensfen::choose_random_move(
    Position& pos,
    std::vector<uint8_t>& random_move_flag,
    int ply,
    int& random_move_c)
{
    // Decides whether a random move is due at `ply` and, if so, picks it.
    // Returns an empty optional when no random move is to be played.
    // random_move_c counts the random moves played so far and is incremented
    // whenever one is chosen here.
    //
    // A random move is due in either of two modes:
    //  1. random_move_minply != -1: the precomputed flag table marks this ply.
    //  2. random_move_minply == -1: fewer than random_move_count random moves
    //     have been played so far.
    const bool flagged_for_this_ply =
           params.random_move_minply != -1
        && ply < (int)random_move_flag.size()
        && random_move_flag[ply];

    const bool quota_not_used_up =
           params.random_move_minply == -1
        && random_move_c < params.random_move_count;

    if (!flagged_for_this_ply && !quota_not_used_up)
        return nullopt;

    ++random_move_c;

    optional<Move> random_move;

    if (params.random_multi_pv != 0)
    {
        // Multi-PV mode: search, then pick uniformly among the best root moves.
        Search::search(pos, params.random_multi_pv_depth, params.random_multi_pv);

        auto& rm = pos.this_thread()->rootMoves;
        const uint64_t considered = min((uint64_t)rm.size(), (uint64_t)params.random_multi_pv);

        // Root moves are assumed to be sorted by descending score. Keep only
        // the leading ones whose score is within random_multi_pv_diff of rm[0].
        uint64_t eligible = 1;
        while (eligible < considered
               && !(rm[0].score > rm[eligible].score + params.random_multi_pv_diff))
            ++eligible;
        eligible = min(eligible, considered);

        random_move = rm[prng.rand(eligible)].pv[0];
        return random_move;
    }

    // Plain random mode. The caller is expected to pass a position that is
    // not mate, so the legal move list should not be empty.
    MoveList<LEGAL> list(pos);

    // With probability 1/random_move_like_apery use the Apery variant:
    // prefer a king move if there is one.
    if (params.random_move_like_apery != 0
        && prng.rand(params.random_move_like_apery) == 0)
    {
        Move king_moves[8]; // Near 8
        size_t king_move_count = 0;
        for (auto& m : list)
            if (type_of(pos.moved_piece(m)) == KING)
                king_moves[king_move_count++] = m;

        if (king_move_count != 0)
        {
            random_move = king_moves[prng.rand(king_move_count)];

            // In the Apery method there is a 1/2 chance that the opponent
            // also replies randomly: flag the next ply by inserting an entry
            // right after the current one.
            if (prng.rand(2) == 0)
                random_move_flag.insert(random_move_flag.begin() + ply + 1, 1, true);
        }
    }

    // Ordinary case (also the fallback when no king move exists):
    // any legal move, uniformly.
    if (!random_move.has_value())
        random_move = list.at((size_t)prng.rand((uint64_t)list.size()));

    return random_move;
}
|
||||
|
||||
// Write out the phases loaded in sfens to a file.
|
||||
// result: win/loss in the next phase after the final phase in sfens
|
||||
// 1 when winning. -1 when losing. Pass 0 for a draw.
|
||||
// Return value: true if the specified number of
|
||||
// sfens has already been reached and the process ends.
|
||||
bool Gensfen::commit_psv(
|
||||
Thread& th,
|
||||
PSVector& sfens,
|
||||
int8_t result,
|
||||
std::atomic<uint64_t>& counter,
|
||||
uint64_t limit,
|
||||
Color result_color)
|
||||
{
|
||||
if (!params.write_out_draw_game_in_training_data_generation && result == 0)
|
||||
{
|
||||
// We didn't write anything so why quit.
|
||||
return false;
|
||||
}
|
||||
|
||||
auto side_to_move_from_sfen = [](auto& sfen){
|
||||
return (Color)(sfen.sfen.data[0] & 1);
|
||||
};
|
||||
|
||||
// From the final stage (one step before) to the first stage, give information on the outcome of the game for each stage.
|
||||
// The phases stored in sfens are assumed to be continuous (in order).
|
||||
for (auto it = sfens.rbegin(); it != sfens.rend(); ++it)
|
||||
{
|
||||
// The side to move is packed as the lowest bit of the first byte
|
||||
const Color side_to_move = side_to_move_from_sfen(*it);
|
||||
it->game_result = side_to_move == result_color ? result : -result;
|
||||
}
|
||||
|
||||
// Write sfens in move order to make potential compression easier
|
||||
for (auto& sfen : sfens)
|
||||
{
|
||||
// Return true if there is already enough data generated.
|
||||
const auto iter = counter.fetch_add(1);
|
||||
if (iter >= limit)
|
||||
return true;
|
||||
|
||||
// because `iter` was done, now we do one more
|
||||
maybe_report(iter + 1);
|
||||
|
||||
// Write out one sfen.
|
||||
sfen_writer.write(th.thread_idx(), sfen);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Gensfen::report(uint64_t done, uint64_t new_done)
|
||||
{
|
||||
const auto now_time = now();
|
||||
const TimePoint elapsed = now_time - last_stats_report_time + 1;
|
||||
|
||||
out
|
||||
<< endl
|
||||
<< done << " sfens, "
|
||||
<< new_done * 1000 / elapsed << " sfens/second, "
|
||||
<< "at " << now_string() << sync_endl;
|
||||
|
||||
last_stats_report_time = now_time;
|
||||
|
||||
out = sync_region_cout.new_region();
|
||||
}
|
||||
|
||||
void Gensfen::maybe_report(uint64_t done)
|
||||
{
|
||||
if (done % REPORT_DOT_EVERY == 0)
|
||||
{
|
||||
std::lock_guard lock(stats_mutex);
|
||||
|
||||
if (last_stats_report_time == 0)
|
||||
{
|
||||
last_stats_report_time = now();
|
||||
out = sync_region_cout.new_region();
|
||||
}
|
||||
|
||||
if (done != 0)
|
||||
{
|
||||
out << '.';
|
||||
|
||||
if (done % REPORT_STATS_EVERY == 0)
|
||||
{
|
||||
report(done, REPORT_STATS_EVERY);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Command to generate a game record
|
||||
void gensfen(istringstream& is)
|
||||
{
|
||||
// Number of generated game records default = 8 billion phases (Ponanza specification)
|
||||
uint64_t loop_max = 8000000000UL;
|
||||
|
||||
Gensfen::Params params;
|
||||
|
||||
// Add a random number to the end of the file name.
|
||||
bool random_file_name = false;
|
||||
std::string sfen_format = "binpack";
|
||||
|
||||
string token;
|
||||
while (true)
|
||||
{
|
||||
token = "";
|
||||
is >> token;
|
||||
if (token == "")
|
||||
break;
|
||||
|
||||
if (token == "depth")
|
||||
is >> params.search_depth_min;
|
||||
else if (token == "depth2")
|
||||
is >> params.search_depth_max;
|
||||
else if (token == "nodes")
|
||||
is >> params.nodes;
|
||||
else if (token == "loop")
|
||||
is >> loop_max;
|
||||
else if (token == "output_file_name")
|
||||
is >> params.output_file_name;
|
||||
else if (token == "eval_limit")
|
||||
is >> params.eval_limit;
|
||||
else if (token == "random_move_minply")
|
||||
is >> params.random_move_minply;
|
||||
else if (token == "random_move_maxply")
|
||||
is >> params.random_move_maxply;
|
||||
else if (token == "random_move_count")
|
||||
is >> params.random_move_count;
|
||||
else if (token == "random_move_like_apery")
|
||||
is >> params.random_move_like_apery;
|
||||
else if (token == "random_multi_pv")
|
||||
is >> params.random_multi_pv;
|
||||
else if (token == "random_multi_pv_diff")
|
||||
is >> params.random_multi_pv_diff;
|
||||
else if (token == "random_multi_pv_depth")
|
||||
is >> params.random_multi_pv_depth;
|
||||
else if (token == "write_minply")
|
||||
is >> params.write_minply;
|
||||
else if (token == "write_maxply")
|
||||
is >> params.write_maxply;
|
||||
else if (token == "save_every")
|
||||
is >> params.save_every;
|
||||
else if (token == "book")
|
||||
is >> params.book;
|
||||
else if (token == "random_file_name")
|
||||
is >> random_file_name;
|
||||
// Accept also the old option name.
|
||||
else if (token == "use_draw_in_training_data_generation" || token == "write_out_draw_game_in_training_data_generation")
|
||||
is >> params.write_out_draw_game_in_training_data_generation;
|
||||
// Accept also the old option name.
|
||||
else if (token == "use_game_draw_adjudication" || token == "detect_draw_by_consecutive_low_score")
|
||||
is >> params.detect_draw_by_consecutive_low_score;
|
||||
else if (token == "detect_draw_by_insufficient_mating_material")
|
||||
is >> params.detect_draw_by_insufficient_mating_material;
|
||||
else if (token == "sfen_format")
|
||||
is >> sfen_format;
|
||||
else if (token == "seed")
|
||||
is >> params.seed;
|
||||
else if (token == "set_recommended_uci_options")
|
||||
{
|
||||
UCI::setoption("Contempt", "0");
|
||||
UCI::setoption("Skill Level", "20");
|
||||
UCI::setoption("UCI_Chess960", "false");
|
||||
UCI::setoption("UCI_AnalyseMode", "false");
|
||||
UCI::setoption("UCI_LimitStrength", "false");
|
||||
UCI::setoption("PruneAtShallowDepth", "false");
|
||||
UCI::setoption("EnableTranspositionTable", "true");
|
||||
}
|
||||
else if (token == "ensure_quiet")
|
||||
{
|
||||
params.ensure_quiet = true;
|
||||
}
|
||||
else
|
||||
cout << "ERROR: Ignoring unknown option " << token << endl;
|
||||
}
|
||||
|
||||
if (!sfen_format.empty())
|
||||
{
|
||||
if (sfen_format == "bin")
|
||||
params.sfen_format = SfenOutputType::Bin;
|
||||
else if (sfen_format == "binpack")
|
||||
params.sfen_format = SfenOutputType::Binpack;
|
||||
else
|
||||
cout << "WARNING: Unknown sfen format `" << sfen_format << "`. Using bin\n";
|
||||
}
|
||||
|
||||
if (params.ensure_quiet)
|
||||
{
|
||||
// Otherwise we can't ensure quiet positions...
|
||||
UCI::setoption("EnableTranspositionTable", "false");
|
||||
}
|
||||
|
||||
if (random_file_name)
|
||||
{
|
||||
// Give a random number to output_file_name at this point.
|
||||
// Do not use std::random_device(). Because it always the same integers on MinGW.
|
||||
PRNG r(params.seed);
|
||||
|
||||
// Just in case, reassign the random numbers.
|
||||
for (int i = 0; i < 10; ++i)
|
||||
r.rand(1);
|
||||
|
||||
auto to_hex = [](uint64_t u) {
|
||||
std::stringstream ss;
|
||||
ss << std::hex << u;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
// I don't want to wear 64bit numbers by accident, so I'next_move going to make a 64bit number 2 just in case.
|
||||
params.output_file_name += "_" + to_hex(r.rand<uint64_t>()) + to_hex(r.rand<uint64_t>());
|
||||
}
|
||||
|
||||
params.enforce_constraints();
|
||||
|
||||
std::cout << "INFO: Executing gensfen command\n";
|
||||
|
||||
std::cout << "INFO: Parameters:\n";
|
||||
std::cout
|
||||
<< " - search_depth_min = " << params.search_depth_min << endl
|
||||
<< " - search_depth_max = " << params.search_depth_max << endl
|
||||
<< " - nodes = " << params.nodes << endl
|
||||
<< " - num sfens to generate = " << loop_max << endl
|
||||
<< " - eval_limit = " << params.eval_limit << endl
|
||||
<< " - num threads (UCI) = " << params.num_threads << endl
|
||||
<< " - random_move_minply = " << params.random_move_minply << endl
|
||||
<< " - random_move_maxply = " << params.random_move_maxply << endl
|
||||
<< " - random_move_count = " << params.random_move_count << endl
|
||||
<< " - random_move_like_apery = " << params.random_move_like_apery << endl
|
||||
<< " - random_multi_pv = " << params.random_multi_pv << endl
|
||||
<< " - random_multi_pv_diff = " << params.random_multi_pv_diff << endl
|
||||
<< " - random_multi_pv_depth = " << params.random_multi_pv_depth << endl
|
||||
<< " - write_minply = " << params.write_minply << endl
|
||||
<< " - write_maxply = " << params.write_maxply << endl
|
||||
<< " - book = " << params.book << endl
|
||||
<< " - output_file_name = " << params.output_file_name << endl
|
||||
<< " - save_every = " << params.save_every << endl
|
||||
<< " - random_file_name = " << random_file_name << endl
|
||||
<< " - write_drawn_games = " << params.write_out_draw_game_in_training_data_generation << endl
|
||||
<< " - draw by low score = " << params.detect_draw_by_consecutive_low_score << endl
|
||||
<< " - draw by insuff. mat. = " << params.detect_draw_by_insufficient_mating_material << endl;
|
||||
|
||||
// Show if the training data generator uses NNUE.
|
||||
Eval::NNUE::verify_eval_file_loaded();
|
||||
|
||||
Threads.main()->ponder = false;
|
||||
|
||||
Gensfen gensfen(params);
|
||||
gensfen.generate(loop_max);
|
||||
|
||||
std::cout << "INFO: Gensfen finished." << endl;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
#ifndef _GENSFEN_H_
|
||||
#define _GENSFEN_H_
|
||||
|
||||
#include "position.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// Generates training ("teacher") positions. `is` holds the remaining tokens of the gensfen command line.
|
||||
void gensfen(std::istringstream& is);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1 +0,0 @@
|
||||
// just a place holder
|
||||
+90
-90
@@ -7,126 +7,126 @@
|
||||
// Floating point operation by 16bit type
|
||||
// Assume that the float type code generated by the compiler is in IEEE 754 format and use it.
|
||||
|
||||
#include "../types.h"
|
||||
#include "types.h"
|
||||
|
||||
namespace HalfFloat
{
    // IEEE 754 float 32 format is :
    //   sign(1bit) + exponent(8bits) + fraction(23bits) = 32bits
    //
    // Our float16 format is :
    //   sign(1bit) + exponent(5bits) + fraction(10bits) = 16bits
    //
    // Helper used to reinterpret the bits of a float. Reading the inactive
    // union member relies on the compiler supporting this form of type
    // punning (GCC, Clang and MSVC do).
    union float32_converter
    {
        int32_t n;
        float f;
    };

    // 16-bit float.
    //
    // Conversion from float truncates the fraction (no rounding). Values too
    // large for the format become +/-infinity, values too small become
    // subnormals or +/-0, and infinities/NaNs are preserved.
    struct float16
    {
        // --- constructors

        // Leaves the value uninitialized.
        float16() {}
        float16(int16_t n) { from_float((float)n); }
        float16(int32_t n) { from_float((float)n); }
        float16(float n) { from_float(n); }
        float16(double n) { from_float((float)n); }

        // build from a float
        void from_float(float f) { *this = to_float16(f); }

        // --- implicit converters

        operator int32_t() const { return (int32_t)to_float(*this); }
        operator float() const { return to_float(*this); }
        operator double() const { return double(to_float(*this)); }

        // --- operators
        // Arithmetic is carried out in float and converted back.

        float16 operator += (float16 rhs) { from_float(to_float(*this) + to_float(rhs)); return *this; }
        float16 operator -= (float16 rhs) { from_float(to_float(*this) - to_float(rhs)); return *this; }
        float16 operator *= (float16 rhs) { from_float(to_float(*this) * to_float(rhs)); return *this; }
        float16 operator /= (float16 rhs) { from_float(to_float(*this) / to_float(rhs)); return *this; }
        float16 operator + (float16 rhs) const { return float16(*this) += rhs; }
        float16 operator - (float16 rhs) const { return float16(*this) -= rhs; }
        float16 operator * (float16 rhs) const { return float16(*this) *= rhs; }
        float16 operator / (float16 rhs) const { return float16(*this) /= rhs; }
        float16 operator - () const { return float16(-to_float(*this)); }

        // Equality compares the raw bit patterns.
        bool operator == (float16 rhs) const { return this->v_ == rhs.v_; }
        bool operator != (float16 rhs) const { return !(*this == rhs); }

        static void UnitTest() { unit_test(); }

    private:

        // --- entity

        uint16_t v_;

        // --- conversion between float and float16

        static float16 to_float16(float f)
        {
            float32_converter c;
            c.f = f;
            const uint32_t n = (uint32_t)c.n;

            // The sign bit is MSB in common.
            const uint16_t sign_bit = (uint16_t)((n >> 16) & 0x8000);
            const uint32_t exponent32 = (n >> 23) & 0xff;
            const uint32_t fraction32 = n & 0x7fffff;

            float16 f_;

            if (exponent32 == 0xff)
            {
                // Infinity or NaN. Make sure a NaN stays a NaN even if its
                // payload only lives in the 13 bits that get dropped.
                uint16_t fraction = (uint16_t)(fraction32 >> (23 - 10));
                if (fraction32 != 0 && fraction == 0)
                    fraction = 0x200;
                f_.v_ = (uint16_t)(sign_bit | 0x7c00 | fraction);
                return f_;
            }

            // The exponent of IEEE 754's float 32 is biased +127,
            // so we change this bias into +15.
            const int32_t exponent = (int32_t)exponent32 - 127 + 15;

            if (exponent >= 0x1f)
            {
                // Too large for 5 exponent bits: saturate to infinity
                // instead of letting the exponent wrap around.
                f_.v_ = (uint16_t)(sign_bit | 0x7c00);
            }
            else if (exponent > 0)
            {
                // Normal number. The fraction is limited to 10-bit.
                f_.v_ = (uint16_t)(sign_bit | ((uint32_t)exponent << 10) | (fraction32 >> (23 - 10)));
            }
            else if (exponent >= -10)
            {
                // Below the smallest normal float16: store as a subnormal.
                // Restore the implicit leading 1 and shift it into place.
                const uint32_t mantissa = fraction32 | 0x800000;
                f_.v_ = (uint16_t)(sign_bit | (mantissa >> (14 - exponent)));
            }
            else
            {
                // Zero, float32 subnormals and anything else too small.
                f_.v_ = sign_bit;
            }

            return f_;
        }

        static float to_float(float16 v)
        {
            // Widen before shifting so that the sign never passes through
            // the sign bit of a promoted int.
            const uint32_t sign_bit = (uint32_t)(v.v_ & 0x8000) << 16;
            const uint32_t exponent16 = (uint32_t)(v.v_ >> 10) & 0x1f;
            uint32_t fraction = (uint32_t)(v.v_ & 0x3ff);

            uint32_t bits;

            if (exponent16 == 0x1f)
            {
                // Infinity or NaN.
                bits = sign_bit | 0x7f800000u | (fraction << (23 - 10));
            }
            else if (exponent16 != 0)
            {
                // Normal number: change the bias from +15 back to +127.
                bits = sign_bit | ((exponent16 - 15 + 127) << 23) | (fraction << (23 - 10));
            }
            else if (fraction == 0)
            {
                // Signed zero.
                bits = sign_bit;
            }
            else
            {
                // Subnormal: normalize until the implicit leading 1 (bit 10)
                // appears, lowering the exponent once per shift.
                uint32_t exponent = 127 - 15 + 1;
                do
                {
                    fraction <<= 1;
                    --exponent;
                } while ((fraction & 0x400) == 0);

                bits = sign_bit | (exponent << 23) | ((fraction & 0x3ff) << (23 - 10));
            }

            float32_converter c;
            c.n = (int32_t)bits;
            return c.f;
        }

        // It is not a unit test, but I confirmed that it can be calculated. I'll fix the code later (maybe).
        static void unit_test()
        {
            float16 a, b, c, d;
            a = 1;
            std::cout << (float)a << std::endl;
            b = -118.625;
            std::cout << (float)b << std::endl;
            c = 2.5;
            std::cout << (float)c << std::endl;
            d = a + c;
            std::cout << (float)d << std::endl;

            c *= 1.5;
            std::cout << (float)c << std::endl;

            b /= 3;
            std::cout << (float)b << std::endl;

            float f1 = 1.5;
            a += f1;
            std::cout << (float)a << std::endl;

            a += f1 * (float)a;
            std::cout << (float)a << std::endl;
        }

    };

}
|
||||
|
||||
|
||||
+1335
File diff suppressed because it is too large
Load Diff
+100
-189
@@ -1,101 +1,6 @@
|
||||
#ifndef _LEARN_H_
|
||||
#define _LEARN_H_
|
||||
|
||||
#if defined(EVAL_LEARN)
|
||||
|
||||
#include <vector>
|
||||
|
||||
// =====================
|
||||
// Settings for learning
|
||||
// =====================
|
||||
|
||||
// If you select one of the following, the details after that will be automatically selected.
|
||||
// If you don't select any of them, you need to set the subsequent details one by one.
|
||||
|
||||
// Learning setting by elmo method. This is the default setting.
|
||||
// To make a standard squeeze diaphragm, specify "lambda 1" with the learn command.
|
||||
#define LEARN_ELMO_METHOD
|
||||
|
||||
|
||||
// ----------------------
|
||||
// update formula
|
||||
// ----------------------
|
||||
|
||||
// Ada Grad. Recommended because it is stable.
|
||||
// #define ADA_GRAD_UPDATE
|
||||
|
||||
// SGD looking only at the sign of the gradient. It requires less memory, but the accuracy is...
|
||||
// #define SGD_UPDATE
|
||||
|
||||
// ----------------------
|
||||
// Settings for learning
|
||||
// ----------------------
|
||||
|
||||
// mini-batch size.
|
||||
// Calculate the gradient by combining this number of phases.
|
||||
// If you make it smaller, the number of update_weights() will increase and the convergence will be faster. The gradient is incorrect.
|
||||
// If you increase it, the number of update_weights() decreases, so the convergence will be slow. The slope will come out accurately.
|
||||
// I don't think you need to change this value in most cases.
|
||||
|
||||
#define LEARN_MINI_BATCH_SIZE (1000 * 1000 * 1)
|
||||
|
||||
// The number of phases to read from the file at one time. After reading this much, shuffle.
|
||||
// It is better to have a certain size, but this number x 40 bytes x 3 times as much memory is consumed. 400MB*3 is consumed in the 10M phase.
|
||||
// Must be a multiple of THREAD_BUFFER_SIZE(=10000).
|
||||
|
||||
#define LEARN_SFEN_READ_SIZE (1000 * 1000 * 10)
|
||||
|
||||
// Saving interval of evaluation function at learning. Save each time you learn this number of phases.
|
||||
// Needless to say, the longer the saving interval, the shorter the learning time.
|
||||
// Folder name is incremented for each save like 0/, 1/, 2/...
|
||||
// By default, once every 1 billion phases.
|
||||
#define LEARN_EVAL_SAVE_INTERVAL (1000000000ULL)
|
||||
|
||||
|
||||
// ----------------------
|
||||
// Select the objective function
|
||||
// ----------------------
|
||||
|
||||
// The objective function is the sum of squares of the difference in winning percentage
|
||||
// See learner.cpp for more information.
|
||||
|
||||
//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE
|
||||
|
||||
// Objective function is cross entropy
|
||||
// See learner.cpp for more information.
|
||||
// So-called ordinary "rag cloth squeezer"
|
||||
//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY
|
||||
|
||||
// A version in which the objective function is cross entropy, but the win rate function is not passed
|
||||
// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE
|
||||
|
||||
// elmo (WCSC27) method
|
||||
// #define LOSS_FUNCTION_IS_ELMO_METHOD
|
||||
|
||||
// ※ Other things may be added.
|
||||
|
||||
|
||||
// ----------------------
|
||||
// debug settings for learning
|
||||
// ----------------------
|
||||
|
||||
// Reduce the output of rmse during learning to 1 for this number of times.
|
||||
// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
|
||||
#define LEARN_RMSE_OUTPUT_INTERVAL 1
|
||||
|
||||
|
||||
// ----------------------
|
||||
// learning from zero vector
|
||||
// ----------------------
|
||||
|
||||
// Start learning the evaluation function parameters from the zero vector.
|
||||
// Initialize to zero, generate a game, learn from zero vector,
|
||||
// Game generation → If you repeat learning, you will get parameters that do not depend on the professional game. (maybe)
|
||||
// (very time consuming)
|
||||
|
||||
//#define RESET_TO_ZERO_VECTOR
|
||||
|
||||
|
||||
// ----------------------
|
||||
// Floating point for learning
|
||||
// ----------------------
|
||||
@@ -105,7 +10,7 @@
|
||||
// Even if it is a double type, there is almost no difference in the way of convergence, so fix it to float.
|
||||
|
||||
// when using float
|
||||
typedef float LearnFloatType;
|
||||
using LearnFloatType = float;
|
||||
|
||||
// when using double
|
||||
//typedef double LearnFloatType;
|
||||
@@ -114,59 +19,6 @@ typedef float LearnFloatType;
|
||||
//#include "half_float.h"
|
||||
//typedef HalfFloat::float16 LearnFloatType;
|
||||
|
||||
// ----------------------
|
||||
// save memory
|
||||
// ----------------------
|
||||
|
||||
// Use a triangular array for the Weight array (of which is KPP) to save memory.
|
||||
// If this is used, the weight array for learning will be about 3 times as large as the evaluation function file.
|
||||
|
||||
#define USE_TRIANGLE_WEIGHT_ARRAY
|
||||
|
||||
// ----------------------
|
||||
// dimension down
|
||||
// ----------------------
|
||||
|
||||
// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry).
|
||||
// All on by default.
|
||||
|
||||
// Dimension reduction using mirror and inverse for KK. (Unclear effect)
|
||||
// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on.
|
||||
#define USE_KK_MIRROR_WRITE
|
||||
#define USE_KK_INVERSE_WRITE
|
||||
|
||||
// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective)
|
||||
// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on.
|
||||
#define USE_KKP_MIRROR_WRITE
|
||||
#define USE_KKP_INVERSE_WRITE
|
||||
|
||||
// Perform dimension reduction using a mirror for KPP. (Turning this off requires double the teacher position)
|
||||
// KPP has no inverse. (Because there is only K on the front side)
|
||||
#define USE_KPP_MIRROR_WRITE
|
||||
|
||||
// Perform a dimension reduction using a mirror for KPPP. (Turning this off requires double the teacher position)
|
||||
// KPPP has no inverse. (Because there is only K on the front side)
|
||||
#define USE_KPPP_MIRROR_WRITE
|
||||
|
||||
// Reduce the dimension by KPP for learning the KKPP component.
|
||||
// Learning is very slow.
|
||||
// Do not use as it is not debugged.
|
||||
//#define USE_KKPP_LOWER_DIM
|
||||
|
||||
|
||||
// ======================
|
||||
// Settings for creating teacher phases
|
||||
// ======================
|
||||
|
||||
// ----------------------
|
||||
// write out the draw
|
||||
// ----------------------
|
||||
|
||||
// When you reach a draw, write it out as a teacher position
|
||||
// It's subtle whether it's better to do this.
|
||||
// #define LEARN_GENSFEN_USE_DRAW_RESULT
|
||||
|
||||
|
||||
// ======================
|
||||
// configure
|
||||
// ======================
|
||||
@@ -175,63 +27,122 @@ typedef float LearnFloatType;
|
||||
// Learning with the method of elmo (WCSC27)
|
||||
// ----------------------
|
||||
|
||||
#if defined( LEARN_ELMO_METHOD )
|
||||
#define LOSS_FUNCTION_IS_ELMO_METHOD
|
||||
#define ADA_GRAD_UPDATE
|
||||
#endif
|
||||
|
||||
#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
|
||||
|
||||
// ----------------------
|
||||
// Definition of struct used in Learner
|
||||
// ----------------------
|
||||
#include "../position.h"
|
||||
|
||||
#include "autograd.h"
|
||||
#include "packed_sfen.h"
|
||||
|
||||
#include "position.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
namespace Learner
|
||||
{
|
||||
//Structure in which PackedSfen and evaluation value are integrated
|
||||
// If you write different contents for each option, it will be a problem when reusing the teacher game
|
||||
// For the time being, write all the following members regardless of the options.
|
||||
struct PackedSfenValue
|
||||
{
|
||||
// phase
|
||||
PackedSfen sfen;
|
||||
// ----------------------
|
||||
// Settings for learning
|
||||
// ----------------------
|
||||
|
||||
// Evaluation value returned from Learner::search()
|
||||
int16_t score;
|
||||
// mini-batch size.
|
||||
// Calculate the gradient by combining this number of phases.
|
||||
// If you make it smaller, the number of update_weights() will increase and the convergence will be faster. The gradient is incorrect.
|
||||
// If you increase it, the number of update_weights() decreases, so the convergence will be slow. The slope will come out accurately.
|
||||
// I don't think you need to change this value in most cases.
|
||||
|
||||
// PV first move
|
||||
// Used when finding the match rate with the teacher
|
||||
uint16_t move;
|
||||
constexpr std::size_t LEARN_MINI_BATCH_SIZE = 1000 * 1000 * 1;
|
||||
|
||||
// Trouble of the phase from the initial phase.
|
||||
uint16_t gamePly;
|
||||
// Saving interval of evaluation function at learning. Save each time you learn this number of phases.
|
||||
// Needless to say, the longer the saving interval, the shorter the learning time.
|
||||
// Folder name is incremented for each save like 0/, 1/, 2/...
|
||||
// By default, once every 100 million phases.
|
||||
constexpr std::size_t LEARN_EVAL_SAVE_INTERVAL = 100'000'000ULL;
|
||||
|
||||
// 1 if the player on this side ultimately wins the game. -1 if you are losing.
|
||||
// 0 if a draw is reached.
|
||||
// The draw is in the teacher position generation command gensfen,
|
||||
// Only write if LEARN_GENSFEN_DRAW_RESULT is enabled.
|
||||
int8_t game_result;
|
||||
// Reduce the output of rmse during learning to 1 for this number of times.
|
||||
// rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
|
||||
constexpr std::size_t LEARN_RMSE_OUTPUT_INTERVAL = 1;
|
||||
|
||||
// When exchanging the file that wrote the teacher aspect with other people
|
||||
//Because this structure size is not fixed, pad it so that it is 40 bytes in any environment.
|
||||
uint8_t padding;
|
||||
// Learning from the generated game record
|
||||
void learn(std::istringstream& is);
|
||||
|
||||
// 32 + 2 + 2 + 2 + 1 + 1 = 40bytes
|
||||
};
|
||||
using CalcLossFunc = ValueWithGrad<double>(Value, Value, int, int);
|
||||
|
||||
// Type that returns the reading line and the evaluation value at that time
|
||||
// Used in Learner::search(), Learner::qsearch().
|
||||
typedef std::pair<Value, std::vector<Move> > ValueAndPV;
|
||||
struct Loss
|
||||
{
|
||||
double value() const
|
||||
{
|
||||
return m_loss.value;
|
||||
}
|
||||
|
||||
// So far, only Yaneura King 2018 Otafuku has this stub
|
||||
// This stub is required if EVAL_LEARN is defined.
|
||||
extern Learner::ValueAndPV search(Position& pos, int depth , size_t multiPV = 1 , uint64_t NodesLimit = 0);
|
||||
extern Learner::ValueAndPV qsearch(Position& pos);
|
||||
double grad() const
|
||||
{
|
||||
return m_loss.grad;
|
||||
}
|
||||
|
||||
double calc_grad(Value shallow, const PackedSfenValue& psv);
|
||||
uint64_t count() const
|
||||
{
|
||||
return m_count;
|
||||
}
|
||||
|
||||
Loss() = default;
|
||||
|
||||
Loss(const Loss& other) :
|
||||
m_loss(other.m_loss),
|
||||
m_count(other.m_count)
|
||||
{
|
||||
}
|
||||
|
||||
Loss& operator += (const ValueWithGrad<double>& rhs)
|
||||
{
|
||||
std::unique_lock lock(m_mutex);
|
||||
|
||||
m_loss += rhs.abs();
|
||||
m_count += 1;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
Loss& operator += (const Loss& rhs)
|
||||
{
|
||||
std::unique_lock lock(m_mutex);
|
||||
|
||||
m_loss += rhs.m_loss.abs();
|
||||
m_count += rhs.m_count;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
std::unique_lock lock(m_mutex);
|
||||
|
||||
m_loss = ValueWithGrad<double>{ 0.0, 0.0 };
|
||||
m_count = 0;
|
||||
}
|
||||
|
||||
template <typename StreamT>
|
||||
void print_with_grad(const std::string& prefix, StreamT& s) const
|
||||
{
|
||||
s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << std::endl;
|
||||
s << " - " << prefix << "_grad_norm = " << m_loss.grad / (double)m_count << std::endl;
|
||||
}
|
||||
|
||||
template <typename StreamT>
|
||||
void print_only_loss(const std::string& prefix, StreamT& s) const
|
||||
{
|
||||
s << " - " << prefix << "_loss = " << m_loss.value / (double)m_count << std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
ValueWithGrad<double> m_loss{ 0.0, 0.0 };
|
||||
uint64_t m_count{0};
|
||||
std::mutex m_mutex;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // ifndef _LEARN_H_
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,25 +0,0 @@
|
||||
#include "learning_tools.h"
|
||||
|
||||
#if defined (EVAL_LEARN)
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#include "../misc.h"
|
||||
|
||||
using namespace Eval;
|
||||
|
||||
namespace EvalLearningTools
|
||||
{
|
||||
|
||||
// --- static variables
|
||||
|
||||
double Weight::eta;
|
||||
double Weight::eta1;
|
||||
double Weight::eta2;
|
||||
double Weight::eta3;
|
||||
uint64_t Weight::eta1_epoch;
|
||||
uint64_t Weight::eta2_epoch;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,200 +0,0 @@
|
||||
#ifndef __LEARN_WEIGHT_H__
|
||||
#define __LEARN_WEIGHT_H__
|
||||
|
||||
// A set of machine learning tools related to the weight array used for machine learning of evaluation functions
|
||||
|
||||
#include "learn.h"
|
||||
#if defined (EVAL_LEARN)
|
||||
#include <array>
|
||||
|
||||
#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE)
|
||||
#include "../misc.h" // PRNG , my_insertion_sort
|
||||
#endif
|
||||
|
||||
#include <cmath> // std::sqrt()
|
||||
|
||||
namespace EvalLearningTools
|
||||
{
|
||||
// -------------------------------------------------
|
||||
// Array for learning that stores gradients etc.
|
||||
// -------------------------------------------------
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma pack(push,2)
|
||||
#elif defined(__GNUC__)
|
||||
#pragma pack(2)
|
||||
#endif
|
||||
struct Weight
|
||||
{
|
||||
// cumulative value of one mini-batch gradient
|
||||
LearnFloatType g = LearnFloatType(0);
|
||||
|
||||
// When ADA_GRAD_UPDATE. LearnFloatType == float,
|
||||
// total 4*2 + 4*2 + 1*2 = 18 bytes
|
||||
// It suffices to secure a Weight array that is 4.5 times the size of the evaluation function parameter of 1GB.
|
||||
// However, sizeof(Weight)==20 code is generated if the structure alignment is in 4-byte units, so
|
||||
// Specify pragma pack(2).
|
||||
|
||||
// For SGD_UPDATE, this structure is reduced by 10 bytes to 8 bytes.
|
||||
|
||||
// Learning rate η(eta) such as AdaGrad.
|
||||
// It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called.
|
||||
// The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch.
|
||||
// After eta2_epoch, gradually change from eta2 to eta3.
|
||||
static double eta;
|
||||
static double eta1;
|
||||
static double eta2;
|
||||
static double eta3;
|
||||
static uint64_t eta1_epoch;
|
||||
static uint64_t eta2_epoch;
|
||||
|
||||
// Batch initialization of eta. If 0 is passed, the default value will be set.
|
||||
static void init_eta(double eta1, double eta2, double eta3, uint64_t eta1_epoch, uint64_t eta2_epoch)
|
||||
{
|
||||
Weight::eta1 = (eta1 != 0) ? eta1 : 30.0;
|
||||
Weight::eta2 = (eta2 != 0) ? eta2 : 30.0;
|
||||
Weight::eta3 = (eta3 != 0) ? eta3 : 30.0;
|
||||
Weight::eta1_epoch = (eta1_epoch != 0) ? eta1_epoch : 0;
|
||||
Weight::eta2_epoch = (eta2_epoch != 0) ? eta2_epoch : 0;
|
||||
}
|
||||
|
||||
// Set eta according to epoch.
|
||||
static void calc_eta(uint64_t epoch)
|
||||
{
|
||||
if (Weight::eta1_epoch == 0) // Exclude eta2
|
||||
Weight::eta = Weight::eta1;
|
||||
else if (epoch < Weight::eta1_epoch)
|
||||
// apportion
|
||||
Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch;
|
||||
else if (Weight::eta2_epoch == 0) // Exclude eta3
|
||||
Weight::eta = Weight::eta2;
|
||||
else if (epoch < Weight::eta2_epoch)
|
||||
Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch);
|
||||
else
|
||||
Weight::eta = Weight::eta3;
|
||||
}
|
||||
|
||||
template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
|
||||
|
||||
#if defined (ADA_GRAD_UPDATE)
|
||||
|
||||
// Since the maximum value that can be accurately calculated with float is INT16_MAX*256-1
|
||||
// Keep the small value as a marker.
|
||||
const LearnFloatType V0_NOT_INIT = (INT16_MAX * 128);
|
||||
|
||||
// What holds v internally. The previous implementation kept a fixed decimal with only a fractional part to save memory,
|
||||
// Since it is doubtful in accuracy and the visibility is bad, it was abolished.
|
||||
LearnFloatType v0 = LearnFloatType(V0_NOT_INIT);
|
||||
|
||||
// AdaGrad g2
|
||||
LearnFloatType g2 = LearnFloatType(0);
|
||||
|
||||
// update with AdaGrad
|
||||
// When executing this function, the value of g and the member do not change
|
||||
// Guaranteed by the caller. It does not have to be an atomic operation.
|
||||
// k is a coefficient for eta. 1.0 is usually sufficient. If you want to lower eta for your turn item, set this to 1/8.0 etc.
|
||||
template <typename T>
|
||||
void updateFV(T& v,double k)
|
||||
{
|
||||
// AdaGrad update formula
|
||||
// Gradient vector is g, vector to be updated is v, η(eta) is a constant,
|
||||
// g2 = g2 + g^2
|
||||
// v = v - ηg/sqrt(g2)
|
||||
|
||||
constexpr double epsilon = 0.000001;
|
||||
|
||||
if (g == LearnFloatType(0))
|
||||
return;
|
||||
|
||||
g2 += g * g;
|
||||
|
||||
// If v0 is V0_NOT_INIT, it means that the value is not initialized with the value of KK/KKP/KPP array,
|
||||
// In this case, read the value of v from the one passed in the argument.
|
||||
double V = (v0 == V0_NOT_INIT) ? v : v0;
|
||||
|
||||
V -= k * eta * (double)g / sqrt((double)g2 + epsilon);
|
||||
|
||||
// Limit the value of V to be within the range of types.
|
||||
// By the way, windows.h defines the min and max macros, so to avoid it,
|
||||
// Here, it is enclosed in parentheses so that it is not treated as a function-like macro.
|
||||
V = (std::min)((double)(std::numeric_limits<T>::max)() , V);
|
||||
V = (std::max)((double)(std::numeric_limits<T>::min)() , V);
|
||||
|
||||
v0 = (LearnFloatType)V;
|
||||
v = (T)round(V);
|
||||
|
||||
// Clear g because one update of mini-batch for this element is over
|
||||
// g[i] = 0;
|
||||
// → There is a problem of dimension reduction, so this will be done by the caller.
|
||||
}
|
||||
|
||||
#elif defined(SGD_UPDATE)
|
||||
|
||||
// See only the sign of the gradient Update with SGD
|
||||
// When executing this function, the value of g and the member do not change
|
||||
// Guaranteed by the caller. It does not have to be an atomic operation.
|
||||
template <typename T>
|
||||
void updateFV(T & v , double k)
|
||||
{
|
||||
if (g == 0)
|
||||
return;
|
||||
|
||||
// See only the sign of g and update.
|
||||
// If g <0, add v a little.
|
||||
// If g> 0, subtract v slightly.
|
||||
|
||||
// Since we only add integers, no decimal part is required.
|
||||
|
||||
// It's a good idea to move around 0-5.
|
||||
// It is better to have a Gaussian distribution, so generate a 5-bit random number (each bit has a 1/2 probability of 1),
|
||||
// Pop_count() it. At this time, it has a binomial distribution.
|
||||
//int16_t diff = (int16_t)POPCNT32((u32)prng.rand(31));
|
||||
// → If I do this with 80 threads, this AsyncPRNG::rand() locks, so I slowed down. This implementation is not good.
|
||||
int16_t diff = 1;
|
||||
|
||||
double V = v;
|
||||
if (g > 0.0)
|
||||
V-= diff;
|
||||
else
|
||||
V+= diff;
|
||||
|
||||
V = (std::min)((double)(std::numeric_limits<T>::max)(), V);
|
||||
V = (std::max)((double)(std::numeric_limits<T>::min)(), V);
|
||||
|
||||
v = (T)V;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// grad setting
|
||||
template <typename T> void set_grad(const T& g_) { g = g_; }
|
||||
|
||||
// Add grad
|
||||
template <typename T> void add_grad(const T& g_) { g += g_; }
|
||||
|
||||
LearnFloatType get_grad() const { return g; }
|
||||
};
|
||||
#if defined(_MSC_VER)
|
||||
#pragma pack(pop)
|
||||
#elif defined(__GNUC__)
|
||||
#pragma pack(0)
|
||||
#endif
|
||||
|
||||
// Turned weight array
|
||||
// In order to be able to handle it transparently, let's have the same member as Weight.
|
||||
struct Weight2
|
||||
{
|
||||
Weight w[2];
|
||||
|
||||
//Evaluate your turn, eta 1/8.
|
||||
template <typename T> void updateFV(std::array<T, 2>& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); }
|
||||
|
||||
template <typename T> void set_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); }
|
||||
template <typename T> void add_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); }
|
||||
|
||||
std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
|
||||
};
|
||||
}
|
||||
|
||||
#endif // defined (EVAL_LEARN)
|
||||
#endif
|
||||
@@ -1,123 +0,0 @@
|
||||
#include "../types.h"
|
||||
|
||||
#if defined(EVAL_LEARN)
|
||||
|
||||
#include "multi_think.h"
|
||||
#include "../tt.h"
|
||||
#include "../uci.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
void MultiThink::go_think()
|
||||
{
|
||||
// Keep a copy to restore the Options settings later.
|
||||
auto oldOptions = Options;
|
||||
|
||||
// When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is
|
||||
// Since it is not thread safe, it is guaranteed here that it is being completely read in memory.
|
||||
Options["BookOnTheFly"] = std::string("false");
|
||||
|
||||
// Read evaluation function, etc.
|
||||
// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
|
||||
// Skip memory corruption check.
|
||||
Eval::init_NNUE();
|
||||
|
||||
// Call the derived class's init().
|
||||
init();
|
||||
|
||||
// The loop upper limit is set with set_loop_max().
|
||||
loop_count = 0;
|
||||
done_count = 0;
|
||||
|
||||
// Create threads as many as Options["Threads"] and start thinking.
|
||||
std::vector<std::thread> threads;
|
||||
auto thread_num = (size_t)Options["Threads"];
|
||||
|
||||
// Secure end flag of worker thread
|
||||
thread_finished.resize(thread_num);
|
||||
|
||||
// start worker thread
|
||||
for (size_t i = 0; i < thread_num; ++i)
|
||||
{
|
||||
thread_finished[i] = 0;
|
||||
threads.push_back(std::thread([i, this]
|
||||
{
|
||||
// exhaust all processor threads.
|
||||
WinProcGroup::bindThisThread(i);
|
||||
|
||||
// execute the overridden process
|
||||
this->thread_worker(i);
|
||||
|
||||
// Set the end flag because the thread has ended
|
||||
this->thread_finished[i] = 1;
|
||||
}));
|
||||
}
|
||||
|
||||
// wait for all threads to finish
|
||||
// for (auto& th :threads)
|
||||
// th.join();
|
||||
// If you write like, the thread will rush here while it is still working,
|
||||
// During that time, callback_func() cannot be called and you cannot save.
|
||||
// Therefore, you need to check the end flag yourself.
|
||||
|
||||
// function to determine if all threads have finished
|
||||
auto threads_done = [&]()
|
||||
{
|
||||
// returns false if no one is finished
|
||||
for (auto& f : thread_finished)
|
||||
if (!f)
|
||||
return false;
|
||||
return true;
|
||||
};
|
||||
|
||||
// Call back if the callback function is set.
|
||||
auto do_a_callback = [&]()
|
||||
{
|
||||
if (callback_func)
|
||||
callback_func();
|
||||
};
|
||||
|
||||
|
||||
for (uint64_t i = 0 ; ; )
|
||||
{
|
||||
// If all threads have finished, exit the loop.
|
||||
if (threads_done())
|
||||
break;
|
||||
|
||||
sleep(1000);
|
||||
|
||||
// callback_func() is called every callback_seconds.
|
||||
if (++i == callback_seconds)
|
||||
{
|
||||
do_a_callback();
|
||||
// Since I am returning from ↑, I reset the counter, so
|
||||
// no matter how long it takes to save() etc. in do_a_callback()
|
||||
// The next call will take a certain amount of time.
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Last save.
|
||||
std::cout << std::endl << "finalize..";
|
||||
|
||||
// do_a_callback();
|
||||
// → It should be saved by the caller, so I feel that it is not necessary here.
|
||||
|
||||
// It is possible that the exit code of the thread is running but the exit code of the thread is running, so
|
||||
// We need to wait for the end with join().
|
||||
for (auto& th : threads)
|
||||
th.join();
|
||||
|
||||
// The file writing thread etc. are still running only when all threads are finished
|
||||
// Since the work itself may not have completed, output only that all threads have finished.
|
||||
std::cout << "all threads are joined." << std::endl;
|
||||
|
||||
// Restored because Options were rewritten.
|
||||
// Restore the handler because the handler will not start unless you assign a value.
|
||||
for (auto& s : oldOptions)
|
||||
Options[s.first] = std::string(s.second);
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif // defined(EVAL_LEARN)
|
||||
@@ -1,152 +0,0 @@
|
||||
#ifndef _MULTI_THINK_
|
||||
#define _MULTI_THINK_
|
||||
|
||||
#if defined(EVAL_LEARN)
|
||||
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
|
||||
#include "../misc.h"
|
||||
#include "../learn/learn.h"
|
||||
#include "../thread_win32_osx.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
// Learning from a game record, when making yourself think and generating a fixed track, etc.
|
||||
// Helper class used when multiple threads want to call Search::think() individually.
|
||||
// Derive and use this class.
|
||||
struct MultiThink
|
||||
{
|
||||
MultiThink() : prng(std::chrono::system_clock::now().time_since_epoch().count())
|
||||
{
|
||||
loop_count = 0;
|
||||
}
|
||||
|
||||
// Call this function from the master thread, each thread will think,
|
||||
// Return control when the thought ending condition is satisfied.
|
||||
// Do something else.
|
||||
// ・It is safe for each thread to call Learner::search(),qsearch()
|
||||
// Separates the substitution table for each thread. (It will be restored after the end.)
|
||||
// ・Book is not thread safe when in on the fly mode, so temporarily change this mode.
|
||||
// Turn it off.
|
||||
// [Requirements]
|
||||
// 1) Override thread_worker()
|
||||
// 2) Set the loop count with set_loop_max()
|
||||
// 3) set a function to be called back periodically (if necessary)
|
||||
// callback_func and callback_interval
|
||||
void go_think();
|
||||
|
||||
// If there is something you want to initialize on the derived class side, override this,
|
||||
// Called when initialization is completed with go_think().
|
||||
// It is better to read the fixed trace at that timing.
|
||||
virtual void init() {}
|
||||
|
||||
// A thread worker that is called by creating a thread when you go_think()
|
||||
// Override and use this.
|
||||
virtual void thread_worker(size_t thread_id) = 0;
|
||||
|
||||
// Called back every callback_seconds [seconds] when go_think().
|
||||
std::function<void()> callback_func;
|
||||
uint64_t callback_seconds = 600;
|
||||
|
||||
// Set the number of times worker processes (calls Search::think()).
|
||||
void set_loop_max(uint64_t loop_max_) { loop_max = loop_max_; }
|
||||
|
||||
// Get the value set by set_loop_max().
|
||||
uint64_t get_loop_max() const { return loop_max; }
|
||||
|
||||
// [ASYNC] Take the value of the loop counter and add the loop counter after taking it out.
|
||||
// If the loop counter has reached loop_max, return UINT64_MAX.
|
||||
// If you want to generate a phase, you must call this function at the time of generating the phase,
|
||||
// Please note that the number of generated phases and the value of the counter will not match.
|
||||
uint64_t get_next_loop_count() {
|
||||
std::unique_lock<std::mutex> lk(loop_mutex);
|
||||
if (loop_count >= loop_max)
|
||||
return UINT64_MAX;
|
||||
return loop_count++;
|
||||
}
|
||||
|
||||
// [ASYNC] For returning the processed number. Each time it is called, it returns a counter that is incremented.
|
||||
uint64_t get_done_count() {
|
||||
std::unique_lock<std::mutex> lk(loop_mutex);
|
||||
return ++done_count;
|
||||
}
|
||||
|
||||
// Mutex when worker thread accesses I/O
|
||||
std::mutex io_mutex;
|
||||
|
||||
protected:
|
||||
// Random number generator body
|
||||
AsyncPRNG prng;
|
||||
|
||||
private:
|
||||
// number of times worker processes (calls Search::think())
|
||||
std::atomic<uint64_t> loop_max;
|
||||
// number of times the worker has processed (calls Search::think())
|
||||
std::atomic<uint64_t> loop_count;
|
||||
// To return the number of times it has been processed.
|
||||
std::atomic<uint64_t> done_count;
|
||||
|
||||
// Mutex when changing the variables in ↑
|
||||
std::mutex loop_mutex;
|
||||
|
||||
// Thread end flag.
|
||||
// vector<bool> may not be reflected properly when trying to rewrite from multiple threads...
|
||||
typedef uint8_t Flag;
|
||||
std::vector<Flag> thread_finished;
|
||||
|
||||
};
|
||||
|
||||
// Mechanism to process task during idle time.
|
||||
// master passes the task with push_task_async() whenever you like.
|
||||
// When slave executes on_idle() in its spare time, it retrieves one task and continues execution until there is no queue.
|
||||
// Convenient to use when you want to write MultiThink thread worker in master-slave method.
|
||||
struct TaskDispatcher
|
||||
{
|
||||
typedef std::function<void(size_t /* thread_id */)> Task;
|
||||
|
||||
// slave calls this function during idle.
|
||||
void on_idle(size_t thread_id)
|
||||
{
|
||||
Task task;
|
||||
while ((task = get_task_async()) != nullptr)
|
||||
task(thread_id);
|
||||
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
// Stack [ASYNC] task.
|
||||
void push_task_async(Task task)
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(task_mutex);
|
||||
tasks.push_back(task);
|
||||
}
|
||||
|
||||
// Allocate size array elements for task in advance.
|
||||
void task_reserve(size_t size)
|
||||
{
|
||||
tasks.reserve(size);
|
||||
}
|
||||
|
||||
protected:
|
||||
// set of tasks
|
||||
std::vector<Task> tasks;
|
||||
|
||||
// Take out one [ASYNC] task. Called from on_idle().
|
||||
Task get_task_async()
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(task_mutex);
|
||||
if (tasks.size() == 0)
|
||||
return nullptr;
|
||||
Task task = *tasks.rbegin();
|
||||
tasks.pop_back();
|
||||
return task;
|
||||
}
|
||||
|
||||
// a mutex for accessing tasks
|
||||
std::mutex task_mutex;
|
||||
};
|
||||
|
||||
#endif // defined(EVAL_LEARN)
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,43 @@
|
||||
#include "opening_book.h"
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// Loads an opening book from an .epd file: every non-empty line is stored as
// one FEN, and the resulting list is shuffled with `prng`.
// If the file cannot be opened the book is left empty.
EpdOpeningBook::EpdOpeningBook(const std::string& file, PRNG& prng) :
    OpeningBook(file)
{
    std::ifstream in(file);
    if (!in)
    {
        return;
    }

    std::string line;
    while (std::getline(in, line))
    {
        // Files with CRLF line endings leave a trailing '\r' on every line.
        // Strip it so that blank lines are recognised as empty and FENs do
        // not carry a stray control character.
        if (!line.empty() && line.back() == '\r')
            line.pop_back();

        if (line.empty())
            continue;

        fens.emplace_back(line);
    }

    Algo::shuffle(fens, prng);
}
|
||||
|
||||
// Returns true when `lhs` finishes with the character sequence `end`.
// An empty `end` matches every string.
static bool ends_with(const std::string& lhs, const std::string& end)
{
    const auto suffix_len = end.size();
    const auto total_len = lhs.size();

    return suffix_len <= total_len
        && lhs.compare(total_len - suffix_len, suffix_len, end) == 0;
}
|
||||
|
||||
std::unique_ptr<OpeningBook> open_opening_book(const std::string& filename, PRNG& prng)
|
||||
{
|
||||
if (ends_with(filename, ".epd"))
|
||||
return std::make_unique<EpdOpeningBook>(filename, prng);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
#ifndef LEARN_OPENING_BOOK_H
|
||||
#define LEARN_OPENING_BOOK_H
|
||||
|
||||
#include "misc.h"
|
||||
#include "position.h"
|
||||
#include "thread.h"
|
||||
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// Base class of opening books: a list of FEN strings handed out round-robin.
// Concrete books derive from it and fill `fens` in their constructor.
struct OpeningBook {

    // Books are owned and destroyed through std::unique_ptr<OpeningBook>
    // (see open_opening_book()), so the destructor must be virtual: deleting
    // a derived object through a base pointer is undefined behaviour otherwise.
    virtual ~OpeningBook() = default;

    // Declaring the destructor would suppress the implicit move operations;
    // default all copy/move operations so the class stays as copyable and
    // movable as before.
    OpeningBook(const OpeningBook&) = default;
    OpeningBook(OpeningBook&&) = default;
    OpeningBook& operator=(const OpeningBook&) = default;
    OpeningBook& operator=(OpeningBook&&) = default;

    // Returns the next FEN and advances the cursor, wrapping around to the
    // first entry after the last one has been returned.
    // Precondition: the book is not empty (checked by assert only).
    const std::string& next_fen()
    {
        assert(fens.size() > 0);

        auto& fen = fens[current_index++];
        if (current_index >= fens.size())
            current_index = 0;

        return fen;
    }

    // Number of FENs in the book.
    std::size_t size() const { return fens.size(); }

    // Name of the file this book was created from.
    const std::string& get_filename() const { return filename; }

protected:
    // Only derived classes may construct a book; they are expected to fill `fens`.
    OpeningBook(const std::string& file) :
        filename(file),
        current_index(0)
    {
    }

    std::string filename;
    std::vector<std::string> fens;
    std::size_t current_index;
};
|
||||
|
||||
// Opening book backed by an .epd file.
// The constructor (defined in the accompanying .cpp) stores every non-empty
// line of `file` as one FEN and shuffles the list with `prng`; if the file
// cannot be opened the book stays empty.
struct EpdOpeningBook : OpeningBook {
|
||||
|
||||
EpdOpeningBook(const std::string& file, PRNG& prng);
|
||||
};
|
||||
|
||||
std::unique_ptr<OpeningBook> open_opening_book(const std::string& filename, PRNG& prng);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,46 @@
|
||||
#ifndef _PACKED_SFEN_H_
|
||||
#define _PACKED_SFEN_H_
|
||||
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// A position packed into 32 bytes.
struct PackedSfen { std::uint8_t data[32]; };

// A packed position together with its evaluation and game information.
// Writing different members depending on build options would make training
// data impossible to reuse, so all members below are always written.
struct PackedSfenValue
{
    // The position.
    PackedSfen sfen;

    // Evaluation value returned from Learner::search().
    std::int16_t score;

    // First move of the PV.
    // Used when computing the match rate with the teacher.
    std::uint16_t move;

    // Ply count of this position, counted from the initial position.
    std::uint16_t gamePly;

    // 1 if the side to move in this position ultimately wins the game,
    // -1 if it loses, 0 if the game is drawn.
    // Draws are only written by the gensfen command when
    // LEARN_GENSFEN_DRAW_RESULT is enabled.
    std::int8_t game_result;

    // Explicit padding: training files are exchanged between machines, so the
    // structure must be exactly 40 bytes in every environment.
    std::uint8_t padding;

    // 32 + 2 + 2 + 2 + 1 + 1 = 40 bytes
};

// Enforce the documented on-disk layout at compile time.
static_assert(sizeof(PackedSfen) == 32, "PackedSfen must be exactly 32 bytes");
static_assert(sizeof(PackedSfenValue) == 40, "PackedSfenValue must be exactly 40 bytes");
|
||||
|
||||
// Phase array: PSVector stands for packed sfen vector.
|
||||
using PSVector = std::vector<PackedSfenValue>;
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,386 @@
|
||||
#include "sfen_packer.h"
|
||||
|
||||
#include "packed_sfen.h"
|
||||
|
||||
#include "misc.h"
|
||||
#include "position.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <cstring> // std::memset()
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// Bit-level reader/writer over a caller-supplied byte buffer.
// Bits are stored least-significant-bit first within each byte.
// Used to encode and decode packed positions.
struct BitStream
{
    // Attaches the stream to `data_` and rewinds the cursor.
    // Writing only ever sets bits, so the buffer must be zeroed beforehand.
    void set_data(std::uint8_t* data_) { data = data_; reset(); }

    // Returns the pointer passed to set_data() (nullptr if none was set).
    std::uint8_t* get_data() const { return data; }

    // Returns the position, in bits, of the next bit to read or write.
    int get_cursor() const { return bit_cursor; }

    // Rewinds the cursor to the first bit.
    void reset() { bit_cursor = 0; }

    // Writes one bit: 1 if `b` is non-zero, 0 otherwise.
    void write_one_bit(int b)
    {
        if (b)
            data[bit_cursor / 8] |= 1 << (bit_cursor & 7);

        ++bit_cursor;
    }

    // Reads one bit and returns it as 0 or 1.
    int read_one_bit()
    {
        int b = (data[bit_cursor / 8] >> (bit_cursor & 7)) & 1;
        ++bit_cursor;

        return b;
    }

    // Writes the lowest `n` bits of `d`, least significant bit first.
    void write_n_bit(int d, int n)
    {
        // Shift the value (as unsigned) down instead of shifting a signed 1 up,
        // which would be undefined for bit 31.
        const unsigned int bits = static_cast<unsigned int>(d);
        for (int i = 0; i < n; ++i)
            write_one_bit(static_cast<int>((bits >> i) & 1u));
    }

    // Reads `n` bits; inverse of write_n_bit().
    int read_n_bit(int n)
    {
        int result = 0;
        for (int i = 0; i < n; ++i)
            result |= read_one_bit() ? (1 << i) : 0;

        return result;
    }

private:
    // Next bit position to read/write.
    int bit_cursor = 0;

    // Buffer the stream operates on; not owned.
    std::uint8_t* data = nullptr;
};
|
||||
|
||||
// Class for compressing/decompressing sfen
|
||||
// sfen can be packed to 256bit (32bytes) by Huffman coding.
|
||||
// This is proven by mini. The above is Huffman coding.
|
||||
//
|
||||
// Internal format = 1-bit turn + 7-bit king position *2 + piece on board (Huffman coding) + hand piece (Huffman coding)
|
||||
// Side to move (White = 0, Black = 1) (1bit)
|
||||
// White King Position (6 bits)
|
||||
// Black King Position (6 bits)
|
||||
// Huffman Encoding of the board
|
||||
// Castling availability (1 bit x 4)
|
||||
// En passant square (1 or 1 + 6 bits)
|
||||
// Rule 50 (6 bits)
|
||||
// Game play (8 bits)
|
||||
//
|
||||
// TODO(someone): Rename SFEN to FEN.
|
||||
//
|
||||
struct SfenPacker
|
||||
{
|
||||
void pack(const Position& pos);
|
||||
|
||||
// sfen packed by pack() (256bit = 32bytes)
|
||||
// Or sfen to decode with unpack()
|
||||
uint8_t *data; // uint8_t[32];
|
||||
|
||||
BitStream stream;
|
||||
|
||||
// Output the board pieces to stream.
|
||||
void write_board_piece_to_stream(Piece pc);
|
||||
|
||||
// Read one board piece from stream
|
||||
Piece read_board_piece_from_stream();
|
||||
};
|
||||
|
||||
|
||||
// Huffman coding
|
||||
// * is simplified from mini encoding to make conversion easier.
|
||||
//
|
||||
// Huffman Encoding
|
||||
//
|
||||
// Empty xxxxxxx0
|
||||
// Pawn xxxxx001 + 1 bit (Color)
|
||||
// Knight xxxxx011 + 1 bit (Color)
|
||||
// Bishop xxxxx101 + 1 bit (Color)
|
||||
// Rook xxxxx111 + 1 bit (Color)
|
||||
// Queen xxxx1001 + 1 bit (Color)
|
||||
//
|
||||
// Worst case:
|
||||
// - 32 empty squares 32 bits
|
||||
// - 30 pieces 150 bits
|
||||
// - 2 kings 12 bits
|
||||
// - castling rights 4 bits
|
||||
// - ep square 7 bits
|
||||
// - rule50 7 bits
|
||||
// - game ply 16 bits
|
||||
// - TOTAL 228 bits < 256 bits
|
||||
|
||||
// One entry of the piece code table.
struct HuffmanedPiece
{
    int code; // code value; written to the stream least significant bit first
    int bits; // number of bits in the code
};

// Code table indexed by piece type (kings are not encoded on the board).
constexpr HuffmanedPiece huffman_table[] =
{
    { 0b0000, 1 }, // NO_PIECE
    { 0b0001, 4 }, // PAWN
    { 0b0011, 4 }, // KNIGHT
    { 0b0101, 4 }, // BISHOP
    { 0b0111, 4 }, // ROOK
    { 0b1001, 4 }, // QUEEN
};
|
||||
|
||||
// Pack sfen and store in data[32].
|
||||
void SfenPacker::pack(const Position& pos)
|
||||
{
|
||||
memset(data, 0, 32 /* 256bit */);
|
||||
stream.set_data(data);
|
||||
|
||||
// turn
|
||||
// Side to move.
|
||||
stream.write_one_bit((int)(pos.side_to_move()));
|
||||
|
||||
// 7-bit positions for leading and trailing balls
|
||||
// White king and black king, 6 bits for each.
|
||||
for(auto c: Colors)
|
||||
stream.write_n_bit(pos.king_square(c), 6);
|
||||
|
||||
// Write the pieces on the board other than the kings.
|
||||
for (Rank r = RANK_8; r >= RANK_1; --r)
|
||||
{
|
||||
for (File f = FILE_A; f <= FILE_H; ++f)
|
||||
{
|
||||
Piece pc = pos.piece_on(make_square(f, r));
|
||||
if (type_of(pc) == KING)
|
||||
continue;
|
||||
write_board_piece_to_stream(pc);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(someone): Support chess960.
|
||||
stream.write_one_bit(pos.can_castle(WHITE_OO));
|
||||
stream.write_one_bit(pos.can_castle(WHITE_OOO));
|
||||
stream.write_one_bit(pos.can_castle(BLACK_OO));
|
||||
stream.write_one_bit(pos.can_castle(BLACK_OOO));
|
||||
|
||||
if (pos.ep_square() == SQ_NONE) {
|
||||
stream.write_one_bit(0);
|
||||
}
|
||||
else {
|
||||
stream.write_one_bit(1);
|
||||
stream.write_n_bit(static_cast<int>(pos.ep_square()), 6);
|
||||
}
|
||||
|
||||
stream.write_n_bit(pos.state()->rule50, 6);
|
||||
|
||||
const int fm = 1 + (pos.game_ply()-(pos.side_to_move() == BLACK)) / 2;
|
||||
stream.write_n_bit(fm, 8);
|
||||
|
||||
// Write high bits of half move. This is a fix for the
|
||||
// limited range of half move counter.
|
||||
// This is backwards compatibile.
|
||||
stream.write_n_bit(fm >> 8, 8);
|
||||
|
||||
// Write the highest bit of rule50 at the end. This is a backwards
|
||||
// compatibile fix for rule50 having only 6 bits stored.
|
||||
// This bit is just ignored by the old parsers.
|
||||
stream.write_n_bit(pos.state()->rule50 >> 6, 1);
|
||||
|
||||
assert(stream.get_cursor() <= 256);
|
||||
}
|
||||
|
||||
// Output the board pieces to stream.
|
||||
void SfenPacker::write_board_piece_to_stream(Piece pc)
|
||||
{
|
||||
// piece type
|
||||
PieceType pr = type_of(pc);
|
||||
auto c = huffman_table[pr];
|
||||
stream.write_n_bit(c.code, c.bits);
|
||||
|
||||
if (pc == NO_PIECE)
|
||||
return;
|
||||
|
||||
// first and second flag
|
||||
stream.write_one_bit(color_of(pc));
|
||||
}
|
||||
|
||||
// Read one board piece from stream
|
||||
Piece SfenPacker::read_board_piece_from_stream()
|
||||
{
|
||||
PieceType pr = NO_PIECE_TYPE;
|
||||
int code = 0, bits = 0;
|
||||
while (true)
|
||||
{
|
||||
code |= stream.read_one_bit() << bits;
|
||||
++bits;
|
||||
|
||||
assert(bits <= 6);
|
||||
|
||||
for (pr = NO_PIECE_TYPE; pr <KING; ++pr)
|
||||
if (huffman_table[pr].code == code
|
||||
&& huffman_table[pr].bits == bits)
|
||||
goto Found;
|
||||
}
|
||||
Found:;
|
||||
if (pr == NO_PIECE_TYPE)
|
||||
return NO_PIECE;
|
||||
|
||||
// first and second flag
|
||||
Color c = (Color)stream.read_one_bit();
|
||||
|
||||
return make_piece(c, pr);
|
||||
}
|
||||
|
||||
// Rebuilds `pos` from the bit-packed position `sfen`.
//
// `si` is zeroed and becomes the position's current StateInfo; `th` becomes
// the owning thread. The stream is consumed in this order: side to move,
// both king squares, the remaining board squares (rank 8 down to rank 1,
// file A to H), four castling flags, optional en passant square, rule50
// (low 6 bits), fullmove number (low 8 bits, then high 8 bits) and finally
// the 7th bit of rule50.
//
// Returns 0 on success. Returns 1 when the data is malformed: either the
// read cursor runs past 256 bits while placing pieces, or a castling flag
// is set although no rook of that color stands on the back rank.
int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th)
{
    SfenPacker packer;
    auto& stream = packer.stream;

    // TODO: separate streams for writing and reading. Here we actually have to
    // const_cast which is not safe in the long run.
    stream.set_data(const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(&sfen)));

    pos.clear();
    std::memset(si, 0, sizeof(StateInfo));
    std::fill_n(&pos.pieceList[0][0], sizeof(pos.pieceList) / sizeof(Square), SQ_NONE);
    pos.st = si;

    // Active color
    pos.sideToMove = static_cast<Color>(stream.read_one_bit());

    pos.pieceList[W_KING][0] = SQUARE_NB;
    pos.pieceList[B_KING][0] = SQUARE_NB;

    // The two king squares are stored first, 6 bits each.
    for (auto c : Colors)
        pos.board[stream.read_n_bit(6)] = make_piece(c, KING);

    // Piece placement
    for (Rank r = RANK_8; r >= RANK_1; --r)
    {
        for (File f = FILE_A; f <= FILE_H; ++f)
        {
            auto sq = make_square(f, r);

            Piece pc;
            if (type_of(pos.board[sq]) != KING)
            {
                // Not a king square: the piece (if any) comes from the stream.
                assert(pos.board[sq] == NO_PIECE);
                pc = packer.read_board_piece_from_stream();
            }
            else
            {
                // A king was already written to the board above. Take it off
                // again so that put_piece() finds an empty square.
                pc = pos.board[sq];
                pos.board[sq] = NO_PIECE;
            }

            // Empty square, nothing to place.
            if (pc == NO_PIECE)
                continue;

            pos.put_piece(Piece(pc), sq);

            if (stream.get_cursor() > 256)
                return 1;
        }
    }

    // Castling availability.
    // TODO(someone): Support chess960.
    //
    // For every flag that is set, scan the back rank for the rook, starting
    // at the corner on the castling side. Each scan stops at the opposite
    // corner: a set flag without a rook means the data is corrupt, and
    // stepping further would read outside the board.
    pos.st->castlingRights = 0;
    if (stream.read_one_bit()) {
        Square rsq;
        for (rsq = relative_square(WHITE, SQ_H1); pos.piece_on(rsq) != W_ROOK; --rsq)
            if (rsq == relative_square(WHITE, SQ_A1))
                return 1;
        pos.set_castling_right(WHITE, rsq);
    }
    if (stream.read_one_bit()) {
        Square rsq;
        for (rsq = relative_square(WHITE, SQ_A1); pos.piece_on(rsq) != W_ROOK; ++rsq)
            if (rsq == relative_square(WHITE, SQ_H1))
                return 1;
        pos.set_castling_right(WHITE, rsq);
    }
    if (stream.read_one_bit()) {
        Square rsq;
        for (rsq = relative_square(BLACK, SQ_H1); pos.piece_on(rsq) != B_ROOK; --rsq)
            if (rsq == relative_square(BLACK, SQ_A1))
                return 1;
        pos.set_castling_right(BLACK, rsq);
    }
    if (stream.read_one_bit()) {
        Square rsq;
        for (rsq = relative_square(BLACK, SQ_A1); pos.piece_on(rsq) != B_ROOK; ++rsq)
            if (rsq == relative_square(BLACK, SQ_H1))
                return 1;
        pos.set_castling_right(BLACK, rsq);
    }

    // En passant square. Ignore if no pawn capture is possible
    if (stream.read_one_bit()) {
        Square ep_square = static_cast<Square>(stream.read_n_bit(6));
        pos.st->epSquare = ep_square;

        if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
            || !(pos.pieces(~pos.sideToMove, PAWN) & (pos.st->epSquare + pawn_push(~pos.sideToMove))))
            pos.st->epSquare = SQ_NONE;
    }
    else {
        pos.st->epSquare = SQ_NONE;
    }

    // Halfmove clock (low 6 bits)
    pos.st->rule50 = stream.read_n_bit(6);

    // Fullmove number (low 8 bits)
    pos.gamePly = stream.read_n_bit(8);

    // Upper 8 bits of the fullmove number.
    pos.gamePly |= stream.read_n_bit(8) << 8;

    // Read the highest bit of rule50. This was added as a fix for rule50
    // counter having only 6 bits stored.
    // In older entries this will just be a zero bit.
    pos.st->rule50 |= stream.read_n_bit(1) << 6;

    // Convert from fullmove starting from 1 to gamePly starting from 0,
    // handle also common incorrect FEN with fullmove = 0.
    pos.gamePly = std::max(2 * (pos.gamePly - 1), 0) + (pos.sideToMove == BLACK);

    assert(stream.get_cursor() <= 256);

    pos.chess960 = false;
    pos.thisThread = th;
    pos.set_state(pos.st);

    assert(pos.pos_is_ok());

    return 0;
}
|
||||
|
||||
// Packs `pos` into a PackedSfen and returns it by value.
// The packer is pointed at the local result object and writes into it.
PackedSfen sfen_pack(Position& pos)
{
    PackedSfen packed;

    SfenPacker packer;
    packer.data = reinterpret_cast<uint8_t*>(&packed);
    packer.pack(pos);

    return packed;
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#ifndef _SFEN_PACKER_H_
|
||||
#define _SFEN_PACKER_H_
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include "learn/packed_sfen.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
class Position;
|
||||
struct StateInfo;
|
||||
class Thread;
|
||||
|
||||
namespace Learner {
|
||||
|
||||
int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th);
|
||||
PackedSfen sfen_pack(Position& pos);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,365 @@
|
||||
#include "sfen_stream.h"
|
||||
|
||||
#include "packed_sfen.h"
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <list>
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
#include <iostream>
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
|
||||
namespace Learner{
|
||||
|
||||
enum struct SfenReaderMode
|
||||
{
|
||||
Sequential,
|
||||
Cyclic
|
||||
};
|
||||
|
||||
// Sfen reader
|
||||
struct SfenReader
|
||||
{
|
||||
// Number of phases buffered by each thread 0.1M phases. 4M phase at 40HT
|
||||
static constexpr size_t DEFAULT_THREAD_BUFFER_SIZE = 10 * 1000;
|
||||
|
||||
// Buffer for reading files (If this is made larger,
|
||||
// the shuffle becomes larger and the phases may vary.
|
||||
// If it is too large, the memory consumption will increase.
|
||||
// SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
|
||||
static constexpr const size_t DEFAULT_SFEN_READ_SIZE = 1000 * 1000 * 10;
|
||||
|
||||
// Do not use std::random_device().
|
||||
// Because it always the same integers on MinGW.
|
||||
SfenReader(
|
||||
const std::vector<std::string>& filenames_,
|
||||
bool do_shuffle,
|
||||
SfenReaderMode mode_,
|
||||
int thread_num,
|
||||
const std::string& seed,
|
||||
size_t read_size = DEFAULT_SFEN_READ_SIZE,
|
||||
size_t buffer_size = DEFAULT_THREAD_BUFFER_SIZE
|
||||
) :
|
||||
filenames(filenames_.begin(), filenames_.end()),
|
||||
mode(mode_),
|
||||
sfen_read_size(read_size),
|
||||
thread_buffer_size(buffer_size),
|
||||
prng(seed)
|
||||
{
|
||||
packed_sfens.resize(thread_num);
|
||||
total_read = 0;
|
||||
end_of_files = false;
|
||||
shuffle = do_shuffle;
|
||||
stop_flag = false;
|
||||
|
||||
file_worker_thread = std::thread([&] {
|
||||
this->file_read_worker();
|
||||
});
|
||||
}
|
||||
|
||||
~SfenReader()
|
||||
{
|
||||
stop_flag = true;
|
||||
|
||||
if (file_worker_thread.joinable())
|
||||
file_worker_thread.join();
|
||||
}
|
||||
|
||||
// Load the phase for calculation such as mse.
|
||||
PSVector read_for_mse(uint64_t count)
|
||||
{
|
||||
PSVector sfen_for_mse;
|
||||
sfen_for_mse.reserve(count);
|
||||
|
||||
for (uint64_t i = 0; i < count; ++i)
|
||||
{
|
||||
PackedSfenValue ps;
|
||||
if (!read_to_thread_buffer(0, ps))
|
||||
{
|
||||
std::cout << "ERROR (sfen_reader): Reading failed." << std::endl;
|
||||
return sfen_for_mse;
|
||||
}
|
||||
|
||||
sfen_for_mse.push_back(ps);
|
||||
}
|
||||
|
||||
return sfen_for_mse;
|
||||
}
|
||||
|
||||
PSVector read_validation_set(const std::string& file_name, int eval_limit, bool use_draw_games)
|
||||
{
|
||||
PSVector sfen_for_mse;
|
||||
|
||||
auto input = open_sfen_input_file(file_name);
|
||||
|
||||
while(!input->eof())
|
||||
{
|
||||
std::optional<PackedSfenValue> p_opt = input->next();
|
||||
if (p_opt.has_value())
|
||||
{
|
||||
auto& p = *p_opt;
|
||||
|
||||
if (eval_limit < abs(p.score))
|
||||
continue;
|
||||
|
||||
if (!use_draw_games && p.game_result == 0)
|
||||
continue;
|
||||
|
||||
sfen_for_mse.push_back(p);
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return sfen_for_mse;
|
||||
}
|
||||
|
||||
// [ASYNC] Thread returns one aspect. Otherwise returns false.
|
||||
bool read_to_thread_buffer(size_t thread_id, PackedSfenValue& ps)
|
||||
{
|
||||
// If there are any positions left in the thread buffer
|
||||
// then retrieve one and return it.
|
||||
auto& thread_ps = packed_sfens[thread_id];
|
||||
|
||||
// Fill the read buffer if there is no remaining buffer,
|
||||
// but if it doesn't even exist, finish.
|
||||
// If the buffer is empty, fill it.
|
||||
if ((thread_ps == nullptr || thread_ps->empty())
|
||||
&& !read_to_thread_buffer_impl(thread_id))
|
||||
return false;
|
||||
|
||||
// read_to_thread_buffer_impl() returned true,
|
||||
// Since the filling of the thread buffer with the
|
||||
// phase has been completed successfully
|
||||
// thread_ps->rbegin() is alive.
|
||||
|
||||
ps = thread_ps->back();
|
||||
thread_ps->pop_back();
|
||||
|
||||
// If you've run out of buffers, call delete yourself to free this buffer.
|
||||
if (thread_ps->empty())
|
||||
{
|
||||
thread_ps.reset();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// [ASYNC] Read some aspects into thread buffer.
|
||||
bool read_to_thread_buffer_impl(size_t thread_id)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
// If you can fill from the file buffer, that's fine.
|
||||
if (packed_sfens_pool.size() != 0)
|
||||
{
|
||||
// It seems that filling is possible, so fill and finish.
|
||||
|
||||
packed_sfens[thread_id] = std::move(packed_sfens_pool.front());
|
||||
packed_sfens_pool.pop_front();
|
||||
|
||||
total_read += thread_buffer_size;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// The file to read is already gone. No more use.
|
||||
if (end_of_files)
|
||||
return false;
|
||||
|
||||
// Waiting for file worker to fill packed_sfens_pool.
|
||||
// The mutex isn't locked, so it should fill up soon.
|
||||
// Poor man's condition variable.
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void file_read_worker()
|
||||
{
|
||||
std::string currentFilename;
|
||||
uint64_t numEntriesReadFromCurrentFile = 0;
|
||||
|
||||
auto open_next_file = [&]() {
|
||||
// no more
|
||||
for(;;)
|
||||
{
|
||||
sfen_input_stream.reset();
|
||||
|
||||
if (filenames.empty())
|
||||
return false;
|
||||
|
||||
// Get the next file name.
|
||||
currentFilename = filenames.front();
|
||||
filenames.pop_front();
|
||||
|
||||
numEntriesReadFromCurrentFile = 0;
|
||||
|
||||
sfen_input_stream = open_sfen_input_file(currentFilename);
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
if (sfen_input_stream == nullptr)
|
||||
{
|
||||
out << "INFO (sfen_reader): File does not exist: " << currentFilename << '\n';
|
||||
}
|
||||
else
|
||||
{
|
||||
out << "INFO (sfen_reader): Opened file for reading: " << currentFilename << '\n';
|
||||
|
||||
// in case the file is empty or was deleted.
|
||||
if (sfen_input_stream->eof())
|
||||
{
|
||||
out << " - File empty, nothing to read.\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (sfen_input_stream == nullptr && !open_next_file())
|
||||
{
|
||||
auto out = sync_region_cout.new_region();
|
||||
out << "INFO (sfen_reader): End of files." << std::endl;
|
||||
end_of_files = true;
|
||||
return;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
// Wait for the buffer to run out.
|
||||
// This size() is read only, so you don't need to lock it.
|
||||
while (!stop_flag && packed_sfens_pool.size() >= sfen_read_size / thread_buffer_size)
|
||||
sleep(100);
|
||||
|
||||
if (stop_flag)
|
||||
return;
|
||||
|
||||
PSVector sfens;
|
||||
sfens.reserve(sfen_read_size);
|
||||
|
||||
// Read from the file into the file buffer.
|
||||
while (sfens.size() < sfen_read_size)
|
||||
{
|
||||
std::optional<PackedSfenValue> p = sfen_input_stream->next();
|
||||
if (p.has_value())
|
||||
{
|
||||
sfens.push_back(*p);
|
||||
++numEntriesReadFromCurrentFile;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mode == SfenReaderMode::Cyclic
|
||||
&& numEntriesReadFromCurrentFile > 0)
|
||||
{
|
||||
// The file contained data so we add it again to the end of the queue.
|
||||
filenames.emplace_back(currentFilename);
|
||||
}
|
||||
|
||||
if(!open_next_file())
|
||||
{
|
||||
// There was no next file. Abort.
|
||||
auto out = sync_region_cout.new_region();
|
||||
out << "INFO (sfen_reader): End of files." << std::endl;
|
||||
end_of_files = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shuffle the read phase data.
|
||||
if (shuffle)
|
||||
{
|
||||
Algo::shuffle(sfens, prng);
|
||||
}
|
||||
|
||||
// Divide this by thread_buffer_size. There should be size pieces.
|
||||
// sfen_read_size shall be a multiple of thread_buffer_size.
|
||||
assert((sfen_read_size % thread_buffer_size) == 0);
|
||||
|
||||
auto size = size_t(sfen_read_size / thread_buffer_size);
|
||||
std::vector<std::unique_ptr<PSVector>> buffers;
|
||||
buffers.reserve(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
// Delete this pointer on the receiving side.
|
||||
auto buf = std::make_unique<PSVector>();
|
||||
buf->resize(thread_buffer_size);
|
||||
memcpy(
|
||||
buf->data(),
|
||||
&sfens[i * thread_buffer_size],
|
||||
sizeof(PackedSfenValue) * thread_buffer_size);
|
||||
|
||||
buffers.emplace_back(std::move(buf));
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
|
||||
// The mutex lock is required because the%
|
||||
// contents of packed_sfens_pool are changed.
|
||||
|
||||
for (auto& buf : buffers)
|
||||
packed_sfens_pool.emplace_back(std::move(buf));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
// worker thread reading file in background
|
||||
std::thread file_worker_thread;
|
||||
|
||||
// sfen files
|
||||
std::deque<std::string> filenames;
|
||||
|
||||
std::atomic<bool> stop_flag;
|
||||
|
||||
// number of phases read (file to memory buffer)
|
||||
std::atomic<uint64_t> total_read;
|
||||
|
||||
// Do not shuffle when reading the phase.
|
||||
bool shuffle;
|
||||
|
||||
SfenReaderMode mode;
|
||||
|
||||
size_t sfen_read_size;
|
||||
size_t thread_buffer_size;
|
||||
|
||||
// Random number to shuffle when reading the phase
|
||||
PRNG prng;
|
||||
|
||||
// Did you read the files and reached the end?
|
||||
std::atomic<bool> end_of_files;
|
||||
|
||||
// handle of sfen file
|
||||
std::unique_ptr<BasicSfenInputStream> sfen_input_stream;
|
||||
|
||||
// sfen for each thread
|
||||
// (When the thread is used up, the thread should call delete to release it.)
|
||||
std::vector<std::unique_ptr<PSVector>> packed_sfens;
|
||||
|
||||
// Mutex when accessing packed_sfens_pool
|
||||
std::mutex mutex;
|
||||
|
||||
// pool of sfen. The worker thread read from the file is added here.
|
||||
// Each worker thread fills its own packed_sfens[thread_id] from here.
|
||||
// * Lock and access the mutex.
|
||||
std::list<std::unique_ptr<PSVector>> packed_sfens_pool;
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,222 @@
|
||||
#ifndef _SFEN_STREAM_H_
|
||||
#define _SFEN_STREAM_H_
|
||||
|
||||
#include "packed_sfen.h"
|
||||
|
||||
#include "extra/nnue_data_binpack_format.h"
|
||||
|
||||
#include <optional>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// On-disk formats the writer can produce.
enum struct SfenOutputType
{
    Bin,
    Binpack
};

// Returns true when `lhs` ends with the character sequence `end`.
// An empty `end` matches every string.
static bool ends_with(const std::string& lhs, const std::string& end)
{
    if (end.size() > lhs.size()) return false;

    // Compare the last end.size() characters of lhs against end.
    return lhs.compare(lhs.size() - end.size(), end.size(), end) == 0;
}

// Returns true when `filename` ends with "." followed by `extension`.
// `extension` is given without the leading dot.
static bool has_extension(const std::string& filename, const std::string& extension)
{
    return ends_with(filename, "." + extension);
}

// Returns `filename` unchanged when it already carries the extension `ext`
// (given without the dot), otherwise returns `filename` + "." + `ext`.
// The dot is part of the check, so a name that merely ends in the same
// letters (e.g. "cabin" for "bin") still gets the extension appended.
static std::string filename_with_extension(const std::string& filename, const std::string& ext)
{
    if (has_extension(filename, ext))
        return filename;

    return filename + "." + ext;
}
|
||||
|
||||
struct BasicSfenInputStream
|
||||
{
|
||||
virtual std::optional<PackedSfenValue> next() = 0;
|
||||
virtual bool eof() const = 0;
|
||||
virtual ~BasicSfenInputStream() {}
|
||||
};
|
||||
|
||||
struct BinSfenInputStream : BasicSfenInputStream
|
||||
{
|
||||
static constexpr auto openmode = std::ios::in | std::ios::binary;
|
||||
static inline const std::string extension = "bin";
|
||||
|
||||
BinSfenInputStream(std::string filename) :
|
||||
m_stream(filename, openmode),
|
||||
m_eof(!m_stream)
|
||||
{
|
||||
}
|
||||
|
||||
std::optional<PackedSfenValue> next() override
|
||||
{
|
||||
PackedSfenValue e;
|
||||
if(m_stream.read(reinterpret_cast<char*>(&e), sizeof(PackedSfenValue)))
|
||||
{
|
||||
return e;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_eof = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
bool eof() const override
|
||||
{
|
||||
return m_eof;
|
||||
}
|
||||
|
||||
~BinSfenInputStream() override {}
|
||||
|
||||
private:
|
||||
std::fstream m_stream;
|
||||
bool m_eof;
|
||||
};
|
||||
|
||||
struct BinpackSfenInputStream : BasicSfenInputStream
|
||||
{
|
||||
static constexpr auto openmode = std::ios::in | std::ios::binary;
|
||||
static inline const std::string extension = "binpack";
|
||||
|
||||
BinpackSfenInputStream(std::string filename) :
|
||||
m_stream(filename, openmode),
|
||||
m_eof(!m_stream.hasNext())
|
||||
{
|
||||
}
|
||||
|
||||
std::optional<PackedSfenValue> next() override
|
||||
{
|
||||
static_assert(sizeof(binpack::nodchip::PackedSfenValue) == sizeof(PackedSfenValue));
|
||||
|
||||
if (!m_stream.hasNext())
|
||||
{
|
||||
m_eof = true;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto training_data_entry = m_stream.next();
|
||||
auto v = binpack::trainingDataEntryToPackedSfenValue(training_data_entry);
|
||||
PackedSfenValue psv;
|
||||
// same layout, different types. One is from generic library.
|
||||
std::memcpy(&psv, &v, sizeof(PackedSfenValue));
|
||||
|
||||
return psv;
|
||||
}
|
||||
|
||||
bool eof() const override
|
||||
{
|
||||
return m_eof;
|
||||
}
|
||||
|
||||
~BinpackSfenInputStream() override {}
|
||||
|
||||
private:
|
||||
binpack::CompressedTrainingDataEntryReader m_stream;
|
||||
bool m_eof;
|
||||
};
|
||||
|
||||
struct BasicSfenOutputStream
|
||||
{
|
||||
virtual void write(const PSVector& sfens) = 0;
|
||||
virtual ~BasicSfenOutputStream() {}
|
||||
};
|
||||
|
||||
struct BinSfenOutputStream : BasicSfenOutputStream
|
||||
{
|
||||
static constexpr auto openmode = std::ios::out | std::ios::binary | std::ios::app;
|
||||
static inline const std::string extension = "bin";
|
||||
|
||||
BinSfenOutputStream(std::string filename) :
|
||||
m_stream(filename_with_extension(filename, extension), openmode)
|
||||
{
|
||||
}
|
||||
|
||||
void write(const PSVector& sfens) override
|
||||
{
|
||||
m_stream.write(reinterpret_cast<const char*>(sfens.data()), sizeof(PackedSfenValue) * sfens.size());
|
||||
}
|
||||
|
||||
~BinSfenOutputStream() override {}
|
||||
|
||||
private:
|
||||
std::fstream m_stream;
|
||||
};
|
||||
|
||||
struct BinpackSfenOutputStream : BasicSfenOutputStream
|
||||
{
|
||||
static constexpr auto openmode = std::ios::out | std::ios::binary | std::ios::app;
|
||||
static inline const std::string extension = "binpack";
|
||||
|
||||
BinpackSfenOutputStream(std::string filename) :
|
||||
m_stream(filename_with_extension(filename, extension), openmode)
|
||||
{
|
||||
}
|
||||
|
||||
void write(const PSVector& sfens) override
|
||||
{
|
||||
static_assert(sizeof(binpack::nodchip::PackedSfenValue) == sizeof(PackedSfenValue));
|
||||
|
||||
for(auto& sfen : sfens)
|
||||
{
|
||||
// The library uses a type that's different but layout-compatibile.
|
||||
binpack::nodchip::PackedSfenValue e;
|
||||
std::memcpy(&e, &sfen, sizeof(binpack::nodchip::PackedSfenValue));
|
||||
m_stream.addTrainingDataEntry(binpack::packedSfenValueToTrainingDataEntry(e));
|
||||
}
|
||||
}
|
||||
|
||||
~BinpackSfenOutputStream() override {}
|
||||
|
||||
private:
|
||||
binpack::CompressedTrainingDataEntryWriter m_stream;
|
||||
};
|
||||
|
||||
inline std::unique_ptr<BasicSfenInputStream> open_sfen_input_file(const std::string& filename)
|
||||
{
|
||||
if (has_extension(filename, BinSfenInputStream::extension))
|
||||
return std::make_unique<BinSfenInputStream>(filename);
|
||||
else if (has_extension(filename, BinpackSfenInputStream::extension))
|
||||
return std::make_unique<BinpackSfenInputStream>(filename);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
inline std::unique_ptr<BasicSfenOutputStream> create_new_sfen_output(const std::string& filename, SfenOutputType sfen_output_type)
|
||||
{
|
||||
switch(sfen_output_type)
|
||||
{
|
||||
case SfenOutputType::Bin:
|
||||
return std::make_unique<BinSfenOutputStream>(filename);
|
||||
case SfenOutputType::Binpack:
|
||||
return std::make_unique<BinpackSfenOutputStream>(filename);
|
||||
}
|
||||
|
||||
assert(false);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
inline std::unique_ptr<BasicSfenOutputStream> create_new_sfen_output(const std::string& filename)
|
||||
{
|
||||
if (has_extension(filename, BinSfenOutputStream::extension))
|
||||
return std::make_unique<BinSfenOutputStream>(filename);
|
||||
else if (has_extension(filename, BinpackSfenOutputStream::extension))
|
||||
return std::make_unique<BinpackSfenOutputStream>(filename);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,206 @@
|
||||
#include "packed_sfen.h"
|
||||
#include "sfen_stream.h"
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
#include "extra/nnue_data_binpack_format.h"
|
||||
|
||||
#include "syzygy/tbprobe.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <shared_mutex>
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Learner {
|
||||
|
||||
// Helper class for exporting Sfen
|
||||
struct SfenWriter
|
||||
{
|
||||
// Amount of sfens required to flush the buffer.
|
||||
static constexpr size_t SFEN_WRITE_SIZE = 5000;
|
||||
|
||||
// File name to write and number of threads to create
|
||||
SfenWriter(string filename_, int thread_num, uint64_t save_count, SfenOutputType sfen_output_type)
|
||||
{
|
||||
sfen_buffers_pool.reserve((size_t)thread_num * 10);
|
||||
sfen_buffers.resize(thread_num);
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
out << "INFO (sfen_writer): Creating new data file at " << filename_ << endl;
|
||||
|
||||
sfen_format = sfen_output_type;
|
||||
output_file_stream = create_new_sfen_output(filename_, sfen_format);
|
||||
filename = filename_;
|
||||
save_every = save_count;
|
||||
|
||||
finished = false;
|
||||
|
||||
file_worker_thread = std::thread([&] { this->file_write_worker(); });
|
||||
}
|
||||
|
||||
~SfenWriter()
|
||||
{
|
||||
flush();
|
||||
|
||||
finished = true;
|
||||
file_worker_thread.join();
|
||||
output_file_stream.reset();
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
{
|
||||
// All buffers should be empty since file_worker_thread
|
||||
// should have written everything before exiting.
|
||||
for (const auto& p : sfen_buffers) { assert(p == nullptr); (void)p ; }
|
||||
assert(sfen_buffers_pool.empty());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void write(size_t thread_id, const PackedSfenValue& psv)
|
||||
{
|
||||
// We have a buffer for each thread and add it there.
|
||||
// If the buffer overflows, write it to a file.
|
||||
|
||||
// This buffer is prepared for each thread.
|
||||
auto& buf = sfen_buffers[thread_id];
|
||||
|
||||
// Secure since there is no buf at the first time
|
||||
// and immediately after writing the thread buffer.
|
||||
if (!buf)
|
||||
{
|
||||
buf = std::make_unique<PSVector>();
|
||||
buf->reserve(SFEN_WRITE_SIZE);
|
||||
}
|
||||
|
||||
// Buffer is exclusive to this thread.
|
||||
// There is no need for a critical section.
|
||||
buf->push_back(psv);
|
||||
|
||||
if (buf->size() >= SFEN_WRITE_SIZE)
|
||||
{
|
||||
// If you load it in sfen_buffers_pool, the worker will do the rest.
|
||||
|
||||
// Critical section since sfen_buffers_pool is shared among threads.
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
sfen_buffers_pool.emplace_back(std::move(buf));
|
||||
}
|
||||
}
|
||||
|
||||
void flush()
|
||||
{
|
||||
for (size_t i = 0; i < sfen_buffers.size(); ++i)
|
||||
{
|
||||
flush(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Move what remains in the buffer for your thread to a buffer for writing to a file.
|
||||
void flush(size_t thread_id)
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
|
||||
auto& buf = sfen_buffers[thread_id];
|
||||
|
||||
// There is a case that buf==nullptr, so that check is necessary.
|
||||
if (buf && buf->size() != 0)
|
||||
{
|
||||
sfen_buffers_pool.emplace_back(std::move(buf));
|
||||
}
|
||||
}
|
||||
|
||||
// Dedicated thread to write to file
|
||||
void file_write_worker()
|
||||
{
|
||||
while (!finished || sfen_buffers_pool.size())
|
||||
{
|
||||
vector<std::unique_ptr<PSVector>> buffers;
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
|
||||
// Atomically swap take the filled buffers and
|
||||
// create a new buffer pool for threads to fill.
|
||||
buffers = std::move(sfen_buffers_pool);
|
||||
sfen_buffers_pool = std::vector<std::unique_ptr<PSVector>>();
|
||||
}
|
||||
|
||||
if (!buffers.size())
|
||||
{
|
||||
// Poor man's condition variable.
|
||||
sleep(100);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto& buf : buffers)
|
||||
{
|
||||
output_file_stream->write(*buf);
|
||||
|
||||
sfen_write_count += buf->size();
|
||||
|
||||
// Add the processed number here, and if it exceeds save_every,
|
||||
// change the file name and reset this counter.
|
||||
sfen_write_count_current_file += buf->size();
|
||||
if (sfen_write_count_current_file >= save_every)
|
||||
{
|
||||
sfen_write_count_current_file = 0;
|
||||
|
||||
// Sequential number attached to the file
|
||||
int n = (int)(sfen_write_count / save_every);
|
||||
|
||||
// Rename the file and open it again.
|
||||
// Add ios::app in consideration of overwriting.
|
||||
// (Depending on the operation, it may not be necessary.)
|
||||
string new_filename = filename + "_" + std::to_string(n);
|
||||
output_file_stream = create_new_sfen_output(new_filename, sfen_format);
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
out << "INFO (sfen_writer): Creating new data file at " << new_filename << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
std::unique_ptr<BasicSfenOutputStream> output_file_stream;
|
||||
|
||||
// A new net is saved after every save_every sfens are processed.
|
||||
uint64_t save_every = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
// File name passed in the constructor
|
||||
std::string filename;
|
||||
|
||||
// Thread to write to the file
|
||||
std::thread file_worker_thread;
|
||||
|
||||
// Flag that all threads have finished
|
||||
atomic<bool> finished;
|
||||
|
||||
SfenOutputType sfen_format;
|
||||
|
||||
// buffer before writing to file
|
||||
// sfen_buffers is the buffer for each thread
|
||||
// sfen_buffers_pool is a buffer for writing.
|
||||
// After loading the phase in the former buffer by SFEN_WRITE_SIZE,
|
||||
// transfer it to the latter.
|
||||
std::vector<std::unique_ptr<PSVector>> sfen_buffers;
|
||||
std::vector<std::unique_ptr<PSVector>> sfen_buffers_pool;
|
||||
|
||||
// Mutex required to access sfen_buffers_pool
|
||||
std::mutex mutex;
|
||||
|
||||
// Number of sfens written in total, and the
|
||||
// number of sfens written in the current file.
|
||||
uint64_t sfen_write_count = 0;
|
||||
uint64_t sfen_write_count_current_file = 0;
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,242 @@
|
||||
#include "transform.h"
|
||||
|
||||
#include "sfen_stream.h"
|
||||
#include "packed_sfen.h"
|
||||
|
||||
#include "thread.h"
|
||||
#include "position.h"
|
||||
#include "evaluate.h"
|
||||
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
|
||||
namespace Learner
|
||||
{
|
||||
using CommandFunc = void(*)(std::istringstream&);
|
||||
|
||||
// How the static evaluation is moved towards the stored (deep) evaluation.
enum struct NudgedStaticMode
{
    Absolute,    // move by at most a fixed amount
    Relative,    // move by at most a fraction of the static evaluation
    Interpolate  // weighted average of static and deep evaluation
};

// Parameters of the nudged_static transform.
struct NudgedStaticParams
{
    std::string input_filename = "in.binpack";
    std::string output_filename = "out.binpack";
    NudgedStaticMode mode = NudgedStaticMode::Absolute;
    int absolute_nudge = 5;
    float relative_nudge = 0.1f;
    float interpolate_nudge = 0.1f;

    // Replaces negative absolute/relative nudges by zero.
    void enforce_constraints()
    {
        relative_nudge = std::max(relative_nudge, 0.0f);
        absolute_nudge = std::max(absolute_nudge, 0);
    }
};

// Returns the static evaluation nudged towards the deep evaluation as
// selected by `params.mode`, saturated to the range of int16:
//   Absolute    : static + clamp(deep - static, -absolute_nudge, +absolute_nudge)
//   Relative    : static * clamp(deep / static, 1 - relative_nudge, 1 + relative_nudge);
//                 0 when static is 0
//   Interpolate : static * (1 - interpolate_nudge) + deep * interpolate_nudge
// Fractional results are truncated towards zero.
[[nodiscard]] std::int16_t nudge(NudgedStaticParams& params, std::int16_t static_eval_i16, std::int16_t deep_eval_i16)
{
    auto saturate_i32_to_i16 = [](int v) -> std::int16_t {
        return static_cast<std::int16_t>(
            std::clamp(
                v,
                (int)std::numeric_limits<std::int16_t>::min(),
                (int)std::numeric_limits<std::int16_t>::max()
            )
        );
    };

    // Saturates while the value is still a float: converting a float that
    // does not fit into the integer type (or a NaN) is undefined behavior.
    auto saturate_f32_to_i16 = [](float v) -> std::int16_t {
        constexpr float lowest = std::numeric_limits<std::int16_t>::min();
        constexpr float highest = std::numeric_limits<std::int16_t>::max();

        if (v != v) // NaN
            return 0;
        if (v <= lowest)
            return std::numeric_limits<std::int16_t>::min();
        if (v >= highest)
            return std::numeric_limits<std::int16_t>::max();

        return static_cast<std::int16_t>(v);
    };

    int static_eval = static_eval_i16;
    int deep_eval = deep_eval_i16;

    switch(params.mode)
    {
        case NudgedStaticMode::Absolute:
            return saturate_i32_to_i16(
                static_eval + std::clamp(
                    deep_eval - static_eval,
                    -params.absolute_nudge,
                    params.absolute_nudge
                )
            );

        case NudgedStaticMode::Relative:
            // Any fraction of zero is zero. Returning early also avoids
            // the division by zero (0/0 would produce a NaN).
            if (static_eval == 0)
                return 0;

            return saturate_f32_to_i16(
                (float)static_eval * std::clamp(
                    (float)deep_eval / (float)static_eval,
                    (1.0f - params.relative_nudge),
                    (1.0f + params.relative_nudge)
                )
            );

        case NudgedStaticMode::Interpolate:
            return saturate_f32_to_i16(
                (float)static_eval * (1.0f - params.interpolate_nudge)
                + (float)deep_eval * params.interpolate_nudge
            );

        default:
            assert(false);
            return 0;
    }
}
|
||||
|
||||
// Runs the nudged_static transform: every entry of the input file gets its
// score replaced by nudge(static evaluation, stored score) and is written
// to the output file. Entries are written in batches and a progress line
// is printed after each batch.
void do_nudged_static(NudgedStaticParams& params)
{
    Thread* th = Threads.main();
    Position& pos = th->rootPos;
    StateInfo si;

    auto in = Learner::open_sfen_input_file(params.input_filename);
    auto out = Learner::create_new_sfen_output(params.output_filename);

    // Both factories return nullptr for an unrecognized file extension.
    if (in == nullptr)
    {
        std::cerr << "Invalid input file type.\n";
        return;
    }

    if (out == nullptr)
    {
        std::cerr << "Invalid output file type.\n";
        return;
    }

    const uint64_t batch_size = 1'000'000;

    PSVector buffer;
    buffer.reserve(batch_size);

    uint64_t num_processed = 0;

    // Writes the buffered entries, empties the buffer and reports progress.
    auto write_batch = [&]() {
        num_processed += buffer.size();

        out->write(buffer);
        buffer.clear();

        std::cout << "Processed " << num_processed << " positions.\n";
    };

    while (auto entry = in->next())
    {
        auto& ps = *entry;

        pos.set_from_packed_sfen(ps.sfen, &si, th);
        auto static_eval = Eval::evaluate(pos);
        auto deep_eval = ps.score;
        ps.score = nudge(params, static_eval, deep_eval);

        buffer.emplace_back(ps);
        if (buffer.size() >= batch_size)
            write_batch();
    }

    // Entries left over from the last, incomplete batch.
    if (!buffer.empty())
        write_batch();

    std::cout << "Finished.\n";
}
|
||||
|
||||
void nudged_static(std::istringstream& is)
|
||||
{
|
||||
NudgedStaticParams params{};
|
||||
|
||||
while(true)
|
||||
{
|
||||
std::string token;
|
||||
is >> token;
|
||||
|
||||
if (token == "")
|
||||
break;
|
||||
|
||||
if (token == "absolute")
|
||||
{
|
||||
params.mode = NudgedStaticMode::Absolute;
|
||||
is >> params.absolute_nudge;
|
||||
}
|
||||
else if (token == "relative")
|
||||
{
|
||||
params.mode = NudgedStaticMode::Relative;
|
||||
is >> params.relative_nudge;
|
||||
}
|
||||
else if (token == "interpolate")
|
||||
{
|
||||
params.mode = NudgedStaticMode::Interpolate;
|
||||
is >> params.interpolate_nudge;
|
||||
}
|
||||
else if (token == "input_file")
|
||||
is >> params.input_filename;
|
||||
else if (token == "output_file")
|
||||
is >> params.output_filename;
|
||||
}
|
||||
|
||||
std::cout << "Performing transform nudged_static with parameters:\n";
|
||||
std::cout << "input_file : " << params.input_filename << '\n';
|
||||
std::cout << "output_file : " << params.output_filename << '\n';
|
||||
std::cout << "\n";
|
||||
if (params.mode == NudgedStaticMode::Absolute)
|
||||
{
|
||||
std::cout << "mode : absolute\n";
|
||||
std::cout << "absolute_nudge : " << params.absolute_nudge << '\n';
|
||||
}
|
||||
else if (params.mode == NudgedStaticMode::Relative)
|
||||
{
|
||||
std::cout << "mode : relative\n";
|
||||
std::cout << "relative_nudge : " << params.relative_nudge << '\n';
|
||||
}
|
||||
else if (params.mode == NudgedStaticMode::Interpolate)
|
||||
{
|
||||
std::cout << "mode : interpolate\n";
|
||||
std::cout << "interpolate_nudge : " << params.interpolate_nudge << '\n';
|
||||
}
|
||||
std::cout << '\n';
|
||||
|
||||
params.enforce_constraints();
|
||||
do_nudged_static(params);
|
||||
}
|
||||
|
||||
void transform(std::istringstream& is)
|
||||
{
|
||||
const std::map<std::string, CommandFunc> subcommands = {
|
||||
{ "nudged_static", &nudged_static }
|
||||
};
|
||||
|
||||
Eval::NNUE::init();
|
||||
|
||||
std::string subcommand;
|
||||
is >> subcommand;
|
||||
|
||||
auto func = subcommands.find(subcommand);
|
||||
if (func == subcommands.end())
|
||||
{
|
||||
std::cout << "Invalid subcommand " << subcommand << ". Exiting...\n";
|
||||
return;
|
||||
}
|
||||
|
||||
func->second(is);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
#ifndef _TRANSFORM_H_
|
||||
#define _TRANSFORM_H_
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace Learner {
|
||||
|
||||
void transform(std::istringstream& is);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
+4
-1
@@ -18,6 +18,8 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
|
||||
#include "bitboard.h"
|
||||
#include "endgame.h"
|
||||
#include "position.h"
|
||||
@@ -35,6 +37,7 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
std::cout << engine_info() << std::endl;
|
||||
|
||||
CommandLine::init(argc, argv);
|
||||
UCI::init(Options);
|
||||
Tune::init();
|
||||
PSQT::init();
|
||||
@@ -44,7 +47,7 @@ int main(int argc, char* argv[]) {
|
||||
Endgames::init();
|
||||
Threads.set(size_t(Options["Threads"]));
|
||||
Search::clear(); // After threads are up
|
||||
Eval::init_NNUE();
|
||||
Eval::NNUE::init();
|
||||
|
||||
UCI::loop(argc, argv);
|
||||
|
||||
|
||||
+98
-114
@@ -61,6 +61,8 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
|
||||
using namespace std;
|
||||
|
||||
SynchronizedRegionLogger sync_region_cout(std::cout);
|
||||
|
||||
namespace {
|
||||
|
||||
/// Version number. If Version is left empty, then compile date in the format
|
||||
@@ -132,6 +134,7 @@ public:
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
/// engine_info() returns the full name of the current Stockfish version. This
|
||||
/// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
|
||||
/// the program was compiled) or "Stockfish <Version>", depending on whether
|
||||
@@ -356,27 +359,11 @@ void std_aligned_free(void* ptr) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
/// The returned pointer is the aligned one, while the mem argument is the one that needs
|
||||
/// to be passed to free. With c++17 some of this functionality could be simplified.
|
||||
/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#if defined(_WIN32)
|
||||
|
||||
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
|
||||
if (posix_memalign(&mem, alignment, size))
|
||||
mem = nullptr;
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, allocSize, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#elif defined(_WIN64)
|
||||
|
||||
static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
|
||||
static void* aligned_large_pages_alloc_win(size_t allocSize) {
|
||||
|
||||
HANDLE hProcessToken { };
|
||||
LUID luid { };
|
||||
@@ -421,23 +408,10 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
|
||||
return mem;
|
||||
}
|
||||
|
||||
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
|
||||
static bool firstCall = true;
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
// Try to allocate large pages
|
||||
mem = aligned_ttmem_alloc_large_pages(allocSize);
|
||||
|
||||
// Suppress info strings on the first call. The first call occurs before 'uci'
|
||||
// is received and in that case this output confuses some GUIs.
|
||||
if (!firstCall)
|
||||
{
|
||||
if (mem)
|
||||
sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
|
||||
else
|
||||
sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
|
||||
}
|
||||
firstCall = false;
|
||||
void* mem = aligned_large_pages_alloc_win(allocSize);
|
||||
|
||||
// Fall back to regular, page aligned, allocation if necessary
|
||||
if (!mem)
|
||||
@@ -448,23 +422,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
|
||||
#else
|
||||
|
||||
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
constexpr size_t alignment = 64; // assumed cache line size
|
||||
size_t size = allocSize + alignment - 1; // allocate some extra space
|
||||
mem = malloc(size);
|
||||
void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
|
||||
return ret;
|
||||
#if defined(__linux__)
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
|
||||
#else
|
||||
constexpr size_t alignment = 4096; // assumed small page size
|
||||
#endif
|
||||
|
||||
// round up to multiples of alignment
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
||||
void *mem = std_aligned_alloc(alignment, size);
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/// aligned_ttmem_free() will free the previously allocated ttmem
|
||||
/// aligned_large_pages_free() will free the previously allocated ttmem
|
||||
|
||||
#if defined(_WIN64)
|
||||
#if defined(_WIN32)
|
||||
|
||||
void aligned_ttmem_free(void* mem) {
|
||||
void aligned_large_pages_free(void* mem) {
|
||||
|
||||
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
||||
{
|
||||
@@ -477,8 +459,8 @@ void aligned_ttmem_free(void* mem) {
|
||||
|
||||
#else
|
||||
|
||||
void aligned_ttmem_free(void *mem) {
|
||||
free(mem);
|
||||
void aligned_large_pages_free(void *mem) {
|
||||
std_aligned_free(mem);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -590,6 +572,62 @@ void bindThisThread(size_t idx) {
|
||||
|
||||
} // namespace WinProcGroup
|
||||
|
||||
#ifdef _WIN32
#include <direct.h>
#define GETCWD _getcwd
#else
#include <unistd.h>
#define GETCWD getcwd
#endif

namespace CommandLine {

std::string argv0;            // path+name of the executable binary, as given by argv[0]
std::string binaryDirectory;  // path of the executable directory
std::string workingDirectory; // path of the working directory

/// init() records the executable path, the current working directory and the
/// directory containing the executable.
///
/// argc/argv are the arguments received by main(). When argv[0] is not
/// available (argc == 0 or a null entry) the executable path is treated as
/// empty, which makes binaryDirectory resolve to the working directory.
void init(int argc, char* argv[]) {

    // extract the path+name of the executable binary; argv[0] may legally be
    // absent, and constructing a std::string from a null pointer is undefined.
    const bool hasProgramName = argc > 0 && argv != nullptr && argv[0] != nullptr;
    argv0 = hasProgramName ? argv[0] : "";

#ifdef _WIN32
    const std::string pathSeparator = "\\";
#ifdef _MSC_VER
    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
    // issues in some windows 10 versions, so check returned values carefully.
    char* pgmptr = nullptr;
    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
        argv0 = pgmptr;
#endif
#else
    const std::string pathSeparator = "/";
#endif

    // extract the working directory; it stays empty if the query fails
    constexpr int cwdBufferSize = 40000;
    char buff[cwdBufferSize];
    const char* cwd = GETCWD(buff, cwdBufferSize);
    workingDirectory = cwd ? cwd : "";

    // extract the binary directory path from argv0: everything up to and
    // including the last separator, or "." + separator when there is none
    binaryDirectory = argv0;
    const size_t pos = binaryDirectory.find_last_of("\\/");
    if (pos == std::string::npos)
        binaryDirectory = "." + pathSeparator;
    else
        binaryDirectory.resize(pos + 1);

    // pattern replacement: "./" at the start of path is replaced by the working directory
    // (only the leading '.' is substituted, so the separator is kept)
    if (binaryDirectory.find("." + pathSeparator) == 0)
        binaryDirectory.replace(0, 1, workingDirectory);
}

} // namespace CommandLine
|
||||
// Returns a string that represents the current time. (Used when learning evaluation functions)
|
||||
std::string now_string()
|
||||
{
|
||||
@@ -627,18 +665,27 @@ void* aligned_malloc(size_t size, size_t align)
|
||||
return p;
|
||||
}
|
||||
|
||||
// Returns the distance, in stream positions, between the beginning and the end
// of `fs`. The read position observed on entry is restored before returning.
std::uint64_t get_file_size(std::fstream& fs)
{
    // Remember where the caller was reading.
    const auto callerPos = fs.tellg();

    fs.seekg(0, std::ios_base::end);
    const auto endOffset = static_cast<std::uint64_t>(fs.tellg());

    fs.clear(); // Otherwise, the next seek may fail.

    fs.seekg(0, std::ios_base::beg);
    const auto beginOffset = static_cast<std::uint64_t>(fs.tellg());

    // Hand the stream back at the position it had on entry.
    fs.seekg(callerPos);

    return endOffset - beginOffset;
}
|
||||
|
||||
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
|
||||
{
|
||||
fstream fs(filename, ios::in | ios::binary);
|
||||
if (fs.fail())
|
||||
return 1;
|
||||
|
||||
fs.seekg(0, fstream::end);
|
||||
uint64_t eofPos = (uint64_t)fs.tellg();
|
||||
fs.clear(); // Otherwise the next seek may fail.
|
||||
fs.seekg(0, fstream::beg);
|
||||
uint64_t begPos = (uint64_t)fs.tellg();
|
||||
uint64_t file_size = eofPos - begPos;
|
||||
const uint64_t file_size = get_file_size(fs);
|
||||
//std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
|
||||
|
||||
// I know the file size, so call callback_func to get a buffer for this,
|
||||
@@ -687,66 +734,3 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
|
||||
fs.close();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ----------------------------
|
||||
// mkdir wrapper
|
||||
// ----------------------------
|
||||
|
||||
// Specify relative to the current folder. Returns 0 on success, non-zero on failure.
|
||||
// Create a folder. Japanese is not used.
|
||||
// In case of gcc under msys2 environment, folder creation fails with _wmkdir(). Cause unknown.
|
||||
// Use _mkdir() because there is no help for it.
|
||||
|
||||
#if defined(_WIN32)
|
||||
// for Windows
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <codecvt> // I need this because I want wstring to mkdir
|
||||
#include <locale> // This is required for wstring_convert.
|
||||
|
||||
namespace Dependency {
|
||||
int mkdir(std::string dir_name)
|
||||
{
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv;
|
||||
return _wmkdir(cv.from_bytes(dir_name).c_str());
|
||||
// ::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(__GNUC__)
|
||||
|
||||
#include <direct.h>
|
||||
namespace Dependency {
|
||||
int mkdir(std::string dir_name)
|
||||
{
|
||||
return _mkdir(dir_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
#elif defined(__linux__)
|
||||
|
||||
// In the linux environment, this symbol _LINUX is defined in the makefile.
|
||||
|
||||
// mkdir implementation for Linux.
|
||||
#include "sys/stat.h"
|
||||
|
||||
namespace Dependency {
|
||||
int mkdir(std::string dir_name)
|
||||
{
|
||||
return ::mkdir(dir_name.c_str(), 0777);
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
// In order to judge whether it is a Linux environment, we have to divide the makefile..
|
||||
// The function to dig a folder on linux is good for the time being... Only used to save the evaluation function file...
|
||||
|
||||
namespace Dependency {
|
||||
int mkdir(std::string dir_name)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
+418
-17
@@ -19,6 +19,7 @@
|
||||
#ifndef MISC_H_INCLUDED
|
||||
#define MISC_H_INCLUDED
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
@@ -27,6 +28,12 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cmath>
|
||||
#include <cctype>
|
||||
#include <sstream>
|
||||
#include <deque>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
const std::string engine_info(bool to_uci = false);
|
||||
@@ -35,8 +42,8 @@ void prefetch(void* addr);
|
||||
void start_logger(const std::string& fname);
|
||||
void* std_aligned_alloc(size_t alignment, size_t size);
|
||||
void std_aligned_free(void* ptr);
|
||||
void* aligned_ttmem_alloc(size_t size, void*& mem);
|
||||
void aligned_ttmem_free(void* mem); // nop if mem == nullptr
|
||||
void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
|
||||
void aligned_large_pages_free(void* mem); // nop if mem == nullptr
|
||||
|
||||
void dbg_hit_on(bool b);
|
||||
void dbg_hit_on(bool c, bool b);
|
||||
@@ -44,9 +51,7 @@ void dbg_mean_of(int v);
|
||||
void dbg_print();
|
||||
|
||||
typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
|
||||
|
||||
static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
|
||||
|
||||
inline TimePoint now() {
|
||||
return std::chrono::duration_cast<std::chrono::milliseconds>
|
||||
(std::chrono::steady_clock::now().time_since_epoch()).count();
|
||||
@@ -67,6 +72,232 @@ std::ostream& operator<<(std::ostream&, SyncCout);
|
||||
#define sync_cout std::cout << IO_LOCK
|
||||
#define sync_endl std::endl << IO_UNLOCK
|
||||
|
||||
// Rounds `ptr` up to the next address that is a multiple of `Alignment`
// (returns `ptr` unchanged when it is already aligned).
//
// `ptr` must point to an array of size at least
// `sizeof(T) * N + Alignment` bytes, where `N` is the
// number of elements in the array, so that the adjusted pointer
// still has room for all `N` elements.
//
// Works for pointers to const as well: the address is converted straight to
// an integer instead of going through a `char*` (which would drop constness).
template <uintptr_t Alignment, typename T>
T* align_ptr_up(T* ptr)
{
    static_assert(alignof(T) < Alignment);

    const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
    const uintptr_t aligned = (address + (Alignment - 1)) / Alignment * Alignment;
    return reinterpret_cast<T*>(aligned);
}
|
||||
|
||||
// This logger allows printing many parts in a region atomically
// but doesn't block the threads trying to append to other regions.
// Instead, if some region tries to print while another region is ahead of it
// in the queue, the messages are queued and printed as soon as every earlier
// region has been released.
//
// Regions are served in creation order: only the oldest live region writes
// straight to the output stream; all others buffer their parts.
struct SynchronizedRegionLogger
{
    using RegionId = std::uint64_t;

    // Move-only handle for one output region. Streaming into it forwards to the
    // owning logger; destroying or unlock()-ing it releases the region.
    // A default-constructed or moved-from Region silently discards output.
    struct Region
    {
        friend struct SynchronizedRegionLogger;

        Region() :
            logger(nullptr), region_id(0), is_held(false)
        {
        }

        Region(const Region&) = delete;
        Region& operator=(const Region&) = delete;

        Region(Region&& other) :
            logger(other.logger), region_id(other.region_id), is_held(other.is_held)
        {
            other.logger = nullptr;
            other.is_held = false;
        }

        Region& operator=(Region&& other) {
            // Self-assignment must not release the region we are about to keep.
            if (this == &other)
                return *this;

            // Give up whatever region this handle currently owns.
            unlock();

            logger = other.logger;
            region_id = other.region_id;
            is_held = other.is_held;

            // Leave the source detached, exactly like the move constructor.
            other.logger = nullptr;
            other.is_held = false;

            return *this;
        }

        ~Region() { unlock(); }

        // Releases the region (idempotent).
        void unlock() {
            if (is_held) {
                is_held = false;

                if (logger != nullptr)
                    logger->release_region(region_id);
            }
        }

        // Stream manipulators such as std::endl.
        Region& operator << (std::ostream&(*pManip)(std::ostream&)) {
            if (logger != nullptr)
                logger->write(region_id, pManip);

            return *this;
        }

        template <typename T>
        Region& operator << (const T& value) {
            if (logger != nullptr)
                logger->write(region_id, value);

            return *this;
        }

    private:
        SynchronizedRegionLogger* logger;
        RegionId region_id;
        bool is_held;

        Region(SynchronizedRegionLogger& log, RegionId id) :
            logger(&log), region_id(id), is_held(true)
        {
        }
    };

private:
    // Per-region state kept in the logger's queue.
    struct RegionBookkeeping
    {
        RegionBookkeeping(RegionId rid) : id(rid), is_held(true) {}

        std::vector<std::string> pending_parts; // output buffered while not at the front
        RegionId id;
        bool is_held;
    };

    // Registers a new region at the back of the queue and returns its id.
    RegionId init_next_region()
    {
        std::lock_guard<std::mutex> lock(mutex);

        const auto id = next_region_id++;
        regions.emplace_back(id);

        return id;
    }

    void write(RegionId id, std::ostream&(*pManip)(std::ostream&)) {
        write_part(id, pManip);
    }

    template <typename T>
    void write(RegionId id, const T& value) {
        write_part(id, value);
    }

    // Prints `part` immediately when region `id` is at the front of the queue,
    // otherwise renders it to a string and stores it in that region's backlog.
    // Writes to unknown (already removed) regions are dropped.
    template <typename Part>
    void write_part(RegionId id, const Part& part) {
        std::lock_guard<std::mutex> lock(mutex);

        if (regions.empty())
            return;

        if (id == regions.front().id) {
            // We can just directly print to the output because
            // we are at the front of the region queue.
            out << part;
        } else {
            // We have to schedule the print until previous regions are
            // processed
            auto* region = find_region_nolock(id);
            if (region == nullptr)
                return;

            std::stringstream ss;
            ss << part;
            region->pending_parts.emplace_back(ss.str());
        }
    }

    std::ostream& out;

    std::deque<RegionBookkeeping> regions;

    std::mutex mutex;

    // Next id to hand out; guarded by `mutex`. Kept per logger so that the
    // counter is protected by the same mutex that protects `regions`.
    RegionId next_region_id = 0;

    RegionBookkeeping* find_region_nolock(RegionId id) {
        // Linear search because the amount of concurrent regions should be small.
        auto it = std::find_if(
            regions.begin(),
            regions.end(),
            [id](const RegionBookkeeping& r) { return r.id == id; });

        return it == regions.end() ? nullptr : &*it;
    }

    void release_region(RegionId id) {
        std::lock_guard<std::mutex> lock(mutex);

        auto* region = find_region_nolock(id);
        if (region == nullptr)
            return;

        region->is_held = false;

        process_backlog_nolock();
    }

    // Flushes the backlog of the front region and retires every released
    // region at the head of the queue. Stops at the first region still held.
    void process_backlog_nolock()
    {
        while(!regions.empty()) {
            auto& region = regions.front();

            for(auto& part : region.pending_parts) {
                out << part;
            }

            // The backlog has been emitted; forget it so that a later pass
            // (triggered by another region's release) does not print it again.
            region.pending_parts.clear();

            // If the region is still held then we don't
            // want to start printing stuff from the next region.
            if (region.is_held)
                break;

            regions.pop_front();
        }
    }

public:

    SynchronizedRegionLogger(std::ostream& s) :
        out(s)
    {
    }

    // Opens a new region. The returned handle must be kept alive for as long
    // as the region should stay open.
    [[nodiscard]] Region new_region() {
        const auto id = init_next_region();
        return Region(*this, id);
    }

};
|
||||
|
||||
extern SynchronizedRegionLogger sync_region_cout;
|
||||
|
||||
|
||||
/// xorshift64star Pseudo-Random Number Generator
|
||||
/// This class is based on original code written and dedicated
|
||||
@@ -83,6 +314,19 @@ std::ostream& operator<<(std::ostream&, SyncCout);
|
||||
/// For further analysis see
|
||||
/// <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>
|
||||
|
||||
// Hashes `str` to a 64-bit value by folding each character into the state
// with an xor / multiply / xor-shift round. The empty string hashes to the
// initial state constant.
static uint64_t string_hash(const std::string& str)
{
    constexpr uint64_t initial_state = 525201411107845655ull;
    constexpr uint64_t multiplier = 0x5bd1e9955bd1e995ull;

    uint64_t state = initial_state;

    for (std::size_t pos = 0; pos < str.size(); ++pos)
    {
        // Plain char -> uint64_t conversion, same as the range-for formulation.
        state ^= static_cast<uint64_t>(str[pos]);
        state *= multiplier;
        state ^= state >> 47;
    }

    return state;
}
|
||||
|
||||
class PRNG {
|
||||
|
||||
uint64_t s;
|
||||
@@ -94,7 +338,9 @@ class PRNG {
|
||||
}
|
||||
|
||||
public:
|
||||
PRNG() { set_seed_from_time(); }
|
||||
PRNG(uint64_t seed) : s(seed) { assert(seed); }
|
||||
PRNG(const std::string& seed) { set_seed(seed); }
|
||||
|
||||
template<typename T> T rand() { return T(rand64()); }
|
||||
|
||||
@@ -107,6 +353,40 @@ public:
|
||||
|
||||
// Return the random seed used internally.
|
||||
uint64_t get_seed() const { return s; }
|
||||
|
||||
// Replaces the internal generator state with `seed`.
void set_seed(uint64_t seed)
{
    s = seed;
}
|
||||
|
||||
uint64_t next_random_seed()
|
||||
{
|
||||
uint64_t seed = 0;
|
||||
for(int i = 0; i < 64; ++i)
|
||||
{
|
||||
const auto off = rand64() % 64;
|
||||
seed |= (rand64() & (uint64_t(1) << off)) >> off;
|
||||
seed <<= 1;
|
||||
}
|
||||
return seed;
|
||||
}
|
||||
|
||||
void set_seed_from_time()
|
||||
{
|
||||
set_seed(std::chrono::system_clock::now().time_since_epoch().count());
|
||||
}
|
||||
|
||||
void set_seed(const std::string& str)
|
||||
{
|
||||
if (str.empty())
|
||||
{
|
||||
set_seed_from_time();
|
||||
}
|
||||
else if (std::all_of(str.begin(), str.end(), [](char c) { return std::isdigit(c);} )) {
|
||||
set_seed(std::stoull(str));
|
||||
}
|
||||
else
|
||||
{
|
||||
set_seed(string_hash(str));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Display a random seed. (For debugging)
|
||||
@@ -130,6 +410,74 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Fixed-size bitset of N bits stored in 64-bit buckets.
//
// This bitset can be accessed concurrently, provided
// the concurrent accesses are performed on distinct
// instances of underlying type. That means the concurrent
// accesses need to be spaced by at least
// bits_per_bucket bits.
// But at least best_concurrent_access_stride bits
// is recommended to prevent false sharing.
template <uint64_t N>
struct LargeBitset
{
private:
    constexpr static uint64_t cache_line_size = 64;

public:
    using UnderlyingType = uint64_t;

    constexpr static uint64_t num_bits = N;
    constexpr static uint64_t bits_per_bucket = 8 * sizeof(UnderlyingType);
    constexpr static uint64_t num_buckets = (num_bits + bits_per_bucket - 1) / bits_per_bucket;
    constexpr static uint64_t best_concurrent_access_stride = 8 * cache_line_size;

    // All bits start cleared.
    LargeBitset()
    {
        std::fill(std::begin(bits), std::end(bits), UnderlyingType(0));
    }

    // Sets bit `idx`. No bounds checking is performed.
    void set(uint64_t idx)
    {
        const uint64_t bucket = idx / bits_per_bucket;
        const uint64_t bit = uint64_t(1) << (idx % bits_per_bucket);
        bits[bucket] |= bit;
    }

    // Returns whether bit `idx` is set. No bounds checking is performed.
    bool test(uint64_t idx) const
    {
        const uint64_t bucket = idx / bits_per_bucket;
        const uint64_t bit = uint64_t(1) << (idx % bits_per_bucket);
        return (bits[bucket] & bit) != 0;
    }

    // Returns the number of set bits.
    uint64_t count() const
    {
        uint64_t c = 0;
        uint64_t i = 0;

        // Process four buckets per iteration. The bound is written as
        // `i + 3 < num_buckets` because `num_buckets - 3` would wrap around
        // for bitsets with fewer than three buckets and run off the array.
        for (; i + 3 < num_buckets; i += 4)
        {
            uint64_t c0 = popcount(bits[i+0]);
            uint64_t c1 = popcount(bits[i+1]);
            uint64_t c2 = popcount(bits[i+2]);
            uint64_t c3 = popcount(bits[i+3]);
            c0 += c1;
            c2 += c3;
            c += c0 + c2;
        }

        // Remaining 0-3 buckets.
        for (; i < num_buckets; ++i)
        {
            c += popcount(bits[i]);
        }

        return c;
    }

private:
    alignas(cache_line_size) UnderlyingType bits[num_buckets];
};
|
||||
|
||||
/// Under Windows it is not possible for a process to run on more than one
|
||||
/// logical processor group. This usually means to be limited to use max 64
|
||||
/// cores. To overcome this, some special platform specific API should be
|
||||
@@ -155,6 +503,7 @@ std::string now_string();
|
||||
// Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
|
||||
// Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
|
||||
|
||||
std::uint64_t get_file_size(std::fstream& fs);
|
||||
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func);
|
||||
int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
|
||||
|
||||
@@ -165,7 +514,9 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
|
||||
// async version of PRNG
|
||||
struct AsyncPRNG
|
||||
{
|
||||
AsyncPRNG() : prng() { }
|
||||
AsyncPRNG(uint64_t seed) : prng(seed) { assert(seed); }
|
||||
AsyncPRNG(const std::string& seed) : prng(seed) { }
|
||||
// [ASYNC] Extract one random number.
|
||||
template<typename T> T rand() {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
@@ -199,20 +550,51 @@ inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
|
||||
|
||||
// Mathematical function used for progress calculation and learning
|
||||
namespace Math {
|
||||
// Sigmoid function
|
||||
// = 1.0 / (1.0 + std::exp(-x))
|
||||
double sigmoid(double x);
|
||||
// Logistic function: 1 / (1 + e^(-x)).
inline double sigmoid(double x)
{
    const double decay = std::exp(-x);
    return 1.0 / (1.0 + decay);
}
|
||||
|
||||
// Differentiation of sigmoid function
|
||||
// = sigmoid(x) * (1.0-sigmoid(x))
|
||||
double dsigmoid(double x);
|
||||
inline double dsigmoid(double x)
|
||||
{
|
||||
// Sigmoid function
|
||||
// f(x) = 1/(1+exp(-x))
|
||||
// the first derivative is
|
||||
// f'(x) = df/dx = f(x)・{ 1-f(x)}
|
||||
// becomes
|
||||
|
||||
return sigmoid(x) * (1.0 - sigmoid(x));
|
||||
}
|
||||
|
||||
// Restricts v to the closed interval [lo, hi]: returns lo when v is below it,
// hi when v is above it, and v itself otherwise. The result is a reference to
// one of the three arguments.
// * In Stockfish, this function is written in bitboard.h.
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
    if (v < lo)
        return lo;

    if (v > hi)
        return hi;

    return v;
}
|
||||
}
|
||||
|
||||
namespace Algo {
    // Fisher-Yates shuffle of `buf`, in place.
    // `prng.rand(k)` is called once per element with k = number of elements
    // not yet fixed; its result is used as an offset into that remaining tail.
    template <typename Rng, typename T>
    void shuffle(std::vector<T>& buf, Rng&& prng)
    {
        const auto count = buf.size();
        for (uint64_t pos = 0; pos < count; ++pos)
        {
            const auto pick = pos + prng.rand(count - pos);
            std::swap(buf[pos], buf[pick]);
        }
    }

    // Splits `input` at every occurrence of `delimiter`.
    // Empty fields between delimiters are kept; an empty input yields no
    // fields, and a delimiter at the very end does not produce a trailing
    // empty field.
    inline std::vector<std::string> split(const std::string& input, char delimiter) {
        std::vector<std::string> fields;

        std::string::size_type begin = 0;
        while (begin < input.size()) {
            const auto end = input.find(delimiter, begin);
            if (end == std::string::npos) {
                // Last field: everything up to the end of the input.
                fields.emplace_back(input, begin);
                break;
            }

            fields.emplace_back(input, begin, end - begin);
            begin = end + 1;
        }

        return fields;
    }
}
|
||||
|
||||
// --------------------
|
||||
@@ -225,7 +607,7 @@ struct Path
|
||||
{
|
||||
// Combine the path name and file name and return it.
|
||||
// If the folder name is not an empty string, append it if there is no'/' or'\\' at the end.
|
||||
static std::string Combine(const std::string& folder, const std::string& filename)
|
||||
static std::string combine(const std::string& folder, const std::string& filename)
|
||||
{
|
||||
if (folder.length() >= 1 && *folder.rbegin() != '/' && *folder.rbegin() != '\\')
|
||||
return folder + "/" + filename;
|
||||
@@ -234,7 +616,7 @@ struct Path
|
||||
}
|
||||
|
||||
// Get the file name part (excluding the folder name) from the full path expression.
|
||||
static std::string GetFileName(const std::string& path)
|
||||
static std::string get_file_name(const std::string& path)
|
||||
{
|
||||
// I don't know which "\" or "/" is used.
|
||||
auto path_index1 = path.find_last_of("\\") + 1;
|
||||
@@ -259,7 +641,24 @@ public:
|
||||
template <typename U> AlignedAllocator(const AlignedAllocator<U>&) {}
|
||||
|
||||
T* allocate(std::size_t n) { return (T*)std_aligned_alloc(alignof(T), n * sizeof(T)); }
|
||||
void deallocate(T* p, std::size_t n) { std_aligned_free(p); }
|
||||
void deallocate(T* p, std::size_t ) { std_aligned_free(p); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class CacheLineAlignedAllocator {
|
||||
public:
|
||||
using value_type = T;
|
||||
|
||||
constexpr static uint64_t cache_line_size = 64;
|
||||
|
||||
CacheLineAlignedAllocator() {}
|
||||
CacheLineAlignedAllocator(const CacheLineAlignedAllocator&) {}
|
||||
CacheLineAlignedAllocator(CacheLineAlignedAllocator&&) {}
|
||||
|
||||
template <typename U> CacheLineAlignedAllocator(const CacheLineAlignedAllocator<U>&) {}
|
||||
|
||||
T* allocate(std::size_t n) { return (T*)std_aligned_alloc(cache_line_size, n * sizeof(T)); }
|
||||
void deallocate(T* p, std::size_t) { std_aligned_free(p); }
|
||||
};
|
||||
|
||||
// --------------------
|
||||
@@ -273,11 +672,13 @@ namespace Dependency
|
||||
// So when calling getline() on fstream,
|
||||
// just write getline() instead of std::getline() and use this function.
|
||||
extern bool getline(std::ifstream& fs, std::string& s);
|
||||
}
|
||||
|
||||
// Create a folder.
|
||||
// Specify relative to the current folder. Japanese is not used for dir_name.
|
||||
// Returns 0 on success, non-zero on failure.
|
||||
extern int mkdir(std::string dir_name);
|
||||
namespace CommandLine {
|
||||
void init(int argc, char* argv[]);
|
||||
|
||||
extern std::string binaryDirectory; // path of the executable directory
|
||||
extern std::string workingDirectory; // path of the working directory
|
||||
}
|
||||
|
||||
#endif // #ifndef MISC_H_INCLUDED
|
||||
|
||||
+3
-2
@@ -73,8 +73,9 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist
|
||||
assert(d <= 0);
|
||||
|
||||
stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
|
||||
!(ttm && (depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare)
|
||||
&& pos.pseudo_legal(ttm));
|
||||
!( ttm
|
||||
&& (pos.checkers() || depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare)
|
||||
&& pos.pseudo_legal(ttm));
|
||||
}
|
||||
|
||||
/// MovePicker constructor for ProbCut: we generate captures with SEE greater
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_HALFKA_256X2_32_32_H_INCLUDED
|
||||
#define NNUE_HALFKA_256X2_32_32_H_INCLUDED
|
||||
|
||||
#include "nnue/features/feature_set.h"
|
||||
#include "nnue/features/half_ka.h"
|
||||
|
||||
#include "nnue/layers/input_slice.h"
|
||||
#include "nnue/layers/affine_transform.h"
|
||||
#include "nnue/layers/clipped_relu.h"
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKA<Features::Side::kFriend>>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_HALFKA_256X2_32_32_H_INCLUDED
|
||||
@@ -1,42 +1,57 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef HALFKP_CR_EP_256X2_32_32_H
|
||||
#define HALFKP_CR_EP_256X2_32_32_H
|
||||
#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
|
||||
#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
#include "../features/castling_right.h"
|
||||
#include "../features/enpassant.h"
|
||||
#include "nnue/features/feature_set.h"
|
||||
#include "nnue/features/half_kp.h"
|
||||
#include "nnue/features/castling_right.h"
|
||||
#include "nnue/features/enpassant.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
#include "nnue/layers/input_slice.h"
|
||||
#include "nnue/layers/affine_transform.h"
|
||||
#include "nnue/layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
|
||||
Features::EnPassant>;
|
||||
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
|
||||
Features::EnPassant>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // HALFKP_CR_EP_256X2_32_32_H
|
||||
#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
|
||||
#define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
|
||||
|
||||
#include "nnue/features/feature_set.h"
|
||||
#include "nnue/features/half_kp.h"
|
||||
#include "nnue/features/castling_right.h"
|
||||
|
||||
#include "nnue/layers/input_slice.h"
|
||||
#include "nnue/layers/affine_transform.h"
|
||||
#include "nnue/layers/clipped_relu.h"
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
@@ -21,33 +21,33 @@
|
||||
#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
#include "nnue/features/feature_set.h"
|
||||
#include "nnue/features/half_kp.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
#include "nnue/layers/input_slice.h"
|
||||
#include "nnue/layers/affine_transform.h"
|
||||
#include "nnue/layers/clipped_relu.h"
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
namespace Layers {
|
||||
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
// Define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
|
||||
@@ -3,37 +3,33 @@
|
||||
#ifndef HALFKP_384X2_32_32_H
|
||||
#define HALFKP_384X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/half_kp.h"
|
||||
#include "nnue/features/feature_set.h"
|
||||
#include "nnue/features/half_kp.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
#include "nnue/layers/input_slice.h"
|
||||
#include "nnue/layers/affine_transform.h"
|
||||
#include "nnue/layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
namespace Eval::NNUE {
|
||||
|
||||
namespace NNUE {
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<
|
||||
Features::HalfKP<Features::Side::kFriend>>;
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 384;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 384;
|
||||
namespace Layers {
|
||||
|
||||
namespace Layers {
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
} // namespace Layers
|
||||
|
||||
} // namespace Layers
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
} // namespace Eval::NNUE
|
||||
#endif // HALFKP_384X2_32_32_H
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef K_P_CR_EP_256X2_32_32_H
|
||||
#define K_P_CR_EP_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/k.h"
|
||||
#include "../features/p.h"
|
||||
#include "../features/castling_right.h"
|
||||
#include "../features/enpassant.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<Features::K, Features::P,
|
||||
Features::CastlingRight, Features::EnPassant>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // K_P_CR_EP_256X2_32_32_H
|
||||
@@ -1,41 +0,0 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
|
||||
#ifndef K_P_CR_256X2_32_32_H
|
||||
#define K_P_CR_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/k.h"
|
||||
#include "../features/p.h"
|
||||
#include "../features/castling_right.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<Features::K, Features::P,
|
||||
Features::CastlingRight>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // K_P_CR_256X2_32_32_H
|
||||
@@ -1,38 +0,0 @@
|
||||
// Definition of input features and network structure used in NNUE evaluation function
|
||||
#ifndef K_P_256X2_32_32_H
|
||||
#define K_P_256X2_32_32_H
|
||||
|
||||
#include "../features/feature_set.h"
|
||||
#include "../features/k.h"
|
||||
#include "../features/p.h"
|
||||
|
||||
#include "../layers/input_slice.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// Input features used in evaluation function
|
||||
using RawFeatures = Features::FeatureSet<Features::K, Features::P>;
|
||||
|
||||
// Number of input feature dimensions after conversion
|
||||
constexpr IndexType kTransformedFeatureDimensions = 256;
|
||||
|
||||
namespace Layers {
|
||||
|
||||
// define network structure
|
||||
using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
|
||||
using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
|
||||
using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
|
||||
using OutputLayer = AffineTransform<HiddenLayer2, 1>;
|
||||
|
||||
} // namespace Layers
|
||||
|
||||
using Network = Layers::OutputLayer;
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
#endif // K_P_256X2_32_32_H
|
||||
+210
-75
@@ -18,20 +18,29 @@
|
||||
|
||||
// Code for calculating NNUE evaluation function
|
||||
|
||||
#include <fstream>
|
||||
#include "evaluate_nnue.h"
|
||||
|
||||
#include "position.h"
|
||||
#include "misc.h"
|
||||
#include "uci.h"
|
||||
#include "types.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
|
||||
#include "../evaluate.h"
|
||||
#include "../position.h"
|
||||
#include "../misc.h"
|
||||
#include "../uci.h"
|
||||
#include "../types.h"
|
||||
|
||||
#include "evaluate_nnue.h"
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
|
||||
const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
|
||||
// convention: W - us, B - them
|
||||
// viewed from other side, W and B are reversed
|
||||
{ PS_NONE, PS_NONE },
|
||||
@@ -53,7 +62,7 @@ namespace Eval::NNUE {
|
||||
};
|
||||
|
||||
// Input feature converter
|
||||
AlignedPtr<FeatureTransformer> feature_transformer;
|
||||
LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
AlignedPtr<Network> network;
|
||||
@@ -65,50 +74,77 @@ namespace Eval::NNUE {
|
||||
std::string savedfileName = "nn.bin";
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string GetArchitectureString() {
|
||||
return "Features=" + FeatureTransformer::GetStructureString() +
|
||||
",Network=" + Network::GetStructureString();
|
||||
std::string get_architecture_string() {
|
||||
return "Features=" + FeatureTransformer::get_structure_string() +
|
||||
",Network=" + Network::get_structure_string();
|
||||
}
|
||||
|
||||
std::string get_layers_info() {
|
||||
return
|
||||
FeatureTransformer::get_layers_info()
|
||||
+ '\n' + Network::get_layers_info();
|
||||
}
|
||||
|
||||
UseNNUEMode useNNUE;
|
||||
std::string eval_file_loaded = "None";
|
||||
|
||||
namespace Detail {
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
template <typename T>
|
||||
void Initialize(AlignedPtr<T>& pointer) {
|
||||
void initialize(AlignedPtr<T>& pointer) {
|
||||
|
||||
pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void initialize(LargePagePtr<T>& pointer) {
|
||||
|
||||
static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
|
||||
pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
|
||||
std::memset(pointer.get(), 0, sizeof(T));
|
||||
}
|
||||
|
||||
// Read evaluation function parameters
|
||||
template <typename T>
|
||||
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
|
||||
bool ReadParameters(std::istream& stream, T& reference) {
|
||||
|
||||
std::uint32_t header;
|
||||
header = read_little_endian<std::uint32_t>(stream);
|
||||
if (!stream || header != T::GetHashValue()) return false;
|
||||
return pointer->ReadParameters(stream);
|
||||
return reference.ReadParameters(stream);
|
||||
}
|
||||
|
||||
// write evaluation function parameters
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
|
||||
constexpr std::uint32_t header = T::GetHashValue();
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
|
||||
|
||||
return pointer->WriteParameters(stream);
|
||||
}
|
||||
} // namespace Detail
|
||||
|
||||
// Initialize the evaluation function parameters
|
||||
void Initialize() {
|
||||
void initialize() {
|
||||
|
||||
Detail::Initialize(feature_transformer);
|
||||
Detail::Initialize(network);
|
||||
Detail::initialize(feature_transformer);
|
||||
Detail::initialize(network);
|
||||
}
|
||||
|
||||
// Read network header
|
||||
bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
bool read_header(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
|
||||
{
|
||||
std::uint32_t version, size;
|
||||
|
||||
@@ -122,13 +158,17 @@ namespace Eval::NNUE {
|
||||
}
|
||||
|
||||
// write the header
|
||||
bool WriteHeader(std::ostream& stream,
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture) {
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
|
||||
stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
|
||||
|
||||
const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
|
||||
stream.write(architecture.data(), size);
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
@@ -137,81 +177,176 @@ namespace Eval::NNUE {
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
if (!ReadHeader(stream, &hash_value, &architecture)) return false;
|
||||
if (!read_header(stream, &hash_value, &architecture)) return false;
|
||||
if (hash_value != kHashValue) return false;
|
||||
if (!Detail::ReadParameters(stream, feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, network)) return false;
|
||||
if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
|
||||
if (!Detail::ReadParameters(stream, *network)) return false;
|
||||
return stream && stream.peek() == std::ios::traits_type::eof();
|
||||
}
|
||||
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream) {
|
||||
if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
|
||||
if (!Detail::WriteParameters(stream, feature_transformer)) return false;
|
||||
if (!Detail::WriteParameters(stream, network)) return false;
|
||||
|
||||
if (!write_header(stream, kHashValue, get_architecture_string()))
|
||||
return false;
|
||||
|
||||
if (!Detail::WriteParameters(stream, feature_transformer))
|
||||
return false;
|
||||
|
||||
if (!Detail::WriteParameters(stream, network))
|
||||
return false;
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
static void UpdateAccumulatorIfPossible(const Position& pos) {
|
||||
|
||||
feature_transformer->UpdateAccumulatorIfPossible(pos);
|
||||
}
|
||||
|
||||
// Calculate the evaluation value
|
||||
static Value ComputeScore(const Position& pos, bool refresh) {
|
||||
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
if (!refresh && accumulator.computed_score) {
|
||||
return accumulator.score;
|
||||
}
|
||||
|
||||
alignas(kCacheLineSize) TransformedFeatureType
|
||||
transformed_features[FeatureTransformer::kBufferSize];
|
||||
feature_transformer->Transform(pos, transformed_features, refresh);
|
||||
alignas(kCacheLineSize) char buffer[Network::kBufferSize];
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
|
||||
auto score = static_cast<Value>(output[0] / FV_SCALE);
|
||||
|
||||
accumulator.score = score;
|
||||
accumulator.computed_score = true;
|
||||
return accumulator.score;
|
||||
}
|
||||
|
||||
// Load the evaluation function file
|
||||
bool load_eval_file(const std::string& evalFile) {
|
||||
|
||||
Initialize();
|
||||
|
||||
if (Options["SkipLoadingEval"])
|
||||
{
|
||||
std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
fileName = evalFile;
|
||||
|
||||
std::ifstream stream(evalFile, std::ios::binary);
|
||||
|
||||
const bool result = ReadParameters(stream);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Evaluation function. Perform differential calculation.
|
||||
Value evaluate(const Position& pos) {
|
||||
return ComputeScore(pos, false);
|
||||
|
||||
// We manually align the arrays on the stack because with gcc < 9.3
|
||||
// overaligning stack variables with alignas() doesn't work correctly.
|
||||
|
||||
constexpr uint64_t alignment = kCacheLineSize;
|
||||
|
||||
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
|
||||
TransformedFeatureType transformed_features_unaligned[
|
||||
FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
|
||||
char buffer_unaligned[Network::kBufferSize + alignment];
|
||||
|
||||
auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
|
||||
auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
|
||||
#else
|
||||
alignas(alignment)
|
||||
TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
|
||||
alignas(alignment) char buffer[Network::kBufferSize];
|
||||
#endif
|
||||
|
||||
ASSERT_ALIGNED(transformed_features, alignment);
|
||||
ASSERT_ALIGNED(buffer, alignment);
|
||||
|
||||
feature_transformer->Transform(pos, transformed_features);
|
||||
const auto output = network->Propagate(transformed_features, buffer);
|
||||
|
||||
return static_cast<Value>(output[0] / FV_SCALE);
|
||||
}
|
||||
|
||||
// Evaluation function. Perform full calculation.
|
||||
Value compute_eval(const Position& pos) {
|
||||
return ComputeScore(pos, true);
|
||||
// Load eval, from a file stream or a memory stream
|
||||
bool load_eval(std::string name, std::istream& stream) {
|
||||
|
||||
initialize();
|
||||
fileName = name;
|
||||
return ReadParameters(stream);
|
||||
}
|
||||
|
||||
static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
|
||||
{
|
||||
if (mode == "false")
|
||||
return UseNNUEMode::False;
|
||||
else if (mode == "true")
|
||||
return UseNNUEMode::True;
|
||||
else if (mode == "pure")
|
||||
return UseNNUEMode::Pure;
|
||||
|
||||
return UseNNUEMode::False;
|
||||
}
|
||||
|
||||
void init() {
|
||||
|
||||
useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
|
||||
|
||||
if (Options["SkipLoadingEval"] || useNNUE == UseNNUEMode::False)
|
||||
{
|
||||
eval_file_loaded.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
void update_eval(const Position& pos) {
|
||||
UpdateAccumulatorIfPossible(pos);
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
|
||||
#if defined(DEFAULT_NNUE_DIRECTORY)
|
||||
#define stringify2(x) #x
|
||||
#define stringify(x) stringify2(x)
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
|
||||
#else
|
||||
std::vector<std::string> dirs = { "" , CommandLine::binaryDirectory };
|
||||
#endif
|
||||
|
||||
for (std::string directory : dirs)
|
||||
{
|
||||
if (eval_file_loaded != eval_file)
|
||||
{
|
||||
std::ifstream stream(directory + eval_file, std::ios::binary);
|
||||
if (load_eval(eval_file, stream))
|
||||
{
|
||||
sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded = eval_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
|
||||
eval_file_loaded.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef stringify2
|
||||
#undef stringify
|
||||
}
|
||||
|
||||
/// NNUE::verify_eval_file_loaded() verifies that the net named by the EvalFile option was loaded successfully
|
||||
void verify_eval_file_loaded() {
|
||||
|
||||
std::string eval_file = std::string(Options["EvalFile"]);
|
||||
|
||||
if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + std::string(defaults["EvalFile"]);
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg4 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
|
||||
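/// NNUE::verify_any_net_loaded() verifies only that some net was loaded. In training we
/// override the eval file, so the loaded net is not compared against the EvalFile option here.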
|
||||
void verify_any_net_loaded() {
|
||||
|
||||
if (!Options["SkipLoadingEval"] && useNNUE != UseNNUEMode::False && eval_file_loaded.empty())
|
||||
{
|
||||
UCI::OptionsMap defaults;
|
||||
UCI::init(defaults);
|
||||
|
||||
std::string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
|
||||
std::string msg2 = "The option is set to true, but the network file was not loaded successfully.";
|
||||
std::string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
|
||||
std::string msg5 = "The engine will be terminated now.";
|
||||
|
||||
sync_cout << "info string ERROR: " << msg1 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg2 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg3 << sync_endl;
|
||||
sync_cout << "info string ERROR: " << msg5 << sync_endl;
|
||||
|
||||
std::exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (useNNUE != UseNNUEMode::False)
|
||||
sync_cout << "info string NNUE evaluation using " << eval_file_loaded << " enabled" << sync_endl;
|
||||
else
|
||||
sync_cout << "info string classical evaluation enabled" << sync_endl;
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
@@ -23,10 +23,19 @@
|
||||
|
||||
#include "nnue_feature_transformer.h"
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
enum struct UseNNUEMode
|
||||
{
|
||||
False,
|
||||
True,
|
||||
Pure
|
||||
};
|
||||
|
||||
// Hash value of evaluation function structure
|
||||
constexpr std::uint32_t kHashValue =
|
||||
FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
|
||||
@@ -40,11 +49,22 @@ namespace Eval::NNUE {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct LargePageDeleter {
|
||||
void operator()(T* ptr) const {
|
||||
ptr->~T();
|
||||
aligned_large_pages_free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
|
||||
|
||||
template <typename T>
|
||||
using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
|
||||
|
||||
// Input feature converter
|
||||
extern AlignedPtr<FeatureTransformer> feature_transformer;
|
||||
extern LargePagePtr<FeatureTransformer> feature_transformer;
|
||||
|
||||
// Evaluation function
|
||||
extern AlignedPtr<Network> network;
|
||||
@@ -55,16 +75,22 @@ namespace Eval::NNUE {
|
||||
// Saved evaluation function file name
|
||||
extern std::string savedfileName;
|
||||
|
||||
extern UseNNUEMode useNNUE;
|
||||
|
||||
extern std::string eval_file_loaded;
|
||||
|
||||
// Get a string that represents the structure of the evaluation function
|
||||
std::string GetArchitectureString();
|
||||
std::string get_architecture_string();
|
||||
|
||||
std::string get_layers_info();
|
||||
|
||||
// read the header
|
||||
bool ReadHeader(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture);
|
||||
bool read_header(std::istream& stream,
|
||||
std::uint32_t* hash_value, std::string* architecture);
|
||||
|
||||
// write the header
|
||||
bool WriteHeader(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture);
|
||||
bool write_header(std::ostream& stream,
|
||||
std::uint32_t hash_value, const std::string& architecture);
|
||||
|
||||
// read evaluation function parameters
|
||||
bool ReadParameters(std::istream& stream);
|
||||
@@ -72,6 +98,13 @@ namespace Eval::NNUE {
|
||||
// write evaluation function parameters
|
||||
bool WriteParameters(std::ostream& stream);
|
||||
|
||||
Value evaluate(const Position& pos);
|
||||
bool load_eval(std::string name, std::istream& stream);
|
||||
void init();
|
||||
|
||||
void verify_eval_file_loaded();
|
||||
void verify_any_net_loaded();
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
|
||||
|
||||
+300
-189
@@ -1,231 +1,342 @@
|
||||
// Code for learning NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include <random>
|
||||
#include <random>
|
||||
#include <fstream>
|
||||
|
||||
#include "../learn/learn.h"
|
||||
#include "../learn/learning_tools.h"
|
||||
|
||||
#include "../position.h"
|
||||
#include "../uci.h"
|
||||
#include "../misc.h"
|
||||
#include "../thread_win32_osx.h"
|
||||
|
||||
#include "../eval/evaluate_common.h"
|
||||
#include <filesystem>
|
||||
|
||||
#include "evaluate_nnue.h"
|
||||
#include "evaluate_nnue_learner.h"
|
||||
#include "trainer/features/factorizer_feature_set.h"
|
||||
#include "trainer/features/factorizer_half_kp.h"
|
||||
|
||||
#include "trainer/features/all_factorizers.h"
|
||||
|
||||
#include "trainer/trainer_feature_transformer.h"
|
||||
#include "trainer/trainer_input_slice.h"
|
||||
#include "trainer/trainer_affine_transform.h"
|
||||
#include "trainer/trainer_clipped_relu.h"
|
||||
#include "trainer/trainer_sum.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "position.h"
|
||||
#include "uci.h"
|
||||
#include "misc.h"
|
||||
#include "thread_win32_osx.h"
|
||||
#include "thread.h"
|
||||
|
||||
namespace NNUE {
|
||||
// Code for learning NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
namespace {
|
||||
namespace {
|
||||
|
||||
// learning data
|
||||
std::vector<Example> examples;
|
||||
// learning data
|
||||
std::vector<Example> examples;
|
||||
|
||||
// Mutex for exclusive control of examples
|
||||
std::mutex examples_mutex;
|
||||
// Mutex for exclusive control of examples
|
||||
std::mutex examples_mutex;
|
||||
|
||||
// number of samples in mini-batch
|
||||
uint64_t batch_size;
|
||||
// number of samples in mini-batch
|
||||
uint64_t batch_size;
|
||||
|
||||
// random number generator
|
||||
std::mt19937 rng;
|
||||
// random number generator
|
||||
std::mt19937 rng;
|
||||
|
||||
// learner
|
||||
std::shared_ptr<Trainer<Network>> trainer;
|
||||
// learner
|
||||
std::shared_ptr<Trainer<Network>> trainer;
|
||||
|
||||
// Learning rate scale
|
||||
double global_learning_rate_scale;
|
||||
// Tell the learner options such as hyperparameters
|
||||
void send_messages(std::vector<Message> messages) {
|
||||
for (auto& message : messages) {
|
||||
trainer->send_message(&message);
|
||||
assert(message.num_receivers > 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the learning rate scale
|
||||
double GetGlobalLearningRateScale() {
|
||||
return global_learning_rate_scale;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Tell the learner options such as hyperparameters
|
||||
void SendMessages(std::vector<Message> messages) {
|
||||
for (auto& message : messages) {
|
||||
trainer->SendMessage(&message);
|
||||
assert(message.num_receivers > 0);
|
||||
}
|
||||
}
|
||||
// Initialize learning
|
||||
void initialize_training(
|
||||
const std::string& seed,
|
||||
SynchronizedRegionLogger::Region& out) {
|
||||
|
||||
} // namespace
|
||||
#if defined (OPENBLAS_VERSION)
|
||||
openblas_set_num_threads(1);
|
||||
#elif defined (INTEL_MKL_VERSION)
|
||||
mkl_set_num_threads(1);
|
||||
#endif
|
||||
|
||||
// Initialize learning
|
||||
void InitializeTraining(double eta1, uint64_t eta1_epoch,
|
||||
double eta2, uint64_t eta2_epoch, double eta3) {
|
||||
std::cout << "Initializing NN training for "
|
||||
<< GetArchitectureString() << std::endl;
|
||||
out << "INFO (initialize_training): Initializing NN training for "
|
||||
<< get_architecture_string() << std::endl;
|
||||
|
||||
assert(feature_transformer);
|
||||
assert(network);
|
||||
trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
|
||||
out << std::endl;
|
||||
|
||||
if (Options["SkipLoadingEval"]) {
|
||||
trainer->Initialize(rng);
|
||||
}
|
||||
out << "Layers:\n"
|
||||
<< get_layers_info() << std::endl;
|
||||
|
||||
global_learning_rate_scale = 1.0;
|
||||
EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
|
||||
}
|
||||
out << std::endl;
|
||||
|
||||
// set the number of samples in the mini-batch
|
||||
void SetBatchSize(uint64_t size) {
|
||||
assert(size > 0);
|
||||
batch_size = size;
|
||||
}
|
||||
out << "Factorizers:\n"
|
||||
<< Features::Factorizer<RawFeatures>::get_factorizers_string() << std::endl;
|
||||
|
||||
// set the learning rate scale
|
||||
void SetGlobalLearningRateScale(double scale) {
|
||||
global_learning_rate_scale = scale;
|
||||
}
|
||||
out << std::endl;
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SetOptions(const std::string& options) {
|
||||
std::vector<Message> messages;
|
||||
for (const auto& option : Split(options, ',')) {
|
||||
const auto fields = Split(option, '=');
|
||||
assert(fields.size() == 1 || fields.size() == 2);
|
||||
if (fields.size() == 1) {
|
||||
messages.emplace_back(fields[0]);
|
||||
} else {
|
||||
messages.emplace_back(fields[0], fields[1]);
|
||||
}
|
||||
}
|
||||
SendMessages(std::move(messages));
|
||||
}
|
||||
assert(feature_transformer);
|
||||
assert(network);
|
||||
|
||||
// Reread the evaluation function parameters for learning from the file
|
||||
void RestoreParameters(const std::string& dir_name) {
|
||||
const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
|
||||
std::ifstream stream(file_name, std::ios::binary);
|
||||
bool result = ReadParameters(stream);
|
||||
assert(result);
|
||||
trainer = Trainer<Network>::create(network.get(), feature_transformer.get());
|
||||
rng.seed(PRNG(seed).rand<uint64_t>());
|
||||
|
||||
SendMessages({{"reset"}});
|
||||
}
|
||||
|
||||
// Add 1 sample of learning data
|
||||
void AddExample(Position& pos, Color rootColor,
|
||||
const Learner::PackedSfenValue& psv, double weight) {
|
||||
Example example;
|
||||
if (rootColor == pos.side_to_move()) {
|
||||
example.sign = 1;
|
||||
} else {
|
||||
example.sign = -1;
|
||||
}
|
||||
example.psv = psv;
|
||||
example.weight = weight;
|
||||
|
||||
Features::IndexList active_indices[2];
|
||||
for (const auto trigger : kRefreshTriggers) {
|
||||
RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
|
||||
}
|
||||
if (pos.side_to_move() != WHITE) {
|
||||
active_indices[0].swap(active_indices[1]);
|
||||
}
|
||||
for (const auto color : Colors) {
|
||||
std::vector<TrainingFeature> training_features;
|
||||
for (const auto base_index : active_indices[color]) {
|
||||
static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
|
||||
(1 << TrainingFeature::kIndexBits), "");
|
||||
Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
|
||||
base_index, &training_features);
|
||||
}
|
||||
std::sort(training_features.begin(), training_features.end());
|
||||
|
||||
auto& unique_features = example.training_features[color];
|
||||
for (const auto& feature : training_features) {
|
||||
if (!unique_features.empty() &&
|
||||
feature.GetIndex() == unique_features.back().GetIndex()) {
|
||||
unique_features.back() += feature;
|
||||
} else {
|
||||
unique_features.push_back(feature);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(examples_mutex);
|
||||
examples.push_back(std::move(example));
|
||||
}
|
||||
|
||||
// update the evaluation function parameters
|
||||
void UpdateParameters(uint64_t epoch) {
|
||||
assert(batch_size > 0);
|
||||
|
||||
EvalLearningTools::Weight::calc_eta(epoch);
|
||||
const auto learning_rate = static_cast<LearnFloatType>(
|
||||
get_eta() / batch_size);
|
||||
|
||||
std::lock_guard<std::mutex> lock(examples_mutex);
|
||||
std::shuffle(examples.begin(), examples.end(), rng);
|
||||
while (examples.size() >= batch_size) {
|
||||
std::vector<Example> batch(examples.end() - batch_size, examples.end());
|
||||
examples.resize(examples.size() - batch_size);
|
||||
|
||||
const auto network_output = trainer->Propagate(batch);
|
||||
|
||||
std::vector<LearnFloatType> gradients(batch.size());
|
||||
for (std::size_t b = 0; b < batch.size(); ++b) {
|
||||
const auto shallow = static_cast<Value>(Round<std::int32_t>(
|
||||
batch[b].sign * network_output[b] * kPonanzaConstant));
|
||||
const auto& psv = batch[b].psv;
|
||||
const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
|
||||
gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
|
||||
if (Options["SkipLoadingEval"]) {
|
||||
out << "INFO (initialize_training): Performing random net initialization.\n";
|
||||
trainer->initialize(rng);
|
||||
}
|
||||
}
|
||||
|
||||
trainer->Backpropagate(gradients.data(), learning_rate);
|
||||
}
|
||||
SendMessages({{"quantize_parameters"}});
|
||||
}
|
||||
// set the number of samples in the mini-batch
|
||||
void set_batch_size(uint64_t size) {
|
||||
assert(size > 0);
|
||||
batch_size = size;
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void CheckHealth() {
|
||||
SendMessages({{"check_health"}});
|
||||
}
|
||||
// Set options such as hyperparameters
|
||||
void set_options(const std::string& options) {
|
||||
std::vector<Message> messages;
|
||||
for (const auto& option : Algo::split(options, ',')) {
|
||||
const auto fields = Algo::split(option, '=');
|
||||
assert(fields.size() == 1 || fields.size() == 2);
|
||||
|
||||
} // namespace NNUE
|
||||
if (fields.size() == 1) {
|
||||
messages.emplace_back(fields[0]);
|
||||
} else {
|
||||
messages.emplace_back(fields[0], fields[1]);
|
||||
}
|
||||
}
|
||||
|
||||
// save merit function parameters to a file
|
||||
void save_eval(std::string dir_name) {
|
||||
auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
|
||||
std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
|
||||
send_messages(std::move(messages));
|
||||
}
|
||||
|
||||
// mkdir() will fail if this folder already exists, but
|
||||
// Apart from that. If not, I just want you to make it.
|
||||
// Also, assume that the folders up to EvalSaveDir have been dug.
|
||||
Dependency::mkdir(eval_dir);
|
||||
// Reread the evaluation function parameters for learning from the file
|
||||
void restore_parameters(const std::string& dir_name) {
|
||||
const std::string file_name = Path::combine(dir_name, NNUE::savedfileName);
|
||||
std::ifstream stream(file_name, std::ios::binary);
|
||||
#ifndef NDEBUG
|
||||
bool result =
|
||||
#endif
|
||||
ReadParameters(stream);
|
||||
#ifndef NDEBUG
|
||||
assert(result);
|
||||
#endif
|
||||
|
||||
if (Options["SkipLoadingEval"] && NNUE::trainer) {
|
||||
NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
|
||||
}
|
||||
send_messages({{"reset"}});
|
||||
}
|
||||
|
||||
const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
|
||||
std::ofstream stream(file_name, std::ios::binary);
|
||||
const bool result = NNUE::WriteParameters(stream);
|
||||
assert(result);
|
||||
void finalize_net() {
|
||||
send_messages({{"clear_unobserved_feature_weights"}});
|
||||
}
|
||||
|
||||
std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
|
||||
}
|
||||
// Add 1 sample of learning data
|
||||
void add_example(
|
||||
Position& pos,
|
||||
Color rootColor,
|
||||
Value discrete_nn_eval,
|
||||
const Learner::PackedSfenValue& psv,
|
||||
double weight) {
|
||||
|
||||
// get the current eta
|
||||
double get_eta() {
|
||||
return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
|
||||
}
|
||||
Example example;
|
||||
if (rootColor == pos.side_to_move()) {
|
||||
example.sign = 1;
|
||||
} else {
|
||||
example.sign = -1;
|
||||
}
|
||||
|
||||
} // namespace Eval
|
||||
example.discrete_nn_eval = discrete_nn_eval;
|
||||
example.psv = psv;
|
||||
example.weight = weight;
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
Features::IndexList active_indices[2];
|
||||
for (const auto trigger : kRefreshTriggers) {
|
||||
RawFeatures::append_active_indices(pos, trigger, active_indices);
|
||||
}
|
||||
|
||||
if (pos.side_to_move() != WHITE) {
|
||||
active_indices[0].swap(active_indices[1]);
|
||||
}
|
||||
|
||||
static thread_local std::vector<TrainingFeature> s_training_features;
|
||||
auto& training_features = s_training_features;
|
||||
|
||||
for (const auto color : Colors) {
|
||||
training_features.clear();
|
||||
|
||||
for (const auto base_index : active_indices[color]) {
|
||||
static_assert(Features::Factorizer<RawFeatures>::get_dimensions() <
|
||||
(1 << TrainingFeature::kIndexBits), "");
|
||||
Features::Factorizer<RawFeatures>::append_training_features(
|
||||
base_index, &training_features);
|
||||
}
|
||||
|
||||
std::sort(training_features.begin(), training_features.end());
|
||||
|
||||
auto& unique_features = example.training_features[color];
|
||||
unique_features.reserve(training_features.size());
|
||||
for (const auto& feature : training_features) {
|
||||
if (!unique_features.empty() &&
|
||||
feature.get_index() == unique_features.back().get_index()) {
|
||||
|
||||
unique_features.back() += feature;
|
||||
} else {
|
||||
unique_features.push_back(feature);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(examples_mutex);
|
||||
examples.push_back(std::move(example));
|
||||
}
|
||||
|
||||
// update the evaluation function parameters
|
||||
Learner::Loss update_parameters(
|
||||
ThreadPool& thread_pool,
|
||||
uint64_t epoch,
|
||||
bool verbose,
|
||||
double learning_rate,
|
||||
double max_grad,
|
||||
Learner::CalcLossFunc calc_loss)
|
||||
{
|
||||
using namespace Learner::Autograd::UnivariateStatic;
|
||||
|
||||
assert(batch_size > 0);
|
||||
|
||||
learning_rate /= batch_size;
|
||||
|
||||
std::lock_guard<std::mutex> lock(examples_mutex);
|
||||
|
||||
double abs_eval_diff_sum = 0.0;
|
||||
double abs_discrete_eval_sum = 0.0;
|
||||
double gradient_norm = 0.0;
|
||||
|
||||
bool collect_stats = verbose;
|
||||
|
||||
Learner::Loss loss_sum{};
|
||||
|
||||
std::vector<double> abs_eval_diff_sum_local(thread_pool.size(), 0.0);
|
||||
std::vector<double> abs_discrete_eval_sum_local(thread_pool.size(), 0.0);
|
||||
std::vector<double> gradient_norm_local(thread_pool.size(), 0.0);
|
||||
std::vector<Learner::Loss> loss_sum_local(thread_pool.size());
|
||||
|
||||
auto prev_batch_begin = examples.end();
|
||||
while ((long)(prev_batch_begin - examples.begin()) >= (long)batch_size) {
|
||||
auto batch_begin = prev_batch_begin - batch_size;
|
||||
auto batch_end = prev_batch_begin;
|
||||
auto size = batch_end - batch_begin;
|
||||
const auto network_output = trainer->step_start(thread_pool, batch_begin, batch_end);
|
||||
std::vector<LearnFloatType> gradients(size);
|
||||
|
||||
thread_pool.for_each_index_chunk_with_workers(
|
||||
std::size_t(0), size,
|
||||
[&](Thread& th, std::size_t offset, std::size_t count) {
|
||||
const auto thread_id = th.thread_idx();
|
||||
|
||||
trainer->propagate(th, offset, count);
|
||||
|
||||
for (std::size_t b = offset; b < offset + count; ++b) {
|
||||
const auto& e = *(batch_begin + b);
|
||||
const auto shallow = static_cast<Value>(round<std::int32_t>(
|
||||
e.sign * network_output[b] * kPonanzaConstant));
|
||||
const auto discrete = e.sign * e.discrete_nn_eval;
|
||||
const auto& psv = e.psv;
|
||||
auto loss = calc_loss(shallow, (Value)psv.score, psv.game_result, psv.gamePly);
|
||||
loss.grad = std::clamp(
|
||||
loss.grad * e.sign * kPonanzaConstant * e.weight, -max_grad, max_grad);
|
||||
gradients[b] = static_cast<LearnFloatType>(loss.grad);
|
||||
loss_sum_local[thread_id] += loss;
|
||||
|
||||
// The discrete eval will only be valid before first backpropagation,
|
||||
// that is only for the first batch.
|
||||
// Similarily we want only gradients from one batch.
|
||||
if (collect_stats)
|
||||
{
|
||||
abs_eval_diff_sum_local[thread_id] += std::abs(discrete - shallow);
|
||||
abs_discrete_eval_sum_local[thread_id] += std::abs(discrete);
|
||||
gradient_norm_local[thread_id] += std::abs(loss.grad);
|
||||
}
|
||||
}
|
||||
|
||||
trainer->backpropagate(th, gradients.data(), offset, count);
|
||||
}
|
||||
);
|
||||
|
||||
// We can asyncronously erase the examples that we used in the previous
|
||||
// step. This can be done safely because we're no longer using these
|
||||
// examples and erase won't invalidate iterators.
|
||||
examples.erase(prev_batch_begin, examples.end());
|
||||
prev_batch_begin = batch_begin;
|
||||
|
||||
thread_pool.wait_for_workers_finished();
|
||||
|
||||
trainer->step_end(thread_pool, learning_rate);
|
||||
|
||||
collect_stats = false;
|
||||
}
|
||||
examples.erase(prev_batch_begin, examples.end());
|
||||
|
||||
if (verbose)
|
||||
{
|
||||
abs_eval_diff_sum = std::accumulate(abs_eval_diff_sum_local.begin(), abs_eval_diff_sum_local.end(), 0.0);
|
||||
abs_discrete_eval_sum = std::accumulate(abs_discrete_eval_sum_local.begin(), abs_discrete_eval_sum_local.end(), 0.0);
|
||||
gradient_norm = std::accumulate(gradient_norm_local.begin(), gradient_norm_local.end(), 0.0);
|
||||
|
||||
const double avg_abs_eval_diff = abs_eval_diff_sum / batch_size;
|
||||
const double avg_abs_discrete_eval = abs_discrete_eval_sum / batch_size;
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
|
||||
out << "INFO (update_parameters):"
|
||||
<< " epoch = " << epoch
|
||||
<< " , avg_abs(trainer_eval-nnue_eval) = " << avg_abs_eval_diff
|
||||
<< " , avg_abs(nnue_eval) = " << avg_abs_discrete_eval
|
||||
<< " , avg_relative_error = " << avg_abs_eval_diff / avg_abs_discrete_eval
|
||||
<< " , batch_size = " << batch_size
|
||||
<< " , grad_norm = " << gradient_norm
|
||||
<< std::endl;
|
||||
} else {
|
||||
// Display some progress but don't synchronize as
|
||||
// we can't really decide when to release the output lock here
|
||||
std::cout << '.';
|
||||
}
|
||||
|
||||
send_messages({{"quantize_parameters"}});
|
||||
|
||||
for(auto& loss : loss_sum_local)
|
||||
{
|
||||
loss_sum += loss;
|
||||
}
|
||||
|
||||
return loss_sum;
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void check_health() {
|
||||
send_messages({{"check_health"}});
|
||||
}
|
||||
|
||||
// save merit function parameters to a file
|
||||
void save_eval(std::string dir_name) {
|
||||
auto eval_dir = Path::combine(Options["EvalSaveDir"], dir_name);
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
|
||||
out << "INFO (save_eval): Saving current evaluation file in " << eval_dir << std::endl;
|
||||
|
||||
// mkdir() will fail if this folder already exists, but
|
||||
// Apart from that. If not, I just want you to make it.
|
||||
// Also, assume that the folders up to EvalSaveDir have been dug.
|
||||
std::filesystem::create_directories(eval_dir);
|
||||
|
||||
const std::string file_name = Path::combine(eval_dir, NNUE::savedfileName);
|
||||
std::ofstream stream(file_name, std::ios::binary);
|
||||
#ifndef NDEBUG
|
||||
bool result =
|
||||
#endif
|
||||
WriteParameters(stream);
|
||||
#ifndef NDEBUG
|
||||
assert(result);
|
||||
#endif
|
||||
out << "INFO (save_eval): Finished saving evaluation file in " << eval_dir << std::endl;
|
||||
}
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
@@ -1,46 +1,52 @@
|
||||
// Interface used for learning NNUE evaluation function
|
||||
|
||||
#ifndef _EVALUATE_NNUE_LEARNER_H_
|
||||
#ifndef _EVALUATE_NNUE_LEARNER_H_
|
||||
#define _EVALUATE_NNUE_LEARNER_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
#include "learn/learn.h"
|
||||
|
||||
#include "../learn/learn.h"
|
||||
#include "misc.h"
|
||||
|
||||
namespace Eval {
|
||||
struct ThreadPool;
|
||||
|
||||
namespace NNUE {
|
||||
// Interface used for learning NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Initialize learning
|
||||
void InitializeTraining(double eta1, uint64_t eta1_epoch,
|
||||
double eta2, uint64_t eta2_epoch, double eta3);
|
||||
// Initialize learning
|
||||
void initialize_training(
|
||||
const std::string& seed,
|
||||
SynchronizedRegionLogger::Region& out);
|
||||
|
||||
// set the number of samples in the mini-batch
|
||||
void SetBatchSize(uint64_t size);
|
||||
// set the number of samples in the mini-batch
|
||||
void set_batch_size(uint64_t size);
|
||||
|
||||
// set the learning rate scale
|
||||
void SetGlobalLearningRateScale(double scale);
|
||||
// Set options such as hyperparameters
|
||||
void set_options(const std::string& options);
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SetOptions(const std::string& options);
|
||||
// Reread the evaluation function parameters for learning from the file
|
||||
void restore_parameters(const std::string& dir_name);
|
||||
|
||||
// Reread the evaluation function parameters for learning from the file
|
||||
void RestoreParameters(const std::string& dir_name);
|
||||
// Add 1 sample of learning data
|
||||
void add_example(
|
||||
Position& pos,
|
||||
Color rootColor,
|
||||
Value discrete_nn_eval,
|
||||
const Learner::PackedSfenValue& psv,
|
||||
double weight);
|
||||
|
||||
// Add 1 sample of learning data
|
||||
void AddExample(Position& pos, Color rootColor,
|
||||
const Learner::PackedSfenValue& psv, double weight);
|
||||
// update the evaluation function parameters
|
||||
Learner::Loss update_parameters(
|
||||
ThreadPool& thread_pool,
|
||||
uint64_t epoch,
|
||||
bool verbose,
|
||||
double learning_rate,
|
||||
double max_grad,
|
||||
Learner::CalcLossFunc calc_loss);
|
||||
|
||||
// update the evaluation function parameters
|
||||
void UpdateParameters(uint64_t epoch);
|
||||
// Check if there are any problems with learning
|
||||
void check_health();
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void CheckHealth();
|
||||
void finalize_net();
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
void save_eval(std::string suffix);
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
#include "a.h"
|
||||
#include "index_list.h"
|
||||
|
||||
// Definition of input feature A of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotate the board 180° for black)
|
||||
// Important note for "halfka": this arch was designed with "flip" in mind
|
||||
// although it still is untested which approach is better.
|
||||
// this has to stay until we find a better arch that works with "flip".
|
||||
// allows us to use current master net for gensfen (primarily needed for higher quality data)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
// Find the index of the feature quantity from the king position and PieceSquare
|
||||
inline IndexType A::make_index(
|
||||
Color perspective, Square s, Piece pc) {
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void A::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
Bitboard bb = pos.pieces();
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s)));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void A::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc));
|
||||
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
@@ -0,0 +1,54 @@
|
||||
#ifndef _NNUE_FEATURES_A_H_
|
||||
#define _NNUE_FEATURES_A_H_
|
||||
|
||||
#include "features_common.h"
|
||||
|
||||
#include "evaluate.h"
|
||||
|
||||
// Definition of input feature A of NNUE evaluation function
|
||||
// A is a union of P features and K features, so technically the
|
||||
// same effect can be achieved by including both P and K features
|
||||
// but it would result in slower index appending because
|
||||
// P would conditionally exclude K features and vice versa,
|
||||
// where A doesn't have any conditionals.
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Feature A: PieceSquare of all pieces, kings included (union of the P and K features)
|
||||
class A {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "A";
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x7A4C414Cu;
|
||||
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = PS_END2;
|
||||
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 32;
|
||||
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
private:
|
||||
// Index of a feature for a given piece on some square
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc);
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef _NNUE_FEATURES_A_H_
|
||||
@@ -1,73 +1,65 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "castling_right.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
//Definition of input feature quantity CastlingRight of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace NNUE {
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void CastlingRight::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void CastlingRight::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
|
||||
int castling_rights = pos.state()->castlingRights;
|
||||
int relative_castling_rights;
|
||||
if (perspective == WHITE) {
|
||||
relative_castling_rights = castling_rights;
|
||||
relative_castling_rights = castling_rights;
|
||||
}
|
||||
else {
|
||||
// Invert the perspective.
|
||||
relative_castling_rights = ((castling_rights & 3) << 2)
|
||||
& ((castling_rights >> 2) & 3);
|
||||
// Invert the perspective.
|
||||
relative_castling_rights = ((castling_rights & 3) << 2)
|
||||
& ((castling_rights >> 2) & 3);
|
||||
}
|
||||
|
||||
for (int i = 0; i <kDimensions; ++i) {
|
||||
if (relative_castling_rights & (i << 1)) {
|
||||
active->push_back(i);
|
||||
}
|
||||
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
|
||||
if (relative_castling_rights & (1 << i)) {
|
||||
active->push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void CastlingRight::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void CastlingRight::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* /* added */) {
|
||||
|
||||
int previous_castling_rights = pos.state()->previous->castlingRights;
|
||||
int current_castling_rights = pos.state()->castlingRights;
|
||||
int relative_previous_castling_rights;
|
||||
int relative_current_castling_rights;
|
||||
if (perspective == WHITE) {
|
||||
relative_previous_castling_rights = previous_castling_rights;
|
||||
relative_current_castling_rights = current_castling_rights;
|
||||
relative_previous_castling_rights = previous_castling_rights;
|
||||
relative_current_castling_rights = current_castling_rights;
|
||||
}
|
||||
else {
|
||||
// Invert the perspective.
|
||||
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
|
||||
& ((previous_castling_rights >> 2) & 3);
|
||||
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
|
||||
& ((current_castling_rights >> 2) & 3);
|
||||
// Invert the perspective.
|
||||
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
|
||||
& ((previous_castling_rights >> 2) & 3);
|
||||
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
|
||||
& ((current_castling_rights >> 2) & 3);
|
||||
}
|
||||
|
||||
for (int i = 0; i < kDimensions; ++i) {
|
||||
if ((relative_previous_castling_rights & (i << 1)) &&
|
||||
(relative_current_castling_rights & (i << 1)) == 0) {
|
||||
removed->push_back(i);
|
||||
}
|
||||
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
|
||||
if ((relative_previous_castling_rights & (1 << i)) &&
|
||||
(relative_current_castling_rights & (1 << i)) == 0) {
|
||||
removed->push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
@@ -1,48 +1,44 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
|
||||
#define _NNUE_FEATURES_CASTLING_RIGHT_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "evaluate.h"
|
||||
|
||||
namespace NNUE {
|
||||
//Definition of input feature quantity CastlingRight of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature CastlingRight: castling rights currently available
|
||||
class CastlingRight {
|
||||
public:
|
||||
class CastlingRight {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "CastlingRight";
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x913968AAu;
|
||||
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = 4;
|
||||
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 4;
|
||||
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,47 +1,49 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "enpassant.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
//Definition of input feature quantity EnPassant of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace NNUE {
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void EnPassant::append_active_indices(
|
||||
const Position& pos,
|
||||
Color /* perspective */,
|
||||
IndexList* active) {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void EnPassant::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// do nothing if array size is small to avoid compiler warning
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
|
||||
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions)
|
||||
return;
|
||||
|
||||
auto epSquare = pos.state()->epSquare;
|
||||
if (epSquare == SQ_NONE) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (perspective == BLACK) {
|
||||
epSquare = rotate180(epSquare);
|
||||
}
|
||||
if (epSquare == SQ_NONE)
|
||||
return;
|
||||
|
||||
auto file = file_of(epSquare);
|
||||
active->push_back(file);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
|
||||
void EnPassant::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
// Not implemented.
|
||||
assert(false);
|
||||
}
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void EnPassant::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color /* perspective */,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
} // namespace Features
|
||||
auto previous_epSquare = pos.state()->previous->epSquare;
|
||||
auto epSquare = pos.state()->epSquare;
|
||||
|
||||
} // namespace NNUE
|
||||
if (previous_epSquare != SQ_NONE) {
|
||||
if (epSquare != SQ_NONE && file_of(epSquare) == file_of(previous_epSquare))
|
||||
return;
|
||||
|
||||
} // namespace Eval
|
||||
auto file = file_of(previous_epSquare);
|
||||
removed->push_back(file);
|
||||
}
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
if (epSquare != SQ_NONE) {
|
||||
auto file = file_of(epSquare);
|
||||
added->push_back(file);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
@@ -1,22 +1,15 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_ENPASSANT_H_
|
||||
#define _NNUE_FEATURES_ENPASSANT_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "evaluate.h"
|
||||
|
||||
namespace NNUE {
|
||||
//Definition of input feature quantity EnPassant of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
|
||||
// Feature K: Ball position
|
||||
class EnPassant {
|
||||
public:
|
||||
class EnPassant {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "EnPassant";
|
||||
// Hash value embedded in the evaluation function file
|
||||
@@ -26,23 +19,22 @@ namespace Eval {
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 1;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
};
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
};
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
+251
-197
@@ -26,222 +26,276 @@
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Class template that represents a list of values
|
||||
template <typename T, T... Values>
|
||||
struct CompileTimeList;
|
||||
// Class template that represents a list of values
|
||||
template <typename T, T... Values>
|
||||
struct CompileTimeList;
|
||||
|
||||
template <typename T, T First, T... Remaining>
|
||||
struct CompileTimeList<T, First, Remaining...> {
|
||||
static constexpr bool Contains(T value) {
|
||||
return value == First || CompileTimeList<T, Remaining...>::Contains(value);
|
||||
}
|
||||
static constexpr std::array<T, sizeof...(Remaining) + 1>
|
||||
kValues = {{First, Remaining...}};
|
||||
};
|
||||
|
||||
template <typename T, T First, T... Remaining>
|
||||
constexpr std::array<T, sizeof...(Remaining) + 1>
|
||||
CompileTimeList<T, First, Remaining...>::kValues;
|
||||
template <typename T>
|
||||
struct CompileTimeList<T> {
|
||||
static constexpr bool Contains(T /*value*/) {
|
||||
return false;
|
||||
}
|
||||
static constexpr std::array<T, 0> kValues = { {} };
|
||||
};
|
||||
|
||||
// Class template that adds to the beginning of the list
|
||||
template <typename T, typename ListType, T Value>
|
||||
struct AppendToList;
|
||||
template <typename T, T... Values, T AnotherValue>
|
||||
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
|
||||
using Result = CompileTimeList<T, AnotherValue, Values...>;
|
||||
};
|
||||
|
||||
// Class template for adding to a sorted, unique list
|
||||
template <typename T, typename ListType, T Value>
|
||||
struct InsertToSet;
|
||||
template <typename T, T First, T... Remaining, T AnotherValue>
|
||||
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
|
||||
using Result = std::conditional_t<
|
||||
CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
|
||||
CompileTimeList<T, First, Remaining...>,
|
||||
std::conditional_t<(AnotherValue < First),
|
||||
CompileTimeList<T, AnotherValue, First, Remaining...>,
|
||||
typename AppendToList<T, typename InsertToSet<
|
||||
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
|
||||
First>::Result>>;
|
||||
};
|
||||
template <typename T, T Value>
|
||||
struct InsertToSet<T, CompileTimeList<T>, Value> {
|
||||
using Result = CompileTimeList<T, Value>;
|
||||
};
|
||||
|
||||
// Base class of feature set
|
||||
template <typename Derived>
|
||||
class FeatureSetBase {
|
||||
|
||||
public:
|
||||
// Get a list of indices for active features
|
||||
template <typename IndexListType>
|
||||
static void AppendActiveIndices(
|
||||
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
|
||||
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
Derived::CollectActiveIndices(
|
||||
pos, trigger, perspective, &active[perspective]);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template <typename PositionType, typename IndexListType>
|
||||
static void AppendChangedIndices(
|
||||
const PositionType& pos, TriggerEvent trigger,
|
||||
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
|
||||
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
if (dp.dirty_num == 0) return;
|
||||
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
reset[perspective] = false;
|
||||
switch (trigger) {
|
||||
case TriggerEvent::kFriendKingMoved:
|
||||
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
template <typename T, T First, T... Remaining>
|
||||
struct CompileTimeList<T, First, Remaining...> {
|
||||
static constexpr bool contains(T value) {
|
||||
return value == First || CompileTimeList<T, Remaining...>::contains(value);
|
||||
}
|
||||
if (reset[perspective]) {
|
||||
Derived::CollectActiveIndices(
|
||||
pos, trigger, perspective, &added[perspective]);
|
||||
} else {
|
||||
Derived::CollectChangedIndices(
|
||||
pos, trigger, perspective,
|
||||
&removed[perspective], &added[perspective]);
|
||||
|
||||
static constexpr std::array<T, sizeof...(Remaining) + 1>
|
||||
kValues = {{First, Remaining...}};
|
||||
};
|
||||
|
||||
template <typename T, T First, T... Remaining>
|
||||
constexpr std::array<T, sizeof...(Remaining) + 1>
|
||||
CompileTimeList<T, First, Remaining...>::kValues;
|
||||
|
||||
template <typename T>
|
||||
struct CompileTimeList<T> {
|
||||
static constexpr bool contains(T /*value*/) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
static constexpr std::array<T, 0> kValues = { {} };
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
|
||||
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
|
||||
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
|
||||
public FeatureSetBase<
|
||||
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
|
||||
private:
|
||||
using Head = FirstFeatureType;
|
||||
using Tail = FeatureSet<RemainingFeatureTypes...>;
|
||||
// Class template that adds to the beginning of the list
|
||||
template <typename T, typename ListType, T Value>
|
||||
struct AppendToList;
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
Head::kDimensions + Tail::kDimensions;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
|
||||
// List of timings to perform all calculations instead of difference calculation
|
||||
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
|
||||
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
template <typename T, T... Values, T AnotherValue>
|
||||
struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
|
||||
using Result = CompileTimeList<T, AnotherValue, Values...>;
|
||||
};
|
||||
|
||||
// Get the feature quantity name
|
||||
static std::string GetName() {
|
||||
return std::string(Head::kName) + "+" + Tail::GetName();
|
||||
}
|
||||
// Class template for adding to a sorted, unique list
|
||||
template <typename T, typename ListType, T Value>
|
||||
struct InsertToSet;
|
||||
|
||||
private:
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <typename IndexListType>
|
||||
static void CollectActiveIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexListType* const active) {
|
||||
Tail::CollectActiveIndices(pos, trigger, perspective, active);
|
||||
if (Head::kRefreshTrigger == trigger) {
|
||||
const auto start = active->size();
|
||||
Head::AppendActiveIndices(pos, perspective, active);
|
||||
for (auto i = start; i < active->size(); ++i) {
|
||||
(*active)[i] += Tail::kDimensions;
|
||||
template <typename T, T First, T... Remaining, T AnotherValue>
|
||||
struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
|
||||
using Result =
|
||||
std::conditional_t<
|
||||
CompileTimeList<T, First, Remaining...>::contains(AnotherValue),
|
||||
CompileTimeList<T, First, Remaining...>,
|
||||
std::conditional_t<
|
||||
(AnotherValue < First),
|
||||
CompileTimeList<T, AnotherValue, First, Remaining...>,
|
||||
typename AppendToList<T, typename InsertToSet<
|
||||
T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
|
||||
First
|
||||
>::Result
|
||||
>
|
||||
>;
|
||||
};
|
||||
|
||||
template <typename T, T Value>
|
||||
struct InsertToSet<T, CompileTimeList<T>, Value> {
|
||||
using Result = CompileTimeList<T, Value>;
|
||||
};
|
||||
|
||||
// Base class of feature set
|
||||
template <typename Derived>
|
||||
class FeatureSetBase {
|
||||
|
||||
public:
|
||||
// Get a list of indices for active features
|
||||
template <typename IndexListType>
|
||||
static void append_active_indices(
|
||||
const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
|
||||
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
Derived::collect_active_indices(
|
||||
pos, trigger, perspective, &active[perspective]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <typename IndexListType>
|
||||
static void CollectChangedIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexListType* const removed, IndexListType* const added) {
|
||||
Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
|
||||
if (Head::kRefreshTrigger == trigger) {
|
||||
const auto start_removed = removed->size();
|
||||
const auto start_added = added->size();
|
||||
Head::AppendChangedIndices(pos, perspective, removed, added);
|
||||
for (auto i = start_removed; i < removed->size(); ++i) {
|
||||
(*removed)[i] += Tail::kDimensions;
|
||||
// Get a list of indices for recently changed features
|
||||
template <typename PositionType, typename IndexListType>
|
||||
static void append_changed_indices(
|
||||
const PositionType& pos,
|
||||
TriggerEvent trigger,
|
||||
IndexListType removed[2],
|
||||
IndexListType added[2],
|
||||
bool reset[2]) {
|
||||
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
switch (trigger) {
|
||||
case TriggerEvent::kNone:
|
||||
break;
|
||||
case TriggerEvent::kFriendKingMoved:
|
||||
if (dp.dirty_num == 0) continue;
|
||||
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
|
||||
break;
|
||||
case TriggerEvent::kEnemyKingMoved:
|
||||
if (dp.dirty_num == 0) continue;
|
||||
reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
|
||||
break;
|
||||
case TriggerEvent::kAnyKingMoved:
|
||||
if (dp.dirty_num == 0) continue;
|
||||
reset[perspective] = type_of(dp.piece[0]) == KING;
|
||||
break;
|
||||
case TriggerEvent::kAnyPieceMoved:
|
||||
reset[perspective] = true;
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
if (reset[perspective]) {
|
||||
Derived::collect_active_indices(
|
||||
pos, trigger, perspective, &added[perspective]);
|
||||
} else {
|
||||
Derived::collect_changed_indices(
|
||||
pos, trigger, perspective,
|
||||
&removed[perspective], &added[perspective]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto i = start_added; i < added->size(); ++i) {
|
||||
(*added)[i] += Tail::kDimensions;
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
|
||||
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
|
||||
class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
|
||||
public FeatureSetBase<
|
||||
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>
|
||||
> {
|
||||
|
||||
private:
|
||||
using Head = FirstFeatureType;
|
||||
using Tail = FeatureSet<RemainingFeatureTypes...>;
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
|
||||
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
Head::kDimensions + Tail::kDimensions;
|
||||
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
|
||||
|
||||
// List of timings to perform all calculations instead of difference calculation
|
||||
using SortedTriggerSet = typename InsertToSet<TriggerEvent,
|
||||
typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
|
||||
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
|
||||
// Get the feature quantity name
|
||||
static std::string get_name() {
|
||||
return std::string(Head::kName) + "+" + Tail::get_name();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make the base class and the class template that recursively uses itself a friend
|
||||
friend class FeatureSetBase<FeatureSet>;
|
||||
template <typename... FeatureTypes>
|
||||
friend class FeatureSet;
|
||||
};
|
||||
private:
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <typename IndexListType>
|
||||
static void collect_active_indices(
|
||||
const Position& pos,
|
||||
const TriggerEvent trigger,
|
||||
const Color perspective,
|
||||
IndexListType* const active) {
|
||||
|
||||
// Class template that represents the feature set
|
||||
template <typename FeatureType>
|
||||
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
|
||||
Tail::collect_active_indices(pos, trigger, perspective, active);
|
||||
if (Head::kRefreshTrigger == trigger) {
|
||||
const auto start = active->size();
|
||||
Head::append_active_indices(pos, perspective, active);
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions = FeatureType::kDimensions;
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
using SortedTriggerSet =
|
||||
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
for (auto i = start; i < active->size(); ++i) {
|
||||
(*active)[i] += Tail::kDimensions;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the feature quantity name
|
||||
static std::string GetName() {
|
||||
return FeatureType::kName;
|
||||
}
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <typename IndexListType>
|
||||
static void collect_changed_indices(
|
||||
const Position& pos,
|
||||
const TriggerEvent trigger,
|
||||
const Color perspective,
|
||||
IndexListType* const removed,
|
||||
IndexListType* const added) {
|
||||
|
||||
private:
|
||||
// Get a list of indices for active features
|
||||
static void CollectActiveIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexList* const active) {
|
||||
if (FeatureType::kRefreshTrigger == trigger) {
|
||||
FeatureType::AppendActiveIndices(pos, perspective, active);
|
||||
}
|
||||
}
|
||||
Tail::collect_changed_indices(pos, trigger, perspective, removed, added);
|
||||
if (Head::kRefreshTrigger == trigger) {
|
||||
const auto start_removed = removed->size();
|
||||
const auto start_added = added->size();
|
||||
Head::append_changed_indices(pos, perspective, removed, added);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void CollectChangedIndices(
|
||||
const Position& pos, const TriggerEvent trigger, const Color perspective,
|
||||
IndexList* const removed, IndexList* const added) {
|
||||
for (auto i = start_removed; i < removed->size(); ++i) {
|
||||
(*removed)[i] += Tail::kDimensions;
|
||||
}
|
||||
|
||||
if (FeatureType::kRefreshTrigger == trigger) {
|
||||
FeatureType::AppendChangedIndices(pos, perspective, removed, added);
|
||||
}
|
||||
}
|
||||
for (auto i = start_added; i < added->size(); ++i) {
|
||||
(*added)[i] += Tail::kDimensions;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make the base class and the class template that recursively uses itself a friend
|
||||
friend class FeatureSetBase<FeatureSet>;
|
||||
template <typename... FeatureTypes>
|
||||
friend class FeatureSet;
|
||||
};
|
||||
// Make the base class and the class template that recursively uses itself a friend
|
||||
friend class FeatureSetBase<FeatureSet>;
|
||||
|
||||
template <typename... FeatureTypes>
|
||||
friend class FeatureSet;
|
||||
};
|
||||
|
||||
// Class template that represents the feature set
|
||||
template <typename FeatureType>
|
||||
class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
|
||||
|
||||
public:
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
|
||||
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions = FeatureType::kDimensions;
|
||||
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
using SortedTriggerSet =
|
||||
CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
|
||||
|
||||
static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
|
||||
|
||||
// Get the feature quantity name
|
||||
static std::string get_name() {
|
||||
return FeatureType::kName;
|
||||
}
|
||||
|
||||
private:
|
||||
// Get a list of indices for active features
|
||||
static void collect_active_indices(
|
||||
const Position& pos,
|
||||
const TriggerEvent trigger,
|
||||
const Color perspective,
|
||||
IndexList* const active) {
|
||||
|
||||
if (FeatureType::kRefreshTrigger == trigger) {
|
||||
FeatureType::append_active_indices(pos, perspective, active);
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void collect_changed_indices(
|
||||
const Position& pos,
|
||||
const TriggerEvent trigger,
|
||||
const Color perspective,
|
||||
IndexList* const removed,
|
||||
IndexList* const added) {
|
||||
|
||||
if (FeatureType::kRefreshTrigger == trigger) {
|
||||
FeatureType::append_changed_indices(pos, perspective, removed, added);
|
||||
}
|
||||
}
|
||||
|
||||
// Make the base class and the class template that recursively uses itself a friend
|
||||
friend class FeatureSetBase<FeatureSet>;
|
||||
|
||||
template <typename... FeatureTypes>
|
||||
friend class FeatureSet;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
|
||||
@@ -34,10 +34,10 @@ namespace Eval::NNUE::Features {
|
||||
// Trigger to perform full calculations instead of difference only
|
||||
enum class TriggerEvent {
|
||||
kNone, // Calculate the difference whenever possible
|
||||
kFriendKingMoved, // calculate all when own ball moves
|
||||
kEnemyKingMoved, // do all calculations when enemy balls move
|
||||
kAnyKingMoved, // do all calculations if either ball moves
|
||||
kAnyPieceMoved, // always do all calculations
|
||||
kFriendKingMoved, // calculate full evaluation when own king moves
|
||||
kEnemyKingMoved, // calculate full evaluation when opponent king moves
|
||||
kAnyKingMoved, // calculate full evaluation when any king moves
|
||||
kAnyPieceMoved, // always calculate full evaluation
|
||||
};
|
||||
|
||||
enum class Side {
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKA of NNUE evaluation function
|
||||
|
||||
#include "half_ka.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotate the board 180° for black)
|
||||
// Important note for "halfka": this arch was designed with "flip" in mind
|
||||
// although it still is untested which approach is better.
|
||||
// this has to stay until we find a better arch that works with "flip".
|
||||
// allows us to use current master net for gensfen (primarily needed for higher quality data)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
// Find the index of the feature quantity from the king position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfKA<AssociatedKing>::make_index(
|
||||
Color perspective,
|
||||
Square s,
|
||||
Piece pc,
|
||||
Square ksq) {
|
||||
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END2 * ksq);
|
||||
}
|
||||
|
||||
// Get a list of indices for active features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKA<AssociatedKing>::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
Bitboard bb = pos.pieces();
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKA<AssociatedKing>::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
|
||||
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfKA<Side::kFriend>;
|
||||
template class HalfKA<Side::kEnemy>;
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef NNUE_FEATURES_HALF_KA_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KA_H_INCLUDED
|
||||
|
||||
#include "features_common.h"
|
||||
|
||||
#include "evaluate.h"
|
||||
|
||||
//Definition of input features HalfKA of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfKA: Combination of the position of the associated king
|
||||
// (friend or enemy, selected by AssociatedKing) and the position of pieces.
// NOTE(review): the original comment said "pieces other than kings" (wording
// shared with HalfKP); kMaxActiveDimensions = 32 below suggests kings are
// included here -- confirm against the implementation file.
|
||||
template <Side AssociatedKing>
|
||||
class HalfKA {
|
||||
|
||||
public:
|
||||
// Feature name; differs between the friend-king and enemy-king variants
|
||||
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
|
||||
"HalfKA(Friend)" : "HalfKA(Enemy)";
|
||||
|
||||
// Hash value embedded in the evaluation file.
// The low bit is flipped for the friend variant (the comparison yields 0 or 1).
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0x5F134CB9u ^ (AssociatedKing == Side::kFriend);
|
||||
|
||||
// Number of feature dimensions: SQUARE_NB * PS_END2
// (PS_END2 is declared elsewhere; presumably the piece-square range that
// also covers kings -- TODO confirm)
|
||||
static constexpr IndexType kDimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END2);
|
||||
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions = 32;
|
||||
|
||||
// Trigger for full calculation instead of difference calculation:
// a move of the king this feature is associated with
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
|
||||
// Get a list of indices for active features (appended to *active)
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices for recently changed features
// (appended to *removed and *added respectively)
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
private:
|
||||
// Index of a feature for a given king position (sq_k) and another piece (pc) on some square (s)
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef NNUE_FEATURES_HALF_KA_H_INCLUDED
|
||||
@@ -1,19 +1,19 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
@@ -23,50 +23,72 @@
|
||||
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
// Find the index of the feature quantity from the king position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfKP<AssociatedKing>::MakeIndex(
|
||||
Color perspective, Square s, Piece pc, Square ksq) {
|
||||
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq);
|
||||
}
|
||||
|
||||
// Get a list of indices for active features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
|
||||
// Orient a square according to perspective (rotate the board 180° for black)
|
||||
// this has to stay until we find a better arch that works with "flip".
|
||||
// allows us to use current master net for gensfen (primarily needed for higher quality data)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
// Find the index of the feature quantity from the king position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfKP<AssociatedKing>::make_index(
|
||||
Color perspective,
|
||||
Square s,
|
||||
Piece pc,
|
||||
Square ksq) {
|
||||
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
if (type_of(pc) == KING) continue;
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq);
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfKP<Side::kFriend>;
|
||||
// Get a list of indices for active features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
template <Side AssociatedKing>
|
||||
void HalfKP<AssociatedKing>::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
|
||||
if (type_of(pc) == KING)
|
||||
continue;
|
||||
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
|
||||
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfKP<Side::kFriend>;
|
||||
template class HalfKP<Side::kEnemy>;
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
+54
-42
@@ -1,62 +1,74 @@
|
||||
/*
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
Stockfish, a UCI chess playing engine derived from Glaurung 2.1
|
||||
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
|
||||
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
Stockfish is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
Stockfish is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
|
||||
#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
|
||||
#define NNUE_FEATURES_HALF_KP_H_INCLUDED
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
#include "evaluate.h"
|
||||
|
||||
//Definition of input features HalfKP of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfKP: Combination of the position of own king
|
||||
// and the position of pieces other than kings
|
||||
template <Side AssociatedKing>
|
||||
class HalfKP {
|
||||
// Feature HalfKP: Combination of the position of own king
|
||||
// and the position of pieces other than kings
|
||||
template <Side AssociatedKing>
|
||||
class HalfKP {
|
||||
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* kName = "HalfKP(Friend)";
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
|
||||
public:
|
||||
// Feature name
|
||||
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
|
||||
"HalfKP(Friend)" : "HalfKP(Enemy)";
|
||||
|
||||
// Get a list of indices for active features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
|
||||
|
||||
private:
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
|
||||
};
|
||||
// Maximum number of simultaneously active features
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
|
||||
// Trigger for full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
|
||||
// Get a list of indices for active features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices for recently changed features
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
private:
|
||||
// Index of a feature for a given king position and another piece on some square
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
#include "half_relative_ka.h"
|
||||
#include "index_list.h"
|
||||
|
||||
//Definition of input features HalfRelativeKA of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Orient a square according to perspective (rotate the board 180° for black).
// Returns s unchanged when bool(perspective) is false and s ^ 63 otherwise;
// for a square index in 0..63 (assumed numbering) that equals 63 - s.
|
||||
// Important note for "halfka": this architecture was designed with "flip" in mind,
|
||||
// although it is still untested which approach is better.
|
||||
// The rotation has to stay until we find a better architecture that works with "flip".
|
||||
// It allows us to use the current master net for gensfen (primarily needed for higher quality data).
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
// Find the feature index from the king position and a piece on a square.
// perspective: side whose point of view is used to orient s and to select
//              the column of kpp_board_index
// s:           square of the piece (not yet oriented)
// pc:          the piece standing on s
// sq_k:        king square; callers in this file pass it already oriented
// Builds the piece-square value p and delegates to the (sq_k, p) overload.
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKA<AssociatedKing>::make_index(
|
||||
Color perspective,
|
||||
Square s,
|
||||
Piece pc,
|
||||
Square sq_k) {
|
||||
|
||||
// p = oriented square + per-piece base offset from kpp_board_index
const IndexType p = IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
|
||||
return make_index(sq_k, p);
|
||||
}
|
||||
|
||||
// Find the feature index from the king position and a piece-square value.
// sq_k: king square
// p:    piece-square value (piece base offset + square), as built by the
//       four-argument overload
// The result addresses a kBoardWidth x kBoardHeight "virtual board" per piece
// kind, on which the king sits at the centre.
// NOTE(review): assumes p >= PS_W_PAWN; the signedness of IndexType is not
// visible here -- TODO confirm.
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKA<AssociatedKing>::make_index(
|
||||
Square sq_k,
|
||||
IndexType p) {
|
||||
|
||||
constexpr IndexType W = kBoardWidth;
|
||||
constexpr IndexType H = kBoardHeight;
|
||||
// Split p into the piece kind (quotient) and the piece's square (remainder)
const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB;
|
||||
const Square sq_p = static_cast<Square>((p - PS_W_PAWN) % SQUARE_NB);
|
||||
// Offsets relative to the king, shifted by half the virtual board size
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
|
||||
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
|
||||
return H * W * piece_index + H * relative_file + relative_rank;
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features.
// Appends one index to *active for every occupied square of pos; kings are
// not masked out of pos.pieces() here.
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKA<AssociatedKing>::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
// Oriented square of the associated king: perspective's own king for the
// friend variant, the opposite side's king otherwise
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
Bitboard bb = pos.pieces();
|
||||
while (bb) {
|
||||
// pop_lsb presumably extracts one occupied square and clears it from bb
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices whose values have changed from the previous position.
// Walks the dirtyPiece record of the current state and appends the index of
// each vacated square to *removed and of each newly occupied square to *added.
// No piece type is skipped, so king entries are reported as well.
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKA<AssociatedKing>::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
// Oriented square of the associated king (own king for the friend variant,
// the opposite side's king otherwise)
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
|
||||
// SQ_NONE as "from" means there is no square to remove for this entry
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
|
||||
|
||||
// SQ_NONE as "to" means there is no square to add for this entry
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit instantiations of both variants; the member definitions above live
// in this translation unit.
template class HalfRelativeKA<Side::kFriend>;
|
||||
template class HalfRelativeKA<Side::kEnemy>;
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
@@ -0,0 +1,68 @@
|
||||
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KA_H_
|
||||
#define _NNUE_FEATURES_HALF_RELATIVE_KA_H_
|
||||
|
||||
#include "features_common.h"
|
||||
|
||||
#include "evaluate.h"
|
||||
|
||||
// Definition of input features HalfRelativeKA of NNUE evaluation function
|
||||
// K - King
|
||||
// A - Any piece
|
||||
// KA - product of K and A
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Feature HalfRelativeKA: position of each piece (kings included, see
// kNumPieceKinds) relative to the friend king or the enemy king
|
||||
template <Side AssociatedKing>
|
||||
class HalfRelativeKA {
|
||||
public:
|
||||
// Feature name; differs between the friend-king and enemy-king variants
|
||||
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
|
||||
"HalfRelativeKA(Friend)" : "HalfRelativeKA(Enemy)";
|
||||
|
||||
// Hash value embedded in the evaluation function file.
// The low bit is flipped for the friend variant (the comparison yields 0 or 1).
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0xA123051Fu ^ (AssociatedKing == Side::kFriend);
|
||||
|
||||
// Number of piece kinds: 6 piece types for each of the 2 colors
static constexpr IndexType kNumPieceKinds = 6 * 2;
|
||||
|
||||
// Width of the virtual board with the king in the center
|
||||
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
|
||||
|
||||
// Height of the virtual board with the king in the center
|
||||
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
|
||||
|
||||
// Number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
kNumPieceKinds * kBoardHeight * kBoardWidth;
|
||||
|
||||
// Maximum number of features that can be active (value 1) at the same time
|
||||
static constexpr IndexType kMaxActiveDimensions = 32;
|
||||
|
||||
// Event that triggers a full calculation instead of a difference calculation:
// a move of the king this feature is associated with
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features (appended to *active)
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous position
// (appended to *removed and *added respectively)
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
// Find the feature index from the king position (s) and a piece-square value (p)
|
||||
static IndexType make_index(Square s, IndexType p);
|
||||
|
||||
// Find the feature index from the king position (sq_k) and a piece (pc) on square s
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef _NNUE_FEATURES_HALF_RELATIVE_KA_H_
|
||||
@@ -1,78 +1,91 @@
|
||||
//Definition of input features HalfRelativeKP of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "half_relative_kp.h"
|
||||
#include "half_relative_kp.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
//Definition of input features HalfRelativeKP of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace NNUE {
|
||||
// Orient a square according to perspective (rotate the board 180° for black)
|
||||
// this has to stay until we find a better arch that works with "flip".
|
||||
// allows us to use current master net for gensfen (primarily needed for higher quality data)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
namespace Features {
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKP<AssociatedKing>::make_index(
|
||||
Color perspective,
|
||||
Square s,
|
||||
Piece pc,
|
||||
Square sq_k) {
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
const IndexType p = IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
|
||||
return make_index(sq_k, p);
|
||||
}
|
||||
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
|
||||
Color perspective, Square s, Piece pc, Square sq_k) {
|
||||
const IndexType p = IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
|
||||
return MakeIndex(sq_k, p);
|
||||
}
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKP<AssociatedKing>::make_index(
|
||||
Square sq_k,
|
||||
IndexType p) {
|
||||
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
template <Side AssociatedKing>
|
||||
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
|
||||
Square sq_k, IndexType p) {
|
||||
constexpr IndexType W = kBoardWidth;
|
||||
constexpr IndexType H = kBoardHeight;
|
||||
const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB;
|
||||
const Square sq_p = static_cast<Square>((p - PS_W_PAWN) % SQUARE_NB);
|
||||
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
|
||||
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
|
||||
return H * W * piece_index + H * relative_file + relative_rank;
|
||||
}
|
||||
constexpr IndexType W = kBoardWidth;
|
||||
constexpr IndexType H = kBoardHeight;
|
||||
const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB;
|
||||
const Square sq_p = static_cast<Square>((p - PS_W_PAWN) % SQUARE_NB);
|
||||
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
|
||||
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
|
||||
return H * W * piece_index + H * relative_file + relative_rank;
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKP<AssociatedKing>::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
Square ksq = orient(perspective, pos.square<KING>(perspective));
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
if (type_of(pc) == KING) continue;
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
|
||||
}
|
||||
}
|
||||
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
template class HalfRelativeKP<Side::kFriend>;
|
||||
template class HalfRelativeKP<Side::kEnemy>;
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
template <Side AssociatedKing>
|
||||
void HalfRelativeKP<AssociatedKing>::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
} // namespace NNUE
|
||||
Square ksq = orient(
|
||||
perspective,
|
||||
pos.square<KING>(
|
||||
AssociatedKing == Side::kFriend ? perspective : ~perspective));
|
||||
|
||||
} // namespace Eval
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
if (type_of(pc) == KING)
|
||||
continue;
|
||||
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
|
||||
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc, ksq));
|
||||
}
|
||||
}
|
||||
|
||||
template class HalfRelativeKP<Side::kFriend>;
|
||||
template class HalfRelativeKP<Side::kEnemy>;
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
@@ -1,65 +1,66 @@
|
||||
//Definition of input features HalfRelativeKP of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
|
||||
#ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
|
||||
#define _NNUE_FEATURES_HALF_RELATIVE_KP_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "evaluate.h"
|
||||
|
||||
namespace NNUE {
|
||||
//Definition of input features HalfRelativeKP of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
// Feature HalfRelativeKP: Relative position of each piece other than the ball based on own ball or enemy ball
|
||||
template <Side AssociatedKing>
|
||||
class HalfRelativeKP {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
|
||||
"HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)";
|
||||
|
||||
// Feature HalfRelativeKP: Relative position of each piece other than the ball based on own ball or enemy ball
|
||||
template <Side AssociatedKing>
|
||||
class HalfRelativeKP {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
|
||||
"HalfRelativeKP(Friend)" : "HalfRelativeKP(Enemy)";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0xF9180919u ^ (AssociatedKing == Side::kFriend);
|
||||
// Piece type excluding balls
|
||||
static constexpr IndexType kNumPieceKinds = 5 * 2;
|
||||
// width of the virtual board with the ball in the center
|
||||
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
|
||||
// height of a virtual board with balls in the center
|
||||
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
kNumPieceKinds * kBoardHeight * kBoardWidth;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue =
|
||||
0xF9180919u ^ (AssociatedKing == Side::kFriend);
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
// Piece type excluding balls
|
||||
static constexpr IndexType kNumPieceKinds = 5 * 2;
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
// width of the virtual board with the ball in the center
|
||||
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
|
||||
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
static IndexType MakeIndex(Square s, IndexType p);
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
|
||||
};
|
||||
// height of a virtual board with balls in the center
|
||||
static constexpr IndexType kBoardHeight = RANK_NB * 2 - 1;
|
||||
|
||||
} // namespace Features
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions =
|
||||
kNumPieceKinds * kBoardHeight * kBoardWidth;
|
||||
|
||||
} // namespace NNUE
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
|
||||
} // namespace Eval
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger =
|
||||
(AssociatedKing == Side::kFriend) ?
|
||||
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
static IndexType make_index(Square s, IndexType p);
|
||||
|
||||
// Find the index of the feature quantity from the ball position and PieceSquare
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc, Square sq_k);
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
+36
-49
@@ -1,58 +1,45 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "k.h"
|
||||
#include "k.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace NNUE {
|
||||
// Orient a square according to perspective (rotate the board 180° for black)
|
||||
// this has to stay until we find a better arch that works with "flip".
|
||||
// allows us to use current master net for gensfen (primarily needed for higher quality data)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
namespace Features {
|
||||
// Index of a feature for a given king position.
|
||||
IndexType K::make_index(Color perspective, Square s, Color king_color) {
|
||||
return IndexType(orient(perspective, s) + bool(perspective ^ king_color) * 64);
|
||||
}
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
inline Square orient(Color perspective, Square s) {
|
||||
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void K::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
// Index of a feature for a given king position.
|
||||
IndexType K::MakeIndex(Color perspective, Square s, Color king_color) {
|
||||
return IndexType(orient(perspective, s) + bool(perspective ^ king_color) * 64);
|
||||
}
|
||||
for (auto color : Colors) {
|
||||
active->push_back(make_index(perspective, pos.square<KING>(color), color));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void K::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
for (auto color : Colors) {
|
||||
active->push_back(MakeIndex(perspective, pos.square<KING>(color), color));
|
||||
}
|
||||
}
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void K::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void K::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
Color king_color;
|
||||
if (dp.piece[0] == Piece::W_KING) {
|
||||
king_color = WHITE;
|
||||
}
|
||||
else if (dp.piece[0] == Piece::B_KING) {
|
||||
king_color = BLACK;
|
||||
}
|
||||
else {
|
||||
return;
|
||||
}
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
if (type_of(dp.piece[0]) == KING)
|
||||
{
|
||||
removed->push_back(make_index(perspective, dp.from[0], color_of(dp.piece[0])));
|
||||
added->push_back(make_index(perspective, dp.to[0], color_of(dp.piece[0])));
|
||||
}
|
||||
}
|
||||
|
||||
removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
|
||||
added->push_back(MakeIndex(perspective, dp.to[0], king_color));
|
||||
}
|
||||
|
||||
} // namespace Features
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
+33
-36
@@ -1,52 +1,49 @@
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_K_H_
|
||||
#ifndef _NNUE_FEATURES_K_H_
|
||||
#define _NNUE_FEATURES_K_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "evaluate.h"
|
||||
|
||||
namespace NNUE {
|
||||
//Definition of input feature quantity K of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
// Feature K: King position
|
||||
class K {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "K";
|
||||
|
||||
// Feature K: King position
|
||||
class K {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "K";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0xD3CEE169u;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = SQUARE_NB * 2;
|
||||
// Maximum number of feature indices that can have the value 1 at the same time
|
||||
static constexpr IndexType kMaxActiveDimensions = 2;
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0xD3CEE169u;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = SQUARE_NB * 2;
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
// Maximum number of feature indices that can have the value 1 at the same time
|
||||
static constexpr IndexType kMaxActiveDimensions = 2;
|
||||
|
||||
private:
|
||||
// Index of a feature for a given king position.
|
||||
static IndexType MakeIndex(Color perspective, Square s, Color king_color);
|
||||
};
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
} // namespace Features
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
} // namespace NNUE
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
} // namespace Eval
|
||||
private:
|
||||
// Index of a feature for a given king position.
|
||||
static IndexType make_index(Color perspective, Square s, Color king_color);
|
||||
};
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
+43
-44
@@ -1,56 +1,55 @@
|
||||
//Definition of input feature P of NNUE evaluation function
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "p.h"
|
||||
#include "p.h"
|
||||
#include "index_list.h"
|
||||
|
||||
namespace Eval {
|
||||
//Definition of input feature P of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace NNUE {
|
||||
// Orient a square according to perspective (rotate the board 180° for black)
|
||||
// this has to stay until we find a better arch that works with "flip".
|
||||
// allows us to use current master net for gensfen (primarily needed for higher quality data)
|
||||
// Returns `s` with its integer value XOR-ed with 63 when `bool(perspective)`
// is true, and `s` unchanged otherwise.
inline Square orient(Color perspective, Square s) {
|
||||
// bool(perspective) * 63 is either 0 or 63, so the XOR is a no-op or flips the six low bits.
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
|
||||
namespace Features {
|
||||
// Find the index of the feature quantity from the king position and PieceSquare
|
||||
// Computes the feature index for piece `pc` on square `s`: the
// perspective-oriented square plus the value looked up in
// kpp_board_index[pc][perspective].
inline IndexType P::make_index(
|
||||
Color perspective, Square s, Piece pc) {
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
|
||||
}
|
||||
|
||||
// Orient a square according to perspective (rotates by 180 for black)
|
||||
// Returns `s` with its integer value XOR-ed with 63 when `bool(perspective)`
// is true, and `s` unchanged otherwise.
inline Square orient(Color perspective, Square s) {
|
||||
// bool(perspective) * 63 is either 0 or 63, so the XOR is a no-op or flips the six low bits.
return Square(int(s) ^ (bool(perspective) * 63));
|
||||
}
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
void P::append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active) {
|
||||
|
||||
// Find the index of the feature quantity from the king position and PieceSquare
|
||||
// Computes the feature index for piece `pc` on square `s`: the
// perspective-oriented square plus the value looked up in
// kpp_board_index[pc][perspective].
inline IndexType P::MakeIndex(
|
||||
Color perspective, Square s, Piece pc) {
|
||||
return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
|
||||
}
|
||||
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(make_index(perspective, s, pos.piece_on(s)));
|
||||
}
|
||||
}
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
// Appends to `active` one feature index for every square set in
// pos.pieces() that is not also set in pos.pieces(KING).
void P::AppendActiveIndices(
|
||||
const Position& pos, Color perspective, IndexList* active) {
|
||||
// Start from pos.pieces() and clear the bits of pos.pieces(KING).
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
|
||||
// Loop until no bits remain; pop_lsb receives &bb so it can update it.
while (bb) {
|
||||
Square s = pop_lsb(&bb);
|
||||
active->push_back(MakeIndex(perspective, s, pos.piece_on(s)));
|
||||
}
|
||||
}
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
void P::append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added) {
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
// Walks the dirty-piece record of the current state and appends the feature
// indices that stopped being active to `removed` and those that became
// active to `added`. Entries whose piece type is KING are ignored.
void P::AppendChangedIndices(
|
||||
const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added) {
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
// Kings are not part of this feature set.
if (type_of(pc) == KING) continue;
|
||||
// SQ_NONE as the origin means there is no old index to remove for this entry.
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(MakeIndex(perspective, dp.from[i], pc));
|
||||
// SQ_NONE as the destination means there is no new index to add for this entry.
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(MakeIndex(perspective, dp.to[i], pc));
|
||||
}
|
||||
}
|
||||
const auto& dp = pos.state()->dirtyPiece;
|
||||
for (int i = 0; i < dp.dirty_num; ++i) {
|
||||
Piece pc = dp.piece[i];
|
||||
|
||||
} // namespace Features
|
||||
if (type_of(pc) == KING)
|
||||
continue;
|
||||
|
||||
} // namespace NNUE
|
||||
if (dp.from[i] != SQ_NONE)
|
||||
removed->push_back(make_index(perspective, dp.from[i], pc));
|
||||
|
||||
} // namespace Eval
|
||||
if (dp.to[i] != SQ_NONE)
|
||||
added->push_back(make_index(perspective, dp.to[i], pc));
|
||||
}
|
||||
}
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
+33
-36
@@ -1,52 +1,49 @@
|
||||
//Definition of input feature P of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_FEATURES_P_H_
|
||||
#ifndef _NNUE_FEATURES_P_H_
|
||||
#define _NNUE_FEATURES_P_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../evaluate.h"
|
||||
#include "features_common.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "evaluate.h"
|
||||
|
||||
namespace NNUE {
|
||||
//Definition of input feature P of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
// Feature P: PieceSquare of pieces other than kings
|
||||
class P {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "P";
|
||||
|
||||
// Feature P: PieceSquare of pieces other than kings
|
||||
class P {
|
||||
public:
|
||||
// feature quantity name
|
||||
static constexpr const char* kName = "P";
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = PS_END;
|
||||
// Maximum number of feature indices that can have the value 1 at the same time
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
|
||||
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void AppendActiveIndices(const Position& pos, Color perspective,
|
||||
IndexList* active);
|
||||
// number of feature dimensions
|
||||
static constexpr IndexType kDimensions = PS_END;
|
||||
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void AppendChangedIndices(const Position& pos, Color perspective,
|
||||
IndexList* removed, IndexList* added);
|
||||
// Maximum number of feature indices that can have the value 1 at the same time
|
||||
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
|
||||
|
||||
private:
|
||||
// Index of a feature for a given piece on some square
|
||||
static IndexType MakeIndex(Color perspective, Square s, Piece pc);
|
||||
};
|
||||
// Timing of full calculation instead of difference calculation
|
||||
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
|
||||
|
||||
} // namespace Features
|
||||
// Get a list of indices with a value of 1 among the features
|
||||
static void append_active_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* active);
|
||||
|
||||
} // namespace NNUE
|
||||
// Get a list of indices whose values have changed from the previous one in the feature quantity
|
||||
static void append_changed_indices(
|
||||
const Position& pos,
|
||||
Color perspective,
|
||||
IndexList* removed,
|
||||
IndexList* added);
|
||||
|
||||
} // namespace Eval
|
||||
private:
|
||||
// Index of a feature for a given piece on some square
|
||||
static IndexType make_index(Color perspective, Square s, Piece pc);
|
||||
};
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
+627
-108
@@ -24,6 +24,10 @@
|
||||
#include <iostream>
|
||||
#include "../nnue_common.h"
|
||||
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <cstdint>
|
||||
|
||||
namespace Eval::NNUE::Layers {
|
||||
|
||||
// Affine transformation layer
|
||||
@@ -50,6 +54,8 @@ namespace Eval::NNUE::Layers {
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
|
||||
static constexpr int kLayerIndex = PreviousLayer::kLayerIndex + 1;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xCC03DAE4u;
|
||||
@@ -59,14 +65,27 @@ namespace Eval::NNUE::Layers {
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "AffineTransform[" +
|
||||
std::to_string(kOutputDimensions) + "<-" +
|
||||
std::to_string(kInputDimensions) + "](" +
|
||||
PreviousLayer::GetStructureString() + ")";
|
||||
// Returns this layer's own name in the form
// "AffineTransform[<kOutputDimensions><-<kInputDimensions>]",
// without the description of any previous layer.
static std::string get_name() {
|
||||
return "AffineTransform[" +
|
||||
std::to_string(kOutputDimensions) + "<-" +
|
||||
std::to_string(kInputDimensions) + "]";
|
||||
}
|
||||
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
// Returns this layer's name followed by the structure string of
// PreviousLayer wrapped in parentheses, so the result nests one level per layer.
static std::string get_structure_string() {
|
||||
return get_name() + "(" +
|
||||
PreviousLayer::get_structure_string() + ")";
|
||||
}
|
||||
|
||||
// Returns the layer listing produced by PreviousLayer::get_layers_info()
// with one extra line appended for this layer: its kLayerIndex and its name.
static std::string get_layers_info() {
|
||||
// Begin with everything the preceding layers reported.
std::string info = PreviousLayer::get_layers_info();
|
||||
info += "\n - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!previous_layer_.ReadParameters(stream)) return false;
|
||||
@@ -79,13 +98,17 @@ namespace Eval::NNUE::Layers {
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
if (!previous_layer_.WriteParameters(stream)) return false;
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kOutputDimensions * sizeof(BiasType));
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kOutputDimensions * kPaddedInputDimensions *
|
||||
sizeof(WeightType));
|
||||
return !stream.fail();
|
||||
if (!previous_layer_.WriteParameters(stream))
|
||||
return false;
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kOutputDimensions * sizeof(BiasType));
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kOutputDimensions * kPaddedInputDimensions *
|
||||
sizeof(WeightType));
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
@@ -93,113 +116,606 @@ namespace Eval::NNUE::Layers {
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
const auto input = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
|
||||
[[maybe_unused]] const __m512i kOnes512 = _mm512_set1_epi16(1);
|
||||
|
||||
// Horizontal sum: adds all 32-bit lanes of `sum` together and returns the
// total plus `bias`.
[[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
|
||||
return _mm512_reduce_add_epi32(sum) + bias;
|
||||
};
|
||||
|
||||
// This function takes
|
||||
// sum0 = [xmm0a, xmm0b, xmm0c, xmm0d]
|
||||
// sum1 = [xmm1a, xmm1b, xmm1c, xmm1d]
|
||||
// sum2 = [xmm2a, xmm2b, xmm2c, xmm2d]
|
||||
// sum3 = [xmm3a, xmm3b, xmm3c, xmm3d]
|
||||
// and returns
|
||||
// ret = [
|
||||
// reduce_add_epi32(xmm0a), reduce_add_epi32(xmm1a), reduce_add_epi32(xmm2a), reduce_add_epi32(xmm3a),
|
||||
// reduce_add_epi32(xmm0b), reduce_add_epi32(xmm1b), reduce_add_epi32(xmm2b), reduce_add_epi32(xmm3b),
|
||||
// reduce_add_epi32(xmm0c), reduce_add_epi32(xmm1c), reduce_add_epi32(xmm2c), reduce_add_epi32(xmm3c),
|
||||
// reduce_add_epi32(xmm0d), reduce_add_epi32(xmm1d), reduce_add_epi32(xmm2d), reduce_add_epi32(xmm3d)
|
||||
// ]
|
||||
[[maybe_unused]] auto m512_hadd128x16_interleave = [](
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3) -> __m512i {
|
||||
|
||||
__m512i sum01a = _mm512_unpacklo_epi32(sum0, sum1);
|
||||
__m512i sum01b = _mm512_unpackhi_epi32(sum0, sum1);
|
||||
|
||||
__m512i sum23a = _mm512_unpacklo_epi32(sum2, sum3);
|
||||
__m512i sum23b = _mm512_unpackhi_epi32(sum2, sum3);
|
||||
|
||||
__m512i sum01 = _mm512_add_epi32(sum01a, sum01b);
|
||||
__m512i sum23 = _mm512_add_epi32(sum23a, sum23b);
|
||||
|
||||
__m512i sum0123a = _mm512_unpacklo_epi64(sum01, sum23);
|
||||
__m512i sum0123b = _mm512_unpackhi_epi64(sum01, sum23);
|
||||
|
||||
return _mm512_add_epi32(sum0123a, sum0123b);
|
||||
};
|
||||
|
||||
// Reduces four 512-bit accumulators to a single 128-bit vector and adds
// `bias`. Assuming m512_hadd128x16_interleave behaves as its comment in this
// file describes, 32-bit lane k of the result is the full horizontal sum of
// sum<k> plus lane k of `bias`.
[[maybe_unused]] auto m512_haddx4 = [m512_hadd128x16_interleave](
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, __m128i bias) -> __m128i {
|
||||
|
||||
__m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
|
||||
|
||||
// Split the 512-bit vector into its lower and upper 256-bit halves.
__m256i sum256lo = _mm512_castsi512_si256(sum);
|
||||
__m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
|
||||
|
||||
// Fold 512 -> 256 bits by adding the halves lane-wise.
sum256lo = _mm256_add_epi32(sum256lo, sum256hi);
|
||||
|
||||
// Split again into 128-bit halves.
__m128i sum128lo = _mm256_castsi256_si128(sum256lo);
|
||||
__m128i sum128hi = _mm256_extracti128_si256(sum256lo, 1);
|
||||
|
||||
// Fold 256 -> 128 bits and apply the bias.
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_haddx8 = [m512_hadd128x16_interleave](
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
|
||||
__m512i sum4, __m512i sum5, __m512i sum6, __m512i sum7, __m256i bias) -> __m256i {
|
||||
|
||||
__m512i suma = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
|
||||
__m512i sumb = m512_hadd128x16_interleave(sum4, sum5, sum6, sum7);
|
||||
|
||||
__m512i indices0 = _mm512_setr_epi64(0, 1, 8, 9, 4, 5, 12, 13);
|
||||
__m512i indices1 = _mm512_setr_epi64(2, 3, 10, 11, 6, 7, 14, 15);
|
||||
__m512i x = _mm512_add_epi32(
|
||||
_mm512_permutex2var_epi64(suma, indices0, sumb),
|
||||
_mm512_permutex2var_epi64(suma, indices1, sumb));
|
||||
|
||||
__m256i sum256lo = _mm512_castsi512_si256(x);
|
||||
__m256i sum256hi = _mm512_extracti64x4_epi64(x, 1);
|
||||
|
||||
return _mm256_add_epi32(_mm256_add_epi32(sum256lo, sum256hi), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_hadd256x8 =[m512_hadd128x16_interleave](
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3, __m256i bias) -> __m256i {
|
||||
|
||||
__m512i sum = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
|
||||
|
||||
__m512i indices = _mm512_setr_epi32(
|
||||
0, 4, 8, 12, 2, 6, 10, 14,
|
||||
1, 5, 9, 13, 3, 7, 11, 15);
|
||||
sum = _mm512_permutexvar_epi32(indices, sum);
|
||||
|
||||
__m256i sum256lo = _mm512_castsi512_si256(sum);
|
||||
__m256i sum256hi = _mm512_extracti64x4_epi64(sum, 1);
|
||||
|
||||
return _mm256_add_epi32(_mm256_hadd_epi32(sum256lo, sum256hi), bias);
|
||||
};
|
||||
|
||||
[[maybe_unused]] auto m512_hadd256x16 = [m512_hadd128x16_interleave](
|
||||
__m512i sum0, __m512i sum1, __m512i sum2, __m512i sum3,
|
||||
__m512i sum4, __m512i sum5, __m512i sum6, __m512i sum7, __m512i bias) -> __m512i {
|
||||
|
||||
__m512i suma = m512_hadd128x16_interleave(sum0, sum1, sum2, sum3);
|
||||
__m512i sumb = m512_hadd128x16_interleave(sum4, sum5, sum6, sum7);
|
||||
|
||||
__m512i indices0 = _mm512_setr_epi64(0, 1, 8, 9, 4, 5, 12, 13);
|
||||
__m512i indices1 = _mm512_setr_epi64(2, 3, 10, 11, 6, 7, 14, 15);
|
||||
__m512i x = _mm512_add_epi32(
|
||||
_mm512_permutex2var_epi64(suma, indices0, sumb),
|
||||
_mm512_permutex2var_epi64(suma, indices1, sumb));
|
||||
|
||||
__m512i indices = _mm512_setr_epi32(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15);
|
||||
return _mm512_add_epi32(_mm512_permutexvar_epi32(indices, x), bias);
|
||||
};
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
[[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
|
||||
acc = _mm512_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
[[maybe_unused]] auto m512_dpbusd_epi32 = [=](__m512i a, __m512i b) -> __m512i {
|
||||
__m512i product0 = _mm512_maddubs_epi16(a, b);
|
||||
return _mm512_madd_epi16(product0, kOnes512);
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
#if defined (USE_AVX2)
|
||||
|
||||
[[maybe_unused]] const __m256i kOnes256 = _mm256_set1_epi16(1);
|
||||
|
||||
// Horizontal sum: adds all eight 32-bit lanes of `sum` together and returns
// the total plus `bias`.
[[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
|
||||
// Fold 256 -> 128 bits by adding the lower and upper halves.
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
// Add the vector to a copy of itself with the two 64-bit halves swapped.
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
|
||||
// Add the vector to a copy with adjacent 32-bit lanes swapped; every lane now holds the total.
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
|
||||
// Extract the lowest lane and apply the bias.
return _mm_cvtsi128_si32(sum128) + bias;
|
||||
};
|
||||
|
||||
// Reduces four 256-bit accumulators to one 128-bit vector whose 32-bit lane k
// is the horizontal sum of all lanes of sum<k>, then adds `bias` lane-wise.
[[maybe_unused]] auto m256_haddx4 = [](__m256i sum0, __m256i sum1, __m256i sum2, __m256i sum3, __m128i bias) -> __m128i {
|
||||
// _mm256_hadd_epi32 adds adjacent pairs separately within each 128-bit half.
sum0 = _mm256_hadd_epi32(sum0, sum1);
|
||||
sum2 = _mm256_hadd_epi32(sum2, sum3);
|
||||
|
||||
// After this step each 128-bit half holds the partial sums of sum0..sum3 for that half.
sum0 = _mm256_hadd_epi32(sum0, sum2);
|
||||
|
||||
__m128i sum128lo = _mm256_castsi256_si128(sum0);
|
||||
__m128i sum128hi = _mm256_extracti128_si256(sum0, 1);
|
||||
|
||||
// Combine the two halves to get the full sums, then apply the bias.
return _mm_add_epi32(_mm_add_epi32(sum128lo, sum128hi), bias);
|
||||
};
|
||||
#if defined (USE_VNNI)
|
||||
[[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
|
||||
acc = _mm256_dpbusd_epi32(acc, a, b);
|
||||
#else
|
||||
[[maybe_unused]] auto m256_dpbusd_epi32 = [=](__m256i a, __m256i b) -> __m256i {
|
||||
__m256i product0 = _mm256_maddubs_epi16(a, b);
|
||||
return _mm256_madd_epi16(product0, kOnes256);
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (USE_SSSE3)
|
||||
|
||||
[[maybe_unused]] const __m128i kOnes128 = _mm_set1_epi16(1);
|
||||
|
||||
// Horizontal sum: adds the four 32-bit lanes of `sum` together and returns
// the total plus `bias`.
[[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
|
||||
// 0x4E swaps the two 64-bit halves; after the add each lane holds a pairwise sum.
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
// 0xB1 swaps adjacent 32-bit lanes; after the add every lane holds the total.
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
|
||||
// Extract the lowest lane and apply the bias.
return _mm_cvtsi128_si32(sum) + bias;
|
||||
};
|
||||
|
||||
// Reduces four 128-bit accumulators to one vector whose 32-bit lane k is the
// horizontal sum of the four lanes of sum<k>, then adds `bias` lane-wise.
[[maybe_unused]] auto m128_haddx4 = [](__m128i sum0, __m128i sum1, __m128i sum2, __m128i sum3, __m128i bias) -> __m128i {
|
||||
// First level: pairwise sums of sum0/sum1 and of sum2/sum3.
sum0 = _mm_hadd_epi32(sum0, sum1);
|
||||
sum2 = _mm_hadd_epi32(sum2, sum3);
|
||||
|
||||
// Second level: lanes become [total(sum0), total(sum1), total(sum2), total(sum3)].
sum0 = _mm_hadd_epi32(sum0, sum2);
|
||||
|
||||
return _mm_add_epi32(sum0, bias);
|
||||
};
|
||||
|
||||
// Dot-product helper: multiplies the unsigned 8-bit elements of `a` by the
// signed 8-bit elements of `b` and accumulates each group of four products
// into one 32-bit lane. kOnes128 is captured by value.
[[maybe_unused]] auto m128_dpbusd_epi32 = [=](__m128i a, __m128i b) -> __m128i {
|
||||
// Multiply u8 * s8 and add adjacent pairs into 16-bit lanes (with signed saturation).
__m128i product0 = _mm_maddubs_epi16(a, b);
|
||||
// Multiplying by 1 and adding adjacent 16-bit pairs widens the sums to 32 bits.
return _mm_madd_epi16(product0, kOnes128);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (USE_AVX512)
|
||||
|
||||
constexpr IndexType kNumChunks512 = kPaddedInputDimensions / (kSimdWidth * 2);
|
||||
constexpr IndexType kNumChunks256 = kPaddedInputDimensions / kSimdWidth;
|
||||
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
|
||||
const auto input_vector = reinterpret_cast<const __m512i*>(input);
|
||||
#if !defined(USE_VNNI)
|
||||
const __m512i kOnes = _mm512_set1_epi16(1);
|
||||
#endif
|
||||
// Since to saturate a zmm register it takes 64 bytes we
|
||||
// cannot use AVX512 for the smaller affine transforms.
|
||||
// Instead we fall back to an AVX2 implementation if the
|
||||
// kInputDimensions isn't a multiple of 64.
|
||||
// Note that this means that for example for
|
||||
// kInputDimensions of 96 we fall back to AVX2 even though
|
||||
// the first 64 elements could be processed with AVX512.
|
||||
// This is caused by mixing the __m256 and __m512 variables
|
||||
// required to better handle that case and it would
|
||||
// require handling more cases statically not to lose performance.
|
||||
// This should be revisited if such input dimensions are to be considered.
|
||||
[[maybe_unused]] const auto input_vector512 = reinterpret_cast<const __m512i*>(input);
|
||||
[[maybe_unused]] const auto input_vector256 = reinterpret_cast<const __m256i*>(input);
|
||||
|
||||
// kOutputDimensions is either 1 or a multiple of kSimdWidth
|
||||
// because then it is also an input dimension.
|
||||
if constexpr (kOutputDimensions % 16 == 0 && kNumChunks256 == 1)
|
||||
{
|
||||
for (IndexType i = 0; i < kOutputDimensions; i += 16)
|
||||
{
|
||||
const IndexType offset01a = (i + 0) * kPaddedInputDimensions;
|
||||
const IndexType offset23a = (i + 2) * kPaddedInputDimensions;
|
||||
const IndexType offset45a = (i + 4) * kPaddedInputDimensions;
|
||||
const IndexType offset67a = (i + 6) * kPaddedInputDimensions;
|
||||
const IndexType offset01b = (i + 8) * kPaddedInputDimensions;
|
||||
const IndexType offset23b = (i + 10) * kPaddedInputDimensions;
|
||||
const IndexType offset45b = (i + 12) * kPaddedInputDimensions;
|
||||
const IndexType offset67b = (i + 14) * kPaddedInputDimensions;
|
||||
|
||||
const __m512i bias = *reinterpret_cast<const __m512i*>(&biases_[i]);
|
||||
__m512i* outptr = reinterpret_cast<__m512i*>(&output[i]);
|
||||
|
||||
const auto row01a = *reinterpret_cast<const __m512i*>(&weights_[offset01a]);
|
||||
const auto row23a = *reinterpret_cast<const __m512i*>(&weights_[offset23a]);
|
||||
const auto row45a = *reinterpret_cast<const __m512i*>(&weights_[offset45a]);
|
||||
const auto row67a = *reinterpret_cast<const __m512i*>(&weights_[offset67a]);
|
||||
const auto row01b = *reinterpret_cast<const __m512i*>(&weights_[offset01b]);
|
||||
const auto row23b = *reinterpret_cast<const __m512i*>(&weights_[offset23b]);
|
||||
const auto row45b = *reinterpret_cast<const __m512i*>(&weights_[offset45b]);
|
||||
const auto row67b = *reinterpret_cast<const __m512i*>(&weights_[offset67b]);
|
||||
|
||||
const __m256i in256 = input_vector256[0];
|
||||
const __m512i in = _mm512_inserti64x4(_mm512_castsi256_si512(in256), in256, 1);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum01a = _mm512_setzero_si512();
|
||||
__m512i sum23a = _mm512_setzero_si512();
|
||||
__m512i sum45a = _mm512_setzero_si512();
|
||||
__m512i sum67a = _mm512_setzero_si512();
|
||||
__m512i sum01b = _mm512_setzero_si512();
|
||||
__m512i sum23b = _mm512_setzero_si512();
|
||||
__m512i sum45b = _mm512_setzero_si512();
|
||||
__m512i sum67b = _mm512_setzero_si512();
|
||||
|
||||
m512_add_dpbusd_epi32(sum01a, in, row01a);
|
||||
m512_add_dpbusd_epi32(sum23a, in, row23a);
|
||||
m512_add_dpbusd_epi32(sum45a, in, row45a);
|
||||
m512_add_dpbusd_epi32(sum67a, in, row67a);
|
||||
m512_add_dpbusd_epi32(sum01b, in, row01b);
|
||||
m512_add_dpbusd_epi32(sum23b, in, row23b);
|
||||
m512_add_dpbusd_epi32(sum45b, in, row45b);
|
||||
m512_add_dpbusd_epi32(sum67b, in, row67b);
|
||||
#else
|
||||
__m512i sum01a = m512_dpbusd_epi32(in, row01a);
|
||||
__m512i sum23a = m512_dpbusd_epi32(in, row23a);
|
||||
__m512i sum45a = m512_dpbusd_epi32(in, row45a);
|
||||
__m512i sum67a = m512_dpbusd_epi32(in, row67a);
|
||||
__m512i sum01b = m512_dpbusd_epi32(in, row01b);
|
||||
__m512i sum23b = m512_dpbusd_epi32(in, row23b);
|
||||
__m512i sum45b = m512_dpbusd_epi32(in, row45b);
|
||||
__m512i sum67b = m512_dpbusd_epi32(in, row67b);
|
||||
#endif
|
||||
|
||||
*outptr = m512_hadd256x16(
|
||||
sum01a, sum23a, sum45a, sum67a,
|
||||
sum01b, sum23b, sum45b, sum67b, bias);
|
||||
}
|
||||
}
|
||||
else if constexpr (kOutputDimensions % 4 == 0)
|
||||
{
|
||||
for (IndexType i = 0; i < kOutputDimensions; i += 4)
|
||||
{
|
||||
const IndexType offset0 = (i + 0) * kPaddedInputDimensions;
|
||||
const IndexType offset1 = (i + 1) * kPaddedInputDimensions;
|
||||
const IndexType offset2 = (i + 2) * kPaddedInputDimensions;
|
||||
const IndexType offset3 = (i + 3) * kPaddedInputDimensions;
|
||||
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
|
||||
{
|
||||
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m512i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m512i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m512i*>(&weights_[offset3]);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
__m512i sum1 = _mm512_setzero_si512();
|
||||
__m512i sum2 = _mm512_setzero_si512();
|
||||
__m512i sum3 = _mm512_setzero_si512();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
|
||||
__m512i sum1 = m512_dpbusd_epi32(input_vector512[0], row1[0]);
|
||||
__m512i sum2 = m512_dpbusd_epi32(input_vector512[0], row2[0]);
|
||||
__m512i sum3 = m512_dpbusd_epi32(input_vector512[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks512; ++j)
|
||||
{
|
||||
const __m512i in = input_vector512[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m512_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m512_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m512_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m512_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm512_add_epi32(sum1, m512_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm512_add_epi32(sum2, m512_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm512_add_epi32(sum3, m512_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m512_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
|
||||
__m256i sum1 = m256_dpbusd_epi32(input_vector256[0], row1[0]);
|
||||
__m256i sum2 = m256_dpbusd_epi32(input_vector256[0], row2[0]);
|
||||
__m256i sum3 = m256_dpbusd_epi32(input_vector256[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks256; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m256_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m256_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m256_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) == 0)
|
||||
{
|
||||
const auto row0 = reinterpret_cast<const __m512i*>(&weights_[0]);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m512i sum0 = _mm512_setzero_si512();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m512i sum0 = m512_dpbusd_epi32(input_vector512[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks512; ++j)
|
||||
{
|
||||
const __m512i in = input_vector512[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m512_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm512_add_epi32(sum0, m512_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m512_hadd(sum0, biases_[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector256[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks256; ++j)
|
||||
{
|
||||
const __m256i in = input_vector256[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// This case can never happen because kOutputDimensions
|
||||
// is always 1 or a multiple of kSimdWidth.
|
||||
assert(false);
|
||||
}
|
||||
|
||||
#elif defined (USE_AVX2)
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
const auto input_vector = reinterpret_cast<const __m256i*>(input);
|
||||
#if !defined(USE_VNNI)
|
||||
const __m256i kOnes = _mm256_set1_epi16(1);
|
||||
#endif
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
// kOutputDimensions is either 1 or a multiple of kSimdWidth
|
||||
// because then it is also an input dimension.
|
||||
if constexpr (kOutputDimensions % 4 == 0)
|
||||
{
|
||||
for (IndexType i = 0; i < kOutputDimensions; i += 4)
|
||||
{
|
||||
const IndexType offset0 = (i + 0) * kPaddedInputDimensions;
|
||||
const IndexType offset1 = (i + 1) * kPaddedInputDimensions;
|
||||
const IndexType offset2 = (i + 2) * kPaddedInputDimensions;
|
||||
const IndexType offset3 = (i + 3) * kPaddedInputDimensions;
|
||||
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m256i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m256i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m256i*>(&weights_[offset3]);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
__m256i sum1 = _mm256_setzero_si256();
|
||||
__m256i sum2 = _mm256_setzero_si256();
|
||||
__m256i sum3 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
__m256i sum1 = m256_dpbusd_epi32(input_vector[0], row1[0]);
|
||||
__m256i sum2 = m256_dpbusd_epi32(input_vector[0], row2[0]);
|
||||
__m256i sum3 = m256_dpbusd_epi32(input_vector[0], row3[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
m256_add_dpbusd_epi32(sum1, in, row1[j]);
|
||||
m256_add_dpbusd_epi32(sum2, in, row2[j]);
|
||||
m256_add_dpbusd_epi32(sum3, in, row3[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm256_add_epi32(sum1, m256_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm256_add_epi32(sum2, m256_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm256_add_epi32(sum3, m256_dpbusd_epi32(in, row3[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
*outptr = m256_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
}
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
__m256i sum0 = _mm256_setzero_si256();
|
||||
const IndexType kStart = 0;
|
||||
#else
|
||||
__m256i sum0 = m256_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
const IndexType kStart = 1;
|
||||
#endif
|
||||
|
||||
for (IndexType j = kStart; j < kNumChunks; ++j)
|
||||
{
|
||||
const __m256i in = input_vector[j];
|
||||
|
||||
#if defined (USE_VNNI)
|
||||
m256_add_dpbusd_epi32(sum0, in, row0[j]);
|
||||
#else
|
||||
sum0 = _mm256_add_epi32(sum0, m256_dpbusd_epi32(in, row0[j]));
|
||||
#endif
|
||||
}
|
||||
|
||||
output[0] = m256_hadd(sum0, biases_[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// This case can never happen because kOutputDimensions
|
||||
// is always 1 or a multiple of kSimdWidth.
|
||||
assert(false);
|
||||
}
|
||||
|
||||
#elif defined (USE_SSSE3)
|
||||
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
#ifndef USE_SSSE3
|
||||
const __m128i kZeros = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i kOnes = _mm_set1_epi16(1);
|
||||
#endif
|
||||
|
||||
auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
const auto input_vector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
// kOutputDimensions is either 1 or a multiple of kSimdWidth
|
||||
// because then it is also an input dimension.
|
||||
if constexpr (kOutputDimensions % 4 == 0)
|
||||
{
|
||||
for (IndexType i = 0; i < kOutputDimensions; i += 4)
|
||||
{
|
||||
const IndexType offset0 = (i + 0) * kPaddedInputDimensions;
|
||||
const IndexType offset1 = (i + 1) * kPaddedInputDimensions;
|
||||
const IndexType offset2 = (i + 2) * kPaddedInputDimensions;
|
||||
const IndexType offset3 = (i + 3) * kPaddedInputDimensions;
|
||||
|
||||
const __m128i bias = *reinterpret_cast<const __m128i*>(&biases_[i]);
|
||||
__m128i* outptr = reinterpret_cast<__m128i*>(&output[i]);
|
||||
|
||||
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[offset0]);
|
||||
const auto row1 = reinterpret_cast<const __m128i*>(&weights_[offset1]);
|
||||
const auto row2 = reinterpret_cast<const __m128i*>(&weights_[offset2]);
|
||||
const auto row3 = reinterpret_cast<const __m128i*>(&weights_[offset3]);
|
||||
|
||||
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
__m128i sum1 = m128_dpbusd_epi32(input_vector[0], row1[0]);
|
||||
__m128i sum2 = m128_dpbusd_epi32(input_vector[0], row2[0]);
|
||||
__m128i sum3 = m128_dpbusd_epi32(input_vector[0], row3[0]);
|
||||
|
||||
for (int j = 1; j < (int)kNumChunks; ++j)
|
||||
{
|
||||
const __m128i in = input_vector[j];
|
||||
|
||||
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(in, row0[j]));
|
||||
sum1 = _mm_add_epi32(sum1, m128_dpbusd_epi32(in, row1[j]));
|
||||
sum2 = _mm_add_epi32(sum2, m128_dpbusd_epi32(in, row2[j]));
|
||||
sum3 = _mm_add_epi32(sum3, m128_dpbusd_epi32(in, row3[j]));
|
||||
}
|
||||
|
||||
*outptr = m128_haddx4(sum0, sum1, sum2, sum3, bias);
|
||||
}
|
||||
}
|
||||
else if constexpr (kOutputDimensions == 1)
|
||||
{
|
||||
const auto row0 = reinterpret_cast<const __m128i*>(&weights_[0]);
|
||||
|
||||
__m128i sum0 = m128_dpbusd_epi32(input_vector[0], row0[0]);
|
||||
|
||||
for (int j = 1; j < (int)kNumChunks; ++j)
|
||||
sum0 = _mm_add_epi32(sum0, m128_dpbusd_epi32(input_vector[j], row0[j]));
|
||||
|
||||
output[0] = m128_hadd(sum0, biases_[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
// This case can never happen because kOutputDimensions
|
||||
// is always 1 or a multiple of kSimdWidth.
|
||||
assert(false);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Use old implementation for the other architectures.
|
||||
|
||||
auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
#if defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
#ifndef USE_SSSE3
|
||||
const __m128i kZeros = _mm_setzero_si128();
|
||||
#else
|
||||
const __m128i kOnes = _mm_set1_epi16(1);
|
||||
#endif
|
||||
const auto input_vector = reinterpret_cast<const __m128i*>(input);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const __m64 kZeros = _mm_setzero_si64();
|
||||
const auto input_vector = reinterpret_cast<const __m64*>(input);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
|
||||
const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType offset = i * kPaddedInputDimensions;
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
__m512i sum = _mm512_setzero_si512();
|
||||
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
#if defined(USE_VNNI)
|
||||
sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
|
||||
#else
|
||||
__m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
|
||||
product = _mm512_madd_epi16(product, kOnes);
|
||||
sum = _mm512_add_epi32(sum, product);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
|
||||
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
|
||||
// and we have to do one more 256bit chunk.
|
||||
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
|
||||
{
|
||||
const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
|
||||
const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
|
||||
#if defined(USE_VNNI)
|
||||
__m256i product256 = _mm256_dpbusd_epi32(
|
||||
_mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
|
||||
sum = _mm512_inserti32x8(sum, product256, 0);
|
||||
#else
|
||||
__m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
|
||||
sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
|
||||
#endif
|
||||
}
|
||||
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
__m256i sum = _mm256_setzero_si256();
|
||||
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
#if defined(USE_VNNI)
|
||||
sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
|
||||
#else
|
||||
__m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
|
||||
product = _mm256_madd_epi16(product, kOnes);
|
||||
sum = _mm256_add_epi32(sum, product);
|
||||
#endif
|
||||
}
|
||||
__m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
|
||||
sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
|
||||
output[i] = _mm_cvtsi128_si32(sum128) + biases_[i];
|
||||
|
||||
#elif defined(USE_SSSE3)
|
||||
__m128i sum = _mm_setzero_si128();
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (int j = 0; j < (int)kNumChunks - 1; j += 2) {
|
||||
__m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
|
||||
product0 = _mm_madd_epi16(product0, kOnes);
|
||||
sum = _mm_add_epi32(sum, product0);
|
||||
__m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1]));
|
||||
product1 = _mm_madd_epi16(product1, kOnes);
|
||||
sum = _mm_add_epi32(sum, product1);
|
||||
}
|
||||
if (kNumChunks & 0x1) {
|
||||
__m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1]));
|
||||
product = _mm_madd_epi16(product, kOnes);
|
||||
sum = _mm_add_epi32(sum, product);
|
||||
}
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
|
||||
sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
|
||||
output[i] = _mm_cvtsi128_si32(sum) + biases_[i];
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
#if defined(USE_SSE2)
|
||||
__m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
|
||||
__m128i sum_hi = kZeros;
|
||||
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
@@ -223,7 +739,7 @@ namespace Eval::NNUE::Layers {
|
||||
sum = _mm_add_epi32(sum, sum_second_32);
|
||||
output[i] = _mm_cvtsi128_si32(sum);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
#elif defined(USE_MMX)
|
||||
__m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
|
||||
__m64 sum_hi = kZeros;
|
||||
const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
|
||||
@@ -244,7 +760,7 @@ namespace Eval::NNUE::Layers {
|
||||
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
|
||||
output[i] = _mm_cvtsi64_si32(sum);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
#elif defined(USE_NEON)
|
||||
int32x4_t sum = {biases_[i]};
|
||||
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
@@ -254,18 +770,21 @@ namespace Eval::NNUE::Layers {
|
||||
}
|
||||
output[i] = sum[0] + sum[1] + sum[2] + sum[3];
|
||||
|
||||
#else
|
||||
#else
|
||||
OutputType sum = biases_[i];
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
sum += weights_[offset + j] * input[j];
|
||||
}
|
||||
output[i] = sum;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,6 +23,10 @@
|
||||
|
||||
#include "../nnue_common.h"
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Eval::NNUE::Layers {
|
||||
|
||||
// Clipped ReLU
|
||||
@@ -47,6 +51,8 @@ namespace Eval::NNUE::Layers {
|
||||
static constexpr std::size_t kBufferSize =
|
||||
PreviousLayer::kBufferSize + kSelfBufferSize;
|
||||
|
||||
static constexpr int kLayerIndex = PreviousLayer::kLayerIndex + 1;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0x538D24C7u;
|
||||
@@ -54,11 +60,24 @@ namespace Eval::NNUE::Layers {
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return "ClippedReLU[" +
|
||||
std::to_string(kOutputDimensions) + "]";
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "ClippedReLU[" +
|
||||
std::to_string(kOutputDimensions) + "](" +
|
||||
PreviousLayer::GetStructureString() + ")";
|
||||
static std::string get_structure_string() {
|
||||
return get_name() + "(" +
|
||||
PreviousLayer::get_structure_string() + ")";
|
||||
}
|
||||
|
||||
static std::string get_layers_info() {
|
||||
std::string info = PreviousLayer::get_layers_info();
|
||||
info += "\n - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Read network parameters
|
||||
@@ -68,7 +87,7 @@ namespace Eval::NNUE::Layers {
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
@@ -86,12 +105,12 @@ namespace Eval::NNUE::Layers {
|
||||
const auto out = reinterpret_cast<__m256i*>(output);
|
||||
for (IndexType i = 0; i < kNumChunks; ++i) {
|
||||
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_loadA_si256(&in[i * 4 + 0]),
|
||||
_mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
_mm256_load_si256(&in[i * 4 + 0]),
|
||||
_mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
|
||||
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
|
||||
_mm256_loadA_si256(&in[i * 4 + 2]),
|
||||
_mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
_mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
|
||||
_mm256_load_si256(&in[i * 4 + 2]),
|
||||
_mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
|
||||
_mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(words0, words1), kZero), kOffsets));
|
||||
}
|
||||
constexpr IndexType kStart = kNumChunks * kSimdWidth;
|
||||
@@ -170,9 +189,9 @@ namespace Eval::NNUE::Layers {
|
||||
}
|
||||
|
||||
private:
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<ClippedReLU>;
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<ClippedReLU>;
|
||||
|
||||
PreviousLayer previous_layer_;
|
||||
};
|
||||
|
||||
|
||||
@@ -41,6 +41,8 @@ class InputSlice {
|
||||
// Size of forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = 0;
|
||||
|
||||
static constexpr int kLayerIndex = 1;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xEC42E90Du;
|
||||
@@ -48,12 +50,24 @@ class InputSlice {
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
|
||||
std::to_string(Offset) + ":" +
|
||||
std::to_string(Offset + kOutputDimensions) + ")]";
|
||||
}
|
||||
static std::string get_name() {
|
||||
return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
|
||||
std::to_string(Offset) + ":" +
|
||||
std::to_string(Offset + kOutputDimensions) + ")]";
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string get_structure_string() {
|
||||
return get_name();
|
||||
}
|
||||
|
||||
// First entry of the layer listing: " - <kLayerIndex> - <get_name()>".
// No leading newline is emitted here; the layers shown further down in this
// file prepend their own newline when they append their entry.
static std::string get_layers_info() {
  return " - " + std::to_string(kLayerIndex) + " - " + get_name();
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& /*stream*/) {
|
||||
@@ -62,7 +76,7 @@ class InputSlice {
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& /*stream*/) const {
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forward propagation
|
||||
|
||||
+160
-127
@@ -1,163 +1,196 @@
|
||||
// Definition of layer Sum of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_LAYERS_SUM_H_
|
||||
#ifndef _NNUE_LAYERS_SUM_H_
|
||||
#define _NNUE_LAYERS_SUM_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
#include "nnue/nnue_common.h"
|
||||
|
||||
#include "../nnue_common.h"
|
||||
// Definition of layer Sum of NNUE evaluation function
|
||||
namespace Eval::NNUE::Layers {
|
||||
|
||||
namespace Eval {
|
||||
// Layer that sums the output of multiple layers
|
||||
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
|
||||
class Sum : public Sum<RemainingPreviousLayers...> {
|
||||
private:
|
||||
using Head = FirstPreviousLayer;
|
||||
using Tail = Sum<RemainingPreviousLayers...>;
|
||||
|
||||
namespace NNUE {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename Head::OutputType;
|
||||
|
||||
namespace Layers {
|
||||
using OutputType = InputType;
|
||||
|
||||
// Layer that sums the output of multiple layers
|
||||
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
|
||||
class Sum : public Sum<RemainingPreviousLayers...> {
|
||||
private:
|
||||
using Head = FirstPreviousLayer;
|
||||
using Tail = Sum<RemainingPreviousLayers...>;
|
||||
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
|
||||
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename Head::OutputType;
|
||||
using OutputType = InputType;
|
||||
static_assert(std::is_same<InputType, typename Tail::InputType>::value, "");
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = Head::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
static_assert(kInputDimensions == Tail::kInputDimensions ,"");
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
static_assert(kInputDimensions == Tail::kInputDimensions ,"");
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
|
||||
// Size of forward propagation buffer used in this layer
|
||||
static constexpr std::size_t kSelfBufferSize =
|
||||
CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xBCE400B4u;
|
||||
hash_value ^= Head::GetHashValue() >> 1;
|
||||
hash_value ^= Head::GetHashValue() << 31;
|
||||
hash_value ^= Tail::GetHashValue() >> 2;
|
||||
hash_value ^= Tail::GetHashValue() << 30;
|
||||
return hash_value;
|
||||
}
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize =
|
||||
std::max(Head::kBufferSize + kSelfBufferSize, Tail::kBufferSize);
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "Sum[" +
|
||||
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
|
||||
}
|
||||
static constexpr int kLayerIndex = Tail::kLayerIndex + 1;
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!Tail::ReadParameters(stream)) return false;
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xBCE400B4u;
|
||||
hash_value ^= Head::GetHashValue() >> 1;
|
||||
hash_value ^= Head::GetHashValue() << 31;
|
||||
hash_value ^= Tail::GetHashValue() >> 2;
|
||||
hash_value ^= Tail::GetHashValue() << 30;
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
if (!Tail::WriteParameters(stream)) return false;
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
static std::string get_name() {
|
||||
return "Sum[" +
|
||||
std::to_string(kOutputDimensions) + "]";
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
Tail::Propagate(transformed_features, buffer);
|
||||
const auto head_output = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
for (IndexType i = 0; i <kOutputDimensions; ++i) {
|
||||
output[i] += head_output[i];
|
||||
}
|
||||
return output;
|
||||
}
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string get_structure_string() {
|
||||
return get_name() + "(" + get_summands_string() + ")";
|
||||
}
|
||||
|
||||
protected:
|
||||
// A string that represents the list of layers to be summed
|
||||
static std::string GetSummandsString() {
|
||||
return Head::GetStructureString() + "," + Tail::GetSummandsString();
|
||||
}
|
||||
static std::string get_layers_info() {
|
||||
std::string info = Tail::get_layers_info();
|
||||
info += "\n - ";
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += " - ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<Sum>;
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
if (!Tail::ReadParameters(stream))
|
||||
return false;
|
||||
|
||||
// the layer immediately before this layer
|
||||
FirstPreviousLayer previous_layer_;
|
||||
};
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
|
||||
// Layer that sums the output of multiple layers (when there is one template argument)
|
||||
template <typename PreviousLayer>
|
||||
class Sum<PreviousLayer> {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
using OutputType = InputType;
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
if (!Tail::WriteParameters(stream))
|
||||
return false;
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
|
||||
// forward propagation
|
||||
const OutputType* propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xBCE400B4u;
|
||||
hash_value ^= PreviousLayer::GetHashValue() >> 1;
|
||||
hash_value ^= PreviousLayer::GetHashValue() << 31;
|
||||
return hash_value;
|
||||
}
|
||||
Tail::propagate(transformed_features, buffer);
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string GetStructureString() {
|
||||
return "Sum[" +
|
||||
std::to_string(kOutputDimensions) + "](" + GetSummandsString() + ")";
|
||||
}
|
||||
const auto head_output = previous_layer_.Propagate(
|
||||
transformed_features, buffer + kSelfBufferSize);
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
const auto output = reinterpret_cast<OutputType*>(buffer);
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
for (IndexType i = 0; i <kOutputDimensions; ++i) {
|
||||
output[i] += head_output[i];
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
return previous_layer_.Propagate(transformed_features, buffer);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
protected:
|
||||
// A string that represents the list of layers to be summed
|
||||
static std::string GetSummandsString() {
|
||||
return PreviousLayer::GetStructureString();
|
||||
}
|
||||
protected:
|
||||
// A string that represents the list of layers to be summed
|
||||
static std::string get_summands_string() {
|
||||
return Head::get_structure_string() + "," + Tail::get_summands_string();
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<Sum>;
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<Sum>;
|
||||
|
||||
// the layer immediately before this layer
|
||||
PreviousLayer previous_layer_;
|
||||
};
|
||||
// the layer immediately before this layer
|
||||
FirstPreviousLayer previous_layer_;
|
||||
};
|
||||
|
||||
} // namespace Layers
|
||||
// Layer that sums the output of multiple layers (when there is one template argument)
|
||||
template <typename PreviousLayer>
|
||||
class Sum<PreviousLayer> {
|
||||
public:
|
||||
// Input/output type
|
||||
using InputType = typename PreviousLayer::OutputType;
|
||||
|
||||
} // namespace NNUE
|
||||
using OutputType = InputType;
|
||||
|
||||
} // namespace Eval
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
PreviousLayer::kOutputDimensions;
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
static constexpr IndexType kOutputDimensions = kInputDimensions;
|
||||
|
||||
// Size of the forward propagation buffer used from the input layer to this layer
|
||||
static constexpr std::size_t kBufferSize = PreviousLayer::kBufferSize;
|
||||
|
||||
static constexpr int kLayerIndex = PreviousLayer::kLayerIndex + 1;
|
||||
|
||||
// Hash value embedded in the evaluation function file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
std::uint32_t hash_value = 0xBCE400B4u;
|
||||
hash_value ^= PreviousLayer::GetHashValue() >> 1;
|
||||
hash_value ^= PreviousLayer::GetHashValue() << 31;
|
||||
return hash_value;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return "Sum[" +
|
||||
std::to_string(kOutputDimensions) + "]";
|
||||
}
|
||||
|
||||
// A string that represents the structure from the input layer to this layer
|
||||
static std::string get_structure_string() {
|
||||
return get_name() + "(" + get_summands_string() + ")";
|
||||
}
|
||||
|
||||
static std::string get_layers_info() {
|
||||
std::string info = PreviousLayer::get_layers_info();
|
||||
info += '\n';
|
||||
info += std::to_string(kLayerIndex);
|
||||
info += ": ";
|
||||
info += get_name();
|
||||
return info;
|
||||
}
|
||||
|
||||
// read parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
return previous_layer_.ReadParameters(stream);
|
||||
}
|
||||
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
return previous_layer_.WriteParameters(stream);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const OutputType* Propagate(
|
||||
const TransformedFeatureType* transformed_features, char* buffer) const {
|
||||
|
||||
return previous_layer_.Propagate(transformed_features, buffer);
|
||||
}
|
||||
|
||||
protected:
|
||||
// A string that represents the list of layers to be summed
|
||||
static std::string get_summands_string() {
|
||||
return PreviousLayer::get_structure_string();
|
||||
}
|
||||
|
||||
// Make the learning class a friend
|
||||
friend class Trainer<Sum>;
|
||||
|
||||
// the layer immediately before this layer
|
||||
PreviousLayer previous_layer_;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Layers
|
||||
|
||||
#endif
|
||||
|
||||
@@ -27,11 +27,8 @@ namespace Eval::NNUE {
|
||||
|
||||
// Class that holds the result of affine transformation of input features
|
||||
struct alignas(kCacheLineSize) Accumulator {
|
||||
std::int16_t
|
||||
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
Value score;
|
||||
bool computed_accumulation;
|
||||
bool computed_score;
|
||||
std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
|
||||
bool computed_accumulation;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
+3
-24
@@ -21,6 +21,8 @@
|
||||
#ifndef NNUE_COMMON_H_INCLUDED
|
||||
#define NNUE_COMMON_H_INCLUDED
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
|
||||
@@ -43,29 +45,6 @@
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
|
||||
// compiled with older g++ crashes because the output memory is not aligned
|
||||
// even though alignas is specified.
|
||||
#if defined(USE_AVX2)
|
||||
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
|
||||
#define _mm256_loadA_si256 _mm256_loadu_si256
|
||||
#define _mm256_storeA_si256 _mm256_storeu_si256
|
||||
#else
|
||||
#define _mm256_loadA_si256 _mm256_load_si256
|
||||
#define _mm256_storeA_si256 _mm256_store_si256
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(USE_AVX512)
|
||||
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
|
||||
#define _mm512_loadA_si512 _mm512_loadu_si512
|
||||
#define _mm512_storeA_si512 _mm512_storeu_si512
|
||||
#else
|
||||
#define _mm512_loadA_si512 _mm512_load_si512
|
||||
#define _mm512_storeA_si512 _mm512_store_si512
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Version of the evaluation file
|
||||
@@ -113,7 +92,7 @@ namespace Eval::NNUE {
|
||||
PS_END2 = 12 * SQUARE_NB + 1
|
||||
};
|
||||
|
||||
extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
|
||||
extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
|
||||
|
||||
// Type of input feature after conversion
|
||||
using TransformedFeatureType = std::uint8_t;
|
||||
|
||||
+295
-177
@@ -25,10 +25,66 @@
|
||||
#include "nnue_architecture.h"
|
||||
#include "features/index_list.h"
|
||||
|
||||
#include <cstring> // std::memset()
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// If vector instructions are enabled, we update and refresh the
// accumulator tile by tile such that each tile fits in the CPU's
// vector registers.
//
// Exactly one backend is selected below. Each backend provides:
//   vec_t            - the vector register type
//   vec_load(a)      - load a vec_t from the address a
//   vec_store(a,b)   - store the vec_t b to the address a
//   vec_add_16(a,b)  - element-wise 16-bit addition
//   vec_sub_16(a,b)  - element-wise 16-bit subtraction
//   vec_zero         - an all-zero vec_t
//   kNumRegs         - number of vec_t values kept per tile
// VECTOR stays defined only if a backend was selected.
//
// The USE_* macros are tested with defined(), as in the rest of this file,
// so that a macro defined without a value (or as 0) selects the same backend
// here as it does in the other #if defined(USE_*) sections.
#define VECTOR

#if defined(USE_AVX512)
typedef __m512i vec_t;
#define vec_load(a) _mm512_load_si512(a)
#define vec_store(a,b) _mm512_store_si512(a,b)
#define vec_add_16(a,b) _mm512_add_epi16(a,b)
#define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
#define vec_zero _mm512_setzero_si512()
static constexpr IndexType kNumRegs = 8; // only 8 are needed

#elif defined(USE_AVX2)
typedef __m256i vec_t;
#define vec_load(a) _mm256_load_si256(a)
#define vec_store(a,b) _mm256_store_si256(a,b)
#define vec_add_16(a,b) _mm256_add_epi16(a,b)
#define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
#define vec_zero _mm256_setzero_si256()
static constexpr IndexType kNumRegs = 16;

#elif defined(USE_SSE2)
typedef __m128i vec_t;
#define vec_load(a) (*(a))
// Parenthesised so the macro expands to a single expression.
#define vec_store(a,b) (*(a) = (b))
#define vec_add_16(a,b) _mm_add_epi16(a,b)
#define vec_sub_16(a,b) _mm_sub_epi16(a,b)
#define vec_zero _mm_setzero_si128()
static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;

#elif defined(USE_MMX)
typedef __m64 vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) (*(a) = (b))
#define vec_add_16(a,b) _mm_add_pi16(a,b)
#define vec_sub_16(a,b) _mm_sub_pi16(a,b)
#define vec_zero _mm_setzero_si64()
static constexpr IndexType kNumRegs = 8;

#elif defined(USE_NEON)
typedef int16x8_t vec_t;
#define vec_load(a) (*(a))
#define vec_store(a,b) (*(a) = (b))
#define vec_add_16(a,b) vaddq_s16(a,b)
#define vec_sub_16(a,b) vsubq_s16(a,b)
#define vec_zero {0}
static constexpr IndexType kNumRegs = 16;

#else
// No supported vector instruction set: fall back to the scalar code paths.
#undef VECTOR

#endif
|
||||
|
||||
// Input feature converter
|
||||
class FeatureTransformer {
|
||||
|
||||
@@ -36,6 +92,11 @@ namespace Eval::NNUE {
|
||||
// Number of output dimensions for one side
|
||||
static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
|
||||
|
||||
#ifdef VECTOR
|
||||
static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
|
||||
static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
|
||||
#endif
|
||||
|
||||
public:
|
||||
// Output type
|
||||
using OutputType = TransformedFeatureType;
|
||||
@@ -48,20 +109,36 @@ namespace Eval::NNUE {
|
||||
static constexpr std::size_t kBufferSize =
|
||||
kOutputDimensions * sizeof(OutputType);
|
||||
|
||||
static constexpr int kLayerIndex = 0;
|
||||
|
||||
// Hash value embedded in the evaluation file
|
||||
static constexpr std::uint32_t GetHashValue() {
|
||||
|
||||
return RawFeatures::kHashValue ^ kOutputDimensions;
|
||||
}
|
||||
|
||||
static std::string get_name() {
|
||||
return RawFeatures::get_name() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
}
|
||||
|
||||
// a string representing the structure
|
||||
static std::string GetStructureString() {
|
||||
return RawFeatures::GetName() + "[" +
|
||||
std::to_string(kInputDimensions) + "->" +
|
||||
std::to_string(kHalfDimensions) + "x2]";
|
||||
static std::string get_structure_string() {
|
||||
return get_name();
|
||||
}
|
||||
|
||||
// One-line description of this layer: " - <layer index> - <layer name>".
static std::string get_layers_info() {
  return " - " + std::to_string(kLayerIndex) + " - " + get_name();
}
|
||||
|
||||
// Read network parameters
|
||||
bool ReadParameters(std::istream& stream) {
|
||||
|
||||
for (std::size_t i = 0; i < kHalfDimensions; ++i)
|
||||
biases_[i] = read_little_endian<BiasType>(stream);
|
||||
for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
|
||||
@@ -72,34 +149,45 @@ namespace Eval::NNUE {
|
||||
// write parameters
|
||||
bool WriteParameters(std::ostream& stream) const {
|
||||
stream.write(reinterpret_cast<const char*>(biases_),
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
|
||||
stream.write(reinterpret_cast<const char*>(weights_),
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
kHalfDimensions * kInputDimensions * sizeof(WeightType));
|
||||
|
||||
return !stream.fail();
|
||||
}
|
||||
|
||||
// Proceed with the difference calculation if possible
|
||||
bool UpdateAccumulatorIfPossible(const Position& pos) const {
|
||||
bool update_accumulator_if_possible(const Position& pos) const {
|
||||
|
||||
const auto now = pos.state();
|
||||
if (now->accumulator.computed_accumulation) {
|
||||
if (now->accumulator.computed_accumulation)
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto prev = now->previous;
|
||||
if (prev && prev->accumulator.computed_accumulation) {
|
||||
UpdateAccumulator(pos);
|
||||
update_accumulator(pos);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert input features
|
||||
void Transform(const Position& pos, OutputType* output, bool refresh) const {
|
||||
if (refresh || !UpdateAccumulatorIfPossible(pos)) {
|
||||
RefreshAccumulator(pos);
|
||||
}
|
||||
void Transform(const Position& pos, OutputType* output) const {
|
||||
|
||||
if (!update_accumulator_if_possible(pos))
|
||||
refresh_accumulator(pos);
|
||||
|
||||
const auto& accumulation = pos.state()->accumulator.accumulation;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#if defined(USE_AVX512)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth * 2);
|
||||
static_assert(kHalfDimensions % (kSimdWidth * 2) == 0);
|
||||
const __m512i kControl = _mm512_setr_epi64(0, 2, 4, 6, 1, 3, 5, 7);
|
||||
const __m512i kZero = _mm512_setzero_si512();
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
constexpr int kControl = 0b11011000;
|
||||
const __m256i kZero = _mm256_setzero_si256();
|
||||
@@ -126,14 +214,39 @@ namespace Eval::NNUE {
|
||||
for (IndexType p = 0; p < 2; ++p) {
|
||||
const IndexType offset = kHalfDimensions * p;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#if defined(USE_AVX512)
|
||||
auto out = reinterpret_cast<__m512i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m512i sum0 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m512i sum1 = _mm512_load_si512(
|
||||
&reinterpret_cast<const __m512i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm512_add_epi16(sum0, reinterpret_cast<const __m512i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm512_add_epi16(sum1, reinterpret_cast<const __m512i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm512_store_si512(&out[j], _mm512_permutexvar_epi64(kControl,
|
||||
_mm512_max_epi8(_mm512_packs_epi16(sum0, sum1), kZero)));
|
||||
}
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
auto out = reinterpret_cast<__m256i*>(&output[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
__m256i sum0 = _mm256_loadA_si256(
|
||||
__m256i sum0 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m256i sum1 = _mm256_loadA_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
_mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
__m256i sum1 = _mm256_load_si256(
|
||||
&reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
|
||||
_mm256_packs_epi16(sum0, sum1), kZero), kControl));
|
||||
}
|
||||
|
||||
@@ -144,14 +257,21 @@ namespace Eval::NNUE {
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
|
||||
|
||||
_mm_store_si128(&out[j],
|
||||
|
||||
#ifdef USE_SSE41
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
_mm_max_epi8(packedbytes, kZero)
|
||||
#else
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
_mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
|
||||
#endif
|
||||
|
||||
);
|
||||
@@ -164,6 +284,13 @@ namespace Eval::NNUE {
|
||||
accumulation[perspectives[p]][0])[j * 2 + 0]);
|
||||
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][0])[j * 2 + 1]);
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 0]);
|
||||
sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
|
||||
accumulation[perspectives[p]][i])[j * 2 + 1]);
|
||||
}
|
||||
|
||||
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
|
||||
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
|
||||
}
|
||||
@@ -173,12 +300,22 @@ namespace Eval::NNUE {
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
int16x8_t sum = reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][0])[j];
|
||||
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
|
||||
accumulation[perspectives[p]][i])[j]);
|
||||
}
|
||||
|
||||
out[j] = vmax_s8(vqmovn_s16(sum), kZero);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
|
||||
for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
|
||||
sum += accumulation[static_cast<int>(perspectives[p])][i][j];
|
||||
}
|
||||
|
||||
output[offset + j] = static_cast<OutputType>(
|
||||
std::max<int>(0, std::min<int>(127, sum)));
|
||||
}
|
||||
@@ -192,108 +329,150 @@ namespace Eval::NNUE {
|
||||
|
||||
private:
|
||||
// Calculate cumulative value without using difference calculation
|
||||
void RefreshAccumulator(const Position& pos) const {
|
||||
void refresh_accumulator(const Position& pos) const {
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
#endif
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
IndexType i = 0;
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
#if defined(USE_AVX512)
|
||||
auto accumulation = reinterpret_cast<__m512i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
_mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::append_active_indices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
#elif defined(USE_AVX2)
|
||||
auto accumulation = reinterpret_cast<__m256i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
_mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto accumulation = reinterpret_cast<__m128i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto accumulation = reinterpret_cast<__m64*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; k++)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
#else
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
auto accumulation = reinterpret_cast<int16x8_t*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
accumulator.computed_accumulation = true;
|
||||
accumulator.computed_score = false;
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
accumulator.computed_accumulation = true;
|
||||
}
|
||||
|
||||
// Calculate cumulative value using difference calculation
|
||||
void UpdateAccumulator(const Position& pos) const {
|
||||
const auto prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
IndexType i = 0;
|
||||
void update_accumulator(const Position& pos) const {
|
||||
|
||||
#ifdef VECTOR
|
||||
// Gcc-10.2 unnecessarily spills AVX2 registers if this array
|
||||
// is defined in the VECTOR code below, once in each branch
|
||||
vec_t acc[kNumRegs];
|
||||
#endif
|
||||
const auto& prev_accumulator = pos.state()->previous->accumulator;
|
||||
auto& accumulator = pos.state()->accumulator;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2];
|
||||
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
bool reset[2] = { false, false };
|
||||
RawFeatures::append_changed_indices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
|
||||
#ifdef VECTOR
|
||||
for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
auto accTile = reinterpret_cast<vec_t*>(
|
||||
&accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
if (reset[perspective]) {
|
||||
if (i == 0) {
|
||||
auto biasesTile = reinterpret_cast<const vec_t*>(
|
||||
&biases_[j * kTileHeight]);
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = biasesTile[k];
|
||||
} else {
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_zero;
|
||||
}
|
||||
} else {
|
||||
auto prevAccTile = reinterpret_cast<const vec_t*>(
|
||||
&prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_load(&prevAccTile[k]);
|
||||
|
||||
// Difference calculation for the deactivated features
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_sub_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index + j * kTileHeight;
|
||||
auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
acc[k] = vec_add_16(acc[k], column[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (IndexType k = 0; k < kNumRegs; ++k)
|
||||
vec_store(&accTile[k], acc[k]);
|
||||
}
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
#else
|
||||
for (Color perspective : { WHITE, BLACK }) {
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<__m256i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<__m128i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<__m64*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
auto accumulation = reinterpret_cast<int16x8_t*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
#endif
|
||||
|
||||
if (reset[perspective]) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
if (i == 0) {
|
||||
std::memcpy(accumulator.accumulation[perspective][i], biases_,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
} else {
|
||||
std::memset(accumulator.accumulation[perspective][i], 0,
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
}
|
||||
} else {
|
||||
std::memcpy(accumulator.accumulation[perspective][i],
|
||||
prev_accumulator.accumulation[perspective][i],
|
||||
@@ -302,83 +481,22 @@ namespace Eval::NNUE {
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = vsubq_s16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
accumulator.accumulation[perspective][i][j] -=
|
||||
weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
|
||||
}
|
||||
}
|
||||
{ // Difference calculation for the activated features
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_MMX)
|
||||
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
accumulator.accumulation[perspective][i][j] +=
|
||||
weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(USE_MMX)
|
||||
_mm_empty();
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
accumulator.computed_accumulation = true;
|
||||
accumulator.computed_score = false;
|
||||
}
|
||||
|
||||
using BiasType = std::int16_t;
|
||||
|
||||
+203
-189
@@ -1,201 +1,215 @@
|
||||
// UCI extended command for NNUE evaluation function
|
||||
|
||||
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../thread.h"
|
||||
#include "../uci.h"
|
||||
#include "evaluate_nnue.h"
|
||||
#include "evaluate_nnue.h"
|
||||
#include "nnue_test_command.h"
|
||||
|
||||
#include "thread.h"
|
||||
#include "uci.h"
|
||||
|
||||
#include <set>
|
||||
#include <fstream>
|
||||
|
||||
#define ASSERT(X) { if (!(X)) { std::cout << "\nError : ASSERT(" << #X << "), " << __FILE__ << "(" << __LINE__ << "): " << __func__ << std::endl; \
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(3000)); *(int*)1 =0;} }
|
||||
|
||||
namespace Eval {
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
namespace {
|
||||
|
||||
// Testing RawFeatures mainly for difference calculation
|
||||
void TestFeatures(Position& pos) {
|
||||
const std::uint64_t num_games = 1000;
|
||||
StateInfo si;
|
||||
pos.set(StartFEN, false, &si, Threads.main());
|
||||
const int MAX_PLY = 256; // test up to 256 hands
|
||||
|
||||
StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps
|
||||
int ply; // Trouble from the initial phase
|
||||
|
||||
PRNG prng(20171128);
|
||||
|
||||
std::uint64_t num_moves = 0;
|
||||
std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1);
|
||||
std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
|
||||
constexpr IndexType kUnknown = -1;
|
||||
std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
|
||||
auto make_index_sets = [&](const Position& pos) {
|
||||
std::vector<std::vector<std::set<IndexType>>> index_sets(
|
||||
kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
for (const auto perspective : Colors) {
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT(index_sets[i][perspective].count(index) == 0);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
index_sets[i][perspective].insert(index);
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return index_sets;
|
||||
};
|
||||
auto update_index_sets = [&](const Position& pos, auto* index_sets) {
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2];
|
||||
RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
for (const auto perspective : Colors) {
|
||||
if (reset[perspective]) {
|
||||
(*index_sets)[i][perspective].clear();
|
||||
++num_resets[i];
|
||||
} else {
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT((*index_sets)[i][perspective].count(index) == 1);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
(*index_sets)[i][perspective].erase(index);
|
||||
++num_updates.back();
|
||||
++num_updates[i];
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT((*index_sets)[i][perspective].count(index) == 0);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
(*index_sets)[i][perspective].insert(index);
|
||||
++num_updates.back();
|
||||
++num_updates[i];
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::cout << "feature set: " << RawFeatures::GetName()
|
||||
<< "[" << RawFeatures::kDimensions << "]" << std::endl;
|
||||
std::cout << "start testing with random games";
|
||||
|
||||
for (std::uint64_t i = 0; i < num_games; ++i) {
|
||||
auto index_sets = make_index_sets(pos);
|
||||
for (ply = 0; ply < MAX_PLY; ++ply) {
|
||||
MoveList<LEGAL> mg(pos); // Generate all legal hands
|
||||
|
||||
// No legal move is available, so this game is over
|
||||
if (mg.size() == 0)
|
||||
break;
|
||||
|
||||
// Pick one of the generated moves at random and play it to advance the position.
|
||||
Move m = mg.begin()[prng.rand(mg.size())];
|
||||
pos.do_move(m, state[ply]);
|
||||
|
||||
++num_moves;
|
||||
update_index_sets(pos, &index_sets);
|
||||
ASSERT(index_sets == make_index_sets(pos));
|
||||
}
|
||||
|
||||
pos.set(StartFEN, false, &si, Threads.main());
|
||||
|
||||
// Print a '.' every 100 games so that progress is visible
|
||||
if ((i % 100) == 0)
|
||||
std::cout << "." << std::flush;
|
||||
}
|
||||
std::cout << "passed." << std::endl;
|
||||
std::cout << num_games << " games, " << num_moves << " moves, "
|
||||
<< num_updates.back() << " updates, "
|
||||
<< (1.0 * num_updates.back() / num_moves)
|
||||
<< " updates per move" << std::endl;
|
||||
std::size_t num_observed_indices = 0;
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
const auto count = std::count(trigger_map.begin(), trigger_map.end(), i);
|
||||
num_observed_indices += count;
|
||||
std::cout << "TriggerEvent(" << static_cast<int>(kRefreshTriggers[i])
|
||||
<< "): " << count << " features ("
|
||||
<< (100.0 * count / RawFeatures::kDimensions) << "%), "
|
||||
<< num_updates[i] << " updates ("
|
||||
<< (1.0 * num_updates[i] / num_moves) << " per move), "
|
||||
<< num_resets[i] << " resets ("
|
||||
<< (100.0 * num_resets[i] / num_moves) << "%)"
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << "observed " << num_observed_indices << " ("
|
||||
<< (100.0 * num_observed_indices / RawFeatures::kDimensions)
|
||||
<< "% of " << RawFeatures::kDimensions
|
||||
<< ") features" << std::endl;
|
||||
#define ASSERT(X) { \
|
||||
if (!(X)) { \
|
||||
std::cout \
|
||||
<< "\nError : ASSERT(" << #X << "), " \
|
||||
<< __FILE__ << "(" << __LINE__ << "): " \
|
||||
<< __func__ << std::endl; \
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(3000)); \
|
||||
*(int*)1 =0; \
|
||||
} \
|
||||
}
|
||||
|
||||
// Output a string that represents the structure of the evaluation function
|
||||
void PrintInfo(std::istream& stream) {
|
||||
std::cout << "network architecture: " << GetArchitectureString() << std::endl;
|
||||
|
||||
while (true) {
|
||||
std::string file_name;
|
||||
stream >> file_name;
|
||||
if (file_name.empty()) break;
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
const bool success = [&]() {
|
||||
std::ifstream file_stream(file_name, std::ios::binary);
|
||||
if (!file_stream) return false;
|
||||
if (!ReadHeader(file_stream, &hash_value, &architecture)) return false;
|
||||
return true;
|
||||
}();
|
||||
|
||||
std::cout << file_name << ": ";
|
||||
if (success) {
|
||||
if (hash_value == kHashValue) {
|
||||
std::cout << "matches with this binary";
|
||||
if (architecture != GetArchitectureString()) {
|
||||
std::cout << ", but architecture string differs: " << architecture;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
} else {
|
||||
std::cout << architecture << std::endl;
|
||||
}
|
||||
} else {
|
||||
std::cout << "failed to read header" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// USI extended command for NNUE evaluation function
|
||||
void TestCommand(Position& pos, std::istream& stream) {
|
||||
std::string sub_command;
|
||||
stream >> sub_command;
|
||||
namespace Eval::NNUE {
|
||||
|
||||
if (sub_command == "test_features") {
|
||||
TestFeatures(pos);
|
||||
} else if (sub_command == "info") {
|
||||
PrintInfo(stream);
|
||||
} else {
|
||||
std::cout << "usage:" << std::endl;
|
||||
std::cout << " test nnue test_features" << std::endl;
|
||||
std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl;
|
||||
}
|
||||
}
|
||||
namespace {
|
||||
|
||||
} // namespace NNUE
|
||||
// Testing RawFeatures mainly for difference calculation
|
||||
void test_features(Position& pos) {
|
||||
const std::uint64_t num_games = 1000;
|
||||
StateInfo si;
|
||||
pos.set(StartFEN, false, &si, Threads.main());
|
||||
const int MAX_PLY = 256; // test up to 256 hands
|
||||
|
||||
} // namespace Eval
|
||||
StateInfo state[MAX_PLY]; // StateInfo only for the maximum number of steps
|
||||
int ply; // Trouble from the initial phase
|
||||
|
||||
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
PRNG prng(20171128);
|
||||
|
||||
std::uint64_t num_moves = 0;
|
||||
std::vector<std::uint64_t> num_updates(kRefreshTriggers.size() + 1);
|
||||
std::vector<std::uint64_t> num_resets(kRefreshTriggers.size());
|
||||
constexpr IndexType kUnknown = -1;
|
||||
std::vector<IndexType> trigger_map(RawFeatures::kDimensions, kUnknown);
|
||||
|
||||
auto make_index_sets = [&](const Position& position) {
|
||||
std::vector<std::vector<std::set<IndexType>>> index_sets(
|
||||
kRefreshTriggers.size(), std::vector<std::set<IndexType>>(2));
|
||||
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList active_indices[2];
|
||||
RawFeatures::append_active_indices(position, kRefreshTriggers[i],
|
||||
active_indices);
|
||||
|
||||
for (const auto perspective : Colors) {
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT(index_sets[i][perspective].count(index) == 0);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
index_sets[i][perspective].insert(index);
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return index_sets;
|
||||
};
|
||||
|
||||
auto update_index_sets = [&](const Position& position, auto* index_sets) {
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
Features::IndexList removed_indices[2], added_indices[2];
|
||||
bool reset[2] = { false, false };
|
||||
RawFeatures::append_changed_indices(position, kRefreshTriggers[i],
|
||||
removed_indices, added_indices, reset);
|
||||
for (const auto perspective : Colors) {
|
||||
if (reset[perspective]) {
|
||||
(*index_sets)[i][perspective].clear();
|
||||
++num_resets[i];
|
||||
} else {
|
||||
for (const auto index : removed_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT((*index_sets)[i][perspective].count(index) == 1);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
(*index_sets)[i][perspective].erase(index);
|
||||
++num_updates.back();
|
||||
++num_updates[i];
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto index : added_indices[perspective]) {
|
||||
ASSERT(index < RawFeatures::kDimensions);
|
||||
ASSERT((*index_sets)[i][perspective].count(index) == 0);
|
||||
ASSERT(trigger_map[index] == kUnknown || trigger_map[index] == i);
|
||||
(*index_sets)[i][perspective].insert(index);
|
||||
++num_updates.back();
|
||||
++num_updates[i];
|
||||
trigger_map[index] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::cout << "feature set: " << RawFeatures::get_name()
|
||||
<< "[" << RawFeatures::kDimensions << "]" << std::endl;
|
||||
std::cout << "start testing with random games";
|
||||
|
||||
for (std::uint64_t i = 0; i < num_games; ++i) {
|
||||
auto index_sets = make_index_sets(pos);
|
||||
for (ply = 0; ply < MAX_PLY; ++ply) {
|
||||
MoveList<LEGAL> mg(pos); // Generate all legal hands
|
||||
|
||||
// No legal move is available, so this random game ends here
|
||||
if (mg.size() == 0)
|
||||
break;
|
||||
|
||||
// Pick one of the generated moves at random and play it on the board.
|
||||
Move m = mg.begin()[prng.rand(mg.size())];
|
||||
pos.do_move(m, state[ply]);
|
||||
|
||||
++num_moves;
|
||||
update_index_sets(pos, &index_sets);
|
||||
ASSERT(index_sets == make_index_sets(pos));
|
||||
}
|
||||
|
||||
pos.set(StartFEN, false, &si, Threads.main());
|
||||
|
||||
// Output '.' every 100 games (so you can see that it's progressing)
|
||||
if ((i % 100) == 0)
|
||||
std::cout << "." << std::flush;
|
||||
}
|
||||
|
||||
std::cout << "passed." << std::endl;
|
||||
std::cout << num_games << " games, " << num_moves << " moves, "
|
||||
<< num_updates.back() << " updates, "
|
||||
<< (1.0 * num_updates.back() / num_moves)
|
||||
<< " updates per move" << std::endl;
|
||||
std::size_t num_observed_indices = 0;
|
||||
|
||||
for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
|
||||
const auto count = std::count(trigger_map.begin(), trigger_map.end(), i);
|
||||
num_observed_indices += count;
|
||||
std::cout << "TriggerEvent(" << static_cast<int>(kRefreshTriggers[i])
|
||||
<< "): " << count << " features ("
|
||||
<< (100.0 * count / RawFeatures::kDimensions) << "%), "
|
||||
<< num_updates[i] << " updates ("
|
||||
<< (1.0 * num_updates[i] / num_moves) << " per move), "
|
||||
<< num_resets[i] << " resets ("
|
||||
<< (100.0 * num_resets[i] / num_moves) << "%)"
|
||||
<< std::endl;
|
||||
}
|
||||
std::cout << "observed " << num_observed_indices << " ("
|
||||
<< (100.0 * num_observed_indices / RawFeatures::kDimensions)
|
||||
<< "% of " << RawFeatures::kDimensions
|
||||
<< ") features" << std::endl;
|
||||
}
|
||||
|
||||
// Output a string that represents the structure of the evaluation function
|
||||
void print_info(std::istream& stream) {
|
||||
std::cout << "network architecture: " << get_architecture_string() << std::endl;
|
||||
|
||||
while (true) {
|
||||
std::string file_name;
|
||||
stream >> file_name;
|
||||
if (file_name.empty())
|
||||
break;
|
||||
|
||||
std::uint32_t hash_value;
|
||||
std::string architecture;
|
||||
const bool success = [&]() {
|
||||
std::ifstream file_stream(file_name, std::ios::binary);
|
||||
|
||||
if (!file_stream)
|
||||
return false;
|
||||
if (!read_header(file_stream, &hash_value, &architecture))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}();
|
||||
|
||||
std::cout << file_name << ": ";
|
||||
if (success) {
|
||||
if (hash_value == kHashValue) {
|
||||
std::cout << "matches with this binary";
|
||||
if (architecture != get_architecture_string()) {
|
||||
std::cout << ", but architecture string differs: " << architecture;
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
} else {
|
||||
std::cout << architecture << std::endl;
|
||||
}
|
||||
} else {
|
||||
std::cout << "failed to read header" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// USI extended command for NNUE evaluation function
|
||||
void test_command(Position& pos, std::istream& stream) {
|
||||
std::string sub_command;
|
||||
stream >> sub_command;
|
||||
|
||||
if (sub_command == "test_features") {
|
||||
test_features(pos);
|
||||
} else if (sub_command == "info") {
|
||||
print_info(stream);
|
||||
} else {
|
||||
std::cout << "usage:" << std::endl;
|
||||
std::cout << " test nnue test_features" << std::endl;
|
||||
std::cout << " test nnue info [path/to/" << fileName << "...]" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
@@ -1,21 +1,12 @@
|
||||
// USI extended command interface for NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TEST_COMMAND_H_
|
||||
#ifndef _NNUE_TEST_COMMAND_H_
|
||||
#define _NNUE_TEST_COMMAND_H_
|
||||
|
||||
#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
// USI extended command interface for NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
namespace Eval {
|
||||
// USI extended command for NNUE evaluation function
|
||||
void test_command(Position& pos, std::istream& stream);
|
||||
|
||||
namespace NNUE {
|
||||
|
||||
// USI extended command for NNUE evaluation function
|
||||
void TestCommand(Position& pos, std::istream& stream);
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
#ifndef _NNUE_TRAINER_FEATURES_ALL_FACTORIZERS_H_
|
||||
#define _NNUE_TRAINER_FEATURES_ALL_FACTORIZERS_H_
|
||||
|
||||
#include "factorizer.h"
|
||||
#include "factorizer_feature_set.h"
|
||||
|
||||
#include "factorizer_half_kp.h"
|
||||
#include "factorizer_half_ka.h"
|
||||
|
||||
#endif
|
||||
@@ -1,110 +1,117 @@
|
||||
// NNUE evaluation function feature conversion class template
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
#include "nnue/nnue_common.h"
|
||||
|
||||
#include "../../nnue_common.h"
|
||||
#include "../trainer.h"
|
||||
#include "nnue/trainer/trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
// NNUE evaluation function feature conversion class template
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace NNUE {
|
||||
// Class template that converts input features into learning features
|
||||
// By default, the learning feature is the same as the original input feature, and specialized as necessary
|
||||
template <typename FeatureType>
|
||||
class Factorizer {
|
||||
public:
|
||||
static constexpr std::string get_name() {
|
||||
return "Factorizer<" + FeatureType::get_name() + "> -> " + std::string("No factorizer");
|
||||
}
|
||||
|
||||
namespace Features {
|
||||
static constexpr std::string get_factorizers_string() {
|
||||
return " - " + get_name();
|
||||
}
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// By default, the learning feature is the same as the original input feature, and specialized as necessary
|
||||
template <typename FeatureType>
|
||||
class Factorizer {
|
||||
public:
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return FeatureType::kDimensions;
|
||||
}
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType get_dimensions() {
|
||||
return FeatureType::kDimensions;
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
assert(base_index <FeatureType::kDimensions);
|
||||
training_features->emplace_back(base_index);
|
||||
}
|
||||
};
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void append_training_features(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
|
||||
// Learning feature information
|
||||
struct FeatureProperties {
|
||||
bool active;
|
||||
IndexType dimensions;
|
||||
};
|
||||
assert(base_index <FeatureType::kDimensions);
|
||||
training_features->emplace_back(base_index);
|
||||
}
|
||||
};
|
||||
|
||||
// Add the original input features to the learning features
|
||||
template <typename FeatureType>
|
||||
IndexType AppendBaseFeature(
|
||||
FeatureProperties properties, IndexType base_index,
|
||||
std::vector<TrainingFeature>* training_features) {
|
||||
assert(properties.dimensions == FeatureType::kDimensions);
|
||||
assert(base_index < FeatureType::kDimensions);
|
||||
training_features->emplace_back(base_index);
|
||||
return properties.dimensions;
|
||||
}
|
||||
// Learning feature information: describes one group of learning features.
|
||||
struct FeatureProperties {
|
||||
// Whether this group is enabled; skip_features() returns 0 for a group that is not active.
bool active;
|
||||
// Number of indices this group occupies; active groups add this to the running index offset.
IndexType dimensions;
|
||||
};
|
||||
|
||||
// If the learning rate scale is not 0, inherit other types of learning features
|
||||
template <typename FeatureType>
|
||||
IndexType InheritFeaturesIfRequired(
|
||||
IndexType index_offset, FeatureProperties properties, IndexType base_index,
|
||||
std::vector<TrainingFeature>* training_features) {
|
||||
if (!properties.active) {
|
||||
return 0;
|
||||
}
|
||||
assert(properties.dimensions == Factorizer<FeatureType>::GetDimensions());
|
||||
assert(base_index < FeatureType::kDimensions);
|
||||
const auto start = training_features->size();
|
||||
Factorizer<FeatureType>::AppendTrainingFeatures(
|
||||
base_index, training_features);
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
|
||||
feature.ShiftIndex(index_offset);
|
||||
}
|
||||
return properties.dimensions;
|
||||
}
|
||||
// Add the original input features to the learning features
|
||||
template <typename FeatureType>
|
||||
IndexType append_base_feature(
|
||||
FeatureProperties properties, IndexType base_index,
|
||||
std::vector<TrainingFeature>* training_features) {
|
||||
|
||||
// Return the index difference as needed, without adding learning features
|
||||
// Call instead of InheritFeaturesIfRequired() if there are no corresponding features
|
||||
IndexType SkipFeatures(FeatureProperties properties) {
|
||||
if (!properties.active) {
|
||||
return 0;
|
||||
}
|
||||
return properties.dimensions;
|
||||
}
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
template <std::size_t N>
|
||||
constexpr IndexType GetActiveDimensions(
|
||||
const FeatureProperties (&properties)[N]) {
|
||||
static_assert(N > 0, "");
|
||||
IndexType dimensions = properties[0].dimensions;
|
||||
for (std::size_t i = 1; i < N; ++i) {
|
||||
if (properties[i].active) {
|
||||
dimensions += properties[i].dimensions;
|
||||
assert(properties.dimensions == FeatureType::kDimensions);
|
||||
assert(base_index < FeatureType::kDimensions);
|
||||
training_features->emplace_back(base_index);
|
||||
return properties.dimensions;
|
||||
}
|
||||
}
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
// Get the number of elements in a built-in array.
// The length is deduced from the array type, so the argument itself is never read.
template <typename Element, std::size_t Length>
constexpr std::size_t GetArrayLength(const Element (&)[Length]) {
    return Length;
}
|
||||
// If the learning rate scale is not 0, inherit other types of learning features
|
||||
template <typename FeatureType>
|
||||
IndexType inherit_features_if_required(
|
||||
IndexType index_offset, FeatureProperties properties, IndexType base_index,
|
||||
std::vector<TrainingFeature>* training_features) {
|
||||
|
||||
} // namespace Features
|
||||
if (!properties.active) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
assert(properties.dimensions == Factorizer<FeatureType>::get_dimensions());
|
||||
assert(base_index < FeatureType::kDimensions);
|
||||
|
||||
} // namespace Eval
|
||||
const auto start = training_features->size();
|
||||
Factorizer<FeatureType>::append_training_features(
|
||||
base_index, training_features);
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
assert(feature.get_index() < Factorizer<FeatureType>::get_dimensions());
|
||||
feature.shift_index(index_offset);
|
||||
}
|
||||
|
||||
return properties.dimensions;
|
||||
}
|
||||
|
||||
// Return the index difference as needed, without adding learning features
|
||||
// Call instead of InheritFeaturesIfRequired() if there are no corresponding features
|
||||
IndexType skip_features(FeatureProperties properties) {
|
||||
if (!properties.active)
|
||||
return 0;
|
||||
|
||||
return properties.dimensions;
|
||||
}
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
template <std::size_t N>
|
||||
constexpr IndexType get_active_dimensions(
|
||||
const FeatureProperties (&properties)[N]) {
|
||||
|
||||
static_assert(N > 0, "");
|
||||
|
||||
IndexType dimensions = properties[0].dimensions;
|
||||
|
||||
for (std::size_t i = 1; i < N; ++i) {
|
||||
if (properties[i].active) {
|
||||
dimensions += properties[i].dimensions;
|
||||
}
|
||||
}
|
||||
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
// Get the number of elements in a built-in array.
// The length is deduced from the array type, so the argument itself is never read.
template <typename Element, std::size_t Length>
constexpr std::size_t get_array_length(const Element (&)[Length]) {
    return Length;
}
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,104 +1,121 @@
|
||||
// Specialization for feature set of feature conversion class template of NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../features/feature_set.h"
|
||||
#include "factorizer.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "nnue/features/feature_set.h"
|
||||
|
||||
namespace NNUE {
|
||||
// Specialization for feature set of feature conversion class template of NNUE evaluation function
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for FeatureSet
|
||||
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
|
||||
class Factorizer<FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
|
||||
private:
|
||||
using Head = Factorizer<FeatureSet<FirstFeatureType>>;
|
||||
using Tail = Factorizer<FeatureSet<RemainingFeatureTypes...>>;
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for FeatureSet
|
||||
template <typename FirstFeatureType, typename... RemainingFeatureTypes>
|
||||
class Factorizer<FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
|
||||
private:
|
||||
using Head = Factorizer<FeatureSet<FirstFeatureType>>;
|
||||
using Tail = Factorizer<FeatureSet<RemainingFeatureTypes...>>;
|
||||
public:
|
||||
// number of dimensions of original input features
|
||||
static constexpr IndexType kBaseDimensions =
|
||||
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;
|
||||
|
||||
public:
|
||||
// number of dimensions of original input features
|
||||
static constexpr IndexType kBaseDimensions =
|
||||
FeatureSet<FirstFeatureType, RemainingFeatureTypes...>::kDimensions;
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return Head::GetDimensions() + Tail::GetDimensions();
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features,
|
||||
IndexType base_dimensions = kBaseDimensions) {
|
||||
assert(base_index < kBaseDimensions);
|
||||
constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;
|
||||
if (base_index < boundary) {
|
||||
Tail::AppendTrainingFeatures(
|
||||
base_index, training_features, base_dimensions);
|
||||
} else {
|
||||
const auto start = training_features->size();
|
||||
Head::AppendTrainingFeatures(
|
||||
base_index - boundary, training_features, base_dimensions);
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
const auto index = feature.GetIndex();
|
||||
assert(index < Head::GetDimensions() ||
|
||||
(index >= base_dimensions &&
|
||||
index < base_dimensions +
|
||||
Head::GetDimensions() - Head::kBaseDimensions));
|
||||
if (index < Head::kBaseDimensions) {
|
||||
feature.ShiftIndex(Tail::kBaseDimensions);
|
||||
} else {
|
||||
feature.ShiftIndex(Tail::GetDimensions() - Tail::kBaseDimensions);
|
||||
static constexpr std::string get_factorizers_string() {
|
||||
std::string str = " - ";
|
||||
str += Head::get_name();
|
||||
str += '\n';
|
||||
str += Tail::get_factorizers_string();
|
||||
return str;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization when FeatureSet has one template argument
|
||||
template <typename FeatureType>
|
||||
class Factorizer<FeatureSet<FeatureType>> {
|
||||
public:
|
||||
// number of dimensions of original input features
|
||||
static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType get_dimensions() {
|
||||
return Head::get_dimensions() + Tail::get_dimensions();
|
||||
}
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return Factorizer<FeatureType>::GetDimensions();
|
||||
}
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void append_training_features(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features,
|
||||
IndexType base_dimensions = kBaseDimensions) {
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features,
|
||||
IndexType base_dimensions = kBaseDimensions) {
|
||||
assert(base_index < kBaseDimensions);
|
||||
const auto start = training_features->size();
|
||||
Factorizer<FeatureType>::AppendTrainingFeatures(
|
||||
base_index, training_features);
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
assert(feature.GetIndex() < Factorizer<FeatureType>::GetDimensions());
|
||||
if (feature.GetIndex() >= kBaseDimensions) {
|
||||
feature.ShiftIndex(base_dimensions - kBaseDimensions);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
assert(base_index < kBaseDimensions);
|
||||
|
||||
} // namespace Features
|
||||
constexpr auto boundary = FeatureSet<RemainingFeatureTypes...>::kDimensions;
|
||||
|
||||
} // namespace NNUE
|
||||
if (base_index < boundary) {
|
||||
Tail::append_training_features(
|
||||
base_index, training_features, base_dimensions);
|
||||
}
|
||||
else {
|
||||
const auto start = training_features->size();
|
||||
|
||||
} // namespace Eval
|
||||
Head::append_training_features(
|
||||
base_index - boundary, training_features, base_dimensions);
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
const auto index = feature.get_index();
|
||||
|
||||
assert(index < Head::get_dimensions() ||
|
||||
(index >= base_dimensions &&
|
||||
index < base_dimensions +
|
||||
Head::get_dimensions() - Head::kBaseDimensions));
|
||||
|
||||
if (index < Head::kBaseDimensions) {
|
||||
feature.shift_index(Tail::kBaseDimensions);
|
||||
}
|
||||
else {
|
||||
feature.shift_index(Tail::get_dimensions() - Tail::kBaseDimensions);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization when FeatureSet has one template argument
|
||||
template <typename FeatureType>
|
||||
class Factorizer<FeatureSet<FeatureType>> {
|
||||
public:
|
||||
// number of dimensions of original input features
|
||||
static constexpr IndexType kBaseDimensions = FeatureType::kDimensions;
|
||||
|
||||
static constexpr std::string get_name() {
|
||||
return Factorizer<FeatureType>::get_name();
|
||||
}
|
||||
|
||||
static constexpr std::string get_factorizers_string() {
|
||||
return " - " + get_name();
|
||||
}
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType get_dimensions() {
|
||||
return Factorizer<FeatureType>::get_dimensions();
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void append_training_features(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features,
|
||||
IndexType base_dimensions = kBaseDimensions) {
|
||||
|
||||
assert(base_index < kBaseDimensions);
|
||||
|
||||
const auto start = training_features->size();
|
||||
|
||||
Factorizer<FeatureType>::append_training_features(
|
||||
base_index, training_features);
|
||||
|
||||
for (auto i = start; i < training_features->size(); ++i) {
|
||||
auto& feature = (*training_features)[i];
|
||||
assert(feature.get_index() < Factorizer<FeatureType>::get_dimensions());
|
||||
if (feature.get_index() >= kBaseDimensions) {
|
||||
feature.shift_index(base_dimensions - kBaseDimensions);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KA_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KA_H_
|
||||
|
||||
#include "factorizer.h"
|
||||
|
||||
#include "nnue/features/half_ka.h"
|
||||
#include "nnue/features/a.h"
|
||||
#include "nnue/features/half_relative_ka.h"
|
||||
|
||||
// Specialization of NNUE evaluation function feature conversion class template for HalfKA
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for HalfKA
|
||||
template <Side AssociatedKing>
|
||||
class Factorizer<HalfKA<AssociatedKing>> {
|
||||
private:
|
||||
using FeatureType = HalfKA<AssociatedKing>;
|
||||
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
|
||||
// Type of learning feature
|
||||
enum TrainingFeatureType {
|
||||
kFeaturesHalfKA,
|
||||
kFeaturesA,
|
||||
kFeaturesHalfRelativeKA,
|
||||
kNumTrainingFeatureTypes,
|
||||
};
|
||||
|
||||
// Learning feature information
|
||||
static constexpr FeatureProperties kProperties[] = {
|
||||
// kFeaturesHalfA
|
||||
{true, FeatureType::kDimensions},
|
||||
// kFeaturesA
|
||||
{true, Factorizer<A>::get_dimensions()},
|
||||
// kFeaturesHalfRelativeKA
|
||||
{true, Factorizer<HalfRelativeKA<AssociatedKing>>::get_dimensions()},
|
||||
};
|
||||
|
||||
static_assert(get_array_length(kProperties) == kNumTrainingFeatureTypes, "");
|
||||
|
||||
public:
|
||||
static constexpr std::string get_name() {
|
||||
return std::string("Factorizer<") + FeatureType::kName + "> -> " + "A, HalfRelativeKA";
|
||||
}
|
||||
|
||||
static constexpr std::string get_factorizers_string() {
|
||||
return " - " + get_name();
|
||||
}
|
||||
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType get_dimensions() {
|
||||
return get_active_dimensions(kProperties);
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void append_training_features(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
|
||||
// kFeaturesHalfA
|
||||
IndexType index_offset = append_base_feature<FeatureType>(
|
||||
kProperties[kFeaturesHalfKA], base_index, training_features);
|
||||
|
||||
const auto sq_k = static_cast<Square>(base_index / PS_END2);
|
||||
const auto a = static_cast<IndexType>(base_index % PS_END2);
|
||||
|
||||
// kFeaturesA
|
||||
index_offset += inherit_features_if_required<A>(
|
||||
index_offset, kProperties[kFeaturesA], a, training_features);
|
||||
|
||||
// kFeaturesHalfRelativeKA
|
||||
if (a >= PS_W_PAWN) {
|
||||
index_offset += inherit_features_if_required<HalfRelativeKA<AssociatedKing>>(
|
||||
index_offset, kProperties[kFeaturesHalfRelativeKA],
|
||||
HalfRelativeKA<AssociatedKing>::make_index(sq_k, a),
|
||||
training_features);
|
||||
}
|
||||
else {
|
||||
index_offset += skip_features(kProperties[kFeaturesHalfRelativeKA]);
|
||||
}
|
||||
|
||||
assert(index_offset == get_dimensions());
|
||||
}
|
||||
};
|
||||
|
||||
template <Side AssociatedKing>
|
||||
constexpr FeatureProperties Factorizer<HalfKA<AssociatedKing>>::kProperties[];
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif // #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KA_H_
|
||||
@@ -1,103 +1,104 @@
|
||||
// Specialization of NNUE evaluation function feature conversion class template for HalfKP
|
||||
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
|
||||
#ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
|
||||
#define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
|
||||
|
||||
#if defined(EVAL_NNUE)
|
||||
|
||||
#include "../../features/half_kp.h"
|
||||
#include "../../features/p.h"
|
||||
#include "../../features/half_relative_kp.h"
|
||||
#include "factorizer.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "nnue/features/half_kp.h"
|
||||
#include "nnue/features/p.h"
|
||||
#include "nnue/features/half_relative_kp.h"
|
||||
|
||||
namespace NNUE {
|
||||
// Specialization of NNUE evaluation function feature conversion class template for HalfKP
|
||||
namespace Eval::NNUE::Features {
|
||||
|
||||
namespace Features {
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for HalfKP
|
||||
template <Side AssociatedKing>
|
||||
class Factorizer<HalfKP<AssociatedKing>> {
|
||||
private:
|
||||
using FeatureType = HalfKP<AssociatedKing>;
|
||||
|
||||
// Class template that converts input features into learning features
|
||||
// Specialization for HalfKP
|
||||
template <Side AssociatedKing>
|
||||
class Factorizer<HalfKP<AssociatedKing>> {
|
||||
private:
|
||||
using FeatureType = HalfKP<AssociatedKing>;
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
|
||||
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
|
||||
static constexpr IndexType kMaxActiveDimensions =
|
||||
FeatureType::kMaxActiveDimensions;
|
||||
// Type of learning feature
|
||||
enum TrainingFeatureType {
|
||||
kFeaturesHalfKP,
|
||||
kFeaturesHalfK,
|
||||
kFeaturesP,
|
||||
kFeaturesHalfRelativeKP,
|
||||
kNumTrainingFeatureTypes,
|
||||
};
|
||||
|
||||
// Type of learning feature
|
||||
enum TrainingFeatureType {
|
||||
kFeaturesHalfKP,
|
||||
kFeaturesHalfK,
|
||||
kFeaturesP,
|
||||
kFeaturesHalfRelativeKP,
|
||||
kNumTrainingFeatureTypes,
|
||||
};
|
||||
// Learning feature information
|
||||
static constexpr FeatureProperties kProperties[] = {
|
||||
// kFeaturesHalfKP
|
||||
{true, FeatureType::kDimensions},
|
||||
// kFeaturesHalfK
|
||||
{true, SQUARE_NB},
|
||||
// kFeaturesP
|
||||
{true, Factorizer<P>::get_dimensions()},
|
||||
// kFeaturesHalfRelativeKP
|
||||
{true, Factorizer<HalfRelativeKP<AssociatedKing>>::get_dimensions()},
|
||||
};
|
||||
|
||||
// Learning feature information
|
||||
static constexpr FeatureProperties kProperties[] = {
|
||||
// kFeaturesHalfKP
|
||||
{true, FeatureType::kDimensions},
|
||||
// kFeaturesHalfK
|
||||
{true, SQUARE_NB},
|
||||
// kFeaturesP
|
||||
{true, Factorizer<P>::GetDimensions()},
|
||||
// kFeaturesHalfRelativeKP
|
||||
{true, Factorizer<HalfRelativeKP<AssociatedKing>>::GetDimensions()},
|
||||
};
|
||||
static_assert(GetArrayLength(kProperties) == kNumTrainingFeatureTypes, "");
|
||||
static_assert(get_array_length(kProperties) == kNumTrainingFeatureTypes, "");
|
||||
|
||||
public:
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType GetDimensions() {
|
||||
return GetActiveDimensions(kProperties);
|
||||
}
|
||||
public:
|
||||
static constexpr std::string get_name() {
|
||||
return std::string("Factorizer<") + FeatureType::kName + "> -> " + "HalfK, P, HalfRelativeKP";
|
||||
}
|
||||
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void AppendTrainingFeatures(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
// kFeaturesHalfKP
|
||||
IndexType index_offset = AppendBaseFeature<FeatureType>(
|
||||
kProperties[kFeaturesHalfKP], base_index, training_features);
|
||||
static constexpr std::string get_factorizers_string() {
|
||||
return " - " + get_name();
|
||||
}
|
||||
|
||||
const auto sq_k = static_cast<Square>(base_index / PS_END);
|
||||
const auto p = static_cast<IndexType>(base_index % PS_END);
|
||||
// kFeaturesHalfK
|
||||
{
|
||||
const auto& properties = kProperties[kFeaturesHalfK];
|
||||
if (properties.active) {
|
||||
training_features->emplace_back(index_offset + sq_k);
|
||||
index_offset += properties.dimensions;
|
||||
}
|
||||
}
|
||||
// kFeaturesP
|
||||
index_offset += InheritFeaturesIfRequired<P>(
|
||||
index_offset, kProperties[kFeaturesP], p, training_features);
|
||||
// kFeaturesHalfRelativeKP
|
||||
if (p >= PS_W_PAWN) {
|
||||
index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
|
||||
index_offset, kProperties[kFeaturesHalfRelativeKP],
|
||||
HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
|
||||
training_features);
|
||||
} else {
|
||||
index_offset += SkipFeatures(kProperties[kFeaturesHalfRelativeKP]);
|
||||
}
|
||||
// Get the dimensionality of the learning feature
|
||||
static constexpr IndexType get_dimensions() {
|
||||
return get_active_dimensions(kProperties);
|
||||
}
|
||||
|
||||
assert(index_offset == GetDimensions());
|
||||
}
|
||||
};
|
||||
// Get index of learning feature and scale of learning rate
|
||||
static void append_training_features(
|
||||
IndexType base_index, std::vector<TrainingFeature>* training_features) {
|
||||
|
||||
template <Side AssociatedKing>
|
||||
constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];
|
||||
// kFeaturesHalfKP
|
||||
IndexType index_offset = append_base_feature<FeatureType>(
|
||||
kProperties[kFeaturesHalfKP], base_index, training_features);
|
||||
|
||||
} // namespace Features
|
||||
const auto sq_k = static_cast<Square>(base_index / PS_END);
|
||||
const auto p = static_cast<IndexType>(base_index % PS_END);
|
||||
|
||||
} // namespace NNUE
|
||||
// kFeaturesHalfK
|
||||
{
|
||||
const auto& properties = kProperties[kFeaturesHalfK];
|
||||
if (properties.active) {
|
||||
training_features->emplace_back(index_offset + sq_k);
|
||||
index_offset += properties.dimensions;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Eval
|
||||
// kFeaturesP
|
||||
index_offset += inherit_features_if_required<P>(
|
||||
index_offset, kProperties[kFeaturesP], p, training_features);
|
||||
// kFeaturesHalfRelativeKP
|
||||
if (p >= PS_W_PAWN) {
|
||||
index_offset += inherit_features_if_required<HalfRelativeKP<AssociatedKing>>(
|
||||
index_offset, kProperties[kFeaturesHalfRelativeKP],
|
||||
HalfRelativeKP<AssociatedKing>::make_index(sq_k, p),
|
||||
training_features);
|
||||
}
|
||||
else {
|
||||
index_offset += skip_features(kProperties[kFeaturesHalfRelativeKP]);
|
||||
}
|
||||
|
||||
#endif // defined(EVAL_NNUE)
|
||||
assert(index_offset == get_dimensions());
|
||||
}
|
||||
};
|
||||
|
||||
template <Side AssociatedKing>
|
||||
constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];
|
||||
|
||||
} // namespace Eval::NNUE::Features
|
||||
|
||||
#endif
|
||||
|
||||
+96
-99
@@ -1,125 +1,122 @@
|
||||
// Common header of class template for learning NNUE evaluation function
|
||||
|
||||
#ifndef _NNUE_TRAINER_H_
|
||||
#ifndef _NNUE_TRAINER_H_
|
||||
#define _NNUE_TRAINER_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../nnue_common.h"
|
||||
#include "../features/index_list.h"
|
||||
#include "nnue/nnue_common.h"
|
||||
#include "nnue/features/index_list.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#if defined(USE_BLAS)
|
||||
static_assert(std::is_same<LearnFloatType, float>::value, "");
|
||||
#include <cblas.h>
|
||||
#endif
|
||||
|
||||
namespace Eval {
|
||||
// Common header of class template for learning NNUE evaluation function
|
||||
namespace Eval::NNUE {
|
||||
|
||||
namespace NNUE {
|
||||
// Ponanza constant used in the relation between evaluation value and winning percentage
|
||||
constexpr double kPonanzaConstant = 600.0;
|
||||
|
||||
// Ponanza constant used in the relation between evaluation value and winning percentage
|
||||
constexpr double kPonanzaConstant = 600.0;
|
||||
// Class that represents one index of learning feature
|
||||
class TrainingFeature {
|
||||
using StorageType = std::uint32_t;
|
||||
static_assert(std::is_unsigned<StorageType>::value, "");
|
||||
|
||||
// Class that represents one index of learning feature
|
||||
class TrainingFeature {
|
||||
using StorageType = std::uint32_t;
|
||||
static_assert(std::is_unsigned<StorageType>::value, "");
|
||||
public:
|
||||
static constexpr std::uint32_t kIndexBits = 24;
|
||||
|
||||
public:
|
||||
static constexpr std::uint32_t kIndexBits = 24;
|
||||
static_assert(kIndexBits < std::numeric_limits<StorageType>::digits, "");
|
||||
static constexpr std::uint32_t kCountBits =
|
||||
std::numeric_limits<StorageType>::digits - kIndexBits;
|
||||
static_assert(kIndexBits < std::numeric_limits<StorageType>::digits, "");
|
||||
|
||||
explicit TrainingFeature(IndexType index) :
|
||||
index_and_count_((index << kCountBits) | 1) {
|
||||
assert(index < (1 << kIndexBits));
|
||||
}
|
||||
TrainingFeature& operator+=(const TrainingFeature& other) {
|
||||
assert(other.GetIndex() == GetIndex());
|
||||
assert(other.GetCount() + GetCount() < (1 << kCountBits));
|
||||
index_and_count_ += other.GetCount();
|
||||
return *this;
|
||||
}
|
||||
IndexType GetIndex() const {
|
||||
return static_cast<IndexType>(index_and_count_ >> kCountBits);
|
||||
}
|
||||
void ShiftIndex(IndexType offset) {
|
||||
assert(GetIndex() + offset < (1 << kIndexBits));
|
||||
index_and_count_ += offset << kCountBits;
|
||||
}
|
||||
IndexType GetCount() const {
|
||||
return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
|
||||
}
|
||||
bool operator<(const TrainingFeature& other) const {
|
||||
return index_and_count_ < other.index_and_count_;
|
||||
}
|
||||
static constexpr std::uint32_t kCountBits =
|
||||
std::numeric_limits<StorageType>::digits - kIndexBits;
|
||||
|
||||
private:
|
||||
StorageType index_and_count_;
|
||||
};
|
||||
explicit TrainingFeature(IndexType index) :
|
||||
index_and_count_((index << kCountBits) | 1) {
|
||||
|
||||
// One sample of training data: the feature lists fed to the network plus the
// packed position record and the per-sample scalars used when training on it.
|
||||
struct Example {
|
||||
// Two lists of active training features, indexed 0 and 1.
// NOTE(review): presumably one list per perspective -- confirm against the code that fills them.
std::vector<TrainingFeature> training_features[2];
|
||||
// Packed position/value record (type declared in Learner).
Learner::PackedSfenValue psv;
|
||||
// NOTE(review): the meaning of sign and weight is defined by the code that
// builds and consumes examples, which is not visible here.
int sign;
|
||||
double weight;
|
||||
};
|
||||
assert(index < (1 << kIndexBits));
|
||||
}
|
||||
|
||||
// Message used to send options such as hyperparameters to the trainers.
// name/value are fixed at construction; the two counters start at zero and
// are incremented by ReceiveMessage() as trainers inspect the message.
|
||||
struct Message {
|
||||
// value defaults to the empty string for messages that carry no payload.
Message(const std::string& name, const std::string& value = ""):
|
||||
name(name), value(value), num_peekers(0), num_receivers(0) {}
|
||||
const std::string name;
|
||||
const std::string value;
|
||||
// Number of ReceiveMessage() calls that matched the "name[" prefix so far.
std::uint32_t num_peekers;
|
||||
// Number of ReceiveMessage() calls that accepted this message.
std::uint32_t num_receivers;
|
||||
};
|
||||
TrainingFeature& operator+=(const TrainingFeature& other) {
|
||||
assert(other.get_index() == get_index());
|
||||
assert(other.get_count() + get_count() < (1 << kCountBits));
|
||||
index_and_count_ += other.get_count();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Decide whether the option called `name` should accept `message`.
// - A message named exactly `name` is accepted by every caller.
// - A message named "name[k]" is counted by every caller that asks for `name`
//   (num_peekers) and accepted only by the caller whose zero-based peek order is k.
// Returns true and increments num_receivers when the message is accepted.
|
||||
bool ReceiveMessage(const std::string& name, Message* message) {
|
||||
// Built from the peek count *before* this call is counted below.
const auto subscript = "[" + std::to_string(message->num_peekers) + "]";
|
||||
if (message->name.substr(0, name.size() + 1) == name + "[") {
|
||||
++message->num_peekers;
|
||||
}
|
||||
if (message->name == name || message->name == name + subscript) {
|
||||
++message->num_receivers;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
IndexType get_index() const {
|
||||
return static_cast<IndexType>(index_and_count_ >> kCountBits);
|
||||
}
|
||||
|
||||
// Split `input` into fields separated by `delimiter`.
// Uses std::getline, so consecutive delimiters produce empty fields, an empty
// input produces an empty vector, and a trailing delimiter does not add a
// trailing empty field.
|
||||
std::vector<std::string> Split(const std::string& input, char delimiter) {
|
||||
std::istringstream stream(input);
|
||||
std::string field;
|
||||
std::vector<std::string> fields;
|
||||
while (std::getline(stream, field, delimiter)) {
|
||||
fields.push_back(field);
|
||||
}
|
||||
return fields;
|
||||
}
|
||||
void shift_index(IndexType offset) {
|
||||
assert(get_index() + offset < (1 << kIndexBits));
|
||||
index_and_count_ += offset << kCountBits;
|
||||
}
|
||||
|
||||
// Round a floating point number to the nearest integer of type IntType.
// Ties are rounded toward positive infinity (floor(value + 0.5)), so
// 2.5 -> 3 and -2.5 -> -2. No range check is made before the cast.
|
||||
template <typename IntType>
|
||||
IntType Round(double value) {
|
||||
return static_cast<IntType>(std::floor(value + 0.5));
|
||||
}
|
||||
IndexType get_count() const {
|
||||
return static_cast<IndexType>(index_and_count_ & ((1 << kCountBits) - 1));
|
||||
}
|
||||
|
||||
// Counterpart of std::make_shared that honours alignof(T): the object is
// placement-constructed in storage obtained from std_aligned_alloc and the
// returned shared_ptr releases it through AlignedDeleter<T>.
// NOTE(review): the result of std_aligned_alloc is not checked for null, and
// the storage is not released here if T's constructor throws.
|
||||
template <typename T, typename... ArgumentTypes>
|
||||
std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
|
||||
const auto ptr = new(std_aligned_alloc(alignof(T), sizeof(T)))
|
||||
T(std::forward<ArgumentTypes>(arguments)...);
|
||||
return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
|
||||
}
|
||||
bool operator<(const TrainingFeature& other) const {
|
||||
return index_and_count_ < other.index_and_count_;
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
private:
|
||||
StorageType index_and_count_;
|
||||
};
|
||||
|
||||
} // namespace Eval
|
||||
// One sample of training data: the feature lists fed to the network plus the
// packed position record and the per-sample scalars used when training on it.
|
||||
struct Example {
|
||||
// Two lists of active training features, indexed 0 and 1.
// NOTE(review): presumably one list per perspective -- confirm against the code that fills them.
std::vector<TrainingFeature> training_features[2];
|
||||
// Packed position/value record (type declared in Learner).
Learner::PackedSfenValue psv;
|
||||
// NOTE(review): presumably the evaluation produced by the quantized network
// for this position -- confirm against the code that sets it.
Value discrete_nn_eval;
|
||||
// NOTE(review): the meaning of sign and weight is defined by the code that
// builds and consumes examples, which is not visible here.
int sign;
|
||||
double weight;
|
||||
};
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
// Message used to send options such as hyperparameters to the trainers.
// name/value are fixed at construction; the two counters start at zero and
// are incremented by receive_message() as trainers inspect the message.
|
||||
struct Message {
|
||||
// message_value defaults to the empty string for messages that carry no payload.
Message(const std::string& message_name, const std::string& message_value = "") :
|
||||
name(message_name), value(message_value), num_peekers(0), num_receivers(0)
|
||||
{
|
||||
}
|
||||
|
||||
const std::string name;
|
||||
const std::string value;
|
||||
// Number of receive_message() calls that matched the "name[" prefix so far.
std::uint32_t num_peekers;
|
||||
// Number of receive_message() calls that accepted this message.
std::uint32_t num_receivers;
|
||||
};
|
||||
|
||||
// Decide whether the option called `name` should accept `message`.
// - A message named exactly `name` is accepted by every caller.
// - A message named "name[k]" is counted by every caller that asks for `name`
//   (num_peekers) and accepted only by the caller whose zero-based peek order is k.
// Returns true and increments num_receivers when the message is accepted.
|
||||
bool receive_message(const std::string& name, Message* message) {
|
||||
// Built from the peek count *before* this call is counted below.
const auto subscript = "[" + std::to_string(message->num_peekers) + "]";
|
||||
|
||||
if (message->name.substr(0, name.size() + 1) == name + "[") {
|
||||
++message->num_peekers;
|
||||
}
|
||||
|
||||
if (message->name == name || message->name == name + subscript) {
|
||||
++message->num_receivers;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Round a floating point number to the nearest integer of type IntType.
// Ties are rounded toward positive infinity (floor(value + 0.5)), so
// 2.5 -> 3 and -2.5 -> -2. No range check is made before the cast.
|
||||
template <typename IntType>
|
||||
IntType round(double value) {
|
||||
return static_cast<IntType>(std::floor(value + 0.5));
|
||||
}
|
||||
|
||||
// Counterpart of std::make_shared that honours alignof(T): the object is
// placement-constructed in storage obtained from std_aligned_alloc and the
// returned shared_ptr releases it through AlignedDeleter<T>.
// NOTE(review): the result of std_aligned_alloc is not checked for null, and
// the storage is not released here if T's constructor throws.
|
||||
template <typename T, typename... ArgumentTypes>
|
||||
std::shared_ptr<T> make_aligned_shared_ptr(ArgumentTypes&&... arguments) {
|
||||
const auto ptr = new(std_aligned_alloc(alignof(T), sizeof(T)))
|
||||
T(std::forward<ArgumentTypes>(arguments)...);
|
||||
|
||||
return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
|
||||
}
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,301 +1,476 @@
|
||||
// Specialization of NNUE evaluation function learning class template for AffineTransform
|
||||
|
||||
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
|
||||
#ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
|
||||
#define _NNUE_TRAINER_AFFINE_TRANSFORM_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../learn/learn.h"
|
||||
#include "../layers/affine_transform.h"
|
||||
#include "trainer.h"
|
||||
|
||||
#include "extra/stockfish_blas.h"
|
||||
|
||||
#include "learn/learn.h"
|
||||
|
||||
#include "nnue/layers/affine_transform.h"
|
||||
|
||||
#include "thread.h"
|
||||
|
||||
#include <random>
|
||||
|
||||
namespace Eval {
|
||||
// Specialization of NNUE evaluation function learning class template for AffineTransform
|
||||
namespace Eval::NNUE {
|
||||
|
||||
namespace NNUE {
|
||||
// Learning: Affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutputDimensions>
|
||||
class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::AffineTransform<PreviousLayer, OutputDimensions>;
|
||||
|
||||
// Learning: Affine transformation layer
|
||||
template <typename PreviousLayer, IndexType OutputDimensions>
|
||||
class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::AffineTransform<PreviousLayer, OutputDimensions>;
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> create(
|
||||
LayerType* target_layer, FeatureTransformer* ft) {
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
|
||||
// Deliver an option message (e.g. a hyperparameter) to this trainer.
// The message is forwarded to the previous layer's trainer first, then each
// option this layer understands is tested in turn; several may match.
|
||||
void SendMessage(Message* message) {
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
// The message value is parsed with std::stod, which throws on malformed input.
if (ReceiveMessage("momentum", message)) {
|
||||
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
if (ReceiveMessage("learning_rate_scale", message)) {
|
||||
learning_rate_scale_ =
|
||||
static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
// "reset" reloads the float parameters from the target layer.
if (ReceiveMessage("reset", message)) {
|
||||
DequantizeParameters();
|
||||
}
|
||||
// "quantize_parameters" writes the float parameters back into the target layer.
if (ReceiveMessage("quantize_parameters", message)) {
|
||||
QuantizeParameters();
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
if (kIsOutputLayer) {
|
||||
// Initialize output layer with 0
|
||||
std::fill(std::begin(biases_), std::end(biases_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
std::fill(std::begin(weights_), std::end(weights_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
} else {
|
||||
// Assuming that the input distribution is unit-mean 0.5, equal variance,
|
||||
// Initialize the output distribution so that each unit has a mean of 0.5 and the same variance as the input
|
||||
const double kSigma = 1.0 / std::sqrt(kInputDimensions);
|
||||
auto distribution = std::normal_distribution<double>(0.0, kSigma);
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
double sum = 0.0;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const auto weight = static_cast<LearnFloatType>(distribution(rng));
|
||||
weights_[kInputDimensions * i + j] = weight;
|
||||
sum += weight;
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, ft));
|
||||
}
|
||||
biases_[i] = static_cast<LearnFloatType>(0.5 - 0.5 * sum);
|
||||
}
|
||||
}
|
||||
QuantizeParameters();
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
batch_input_ = previous_layer_trainer_->Propagate(batch);
|
||||
// Deliver an option message (e.g. a hyperparameter) to this trainer.
// The message is forwarded to the previous layer's trainer first, then each
// option this layer understands is tested in turn; several may match.
|
||||
void send_message(Message* message) {
|
||||
previous_layer_trainer_->send_message(message);
|
||||
|
||||
// The message value is parsed with std::stod, which throws on malformed input.
if (receive_message("momentum", message)) {
|
||||
momentum_ = static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
|
||||
if (receive_message("learning_rate_scale", message)) {
|
||||
learning_rate_scale_ =
|
||||
static_cast<LearnFloatType>(std::stod(message->value));
|
||||
}
|
||||
|
||||
// "reset" invokes dequantize_parameters() (defined elsewhere in this class).
if (receive_message("reset", message)) {
|
||||
dequantize_parameters();
|
||||
}
|
||||
|
||||
// "quantize_parameters" invokes quantize_parameters() (defined elsewhere in this class).
if (receive_message("quantize_parameters", message)) {
|
||||
quantize_parameters();
|
||||
}
|
||||
|
||||
// "check_health" invokes check_health() (defined elsewhere in this class).
if (receive_message("check_health", message)) {
|
||||
check_health();
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the parameters of this layer (and, first, of all previous
// layers) using the random number generator `rng`, then quantize them.
// The output layer (kIsOutputLayer) is zero-initialized; every other layer
// draws its weights from a normal distribution.
|
||||
template <typename RNG>
|
||||
void initialize(RNG& rng) {
|
||||
previous_layer_trainer_->initialize(rng);
|
||||
|
||||
if (kIsOutputLayer) {
|
||||
// Initialize output layer with 0
|
||||
std::fill(std::begin(biases_), std::end(biases_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
std::fill(std::begin(weights_), std::end(weights_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
}
|
||||
else {
|
||||
// Assuming every input unit has mean 0.5 and the same variance,
|
||||
// initialize so that every output unit also has mean 0.5 and the same variance as the input.
|
||||
// Weights are drawn from N(0, 1 / sqrt(kInputDimensions)).
const double kSigma = 1.0 / std::sqrt(kInputDimensions);
|
||||
auto distribution = std::normal_distribution<double>(0.0, kSigma);
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
// Sum of the weights of output unit i, used to centre its bias below.
double sum = 0.0;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const auto weight = static_cast<LearnFloatType>(distribution(rng));
|
||||
weights_[kInputDimensions * i + j] = weight;
|
||||
sum += weight;
|
||||
}
|
||||
|
||||
// With inputs of mean 0.5 the weighted sum has mean 0.5 * sum, so this
// bias moves the expected output to 0.5.
biases_[i] = static_cast<LearnFloatType>(0.5 - 0.5 * sum);
|
||||
}
|
||||
}
|
||||
|
||||
quantize_parameters();
|
||||
}
|
||||
|
||||
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
|
||||
{
|
||||
const auto size = batch_end - batch_begin;
|
||||
|
||||
if ((long)output_.size() < (long)kOutputDimensions * size) {
|
||||
output_.resize(kOutputDimensions * size);
|
||||
gradients_.resize(kInputDimensions * size);
|
||||
}
|
||||
|
||||
if (thread_states_.size() < thread_pool.size())
|
||||
{
|
||||
thread_states_.resize(thread_pool.size());
|
||||
}
|
||||
|
||||
combined_batch_size_ = size;
|
||||
combined_batch_input_ = previous_layer_trainer_->step_start(thread_pool, batch_begin, batch_end);
|
||||
|
||||
auto& main_thread_state = thread_states_[0];
|
||||
|
||||
#if defined(USE_BLAS)
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
cblas_scopy(kOutputDimensions, biases_, 1, &output_[batch_offset], 1);
|
||||
}
|
||||
cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans,
|
||||
kOutputDimensions, batch_size_, kInputDimensions, 1.0,
|
||||
weights_, kInputDimensions,
|
||||
batch_input_, kInputDimensions,
|
||||
1.0, &output_[0], kOutputDimensions);
|
||||
#else
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_batch_offset = kInputDimensions * b;
|
||||
const IndexType output_batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
double sum = biases_[i];
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const IndexType index = kInputDimensions * i + j;
|
||||
sum += weights_[index] * batch_input_[input_batch_offset + j];
|
||||
}
|
||||
output_[output_batch_offset + i] = static_cast<LearnFloatType>(sum);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
const LearnFloatType local_learning_rate =
|
||||
learning_rate * learning_rate_scale_;
|
||||
// update
|
||||
cblas_sscal(
|
||||
kOutputDimensions, momentum_, main_thread_state.biases_diff_, 1
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
Blas::sscal(
|
||||
kOutputDimensions, momentum_, main_thread_state.biases_diff_, 1
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
for (IndexType i = 1; i < thread_states_.size(); ++i)
|
||||
thread_states_[i].reset_biases();
|
||||
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
void propagate(Thread& th, const uint64_t offset, const uint64_t count) {
|
||||
|
||||
previous_layer_trainer_->propagate(th, offset, count);
|
||||
|
||||
#if defined(USE_BLAS)
|
||||
// backpropagate
|
||||
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
|
||||
kInputDimensions, batch_size_, kOutputDimensions, 1.0,
|
||||
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
cblas_scopy(
|
||||
kOutputDimensions, biases_, 1, &output_[batch_offset], 1
|
||||
);
|
||||
}
|
||||
|
||||
cblas_sgemm(
|
||||
CblasColMajor, CblasTrans, CblasNoTrans,
|
||||
kOutputDimensions, count, kInputDimensions,
|
||||
1.0,
|
||||
weights_, kInputDimensions,
|
||||
gradients, kOutputDimensions,
|
||||
0.0, &gradients_[0], kInputDimensions);
|
||||
// update
|
||||
cblas_sscal(kOutputDimensions, momentum_, biases_diff_, 1);
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
cblas_saxpy(kOutputDimensions, 1.0,
|
||||
&gradients[batch_offset], 1, biases_diff_, 1);
|
||||
}
|
||||
cblas_saxpy(kOutputDimensions, -local_learning_rate,
|
||||
biases_diff_, 1, biases_, 1);
|
||||
cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
|
||||
kOutputDimensions, kInputDimensions, batch_size_, 1.0,
|
||||
gradients, kOutputDimensions,
|
||||
batch_input_, kInputDimensions,
|
||||
momentum_, weights_diff_, kInputDimensions);
|
||||
cblas_saxpy(kOutputDimensions * kInputDimensions, -local_learning_rate,
|
||||
weights_diff_, 1, weights_, 1);
|
||||
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
|
||||
1.0,
|
||||
&output_[offset * kOutputDimensions], kOutputDimensions
|
||||
);
|
||||
#else
|
||||
// backpropagate
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_batch_offset = kInputDimensions * b;
|
||||
const IndexType output_batch_offset = kOutputDimensions * b;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
double sum = 0.0;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = kInputDimensions * i + j;
|
||||
sum += weights_[index] * gradients[output_batch_offset + i];
|
||||
}
|
||||
gradients_[input_batch_offset + j] = static_cast<LearnFloatType>(sum);
|
||||
}
|
||||
}
|
||||
// update
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_diff_[i] *= momentum_;
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_diff_[i] *= momentum_;
|
||||
}
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_batch_offset = kInputDimensions * b;
|
||||
const IndexType output_batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_diff_[i] += gradients[output_batch_offset + i];
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
const IndexType index = kInputDimensions * i + j;
|
||||
weights_diff_[index] += gradients[output_batch_offset + i] *
|
||||
batch_input_[input_batch_offset + j];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_[i] -= local_learning_rate * biases_diff_[i];
|
||||
}
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_[i] -= local_learning_rate * weights_diff_[i];
|
||||
}
|
||||
#endif
|
||||
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
batch_input_(nullptr),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer),
|
||||
biases_(),
|
||||
weights_(),
|
||||
biases_diff_(),
|
||||
weights_diff_(),
|
||||
momentum_(0.0),
|
||||
learning_rate_scale_(1.0) {
|
||||
DequantizeParameters();
|
||||
}
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
Blas::scopy(
|
||||
kOutputDimensions, biases_, 1, &output_[batch_offset], 1
|
||||
);
|
||||
}
|
||||
|
||||
// Clamp the float weights and write the quantized (integer) parameters into
// the target layer.
|
||||
void QuantizeParameters() {
|
||||
// Saturate the training weights in place to +/-kMaxWeightMagnitude.
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_[i] = std::max(-kMaxWeightMagnitude,
|
||||
std::min(+kMaxWeightMagnitude, weights_[i]));
|
||||
}
|
||||
// Biases are scaled by kBiasScale and rounded to the layer's BiasType.
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
target_layer_->biases_[i] =
|
||||
Round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
|
||||
}
|
||||
// Weights are scaled by kWeightScale and rounded to the layer's WeightType.
// The target layer's rows use a stride of kPaddedInputDimensions; only the
// first kInputDimensions entries of each row are written here.
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const auto offset = kInputDimensions * i;
|
||||
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
target_layer_->weights_[padded_offset + j] =
|
||||
Round<typename LayerType::WeightType>(
|
||||
weights_[offset + j] * kWeightScale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Load the float training parameters from the target layer's quantized
// integers and clear the update buffers.
|
||||
void DequantizeParameters() {
|
||||
// Inverse of the bias quantization: divide by kBiasScale.
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_[i] = static_cast<LearnFloatType>(
|
||||
target_layer_->biases_[i] / kBiasScale);
|
||||
}
|
||||
// Inverse of the weight quantization: divide by kWeightScale. The target
// layer's rows use a stride of kPaddedInputDimensions; the training copy is dense.
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const auto offset = kInputDimensions * i;
|
||||
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
weights_[offset + j] = static_cast<LearnFloatType>(
|
||||
target_layer_->weights_[padded_offset + j] / kWeightScale);
|
||||
}
|
||||
}
|
||||
// Zero the accumulated bias/weight differences.
std::fill(std::begin(biases_diff_), std::end(biases_diff_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
std::fill(std::begin(weights_diff_), std::end(weights_diff_),
|
||||
static_cast<LearnFloatType>(0.0));
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = LayerType::kInputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// If the output dimensionality is 1, the output layer
|
||||
static constexpr bool kIsOutputLayer = kOutputDimensions == 1;
|
||||
|
||||
// Coefficient used for parameterization
|
||||
static constexpr LearnFloatType kActivationScale =
|
||||
std::numeric_limits<std::int8_t>::max();
|
||||
static constexpr LearnFloatType kBiasScale = kIsOutputLayer ?
|
||||
(kPonanzaConstant * FV_SCALE) :
|
||||
((1 << kWeightScaleBits) * kActivationScale);
|
||||
static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale;
|
||||
|
||||
// Upper limit of absolute value of weight used to prevent overflow when parameterizing integers
|
||||
static constexpr LearnFloatType kMaxWeightMagnitude =
|
||||
std::numeric_limits<typename LayerType::WeightType>::max() / kWeightScale;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Input mini batch
|
||||
const LearnFloatType* batch_input_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// parameter
|
||||
LearnFloatType biases_[kOutputDimensions];
|
||||
LearnFloatType weights_[kOutputDimensions * kInputDimensions];
|
||||
|
||||
// Buffer used for updating parameters
|
||||
LearnFloatType biases_diff_[kOutputDimensions];
|
||||
LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions];
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
|
||||
// hyper parameter
|
||||
LearnFloatType momentum_;
|
||||
LearnFloatType learning_rate_scale_;
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
Blas::sgemm(
|
||||
Blas::MatrixLayout::ColMajor, Blas::MatrixTranspose::Trans, Blas::MatrixTranspose::NoTrans,
|
||||
kOutputDimensions, count, kInputDimensions,
|
||||
1.0,
|
||||
weights_, kInputDimensions,
|
||||
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
|
||||
1.0,
|
||||
&output_[offset * kOutputDimensions], kOutputDimensions
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// Backpropagation for the samples [offset, offset + count) of the combined batch.
//   th        - calling thread; th.thread_idx() selects the per-thread
//               accumulator in thread_states_.
//   gradients - gradients with respect to this layer's outputs, indexed as
//               kOutputDimensions values per sample.
// Computes the gradients with respect to this layer's inputs into gradients_,
// accumulates the bias and weight differences into the thread's state, and
// then continues with the previous layer. The parameters themselves are not
// modified here.
// The USE_BLAS and fallback branches perform the same three operations.
|
||||
void backpropagate(Thread& th,
|
||||
const LearnFloatType* gradients,
|
||||
uint64_t offset,
|
||||
uint64_t count) {
|
||||
|
||||
auto& thread_state = thread_states_[th.thread_idx()];
|
||||
// Passed as the sgemm beta for weights_diff_ below: thread 0 keeps its previous
// weights_diff_ scaled by momentum_, every other thread overwrites its buffer.
const auto momentum = th.thread_idx() == 0 ? momentum_ : 0.0f;
|
||||
#if defined(USE_BLAS)
|
||||
|
||||
// Input gradients for this slice: gradients_ = weights_ * gradients.
cblas_sgemm(
|
||||
CblasColMajor, CblasNoTrans, CblasNoTrans,
|
||||
kInputDimensions, count, kOutputDimensions,
|
||||
1.0,
|
||||
weights_, kInputDimensions,
|
||||
gradients + offset * kOutputDimensions, kOutputDimensions,
|
||||
0.0,
|
||||
&gradients_[offset * kInputDimensions], kInputDimensions
|
||||
);
|
||||
|
||||
// Add each sample's output gradient to the thread's bias differences.
// biases_diff_ is only accumulated into here; any momentum scaling or reset
// of it happens outside this function.
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
cblas_saxpy(
|
||||
kOutputDimensions, 1.0,
|
||||
&gradients[batch_offset], 1, thread_state.biases_diff_, 1
|
||||
);
|
||||
}
|
||||
|
||||
// weights_diff_ = gradients^T * inputs + momentum * weights_diff_.
cblas_sgemm(
|
||||
CblasRowMajor, CblasTrans, CblasNoTrans,
|
||||
kOutputDimensions, kInputDimensions, count,
|
||||
1.0,
|
||||
gradients + offset * kOutputDimensions, kOutputDimensions,
|
||||
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
|
||||
momentum,
|
||||
thread_state.weights_diff_, kInputDimensions
|
||||
);
|
||||
|
||||
#else
|
||||
|
||||
// Input gradients for this slice: gradients_ = weights_ * gradients.
|
||||
Blas::sgemm(
|
||||
Blas::MatrixLayout::ColMajor, Blas::MatrixTranspose::NoTrans, Blas::MatrixTranspose::NoTrans,
|
||||
kInputDimensions, count, kOutputDimensions,
|
||||
1.0,
|
||||
weights_, kInputDimensions,
|
||||
gradients + offset * kOutputDimensions, kOutputDimensions,
|
||||
0.0,
|
||||
&gradients_[offset * kInputDimensions], kInputDimensions
|
||||
);
|
||||
|
||||
// Add each sample's output gradient to the thread's bias differences.
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
Blas::saxpy(kOutputDimensions, 1.0,
|
||||
&gradients[batch_offset], 1, thread_state.biases_diff_, 1);
|
||||
}
|
||||
|
||||
// weights_diff_ = gradients^T * inputs + momentum * weights_diff_.
Blas::sgemm(
|
||||
Blas::MatrixLayout::RowMajor, Blas::MatrixTranspose::Trans, Blas::MatrixTranspose::NoTrans,
|
||||
kOutputDimensions, kInputDimensions, count,
|
||||
1.0,
|
||||
gradients + offset * kOutputDimensions, kOutputDimensions,
|
||||
combined_batch_input_ + offset * kInputDimensions, kInputDimensions,
|
||||
momentum,
|
||||
thread_state.weights_diff_, kInputDimensions
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
// Hand the input gradients to the previous layer for the same sample range.
previous_layer_trainer_->backpropagate(th, gradients_.data(), offset, count);
|
||||
}
|
||||
|
||||
void reduce_thread_state()
|
||||
{
|
||||
for (IndexType i = 1; i < thread_states_.size(); ++i)
|
||||
{
|
||||
thread_states_[0] += thread_states_[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Finishes one training step: merges the per-thread diffs into thread 0,
// subtracts (learning_rate * learning_rate_scale_) * diff from the biases and
// weights, records the magnitude of every applied change for check_health(),
// and then forwards the call (with the unscaled learning_rate) to the
// previous layer's trainer.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate)
{
    const LearnFloatType scaled_rate = learning_rate * learning_rate_scale_;

    reduce_thread_state();

    auto& merged = thread_states_[0];

    // Applies `diffs` to `params` and accumulates the absolute step sizes.
    // The step is formed in LearnFloatType and widened to double before use.
    const auto apply_diffs = [scaled_rate](LearnFloatType* params,
                                           const LearnFloatType* diffs,
                                           IndexType length,
                                           double& abs_step_sum) {
        for (IndexType k = 0; k < length; ++k) {
            const double step = scaled_rate * diffs[k];
            params[k] -= step;
            abs_step_sum += std::abs(step);
        }
    };

    apply_diffs(biases_, merged.biases_diff_, kOutputDimensions, abs_biases_diff_sum_);
    num_biases_diffs_ += kOutputDimensions;

    apply_diffs(weights_, merged.weights_diff_,
                kOutputDimensions * kInputDimensions, abs_weights_diff_sum_);
    num_weights_diffs_ += kOutputDimensions * kInputDimensions;

    previous_layer_trainer_->step_end(thread_pool, learning_rate);
}
|
||||
|
||||
private:
|
||||
// Private constructor.
// Creates the trainer of the previous layer (for &target_layer->previous_layer_),
// value-initializes the floating-point biases_/weights_ arrays, sets the
// default hyperparameters (momentum 0.2, learning-rate scale 1.0) and then
// loads the target layer's current parameters via dequantize_parameters().
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* ft) :
|
||||
combined_batch_size_(0),
|
||||
combined_batch_input_(nullptr),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::create(
|
||||
&target_layer->previous_layer_, ft)),
|
||||
target_layer_(target_layer),
|
||||
biases_(),
|
||||
weights_(),
|
||||
momentum_(0.2),
|
||||
learning_rate_scale_(1.0) {
|
||||
|
||||
// Fills biases_/weights_ from target_layer_ and resets the statistics.
dequantize_parameters();
|
||||
}
|
||||
|
||||
// Clears the update statistics that check_health() reports:
// the number of applied diffs and the sums of their absolute values.
void reset_stats() {
    num_biases_diffs_ = 0;
    num_weights_diffs_ = 0;

    abs_biases_diff_sum_ = 0.0;
    abs_weights_diff_sum_ = 0.0;
}
|
||||
|
||||
void check_health() {
|
||||
|
||||
double abs_bias_sum = 0.0;
|
||||
double abs_weight_sum = 0.0;
|
||||
|
||||
for(auto b : biases_)
|
||||
abs_bias_sum += std::abs(b);
|
||||
|
||||
for(auto w : weights_)
|
||||
abs_weight_sum += std::abs(w);
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
|
||||
out << "INFO (check_health):"
|
||||
<< " layer " << LayerType::kLayerIndex
|
||||
<< " - " << LayerType::get_name()
|
||||
<< std::endl;
|
||||
|
||||
out << " - avg_abs_bias = " << abs_bias_sum / std::size(biases_) << std::endl;
|
||||
out << " - avg_abs_bias_diff = " << abs_biases_diff_sum_ / num_biases_diffs_ << std::endl;
|
||||
out << " - avg_abs_weight = " << abs_weight_sum / std::size(weights_) << std::endl;
|
||||
out << " - avg_abs_weight_diff = " << abs_weights_diff_sum_ / num_weights_diffs_ << std::endl;
|
||||
|
||||
out.unlock();
|
||||
|
||||
reset_stats();
|
||||
}
|
||||
|
||||
// Weight saturation and parameterization
|
||||
void quantize_parameters() {
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i) {
|
||||
weights_[i] = std::max(-kMaxWeightMagnitude,
|
||||
std::min(+kMaxWeightMagnitude, weights_[i]));
|
||||
}
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
target_layer_->biases_[i] =
|
||||
round<typename LayerType::BiasType>(biases_[i] * kBiasScale);
|
||||
}
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const auto offset = kInputDimensions * i;
|
||||
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
target_layer_->weights_[padded_offset + j] =
|
||||
round<typename LayerType::WeightType>(
|
||||
weights_[offset + j] * kWeightScale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// read parameterized integer
|
||||
void dequantize_parameters() {
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
biases_[i] = static_cast<LearnFloatType>(
|
||||
target_layer_->biases_[i] / kBiasScale);
|
||||
}
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const auto offset = kInputDimensions * i;
|
||||
const auto padded_offset = LayerType::kPaddedInputDimensions * i;
|
||||
for (IndexType j = 0; j < kInputDimensions; ++j) {
|
||||
weights_[offset + j] = static_cast<LearnFloatType>(
|
||||
target_layer_->weights_[padded_offset + j] / kWeightScale);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& state : thread_states_)
|
||||
{
|
||||
state.reset_weights();
|
||||
state.reset_biases();
|
||||
}
|
||||
|
||||
|
||||
reset_stats();
|
||||
}
|
||||
|
||||
// Number of input/output dimensions, taken from the layer being trained
|
||||
static constexpr IndexType kInputDimensions = LayerType::kInputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// A layer with exactly one output is treated as the network's output layer
|
||||
static constexpr bool kIsOutputLayer = kOutputDimensions == 1;
|
||||
|
||||
// Coefficients used for quantization.
// kActivationScale is the largest int8_t value.
|
||||
static constexpr LearnFloatType kActivationScale =
|
||||
std::numeric_limits<std::int8_t>::max();
|
||||
|
||||
// Bias scale: kPonanzaConstant * FV_SCALE for the output layer,
// otherwise 2^kWeightScaleBits * kActivationScale.
static constexpr LearnFloatType kBiasScale = kIsOutputLayer ?
|
||||
(kPonanzaConstant * FV_SCALE) :
|
||||
((1 << kWeightScaleBits) * kActivationScale);
|
||||
|
||||
// Weight scale: the bias scale divided by the activation scale
static constexpr LearnFloatType kWeightScale = kBiasScale / kActivationScale;
|
||||
|
||||
// Upper limit of the absolute value of a weight, used to prevent overflow
// when the weight is scaled by kWeightScale and stored as LayerType::WeightType
|
||||
static constexpr LearnFloatType kMaxWeightMagnitude =
|
||||
std::numeric_limits<typename LayerType::WeightType>::max() / kWeightScale;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType combined_batch_size_;
|
||||
|
||||
// Statistics for check_health(): sums of the absolute applied diffs and
// the number of diffs applied since the last reset_stats()
double abs_biases_diff_sum_;
|
||||
double abs_weights_diff_sum_;
|
||||
uint64_t num_biases_diffs_;
|
||||
uint64_t num_weights_diffs_;
|
||||
|
||||
// Input mini batch (not owned; read kInputDimensions values per sample)
|
||||
const LearnFloatType* combined_batch_input_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// Layer to learn (receives the quantized parameters)
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// parameter
|
||||
struct alignas(kCacheLineSize) ThreadState
|
||||
{
|
||||
// Buffer used for updating parameters
|
||||
alignas(kCacheLineSize) LearnFloatType biases_diff_[kOutputDimensions];
|
||||
alignas(kCacheLineSize) LearnFloatType weights_diff_[kOutputDimensions * kInputDimensions];
|
||||
|
||||
ThreadState() { reset_weights(); reset_biases(); }
|
||||
|
||||
ThreadState& operator+=(const ThreadState& other)
|
||||
{
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i)
|
||||
{
|
||||
biases_diff_[i] += other.biases_diff_[i];
|
||||
}
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions * kInputDimensions; ++i)
|
||||
{
|
||||
weights_diff_[i] += other.weights_diff_[i];
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void reset_weights()
|
||||
{
|
||||
std::fill(std::begin(weights_diff_), std::end(weights_diff_), 0.0f);
|
||||
}
|
||||
|
||||
void reset_biases()
|
||||
{
|
||||
std::fill(std::begin(biases_diff_), std::end(biases_diff_), 0.0f);
|
||||
}
|
||||
};
|
||||
|
||||
// Floating-point working copy of the layer's parameters.
// weights_ stores kInputDimensions consecutive values per output.
alignas(kCacheLineSize) LearnFloatType biases_[kOutputDimensions];
|
||||
alignas(kCacheLineSize) LearnFloatType weights_[kOutputDimensions * kInputDimensions];
|
||||
|
||||
// One accumulation state per worker thread, indexed by Thread::thread_idx()
std::vector<ThreadState, CacheLineAlignedAllocator<ThreadState>> thread_states_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
|
||||
|
||||
// Buffer for back propagation (gradients w.r.t. this layer's inputs)
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
|
||||
|
||||
// Hyperparameters: momentum (beta applied to thread 0's weight diffs) and
// the per-layer multiplier applied to the learning rate in step_end()
|
||||
LearnFloatType momentum_;
|
||||
LearnFloatType learning_rate_scale_;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,142 +1,356 @@
|
||||
// Specialization of NNUE evaluation function learning class template for ClippedReLU
|
||||
|
||||
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
|
||||
#ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
|
||||
#define _NNUE_TRAINER_CLIPPED_RELU_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../learn/learn.h"
|
||||
#include "../layers/clipped_relu.h"
|
||||
#include "trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "learn/learn.h"
|
||||
|
||||
namespace NNUE {
|
||||
#include "nnue/layers/clipped_relu.h"
|
||||
|
||||
// Learning: Clipped ReLU activation layer
|
||||
template <typename PreviousLayer>
|
||||
class Trainer<Layers::ClippedReLU<PreviousLayer>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::ClippedReLU<PreviousLayer>;
|
||||
#include "thread.h"
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
// Specialization of NNUE evaluation function learning class template for ClippedReLU
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
if (ReceiveMessage("check_health", message)) {
|
||||
CheckHealth();
|
||||
}
|
||||
}
|
||||
// Learning: Clipped ReLU activation layer
|
||||
template <typename PreviousLayer>
|
||||
class Trainer<Layers::ClippedReLU<PreviousLayer>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::ClippedReLU<PreviousLayer>;
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
}
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> create(
|
||||
LayerType* target_layer, FeatureTransformer* ft) {
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
const auto input = previous_layer_trainer_->Propagate(batch);
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
output_[index] = std::max(+kZero, std::min(+kOne, input[index]));
|
||||
min_activations_[i] = std::min(min_activations_[i], output_[index]);
|
||||
max_activations_[i] = std::max(max_activations_[i], output_[index]);
|
||||
}
|
||||
}
|
||||
return output_.data();
|
||||
}
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, ft));
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
gradients_[index] = gradients[index] *
|
||||
(output_[index] > kZero) * (output_[index] < kOne);
|
||||
}
|
||||
}
|
||||
previous_layer_trainer_->Backpropagate(gradients_.data(), learning_rate);
|
||||
}
|
||||
// Set options such as hyperparameters
|
||||
void send_message(Message* message) {
|
||||
previous_layer_trainer_->send_message(message);
|
||||
if (receive_message("check_health", message)) {
|
||||
check_health();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer) {
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_),
|
||||
std::numeric_limits<LearnFloatType>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_),
|
||||
std::numeric_limits<LearnFloatType>::lowest());
|
||||
}
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void initialize(RNG& rng) {
|
||||
previous_layer_trainer_->initialize(rng);
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void CheckHealth() {
|
||||
const auto largest_min_activation = *std::max_element(
|
||||
std::begin(min_activations_), std::end(min_activations_));
|
||||
const auto smallest_max_activation = *std::min_element(
|
||||
std::begin(max_activations_), std::end(max_activations_));
|
||||
std::cout << "INFO: largest min activation = " << largest_min_activation
|
||||
<< ", smallest max activation = " << smallest_max_activation
|
||||
<< std::endl;
|
||||
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
|
||||
{
|
||||
const auto size = batch_end - batch_begin;
|
||||
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_),
|
||||
std::numeric_limits<LearnFloatType>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_),
|
||||
std::numeric_limits<LearnFloatType>::lowest());
|
||||
}
|
||||
if ((long)output_.size() < (long)kOutputDimensions * size) {
|
||||
output_.resize(kOutputDimensions * size);
|
||||
gradients_.resize(kInputDimensions * size);
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
if (thread_states_.size() < thread_pool.size())
|
||||
{
|
||||
thread_states_.resize(thread_pool.size());
|
||||
}
|
||||
|
||||
// LearnFloatType constant
|
||||
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
|
||||
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
|
||||
input_ = previous_layer_trainer_->step_start(thread_pool, batch_begin, batch_end);
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
batch_size_ = size;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
// forward propagation
|
||||
void propagate(Thread& th, const uint64_t offset, const uint64_t count) {
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
auto& thread_state = thread_states_[th.thread_idx()];
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
previous_layer_trainer_->propagate(th, offset, count);
|
||||
|
||||
// Health check statistics
|
||||
LearnFloatType min_activations_[kOutputDimensions];
|
||||
LearnFloatType max_activations_[kOutputDimensions];
|
||||
};
|
||||
#if defined (USE_SSE2)
|
||||
|
||||
} // namespace NNUE
|
||||
{
|
||||
static_assert(kOutputDimensions % 16 == 0, "This implementation assumes that it can process 16 floats at a time");
|
||||
|
||||
} // namespace Eval
|
||||
const __m128 kZero4 = _mm_set1_ps(+kZero);
|
||||
const __m128 kOne4 = _mm_set1_ps(+kOne);
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
for (IndexType b = offset; b < offset + count; ++b)
|
||||
{
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; i += 16)
|
||||
{
|
||||
__m128 out0 = _mm_loadu_ps(&input_[i + 0 + batch_offset]);
|
||||
__m128 out1 = _mm_loadu_ps(&input_[i + 4 + batch_offset]);
|
||||
__m128 out2 = _mm_loadu_ps(&input_[i + 8 + batch_offset]);
|
||||
__m128 out3 = _mm_loadu_ps(&input_[i + 12 + batch_offset]);
|
||||
|
||||
out0 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out0));
|
||||
out1 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out1));
|
||||
out2 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out2));
|
||||
out3 = _mm_max_ps(kZero4, _mm_min_ps(kOne4, out3));
|
||||
|
||||
_mm_storeu_ps(&output_[i + 0 + batch_offset], out0);
|
||||
_mm_storeu_ps(&output_[i + 4 + batch_offset], out1);
|
||||
_mm_storeu_ps(&output_[i + 8 + batch_offset], out2);
|
||||
_mm_storeu_ps(&output_[i + 12 + batch_offset], out3);
|
||||
|
||||
__m128 minact0 = _mm_loadu_ps(&thread_state.min_activations_[i + 0]);
|
||||
__m128 minact1 = _mm_loadu_ps(&thread_state.min_activations_[i + 4]);
|
||||
__m128 minact2 = _mm_loadu_ps(&thread_state.min_activations_[i + 8]);
|
||||
__m128 minact3 = _mm_loadu_ps(&thread_state.min_activations_[i + 12]);
|
||||
|
||||
__m128 maxact0 = _mm_loadu_ps(&thread_state.max_activations_[i + 0]);
|
||||
__m128 maxact1 = _mm_loadu_ps(&thread_state.max_activations_[i + 4]);
|
||||
__m128 maxact2 = _mm_loadu_ps(&thread_state.max_activations_[i + 8]);
|
||||
__m128 maxact3 = _mm_loadu_ps(&thread_state.max_activations_[i + 12]);
|
||||
|
||||
minact0 = _mm_min_ps(out0, minact0);
|
||||
minact1 = _mm_min_ps(out1, minact1);
|
||||
minact2 = _mm_min_ps(out2, minact2);
|
||||
minact3 = _mm_min_ps(out3, minact3);
|
||||
|
||||
maxact0 = _mm_max_ps(out0, maxact0);
|
||||
maxact1 = _mm_max_ps(out1, maxact1);
|
||||
maxact2 = _mm_max_ps(out2, maxact2);
|
||||
maxact3 = _mm_max_ps(out3, maxact3);
|
||||
|
||||
_mm_storeu_ps(&thread_state.min_activations_[i + 0], minact0);
|
||||
_mm_storeu_ps(&thread_state.min_activations_[i + 4], minact1);
|
||||
_mm_storeu_ps(&thread_state.min_activations_[i + 8], minact2);
|
||||
_mm_storeu_ps(&thread_state.min_activations_[i + 12], minact3);
|
||||
|
||||
_mm_storeu_ps(&thread_state.max_activations_[i + 0], maxact0);
|
||||
_mm_storeu_ps(&thread_state.max_activations_[i + 4], maxact1);
|
||||
_mm_storeu_ps(&thread_state.max_activations_[i + 8], maxact2);
|
||||
_mm_storeu_ps(&thread_state.max_activations_[i + 12], maxact3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
output_[index] = std::max(+kZero, std::min(+kOne, input_[index]));
|
||||
thread_state.min_activations_[i] = std::min(thread_state.min_activations_[i], output_[index]);
|
||||
thread_state.max_activations_[i] = std::max(thread_state.max_activations_[i], output_[index]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void backpropagate(Thread& th,
|
||||
const LearnFloatType* gradients,
|
||||
const uint64_t offset,
|
||||
const uint64_t count) {
|
||||
|
||||
auto& thread_state = thread_states_[th.thread_idx()];
|
||||
|
||||
#if defined (USE_SSE2)
|
||||
|
||||
{
|
||||
static_assert(kOutputDimensions % 16 == 0, "This implementation assumes that it can process 16 floats at a time");
|
||||
|
||||
const __m128 kZero4 = _mm_set1_ps(+kZero);
|
||||
const __m128 kOne4 = _mm_set1_ps(+kOne);
|
||||
|
||||
for (IndexType b = offset; b < offset + count; ++b)
|
||||
{
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; i += 16)
|
||||
{
|
||||
__m128 out0 = _mm_loadu_ps(&output_[batch_offset + i + 0]);
|
||||
__m128 out1 = _mm_loadu_ps(&output_[batch_offset + i + 4]);
|
||||
__m128 out2 = _mm_loadu_ps(&output_[batch_offset + i + 8]);
|
||||
__m128 out3 = _mm_loadu_ps(&output_[batch_offset + i + 12]);
|
||||
|
||||
__m128 clipped0 = _mm_or_ps(_mm_cmple_ps(out0, kZero4), _mm_cmpge_ps(out0, kOne4));
|
||||
__m128 clipped1 = _mm_or_ps(_mm_cmple_ps(out1, kZero4), _mm_cmpge_ps(out1, kOne4));
|
||||
__m128 clipped2 = _mm_or_ps(_mm_cmple_ps(out2, kZero4), _mm_cmpge_ps(out2, kOne4));
|
||||
__m128 clipped3 = _mm_or_ps(_mm_cmple_ps(out3, kZero4), _mm_cmpge_ps(out3, kOne4));
|
||||
|
||||
__m128 grad0 = _mm_loadu_ps(&gradients[batch_offset + i + 0]);
|
||||
__m128 grad1 = _mm_loadu_ps(&gradients[batch_offset + i + 4]);
|
||||
__m128 grad2 = _mm_loadu_ps(&gradients[batch_offset + i + 8]);
|
||||
__m128 grad3 = _mm_loadu_ps(&gradients[batch_offset + i + 12]);
|
||||
|
||||
grad0 = _mm_andnot_ps(clipped0, grad0);
|
||||
grad1 = _mm_andnot_ps(clipped1, grad1);
|
||||
grad2 = _mm_andnot_ps(clipped2, grad2);
|
||||
grad3 = _mm_andnot_ps(clipped3, grad3);
|
||||
|
||||
_mm_storeu_ps(&gradients_[batch_offset + i + 0], grad0);
|
||||
_mm_storeu_ps(&gradients_[batch_offset + i + 4], grad1);
|
||||
_mm_storeu_ps(&gradients_[batch_offset + i + 8], grad2);
|
||||
_mm_storeu_ps(&gradients_[batch_offset + i + 12], grad3);
|
||||
|
||||
const int clipped_mask =
|
||||
(_mm_movemask_ps(clipped0) << 0)
|
||||
| (_mm_movemask_ps(clipped1) << 4)
|
||||
| (_mm_movemask_ps(clipped2) << 8)
|
||||
| (_mm_movemask_ps(clipped3) << 12);
|
||||
|
||||
thread_state.num_clipped_ += popcount(clipped_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
const IndexType index = batch_offset + i;
|
||||
const bool clipped = (output_[index] <= kZero) | (output_[index] >= kOne);
|
||||
gradients_[index] = gradients[index] * !clipped;
|
||||
thread_state.num_clipped_ += clipped;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
thread_state.num_total_ += count * kOutputDimensions;
|
||||
|
||||
previous_layer_trainer_->backpropagate(th, gradients_.data(), offset, count);
|
||||
}
|
||||
|
||||
void reduce_thread_state()
|
||||
{
|
||||
for (IndexType i = 1; i < thread_states_.size(); ++i)
|
||||
{
|
||||
thread_states_[0] += thread_states_[i];
|
||||
}
|
||||
}
|
||||
|
||||
// End of a training step. Nothing of this trainer is modified here; the call
// is only forwarded, with the same arguments, to the previous layer's trainer.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate)
|
||||
{
|
||||
previous_layer_trainer_->step_end(thread_pool, learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// Private constructor.
// Creates the trainer of the previous layer (for &target_layer->previous_layer_)
// and remembers the layer this trainer belongs to.
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* ft) :
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::create(
|
||||
&target_layer->previous_layer_, ft)),
|
||||
target_layer_(target_layer) {
|
||||
|
||||
// Resets every entry of thread_states_; the vector is not given any
// elements by this constructor, so this visits nothing at this point.
reset_stats();
|
||||
}
|
||||
|
||||
void reset_stats() {
|
||||
for(auto& state : thread_states_)
|
||||
state.reset();
|
||||
}
|
||||
|
||||
// Check if there are any problems with learning
|
||||
void check_health() {
|
||||
|
||||
reduce_thread_state();
|
||||
|
||||
auto& main_thread_state = thread_states_[0];
|
||||
|
||||
const auto largest_min_activation = *std::max_element(
|
||||
std::begin(main_thread_state.min_activations_), std::end(main_thread_state.min_activations_));
|
||||
const auto smallest_max_activation = *std::min_element(
|
||||
std::begin(main_thread_state.max_activations_), std::end(main_thread_state.max_activations_));
|
||||
|
||||
auto out = sync_region_cout.new_region();
|
||||
|
||||
out << "INFO (check_health):"
|
||||
<< " layer " << LayerType::kLayerIndex
|
||||
<< " - " << LayerType::get_name()
|
||||
<< std::endl;
|
||||
|
||||
out << " - largest min activation = " << largest_min_activation
|
||||
<< " , smallest max activation = " << smallest_max_activation
|
||||
<< std::endl;
|
||||
|
||||
out << " - clipped " << static_cast<double>(main_thread_state.num_clipped_) / main_thread_state.num_total_ * 100.0 << "% of outputs"
|
||||
<< std::endl;
|
||||
|
||||
out.unlock();
|
||||
|
||||
reset_stats();
|
||||
}
|
||||
|
||||
// Number of input/output dimensions. Both are LayerType::kOutputDimensions:
// the buffers of this trainer hold one input and one output value per index.
|
||||
static constexpr IndexType kInputDimensions = LayerType::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// LearnFloatType constants: the lower and upper clipping bounds
|
||||
static constexpr LearnFloatType kZero = static_cast<LearnFloatType>(0.0);
|
||||
static constexpr LearnFloatType kOne = static_cast<LearnFloatType>(1.0);
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// NOTE(review): not referenced by the methods visible here (the clipped/total
// counters that are used live in ThreadState) - confirm whether it is needed.
IndexType num_total_;
|
||||
|
||||
// Output of the previous layer's trainer for the current batch (not owned)
const LearnFloatType* input_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// Forward propagation buffer (clipped activations)
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
|
||||
|
||||
// Buffer for back propagation (gradients passed to the previous layer)
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
|
||||
|
||||
struct alignas(kCacheLineSize) ThreadState
|
||||
{
|
||||
// Health check statistics
|
||||
LearnFloatType min_activations_[kOutputDimensions];
|
||||
LearnFloatType max_activations_[kOutputDimensions];
|
||||
IndexType num_clipped_;
|
||||
IndexType num_total_;
|
||||
|
||||
ThreadState() { reset(); }
|
||||
|
||||
ThreadState& operator+=(const ThreadState& other)
|
||||
{
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i)
|
||||
{
|
||||
min_activations_[i] = std::min(min_activations_[i], other.min_activations_[i]);
|
||||
}
|
||||
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i)
|
||||
{
|
||||
max_activations_[i] = std::max(max_activations_[i], other.max_activations_[i]);
|
||||
}
|
||||
|
||||
num_clipped_ += other.num_clipped_;
|
||||
num_total_ += other.num_total_;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
std::fill(std::begin(min_activations_), std::end(min_activations_), std::numeric_limits<float>::max());
|
||||
std::fill(std::begin(max_activations_), std::end(max_activations_), std::numeric_limits<float>::lowest());
|
||||
num_clipped_ = 0;
|
||||
num_total_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<ThreadState, CacheLineAlignedAllocator<ThreadState>> thread_states_;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,251 +1,377 @@
|
||||
// Specialization of NNUE evaluation function learning class template for InputSlice
|
||||
|
||||
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
|
||||
#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
|
||||
#define _NNUE_TRAINER_INPUT_SLICE_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../learn/learn.h"
|
||||
#include "../layers/input_slice.h"
|
||||
#include "trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "extra/stockfish_blas.h"
|
||||
|
||||
namespace NNUE {
|
||||
#include "learn/learn.h"
|
||||
|
||||
// Learning: Input layer
|
||||
class SharedInputTrainer {
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<SharedInputTrainer> Create(
|
||||
FeatureTransformer* feature_transformer) {
|
||||
static std::shared_ptr<SharedInputTrainer> instance;
|
||||
if (!instance) {
|
||||
instance.reset(new SharedInputTrainer(feature_transformer));
|
||||
}
|
||||
++instance->num_referrers_;
|
||||
return instance;
|
||||
}
|
||||
#include "nnue/layers/input_slice.h"
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kSendMessage;
|
||||
feature_transformer_trainer_->SendMessage(message);
|
||||
}
|
||||
assert(current_operation_ == Operation::kSendMessage);
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
}
|
||||
#include "thread.h"
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kInitialize;
|
||||
feature_transformer_trainer_->Initialize(rng);
|
||||
}
|
||||
assert(current_operation_ == Operation::kInitialize);
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
}
|
||||
// Specialization of NNUE evaluation function learning class template for InputSlice
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (gradients_.size() < kInputDimensions * batch.size()) {
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kPropagate;
|
||||
output_ = feature_transformer_trainer_->Propagate(batch);
|
||||
}
|
||||
assert(current_operation_ == Operation::kPropagate);
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
return output_;
|
||||
}
|
||||
// Learning: Input layer
|
||||
// This is tricky. It exists because when there's more than one trainer
|
||||
// on top of a single feature transformer we want to only call propagate/backpropagate
|
||||
// on the feature transformer once. This is straightforward in the old
|
||||
// multithreading case, because propagate/backpropagate is called just once from the
|
||||
// main thread. But with the current implementation of coarser multithreading
|
||||
// we end up calling each method from each thread. Therefore we have to keep
|
||||
// the num_calls and current_operation per thread basis, each thread must work
|
||||
// on its designated batch slice, and the only synchronization points are
|
||||
// step_start and step_end - for which we use state of the first thread.
|
||||
// Each thread requires their own bookkeeping because it's possible that
|
||||
// one thread is still in propagate of some batch slice while the other thread
|
||||
// is doing backpropagate of some other slice. We also ensure the thread state
|
||||
// isn't susceptible to false sharing by using a full cache line for the state.
|
||||
class SharedInputTrainer {
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<SharedInputTrainer> create(
|
||||
FeatureTransformer* ft) {
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
if (num_referrers_ == 1) {
|
||||
feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
|
||||
return;
|
||||
}
|
||||
if (num_calls_ == 0) {
|
||||
current_operation_ = Operation::kBackPropagate;
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kInputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
static std::shared_ptr<SharedInputTrainer> instance;
|
||||
|
||||
if (!instance) {
|
||||
instance.reset(new SharedInputTrainer(ft));
|
||||
}
|
||||
|
||||
++instance->num_referrers_;
|
||||
|
||||
return instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert(current_operation_ == Operation::kBackPropagate);
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kInputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
gradients_[batch_offset + i] += gradients[batch_offset + i];
|
||||
}
|
||||
}
|
||||
if (++num_calls_ == num_referrers_) {
|
||||
feature_transformer_trainer_->Backpropagate(
|
||||
gradients_.data(), learning_rate);
|
||||
num_calls_ = 0;
|
||||
current_operation_ = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
SharedInputTrainer(FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
num_referrers_(0),
|
||||
num_calls_(0),
|
||||
current_operation_(Operation::kNone),
|
||||
feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
|
||||
feature_transformer)),
|
||||
output_(nullptr) {
|
||||
}
|
||||
// Set options such as hyperparameters
|
||||
void send_message(Message* message) {
|
||||
auto& thread_state = thread_states_[0];
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
FeatureTransformer::kOutputDimensions;
|
||||
if (thread_state.num_calls == 0) {
|
||||
thread_state.current_operation = Operation::kSendMessage;
|
||||
feature_transformer_trainer_->send_message(message);
|
||||
}
|
||||
|
||||
// type of processing
|
||||
enum class Operation {
|
||||
kNone,
|
||||
kSendMessage,
|
||||
kInitialize,
|
||||
kPropagate,
|
||||
kBackPropagate,
|
||||
};
|
||||
assert(thread_state.current_operation == Operation::kSendMessage);
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
if (++thread_state.num_calls == num_referrers_) {
|
||||
thread_state.num_calls = 0;
|
||||
thread_state.current_operation = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
// number of layers sharing this layer as input
|
||||
std::uint32_t num_referrers_;
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void initialize(RNG& rng) {
|
||||
auto& thread_state = thread_states_[0];
|
||||
|
||||
// Number of times the current process has been called
|
||||
std::uint32_t num_calls_;
|
||||
if (thread_state.num_calls == 0) {
|
||||
thread_state.current_operation = Operation::kInitialize;
|
||||
feature_transformer_trainer_->initialize(rng);
|
||||
}
|
||||
|
||||
// current processing type
|
||||
Operation current_operation_;
|
||||
assert(thread_state.current_operation == Operation::kInitialize);
|
||||
|
||||
// Trainer of input feature converter
|
||||
const std::shared_ptr<Trainer<FeatureTransformer>>
|
||||
feature_transformer_trainer_;
|
||||
if (++thread_state.num_calls == num_referrers_) {
|
||||
thread_state.num_calls = 0;
|
||||
thread_state.current_operation = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
// pointer to output shared for forward propagation
|
||||
const LearnFloatType* output_;
|
||||
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
|
||||
{
|
||||
const auto size = batch_end - batch_begin;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
};
|
||||
if ((long)gradients_.size() < (long)kInputDimensions * size) {
|
||||
gradients_.resize(kInputDimensions * size);
|
||||
}
|
||||
|
||||
// Learning: Input layer
|
||||
template <IndexType OutputDimensions, IndexType Offset>
|
||||
class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
|
||||
if (thread_states_.size() < thread_pool.size())
|
||||
{
|
||||
thread_states_.resize(thread_pool.size());
|
||||
}
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* /*target_layer*/, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(new Trainer(feature_transformer));
|
||||
}
|
||||
batch_size_ = size;
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
shared_input_trainer_->SendMessage(message);
|
||||
}
|
||||
auto& thread_state = thread_states_[0];
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
shared_input_trainer_->Initialize(rng);
|
||||
}
|
||||
if (thread_state.num_calls == 0) {
|
||||
thread_state.current_operation = Operation::kStepStart;
|
||||
output_ = feature_transformer_trainer_->step_start(thread_pool, batch_begin, batch_end);
|
||||
}
|
||||
|
||||
assert(thread_state.current_operation == Operation::kStepStart);
|
||||
|
||||
if (++thread_state.num_calls == num_referrers_) {
|
||||
thread_state.num_calls = 0;
|
||||
thread_state.current_operation = Operation::kNone;
|
||||
}
|
||||
|
||||
return output_;
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
void propagate(Thread& th, uint64_t offset, uint64_t count) {
|
||||
const auto thread_id = th.thread_idx();
|
||||
|
||||
auto& thread_state = thread_states_[thread_id];
|
||||
|
||||
if (thread_state.num_calls == 0) {
|
||||
thread_state.current_operation = Operation::kPropagate;
|
||||
feature_transformer_trainer_->propagate(th, offset, count);
|
||||
}
|
||||
|
||||
assert(thread_state.current_operation == Operation::kPropagate);
|
||||
|
||||
if (++thread_state.num_calls == num_referrers_) {
|
||||
thread_state.num_calls = 0;
|
||||
thread_state.current_operation = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void backpropagate(Thread& th,
|
||||
const LearnFloatType* gradients,
|
||||
uint64_t offset,
|
||||
uint64_t count) {
|
||||
|
||||
const auto thread_id = th.thread_idx();
|
||||
|
||||
auto& thread_state = thread_states_[thread_id];
|
||||
|
||||
if (num_referrers_ == 1) {
|
||||
feature_transformer_trainer_->backpropagate(th, gradients, offset, count);
|
||||
return;
|
||||
}
|
||||
|
||||
if (thread_state.num_calls == 0) {
|
||||
thread_state.current_operation = Operation::kBackPropagate;
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kInputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(thread_state.current_operation == Operation::kBackPropagate);
|
||||
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType batch_offset = kInputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
gradients_[batch_offset + i] += gradients[batch_offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
if (++thread_state.num_calls == num_referrers_) {
|
||||
feature_transformer_trainer_->backpropagate(
|
||||
th, gradients_.data(), offset, count);
|
||||
thread_state.num_calls = 0;
|
||||
thread_state.current_operation = Operation::kNone;
|
||||
}
|
||||
}
|
||||
|
||||
// Ends a training step.
// Bookkeeping uses the state of the first thread (thread_states_[0]). The
// feature transformer trainer's step_end is invoked only on the first call
// of a round; the call counter is reset once it reaches num_referrers_.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate) {
    auto& thread_state = thread_states_[0];

    if (thread_state.num_calls == 0) {
        thread_state.current_operation = Operation::kStepEnd;
        feature_transformer_trainer_->step_end(thread_pool, learning_rate);
    }

    assert(thread_state.current_operation == Operation::kStepEnd);

    if (++thread_state.num_calls == num_referrers_) {
        thread_state.num_calls = 0;
        thread_state.current_operation = Operation::kNone;
    }
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
SharedInputTrainer(FeatureTransformer* ft) :
|
||||
batch_size_(0),
|
||||
num_referrers_(0),
|
||||
thread_states_(1),
|
||||
feature_transformer_trainer_(Trainer<FeatureTransformer>::create(
|
||||
ft)),
|
||||
output_(nullptr) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
FeatureTransformer::kOutputDimensions;
|
||||
|
||||
// type of processing
// kNone means no operation is in progress; the other values name the
// operation currently being executed.
enum class Operation {
    kNone,
    kSendMessage,
    kInitialize,
    kStepStart,
    kPropagate,
    kBackPropagate,
    kStepEnd,
};
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// number of layers sharing this layer as input
|
||||
std::uint32_t num_referrers_;
|
||||
|
||||
// Per-thread bookkeeping, aligned to kCacheLineSize.
struct alignas(kCacheLineSize) ThreadState
{
    // Number of times the current operation has been called
    std::uint32_t num_calls{0};

    // current processing type
    Operation current_operation = Operation::kNone;
};
|
||||
|
||||
// Number of times the current process has been called
|
||||
std::vector<ThreadState, CacheLineAlignedAllocator<ThreadState>> thread_states_;
|
||||
|
||||
// Trainer of input feature converter
|
||||
const std::shared_ptr<Trainer<FeatureTransformer>>
|
||||
feature_transformer_trainer_;
|
||||
|
||||
// pointer to output shared for forward propagation
|
||||
const LearnFloatType* output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
|
||||
};
|
||||
|
||||
// Learning: Input layer
|
||||
template <IndexType OutputDimensions, IndexType Offset>
|
||||
class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> create(
|
||||
LayerType* /*target_layer*/, FeatureTransformer* ft) {
|
||||
|
||||
return std::shared_ptr<Trainer>(new Trainer(ft));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void send_message(Message* message) {
|
||||
shared_input_trainer_->send_message(message);
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void initialize(RNG& rng) {
|
||||
shared_input_trainer_->initialize(rng);
|
||||
}
|
||||
|
||||
// Starts a training step for the batch [batch_begin, batch_end).
// Grows the output and gradient buffers when the batch needs more room than
// the output buffer currently has, records the batch size, stores the input
// pointer returned by the shared input trainer, and returns a pointer to this
// layer's output buffer.
const LearnFloatType* step_start(ThreadPool& thread_pool, std::vector<Example>::const_iterator batch_begin, std::vector<Example>::const_iterator batch_end)
{
    const auto size = batch_end - batch_begin;

    // Buffers only ever grow; both are resized together.
    if ((long)output_.size() < (long)kOutputDimensions * size) {
        output_.resize(kOutputDimensions * size);
        gradients_.resize(kInputDimensions * size);
    }

    batch_size_ = size;

    input_ = shared_input_trainer_->step_start(thread_pool, batch_begin, batch_end);

    return output_.data();
}
|
||||
|
||||
// forward propagation
|
||||
void propagate(Thread& th, uint64_t offset, uint64_t count) {
|
||||
|
||||
shared_input_trainer_->propagate(th, offset, count);
|
||||
|
||||
for (IndexType b = offset; b < offset + count; ++b) {
|
||||
const IndexType input_offset = kInputDimensions * b;
|
||||
const IndexType output_offset = kOutputDimensions * b;
|
||||
|
||||
// forward propagation
|
||||
const LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
gradients_.resize(kInputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
const auto input = shared_input_trainer_->Propagate(batch);
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_offset = kInputDimensions * b;
|
||||
const IndexType output_offset = kOutputDimensions * b;
|
||||
#if defined(USE_BLAS)
|
||||
cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
|
||||
&output_[output_offset], 1);
|
||||
|
||||
cblas_scopy(
|
||||
kOutputDimensions, &input_[input_offset + Offset], 1,
|
||||
&output_[output_offset], 1
|
||||
);
|
||||
#else
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output_[output_offset + i] = input[input_offset + Offset + i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType input_offset = kInputDimensions * b;
|
||||
const IndexType output_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kInputDimensions; ++i) {
|
||||
if (i < Offset || i >= Offset + kOutputDimensions) {
|
||||
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
} else {
|
||||
gradients_[input_offset + i] = gradients[output_offset + i - Offset];
|
||||
Blas::scopy(
|
||||
kOutputDimensions, &input_[input_offset + Offset], 1,
|
||||
&output_[output_offset], 1
|
||||
);
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(FeatureTransformer* feature_transformer):
|
||||
batch_size_(0),
|
||||
shared_input_trainer_(SharedInputTrainer::Create(feature_transformer)) {
|
||||
}
|
||||
// backpropagation
|
||||
void backpropagate(Thread& th,
|
||||
const LearnFloatType* gradients,
|
||||
uint64_t offset,
|
||||
uint64_t count) {
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
FeatureTransformer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
|
||||
for (IndexType b = offset; b < offset + count; ++b)
|
||||
{
|
||||
const IndexType input_offset = kInputDimensions * b;
|
||||
const IndexType output_offset = kOutputDimensions * b;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
IndexType i = 0;
|
||||
for (; i < Offset; ++i) {
|
||||
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
}
|
||||
|
||||
// Trainer of shared input layer
|
||||
const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
|
||||
for (; i < Offset + kOutputDimensions; ++i) {
|
||||
gradients_[input_offset + i] = gradients[output_offset + i - Offset];
|
||||
}
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
for (; i < kInputDimensions; ++i)
|
||||
{
|
||||
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType> gradients_;
|
||||
};
|
||||
shared_input_trainer_->backpropagate(th, gradients_.data(), offset, count);
|
||||
}
|
||||
|
||||
} // namespace NNUE
|
||||
// Ends a training step by forwarding the call to the shared input trainer.
void step_end(ThreadPool& thread_pool, LearnFloatType learning_rate) {
    shared_input_trainer_->step_end(thread_pool, learning_rate);
}
|
||||
|
||||
} // namespace Eval
|
||||
private:
|
||||
// constructor
|
||||
Trainer(FeatureTransformer* ft) :
|
||||
batch_size_(0),
|
||||
shared_input_trainer_(SharedInputTrainer::create(ft)) {
|
||||
}
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kInputDimensions =
|
||||
FeatureTransformer::kOutputDimensions;
|
||||
static constexpr IndexType kOutputDimensions = OutputDimensions;
|
||||
static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
const LearnFloatType* input_;
|
||||
|
||||
// Trainer of shared input layer
|
||||
const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
|
||||
|
||||
// buffer for back propagation
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> gradients_;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
+171
-160
@@ -1,190 +1,201 @@
|
||||
// Specialization of NNUE evaluation function learning class template for Sum
|
||||
|
||||
#ifndef _NNUE_TRAINER_SUM_H_
|
||||
#ifndef _NNUE_TRAINER_SUM_H_
|
||||
#define _NNUE_TRAINER_SUM_H_
|
||||
|
||||
#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
|
||||
#include "../../learn/learn.h"
|
||||
#include "../layers/sum.h"
|
||||
#include "trainer.h"
|
||||
|
||||
namespace Eval {
|
||||
#include "extra/stockfish_blas.h"
|
||||
|
||||
namespace NNUE {
|
||||
#include "learn/learn.h"
|
||||
|
||||
// Learning: A layer that sums the outputs of multiple layers
|
||||
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
|
||||
class Trainer<Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>> :
|
||||
Trainer<Layers::Sum<RemainingPreviousLayers...>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>;
|
||||
using Tail = Trainer<Layers::Sum<RemainingPreviousLayers...>>;
|
||||
#include "nnue/layers/sum.h"
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
#include "thread.h"
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
// The results of other member functions do not depend on the processing order, so
|
||||
// Tail is processed first for the purpose of simplifying the implementation, but
|
||||
// SendMessage processes Head first to make it easier to understand subscript correspondence
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
Tail::SendMessage(message);
|
||||
}
|
||||
// Specialization of NNUE evaluation function learning class template for Sum
|
||||
namespace Eval::NNUE {
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
Tail::Initialize(rng);
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
}
|
||||
// Learning: A layer that sums the outputs of multiple layers
|
||||
template <typename FirstPreviousLayer, typename... RemainingPreviousLayers>
|
||||
class Trainer<Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>> :
|
||||
Trainer<Layers::Sum<RemainingPreviousLayers...>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::Sum<FirstPreviousLayer, RemainingPreviousLayers...>;
|
||||
using Tail = Trainer<Layers::Sum<RemainingPreviousLayers...>>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> create(
|
||||
LayerType* target_layer, FeatureTransformer* ft) {
|
||||
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, ft));
|
||||
}
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void send_message(Message* message) {
|
||||
// The results of other member functions do not depend on the processing order, so
|
||||
// Tail is processed first for the purpose of simplifying the implementation, but
|
||||
// SendMessage processes Head first to make it easier to understand subscript correspondence
|
||||
previous_layer_trainer_->send_message(message);
|
||||
Tail::send_message(message);
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void initialize(RNG& rng) {
|
||||
Tail::initialize(rng);
|
||||
previous_layer_trainer_->initialize(rng);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
/*const*/ LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
auto output = Tail::propagate(thread_pool, batch);
|
||||
const auto head_output = previous_layer_trainer_->propagate(thread_pool, batch);
|
||||
|
||||
// forward propagation
|
||||
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
auto output = Tail::Propagate(batch);
|
||||
const auto head_output = previous_layer_trainer_->Propagate(batch);
|
||||
#if defined(USE_BLAS)
|
||||
cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
|
||||
head_output, 1, output, 1);
|
||||
|
||||
cblas_saxpy(
|
||||
kOutputDimensions * batch_size_, 1.0,
|
||||
head_output, 1, output, 1
|
||||
);
|
||||
|
||||
#else
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output[batch_offset + i] += head_output[batch_offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
Blas::saxpy(
|
||||
thread_pool,
|
||||
kOutputDimensions * batch_size_, 1.0,
|
||||
head_output, 1, output, 1
|
||||
);
|
||||
|
||||
#endif
|
||||
return output;
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
Tail::Backpropagate(gradients, learning_rate);
|
||||
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
|
||||
}
|
||||
// backpropagation
|
||||
void backpropagate(ThreadPool& thread_pool,
|
||||
const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer):
|
||||
Tail(target_layer, feature_transformer),
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<FirstPreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer) {
|
||||
}
|
||||
Tail::backpropagate(thread_pool, gradients, learning_rate);
|
||||
previous_layer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* ft):
|
||||
Tail(target_layer, ft),
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<FirstPreviousLayer>::create(
|
||||
&target_layer->previous_layer_, ft)),
|
||||
target_layer_(target_layer) {
|
||||
}
|
||||
|
||||
// make subclass friend
|
||||
template <typename SumLayer>
|
||||
friend class Trainer;
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
// make subclass friend
|
||||
template <typename SumLayer>
|
||||
friend class Trainer;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<FirstPreviousLayer>> previous_layer_trainer_;
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
};
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<FirstPreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
};
|
||||
|
||||
|
||||
// Learning: Layer that takes the sum of the outputs of multiple layers (when there is one template argument)
|
||||
template <typename PreviousLayer>
|
||||
class Trainer<Layers::Sum<PreviousLayer>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::Sum<PreviousLayer>;
|
||||
// Learning: Layer that takes the sum of the outputs of multiple layers (when there is one template argument)
|
||||
template <typename PreviousLayer>
|
||||
class Trainer<Layers::Sum<PreviousLayer>> {
|
||||
private:
|
||||
// Type of layer to learn
|
||||
using LayerType = Layers::Sum<PreviousLayer>;
|
||||
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> Create(
|
||||
LayerType* target_layer, FeatureTransformer* feature_transformer) {
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, feature_transformer));
|
||||
}
|
||||
public:
|
||||
// factory function
|
||||
static std::shared_ptr<Trainer> create(
|
||||
LayerType* target_layer, FeatureTransformer* ft) {
|
||||
|
||||
// Set options such as hyperparameters
|
||||
void SendMessage(Message* message) {
|
||||
previous_layer_trainer_->SendMessage(message);
|
||||
}
|
||||
return std::shared_ptr<Trainer>(
|
||||
new Trainer(target_layer, ft));
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void Initialize(RNG& rng) {
|
||||
previous_layer_trainer_->Initialize(rng);
|
||||
}
|
||||
// Set options such as hyperparameters
|
||||
void send_message(Message* message) {
|
||||
previous_layer_trainer_->send_message(message);
|
||||
}
|
||||
|
||||
// Initialize the parameters with random numbers
|
||||
template <typename RNG>
|
||||
void initialize(RNG& rng) {
|
||||
previous_layer_trainer_->initialize(rng);
|
||||
}
|
||||
|
||||
// forward propagation
|
||||
/*const*/ LearnFloatType* propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
}
|
||||
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
const auto output = previous_layer_trainer_->propagate(batch);
|
||||
|
||||
// forward propagation
|
||||
/*const*/ LearnFloatType* Propagate(const std::vector<Example>& batch) {
|
||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||
output_.resize(kOutputDimensions * batch.size());
|
||||
}
|
||||
batch_size_ = static_cast<IndexType>(batch.size());
|
||||
const auto output = previous_layer_trainer_->Propagate(batch);
|
||||
#if defined(USE_BLAS)
|
||||
cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
|
||||
cblas_scopy(kOutputDimensions * batch_size_, output, 1, &output_[0], 1);
|
||||
#else
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output_[batch_offset + i] = output[batch_offset + i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void Backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
previous_layer_trainer_->Backpropagate(gradients, learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* feature_transformer) :
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::Create(
|
||||
&target_layer->previous_layer_, feature_transformer)),
|
||||
target_layer_(target_layer) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// make subclass friend
|
||||
template <typename SumLayer>
|
||||
friend class Trainer;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType> output_;
|
||||
};
|
||||
|
||||
} // namespace NNUE
|
||||
|
||||
} // namespace Eval
|
||||
|
||||
#endif // defined(EVAL_LEARN) && defined(EVAL_NNUE)
|
||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||
const IndexType batch_offset = kOutputDimensions * b;
|
||||
for (IndexType i = 0; i < kOutputDimensions; ++i) {
|
||||
output_[batch_offset + i] = output[batch_offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
return output_.data();
|
||||
}
|
||||
|
||||
// backpropagation
|
||||
void backpropagate(const LearnFloatType* gradients,
|
||||
LearnFloatType learning_rate) {
|
||||
|
||||
previous_layer_trainer_->backpropagate(gradients, learning_rate);
|
||||
}
|
||||
|
||||
private:
|
||||
// constructor
|
||||
Trainer(LayerType* target_layer, FeatureTransformer* ft) :
|
||||
batch_size_(0),
|
||||
previous_layer_trainer_(Trainer<PreviousLayer>::create(
|
||||
&target_layer->previous_layer_, ft)),
|
||||
target_layer_(target_layer) {
|
||||
}
|
||||
|
||||
// number of input/output dimensions
|
||||
static constexpr IndexType kOutputDimensions = LayerType::kOutputDimensions;
|
||||
|
||||
// make subclass friend
|
||||
template <typename SumLayer>
|
||||
friend class Trainer;
|
||||
|
||||
// number of samples in mini-batch
|
||||
IndexType batch_size_;
|
||||
|
||||
// Trainer of the previous layer
|
||||
const std::shared_ptr<Trainer<PreviousLayer>> previous_layer_trainer_;
|
||||
|
||||
// layer to learn
|
||||
LayerType* const target_layer_;
|
||||
|
||||
// Forward propagation buffer
|
||||
std::vector<LearnFloatType, CacheLineAlignedAllocator<LearnFloatType>> output_;
|
||||
};
|
||||
|
||||
} // namespace Eval::NNUE
|
||||
|
||||
#endif
|
||||
|
||||
+26
-18
@@ -30,29 +30,29 @@ namespace {
|
||||
#define S(mg, eg) make_score(mg, eg)
|
||||
|
||||
// Pawn penalties
|
||||
constexpr Score Backward = S( 8, 27);
|
||||
constexpr Score Doubled = S(11, 55);
|
||||
constexpr Score Isolated = S( 5, 17);
|
||||
constexpr Score WeakLever = S( 2, 54);
|
||||
constexpr Score WeakUnopposed = S(15, 25);
|
||||
constexpr Score Backward = S( 8, 25);
|
||||
constexpr Score Doubled = S(10, 55);
|
||||
constexpr Score Isolated = S( 3, 15);
|
||||
constexpr Score WeakLever = S( 3, 55);
|
||||
constexpr Score WeakUnopposed = S(13, 25);
|
||||
|
||||
// Bonus for blocked pawns at 5th or 6th rank
|
||||
constexpr Score BlockedPawn[2] = { S(-13, -4), S(-4, 3) };
|
||||
constexpr Score BlockedPawn[2] = { S(-13, -4), S(-5, 2) };
|
||||
|
||||
constexpr Score BlockedStorm[RANK_NB] = {
|
||||
S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)
|
||||
};
|
||||
|
||||
// Connected pawn bonus
|
||||
constexpr int Connected[RANK_NB] = { 0, 7, 8, 11, 24, 45, 85 };
|
||||
constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 24, 48, 86 };
|
||||
|
||||
// Strength of pawn shelter for our king by [distance from edge][rank].
|
||||
// RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
|
||||
constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
|
||||
{ V( -6), V( 81), V( 93), V( 58), V( 39), V( 18), V( 25) },
|
||||
{ V(-43), V( 61), V( 35), V(-49), V(-29), V(-11), V( -63) },
|
||||
{ V(-10), V( 75), V( 23), V( -2), V( 32), V( 3), V( -45) },
|
||||
{ V(-39), V(-13), V(-29), V(-52), V(-48), V(-67), V(-166) }
|
||||
{ V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V( 28) },
|
||||
{ V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) },
|
||||
{ V(-11), V( 77), V( 22), V( -6), V( 31), V( 8), V( -45) },
|
||||
{ V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) }
|
||||
};
|
||||
|
||||
// Danger of enemy pawns moving toward our king by [distance from edge][rank].
|
||||
@@ -60,12 +60,17 @@ namespace {
|
||||
// is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
|
||||
// on edge, likely blocked by our king.
|
||||
constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
|
||||
{ V( 85), V(-289), V(-166), V(97), V(50), V( 45), V( 50) },
|
||||
{ V( 46), V( -25), V( 122), V(45), V(37), V(-10), V( 20) },
|
||||
{ V( -6), V( 51), V( 168), V(34), V(-2), V(-22), V(-14) },
|
||||
{ V(-15), V( -11), V( 101), V( 4), V(11), V(-15), V(-29) }
|
||||
{ V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) },
|
||||
{ V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) },
|
||||
{ V( -8), V( 51), V( 167), V( 35), V( -4), V(-16), V(-12) },
|
||||
{ V(-17), V( -13), V( 100), V( 4), V( 9), V(-16), V(-31) }
|
||||
};
|
||||
|
||||
// KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
|
||||
// for king when the king is on a semi-open or open file.
|
||||
constexpr Score KingOnFile[2][2] = {{ S(-19,12), S(-6, 7) },
|
||||
{ S( 0, 2), S( 6,-5) }};
|
||||
|
||||
#undef S
|
||||
#undef V
|
||||
|
||||
@@ -147,7 +152,7 @@ namespace {
|
||||
if (support | phalanx)
|
||||
{
|
||||
int v = Connected[r] * (2 + bool(phalanx) - bool(opposed))
|
||||
+ 21 * popcount(support);
|
||||
+ 22 * popcount(support);
|
||||
|
||||
score += make_score(v, v * (r - 2) / 4);
|
||||
}
|
||||
@@ -171,8 +176,8 @@ namespace {
|
||||
score -= Doubled * doubled
|
||||
+ WeakLever * more_than_one(lever);
|
||||
|
||||
if (blocked && r > RANK_4)
|
||||
score += BlockedPawn[r-4];
|
||||
if (blocked && r >= RANK_5)
|
||||
score += BlockedPawn[r - RANK_5];
|
||||
}
|
||||
|
||||
return score;
|
||||
@@ -237,6 +242,9 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const {
|
||||
bonus -= make_score(UnblockedStorm[d][theirRank], 0);
|
||||
}
|
||||
|
||||
// King On File
|
||||
bonus -= KingOnFile[pos.is_on_semiopen_file(Us, ksq)][pos.is_on_semiopen_file(Them, ksq)];
|
||||
|
||||
return bonus;
|
||||
}
|
||||
|
||||
|
||||
+33
-12
@@ -23,6 +23,8 @@
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "nnue/evaluate_nnue.h"
|
||||
|
||||
#include "bitboard.h"
|
||||
#include "misc.h"
|
||||
#include "movegen.h"
|
||||
@@ -32,6 +34,9 @@
|
||||
#include "uci.h"
|
||||
#include "syzygy/tbprobe.h"
|
||||
|
||||
#include "learn/packed_sfen.h"
|
||||
#include "learn/sfen_packer.h"
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace Zobrist {
|
||||
@@ -77,6 +82,8 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) {
|
||||
&& !pos.can_castle(ANY_CASTLING))
|
||||
{
|
||||
StateInfo st;
|
||||
ASSERT_ALIGNED(&st, Eval::NNUE::kCacheLineSize);
|
||||
|
||||
Position p;
|
||||
p.set(pos.fen(), pos.is_chess960(), &st, pos.this_thread());
|
||||
Tablebases::ProbeState s1, s2;
|
||||
@@ -704,7 +711,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
||||
|
||||
// Used by NNUE
|
||||
st->accumulator.computed_accumulation = false;
|
||||
st->accumulator.computed_score = false;
|
||||
auto& dp = st->dirtyPiece;
|
||||
dp.dirty_num = 1;
|
||||
|
||||
@@ -755,7 +761,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
||||
else
|
||||
st->nonPawnMaterial[them] -= PieceValue[MG][captured];
|
||||
|
||||
if (Eval::useNNUE)
|
||||
if (Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
|
||||
{
|
||||
dp.dirty_num = 2; // 1 piece moved, 1 piece captured
|
||||
dp.piece[1] = captured;
|
||||
@@ -799,7 +805,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
||||
// Move the piece. The tricky Chess960 castling is handled earlier
|
||||
if (type_of(m) != CASTLING)
|
||||
{
|
||||
if (Eval::useNNUE)
|
||||
if (Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
|
||||
{
|
||||
dp.piece[0] = pc;
|
||||
dp.from[0] = from;
|
||||
@@ -830,7 +836,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
|
||||
remove_piece(to);
|
||||
put_piece(promotion, to);
|
||||
|
||||
if (Eval::useNNUE)
|
||||
if (Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
|
||||
{
|
||||
// Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
|
||||
dp.to[0] = SQ_NONE;
|
||||
@@ -968,7 +974,7 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
|
||||
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
|
||||
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
|
||||
|
||||
if (Do && Eval::useNNUE)
|
||||
if (Do && Eval::NNUE::useNNUE != Eval::NNUE::UseNNUEMode::False)
|
||||
{
|
||||
auto& dp = st->dirtyPiece;
|
||||
dp.piece[0] = make_piece(us, KING);
|
||||
@@ -997,17 +1003,16 @@ void Position::do_null_move(StateInfo& newSt) {
|
||||
assert(!checkers());
|
||||
assert(&newSt != st);
|
||||
|
||||
if (Eval::useNNUE)
|
||||
{
|
||||
std::memcpy(&newSt, st, sizeof(StateInfo));
|
||||
st->accumulator.computed_score = false;
|
||||
}
|
||||
else
|
||||
std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
|
||||
std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
|
||||
|
||||
newSt.previous = st;
|
||||
st = &newSt;
|
||||
|
||||
// Used by NNUE
|
||||
st->accumulator.computed_accumulation = false;
|
||||
auto& dp = st->dirtyPiece;
|
||||
dp.dirty_num = 0;
|
||||
|
||||
if (st->epSquare != SQ_NONE)
|
||||
{
|
||||
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
|
||||
@@ -1317,6 +1322,8 @@ bool Position::pos_is_ok() const {
|
||||
assert(0 && "pos_is_ok: Bitboards");
|
||||
|
||||
StateInfo si = *st;
|
||||
ASSERT_ALIGNED(&si, Eval::NNUE::kCacheLineSize);
|
||||
|
||||
set_state(&si);
|
||||
if (std::memcmp(&si, st, sizeof(StateInfo)))
|
||||
assert(0 && "pos_is_ok: State");
|
||||
@@ -1346,3 +1353,17 @@ bool Position::pos_is_ok() const {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add a function that directly unpacks for speed. It's pretty tough.
|
||||
// Write it by combining packer::unpack() and Position::set().
|
||||
// If there is a problem with the passed phase and there is an error, non-zero is returned.
|
||||
int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th)
|
||||
{
|
||||
return Learner::set_from_packed_sfen(*this, sfen, si, th);
|
||||
}
|
||||
|
||||
// Get the packed sfen. Returns to the buffer specified in the argument.
|
||||
void Position::sfen_pack(Learner::PackedSfen& sfen)
|
||||
{
|
||||
sfen = Learner::sfen_pack(*this);
|
||||
}
|
||||
|
||||
+9
-7
@@ -30,6 +30,9 @@
|
||||
|
||||
#include "nnue/nnue_accumulator.h"
|
||||
|
||||
#include "learn/packed_sfen.h"
|
||||
#include "learn/sfen_packer.h"
|
||||
|
||||
|
||||
/// StateInfo struct stores information needed to restore a Position object to
|
||||
/// its previous state when we retract a move. Whenever a move is made on the
|
||||
@@ -75,9 +78,6 @@ typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
|
||||
/// traversing the search tree.
|
||||
class Thread;
|
||||
|
||||
// packed sfen
|
||||
struct PackedSfen { uint8_t data[32]; };
|
||||
|
||||
class Position {
|
||||
public:
|
||||
static void init();
|
||||
@@ -175,25 +175,27 @@ public:
|
||||
// Used by NNUE
|
||||
StateInfo* state() const;
|
||||
|
||||
#if defined(EVAL_LEARN)
|
||||
// --sfenization helper
|
||||
|
||||
friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
|
||||
|
||||
// Get the packed sfen. Returns to the buffer specified in the argument.
|
||||
// Do not include gamePly in pack.
|
||||
void sfen_pack(PackedSfen& sfen);
|
||||
void sfen_pack(Learner::PackedSfen& sfen);
|
||||
|
||||
// It is slow to go through sfen, so I made a function to set packed sfen directly.
|
||||
// Equivalent to pos.set(sfen_unpack(data),si,th);.
|
||||
// If there is a problem with the passed position and an error occurs, non-zero is returned.
|
||||
// PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
|
||||
int set_from_packed_sfen(const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
|
||||
int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
|
||||
|
||||
void clear() { std::memset(this, 0, sizeof(Position)); }
|
||||
|
||||
// Give the board, hand piece, and turn, and return the sfen.
|
||||
//static std::string sfen_from_rawdata(Piece board[81], Hand hands[2], Color turn, int gamePly);
|
||||
|
||||
// Returns the square of the king of color c.
|
||||
Square king_square(Color c) const { return pieceList[make_piece(c, KING)][0]; }
|
||||
#endif // EVAL_LEARN
|
||||
|
||||
private:
|
||||
// Initialization helpers (used while setting up a position)
|
||||
|
||||
+198
-230
File diff suppressed because it is too large
Load Diff
+11
-6
@@ -24,6 +24,7 @@
|
||||
#include "misc.h"
|
||||
#include "movepick.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
|
||||
class Position;
|
||||
|
||||
@@ -32,6 +33,7 @@ namespace Search {
|
||||
/// Threshold used for countermoves based pruning
|
||||
constexpr int CounterMovePruneThreshold = 0;
|
||||
|
||||
extern bool prune_at_shallow_depth;
|
||||
|
||||
/// Stack struct keeps track of the information we need to remember from nodes
|
||||
/// shallower and deeper in the tree during the search. Each search thread has
|
||||
@@ -48,6 +50,8 @@ struct Stack {
|
||||
int statScore;
|
||||
int moveCount;
|
||||
bool inCheck;
|
||||
bool ttPv;
|
||||
bool ttHit;
|
||||
};
|
||||
|
||||
|
||||
@@ -69,7 +73,6 @@ struct RootMove {
|
||||
Value previousScore = -VALUE_INFINITE;
|
||||
int selDepth = 0;
|
||||
int tbRank = 0;
|
||||
int bestMoveCount = 0;
|
||||
Value tbScore;
|
||||
std::vector<Move> pv;
|
||||
};
|
||||
@@ -86,9 +89,7 @@ struct LimitsType {
|
||||
time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
|
||||
movestogo = depth = mate = perft = infinite = 0;
|
||||
nodes = 0;
|
||||
#if defined (EVAL_LEARN)
|
||||
silent = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool use_time_management() const {
|
||||
@@ -99,11 +100,9 @@ struct LimitsType {
|
||||
TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
|
||||
int movestogo, depth, mate, perft, infinite;
|
||||
int64_t nodes;
|
||||
#if defined (EVAL_LEARN)
|
||||
// Silent mode that does not output to the screen (for continuous self-play in process)
|
||||
// Do not output PV at this time.
|
||||
bool silent;
|
||||
#endif
|
||||
};
|
||||
|
||||
extern LimitsType Limits;
|
||||
@@ -111,6 +110,12 @@ extern LimitsType Limits;
|
||||
void init();
|
||||
void clear();
|
||||
|
||||
} // namespace Search
|
||||
// A pair of an evaluation value and a PV (list of moves). Returned by Learner::search() and Learner::qsearch().
|
||||
using ValueAndPV = std::pair<Value, std::vector<Move>>;
|
||||
|
||||
ValueAndPV qsearch(Position& pos);
|
||||
ValueAndPV search(Position& pos, int depth_, size_t multiPV = 1, uint64_t nodesLimit = 0);
|
||||
|
||||
}
|
||||
|
||||
#endif // #ifndef SEARCH_H_INCLUDED
|
||||
|
||||
+11
-9
@@ -28,12 +28,12 @@
|
||||
#include <type_traits>
|
||||
#include <mutex>
|
||||
|
||||
#include "../bitboard.h"
|
||||
#include "../movegen.h"
|
||||
#include "../position.h"
|
||||
#include "../search.h"
|
||||
#include "../types.h"
|
||||
#include "../uci.h"
|
||||
#include "bitboard.h"
|
||||
#include "movegen.h"
|
||||
#include "position.h"
|
||||
#include "search.h"
|
||||
#include "types.h"
|
||||
#include "uci.h"
|
||||
|
||||
#include "tbprobe.h"
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
|
||||
using namespace Tablebases;
|
||||
|
||||
int Tablebases::MaxCardinality;
|
||||
int Tablebases::MaxCardinality = 0;
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -223,7 +223,9 @@ public:
|
||||
|
||||
*mapping = statbuf.st_size;
|
||||
*baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
||||
#if defined(MADV_RANDOM)
|
||||
madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
|
||||
#endif
|
||||
::close(fd);
|
||||
|
||||
if (*baseAddress == MAP_FAILED)
|
||||
@@ -758,7 +760,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
|
||||
if (entry->hasPawns) {
|
||||
idx = LeadPawnIdx[leadPawnsCnt][squares[0]];
|
||||
|
||||
std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
|
||||
std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
|
||||
|
||||
for (int i = 1; i < leadPawnsCnt; ++i)
|
||||
idx += Binomial[i][MapPawns[squares[i]]];
|
||||
@@ -859,7 +861,7 @@ encode_remaining:
|
||||
|
||||
while (d->groupLen[++next])
|
||||
{
|
||||
std::sort(groupSq, groupSq + d->groupLen[next]);
|
||||
std::stable_sort(groupSq, groupSq + d->groupLen[next]);
|
||||
uint64_t n = 0;
|
||||
|
||||
// Map down a square if "comes later" than a square in the previous
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
|
||||
#include <ostream>
|
||||
|
||||
#include "../search.h"
|
||||
#include "search.h"
|
||||
|
||||
namespace Tablebases {
|
||||
|
||||
|
||||
+68
-25
@@ -35,6 +35,7 @@ ThreadPool Threads; // Global object
|
||||
/// Thread constructor: stores the thread index and launches the native
/// thread running idle_loop(). The constructor then blocks until that
/// thread has cleared the `searching` flag, first through the search wait
/// and then through the worker wait.
Thread::Thread(size_t n)
    : idx(n)
    , stdThread(&Thread::idle_loop, this)
{
    this->wait_for_search_finished();
    this->wait_for_worker_finished();
}
|
||||
|
||||
|
||||
@@ -51,17 +52,6 @@ Thread::~Thread() {
|
||||
}
|
||||
|
||||
|
||||
/// Thread::bestMoveCount(Move move) return best move counter for the given root move
|
||||
|
||||
int Thread::best_move_count(Move move) const {
|
||||
|
||||
auto rm = std::find(rootMoves.begin() + pvIdx,
|
||||
rootMoves.begin() + pvLast, move);
|
||||
|
||||
return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0;
|
||||
}
|
||||
|
||||
|
||||
/// Thread::clear() reset histories, usually before a new game
|
||||
|
||||
void Thread::clear() {
|
||||
@@ -91,6 +81,14 @@ void Thread::start_searching() {
|
||||
cv.notify_one(); // Wake up the thread in idle_loop()
|
||||
}
|
||||
|
||||
/// Thread::execute_with_worker() hands the callable `t` over to this thread
/// and wakes it up. The callable is moved into the `worker` slot, which
/// idle_loop() picks up and invokes instead of search().
void Thread::execute_with_worker(std::function<void(Thread&)> t)
{
    std::lock_guard<std::mutex> guard(mutex);

    // Mark the thread busy and publish the job while holding the lock.
    searching = true;
    worker    = std::move(t);

    // Wake up the thread in idle_loop()
    cv.notify_one();
}
|
||||
|
||||
|
||||
/// Thread::wait_for_search_finished() blocks on the condition variable
|
||||
/// until the thread has finished searching.
|
||||
@@ -102,6 +100,12 @@ void Thread::wait_for_search_finished() {
|
||||
}
|
||||
|
||||
|
||||
void Thread::wait_for_worker_finished() {
|
||||
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
cv.wait(lk, [&]{ return !searching; });
|
||||
}
|
||||
|
||||
/// Thread::idle_loop() is where the thread is parked, blocked on the
|
||||
/// condition variable, when it has no work to do.
|
||||
|
||||
@@ -119,15 +123,25 @@ void Thread::idle_loop() {
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
searching = false;
|
||||
worker = nullptr;
|
||||
cv.notify_one(); // Wake up anyone waiting for search finished
|
||||
cv.wait(lk, [&]{ return searching; });
|
||||
|
||||
if (exit)
|
||||
return;
|
||||
|
||||
auto wrk = std::move(worker);
|
||||
|
||||
lk.unlock();
|
||||
|
||||
search();
|
||||
if (wrk)
|
||||
{
|
||||
wrk(*this);
|
||||
}
|
||||
else
|
||||
{
|
||||
search();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -172,6 +186,13 @@ void ThreadPool::clear() {
|
||||
main()->previousTimeReduction = 1.0;
|
||||
}
|
||||
|
||||
/// ThreadPool::execute_with_workers() dispatches `worker` to every thread
/// in the pool. Thread::execute_with_worker() takes its argument by value,
/// so each thread receives its own copy of the function object.
void ThreadPool::execute_with_workers(const std::function<void(Thread&)>& worker)
{
    for (auto it = begin(); it != end(); ++it)
        (*it)->execute_with_worker(worker);
}
|
||||
|
||||
/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and
|
||||
/// returns immediately. Main thread will wake up other threads and start the search.
|
||||
@@ -192,9 +213,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
|
||||
|| std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
|
||||
rootMoves.emplace_back(m);
|
||||
|
||||
if (!rootMoves.empty())
|
||||
Tablebases::rank_root_moves(pos, rootMoves);
|
||||
|
||||
// After ownership transfer 'states' becomes empty, so if we stop the search
|
||||
// and call 'go' again without setting a new position states.get() == NULL.
|
||||
assert(states.get() || setupStates.get());
|
||||
@@ -214,6 +232,24 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
|
||||
th->rootMoves = rootMoves;
|
||||
th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
|
||||
th->rootState = setupStates->back();
|
||||
// This is also set by rank_root_moves but we need to set it
|
||||
// also when there is no legal moves.
|
||||
th->rootInTB = false;
|
||||
th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
|
||||
th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
|
||||
th->Cardinality = int(Options["SyzygyProbeLimit"]);
|
||||
|
||||
// Tables with fewer pieces than SyzygyProbeLimit are searched with
|
||||
// ProbeDepth == DEPTH_ZERO
|
||||
if (th->Cardinality > Tablebases::MaxCardinality)
|
||||
{
|
||||
th->Cardinality = Tablebases::MaxCardinality;
|
||||
th->ProbeDepth = 0;
|
||||
}
|
||||
|
||||
if (!rootMoves.empty())
|
||||
Tablebases::rank_root_moves(pos, rootMoves);
|
||||
|
||||
}
|
||||
|
||||
main()->start_searching();
|
||||
@@ -235,16 +271,16 @@ Thread* ThreadPool::get_best_thread() const {
|
||||
votes[th->rootMoves[0].pv[0]] +=
|
||||
(th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);
|
||||
|
||||
if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
|
||||
{
|
||||
// Make sure we pick the shortest mate / TB conversion or stave off mate the longest
|
||||
if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
|
||||
bestThread = th;
|
||||
}
|
||||
else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
|
||||
|| ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
|
||||
&& votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
|
||||
bestThread = th;
|
||||
if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
|
||||
{
|
||||
// Make sure we pick the shortest mate / TB conversion or stave off mate the longest
|
||||
if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
|
||||
bestThread = th;
|
||||
}
|
||||
else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
|
||||
|| ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
|
||||
&& votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
|
||||
bestThread = th;
|
||||
}
|
||||
|
||||
return bestThread;
|
||||
@@ -269,3 +305,10 @@ void ThreadPool::wait_for_search_finished() const {
|
||||
if (th != front())
|
||||
th->wait_for_search_finished();
|
||||
}
|
||||
|
||||
|
||||
void ThreadPool::wait_for_workers_finished() const {
|
||||
|
||||
for (Thread* th : *this)
|
||||
th->wait_for_worker_finished();
|
||||
}
|
||||
|
||||
+81
-1
@@ -24,6 +24,7 @@
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include "material.h"
|
||||
#include "movepick.h"
|
||||
@@ -38,23 +39,41 @@
|
||||
/// pointer to an entry its life time is unlimited and we don't have
|
||||
/// to care about someone changing the entry under our feet.
|
||||
|
||||
namespace Detail {

  /// Identity metafunction: TypeIdentity<T>::Type is T itself. Writing a
  /// function parameter as `typename TypeIdentity<T>::Type` places T in a
  /// non-deduced context, so T is deduced only from the other arguments.
  template <typename T>
  struct TypeIdentity {
      typedef T Type;
  };

} // namespace Detail
|
||||
|
||||
class Thread {
|
||||
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
size_t idx;
|
||||
bool exit = false, searching = true; // Set before starting std::thread
|
||||
std::function<void(Thread&)> worker;
|
||||
NativeThread stdThread;
|
||||
|
||||
public:
|
||||
explicit Thread(size_t);
|
||||
virtual ~Thread();
|
||||
virtual void search();
|
||||
|
||||
// The function object to be executed is taken by value to remove
|
||||
// the need for separate lvalue and rvalue overloads.
|
||||
// The worker thread needs to have ownership of the task
|
||||
// to be executed because otherwise there's no way to manage its lifetime.
|
||||
virtual void execute_with_worker(std::function<void(Thread&)> t);
|
||||
|
||||
void clear();
|
||||
void idle_loop();
|
||||
void start_searching();
|
||||
void wait_for_search_finished();
|
||||
int best_move_count(Move move) const;
|
||||
void wait_for_worker_finished();
|
||||
size_t thread_idx() const { return idx; }
|
||||
|
||||
Pawns::Table pawnsTable;
|
||||
Material::Table materialTable;
|
||||
@@ -74,6 +93,11 @@ public:
|
||||
CapturePieceToHistory captureHistory;
|
||||
ContinuationHistory continuationHistory[2][2];
|
||||
Score contempt;
|
||||
int failedHighCnt;
|
||||
bool rootInTB;
|
||||
int Cardinality;
|
||||
bool UseRule50;
|
||||
Depth ProbeDepth;
|
||||
};
|
||||
|
||||
|
||||
@@ -101,6 +125,61 @@ struct MainThread : public Thread {
|
||||
|
||||
struct ThreadPool : public std::vector<Thread*> {
|
||||
|
||||
// Each thread gets its own copy of the `worker` function object.
|
||||
// This means that each worker thread will have exclusive access
|
||||
// to the state of the `worker` function object.
|
||||
void execute_with_workers(const std::function<void(Thread&)>& worker);
|
||||
|
||||
template <typename IndexT, typename FuncT>
|
||||
void for_each_index_with_workers(
|
||||
IndexT begin,
|
||||
typename Detail::TypeIdentity<IndexT>::Type end,
|
||||
FuncT func)
|
||||
{
|
||||
// This value must outlive the function call.
|
||||
// It's fairly safe if we make it static
|
||||
// because for_each_index_with_workers
|
||||
// is not reentrant nor thread safe.
|
||||
static std::atomic<IndexT> i_atomic;
|
||||
i_atomic.store(begin);
|
||||
|
||||
execute_with_workers(
|
||||
[end, func](Thread& th) mutable {
|
||||
for(;;) {
|
||||
const auto i = i_atomic.fetch_add(1);
|
||||
if (i >= end)
|
||||
break;
|
||||
|
||||
func(th, i);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename IndexT, typename FuncT>
|
||||
void for_each_index_chunk_with_workers(
|
||||
IndexT begin,
|
||||
typename Detail::TypeIdentity<IndexT>::Type end,
|
||||
FuncT func)
|
||||
{
|
||||
// This value must outlive the function call.
|
||||
// It's fairly safe if we make it static
|
||||
// because for_each_index_with_workers
|
||||
// is not reentrant nor thread safe.
|
||||
const IndexT size = end - begin;
|
||||
const IndexT chunk_size = (size + this->size()) / this->size();
|
||||
|
||||
execute_with_workers(
|
||||
[chunk_size, end, func](Thread& th) mutable {
|
||||
const IndexT thread_id = th.thread_idx();
|
||||
const IndexT offset = chunk_size * thread_id;
|
||||
if (offset >= end)
|
||||
return;
|
||||
|
||||
const IndexT count = offset + chunk_size > end ? end - offset : chunk_size;
|
||||
func(th, offset, count);
|
||||
});
|
||||
}
|
||||
|
||||
void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false);
|
||||
void clear();
|
||||
void set(size_t);
|
||||
@@ -111,6 +190,7 @@ struct ThreadPool : public std::vector<Thread*> {
|
||||
Thread* get_best_thread() const;
|
||||
void start_searching();
|
||||
void wait_for_search_finished() const;
|
||||
void wait_for_workers_finished() const;
|
||||
|
||||
std::atomic_bool stop, increaseDepth;
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user