diff --git a/AUTHORS b/AUTHORS index c96f870a..198dfa5a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -36,10 +36,11 @@ Bryan Cross (crossbr) candirufish Chess13234 Chris Cain (ceebo) +Dale Weiler (graphitemaster) Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) -Dariusz Orzechowski +Dariusz Orzechowski (dorzechowski) David Zar Daylen Yang (daylen) DiscanX @@ -62,6 +63,7 @@ Gary Heckman (gheckman) George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) +Deshawn Mohan-Smith (GoldenRare) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 0ea5ac72..482e9000 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,154 +1,173 @@ -Contributors with >10,000 CPU hours as of January 7, 2020 +Contributors with >10,000 CPU hours as of Sept 2, 2020 Thank you! Username CPU Hours Games played -------------------------------------------------- -noobpwnftw 9305707 695548021 -mlang 780050 61648867 -dew 621626 43921547 -mibere 524702 42238645 -crunchy 354587 27344275 -cw 354495 27274181 -fastgm 332801 22804359 -JojoM 295750 20437451 -CSU_Dynasty 262015 21828122 -Fisherman 232181 18939229 -ctoks 218866 17622052 -glinscott 201989 13780820 -tvijlbrief 201204 15337115 -velislav 188630 14348485 -gvreuls 187164 15149976 -bking_US 180289 11876016 -nordlandia 172076 13467830 -leszek 157152 11443978 -Thanar 148021 12365359 -spams 141975 10319326 -drabel 138073 11121749 -vdv 137850 9394330 -mgrabiak 133578 10454324 -TueRens 132485 10878471 -bcross 129683 11557084 -marrco 126078 9356740 -sqrt2 125830 9724586 -robal 122873 9593418 -vdbergh 120766 8926915 -malala 115926 8002293 -CoffeeOne 114241 5004100 -dsmith 113189 7570238 -BrunoBanani 104644 7436849 -Data 92328 8220352 -mhoram 89333 6695109 -davar 87924 7009424 -xoto 81094 6869316 -ElbertoOne 80899 7023771 -grandphish2 78067 6160199 -brabos 77212 6186135 -psk 75733 5984901 -BRAVONE 73875 5054681 -sunu 70771 
5597972 -sterni1971 70605 5590573 -MaZePallas 66886 5188978 -Vizvezdenec 63708 4967313 -nssy 63462 5259388 -jromang 61634 4940891 -teddybaer 61231 5407666 -Pking_cda 60099 5293873 -solarlight 57469 5028306 -dv8silencer 56913 3883992 -tinker 54936 4086118 -renouve 49732 3501516 -Freja 49543 3733019 -robnjr 46972 4053117 -rap 46563 3219146 -Bobo1239 46036 3817196 -ttruscott 45304 3649765 -racerschmacer 44881 3975413 -finfish 44764 3370515 -eva42 41783 3599691 -biffhero 40263 3111352 -bigpen0r 39817 3291647 -mhunt 38871 2691355 -ronaldjerum 38820 3240695 -Antihistamine 38785 2761312 -pb00067 38038 3086320 -speedycpu 37591 3003273 -rkl 37207 3289580 -VoyagerOne 37050 3441673 -jbwiebe 35320 2805433 -cuistot 34191 2146279 -homyur 33927 2850481 -manap 32873 2327384 -gri 32538 2515779 -oryx 31267 2899051 -EthanOConnor 30959 2090311 -SC 30832 2730764 -csnodgrass 29505 2688994 -jmdana 29458 2205261 -strelock 28219 2067805 -jkiiski 27832 1904470 -Pyafue 27533 1902349 -Garf 27515 2747562 -eastorwest 27421 2317535 -slakovv 26903 2021889 -Prcuvu 24835 2170122 -anst 24714 2190091 -hyperbolic.tom 24319 2017394 -Patrick_G 23687 1801617 -Sharaf_DG 22896 1786697 -nabildanial 22195 1519409 -chriswk 21931 1868317 -achambord 21665 1767323 -Zirie 20887 1472937 -team-oh 20217 1636708 -Isidor 20096 1680691 -ncfish1 19931 1520927 -nesoneg 19875 1463031 -Spprtr 19853 1548165 -JanErik 19849 1703875 -agg177 19478 1395014 -SFTUser 19231 1567999 -xor12 19017 1680165 -sg4032 18431 1641865 -rstoesser 18118 1293588 -MazeOfGalious 17917 1629593 -j3corre 17743 941444 -cisco2015 17725 1690126 -ianh2105 17706 1632562 -dex 17678 1467203 -jundery 17194 1115855 -iisiraider 17019 1101015 -horst.prack 17012 1465656 -Adrian.Schmidt123 16563 1281436 -purplefishies 16342 1092533 -wei 16274 1745989 -ville 16144 1384026 -eudhan 15712 1283717 -OuaisBla 15581 972000 -DragonLord 15559 1162790 -dju 14716 875569 -chris 14479 1487385 -0xB00B1ES 14079 1001120 -OssumOpossum 13776 1007129 -enedene 13460 905279 -bpfliegel 
13346 884523 -Ente 13198 1156722 -IgorLeMasson 13087 1147232 -jpulman 13000 870599 -ako027ako 12775 1173203 -Nikolay.IT 12352 1068349 -Andrew Grant 12327 895539 -joster 12008 950160 -AdrianSA 11996 804972 -Nesa92 11455 1111993 -fatmurphy 11345 853210 -Dark_wizzie 11108 1007152 -modolief 10869 896470 -mschmidt 10757 803401 -infinity 10594 727027 -mabichito 10524 749391 -Thomas A. Anderson 10474 732094 -thijsk 10431 719357 -Flopzee 10339 894821 -crocogoat 10104 1013854 -SapphireBrand 10104 969604 -stocky 10017 699440 +noobpwnftw 19352969 1231459677 +mlang 957168 61657446 +dew 949885 56893432 +mibere 703817 46865007 +crunchy 427035 27344275 +cw 416006 27521077 +JojoM 415904 24479564 +fastgm 404873 23953472 +CSU_Dynasty 335774 22850550 +tvijlbrief 335199 21871270 +Fisherman 325053 21786603 +gvreuls 311480 20751516 +ctoks 275877 18710423 +velislav 241267 15596372 +glinscott 217799 13780820 +nordlandia 211692 13484886 +bcross 206213 14934233 +bking_US 198894 11876016 +leszek 189170 11446821 +mgrabiak 183896 11778092 +drabel 181408 12489478 +TueRens 181349 12192000 +Thanar 179852 12365359 +vdv 175171 9881246 +robal 166948 10702862 +spams 157128 10319326 +marrco 149947 9376421 +sqrt2 147963 9724586 +vdbergh 137041 8926915 +CoffeeOne 136294 5004100 +malala 136182 8002293 +mhoram 128934 8177193 +davar 122092 7960001 +dsmith 122059 7570238 +xoto 119696 8222144 +grandphish2 116481 7582197 +Data 113305 8220352 +BrunoBanani 112960 7436849 +ElbertoOne 99028 7023771 +MaZePallas 98571 6362619 +brabos 92118 6186135 +psk 89957 5984901 +sunu 88463 6007033 +sterni1971 86948 5613788 +Vizvezdenec 83752 5343724 +BRAVONE 81239 5054681 +nssy 76497 5259388 +teddybaer 75125 5407666 +Pking_cda 73776 5293873 +jromang 70695 4940891 +solarlight 70517 5028306 +dv8silencer 70287 3883992 +Bobo1239 68515 4652287 +racerschmacer 67468 4935996 +manap 66273 4121774 +tinker 63458 4213726 +linrock 59082 4516053 +robnjr 57262 4053117 +Freja 56938 3733019 +ttruscott 56005 3679485 +renouve 53811 3501516 
+cuistot 52532 3014920 +finfish 51360 3370515 +eva42 51272 3599691 +rkl 50759 3840947 +rap 49985 3219146 +pb00067 49727 3298270 +ronaldjerum 47654 3240695 +bigpen0r 47278 3291647 +biffhero 46564 3111352 +VoyagerOne 45386 3445881 +speedycpu 43842 3003273 +jbwiebe 43305 2805433 +Antihistamine 41788 2761312 +mhunt 41735 2691355 +eastorwest 40387 2812173 +homyur 39893 2850481 +gri 39871 2515779 +oryx 38228 2941656 +0x3C33 37773 2529097 +SC 37290 2731014 +csnodgrass 36207 2688994 +jmdana 36108 2205261 +strelock 34716 2074055 +Garf 33800 2747562 +EthanOConnor 33370 2090311 +slakovv 32915 2021889 +Spprtr 32591 2139601 +Prcuvu 30377 2170122 +anst 30301 2190091 +jkiiski 30136 1904470 +hyperbolic.tom 29840 2017394 +Pyafue 29650 1902349 +OuaisBla 27629 1578000 +chriswk 26902 1868317 +achambord 26582 1767323 +Patrick_G 26276 1801617 +yorkman 26193 1992080 +SFTUser 25182 1675689 +nabildanial 24942 1519409 +Sharaf_DG 24765 1786697 +ncfish1 24411 1520927 +agg177 23890 1395014 +JanErik 23408 1703875 +Isidor 23388 1680691 +Norabor 22976 1587862 +cisco2015 22880 1759669 +Zirie 22542 1472937 +team-oh 22272 1636708 +MazeOfGalious 21978 1629593 +sg4032 21945 1643065 +ianh2105 21725 1632562 +xor12 21628 1680365 +dex 21612 1467203 +nesoneg 21494 1463031 +horst.prack 20878 1465656 +0xB00B1ES 20590 1208666 +j3corre 20405 941444 +Adrian.Schmidt123 20316 1281436 +wei 19973 1745989 +rstoesser 19569 1293588 +eudhan 19274 1283717 +Ente 19070 1373058 +jundery 18445 1115855 +iisiraider 18247 1101015 +ville 17883 1384026 +chris 17698 1487385 +purplefishies 17595 1092533 +DragonLord 17014 1162790 +dju 16515 929427 +IgorLeMasson 16064 1147232 +ako027ako 15671 1173203 +Nikolay.IT 15154 1068349 +Andrew Grant 15114 895539 +yurikvelo 15027 1165616 +OssumOpossum 14857 1007129 +enedene 14476 905279 +bpfliegel 14298 884523 +jpulman 13982 870599 +joster 13794 950160 +Nesa92 13786 1114691 +Dark_wizzie 13422 1007152 +Hjax 13350 900887 +Fifis 13313 965473 +mabichito 12903 749391 +thijsk 12886 722107 +crocogoat 
12876 1048802 +AdrianSA 12860 804972 +Flopzee 12698 894821 +fatmurphy 12547 853210 +SapphireBrand 12416 969604 +modolief 12386 896470 +scuzzi 12362 833465 +pgontarz 12151 848794 +stocky 11954 699440 +mschmidt 11941 803401 +infinity 11470 727027 +torbjo 11387 728873 +Thomas A. Anderson 11372 732094 +snicolet 11106 869170 +amicic 10779 733593 +rpngn 10712 688203 +d64 10680 771144 +basepi 10637 744851 +jjoshua2 10559 670905 +dzjp 10343 732529 +ols 10259 570669 +lbraesch 10252 647825 diff --git a/appveyor.yml b/appveyor.yml index a3732a23..ab608409 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -63,7 +63,7 @@ build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - ps: | # Download default NNUE net from fishtest - $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" $dummy = $nnuenet -match "(?nn-[a-z0-9]{12}.nnue)" $nnuenet = $Matches.nnuenet Write-Host "Default net:" $nnuenet diff --git a/src/Makefile b/src/Makefile index 69517c3c..0b2f99ed 100644 --- a/src/Makefile +++ b/src/Makefile @@ -60,7 +60,6 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp learn/learn.cpp \ learn/gensfen.cpp \ learn/convert.cpp \ - learn/learning_tools.cpp \ learn/multi_think.cpp OBJS = $(notdir $(SRCS:.cpp=.o)) @@ -101,12 +100,17 @@ VPATH = syzygy:nnue:nnue/features:eval:extra:learn ### 2.1. 
General and architecture defaults +ifeq ($(ARCH),) + ARCH = x86-64-modern + help_skip_sanity = yes +endif # explicitly check for the list of supported architectures (as listed with make help), # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` -ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ - x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ - x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ - armv7 armv7-neon armv8 apple-silicon general-64 general-32)) +ifeq ($(ARCH), $(filter $(ARCH), \ + x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -130,7 +134,6 @@ avx512 = no vnni256 = no vnni512 = no neon = no -ARCH = x86-64-modern STRIP = strip ### 2.2 Architecture specific @@ -394,19 +397,6 @@ ifeq ($(COMP),clang) endif endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use -else -ifeq ($(comp),clang) - profile_make = clang-profile-make - profile_use = clang-profile-use -else - profile_make = gcc-profile-make - profile_use = gcc-profile-use -endif -endif - ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 @@ -418,20 +408,30 @@ endif # Currently we don't know how to make PGO builds with the NDK yet. 
ifeq ($(COMP),ndk) CXXFLAGS += -stdlib=libc++ -fPIE + comp=clang ifeq ($(arch),armv7) - comp=armv7a-linux-androideabi16-clang CXX=armv7a-linux-androideabi16-clang++ CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon STRIP=arm-linux-androideabi-strip endif ifeq ($(arch),armv8) - comp=aarch64-linux-android21-clang CXX=aarch64-linux-android21-clang++ STRIP=aarch64-linux-android-strip endif LDFLAGS += -static-libstdc++ -pie -lm -latomic endif +ifeq ($(comp),icc) + profile_make = icc-profile-make + profile_use = icc-profile-use +else ifeq ($(comp),clang) + profile_make = clang-profile-make + profile_use = clang-profile-use +else + profile_make = gcc-profile-make + profile_use = gcc-profile-use +endif + ### Travis CI script uses COMPILER to overwrite CXX ifdef COMPILER COMPCXX=$(COMPILER) @@ -622,11 +622,13 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(COMP),ndk) - CXXFLAGS += -flto=thin - LDFLAGS += $(CXXFLAGS) - else ifeq ($(comp),clang) + ifeq ($(comp),clang) CXXFLAGS += -flto=thin + ifneq ($(findstring MINGW,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + else ifneq ($(findstring MSYS,$(KERNEL)),) + CXXFLAGS += -fuse-ld=lld + endif LDFLAGS += $(CXXFLAGS) # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be @@ -650,10 +652,12 @@ ifeq ($(debug), no) # So, only enable it for a cross from Linux by default. 
else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) + ifneq ($(arch),i386) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver endif endif + endif endif endif @@ -729,11 +733,12 @@ help: @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" @echo "-------------------------------" -ifeq ($(SUPPORTED_ARCH), true) +ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true) @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity else @echo "Specify a supported architecture with the ARCH option for more details" + @echo "" endif @@ -741,7 +746,7 @@ endif config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ clang-profile-use clang-profile-make -build: config-sanity +build: config-sanity net $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all profile-build: net config-sanity objclean profileclean @@ -768,12 +773,13 @@ install: -cp $(EXE) $(BINDIR) -strip $(BINDIR)/$(EXE) -#clean all +# clean all clean: objclean profileclean @rm -f .depend *~ core +# evaluation network (nnue) net: - $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + $(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @@ -795,7 +801,6 @@ net: echo "shasum / sha256sum not found, skipping net validation"; \ fi - # clean binaries and objects objclean: @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 806e9840..ffb631a2 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -164,5 +164,7 @@ vector setup_bench(const Position& current, 
istream& is) { ++posCounter; } + list.emplace_back("setoption name Use NNUE value true"); + return list; } diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h deleted file mode 100644 index 47e69a44..00000000 --- a/src/eval/evaluate_common.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _EVALUATE_COMMON_H_ -#define _EVALUATE_COMMON_H_ - -// A common header-like function for modern evaluation functions. - -#include - -namespace Eval -{ - // -------------------------- - // for learning - // -------------------------- - - // Save the evaluation function parameters to a file. - // You can specify the extension added to the end of the file. - void save_eval(std::string suffix); - - // Get the current eta. - double get_eta(); -} - -#endif // _EVALUATE_KPPT_COMMON_H_ diff --git a/src/evaluate.cpp b/src/evaluate.cpp index e619a747..aa9bbd67 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -20,22 +20,29 @@ #include #include #include // For std::memset +#include #include #include #include -#include +#include +#include #include "bitboard.h" #include "evaluate.h" #include "material.h" +#include "misc.h" #include "pawns.h" #include "thread.h" #include "uci.h" +#include "incbin/incbin.h" + +using namespace std; +using namespace Eval::NNUE; namespace Eval { UseNNUEMode useNNUE; - std::string eval_file_loaded="None"; + string eval_file_loaded = "None"; static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode) { @@ -49,35 +56,67 @@ namespace Eval { return UseNNUEMode::False; } - void init_NNUE() { + void NNUE::init() { useNNUE = nnue_mode_from_option(Options["Use NNUE"]); + if (useNNUE == UseNNUEMode::False) + return; - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) - if (Eval::NNUE::load_eval_file(eval_file)) - eval_file_loaded = eval_file; + string eval_file = string(Options["EvalFile"]); + + #if defined(DEFAULT_NNUE_DIRECTORY) + #define stringify2(x) #x + #define 
stringify(x) stringify2(x) + vector dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; + #else + vector dirs = { "" , CommandLine::binaryDirectory }; + #endif + + for (string directory : dirs) + if (eval_file_loaded != eval_file) + { + ifstream stream(directory + eval_file, ios::binary); + if (load_eval(eval_file, stream)) + { + sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl; + eval_file_loaded = eval_file; + } + else + { + sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl; + } + } } - void verify_NNUE() { + /// NNUE::verify() verifies that the last net used was loaded successfully + void NNUE::verify() { - std::string eval_file = std::string(Options["EvalFile"]); - if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) { + string eval_file = string(Options["EvalFile"]); + + if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file) + { UCI::OptionsMap defaults; UCI::init(defaults); - sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl; - sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl; - sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl; - sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl; - sync_cout << "info string ERROR: The engine will be terminated now." 
<< sync_endl; - std::exit(EXIT_FAILURE); + string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available."; + string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully."; + string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file."; + string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]); + string msg5 = "The engine will be terminated now."; + + sync_cout << "info string ERROR: " << msg1 << sync_endl; + sync_cout << "info string ERROR: " << msg2 << sync_endl; + sync_cout << "info string ERROR: " << msg3 << sync_endl; + sync_cout << "info string ERROR: " << msg4 << sync_endl; + sync_cout << "info string ERROR: " << msg5 << sync_endl; + + exit(EXIT_FAILURE); } if (useNNUE != UseNNUEMode::False) - sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl; + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl; else - sync_cout << "info string classical evaluation enabled." << sync_endl; + sync_cout << "info string classical evaluation enabled" << sync_endl; } } @@ -165,26 +204,26 @@ namespace { // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. 
- constexpr Score Outpost[] = { S(56, 36), S(30, 23) }; + constexpr Score Outpost[] = { S(56, 34), S(31, 23) }; // PassedRank[Rank] contains a bonus according to the rank of a passed pawn constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) + S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260) }; // RookOnFile[semiopen/open] contains bonuses for each rook when there is // no (friendly) pawn on the rook file. - constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) }; + constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) }; // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to // which piece type attacks which one. Attacks on lesser pieces which are // pawn-defended are not considered. constexpr Score ThreatByMinor[PIECE_TYPE_NB] = { - S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161) + S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162) }; constexpr Score ThreatByRook[PIECE_TYPE_NB] = { - S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41) + S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43) }; // Assorted bonuses and penalties @@ -952,17 +991,32 @@ make_v: /// evaluation of the position from the point of view of the side to move. Value Eval::evaluate(const Position& pos) { - if (useNNUE == UseNNUEMode::Pure) { - return NNUE::evaluate(pos); + + Value v; + + if (Eval::useNNUE == UseNNUEMode::Pure) { + v = NNUE::evaluate(pos); } + else if (Eval::useNNUE == UseNNUEMode::False) + v = Evaluation(pos).value(); + else + { + // scale and shift NNUE for compatibility with search and classical evaluation + auto adjusted_NNUE = [&](){ return NNUE::evaluate(pos) * 5 / 4 + Tempo; }; - bool classical = useNNUE == UseNNUEMode::False - || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); - Value v = classical ? 
Evaluation(pos).value() - : NNUE::evaluate(pos) * 5 / 4 + Tempo; + // if there is PSQ imbalance use classical eval, with small probability if it is small + Value psq = Value(abs(eg_value(pos.psq_score()))); + int r50 = 16 + pos.rule50_count(); + bool largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50; + bool classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB)); - if (classical && useNNUE != UseNNUEMode::False && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) - v = NNUE::evaluate(pos) * 5 / 4 + Tempo; + v = classical ? Evaluation(pos).value() : adjusted_NNUE(); + + // if the classical eval is small and imbalance large, use NNUE nevertheless. + if ( largePsq + && abs(v) * 16 < NNUEThreshold2 * r50) + v = adjusted_NNUE(); + } // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; diff --git a/src/evaluate.h b/src/evaluate.h index 900a77fc..ac67494d 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -38,15 +38,18 @@ namespace Eval { extern UseNNUEMode useNNUE; extern std::string eval_file_loaded; - void init_NNUE(); - void verify_NNUE(); + + // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. Do not change the + // name of the macro, as it is used in the Makefile. 
+ #define EvalFileDefaultName "nn-28e08a9fe2ad.nnue" namespace NNUE { Value evaluate(const Position& pos); - Value compute_eval(const Position& pos); - void update_eval(const Position& pos); - bool load_eval_file(const std::string& evalFile); + bool load_eval(std::string name, std::istream& stream); + void init(); + void verify(); } // namespace NNUE diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE new file mode 100644 index 00000000..32484ab5 --- /dev/null +++ b/src/incbin/UNLICENCE @@ -0,0 +1,26 @@ +The file "incbin.h" is free and unencumbered software released into +the public domain by Dale Weiler, see: + + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h new file mode 100755 index 00000000..c19684d7 --- /dev/null +++ b/src/incbin/incbin.h @@ -0,0 +1,368 @@ +/** + * @file incbin.h + * @author Dale Weiler + * @brief Utility for including binary files + * + * Facilities for including binary files into the current translation unit and + * making use from them externally in other translation units. + */ +#ifndef INCBIN_HDR +#define INCBIN_HDR +#include +#if defined(__AVX512BW__) || \ + defined(__AVX512CD__) || \ + defined(__AVX512DQ__) || \ + defined(__AVX512ER__) || \ + defined(__AVX512PF__) || \ + defined(__AVX512VL__) || \ + defined(__AVX512F__) +# define INCBIN_ALIGNMENT_INDEX 6 +#elif defined(__AVX__) || \ + defined(__AVX2__) +# define INCBIN_ALIGNMENT_INDEX 5 +#elif defined(__SSE__) || \ + defined(__SSE2__) || \ + defined(__SSE3__) || \ + defined(__SSSE3__) || \ + defined(__SSE4_1__) || \ + defined(__SSE4_2__) || \ + defined(__neon__) +# define INCBIN_ALIGNMENT_INDEX 4 +#elif ULONG_MAX != 0xffffffffu +# define INCBIN_ALIGNMENT_INDEX 3 +# else +# define INCBIN_ALIGNMENT_INDEX 2 +#endif + +/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */ +#define INCBIN_ALIGN_SHIFT_0 1 +#define INCBIN_ALIGN_SHIFT_1 2 +#define INCBIN_ALIGN_SHIFT_2 4 +#define INCBIN_ALIGN_SHIFT_3 8 +#define INCBIN_ALIGN_SHIFT_4 16 +#define INCBIN_ALIGN_SHIFT_5 32 +#define INCBIN_ALIGN_SHIFT_6 64 + +/* Actual alignment value */ +#define INCBIN_ALIGNMENT \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \ + INCBIN_ALIGNMENT_INDEX) + +/* Stringize */ +#define INCBIN_STR(X) \ + #X +#define INCBIN_STRINGIZE(X) \ + INCBIN_STR(X) +/* Concatenate */ +#define INCBIN_CAT(X, Y) \ + X ## Y +#define INCBIN_CONCATENATE(X, Y) \ + INCBIN_CAT(X, Y) +/* Deferred macro expansion */ +#define INCBIN_EVAL(X) \ + X +#define INCBIN_INVOKE(N, ...) 
\ + INCBIN_EVAL(N(__VA_ARGS__)) + +/* Green Hills uses a different directive for including binary data */ +#if defined(__ghs__) +# if (__ghs_asm == 2) +# define INCBIN_MACRO ".file" +/* Or consider the ".myrawdata" entry in the ld file */ +# else +# define INCBIN_MACRO "\tINCBIN" +# endif +#else +# define INCBIN_MACRO ".incbin" +#endif + +#ifndef _MSC_VER +# define INCBIN_ALIGN \ + __attribute__((aligned(INCBIN_ALIGNMENT))) +#else +# define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT)) +#endif + +#if defined(__arm__) || /* GNU C and RealView */ \ + defined(__arm) || /* Diab */ \ + defined(_ARM) /* ImageCraft */ +# define INCBIN_ARM +#endif + +#ifdef __GNUC__ +/* Utilize .balign where supported */ +# define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".balign 1\n" +#elif defined(INCBIN_ARM) +/* + * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is + * the shift count. This is the value passed to `.align' + */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n" +# define INCBIN_ALIGN_BYTE ".align 0\n" +#else +/* We assume other inline assembler's treat `.align' as `.balign' */ +# define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n" +# define INCBIN_ALIGN_BYTE ".align 1\n" +#endif + +/* INCBIN_CONST is used by incbin.c generated files */ +#if defined(__cplusplus) +# define INCBIN_EXTERNAL extern "C" +# define INCBIN_CONST extern const +#else +# define INCBIN_EXTERNAL extern +# define INCBIN_CONST const +#endif + +/** + * @brief Optionally override the linker section into which data is emitted. 
+ * + * @warning If you use this facility, you'll have to deal with platform-specific linker output + * section naming on your own + * + * Overriding the default linker output section, e.g for esp8266/Arduino: + * @code + * #define INCBIN_OUTPUT_SECTION ".irom.text" + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * // Data is emitted into program memory that never gets copied to RAM + * @endcode + */ +#if !defined(INCBIN_OUTPUT_SECTION) +# if defined(__APPLE__) +# define INCBIN_OUTPUT_SECTION ".const_data" +# else +# define INCBIN_OUTPUT_SECTION ".rodata" +# endif +#endif + +#if defined(__APPLE__) +/* The directives are different for Apple branded compilers */ +# define INCBIN_SECTION INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# define INCBIN_INT ".long " +# define INCBIN_MANGLE "_" +# define INCBIN_BYTE ".byte " +# define INCBIN_TYPE(...) +#else +# define INCBIN_SECTION ".section " INCBIN_OUTPUT_SECTION "\n" +# define INCBIN_GLOBAL(NAME) ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n" +# if defined(__ghs__) +# define INCBIN_INT ".word " +# else +# define INCBIN_INT ".int " +# endif +# if defined(__USER_LABEL_PREFIX__) +# define INCBIN_MANGLE INCBIN_STRINGIZE(__USER_LABEL_PREFIX__) +# else +# define INCBIN_MANGLE "" +# endif +# if defined(INCBIN_ARM) +/* On arm assemblers, `@' is used as a line comment token */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n" +# elif defined(__MINGW32__) || defined(__MINGW64__) +/* Mingw doesn't support this directive either */ +# define INCBIN_TYPE(NAME) +# else +/* It's safe to use `@' on other architectures */ +# define INCBIN_TYPE(NAME) ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n" +# endif +# define INCBIN_BYTE ".byte " +#endif + +/* List of style types used for symbol names */ +#define INCBIN_STYLE_CAMEL 0 +#define INCBIN_STYLE_SNAKE 1 + +/** + * @brief Specify the prefix to use 
for symbol names. + * + * By default this is `g', producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char gFooData[]; + * // const unsigned char *const gFooEnd; + * // const unsigned int gFooSize; + * @endcode + * + * If however you specify a prefix before including: e.g: + * @code + * #define INCBIN_PREFIX incbin + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols instead: + * // const unsigned char incbinFooData[]; + * // const unsigned char *const incbinFooEnd; + * // const unsigned int incbinFooSize; + * @endcode + */ +#if !defined(INCBIN_PREFIX) +# define INCBIN_PREFIX g +#endif + +/** + * @brief Specify the style used for symbol names. + * + * Possible options are + * - INCBIN_STYLE_CAMEL "CamelCase" + * - INCBIN_STYLE_SNAKE "snake_case" + * + * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form: + * @code + * #include "incbin.h" + * INCBIN(Foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char FooData[]; + * // const unsigned char *const FooEnd; + * // const unsigned int FooSize; + * @endcode + * + * If however you specify a style before including: e.g: + * @code + * #define INCBIN_STYLE INCBIN_STYLE_SNAKE + * #include "incbin.h" + * INCBIN(foo, "foo.txt"); + * + * // Now you have the following symbols: + * // const unsigned char foo_data[]; + * // const unsigned char *const foo_end; + * // const unsigned int foo_size; + * @endcode + */ +#if !defined(INCBIN_STYLE) +# define INCBIN_STYLE INCBIN_STYLE_CAMEL +#endif + +/* Style lookup tables */ +#define INCBIN_STYLE_0_DATA Data +#define INCBIN_STYLE_0_END End +#define INCBIN_STYLE_0_SIZE Size +#define INCBIN_STYLE_1_DATA _data +#define INCBIN_STYLE_1_END _end +#define INCBIN_STYLE_1_SIZE _size + +/* Style lookup: returning identifier */ +#define INCBIN_STYLE_IDENT(TYPE) \ + INCBIN_CONCATENATE( \ + 
INCBIN_STYLE_, \ + INCBIN_CONCATENATE( \ + INCBIN_EVAL(INCBIN_STYLE), \ + INCBIN_CONCATENATE(_, TYPE))) + +/* Style lookup: returning string literal */ +#define INCBIN_STYLE_STRING(TYPE) \ + INCBIN_STRINGIZE( \ + INCBIN_STYLE_IDENT(TYPE)) \ + +/* Generate the global labels by indirectly invoking the macro with our style + * type and concatenating the name against them. */ +#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \ + INCBIN_INVOKE( \ + INCBIN_GLOBAL, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) \ + INCBIN_INVOKE( \ + INCBIN_TYPE, \ + INCBIN_CONCATENATE( \ + NAME, \ + INCBIN_INVOKE( \ + INCBIN_STYLE_IDENT, \ + TYPE))) + +/** + * @brief Externally reference binary data included in another translation unit. + * + * Produces three external symbols that reference the binary data included in + * another translation unit. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name given for the binary data + * + * @code + * INCBIN_EXTERN(Foo); + * + * // Now you have the following symbols: + * // extern const unsigned char FooData[]; + * // extern const unsigned char *const FooEnd; + * // extern const unsigned int FooSize; + * @endcode + */ +#define INCBIN_EXTERN(NAME) \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(DATA))[]; \ + INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(END)); \ + INCBIN_EXTERNAL const unsigned int \ + INCBIN_CONCATENATE( \ + INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \ + INCBIN_STYLE_IDENT(SIZE)) + +/** + * @brief Include a binary file into the current translation unit. 
+ * + * Includes a binary file into the current translation unit, producing three symbols + * for objects that encode the data and size respectively. + * + * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with + * "Data", as well as "End" and "Size" after. An example is provided below. + * + * @param NAME The name to associate with this binary data (as an identifier.) + * @param FILENAME The file to include (as a string literal.) + * + * @code + * INCBIN(Icon, "icon.png"); + * + * // Now you have the following symbols: + * // const unsigned char IconData[]; + * // const unsigned char *const IconEnd; + * // const unsigned int IconSize; + * @endcode + * + * @warning This must be used in global scope + * @warning The identifiers may be different if INCBIN_STYLE is not default + * + * To externally reference the data included by this in another translation unit + * please @see INCBIN_EXTERN. + */ +#ifdef _MSC_VER +#define INCBIN(NAME, FILENAME) \ + INCBIN_EXTERN(NAME) +#else +#define INCBIN(NAME, FILENAME) \ + __asm__(INCBIN_SECTION \ + INCBIN_GLOBAL_LABELS(NAME, DATA) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \ + INCBIN_MACRO " \"" FILENAME "\"\n" \ + INCBIN_GLOBAL_LABELS(NAME, END) \ + INCBIN_ALIGN_BYTE \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \ + INCBIN_BYTE "1\n" \ + INCBIN_GLOBAL_LABELS(NAME, SIZE) \ + INCBIN_ALIGN_HOST \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \ + INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \ + INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \ + INCBIN_ALIGN_HOST \ + ".text\n" \ + ); \ + INCBIN_EXTERN(NAME) + +#endif +#endif diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp index 483296a1..59111dcf 100644 --- a/src/learn/convert.cpp +++ b/src/learn/convert.cpp @@ -8,9 +8,6 
@@ #include "position.h" #include "tt.h" -// evaluate header for learning -#include "eval/evaluate_common.h" - #include "extra/nnue_data_binpack_format.h" #include "syzygy/tbprobe.h" @@ -122,7 +119,7 @@ namespace Learner else if (token == "score") { double score; ss >> score; - // Training Formula Issue #71 nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 + // Training Formula ?Issue #71 ?nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 // Normalize to [0.0, 1.0]. score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value); // Scale to [dest_score_min_value, dest_score_max_value]. @@ -480,7 +477,7 @@ namespace Learner { if (fs.read((char*)&p, sizeof(PackedSfenValue))) { StateInfo si; - tpos.set_from_packed_sfen(p.sfen, &si, th, false); + tpos.set_from_packed_sfen(p.sfen, &si, th); // write as plain text ofs << "fen " << tpos.fen() << std::endl; diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp index f7cc5669..7e931726 100644 --- a/src/learn/gensfen.cpp +++ b/src/learn/gensfen.cpp @@ -2,6 +2,7 @@ #include "packed_sfen.h" #include "multi_think.h" +#include "../syzygy/tbprobe.h" #include "misc.h" #include "position.h" @@ -9,8 +10,6 @@ #include "tt.h" #include "uci.h" -#include "eval/evaluate_common.h" - #include "extra/nnue_data_binpack_format.h" #include "nnue/evaluate_nnue_learner.h" @@ -392,7 +391,6 @@ namespace Learner Position& pos, std::vector>& states, int ply, - int depth, vector& pv); // Min and max depths for search during gensfen @@ -467,18 +465,7 @@ namespace Learner return 0; } - // Initialize the Syzygy Ending Tablebase and sort the moves. 
- Search::RootMoves rootMoves; - for (const auto& m : MoveList(pos)) - { - rootMoves.emplace_back(m); - } - - if (!rootMoves.empty()) - { - Tablebases::rank_root_moves(pos, rootMoves); - } - else + if(pos.this_thread()->rootMoves.empty()) { // If there is no legal move return pos.checkers() @@ -749,7 +736,6 @@ namespace Learner Position& pos, std::vector>& states, int ply, - int depth, vector& pv) { auto rootColor = pos.side_to_move(); @@ -763,15 +749,6 @@ namespace Learner } pos.do_move(m, states[ply++]); - - // Because the difference calculation of evaluate() cannot be - // performed unless each node evaluate() is called! - // If the depth is 8 or more, it seems - // faster not to calculate this difference. - if (depth < 8) - { - Eval::NNUE::update_eval(pos); - } } // Reach leaf @@ -830,6 +807,8 @@ namespace Learner auto& pos = th->rootPos; pos.set(StartFEN, false, &si, th); + int resign_counter = 0; + bool should_resign = prng.rand(10) > 1; // Vector for holding the sfens in the current simulated game. PSVector a_psv; a_psv.reserve(write_maxply + MAX_PLY); @@ -857,6 +836,11 @@ namespace Learner // Current search depth const int depth = search_depth_min + (int)prng.rand(search_depth_max - search_depth_min + 1); + // Starting search calls init_for_search + auto [search_value, search_pv] = search(pos, depth, 1, nodes); + + // This has to be performed after search because it needs to know + // rootMoves which are filled in init_for_search. const auto result = get_current_game_result(pos, move_hist_scores); if (result.has_value()) { @@ -864,113 +848,91 @@ namespace Learner break; } + // Always adjudicate by eval limit. + // Also because of this we don't have to check for TB/MATE scores + if (abs(search_value) >= eval_limit) { - auto [search_value, search_pv] = search(pos, depth, 1, nodes); - - // Always adjudivate by eval limit. 
- // Also because of this we don't have to check for TB/MATE scores - if (abs(search_value) >= eval_limit) - { - const auto wdl = (search_value >= eval_limit) ? 1 : -1; - flush_psv(wdl); + resign_counter++; + if ((should_resign && resign_counter >= 4) || abs(search_value) >= 10000) { + flush_psv((search_value >= eval_limit) ? 1 : -1); break; } + } else { + resign_counter = 0; + } + // Verification of a strange move + if (search_pv.size() > 0 + && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL)) + { + // (???) + // MOVE_WIN is checking if it is the declaration victory stage before this + // The declarative winning move should never come back here. + // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)... + cout << "Error! : " << pos.fen() << next_move << search_value << endl; + break; + } - // Verification of a strange move - if (search_pv.size() > 0 - && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL)) + // Save the move score for adjudication. + move_hist_scores.push_back(search_value); + + // Discard stuff before write_minply is reached + // because it can harm training due to overfitting. + // Initial positions would be too common. + if (ply < write_minply - 1) + { + a_psv.clear(); + goto SKIP_SAVE; + } + + // Look into the position hashtable to see if the same + // position was seen before. + // This is a good heuristic to exlude already seen + // positions without many false positives. + { + auto key = pos.key(); + auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1)); + auto old_key = hash[hash_index]; + if (key == old_key) { - // (???) - // MOVE_WIN is checking if it is the declaration victory stage before this - // The declarative winning move should never come back here. - // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)... - cout << "Error! 
: " << pos.fen() << next_move << search_value << endl; - break; - } - - // Save the move score for adjudication. - move_hist_scores.push_back(search_value); - - // If depth 0, pv is not obtained, so search again at depth 2. - if (search_depth_min <= 0) - { - auto [research_value, research_pv] = search(pos, 2); - search_pv = research_pv; - } - - // Discard stuff before write_minply is reached - // because it can harm training due to overfitting. - // Initial positions would be too common. - if (ply < write_minply - 1) - { - a_psv.clear(); goto SKIP_SAVE; } - - // Look into the position hashtable to see if the same - // position was seen before. - // This is a good heuristic to exlude already seen - // positions without many false positives. + else { - auto key = pos.key(); - auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1)); - auto old_key = hash[hash_index]; - if (key == old_key) - { - a_psv.clear(); - goto SKIP_SAVE; - } - else - { - // Replace with the current key. - hash[hash_index] = key; - } + // Replace with the current key. + hash[hash_index] = key; } - - // Pack the current position into a packed sfen and save it into the buffer. - { - a_psv.emplace_back(PackedSfenValue()); - auto& psv = a_psv.back(); - - // Here we only write the position data. - // Result is added after the whole game is done. - pos.sfen_pack(psv.sfen); - - // Get the value of evaluate() as seen from the - // root color on the leaf node of the PV line. - // I don't know the goodness and badness of using the - // return value of search() as it is. - // TODO: Consider using search value instead of evaluate_leaf. - // Maybe give it as an option. - - // Use PV moves to reach the leaf node and use the value - // that evaluated() is called on that leaf node. - const auto leaf_value = evaluate_leaf(pos, states, ply, depth, search_pv); - - // If for some reason the leaf node couldn't yield an eval - // we fallback to search value. - psv.score = leaf_value == VALUE_NONE ? 
search_value : leaf_value; - - psv.gamePly = ply; - - // Take out the first PV move. This should be present unless depth 0. - assert(search_pv.size() >= 1); - psv.move = search_pv[0]; - } - - SKIP_SAVE:; - - // For some reason, We could not get PV (hit the substitution table etc. and got stuck?) - // so go to the next game. It's a rare case, so you can ignore it. - if (search_pv.size() == 0) - { - break; - } - - // Update the next move according to best search result. - next_move = search_pv[0]; } + // Pack the current position into a packed sfen and save it into the buffer. + { + a_psv.emplace_back(PackedSfenValue()); + auto& psv = a_psv.back(); + + // Here we only write the position data. + // Result is added after the whole game is done. + pos.sfen_pack(psv.sfen); + + psv.score = search_value; + + psv.gamePly = ply; + + // Take out the first PV move. This should be present unless depth 0. + assert(search_pv.size() >= 1); + psv.move = search_pv[0]; + } + + SKIP_SAVE:; + + // For some reason, We could not get PV (hit the substitution table etc. and got stuck?) + // so go to the next game. It's a rare case, so you can ignore it. + if (search_pv.size() == 0) + { + break; + } + + // Update the next move according to best search result. + next_move = search_pv[0]; + // Random move. auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count); if (random_move.has_value()) @@ -983,18 +945,11 @@ namespace Learner { break; } - - // Clear the sfens that were written before the random move. - // (???) why? - a_psv.clear(); } // Do move. pos.do_move(next_move, states[ply]); - // Call node evaluate() for each difference calculation. - Eval::NNUE::update_eval(pos); - } // for (int ply = 0; ; ++ply) } // while(!quit) @@ -1177,10 +1132,28 @@ namespace Learner << " detect_draw_by_insufficient_mating_material = " << detect_draw_by_insufficient_mating_material << endl; // Show if the training data generator uses NNUE. 
- Eval::verify_NNUE(); + Eval::NNUE::verify(); Threads.main()->ponder = false; + // About Search::Limits + // Be careful because this member variable is global and affects other threads. + { + auto& limits = Search::Limits; + + // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done) + limits.infinite = true; + + // Since PV is an obstacle when displayed, erase it. + limits.silent = true; + + // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it. + limits.nodes = 0; + + // depth is also processed by the one passed as an argument of Learner::search(). + limits.depth = 0; + } + // Create and execute threads as many as Options["Threads"]. { SfenWriter sfen_writer(output_file_name, thread_num); diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp index ba904e9d..e2d9af1b 100644 --- a/src/learn/learn.cpp +++ b/src/learn/learn.cpp @@ -29,8 +29,6 @@ #include "uci.h" #include "search.h" -#include "eval/evaluate_common.h" - #include "extra/nnue_data_binpack_format.h" #include "nnue/evaluate_nnue_learner.h" @@ -58,6 +56,7 @@ #include #endif +extern double global_learning_rate; using namespace std; @@ -92,12 +91,6 @@ namespace Learner static double dest_score_min_value = 0.0; static double dest_score_max_value = 1.0; - // Assume teacher signals are the scores of deep searches, - // and convert them into winning probabilities in the trainer. - // Sometimes we want to use the winning probabilities in the training - // data directly. In those cases, we set false to this variable. - static bool convert_teacher_signal_to_winning_probability = true; - // Using stockfish's WDL with win rate model instead of sigmoid static bool use_wdl = false; @@ -164,14 +157,6 @@ namespace Learner return ((y2 - y1) / epsilon) / winning_probability_coefficient; } - // A constant used in elmo (WCSC27). Adjustment required. 
- // Since elmo does not internally divide the expression, the value is different. - // You can set this value with the learn command. - // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27) - double ELMO_LAMBDA = 0.33; - double ELMO_LAMBDA2 = 0.33; - double ELMO_LAMBDA_LIMIT = 32000; - // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71 double get_scaled_signal(double signal) { @@ -194,26 +179,7 @@ namespace Learner double calculate_p(double teacher_signal, int ply) { const double scaled_teacher_signal = get_scaled_signal(teacher_signal); - - double p = scaled_teacher_signal; - if (convert_teacher_signal_to_winning_probability) - { - p = winning_percentage(scaled_teacher_signal, ply); - } - - return p; - } - - double calculate_lambda(double teacher_signal) - { - // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT - // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA. - const double lambda = - (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) - ? ELMO_LAMBDA2 - : ELMO_LAMBDA; - - return lambda; + return winning_percentage(scaled_teacher_signal, ply); } double calculate_t(int game_result) @@ -226,32 +192,6 @@ namespace Learner return t; } - double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv) - { - // elmo (WCSC27) method - // Correct with the actual game wins and losses. - const double q = winning_percentage(shallow, psv.gamePly); - const double p = calculate_p(teacher_signal, psv.gamePly); - const double t = calculate_t(psv.game_result); - const double lambda = calculate_lambda(teacher_signal); - - double grad; - if (use_wdl) - { - const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly); - const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly); - grad = lambda * dce_p + (1.0 - lambda) * dce_t; - } - else - { - // Use the actual win rate as a correction term. 
- // This is the idea of ​​elmo (WCSC27), modern O-parts. - grad = lambda * (q - p) + (1.0 - lambda) * (q - t); - } - - return grad; - } - // Calculate cross entropy during learning // The individual cross entropy of the win/loss term and win // rate term of the elmo expression is returned @@ -262,21 +202,16 @@ namespace Learner const PackedSfenValue& psv, double& cross_entropy_eval, double& cross_entropy_win, - double& cross_entropy, double& entropy_eval, - double& entropy_win, - double& entropy) + double& entropy_win) { // Teacher winning probability. const double q = winning_percentage(shallow, psv.gamePly); const double p = calculate_p(teacher_signal, psv.gamePly); const double t = calculate_t(psv.game_result); - const double lambda = calculate_lambda(teacher_signal); constexpr double epsilon = 0.000001; - const double m = (1.0 - lambda) * t + lambda * p; - cross_entropy_eval = (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon)); cross_entropy_win = @@ -285,17 +220,12 @@ namespace Learner (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon)); entropy_win = (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon)); - - cross_entropy = - (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon)); - entropy = - (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon)); } // Other objective functions may be considered in the future... 
double calc_grad(Value shallow, const PackedSfenValue& psv) { - return calc_grad((Value)psv.score, shallow, psv); + return (double)(shallow - (Value)psv.score) / 2400.0; } struct BasicSfenInputStream @@ -787,15 +717,9 @@ namespace Learner std::atomic stop_flag; - // Discount rate - double discount_rate; - // Option to exclude early stage from learning int reduction_gameply; - // Option not to learn kk/kkp/kpp/kppp - std::array freeze; - // If the absolute value of the evaluation value of the deep search // of the teacher phase exceeds this value, discard the teacher phase. int eval_limit; @@ -825,7 +749,6 @@ namespace Learner uint64_t eval_save_interval; uint64_t loss_output_interval; - uint64_t mirror_percentage; // Loss calculation. // done: Number of phases targeted this time @@ -849,7 +772,6 @@ namespace Learner for (size_t i = 0; i < pv.size(); ++i) { task_pos.do_move(pv[i], states[i]); - Eval::NNUE::update_eval(task_pos); } const Value shallow_value = @@ -870,20 +792,18 @@ namespace Learner // It doesn't matter if you have disabled the substitution table. 
TT.new_search(); - std::cout << "PROGRESS: " << now_string() << ", "; - std::cout << sr.total_done << " sfens"; - std::cout << ", iteration " << epoch; - std::cout << ", eta = " << Eval::get_eta() << ", "; + cout << "PROGRESS: " << now_string() << ", "; + cout << sr.total_done << " sfens"; + cout << ", iteration " << epoch; + cout << ", learning rate = " << global_learning_rate << ", "; // For calculation of verification data loss - atomic test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy; - atomic test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy; + atomic test_sum_cross_entropy_eval, test_sum_cross_entropy_win; + atomic test_sum_entropy_eval, test_sum_entropy_win; test_sum_cross_entropy_eval = 0; test_sum_cross_entropy_win = 0; - test_sum_cross_entropy = 0; test_sum_entropy_eval = 0; test_sum_entropy_win = 0; - test_sum_entropy = 0; // norm for learning atomic sum_norm; @@ -899,7 +819,7 @@ namespace Learner auto& pos = th->rootPos; StateInfo si; pos.set(StartFEN, false, &si, th); - std::cout << "hirate eval = " << Eval::evaluate(pos); + cout << "hirate eval = " << Eval::evaluate(pos) << endl; // It's better to parallelize here, but it's a bit // troublesome because the search before slave has not finished. @@ -923,10 +843,8 @@ namespace Learner &ps, &test_sum_cross_entropy_eval, &test_sum_cross_entropy_win, - &test_sum_cross_entropy, &test_sum_entropy_eval, &test_sum_entropy_win, - &test_sum_entropy, &sum_norm, &task_count, &move_accord_count @@ -954,26 +872,22 @@ namespace Learner // For the time being, regarding the win rate and loss terms only in the elmo method // Calculate and display the cross entropy. 
- double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy; - double test_entropy_eval, test_entropy_win, test_entropy; + double test_cross_entropy_eval, test_cross_entropy_win; + double test_entropy_eval, test_entropy_win; calc_cross_entropy( deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, - test_cross_entropy, test_entropy_eval, - test_entropy_win, - test_entropy); + test_entropy_win); // The total cross entropy need not be abs() by definition. test_sum_cross_entropy_eval += test_cross_entropy_eval; test_sum_cross_entropy_win += test_cross_entropy_win; - test_sum_cross_entropy += test_cross_entropy; test_sum_entropy_eval += test_entropy_eval; test_sum_entropy_win += test_entropy_win; - test_sum_entropy += test_entropy; sum_norm += (double)abs(shallow_value); // Determine if the teacher's move and the score of the shallow search match @@ -998,7 +912,7 @@ namespace Learner while (task_count) sleep(1); - latest_loss_sum += test_sum_cross_entropy - test_sum_entropy; + latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval; latest_loss_count += sr.sfen_for_mse.size(); // learn_cross_entropy may be called train cross @@ -1008,27 +922,24 @@ namespace Learner if (sr.sfen_for_mse.size() && done) { - cout - << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size() + cout << "INFO: " + << "test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size() << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size() << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size() << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size() - << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size() - << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size() << " , norm = " << sum_norm - << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << 
"%"; + << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%" + << endl; if (done != static_cast(-1)) { - cout - << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done + cout << "INFO: " + << "learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done << " , learn_entropy_eval = " << learn_sum_entropy_eval / done << " , learn_entropy_win = " << learn_sum_entropy_win / done - << " , learn_cross_entropy = " << learn_sum_cross_entropy / done - << " , learn_entropy = " << learn_sum_entropy / done; + << endl; } - cout << endl; } else { @@ -1038,10 +949,8 @@ namespace Learner // Clear 0 for next time. learn_sum_cross_entropy_eval = 0.0; learn_sum_cross_entropy_win = 0.0; - learn_sum_cross_entropy = 0.0; learn_sum_entropy_eval = 0.0; learn_sum_entropy_win = 0.0; - learn_sum_entropy = 0.0; } void LearnerThink::thread_worker(size_t thread_id) @@ -1058,7 +967,7 @@ namespace Learner // display mse (this is sometimes done only for thread 0) // Immediately after being read from the file... - // Lock the evaluation function so that it is not used during updating. + // Lock the evaluation function so that it is not used during updating. shared_lock read_lock(nn_mutex, defer_lock); if (sr.next_update_weights <= sr.total_done || (thread_id != 0 && !read_lock.try_lock())) @@ -1090,7 +999,7 @@ namespace Learner // Lock the evaluation function so that it is not used during updating. lock_guard write_lock(nn_mutex); - Eval::NNUE::UpdateParameters(epoch); + Eval::NNUE::UpdateParameters(); } ++epoch; @@ -1167,8 +1076,7 @@ namespace Learner goto RETRY_READ; StateInfo si; - const bool mirror = prng.rand(100) < mirror_percentage; - if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0) + if (pos.set_from_packed_sfen(ps.sfen, &si, th) != 0) { // I got a strange sfen. Should be debugged! 
// Since it is an illegal sfen, it may not be @@ -1177,18 +1085,30 @@ namespace Learner goto RETRY_READ; } - // There is a possibility that all the pieces are blocked and stuck. - // Also, the declaration win phase is excluded from - // learning because you cannot go to leaf with PV moves. - // (shouldn't write out such teacher aspect itself, - // but may have written it out with an old generation routine) - // Skip the position if there are no legal moves (=checkmated or stalemate). - if (MoveList(pos).size() == 0) - goto RETRY_READ; - // I can read it, so try displaying it. // cout << pos << value << endl; + const auto rootColor = pos.side_to_move(); + + int ply = 0; + StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long. + + if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move)) + { + goto RETRY_READ; + } + + pos.do_move((Move)ps.move, state[ply++]); + + // There is a possibility that all the pieces are blocked and stuck. + // Also, the declaration win phase is excluded from + // learning because you cannot go to leaf with PV moves. + // (shouldn't write out such teacher aspect itself, + // but may have written it out with an old generation routine) + // Skip the position if there are no legal moves (=checkmated or stalemate). + if (MoveList(pos).size() == 0) + goto RETRY_READ; + // Evaluation value of shallow search (qsearch) const auto [_, pv] = qsearch(pos); @@ -1199,13 +1119,11 @@ namespace Learner // Go to the leaf node as it is, add only to the gradient array, // and later try AdaGrad at the time of rmse aggregation. - const auto rootColor = pos.side_to_move(); // If the initial PV is different, it is better not to use it for learning. // If it is the result of searching a completely different place, it may become noise. // It may be better not to study where the difference in evaluation values ​​is too large. - int ply = 0; // A helper function that adds the gradient to the current phase. 
auto pos_add_grad = [&]() { @@ -1224,35 +1142,28 @@ namespace Learner : -Eval::evaluate(pos); // Calculate loss for training data - double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy; - double learn_entropy_eval, learn_entropy_win, learn_entropy; + double learn_cross_entropy_eval, learn_cross_entropy_win; + double learn_entropy_eval, learn_entropy_win; calc_cross_entropy( deep_value, shallow_value, ps, learn_cross_entropy_eval, learn_cross_entropy_win, - learn_cross_entropy, learn_entropy_eval, - learn_entropy_win, - learn_entropy); + learn_entropy_win); learn_sum_cross_entropy_eval += learn_cross_entropy_eval; learn_sum_cross_entropy_win += learn_cross_entropy_win; - learn_sum_cross_entropy += learn_cross_entropy; learn_sum_entropy_eval += learn_entropy_eval; learn_sum_entropy_win += learn_entropy_win; - learn_sum_entropy += learn_entropy; - const double example_weight = - (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0; - Eval::NNUE::AddExample(pos, rootColor, ps, example_weight); + Eval::NNUE::AddExample(pos, rootColor, ps, 1.0); // Since the processing is completed, the counter of the processed number is incremented sr.total_done++; }; - StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long. bool illegal_move = false; for (auto m : pv) { @@ -1266,29 +1177,16 @@ namespace Learner break; } - // Processing when adding the gradient to the node on each PV. - //If discount_rate is 0, this process is not performed. - if (discount_rate != 0) - pos_add_grad(); - pos.do_move(m, state[ply++]); - - // Since the value of evaluate in leaf is used, the difference is updated. - Eval::NNUE::update_eval(pos); } if (illegal_move) { - sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl; - continue; + goto RETRY_READ; } // Since we have reached the end phase of PV, add the slope here. 
pos_add_grad(); - - // rewind the phase - for (auto it = pv.rbegin(); it != pv.rend(); ++it) - pos.undo_move(*it); } } @@ -1303,18 +1201,18 @@ namespace Learner { // When EVAL_SAVE_ONLY_ONCE is defined, // Do not dig a subfolder because I want to save it only once. - Eval::save_eval(""); + Eval::NNUE::save_eval(""); } else if (is_final) { - Eval::save_eval("final"); + Eval::NNUE::save_eval("final"); return true; } else { static int dir_number = 0; const std::string dir_name = std::to_string(dir_number++); - Eval::save_eval(dir_name); + Eval::NNUE::save_eval(dir_name); if (newbob_decay != 1.0 && latest_loss_count > 0) { static int trials = newbob_num_trials; @@ -1332,25 +1230,17 @@ namespace Learner else { cout << " >= best (" << best_loss << "), rejected" << endl; - if (best_nn_directory.empty()) - { - cout << "WARNING: no improvement from initial model" << endl; - } - else - { - cout << "restoring parameters from " << best_nn_directory << endl; - Eval::NNUE::RestoreParameters(best_nn_directory); - } + best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name); if (--trials > 0 && !is_final) { cout - << "reducing learning rate scale from " << newbob_scale + << "reducing learning rate from " << newbob_scale << " to " << (newbob_scale * newbob_decay) << " (" << trials << " more trials)" << endl; newbob_scale *= newbob_decay; - Eval::NNUE::SetGlobalLearningRateScale(newbob_scale); + global_learning_rate = newbob_scale; } } @@ -1628,13 +1518,6 @@ namespace Learner string target_dir; - // If 0, it will be the default value. - double eta1 = 0.0; - double eta2 = 0.0; - double eta3 = 0.0; - uint64_t eta1_epoch = 0; // eta2 is not applied by default - uint64_t eta2_epoch = 0; // eta3 is not applied by default - // --- Function that only shuffles the teacher aspect // normal shuffle @@ -1675,24 +1558,13 @@ namespace Learner // Turn on if you want to pass a pre-shuffled file. 
bool no_shuffle = false; - // elmo lambda - ELMO_LAMBDA = 0.33; - ELMO_LAMBDA2 = 0.33; - ELMO_LAMBDA_LIMIT = 32000; - - // Discount rate. If this is set to a value other than 0, - // the slope will be added even at other than the PV termination. - // (At that time, apply this discount rate) - double discount_rate = 0; + global_learning_rate = 1.0; // if (gamePly freeze = {}; - uint64_t nn_batch_size = 1000; double newbob_decay = 1.0; int newbob_num_trials = 2; @@ -1700,7 +1572,6 @@ namespace Learner uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL; uint64_t loss_output_interval = 0; - uint64_t mirror_percentage = 0; string validation_set_file_name; string seed; @@ -1734,12 +1605,7 @@ namespace Learner else if (option == "batchsize") is >> mini_batch_size; // learning rate - else if (option == "eta") is >> eta1; - else if (option == "eta1") is >> eta1; // alias - else if (option == "eta2") is >> eta2; - else if (option == "eta3") is >> eta3; - else if (option == "eta1_epoch") is >> eta1_epoch; - else if (option == "eta2_epoch") is >> eta2_epoch; + else if (option == "lr") is >> global_learning_rate; // Accept also the old option name. else if (option == "use_draw_in_training" @@ -1758,22 +1624,9 @@ namespace Learner else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient; - // Discount rate - else if (option == "discount_rate") is >> discount_rate; - // Using WDL with win rate model instead of sigmoid else if (option == "use_wdl") is >> use_wdl; - // No learning of KK/KKP/KPP/KPPP. 
- else if (option == "freeze_kk") is >> freeze[0]; - else if (option == "freeze_kkp") is >> freeze[1]; - else if (option == "freeze_kpp") is >> freeze[2]; - - // LAMBDA - else if (option == "lambda") is >> ELMO_LAMBDA; - else if (option == "lambda2") is >> ELMO_LAMBDA2; - else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT; - else if (option == "reduction_gameply") is >> reduction_gameply; // shuffle related @@ -1794,7 +1647,6 @@ namespace Learner else if (option == "eval_save_interval") is >> eval_save_interval; else if (option == "loss_output_interval") is >> loss_output_interval; - else if (option == "mirror_percentage") is >> mirror_percentage; else if (option == "validation_set_file_name") is >> validation_set_file_name; // Rabbit convert related @@ -1810,7 +1662,6 @@ namespace Learner else if (option == "src_score_max_value") is >> src_score_max_value; else if (option == "dest_score_min_value") is >> dest_score_min_value; else if (option == "dest_score_max_value") is >> dest_score_max_value; - else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability; else if (option == "seed") is >> seed; // Otherwise, it's a filename. else @@ -1884,7 +1735,7 @@ namespace Learner if (use_convert_plain) { - Eval::init_NNUE(); + Eval::NNUE::init(); cout << "convert_plain.." << endl; convert_plain(filenames, output_file_name); return; @@ -1892,7 +1743,7 @@ namespace Learner if (use_convert_bin) { - Eval::init_NNUE(); + Eval::NNUE::init(); cout << "convert_bin.." << endl; convert_bin( filenames, @@ -1913,7 +1764,7 @@ namespace Learner if (use_convert_bin_from_pgn_extract) { - Eval::init_NNUE(); + Eval::NNUE::init(); cout << "convert_bin_from_pgn-extract.." 
<< endl; convert_bin_from_pgn_extract( filenames, @@ -1946,8 +1797,7 @@ namespace Learner cout << "nn_batch_size : " << nn_batch_size << endl; cout << "nn_options : " << nn_options << endl; - cout << "learning rate : " << eta1 << " , " << eta2 << " , " << eta3 << endl; - cout << "eta_epoch : " << eta1_epoch << " , " << eta2_epoch << endl; + cout << "learning rate : " << global_learning_rate << endl; cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl; cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl; @@ -1960,17 +1810,10 @@ namespace Learner cout << "scheduling : default" << endl; } - cout << "discount rate : " << discount_rate << endl; - // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. reduction_gameply = max(reduction_gameply, 1); cout << "reduction_gameply : " << reduction_gameply << endl; - cout << "LAMBDA : " << ELMO_LAMBDA << endl; - cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl; - cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl; - - cout << "mirror_percentage : " << mirror_percentage << endl; cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl; cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl; @@ -1981,7 +1824,7 @@ namespace Learner cout << "init.." << endl; // Read evaluation function parameters - Eval::init_NNUE(); + Eval::NNUE::init(); Threads.main()->ponder = false; @@ -2004,12 +1847,12 @@ namespace Learner } cout << "init_training.." << endl; - Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3); + Eval::NNUE::InitializeTraining(seed); Eval::NNUE::SetBatchSize(nn_batch_size); Eval::NNUE::SetOptions(nn_options); if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) { // Save the current net to [EvalSaveDir]\original. 
- Eval::save_eval("original"); + Eval::NNUE::save_eval("original"); // Set the folder above to best_nn_directory so that the trainer can // resotre the network parameters from the original net file. @@ -2020,11 +1863,9 @@ namespace Learner cout << "init done." << endl; // Reflect other option settings. - learn_think.discount_rate = discount_rate; learn_think.eval_limit = eval_limit; learn_think.save_only_once = save_only_once; learn_think.sr.no_shuffle = no_shuffle; - learn_think.freeze = freeze; learn_think.reduction_gameply = reduction_gameply; learn_think.newbob_scale = 1.0; @@ -2033,7 +1874,6 @@ namespace Learner learn_think.eval_save_interval = eval_save_interval; learn_think.loss_output_interval = loss_output_interval; - learn_think.mirror_percentage = mirror_percentage; // Start a thread that loads the phase file in the background // (If this is not started, mse cannot be calculated.) @@ -2069,6 +1909,8 @@ namespace Learner // Start learning. learn_think.go_think(); + Eval::NNUE::FinalizeNet(); + // Save once at the end. 
learn_think.save(true); } diff --git a/src/learn/learn.h b/src/learn/learn.h index 4b09f825..c76d76c5 100644 --- a/src/learn/learn.h +++ b/src/learn/learn.h @@ -23,11 +23,7 @@ using LearnFloatType = float; // configure // ====================== -// ---------------------- -// Learning with the method of elmo (WCSC27) -// ---------------------- - -#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)" +#define LOSS_FUNCTION "cross_entropy_eval" // ---------------------- // Definition of struct used in Learner diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp deleted file mode 100644 index 925905c6..00000000 --- a/src/learn/learning_tools.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "learning_tools.h" - -#include "misc.h" - -using namespace Eval; - -namespace EvalLearningTools -{ - - // --- static variables - - double Weight::eta; - double Weight::eta1; - double Weight::eta2; - double Weight::eta3; - uint64_t Weight::eta1_epoch; - uint64_t Weight::eta2_epoch; -} diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h deleted file mode 100644 index dcb2c4aa..00000000 --- a/src/learn/learning_tools.h +++ /dev/null @@ -1,99 +0,0 @@ -#ifndef __LEARN_WEIGHT_H__ -#define __LEARN_WEIGHT_H__ - -// A set of machine learning tools related to the weight array used for machine learning of evaluation functions - -#include "learn.h" - -#include "misc.h" // PRNG , my_insertion_sort - -#include -#include // std::sqrt() - -namespace EvalLearningTools -{ - // ------------------------------------------------- - // Array for learning that stores gradients etc. - // ------------------------------------------------- - -#if defined(_MSC_VER) -#pragma pack(push,2) -#elif defined(__GNUC__) -#pragma pack(2) -#endif - struct Weight - { - // cumulative value of one mini-batch gradient - LearnFloatType g = LearnFloatType(0); - - // Learning rate η(eta) such as AdaGrad. - // It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called. 
- // The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch. - // After eta2_epoch, gradually change from eta2 to eta3. - static double eta; - static double eta1; - static double eta2; - static double eta3; - static uint64_t eta1_epoch; - static uint64_t eta2_epoch; - - // Batch initialization of eta. If 0 is passed, the default value will be set. - static void init_eta(double new_eta1, double new_eta2, double new_eta3, - uint64_t new_eta1_epoch, uint64_t new_eta2_epoch) - { - Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0; - Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0; - Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0; - Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0; - Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0; - } - - // Set eta according to epoch. - static void calc_eta(uint64_t epoch) - { - if (Weight::eta1_epoch == 0) // Exclude eta2 - Weight::eta = Weight::eta1; - else if (epoch < Weight::eta1_epoch) - // apportion - Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch; - else if (Weight::eta2_epoch == 0) // Exclude eta3 - Weight::eta = Weight::eta2; - else if (epoch < Weight::eta2_epoch) - Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch); - else - Weight::eta = Weight::eta3; - } - - template void updateFV(T& v) { updateFV(v, 1.0); } - - // grad setting - template void set_grad(const T& g_) { g = g_; } - - // Add grad - template void add_grad(const T& g_) { g += g_; } - - LearnFloatType get_grad() const { return g; } - }; -#if defined(_MSC_VER) -#pragma pack(pop) -#elif defined(__GNUC__) -#pragma pack(0) -#endif - - // Turned weight array - // In order to be able to handle it transparently, let's have the same member as Weight. - struct Weight2 - { - Weight w[2]; - - //Evaluate your turn, eta 1/8. 
- template void updateFV(std::array& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); } - - template void set_grad(const std::array& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); } - template void add_grad(const std::array& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); } - - std::array get_grad() const { return std::array{w[0].get_grad(), w[1].get_grad()}; } - }; -} - -#endif diff --git a/src/learn/multi_think.cpp b/src/learn/multi_think.cpp index 7c389d40..80bc72b5 100644 --- a/src/learn/multi_think.cpp +++ b/src/learn/multi_think.cpp @@ -9,39 +9,14 @@ void MultiThink::go_think() { - // Keep a copy to restore the Options settings later. - auto oldOptions = Options; - - // When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is - // Since it is not thread safe, it is guaranteed here that it is being completely read in memory. - Options["BookOnTheFly"] = std::string("false"); - // Read evaluation function, etc. // In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so // Skip memory corruption check. - Eval::init_NNUE(); + Eval::NNUE::init(); // Call the derived class's init(). init(); - // About Search::Limits - // Be careful because this member variable is global and affects other threads. - { - auto& limits = Search::Limits; - - // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done) - limits.infinite = true; - - // Since PV is an obstacle when displayed, erase it. - limits.silent = true; - - // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it. - limits.nodes = 0; - - // depth is also processed by the one passed as an argument of Learner::search(). - limits.depth = 0; - } - // The loop upper limit is set with set_loop_max(). 
loop_count = 0; done_count = 0; @@ -123,10 +98,4 @@ void MultiThink::go_think() // The file writing thread etc. are still running only when all threads are finished // Since the work itself may not have completed, output only that all threads have finished. std::cout << "all threads are joined." << std::endl; - - // Restored because Options were rewritten. - // Restore the handler because the handler will not start unless you assign a value. - for (auto& s : oldOptions) - Options[s.first] = std::string(s.second); - } diff --git a/src/learn/sfen_packer.cpp b/src/learn/sfen_packer.cpp index 734a477b..19c745ad 100644 --- a/src/learn/sfen_packer.cpp +++ b/src/learn/sfen_packer.cpp @@ -259,7 +259,7 @@ namespace Learner { return make_piece(c, pr); } - int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror) + int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th) { SfenPacker packer; auto& stream = packer.stream; @@ -280,16 +280,8 @@ namespace Learner { pos.pieceList[B_KING][0] = SQUARE_NB; // First the position of the ball - if (mirror) - { - for (auto c : Colors) - pos.board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING); - } - else - { - for (auto c : Colors) - pos.board[stream.read_n_bit(6)] = make_piece(c, KING); - } + for (auto c : Colors) + pos.board[stream.read_n_bit(6)] = make_piece(c, KING); // Piece placement for (Rank r = RANK_8; r >= RANK_1; --r) @@ -297,9 +289,6 @@ namespace Learner { for (File f = FILE_A; f <= FILE_H; ++f) { auto sq = make_square(f, r); - if (mirror) { - sq = flip_file(sq); - } // it seems there are already balls Piece pc; @@ -355,9 +344,6 @@ namespace Learner { // En passant square. 
Ignore if no pawn capture is possible if (stream.read_one_bit()) { Square ep_square = static_cast(stream.read_n_bit(6)); - if (mirror) { - ep_square = flip_file(ep_square); - } pos.st->epSquare = ep_square; if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN)) diff --git a/src/learn/sfen_packer.h b/src/learn/sfen_packer.h index 533d3fc9..5f232fed 100644 --- a/src/learn/sfen_packer.h +++ b/src/learn/sfen_packer.h @@ -13,7 +13,7 @@ class Thread; namespace Learner { - int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror); + int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th); PackedSfen sfen_pack(Position& pos); } diff --git a/src/main.cpp b/src/main.cpp index fbad6622..e6dff918 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -35,6 +35,7 @@ int main(int argc, char* argv[]) { std::cout << engine_info() << std::endl; + CommandLine::init(argc, argv); UCI::init(Options); Tune::init(); PSQT::init(); @@ -44,7 +45,7 @@ int main(int argc, char* argv[]) { Endgames::init(); Threads.set(size_t(Options["Threads"])); Search::clear(); // After threads are up - Eval::init_NNUE(); + Eval::NNUE::init(); UCI::loop(argc, argv); diff --git a/src/misc.cpp b/src/misc.cpp index 5ef5ecdc..d31538fa 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -132,6 +132,7 @@ public: } // namespace + /// engine_info() returns the full name of the current Stockfish version. This /// will be either "Stockfish DD-MM-YY" (where DD-MM-YY is the date when /// the program was compiled) or "Stockfish ", depending on whether @@ -356,27 +357,11 @@ void std_aligned_free(void* ptr) { #endif } -/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages. -/// The returned pointer is the aligned one, while the mem argument is the one that needs -/// to be passed to free. With c++17 some of this functionality could be simplified. 
+/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages. -#if defined(__linux__) && !defined(__ANDROID__) +#if defined(_WIN32) -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { - - constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes - size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment - if (posix_memalign(&mem, alignment, size)) - mem = nullptr; -#if defined(MADV_HUGEPAGE) - madvise(mem, allocSize, MADV_HUGEPAGE); -#endif - return mem; -} - -#elif defined(_WIN64) - -static void* aligned_ttmem_alloc_large_pages(size_t allocSize) { +static void* aligned_large_pages_alloc_win(size_t allocSize) { HANDLE hProcessToken { }; LUID luid { }; @@ -421,12 +406,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) { return mem; } -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { +void* aligned_large_pages_alloc(size_t allocSize) { static bool firstCall = true; + void* mem; // Try to allocate large pages - mem = aligned_ttmem_alloc_large_pages(allocSize); + mem = aligned_large_pages_alloc_win(allocSize); // Suppress info strings on the first call. The first call occurs before 'uci' // is received and in that case this output confuses some GUIs. 
@@ -448,23 +434,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { #else -void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { +void* aligned_large_pages_alloc(size_t allocSize) { - constexpr size_t alignment = 64; // assumed cache line size - size_t size = allocSize + alignment - 1; // allocate some extra space - mem = malloc(size); - void* ret = reinterpret_cast((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1)); - return ret; +#if defined(__linux__) + constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size +#else + constexpr size_t alignment = 4096; // assumed small page size +#endif + + // round up to multiples of alignment + size_t size = ((allocSize + alignment - 1) / alignment) * alignment; + void *mem = std_aligned_alloc(alignment, size); +#if defined(MADV_HUGEPAGE) + madvise(mem, size, MADV_HUGEPAGE); +#endif + return mem; } #endif -/// aligned_ttmem_free() will free the previously allocated ttmem +/// aligned_large_pages_free() will free the previously allocated ttmem -#if defined(_WIN64) +#if defined(_WIN32) -void aligned_ttmem_free(void* mem) { +void aligned_large_pages_free(void* mem) { if (mem && !VirtualFree(mem, 0, MEM_RELEASE)) { @@ -477,8 +471,8 @@ void aligned_ttmem_free(void* mem) { #else -void aligned_ttmem_free(void *mem) { - free(mem); +void aligned_large_pages_free(void *mem) { + std_aligned_free(mem); } #endif @@ -590,6 +584,63 @@ void bindThisThread(size_t idx) { } // namespace WinProcGroup +#ifdef _WIN32 +#include +#define GETCWD _getcwd +#else +#include +#define GETCWD getcwd +#endif + +namespace CommandLine { + +string argv0; // path+name of the executable binary, as given by argv[0] +string binaryDirectory; // path of the executable directory +string workingDirectory; // path of the working directory +string pathSeparator; // Separator for our current OS + +void init(int argc, char* argv[]) { + (void)argc; + string separator; + + // extract the path+name of the executable binary + argv0 = 
argv[0]; + +#ifdef _WIN32 + pathSeparator = "\\"; + #ifdef _MSC_VER + // Under windows argv[0] may not have the extension. Also _get_pgmptr() had + // issues in some windows 10 versions, so check returned values carefully. + char* pgmptr = nullptr; + if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr) + argv0 = pgmptr; + #endif +#else + pathSeparator = "/"; +#endif + + // extract the working directory + workingDirectory = ""; + char buff[40000]; + char* cwd = GETCWD(buff, 40000); + if (cwd) + workingDirectory = cwd; + + // extract the binary directory path from argv0 + binaryDirectory = argv0; + size_t pos = binaryDirectory.find_last_of("\\/"); + if (pos == std::string::npos) + binaryDirectory = "." + pathSeparator; + else + binaryDirectory.resize(pos + 1); + + // pattern replacement: "./" at the start of path is replaced by the working directory + if (binaryDirectory.find("." + pathSeparator) == 0) + binaryDirectory.replace(0, 1, workingDirectory); +} + + +} // namespace CommandLine // Returns a string that represents the current time. 
(Used when learning evaluation functions) std::string now_string() { diff --git a/src/misc.h b/src/misc.h index 5b7c8870..6696b0a8 100644 --- a/src/misc.h +++ b/src/misc.h @@ -39,8 +39,8 @@ void prefetch(void* addr); void start_logger(const std::string& fname); void* std_aligned_alloc(size_t alignment, size_t size); void std_aligned_free(void* ptr); -void* aligned_ttmem_alloc(size_t size, void*& mem); -void aligned_ttmem_free(void* mem); // nop if mem == nullptr +void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes +void aligned_large_pages_free(void* mem); // nop if mem == nullptr void dbg_hit_on(bool b); void dbg_hit_on(bool c, bool b); @@ -48,9 +48,7 @@ void dbg_mean_of(int v); void dbg_print(); typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds - static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); - inline TimePoint now() { return std::chrono::duration_cast (std::chrono::steady_clock::now().time_since_epoch()).count(); @@ -337,4 +335,11 @@ namespace Dependency extern bool getline(std::ifstream& fs, std::string& s); } +namespace CommandLine { + void init(int argc, char* argv[]); + + extern std::string binaryDirectory; // path of the executable directory + extern std::string workingDirectory; // path of the working directory +} + #endif // #ifndef MISC_H_INCLUDED diff --git a/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h b/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h index 37b155d5..a90de8e6 100644 --- a/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h +++ b/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h @@ -1,7 +1,25 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 
3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + // Definition of input features and network structure used in NNUE evaluation function -#ifndef HALFKP_CR_EP_256X2_32_32_H -#define HALFKP_CR_EP_256X2_32_32_H +#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED +#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED #include "../features/feature_set.h" #include "../features/half_kp.h" @@ -12,31 +30,28 @@ #include "../layers/affine_transform.h" #include "../layers/clipped_relu.h" -namespace Eval { +namespace Eval::NNUE { - namespace NNUE { +// Input features used in evaluation function +using RawFeatures = Features::FeatureSet< + Features::HalfKP, Features::CastlingRight, + Features::EnPassant>; - // Input features used in evaluation function - using RawFeatures = Features::FeatureSet< - Features::HalfKP, Features::CastlingRight, - Features::EnPassant>; +// Number of input feature dimensions after conversion +constexpr IndexType kTransformedFeatureDimensions = 256; - // Number of input feature dimensions after conversion - constexpr IndexType kTransformedFeatureDimensions = 256; +namespace Layers { - namespace Layers { +// Define network structure +using InputLayer = InputSlice; +using HiddenLayer1 = ClippedReLU>; +using HiddenLayer2 = ClippedReLU>; +using OutputLayer = AffineTransform; - // define network structure - using InputLayer = InputSlice; - using HiddenLayer1 = ClippedReLU>; - using HiddenLayer2 = ClippedReLU>; - using OutputLayer = AffineTransform; +} // namespace Layers - } // namespace Layers +using Network = Layers::OutputLayer; - using Network = Layers::OutputLayer; +} // namespace Eval::NNUE - } // 
namespace NNUE - -} // namespace Eval -#endif // HALFKP_CR_EP_256X2_32_32_H +#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED diff --git a/src/nnue/architectures/halfkp-cr_256x2-32-32.h b/src/nnue/architectures/halfkp-cr_256x2-32-32.h new file mode 100644 index 00000000..df14f499 --- /dev/null +++ b/src/nnue/architectures/halfkp-cr_256x2-32-32.h @@ -0,0 +1,37 @@ +// Definition of input features and network structure used in NNUE evaluation function + +#ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED +#define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED + +#include "../features/feature_set.h" +#include "../features/half_kp.h" +#include "../features/castling_right.h" + +#include "../layers/input_slice.h" +#include "../layers/affine_transform.h" +#include "../layers/clipped_relu.h" + +namespace Eval::NNUE { + +// Input features used in evaluation function +using RawFeatures = Features::FeatureSet< + Features::HalfKP, Features::CastlingRight>; + +// Number of input feature dimensions after conversion +constexpr IndexType kTransformedFeatureDimensions = 256; + +namespace Layers { + +// Define network structure +using InputLayer = InputSlice; +using HiddenLayer1 = ClippedReLU>; +using HiddenLayer2 = ClippedReLU>; +using OutputLayer = AffineTransform; + +} // namespace Layers + +using Network = Layers::OutputLayer; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED diff --git a/src/nnue/architectures/k-p_256x2-32-32.h b/src/nnue/architectures/k-p_256x2-32-32.h index 00b14d47..0f340dee 100644 --- a/src/nnue/architectures/k-p_256x2-32-32.h +++ b/src/nnue/architectures/k-p_256x2-32-32.h @@ -1,4 +1,5 @@ // Definition of input features and network structure used in NNUE evaluation function + #ifndef K_P_256X2_32_32_H #define K_P_256X2_32_32_H diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index a2845c96..28c86feb 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -18,7 +18,6 @@ // Code for 
calculating NNUE evaluation function -#include #include #include @@ -31,7 +30,7 @@ namespace Eval::NNUE { - uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { + const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { // convention: W - us, B - them // viewed from other side, W and B are reversed { PS_NONE, PS_NONE }, @@ -53,7 +52,7 @@ namespace Eval::NNUE { }; // Input feature converter - AlignedPtr feature_transformer; + LargePagePtr feature_transformer; // Evaluation function AlignedPtr network; @@ -80,14 +79,22 @@ namespace Eval::NNUE { std::memset(pointer.get(), 0, sizeof(T)); } + template + void Initialize(LargePagePtr& pointer) { + + static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T"); + pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } + // Read evaluation function parameters template - bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { + bool ReadParameters(std::istream& stream, T& reference) { std::uint32_t header; header = read_little_endian(stream); if (!stream || header != T::GetHashValue()) return false; - return pointer->ReadParameters(stream); + return reference.ReadParameters(stream); } // write evaluation function parameters @@ -98,6 +105,13 @@ namespace Eval::NNUE { return pointer->WriteParameters(stream); } + template + bool WriteParameters(std::ostream& stream, const LargePagePtr& pointer) { + constexpr std::uint32_t header = T::GetHashValue(); + stream.write(reinterpret_cast(&header), sizeof(header)); + return pointer->WriteParameters(stream); + } + } // namespace Detail // Initialize the evaluation function parameters @@ -139,11 +153,10 @@ namespace Eval::NNUE { std::string architecture; if (!ReadHeader(stream, &hash_value, &architecture)) return false; if (hash_value != kHashValue) return false; - if (!Detail::ReadParameters(stream, feature_transformer)) return false; - if 
(!Detail::ReadParameters(stream, network)) return false; + if (!Detail::ReadParameters(stream, *feature_transformer)) return false; + if (!Detail::ReadParameters(stream, *network)) return false; return stream && stream.peek() == std::ios::traits_type::eof(); } - // write evaluation function parameters bool WriteParameters(std::ostream& stream) { if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false; @@ -151,36 +164,20 @@ namespace Eval::NNUE { if (!Detail::WriteParameters(stream, network)) return false; return !stream.fail(); } - - // Proceed with the difference calculation if possible - static void UpdateAccumulatorIfPossible(const Position& pos) { - - feature_transformer->UpdateAccumulatorIfPossible(pos); - } - - // Calculate the evaluation value - static Value ComputeScore(const Position& pos, bool refresh) { - - auto& accumulator = pos.state()->accumulator; - if (!refresh && accumulator.computed_score) { - return accumulator.score; - } + // Evaluation function. Perform differential calculation. + Value evaluate(const Position& pos) { alignas(kCacheLineSize) TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize]; - feature_transformer->Transform(pos, transformed_features, refresh); + feature_transformer->Transform(pos, transformed_features); alignas(kCacheLineSize) char buffer[Network::kBufferSize]; const auto output = network->Propagate(transformed_features, buffer); - auto score = static_cast(output[0] / FV_SCALE); - - accumulator.score = score; - accumulator.computed_score = true; - return accumulator.score; + return static_cast(output[0] / FV_SCALE); } - // Load the evaluation function file - bool load_eval_file(const std::string& evalFile) { + // Load eval, from a file stream or a memory stream + bool load_eval(std::string name, std::istream& stream) { Initialize(); @@ -189,29 +186,8 @@ namespace Eval::NNUE { std::cout << "info string SkipLoadingEval set to true, Net not loaded!" 
<< std::endl; return true; } - - fileName = evalFile; - - std::ifstream stream(evalFile, std::ios::binary); - - const bool result = ReadParameters(stream); - - return result; - } - - // Evaluation function. Perform differential calculation. - Value evaluate(const Position& pos) { - return ComputeScore(pos, false); - } - - // Evaluation function. Perform full calculation. - Value compute_eval(const Position& pos) { - return ComputeScore(pos, true); - } - - // Proceed with the difference calculation if possible - void update_eval(const Position& pos) { - UpdateAccumulatorIfPossible(pos); + fileName = name; + return ReadParameters(stream); } } // namespace Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 75700d03..68153cac 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -40,11 +40,22 @@ namespace Eval::NNUE { } }; + template + struct LargePageDeleter { + void operator()(T* ptr) const { + ptr->~T(); + aligned_large_pages_free(ptr); + } + }; + template using AlignedPtr = std::unique_ptr>; + template + using LargePagePtr = std::unique_ptr>; + // Input feature converter - extern AlignedPtr feature_transformer; + extern LargePagePtr feature_transformer; // Evaluation function extern AlignedPtr network; diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp index ea680e31..2d6c6db3 100644 --- a/src/nnue/evaluate_nnue_learner.cpp +++ b/src/nnue/evaluate_nnue_learner.cpp @@ -5,15 +5,12 @@ #include #include "../learn/learn.h" -#include "../learn/learning_tools.h" #include "../position.h" #include "../uci.h" #include "../misc.h" #include "../thread_win32_osx.h" -#include "../eval/evaluate_common.h" - #include "evaluate_nnue.h" #include "evaluate_nnue_learner.h" #include "trainer/features/factorizer_feature_set.h" @@ -24,215 +21,191 @@ #include "trainer/trainer_clipped_relu.h" #include "trainer/trainer_sum.h" -namespace Eval { - -namespace NNUE { - -namespace { - -// learning data -std::vector 
examples; - -// Mutex for exclusive control of examples -std::mutex examples_mutex; - -// number of samples in mini-batch -uint64_t batch_size; - -// random number generator -std::mt19937 rng; - -// learner -std::shared_ptr> trainer; - // Learning rate scale -double global_learning_rate_scale; +double global_learning_rate; -// Get the learning rate scale -double GetGlobalLearningRateScale() { - return global_learning_rate_scale; -} +namespace Eval::NNUE { -// Tell the learner options such as hyperparameters -void SendMessages(std::vector messages) { - for (auto& message : messages) { - trainer->SendMessage(&message); - assert(message.num_receivers > 0); - } -} + namespace { -} // namespace + // learning data + std::vector examples; -// Initialize learning -void InitializeTraining(double eta1, uint64_t eta1_epoch, - double eta2, uint64_t eta2_epoch, double eta3) { - std::cout << "Initializing NN training for " - << GetArchitectureString() << std::endl; + // Mutex for exclusive control of examples + std::mutex examples_mutex; - assert(feature_transformer); - assert(network); - trainer = Trainer::Create(network.get(), feature_transformer.get()); + // number of samples in mini-batch + uint64_t batch_size; - if (Options["SkipLoadingEval"]) { - trainer->Initialize(rng); - } + // random number generator + std::mt19937 rng; - global_learning_rate_scale = 1.0; - EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch); -} + // learner + std::shared_ptr> trainer; -// set the number of samples in the mini-batch -void SetBatchSize(uint64_t size) { - assert(size > 0); - batch_size = size; -} - -// set the learning rate scale -void SetGlobalLearningRateScale(double scale) { - global_learning_rate_scale = scale; -} - -// Set options such as hyperparameters -void SetOptions(const std::string& options) { - std::vector messages; - for (const auto& option : Split(options, ',')) { - const auto fields = Split(option, '='); - assert(fields.size() == 1 || 
fields.size() == 2); - if (fields.size() == 1) { - messages.emplace_back(fields[0]); - } else { - messages.emplace_back(fields[0], fields[1]); - } - } - SendMessages(std::move(messages)); -} - -// Reread the evaluation function parameters for learning from the file -void RestoreParameters(const std::string& dir_name) { - const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName); - std::ifstream stream(file_name, std::ios::binary); -#ifndef NDEBUG - bool result = -#endif - ReadParameters(stream); -#ifndef NDEBUG - assert(result); -#endif - - SendMessages({{"reset"}}); -} - -// Add 1 sample of learning data -void AddExample(Position& pos, Color rootColor, - const Learner::PackedSfenValue& psv, double weight) { - Example example; - if (rootColor == pos.side_to_move()) { - example.sign = 1; - } else { - example.sign = -1; - } - example.psv = psv; - example.weight = weight; - - Features::IndexList active_indices[2]; - for (const auto trigger : kRefreshTriggers) { - RawFeatures::AppendActiveIndices(pos, trigger, active_indices); - } - if (pos.side_to_move() != WHITE) { - active_indices[0].swap(active_indices[1]); - } - for (const auto color : Colors) { - std::vector training_features; - for (const auto base_index : active_indices[color]) { - static_assert(Features::Factorizer::GetDimensions() < - (1 << TrainingFeature::kIndexBits), ""); - Features::Factorizer::AppendTrainingFeatures( - base_index, &training_features); - } - std::sort(training_features.begin(), training_features.end()); - - auto& unique_features = example.training_features[color]; - for (const auto& feature : training_features) { - if (!unique_features.empty() && - feature.GetIndex() == unique_features.back().GetIndex()) { - unique_features.back() += feature; - } else { - unique_features.push_back(feature); + // Tell the learner options such as hyperparameters + void SendMessages(std::vector messages) { + for (auto& message : messages) { + trainer->SendMessage(&message); + 
assert(message.num_receivers > 0); } } + + } // namespace + + // Initialize learning + void InitializeTraining(const std::string& seed) { + std::cout << "Initializing NN training for " + << GetArchitectureString() << std::endl; + + assert(feature_transformer); + assert(network); + trainer = Trainer::Create(network.get(), feature_transformer.get()); + rng.seed(PRNG(seed).rand()); + + if (Options["SkipLoadingEval"]) { + trainer->Initialize(rng); + } } - std::lock_guard lock(examples_mutex); - examples.push_back(std::move(example)); -} + // set the number of samples in the mini-batch + void SetBatchSize(uint64_t size) { + assert(size > 0); + batch_size = size; + } + + // Set options such as hyperparameters + void SetOptions(const std::string& options) { + std::vector messages; + for (const auto& option : Split(options, ',')) { + const auto fields = Split(option, '='); + assert(fields.size() == 1 || fields.size() == 2); + if (fields.size() == 1) { + messages.emplace_back(fields[0]); + } else { + messages.emplace_back(fields[0], fields[1]); + } + } + SendMessages(std::move(messages)); + } -// update the evaluation function parameters -void UpdateParameters(uint64_t epoch) { - assert(batch_size > 0); + // Reread the evaluation function parameters for learning from the file + void RestoreParameters(const std::string& dir_name) { + const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName); + std::ifstream stream(file_name, std::ios::binary); +#ifndef NDEBUG + bool result = +#endif + ReadParameters(stream); +#ifndef NDEBUG + assert(result); +#endif - EvalLearningTools::Weight::calc_eta(epoch); - const auto learning_rate = static_cast( - get_eta() / batch_size); + SendMessages({{"reset"}}); + } - std::lock_guard lock(examples_mutex); - std::shuffle(examples.begin(), examples.end(), rng); - while (examples.size() >= batch_size) { - std::vector batch(examples.end() - batch_size, examples.end()); - examples.resize(examples.size() - batch_size); + void 
FinalizeNet() { + SendMessages({{"clear_unobserved_feature_weights"}}); + } - const auto network_output = trainer->Propagate(batch); + // Add 1 sample of learning data + void AddExample(Position& pos, Color rootColor, + const Learner::PackedSfenValue& psv, double weight) { + Example example; + if (rootColor == pos.side_to_move()) { + example.sign = 1; + } else { + example.sign = -1; + } + example.psv = psv; + example.weight = weight; - std::vector gradients(batch.size()); - for (std::size_t b = 0; b < batch.size(); ++b) { - const auto shallow = static_cast(Round( - batch[b].sign * network_output[b] * kPonanzaConstant)); - const auto& psv = batch[b].psv; - const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv); - gradients[b] = static_cast(gradient * batch[b].weight); + Features::IndexList active_indices[2]; + for (const auto trigger : kRefreshTriggers) { + RawFeatures::AppendActiveIndices(pos, trigger, active_indices); + } + if (pos.side_to_move() != WHITE) { + active_indices[0].swap(active_indices[1]); + } + for (const auto color : Colors) { + std::vector training_features; + for (const auto base_index : active_indices[color]) { + static_assert(Features::Factorizer::GetDimensions() < + (1 << TrainingFeature::kIndexBits), ""); + Features::Factorizer::AppendTrainingFeatures( + base_index, &training_features); + } + std::sort(training_features.begin(), training_features.end()); + + auto& unique_features = example.training_features[color]; + for (const auto& feature : training_features) { + if (!unique_features.empty() && + feature.GetIndex() == unique_features.back().GetIndex()) { + unique_features.back() += feature; + } else { + unique_features.push_back(feature); + } + } } - trainer->Backpropagate(gradients.data(), learning_rate); - } - SendMessages({{"quantize_parameters"}}); -} - -// Check if there are any problems with learning -void CheckHealth() { - SendMessages({{"check_health"}}); -} - -} // namespace NNUE - -// save merit function 
parameters to a file -void save_eval(std::string dir_name) { - auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name); - std::cout << "save_eval() start. folder = " << eval_dir << std::endl; - - // mkdir() will fail if this folder already exists, but - // Apart from that. If not, I just want you to make it. - // Also, assume that the folders up to EvalSaveDir have been dug. - std::filesystem::create_directories(eval_dir); - - if (Options["SkipLoadingEval"] && NNUE::trainer) { - NNUE::SendMessages({{"clear_unobserved_feature_weights"}}); + std::lock_guard lock(examples_mutex); + examples.push_back(std::move(example)); } - const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName); - std::ofstream stream(file_name, std::ios::binary); + // update the evaluation function parameters + void UpdateParameters() { + assert(batch_size > 0); + + const auto learning_rate = static_cast( + global_learning_rate / batch_size); + + std::lock_guard lock(examples_mutex); + std::shuffle(examples.begin(), examples.end(), rng); + while (examples.size() >= batch_size) { + std::vector batch(examples.end() - batch_size, examples.end()); + examples.resize(examples.size() - batch_size); + + const auto network_output = trainer->Propagate(batch); + + std::vector gradients(batch.size()); + for (std::size_t b = 0; b < batch.size(); ++b) { + const auto shallow = static_cast(Round( + batch[b].sign * network_output[b] * kPonanzaConstant)); + const auto& psv = batch[b].psv; + const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv); + gradients[b] = static_cast(gradient * batch[b].weight); + } + + trainer->Backpropagate(gradients.data(), learning_rate); + } + SendMessages({{"quantize_parameters"}}); + } + + // Check if there are any problems with learning + void CheckHealth() { + SendMessages({{"check_health"}}); + } + + // save merit function parameters to a file + void save_eval(std::string dir_name) { + auto eval_dir = Path::Combine(Options["EvalSaveDir"], 
dir_name); + std::cout << "save_eval() start. folder = " << eval_dir << std::endl; + + // mkdir() will fail if this folder already exists, but + // Apart from that. If not, I just want you to make it. + // Also, assume that the folders up to EvalSaveDir have been dug. + std::filesystem::create_directories(eval_dir); + + const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName); + std::ofstream stream(file_name, std::ios::binary); #ifndef NDEBUG - const bool result = + bool result = #endif - NNUE::WriteParameters(stream); + WriteParameters(stream); #ifndef NDEBUG - assert(result); + assert(result); #endif - std::cout << "save_eval() finished. folder = " << eval_dir << std::endl; -} - -// get the current eta -double get_eta() { - return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta; -} - -} // namespace Eval + std::cout << "save_eval() finished. folder = " << eval_dir << std::endl; + } +} // namespace Eval::NNUE \ No newline at end of file diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h index e9bd2fd2..c41d8d6b 100644 --- a/src/nnue/evaluate_nnue_learner.h +++ b/src/nnue/evaluate_nnue_learner.h @@ -5,38 +5,33 @@ #include "../learn/learn.h" -namespace Eval { +namespace Eval::NNUE { -namespace NNUE { + // Initialize learning + void InitializeTraining(const std::string& seed); -// Initialize learning -void InitializeTraining(double eta1, uint64_t eta1_epoch, - double eta2, uint64_t eta2_epoch, double eta3); + // set the number of samples in the mini-batch + void SetBatchSize(uint64_t size); -// set the number of samples in the mini-batch -void SetBatchSize(uint64_t size); + // Set options such as hyperparameters + void SetOptions(const std::string& options); -// set the learning rate scale -void SetGlobalLearningRateScale(double scale); - -// Set options such as hyperparameters -void SetOptions(const std::string& options); - -// Reread the evaluation function parameters for learning from the file 
-void RestoreParameters(const std::string& dir_name); + // Reread the evaluation function parameters for learning from the file + void RestoreParameters(const std::string& dir_name); // Add 1 sample of learning data -void AddExample(Position& pos, Color rootColor, - const Learner::PackedSfenValue& psv, double weight); + void AddExample(Position& pos, Color rootColor, + const Learner::PackedSfenValue& psv, double weight); -// update the evaluation function parameters -void UpdateParameters(uint64_t epoch); + // update the evaluation function parameters + void UpdateParameters(); -// Check if there are any problems with learning -void CheckHealth(); + // Check if there are any problems with learning + void CheckHealth(); -} // namespace NNUE + void FinalizeNet(); -} // namespace Eval + void save_eval(std::string suffix); +} // namespace Eval::NNUE #endif diff --git a/src/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp index ee2c88cf..2d7f563a 100644 --- a/src/nnue/features/castling_right.cpp +++ b/src/nnue/features/castling_right.cpp @@ -1,69 +1,40 @@ -//Definition of input feature quantity K of NNUE evaluation function +//Definition of input feature quantity CastlingRight of NNUE evaluation function #include "castling_right.h" #include "index_list.h" -namespace Eval { +namespace Eval::NNUE::Features { - namespace NNUE { + // Get a list of indices with a value of 1 among the features + void CastlingRight::AppendActiveIndices( + const Position& pos, Color perspective, IndexList* active) { + // do nothing if array size is small to avoid compiler warning + if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - namespace Features { + int castling_rights = pos.state()->castlingRights; + int relative_castling_rights; + if (perspective == WHITE) { + relative_castling_rights = castling_rights; + } + else { + // Invert the perspective. 
+ relative_castling_rights = ((castling_rights & 3) << 2) + & ((castling_rights >> 2) & 3); + } - // Get a list of indices with a value of 1 among the features - void CastlingRight::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - int castling_rights = pos.state()->castlingRights; - int relative_castling_rights; - if (perspective == WHITE) { - relative_castling_rights = castling_rights; - } - else { - // Invert the perspective. - relative_castling_rights = ((castling_rights & 3) << 2) - & ((castling_rights >> 2) & 3); - } - - for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) { - if (relative_castling_rights & (1 << i)) { - active->push_back(i); - } - } + for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) { + if (relative_castling_rights & (1 << i)) { + active->push_back(i); } + } + } - // Get a list of indices whose values ​​have changed from the previous one in the feature quantity - void CastlingRight::AppendChangedIndices( - const Position& pos, Color perspective, - IndexList* removed, IndexList* /* added */) { + // Get a list of indices whose values ​​have changed from the previous one in the feature quantity + void CastlingRight::AppendChangedIndices( + const Position& /* pos */, Color /* perspective */, + IndexList* /* removed */, IndexList* /* added */) { + // Not implemented. + assert(false); + } - int previous_castling_rights = pos.state()->previous->castlingRights; - int current_castling_rights = pos.state()->castlingRights; - int relative_previous_castling_rights; - int relative_current_castling_rights; - if (perspective == WHITE) { - relative_previous_castling_rights = previous_castling_rights; - relative_current_castling_rights = current_castling_rights; - } - else { - // Invert the perspective. 
- relative_previous_castling_rights = ((previous_castling_rights & 3) << 2) - & ((previous_castling_rights >> 2) & 3); - relative_current_castling_rights = ((current_castling_rights & 3) << 2) - & ((current_castling_rights >> 2) & 3); - } - - for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) { - if ((relative_previous_castling_rights & (1 << i)) && - (relative_current_castling_rights & (1 << i)) == 0) { - removed->push_back(i); - } - } - } - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval +} // namespace Eval::NNUE::Features diff --git a/src/nnue/features/castling_right.h b/src/nnue/features/castling_right.h index 27074080..3a09e14b 100644 --- a/src/nnue/features/castling_right.h +++ b/src/nnue/features/castling_right.h @@ -1,4 +1,4 @@ -//Definition of input feature quantity K of NNUE evaluation function +//Definition of input feature quantity CastlingRight of NNUE evaluation function #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_ #define _NNUE_FEATURES_CASTLING_RIGHT_H_ @@ -6,39 +6,30 @@ #include "../../evaluate.h" #include "features_common.h" -namespace Eval { +namespace Eval::NNUE::Features { - namespace NNUE { + class CastlingRight { + public: + // feature quantity name + static constexpr const char* kName = "CastlingRight"; + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t kHashValue = 0x913968AAu; + // number of feature dimensions + static constexpr IndexType kDimensions = 4; + // The maximum value of the number of indexes whose value is 1 at the same time among the feature values + static constexpr IndexType kMaxActiveDimensions = 4; + // Timing of full calculation instead of difference calculation + static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved; - namespace Features { + // Get a list of indices with a value of 1 among the features + static void AppendActiveIndices(const Position& pos, Color perspective, + IndexList* active); - // Feature K: Ball position - class 
CastlingRight { - public: - // feature quantity name - static constexpr const char* kName = "CastlingRight"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = 0x913968AAu; - // number of feature dimensions - static constexpr IndexType kDimensions = 4; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = 4; - // Timing of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone; + // Get a list of indices whose values ​​have changed from the previous one in the feature quantity + static void AppendChangedIndices(const Position& pos, Color perspective, + IndexList* removed, IndexList* added); + }; - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ??have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); - }; - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval +} // namespace Eval::NNUE::Features #endif diff --git a/src/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp index 386bd907..d771a85c 100644 --- a/src/nnue/features/enpassant.cpp +++ b/src/nnue/features/enpassant.cpp @@ -1,43 +1,30 @@ -//Definition of input feature quantity K of NNUE evaluation function +//Definition of input feature quantity EnPassant of NNUE evaluation function #include "enpassant.h" #include "index_list.h" -namespace Eval { +namespace Eval::NNUE::Features { - namespace NNUE { + // Get a list of indices with a value of 1 among the features + void EnPassant::AppendActiveIndices( + const Position& pos, Color /* perspective */, IndexList* active) { + // do nothing if array 
size is small to avoid compiler warning + if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - namespace Features { + auto epSquare = pos.state()->epSquare; + if (epSquare == SQ_NONE) { + return; + } + auto file = file_of(epSquare); + active->push_back(file); + } - // Get a list of indices with a value of 1 among the features - void EnPassant::AppendActiveIndices( - const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; + // Get a list of indices whose values ​​have changed from the previous one in the feature quantity + void EnPassant::AppendChangedIndices( + const Position& /* pos */, Color /* perspective */, + IndexList* /* removed */, IndexList* /* added */) { + // Not implemented. + assert(false); + } - auto epSquare = pos.state()->epSquare; - if (epSquare == SQ_NONE) { - return; - } - - if (perspective == BLACK) { - epSquare = rotate180(epSquare); - } - - auto file = file_of(epSquare); - active->push_back(file); - } - - // Get a list of indices whose values ??have changed from the previous one in the feature quantity - void EnPassant::AppendChangedIndices( - const Position& /* pos */, Color /* perspective */, - IndexList* /* removed */, IndexList* /* added */) { - // Not implemented. 
- assert(false); - } - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval +} // namespace Eval::NNUE::Features diff --git a/src/nnue/features/enpassant.h b/src/nnue/features/enpassant.h index 70a8eb5a..efa5eae9 100644 --- a/src/nnue/features/enpassant.h +++ b/src/nnue/features/enpassant.h @@ -1,4 +1,4 @@ -//Definition of input feature quantity K of NNUE evaluation function +//Definition of input feature quantity EnPassant of NNUE evaluation function #ifndef _NNUE_FEATURES_ENPASSANT_H_ #define _NNUE_FEATURES_ENPASSANT_H_ @@ -6,39 +6,30 @@ #include "../../evaluate.h" #include "features_common.h" -namespace Eval { +namespace Eval::NNUE::Features { - namespace NNUE { + class EnPassant { + public: + // feature quantity name + static constexpr const char* kName = "EnPassant"; + // Hash value embedded in the evaluation function file + static constexpr std::uint32_t kHashValue = 0x02924F91u; + // number of feature dimensions + static constexpr IndexType kDimensions = 8; + // The maximum value of the number of indexes whose value is 1 at the same time among the feature values + static constexpr IndexType kMaxActiveDimensions = 1; + // Timing of full calculation instead of difference calculation + static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved; - namespace Features { + // Get a list of indices with a value of 1 among the features + static void AppendActiveIndices(const Position& pos, Color perspective, + IndexList* active); - // Feature K: Ball position - class EnPassant { - public: - // feature quantity name - static constexpr const char* kName = "EnPassant"; - // Hash value embedded in the evaluation function file - static constexpr std::uint32_t kHashValue = 0x02924F91u; - // number of feature dimensions - static constexpr IndexType kDimensions = 8; - // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = 1; - // Timing 
of full calculation instead of difference calculation - static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved; + // Get a list of indices whose values ??have changed from the previous one in the feature quantity + static void AppendChangedIndices(const Position& pos, Color perspective, + IndexList* removed, IndexList* added); + }; - // Get a list of indices with a value of 1 among the features - static void AppendActiveIndices(const Position& pos, Color perspective, - IndexList* active); - - // Get a list of indices whose values ??have changed from the previous one in the feature quantity - static void AppendChangedIndices(const Position& pos, Color perspective, - IndexList* removed, IndexList* added); - }; - - } // namespace Features - - } // namespace NNUE - -} // namespace Eval +} // namespace Eval::NNUE::Features #endif diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index b933d2d9..2ef92e8e 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -105,9 +105,20 @@ namespace Eval::NNUE::Features { for (Color perspective : { WHITE, BLACK }) { reset[perspective] = false; switch (trigger) { + case TriggerEvent::kNone: + break; case TriggerEvent::kFriendKingMoved: reset[perspective] = dp.piece[0] == make_piece(perspective, KING); break; + case TriggerEvent::kEnemyKingMoved: + reset[perspective] = dp.piece[0] == make_piece(~perspective, KING); + break; + case TriggerEvent::kAnyKingMoved: + reset[perspective] = type_of(dp.piece[0]) == KING; + break; + case TriggerEvent::kAnyPieceMoved: + reset[perspective] = true; + break; default: assert(false); break; diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h index 3377cd8f..656502a3 100644 --- a/src/nnue/features/features_common.h +++ b/src/nnue/features/features_common.h @@ -34,10 +34,10 @@ namespace Eval::NNUE::Features { // Trigger to perform full calculations instead of difference only enum class 
TriggerEvent { kNone, // Calculate the difference whenever possible - kFriendKingMoved, // calculate all when own ball moves - kEnemyKingMoved, // do all calculations when enemy balls move - kAnyKingMoved, // do all calculations if either ball moves - kAnyPieceMoved, // always do all calculations + kFriendKingMoved, // calculate full evaluation when own king moves + kEnemyKingMoved, // calculate full evaluation when opponent king moves + kAnyKingMoved, // calculate full evaluation when any king moves + kAnyPieceMoved, // always calculate full evaluation }; enum class Side { diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp index 88e384a3..ff20a00a 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_kp.cpp @@ -23,9 +23,9 @@ namespace Eval::NNUE::Features { - // Orient a square according to perspective (rotates by 180 for black) + // Orient a square according to perspective (flip rank for black) inline Square orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 63)); + return Square(int(s) ^ (bool(perspective) * SQ_A8)); } // Find the index of the feature quantity from the king position and PieceSquare diff --git a/src/nnue/features/half_relative_kp.cpp b/src/nnue/features/half_relative_kp.cpp index 597d65fb..8a61bada 100644 --- a/src/nnue/features/half_relative_kp.cpp +++ b/src/nnue/features/half_relative_kp.cpp @@ -9,9 +9,9 @@ namespace NNUE { namespace Features { -// Orient a square according to perspective (rotates by 180 for black) +// Orient a square according to perspective (flip rank for black) inline Square orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 63)); + return Square(int(s) ^ (bool(perspective) * SQ_A8)); } // Find the index of the feature quantity from the ball position and PieceSquare diff --git a/src/nnue/features/k.cpp b/src/nnue/features/k.cpp index 38ec9997..bd8d7dd0 100644 --- a/src/nnue/features/k.cpp +++ b/src/nnue/features/k.cpp 
@@ -9,9 +9,9 @@ namespace NNUE { namespace Features { -// Orient a square according to perspective (rotates by 180 for black) +// Orient a square according to perspective (flip rank for black) inline Square orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 63)); + return Square(int(s) ^ (bool(perspective) * SQ_A8)); } // Index of a feature for a given king position. @@ -32,19 +32,11 @@ void K::AppendChangedIndices( const Position& pos, Color perspective, IndexList* removed, IndexList* added) { const auto& dp = pos.state()->dirtyPiece; - Color king_color; - if (dp.piece[0] == Piece::W_KING) { - king_color = WHITE; + if (type_of(dp.piece[0]) == KING) + { + removed->push_back(MakeIndex(perspective, dp.from[0], color_of(dp.piece[0]))); + added->push_back(MakeIndex(perspective, dp.to[0], color_of(dp.piece[0]))); } - else if (dp.piece[0] == Piece::B_KING) { - king_color = BLACK; - } - else { - return; - } - - removed->push_back(MakeIndex(perspective, dp.from[0], king_color)); - added->push_back(MakeIndex(perspective, dp.to[0], king_color)); } } // namespace Features diff --git a/src/nnue/features/p.cpp b/src/nnue/features/p.cpp index 0c1b7d50..012311ac 100644 --- a/src/nnue/features/p.cpp +++ b/src/nnue/features/p.cpp @@ -9,9 +9,9 @@ namespace NNUE { namespace Features { -// Orient a square according to perspective (rotates by 180 for black) +// Orient a square according to perspective (flip rank for black) inline Square orient(Color perspective, Square s) { - return Square(int(s) ^ (bool(perspective) * 63)); + return Square(int(s) ^ (bool(perspective) * SQ_A8)); } // Find the index of the feature quantity from the king position and PieceSquare diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 69dfaad2..26370710 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -29,9 +29,7 @@ namespace Eval::NNUE { struct alignas(kCacheLineSize) Accumulator { std::int16_t 
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; - Value score; bool computed_accumulation; - bool computed_score; }; } // namespace Eval::NNUE diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index 91cdc4bd..c395d515 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -22,7 +22,7 @@ #define NNUE_ARCHITECTURE_H_INCLUDED // Defines the network structure -#include "architectures/halfkp_256x2-32-32.h" +#include "architectures/halfkp-cr-ep_256x2-32-32.h" namespace Eval::NNUE { diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index d7ffa21a..319f005b 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -69,7 +69,7 @@ namespace Eval::NNUE { // Version of the evaluation file - constexpr std::uint32_t kVersion = 0x7AF32F16u; + constexpr std::uint32_t kVersion = 0x7AF32F17u; // Constant used in evaluation value calculation constexpr int FV_SCALE = 16; @@ -113,7 +113,7 @@ namespace Eval::NNUE { PS_END2 = 12 * SQUARE_NB + 1 }; - extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index e1bc2ab8..e3f4b1c6 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -1,4 +1,4 @@ -/* +/* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) @@ -29,6 +29,61 @@ namespace Eval::NNUE { + // If vector instructions are enabled, we update and refresh the + // accumulator tile by tile such that each tile fits in the CPU's + // vector registers. 
+ #define TILING + + #ifdef USE_AVX512 + typedef __m512i vec_t; + #define vec_load(a) _mm512_loadA_si512(a) + #define vec_store(a,b) _mm512_storeA_si512(a,b) + #define vec_add_16(a,b) _mm512_add_epi16(a,b) + #define vec_sub_16(a,b) _mm512_sub_epi16(a,b) + #define vec_zero _mm512_setzero_si512() + static constexpr IndexType kNumRegs = 8; // only 8 are needed + + #elif USE_AVX2 + typedef __m256i vec_t; + #define vec_load(a) _mm256_loadA_si256(a) + #define vec_store(a,b) _mm256_storeA_si256(a,b) + #define vec_add_16(a,b) _mm256_add_epi16(a,b) + #define vec_sub_16(a,b) _mm256_sub_epi16(a,b) + #define vec_zero _mm256_setzero_si256() + static constexpr IndexType kNumRegs = 16; + + #elif USE_SSE2 + typedef __m128i vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_epi16(a,b) + #define vec_sub_16(a,b) _mm_sub_epi16(a,b) + #define vec_zero _mm_setzero_si128() + static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8; + + #elif USE_MMX + typedef __m64 vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) _mm_add_pi16(a,b) + #define vec_sub_16(a,b) _mm_sub_pi16(a,b) + #define vec_zero _mm_setzero_si64() + static constexpr IndexType kNumRegs = 8; + + #elif USE_NEON + typedef int16x8_t vec_t; + #define vec_load(a) (*(a)) + #define vec_store(a,b) *(a)=(b) + #define vec_add_16(a,b) vaddq_s16(a,b) + #define vec_sub_16(a,b) vsubq_s16(a,b) + #define vec_zero {0} + static constexpr IndexType kNumRegs = 16; + + #else + #undef TILING + + #endif + // Input feature converter class FeatureTransformer { @@ -36,6 +91,11 @@ namespace Eval::NNUE { // Number of output dimensions for one side static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + #ifdef TILING + static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2; + static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions"); + #endif + public: // Output type using OutputType = 
TransformedFeatureType; @@ -50,6 +110,7 @@ namespace Eval::NNUE { // Hash value embedded in the evaluation file static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; } @@ -62,6 +123,7 @@ namespace Eval::NNUE { // Read network parameters bool ReadParameters(std::istream& stream) { + for (std::size_t i = 0; i < kHalfDimensions; ++i) biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) @@ -80,23 +142,26 @@ namespace Eval::NNUE { // Proceed with the difference calculation if possible bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); - if (now->accumulator.computed_accumulation) { + if (now->accumulator.computed_accumulation) return true; - } + const auto prev = now->previous; if (prev && prev->accumulator.computed_accumulation) { UpdateAccumulator(pos); return true; } + return false; } // Convert input features - void Transform(const Position& pos, OutputType* output, bool refresh) const { - if (refresh || !UpdateAccumulatorIfPossible(pos)) { + void Transform(const Position& pos, OutputType* output) const { + + if (!UpdateAccumulatorIfPossible(pos)) RefreshAccumulator(pos); - } + const auto& accumulation = pos.state()->accumulator.accumulation; #if defined(USE_AVX2) @@ -133,6 +198,12 @@ namespace Eval::NNUE { &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); __m256i sum1 = _mm256_loadA_si256( &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm256_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm256_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( _mm256_packs_epi16(sum0, sum1), kZero), kControl)); } @@ -144,6 +215,12 @@ namespace Eval::NNUE { 
accumulation[perspectives[p]][0])[j * 2 + 0]); __m128i sum1 = _mm_load_si128(&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_epi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_epi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); _mm_store_si128(&out[j], @@ -164,6 +241,12 @@ namespace Eval::NNUE { accumulation[perspectives[p]][0])[j * 2 + 0]); __m64 sum1 = *(&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 1]); + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum0 = _mm_add_pi16(sum0, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 0]); + sum1 = _mm_add_pi16(sum1, reinterpret_cast( + accumulation[perspectives[p]][i])[j * 2 + 1]); + } const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); } @@ -173,12 +256,19 @@ namespace Eval::NNUE { for (IndexType j = 0; j < kNumChunks; ++j) { int16x8_t sum = reinterpret_cast( accumulation[perspectives[p]][0])[j]; + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum = vaddq_s16(sum, reinterpret_cast( + accumulation[perspectives[p]][i])[j]); + } out[j] = vmax_s8(vqmovn_s16(sum), kZero); } #else for (IndexType j = 0; j < kHalfDimensions; ++j) { BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; + for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) { + sum += accumulation[static_cast(perspectives[p])][i][j]; + } output[offset + j] = static_cast( std::max(0, std::min(127, sum))); } @@ -193,192 +283,162 @@ namespace Eval::NNUE { private: // Calculate cumulative value without using difference calculation void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; - IndexType i = 0; - Features::IndexList active_indices[2]; - 
RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], - active_indices); - for (Color perspective : { WHITE, BLACK }) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - for (const auto index : active_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - #if defined(USE_AVX512) - auto accumulation = reinterpret_cast<__m512i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; - for (IndexType j = 0; j < kNumChunks; ++j) - _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList active_indices[2]; + RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], + active_indices); + for (Color perspective : { WHITE, BLACK }) { + #ifdef TILING + for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_AVX2) - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); + if (i == 0) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_zero; + } + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif 
defined(USE_SSE2) - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } - #elif defined(USE_MMX) - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); + for (unsigned k = 0; k < kNumRegs; k++) + vec_store(&accTile[k], acc[k]); + } + #else + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); } - #elif defined(USE_NEON) - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - auto column = reinterpret_cast(&weights_[offset]); - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) - accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - #endif + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } + #endif } + } + #if defined(USE_MMX) _mm_empty(); #endif accumulator.computed_accumulation = true; - accumulator.computed_score = false; } // Calculate cumulative value 
using difference calculation void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; auto& accumulator = pos.state()->accumulator; - IndexType i = 0; - Features::IndexList removed_indices[2], added_indices[2]; - bool reset[2]; - RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], - removed_indices, added_indices, reset); - for (Color perspective : { WHITE, BLACK }) { + for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) { + Features::IndexList removed_indices[2], added_indices[2]; + bool reset[2]; + RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], + removed_indices, added_indices, reset); - #if defined(USE_AVX2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m256i*>( - &accumulator.accumulation[perspective][i][0]); + #ifdef TILING + for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) { + for (Color perspective : { WHITE, BLACK }) { + auto accTile = reinterpret_cast( + &accumulator.accumulation[perspective][i][j * kTileHeight]); + vec_t acc[kNumRegs]; - #elif defined(USE_SSE2) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m128i*>( - &accumulator.accumulation[perspective][i][0]); + if (reset[perspective]) { + if (i == 0) { + auto biasesTile = reinterpret_cast( + &biases_[j * kTileHeight]); + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = biasesTile[k]; + } else { + for (unsigned k = 0; k < kNumRegs; ++k) + acc[k] = vec_zero; + } + } else { + auto prevAccTile = reinterpret_cast( + &prev_accumulator.accumulation[perspective][i][j * kTileHeight]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_load(&prevAccTile[k]); - #elif defined(USE_MMX) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast<__m64*>( - &accumulator.accumulation[perspective][i][0]); + // Difference 
calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - #elif defined(USE_NEON) - constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - auto accumulation = reinterpret_cast( - &accumulator.accumulation[perspective][i][0]); - #endif + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); + } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index + j * kTileHeight; + auto column = reinterpret_cast(&weights_[offset]); - if (reset[perspective]) { - std::memcpy(accumulator.accumulation[perspective][i], biases_, - kHalfDimensions * sizeof(BiasType)); - } else { - std::memcpy(accumulator.accumulation[perspective][i], - prev_accumulator.accumulation[perspective][i], - kHalfDimensions * sizeof(BiasType)); - // Difference calculation for the deactivated features - for (const auto index : removed_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; - - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); + for (IndexType k = 0; k < kNumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } } - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); - } - - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); - } - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - 
accumulation[j] = vsubq_s16(accumulation[j], column[j]); - } - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] -= - weights_[offset + j]; - } - #endif - + for (IndexType k = 0; k < kNumRegs; ++k) + vec_store(&accTile[k], acc[k]); } } - { // Difference calculation for the activated features - for (const auto index : added_indices[perspective]) { - const IndexType offset = kHalfDimensions * index; + #if defined(USE_MMX) + _mm_empty(); + #endif - #if defined(USE_AVX2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + #else + for (Color perspective : { WHITE, BLACK }) { + + if (reset[perspective]) { + if (i == 0) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memset(accumulator.accumulation[perspective][i], 0, + kHalfDimensions * sizeof(BiasType)); } + } else { + std::memcpy(accumulator.accumulation[perspective][i], + prev_accumulator.accumulation[perspective][i], + kHalfDimensions * sizeof(BiasType)); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; - #elif defined(USE_SSE2) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] -= weights_[offset + j]; } + } + { // Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; - #elif defined(USE_MMX) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); 
+ for (IndexType j = 0; j < kHalfDimensions; ++j) + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; } - - #elif defined(USE_NEON) - auto column = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } - - #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { - accumulator.accumulation[perspective][i][j] += - weights_[offset + j]; - } - #endif - } } + #endif } - #if defined(USE_MMX) - _mm_empty(); - #endif - accumulator.computed_accumulation = true; - accumulator.computed_score = false; } using BiasType = std::int16_t; diff --git a/src/nnue/trainer/trainer_affine_transform.h b/src/nnue/trainer/trainer_affine_transform.h index 50751ffe..415b7dc8 100644 --- a/src/nnue/trainer/trainer_affine_transform.h +++ b/src/nnue/trainer/trainer_affine_transform.h @@ -194,7 +194,7 @@ class Trainer> { weights_(), biases_diff_(), weights_diff_(), - momentum_(0.0), + momentum_(0.2), learning_rate_scale_(1.0) { DequantizeParameters(); } diff --git a/src/nnue/trainer/trainer_feature_transformer.h b/src/nnue/trainer/trainer_feature_transformer.h index 190e009a..225c91fc 100644 --- a/src/nnue/trainer/trainer_feature_transformer.h +++ b/src/nnue/trainer/trainer_feature_transformer.h @@ -232,7 +232,7 @@ class Trainer { biases_(), weights_(), biases_diff_(), - momentum_(0.0), + momentum_(0.2), learning_rate_scale_(1.0) { min_pre_activation_ = std::numeric_limits::max(); max_pre_activation_ = std::numeric_limits::lowest(); diff --git a/src/position.cpp b/src/position.cpp index 38ac7c5c..52c47f66 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -707,7 +707,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; - st->accumulator.computed_score = false; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -1003,7 +1002,6 @@ void Position::do_null_move(StateInfo& newSt) { if (Eval::useNNUE != 
Eval::UseNNUEMode::False) { std::memcpy(&newSt, st, sizeof(StateInfo)); - st->accumulator.computed_score = false; } else std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); @@ -1353,9 +1351,9 @@ bool Position::pos_is_ok() const { // Add a function that directly unpacks for speed. It's pretty tough. // Write it by combining packer::unpack() and Position::set(). // If there is a problem with the passed phase and there is an error, non-zero is returned. -int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th, bool mirror) +int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th) { - return Learner::set_from_packed_sfen(*this, sfen, si, th, mirror); + return Learner::set_from_packed_sfen(*this, sfen, si, th); } // Give the board, hand piece, and turn, and return the sfen. diff --git a/src/position.h b/src/position.h index 2163dca3..e7513eb1 100644 --- a/src/position.h +++ b/src/position.h @@ -177,7 +177,7 @@ public: // --sfenization helper - friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror); + friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th); // Get the packed sfen. Returns to the buffer specified in the argument. // Do not include gamePly in pack. @@ -187,7 +187,7 @@ public: // Equivalent to pos.set(sfen_unpack(data),si,th);. // If there is a problem with the passed phase and there is an error, non-zero is returned. // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument. 
- int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false); + int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th); void clear() { std::memset(this, 0, sizeof(Position)); } diff --git a/src/search.cpp b/src/search.cpp index e1616c5c..647f0fd7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -40,21 +40,11 @@ namespace Search { LimitsType Limits; } -namespace Tablebases { - - int Cardinality; - bool RootInTB; - bool UseRule50; - Depth ProbeDepth; -} - -namespace TB = Tablebases; - using std::string; using Eval::evaluate; using namespace Search; -bool Search::prune_at_shallow_depth_on_pv_node = true; +bool Search::prune_at_shallow_depth = true; namespace { @@ -227,7 +217,7 @@ void MainThread::search() { Time.init(Limits, us, rootPos.game_ply()); TT.new_search(); - Eval::verify_NNUE(); + Eval::NNUE::verify(); if (rootMoves.empty()) { @@ -464,10 +454,7 @@ void Thread::search() { ++failedHighCnt; } else - { - ++rootMoves[pvIdx].bestMoveCount; break; - } delta += delta / 4 + 5; @@ -522,7 +509,7 @@ void Thread::search() { totBestMoveChanges += th->bestMoveChanges; th->bestMoveChanges = 0; } - double bestMoveInstability = 1 + totBestMoveChanges / Threads.size(); + double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size(); double totalTime = rootMoves.size() == 1 ? 
0 : Time.optimum() * fallingEval * reduction * bestMoveInstability; @@ -599,7 +586,7 @@ namespace { Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; Value bestValue, value, ttValue, eval, maxValue, probCutBeta; - bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; + bool formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; Piece movedPiece; @@ -646,6 +633,7 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); (ss+1)->ply = ss->ply + 1; + (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; Square prevSq = to_sq((ss-1)->currentMove); @@ -655,9 +643,7 @@ namespace { // starts with statScore = 0. Later grandchildren start with the last calculated // statScore of the previous grandchild. This influences the reduction rules in // LMR which are based on the statScore of parent position. - if (rootNode) - (ss+4)->statScore = 0; - else + if (!rootNode) (ss+2)->statScore = 0; // Step 4. Transposition table lookup. We don't want the score of a partial @@ -665,14 +651,15 @@ namespace { // position key in case of an excluded move. excludedMove = ss->excludedMove; posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] - : ttHit ? tte->move() : MOVE_NONE; - ttPv = PvNode || (ttHit && tte->is_pv()); - formerPv = ttPv && !PvNode; + : ss->ttHit ? 
tte->move() : MOVE_NONE; + if (!excludedMove) + ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); + formerPv = ss->ttPv && !PvNode; - if ( ttPv + if ( ss->ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture @@ -681,11 +668,11 @@ namespace { // thisThread->ttHitAverage can be used to approximate the running average of ttHit thisThread->ttHitAverage = (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow - + TtHitAverageResolution * ttHit; + + TtHitAverageResolution * ss->ttHit; // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= depth && ttValue != VALUE_NONE // Possible in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -717,27 +704,27 @@ namespace { } // Step 5. Tablebases probe - if (!rootNode && TB::Cardinality) + if (!rootNode && thisThread->Cardinality) { int piecesCount = pos.count(); - if ( piecesCount <= TB::Cardinality - && (piecesCount < TB::Cardinality || depth >= TB::ProbeDepth) + if ( piecesCount <= thisThread->Cardinality + && (piecesCount < thisThread->Cardinality || depth >= thisThread->ProbeDepth) && pos.rule50_count() == 0 && !pos.can_castle(ANY_CASTLING)) { - TB::ProbeState err; - TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err); + Tablebases::ProbeState err; + Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &err); // Force check of time on the next occasion if (thisThread == Threads.main()) static_cast(thisThread)->callsCnt = 0; - if (err != TB::ProbeState::FAIL) + if (err != Tablebases::ProbeState::FAIL) { thisThread->tbHits.fetch_add(1, std::memory_order_relaxed); - int drawScore = TB::UseRule50 ? 1 : 0; + int drawScore = thisThread->UseRule50 ? 1 : 0; // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score value = wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1 @@ -750,7 +737,7 @@ namespace { if ( b == BOUND_EXACT || (b == BOUND_LOWER ? 
value >= beta : value <= alpha)) { - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b, + tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b, std::min(MAX_PLY - 1, depth + 6), MOVE_NONE, VALUE_NONE); @@ -778,7 +765,7 @@ namespace { improving = false; goto moves_loop; } - else if (ttHit) + else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); @@ -800,7 +787,7 @@ namespace { else ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; - tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); + tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } // Step 7. Razoring (~1 Elo) @@ -826,7 +813,7 @@ namespace { && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -882,14 +869,14 @@ namespace { // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ttHit + && !( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { // if ttMove is a capture and value from transposition table is good enough produce probCut // cutoff without digging into actual probCut search - if ( ttHit + if ( ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue >= probCutBeta @@ -900,6 +887,8 @@ namespace { assert(probCutBeta < VALUE_INFINITE); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; + bool ttPv = ss->ttPv; + ss->ttPv = false; while ( (move = mp.next_move()) != MOVE_NONE && probCutCount < 2 + 2 * cutNode) @@ 
-931,7 +920,7 @@ namespace { if (value >= probCutBeta) { // if transposition table doesn't have equal or more deep info write probCut data into it - if ( !(ttHit + if ( !(ss->ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) tte->save(posKey, value_to_tt(value, ss->ply), ttPv, @@ -940,8 +929,15 @@ namespace { return value; } } + ss->ttPv = ttPv; } + // Step 11. If the position is not in TT, decrease depth by 2 + if ( PvNode + && depth >= 6 + && !ttMove) + depth -= 2; + moves_loop: // When in check, search starts from here const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -965,7 +961,7 @@ moves_loop: // When in check, search starts from here // Mark this node as being searched ThreadHolding th(thisThread, posKey, ss->ply); - // Step 11. Loop through all pseudo-legal moves until no moves remain + // Step 12. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -1005,9 +1001,9 @@ moves_loop: // When in check, search starts from here // Calculate new depth for this move newDepth = depth - 1; - // Step 12. Pruning at shallow depth (~200 Elo) + // Step 13. Pruning at shallow depth (~200 Elo) if ( !rootNode - && (PvNode ? prune_at_shallow_depth_on_pv_node : true) + && (PvNode ? prune_at_shallow_depth : true) && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { @@ -1052,7 +1048,6 @@ moves_loop: // When in check, search starts from here if ( !givesCheck && lmrDepth < 6 && !(PvNode && abs(bestValue) < 2) - && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) @@ -1064,7 +1059,7 @@ moves_loop: // When in check, search starts from here } } - // Step 13. Extensions (~75 Elo) + // Step 14. 
Extensions (~75 Elo) // Singular extension search (~70 Elo). If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), @@ -1123,11 +1118,6 @@ moves_loop: // When in check, search starts from here && pos.non_pawn_material() <= 2 * RookValueMg) extension = 1; - // Castling extension - if ( type_of(move) == CASTLING - && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) - extension = 1; - // Late irreversible move extension if ( move == ttMove && pos.rule50_count() > 80 @@ -1147,14 +1137,13 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 14. Make the move + // Step 15. Make the move pos.do_move(move, st, givesCheck); - // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be + // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. if ( depth >= 3 - && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) - && (!rootNode || thisThread->best_move_count(move) == 0) + && moveCount > 1 + 2 * rootNode && ( !captureOrPromotion || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha @@ -1163,13 +1152,6 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); - // Decrease reduction at non-check cut nodes for second move at low depths - if ( cutNode - && depth <= 10 - && moveCount <= 2 - && !ss->inCheck) - r--; - // Decrease reduction if the ttHit running average is large if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; @@ -1179,7 +1161,7 @@ moves_loop: // When in check, search starts from here r++; // Decrease reduction if position is or has been on the PV (~10 Elo) - if (ttPv) + if (ss->ttPv) r -= 2; if (moveCountPruning && !formerPv) @@ -1191,7 +1173,7 @@ moves_loop: // When in check, search starts from 
here // Decrease reduction if ttMove has been singularly extended (~3 Elo) if (singularQuietLMR) - r -= 1 + formerPv; + r--; if (!captureOrPromotion) { @@ -1208,7 +1190,7 @@ moves_loop: // When in check, search starts from here // hence break make_move(). (~2 Elo) else if ( type_of(move) == NORMAL && !pos.see_ge(reverse_move(move))) - r -= 2 + ttPv - (type_of(movedPiece) == PAWN); + r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN); ss->statScore = thisThread->mainHistory[us][from_to(move)] + (*contHist[0])[movedPiece][to_sq(move)] @@ -1228,14 +1210,14 @@ moves_loop: // When in check, search starts from here } else { - // Increase reduction for captures/promotions if late move and at low depth - if (depth < 8 && moveCount > 2) - r++; + // Increase reduction for captures/promotions if late move and at low depth + if (depth < 8 && moveCount > 2) + r++; - // Unless giving check, this capture is likely bad - if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) - r++; + // Unless giving check, this capture is likely bad + if ( !givesCheck + && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) + r++; } Depth d = std::clamp(newDepth - r, 1, newDepth); @@ -1253,7 +1235,7 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 16. Full depth search when LMR is skipped or fails high + // Step 17. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1281,12 +1263,12 @@ moves_loop: // When in check, search starts from here value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } - // Step 17. Undo move + // Step 18. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 18. Check for a new best move + // Step 19. Check for a new best move // Finished searching the move. 
If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1363,7 +1345,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 19. Check for mate and stalemate + // Step 20. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. @@ -1386,8 +1368,17 @@ moves_loop: // When in check, search starts from here if (PvNode) bestValue = std::min(bestValue, maxValue); + // If no good move is found and the previous position was ttPv, then the previous + // opponent move is probably good and the new position is added to the search tree. + if (bestValue <= alpha) + ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); + // Otherwise, a counter move has been found and if the position is the last leaf + // in the search tree, remove the position from the search tree. + else if (depth > 3) + ss->ttPv = ss->ttPv && (ss+1)->ttPv; + if (!excludedMove && !(rootNode && thisThread->pvIdx)) - tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv, + tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv, bestValue >= beta ? BOUND_LOWER : PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER, depth, bestMove, ss->staticEval); @@ -1416,7 +1407,7 @@ moves_loop: // When in check, search starts from here Move ttMove, move, bestMove; Depth ttDepth; Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha; - bool ttHit, pvHit, givesCheck, captureOrPromotion; + bool pvHit, givesCheck, captureOrPromotion; int moveCount; if (PvNode) @@ -1446,13 +1437,13 @@ moves_loop: // When in check, search starts from here : DEPTH_QS_NO_CHECKS; // Transposition table lookup posKey = pos.key(); - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? 
value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - pvHit = ttHit && tte->is_pv(); + tte = TT.probe(posKey, ss->ttHit); + ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; + ttMove = ss->ttHit ? tte->move() : MOVE_NONE; + pvHit = ss->ttHit && tte->is_pv(); if ( !PvNode - && ttHit + && ss->ttHit && tte->depth() >= ttDepth && ttValue != VALUE_NONE // Only in case of TT access race && (ttValue >= beta ? (tte->bound() & BOUND_LOWER) @@ -1467,7 +1458,7 @@ moves_loop: // When in check, search starts from here } else { - if (ttHit) + if (ss->ttHit) { // Never assume anything about values stored in TT if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) @@ -1486,7 +1477,7 @@ moves_loop: // When in check, search starts from here // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) { - if (!ttHit) + if (!ss->ttHit) tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER, DEPTH_NONE, MOVE_NONE, ss->staticEval); @@ -1524,6 +1515,7 @@ moves_loop: // When in check, search starts from here // Futility pruning if ( !ss->inCheck + && Search::prune_at_shallow_depth && !givesCheck && futilityBase > -VALUE_KNOWN_WIN && !pos.advanced_pawn_push(move)) @@ -1550,18 +1542,17 @@ moves_loop: // When in check, search starts from here } // Do not search moves with negative SEE values - if ( !ss->inCheck && !pos.see_ge(move)) + if ( !ss->inCheck + && Search::prune_at_shallow_depth + && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move)) + && !pos.see_ge(move)) continue; // Speculative prefetch as early as possible prefetch(TT.first_entry(pos.key_after(move))); // Check for legality just before making the move - if ( - // HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning. 
- !pos.pseudo_legal(move) || - !pos.legal(move) - ) + if (!pos.legal(move)) { moveCount--; continue; @@ -1573,8 +1564,10 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; + // CounterMove based pruning if ( !captureOrPromotion - && moveCount >= abs(depth) + 1 + && Search::prune_at_shallow_depth + && moveCount && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) continue; @@ -1706,8 +1699,8 @@ moves_loop: // When in check, search starts from here else captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; - // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted - if ( ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0])) + // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted + if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); @@ -1844,19 +1837,22 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { size_t pvIdx = pos.this_thread()->pvIdx; size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size()); uint64_t nodesSearched = Threads.nodes_searched(); - uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0); + uint64_t tbHits = Threads.tb_hits() + (pos.this_thread()->rootInTB ? rootMoves.size() : 0); for (size_t i = 0; i < multiPV; ++i) { bool updated = rootMoves[i].score != -VALUE_INFINITE; - if (depth == 1 && !updated) + if (depth == 1 && !updated && i > 0) continue; - Depth d = updated ? depth : depth - 1; + Depth d = updated ? depth : std::max(1, depth - 1); Value v = updated ? 
rootMoves[i].score : rootMoves[i].previousScore; - bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; + if (v == -VALUE_INFINITE) + v = VALUE_ZERO; + + bool tb = pos.this_thread()->rootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY; v = tb ? rootMoves[i].tbScore : v; if (ss.rdbuf()->in_avail()) // Not at first line @@ -1923,42 +1919,42 @@ bool RootMove::extract_ponder_from_tt(Position& pos) { void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { - RootInTB = false; - UseRule50 = bool(Options["Syzygy50MoveRule"]); - ProbeDepth = int(Options["SyzygyProbeDepth"]); - Cardinality = int(Options["SyzygyProbeLimit"]); + auto& rootInTB = pos.this_thread()->rootInTB; + auto& cardinality = pos.this_thread()->Cardinality; + auto& probeDepth = pos.this_thread()->ProbeDepth; + rootInTB = false; bool dtz_available = true; // Tables with fewer pieces than SyzygyProbeLimit are searched with // ProbeDepth == DEPTH_ZERO - if (Cardinality > MaxCardinality) + if (cardinality > Tablebases::MaxCardinality) { - Cardinality = MaxCardinality; - ProbeDepth = 0; + cardinality = Tablebases::MaxCardinality; + probeDepth = 0; } - if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) + if (cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING)) { // Rank moves using DTZ tables - RootInTB = root_probe(pos, rootMoves); + rootInTB = root_probe(pos, rootMoves); - if (!RootInTB) + if (!rootInTB) { // DTZ tables are missing; try to rank moves using WDL tables dtz_available = false; - RootInTB = root_probe_wdl(pos, rootMoves); + rootInTB = root_probe_wdl(pos, rootMoves); } } - if (RootInTB) + if (rootInTB) { // Sort moves according to TB rank - std::sort(rootMoves.begin(), rootMoves.end(), + std::stable_sort(rootMoves.begin(), rootMoves.end(), [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } ); // Probe during search only if DTZ is not available and we are winning if (dtz_available || rootMoves[0].tbScore <= 
VALUE_DRAW) - Cardinality = 0; + cardinality = 0; } else { @@ -1966,6 +1962,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) { for (auto& m : rootMoves) m.tbRank = 0; } + } // --- expose the functions such as fixed depth search used for learning to the outside @@ -1998,7 +1995,7 @@ namespace Learner th->nmpMinPly = th->bestMoveChanges = 0; th->ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2; - // Zero initialization of the number of search nodes + // Zero initialization of the number of search nodes th->nodes = 0; // Clear all history types. This initialization takes a little time, and the accuracy of the search is rather low, so the good and bad are not well understood. @@ -2022,7 +2019,7 @@ namespace Learner for (int i = 7; i > 0; i--) (ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel - // set rootMoves + // set rootMoves auto& rootMoves = th->rootMoves; rootMoves.clear(); @@ -2030,6 +2027,20 @@ namespace Learner rootMoves.push_back(Search::RootMove(m)); assert(!rootMoves.empty()); + + th->UseRule50 = bool(Options["Syzygy50MoveRule"]); + th->ProbeDepth = int(Options["SyzygyProbeDepth"]); + th->Cardinality = int(Options["SyzygyProbeLimit"]); + + // Tables with fewer pieces than SyzygyProbeLimit are searched with + // ProbeDepth == DEPTH_ZERO + if (th->Cardinality > Tablebases::MaxCardinality) + { + th->Cardinality = Tablebases::MaxCardinality; + th->ProbeDepth = 0; + } + + Tablebases::rank_root_moves(pos, rootMoves); } } @@ -2050,8 +2061,8 @@ namespace Learner // As it has a bad effect, I decided to stop allowing the window range to be specified. ValueAndPV qsearch(Position& pos) { - Stack stack[MAX_PLY + 10], * ss = stack + 7; - Move pv[MAX_PLY + 1]; + Stack stack[MAX_PLY+10], *ss = stack+7; + Move pv[MAX_PLY+1]; init_for_search(pos, ss); ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer. 
@@ -2070,7 +2081,7 @@ namespace Learner auto bestValue = ::qsearch(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0); - // Returns the PV obtained. + // Returns the PV obtained. std::vector pvs; for (Move* p = &ss->pv[0]; is_ok(*p); ++p) pvs.push_back(*p); @@ -2136,7 +2147,7 @@ namespace Learner Value bestValue = -VALUE_INFINITE; while ((rootDepth += 1) <= depth - // exit this loop even if the node limit is exceeded + // exit this loop even if the node limit is exceeded // The number of search nodes is passed in the argument of this function. && !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit) ) @@ -2158,46 +2169,36 @@ namespace Learner break; } - // selDepth output with USI info for each depth and PV line + // selDepth output with USI info for each depth and PV line selDepth = 0; // Switch to aspiration search for depth 5 and above. - if (rootDepth >= 5 * 1) + if (rootDepth >= 4) { - delta = Value(20); - - Value p = rootMoves[pvIdx].previousScore; - - alpha = std::max(p - delta, -VALUE_INFINITE); - beta = std::min(p + delta, VALUE_INFINITE); + Value prev = rootMoves[pvIdx].previousScore; + delta = Value(17); + alpha = std::max(prev - delta,-VALUE_INFINITE); + beta = std::min(prev + delta, VALUE_INFINITE); } - // aspiration search - int failedHighCnt = 0; while (true) { - Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt * 1); + Depth adjustedDepth = std::max(1, rootDepth); bestValue = ::search(pos, ss, alpha, beta, adjustedDepth, false); stable_sort(rootMoves.begin() + pvIdx, rootMoves.end()); //my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx); - // Expand aspiration window for fail low/high. + // Expand aspiration window for fail low/high. // However, if it is the value specified by the argument, it will be treated as fail low/high and break. 
if (bestValue <= alpha) { beta = (alpha + beta) / 2; alpha = std::max(bestValue - delta, -VALUE_INFINITE); - - failedHighCnt = 0; - //if (mainThread) - // mainThread->stopOnPonderhit = false; - } else if (bestValue >= beta) { beta = std::min(bestValue + delta, VALUE_INFINITE); - ++failedHighCnt; } else break; @@ -2218,7 +2219,6 @@ namespace Learner } // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle. - // ¨ PV should not be NULL_MOVE because it is PV // MOVE_WIN has never been thrust. (For now) for (Move move : rootMoves[0].pv) { diff --git a/src/search.h b/src/search.h index 20dfe909..ab832ee2 100644 --- a/src/search.h +++ b/src/search.h @@ -24,6 +24,7 @@ #include "misc.h" #include "movepick.h" #include "types.h" +#include "uci.h" class Position; @@ -32,7 +33,7 @@ namespace Search { /// Threshold used for countermoves based pruning constexpr int CounterMovePruneThreshold = 0; -extern bool prune_at_shallow_depth_on_pv_node; +extern bool prune_at_shallow_depth; /// Stack struct keeps track of the information we need to remember from nodes /// shallower and deeper in the tree during the search. 
Each search thread has @@ -49,6 +50,8 @@ struct Stack { int statScore; int moveCount; bool inCheck; + bool ttPv; + bool ttHit; }; @@ -70,7 +73,6 @@ struct RootMove { Value previousScore = -VALUE_INFINITE; int selDepth = 0; int tbRank = 0; - int bestMoveCount = 0; Value tbScore; std::vector pv; }; diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 20215b96..f4b9447f 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -52,7 +52,7 @@ using namespace Tablebases; -int Tablebases::MaxCardinality; +int Tablebases::MaxCardinality = 0; namespace { @@ -223,7 +223,9 @@ public: *mapping = statbuf.st_size; *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); +#if defined(MADV_RANDOM) madvise(*baseAddress, statbuf.st_size, MADV_RANDOM); +#endif ::close(fd); if (*baseAddress == MAP_FAILED) @@ -758,7 +760,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu if (entry->hasPawns) { idx = LeadPawnIdx[leadPawnsCnt][squares[0]]; - std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp); + std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp); for (int i = 1; i < leadPawnsCnt; ++i) idx += Binomial[i][MapPawns[squares[i]]]; @@ -859,7 +861,7 @@ encode_remaining: while (d->groupLen[++next]) { - std::sort(groupSq, groupSq + d->groupLen[next]); + std::stable_sort(groupSq, groupSq + d->groupLen[next]); uint64_t n = 0; // Map down a square if "comes later" than a square in the previous diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index b998989b..5f97c746 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -25,6 +25,8 @@ namespace Tablebases { +extern int MaxCardinality; + enum WDLScore { WDLLoss = -2, // Loss WDLBlessedLoss = -1, // Loss, but draw under 50-move rule @@ -43,8 +45,6 @@ enum ProbeState { ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move) }; -extern int MaxCardinality; - void init(const std::string& paths); WDLScore probe_wdl(Position& 
pos, ProbeState* result); int probe_dtz(Position& pos, ProbeState* result); diff --git a/src/thread.cpp b/src/thread.cpp index 1aa66a81..c81ac43d 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -51,17 +51,6 @@ Thread::~Thread() { } -/// Thread::bestMoveCount(Move move) return best move counter for the given root move - -int Thread::best_move_count(Move move) const { - - auto rm = std::find(rootMoves.begin() + pvIdx, - rootMoves.begin() + pvLast, move); - - return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0; -} - - /// Thread::clear() reset histories, usually before a new game void Thread::clear() { @@ -192,9 +181,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m)) rootMoves.emplace_back(m); - if (!rootMoves.empty()) - Tablebases::rank_root_moves(pos, rootMoves); - // After ownership transfer 'states' becomes empty, so if we stop the search // and call 'go' again without setting a new position states.get() == NULL. 
assert(states.get() || setupStates.get()); @@ -214,6 +200,21 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, th->rootMoves = rootMoves; th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); th->rootState = setupStates->back(); + th->UseRule50 = bool(Options["Syzygy50MoveRule"]); + th->ProbeDepth = int(Options["SyzygyProbeDepth"]); + th->Cardinality = int(Options["SyzygyProbeLimit"]); + + // Tables with fewer pieces than SyzygyProbeLimit are searched with + // ProbeDepth == DEPTH_ZERO + if (th->Cardinality > Tablebases::MaxCardinality) + { + th->Cardinality = Tablebases::MaxCardinality; + th->ProbeDepth = 0; + } + + if (!rootMoves.empty()) + Tablebases::rank_root_moves(pos, rootMoves); + } main()->start_searching(); @@ -235,16 +236,16 @@ Thread* ThreadPool::get_best_thread() const { votes[th->rootMoves[0].pv[0]] += (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); - if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) - { - // Make sure we pick the shortest mate / TB conversion or stave off mate the longest - if (th->rootMoves[0].score > bestThread->rootMoves[0].score) - bestThread = th; - } - else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY - || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY - && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) - bestThread = th; + if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) + { + // Make sure we pick the shortest mate / TB conversion or stave off mate the longest + if (th->rootMoves[0].score > bestThread->rootMoves[0].score) + bestThread = th; + } + else if ( th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY + || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY + && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]])) + bestThread = th; } return bestThread; diff --git a/src/thread.h b/src/thread.h index 042bc2e9..501a6042 100644 --- a/src/thread.h +++ b/src/thread.h @@ -54,7 
+54,6 @@ public: void idle_loop(); void start_searching(); void wait_for_search_finished(); - int best_move_count(Move move) const; Pawns::Table pawnsTable; Material::Table materialTable; @@ -74,6 +73,11 @@ public: CapturePieceToHistory captureHistory; ContinuationHistory continuationHistory[2][2]; Score contempt; + bool rootInTB; + int Cardinality; + bool UseRule50; + Depth ProbeDepth; + }; diff --git a/src/tt.cpp b/src/tt.cpp index c64670ac..718587a8 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -35,6 +35,9 @@ bool TranspositionTable::enable_transposition_table = true; void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) { + if (!TranspositionTable::enable_transposition_table) { + return; + } // Preserve any existing move for the same position if (m || (uint16_t)k != key16) move16 = (uint16_t)m; @@ -64,11 +67,12 @@ void TranspositionTable::resize(size_t mbSize) { Threads.main()->wait_for_search_finished(); - aligned_ttmem_free(mem); + aligned_large_pages_free(table); clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster); - table = static_cast(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem)); - if (!mem) + + table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster))); + if (!table) { std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." 
<< std::endl; diff --git a/src/tt.h b/src/tt.h index 29072bd8..d817f26d 100644 --- a/src/tt.h +++ b/src/tt.h @@ -73,7 +73,7 @@ class TranspositionTable { static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size"); public: - ~TranspositionTable() { aligned_ttmem_free(mem); } + ~TranspositionTable() { aligned_large_pages_free(table); } void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound TTEntry* probe(const Key key, bool& found) const; int hashfull() const; @@ -91,7 +91,6 @@ private: size_t clusterCount; Cluster* table; - void* mem; uint8_t generation8; // Size must be not bigger than TTEntry::genBound8 }; diff --git a/src/uci.cpp b/src/uci.cpp index 1128d4d9..a123bbc0 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -47,7 +47,7 @@ const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1 void test_cmd(Position& pos, istringstream& is) { // Initialize as it may be searched. - Eval::init_NNUE(); + Eval::NNUE::init(); std::string param; is >> param; @@ -100,7 +100,7 @@ namespace { Position p; p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); - Eval::verify_NNUE(); + Eval::NNUE::verify(); sync_cout << "\n" << Eval::trace(p) << sync_endl; } @@ -185,7 +185,7 @@ namespace { if (token == "go" || token == "eval") { - cerr << "\nPosition: " << cnt++ << '/' << num << endl; + cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl; if (token == "go") { go(pos, is, states); @@ -210,15 +210,15 @@ namespace { << "\nNodes/second : " << 1000 * nodes / elapsed << endl; } - // The win rate model returns the probability (per mille) of winning given an eval - // and a game-ply. The model fits rather accurately the LTC fishtest statistics. 
- int win_rate_model(Value v, int ply) { - // Return win rate in per mille (rounded to nearest) - return int(0.5 + UCI::win_rate_model_double(v, ply)); - } - } // namespace +// The win rate model returns the probability (per mille) of winning given an eval +// and a game-ply. The model fits rather accurately the LTC fishtest statistics. +int UCI::win_rate_model(Value v, int ply) { + // Return win rate in per mille (rounded to nearest) + return int(0.5 + win_rate_model_double(v, ply)); +} + // The win rate model returns the probability (per mille) of winning given an eval // and a game-ply. The model fits rather accurately the LTC fishtest statistics. double UCI::win_rate_model_double(double v, int ply) { diff --git a/src/uci.h b/src/uci.h index c0e8372f..2e0f5c11 100644 --- a/src/uci.h +++ b/src/uci.h @@ -72,6 +72,7 @@ std::string square(Square s); std::string move(Move m, bool chess960); std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); std::string wdl(Value v, int ply); +int win_rate_model(Value v, int ply); double win_rate_model_double(double v, int ply); Move to_move(const Position& pos, std::string& str); diff --git a/src/ucioption.cpp b/src/ucioption.cpp index dde3844a..099ca2ae 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -21,6 +21,7 @@ #include #include +#include "evaluate.h" #include "misc.h" #include "search.h" #include "thread.h" @@ -40,10 +41,10 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); } void on_logger(const Option& o) { start_logger(o); } void on_threads(const Option& o) { Threads.set(size_t(o)); } void on_tb_path(const Option& o) { Tablebases::init(o); } -void on_use_NNUE(const Option& ) { Eval::init_NNUE(); } -void on_eval_file(const Option& ) { Eval::init_NNUE(); } -void on_prune_at_shallow_depth_on_pv_node(const Option& o) { - Search::prune_at_shallow_depth_on_pv_node = o; +void on_use_NNUE(const Option& ) { Eval::NNUE::init(); } +void on_eval_file(const Option& ) { Eval::NNUE::init(); } 
+void on_prune_at_shallow_depth(const Option& o) { + Search::prune_at_shallow_depth = o; } void on_enable_transposition_table(const Option& o) { TranspositionTable::enable_transposition_table = o; @@ -85,23 +86,19 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option("true var true var false var pure", "true", on_use_NNUE); - // The default must follow the format nn-[SHA256 first 12 digits].nnue - // for the build process (profile-build and fishtest) to work. - o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); + o["EvalFile"] << Option(EvalFileDefaultName, on_eval_file); // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. // I want to hit the test eval convert command, but there is no new evaluation function // It ends abnormally before executing this command. // Therefore, with this hidden option, you can suppress the loading of the evaluation function when ucinewgame, // Hit the test eval convert command. o["SkipLoadingEval"] << Option(false); - // how many moves to use a fixed move - // o["BookMoves"] << Option(16, 0, 10000); // When learning the evaluation function, you can change the folder to save the evaluation function. // Evalsave by default. This folder shall be prepared in advance. // Automatically create a folder under this folder like "0/", "1/", ... and save the evaluation function file there. o["EvalSaveDir"] << Option("evalsave"); // Prune at shallow depth on PV nodes. False is recommended when using fixed depth search. - o["PruneAtShallowDepthOnPvNode"] << Option(true, on_prune_at_shallow_depth_on_pv_node); + o["PruneAtShallowDepth"] << Option(true, on_prune_at_shallow_depth); // Enable transposition table. 
o["EnableTranspositionTable"] << Option(true, on_enable_transposition_table); } diff --git a/tests/instrumented_learn.sh b/tests/instrumented_learn.sh index 7f76fd76..ce1fc429 100755 --- a/tests/instrumented_learn.sh +++ b/tests/instrumented_learn.sh @@ -78,11 +78,11 @@ cat << EOF > gensfen01.exp send "setoption name Threads value $threads\n" send "setoption name Use NNUE value false\n" send "isready\n" - send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin use_raw_nnue_eval 0 sfen_format bin\n" + send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin sfen_format bin\n" expect "gensfen finished." send "learn training_data/training_data.bin convert_plain output_file_name training_data.txt\n" expect "all done" - send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n" + send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack sfen_format binpack\n" expect "gensfen finished." send "quit\n" @@ -104,9 +104,9 @@ cat << EOF > gensfen02.exp send "setoption name Threads value $threads\n" send "setoption name Use NNUE value true\n" send "isready\n" - send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/valdidation_data.bin use_raw_nnue_eval 0 sfen_format bin\n" + send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/valdidation_data.bin sfen_format bin\n" expect "gensfen finished." 
- send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n" + send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack sfen_format binpack\n" expect "gensfen finished." send "quit\n" @@ -127,7 +127,7 @@ cat << EOF > learn01.exp send "setoption name Use NNUE value true\n" send "setoption name Threads value $threads\n" send "isready\n" - send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 mirror_percentage 50 validation_set_file_name validation_data/validation_data.bin\n" + send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 lr 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 validation_set_file_name validation_data/validation_data.bin\n" expect "save_eval() finished."