Merge pull request #165 from Sopel97/merge_attempt

Merge attempt with official-stockfish/master and noobpwnftw/trainer
2026-05-20 08:37:44 +00:00 · 2020-09-26 10:05:16 +09:00
parent 2931463d3a c99541828f
commit d1967bb281
59 changed files with 1819 additions and 1642 deletions
@@ -36,10 +36,11 @@ Bryan Cross (crossbr)
 candirufish
 Chess13234
 Chris Cain (ceebo)
+Dale Weiler (graphitemaster)
 Dan Schmidt (dfannius)
 Daniel Axtens (daxtens)
 Daniel Dugovic (ddugovic)
-Dariusz Orzechowski
+Dariusz Orzechowski (dorzechowski)
 David Zar
 Daylen Yang (daylen)
 DiscanX
@@ -62,6 +63,7 @@ Gary Heckman (gheckman)
 George Sobala (gsobala)
 gguliash
 Gian-Carlo Pascutto (gcp)
+Deshawn Mohan-Smith (GoldenRare)
 Gontran Lemaire (gonlem)
 Goodkov Vasiliy Aleksandrovich (goodkov)
 Gregor Cramer
@@ -1,154 +1,173 @@
-Contributors with >10,000 CPU hours as of January 7, 2020
+Contributors with >10,000 CPU hours as of Sept 2, 2020
 Thank you!

 Username                  CPU Hours   Games played
 --------------------------------------------------
-noobpwnftw                  9305707      695548021
-mlang                        780050       61648867
-dew                          621626       43921547
-mibere                       524702       42238645
-crunchy                      354587       27344275
-cw                           354495       27274181
-fastgm                       332801       22804359
-JojoM                        295750       20437451
-CSU_Dynasty                  262015       21828122
-Fisherman                    232181       18939229
-ctoks                        218866       17622052
-glinscott                    201989       13780820
-tvijlbrief                   201204       15337115
-velislav                     188630       14348485
-gvreuls                      187164       15149976
-bking_US                     180289       11876016
-nordlandia                   172076       13467830
-leszek                       157152       11443978
-Thanar                       148021       12365359
-spams                        141975       10319326
-drabel                       138073       11121749
-vdv                          137850        9394330
-mgrabiak                     133578       10454324
-TueRens                      132485       10878471
-bcross                       129683       11557084
-marrco                       126078        9356740
-sqrt2                        125830        9724586
-robal                        122873        9593418
-vdbergh                      120766        8926915
-malala                       115926        8002293
-CoffeeOne                    114241        5004100
-dsmith                       113189        7570238
-BrunoBanani                  104644        7436849
-Data                          92328        8220352
-mhoram                        89333        6695109
-davar                         87924        7009424
-xoto                          81094        6869316
-ElbertoOne                    80899        7023771
-grandphish2                   78067        6160199
-brabos                        77212        6186135
-psk                           75733        5984901
-BRAVONE                       73875        5054681
-sunu                          70771        5597972
-sterni1971                    70605        5590573
-MaZePallas                    66886        5188978
-Vizvezdenec                   63708        4967313
-nssy                          63462        5259388
-jromang                       61634        4940891
-teddybaer                     61231        5407666
-Pking_cda                     60099        5293873
-solarlight                    57469        5028306
-dv8silencer                   56913        3883992
-tinker                        54936        4086118
-renouve                       49732        3501516
-Freja                         49543        3733019
-robnjr                        46972        4053117
-rap                           46563        3219146
-Bobo1239                      46036        3817196
-ttruscott                     45304        3649765
-racerschmacer                 44881        3975413
-finfish                       44764        3370515
-eva42                         41783        3599691
-biffhero                      40263        3111352
-bigpen0r                      39817        3291647
-mhunt                         38871        2691355
-ronaldjerum                   38820        3240695
-Antihistamine                 38785        2761312
-pb00067                       38038        3086320
-speedycpu                     37591        3003273
-rkl                           37207        3289580
-VoyagerOne                    37050        3441673
-jbwiebe                       35320        2805433
-cuistot                       34191        2146279
-homyur                        33927        2850481
-manap                         32873        2327384
-gri                           32538        2515779
-oryx                          31267        2899051
-EthanOConnor                  30959        2090311
-SC                            30832        2730764
-csnodgrass                    29505        2688994
-jmdana                        29458        2205261
-strelock                      28219        2067805
-jkiiski                       27832        1904470
-Pyafue                        27533        1902349
-Garf                          27515        2747562
-eastorwest                    27421        2317535
-slakovv                       26903        2021889
-Prcuvu                        24835        2170122
-anst                          24714        2190091
-hyperbolic.tom                24319        2017394
-Patrick_G                     23687        1801617
-Sharaf_DG                     22896        1786697
-nabildanial                   22195        1519409
-chriswk                       21931        1868317
-achambord                     21665        1767323
-Zirie                         20887        1472937
-team-oh                       20217        1636708
-Isidor                        20096        1680691
-ncfish1                       19931        1520927
-nesoneg                       19875        1463031
-Spprtr                        19853        1548165
-JanErik                       19849        1703875
-agg177                        19478        1395014
-SFTUser                       19231        1567999
-xor12                         19017        1680165
-sg4032                        18431        1641865
-rstoesser                     18118        1293588
-MazeOfGalious                 17917        1629593
-j3corre                       17743         941444
-cisco2015                     17725        1690126
-ianh2105                      17706        1632562
-dex                           17678        1467203
-jundery                       17194        1115855
-iisiraider                    17019        1101015
-horst.prack                   17012        1465656
-Adrian.Schmidt123             16563        1281436
-purplefishies                 16342        1092533
-wei                           16274        1745989
-ville                         16144        1384026
-eudhan                        15712        1283717
-OuaisBla                      15581         972000
-DragonLord                    15559        1162790
-dju                           14716         875569
-chris                         14479        1487385
-0xB00B1ES                     14079        1001120
-OssumOpossum                  13776        1007129
-enedene                       13460         905279
-bpfliegel                     13346         884523
-Ente                          13198        1156722
-IgorLeMasson                  13087        1147232
-jpulman                       13000         870599
-ako027ako                     12775        1173203
-Nikolay.IT                    12352        1068349
-Andrew Grant                  12327         895539
-joster                        12008         950160
-AdrianSA                      11996         804972
-Nesa92                        11455        1111993
-fatmurphy                     11345         853210
-Dark_wizzie                   11108        1007152
-modolief                      10869         896470
-mschmidt                      10757         803401
-infinity                      10594         727027
-mabichito                     10524         749391
-Thomas A. Anderson            10474         732094
-thijsk                        10431         719357
-Flopzee                       10339         894821
-crocogoat                     10104        1013854
-SapphireBrand                 10104         969604
-stocky                        10017         699440
+noobpwnftw                 19352969     1231459677
+mlang                        957168       61657446
+dew                          949885       56893432
+mibere                       703817       46865007
+crunchy                      427035       27344275
+cw                           416006       27521077
+JojoM                        415904       24479564
+fastgm                       404873       23953472
+CSU_Dynasty                  335774       22850550
+tvijlbrief                   335199       21871270
+Fisherman                    325053       21786603
+gvreuls                      311480       20751516
+ctoks                        275877       18710423
+velislav                     241267       15596372
+glinscott                    217799       13780820
+nordlandia                   211692       13484886
+bcross                       206213       14934233
+bking_US                     198894       11876016
+leszek                       189170       11446821
+mgrabiak                     183896       11778092
+drabel                       181408       12489478
+TueRens                      181349       12192000
+Thanar                       179852       12365359
+vdv                          175171        9881246
+robal                        166948       10702862
+spams                        157128       10319326
+marrco                       149947        9376421
+sqrt2                        147963        9724586
+vdbergh                      137041        8926915
+CoffeeOne                    136294        5004100
+malala                       136182        8002293
+mhoram                       128934        8177193
+davar                        122092        7960001
+dsmith                       122059        7570238
+xoto                         119696        8222144
+grandphish2                  116481        7582197
+Data                         113305        8220352
+BrunoBanani                  112960        7436849
+ElbertoOne                    99028        7023771
+MaZePallas                    98571        6362619
+brabos                        92118        6186135
+psk                           89957        5984901
+sunu                          88463        6007033
+sterni1971                    86948        5613788
+Vizvezdenec                   83752        5343724
+BRAVONE                       81239        5054681
+nssy                          76497        5259388
+teddybaer                     75125        5407666
+Pking_cda                     73776        5293873
+jromang                       70695        4940891
+solarlight                    70517        5028306
+dv8silencer                   70287        3883992
+Bobo1239                      68515        4652287
+racerschmacer                 67468        4935996
+manap                         66273        4121774
+tinker                        63458        4213726
+linrock                       59082        4516053
+robnjr                        57262        4053117
+Freja                         56938        3733019
+ttruscott                     56005        3679485
+renouve                       53811        3501516
+cuistot                       52532        3014920
+finfish                       51360        3370515
+eva42                         51272        3599691
+rkl                           50759        3840947
+rap                           49985        3219146
+pb00067                       49727        3298270
+ronaldjerum                   47654        3240695
+bigpen0r                      47278        3291647
+biffhero                      46564        3111352
+VoyagerOne                    45386        3445881
+speedycpu                     43842        3003273
+jbwiebe                       43305        2805433
+Antihistamine                 41788        2761312
+mhunt                         41735        2691355
+eastorwest                    40387        2812173
+homyur                        39893        2850481
+gri                           39871        2515779
+oryx                          38228        2941656
+0x3C33                        37773        2529097
+SC                            37290        2731014
+csnodgrass                    36207        2688994
+jmdana                        36108        2205261
+strelock                      34716        2074055
+Garf                          33800        2747562
+EthanOConnor                  33370        2090311
+slakovv                       32915        2021889
+Spprtr                        32591        2139601
+Prcuvu                        30377        2170122
+anst                          30301        2190091
+jkiiski                       30136        1904470
+hyperbolic.tom                29840        2017394
+Pyafue                        29650        1902349
+OuaisBla                      27629        1578000
+chriswk                       26902        1868317
+achambord                     26582        1767323
+Patrick_G                     26276        1801617
+yorkman                       26193        1992080
+SFTUser                       25182        1675689
+nabildanial                   24942        1519409
+Sharaf_DG                     24765        1786697
+ncfish1                       24411        1520927
+agg177                        23890        1395014
+JanErik                       23408        1703875
+Isidor                        23388        1680691
+Norabor                       22976        1587862
+cisco2015                     22880        1759669
+Zirie                         22542        1472937
+team-oh                       22272        1636708
+MazeOfGalious                 21978        1629593
+sg4032                        21945        1643065
+ianh2105                      21725        1632562
+xor12                         21628        1680365
+dex                           21612        1467203
+nesoneg                       21494        1463031
+horst.prack                   20878        1465656
+0xB00B1ES                     20590        1208666
+j3corre                       20405         941444
+Adrian.Schmidt123             20316        1281436
+wei                           19973        1745989
+rstoesser                     19569        1293588
+eudhan                        19274        1283717
+Ente                          19070        1373058
+jundery                       18445        1115855
+iisiraider                    18247        1101015
+ville                         17883        1384026
+chris                         17698        1487385
+purplefishies                 17595        1092533
+DragonLord                    17014        1162790
+dju                           16515         929427
+IgorLeMasson                  16064        1147232
+ako027ako                     15671        1173203
+Nikolay.IT                    15154        1068349
+Andrew Grant                  15114         895539
+yurikvelo                     15027        1165616
+OssumOpossum                  14857        1007129
+enedene                       14476         905279
+bpfliegel                     14298         884523
+jpulman                       13982         870599
+joster                        13794         950160
+Nesa92                        13786        1114691
+Dark_wizzie                   13422        1007152
+Hjax                          13350         900887
+Fifis                         13313         965473
+mabichito                     12903         749391
+thijsk                        12886         722107
+crocogoat                     12876        1048802
+AdrianSA                      12860         804972
+Flopzee                       12698         894821
+fatmurphy                     12547         853210
+SapphireBrand                 12416         969604
+modolief                      12386         896470
+scuzzi                        12362         833465
+pgontarz                      12151         848794
+stocky                        11954         699440
+mschmidt                      11941         803401
+infinity                      11470         727027
+torbjo                        11387         728873
+Thomas A. Anderson            11372         732094
+snicolet                      11106         869170
+amicic                        10779         733593
+rpngn                         10712         688203
+d64                           10680         771144
+basepi                        10637         744851
+jjoshua2                      10559         670905
+dzjp                          10343         732529
+ols                           10259         570669
+lbraesch                      10252         647825
@@ -63,7 +63,7 @@ build_script:
  - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
  - ps: |
      # Download default NNUE net from fishtest
-      $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
+      $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
      $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)"
      $nnuenet = $Matches.nnuenet
      Write-Host "Default net:" $nnuenet
@@ -60,7 +60,6 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
 	learn/learn.cpp \
 	learn/gensfen.cpp \
 	learn/convert.cpp \
-	learn/learning_tools.cpp \
 	learn/multi_think.cpp

 OBJS = $(notdir $(SRCS:.cpp=.o))
@@ -101,12 +100,17 @@ VPATH = syzygy:nnue:nnue/features:eval:extra:learn

 ### 2.1. General and architecture defaults

+ifeq ($(ARCH),)
+   ARCH = x86-64-modern
+   help_skip_sanity = yes
+endif
 # explicitly check for the list of supported architectures (as listed with make help),
 # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
-ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
-                               x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
-                               x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
-                               armv7 armv7-neon armv8 apple-silicon general-64 general-32))
+ifeq ($(ARCH), $(filter $(ARCH), \
+                 x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
+                 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
+                 x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
+                 armv7 armv7-neon armv8 apple-silicon general-64 general-32))
   SUPPORTED_ARCH=true
 else
   SUPPORTED_ARCH=false
@@ -130,7 +134,6 @@ avx512 = no
 vnni256 = no
 vnni512 = no
 neon = no
-ARCH = x86-64-modern
 STRIP = strip

 ### 2.2 Architecture specific
@@ -394,19 +397,6 @@ ifeq ($(COMP),clang)
 	endif
 endif

-ifeq ($(comp),icc)
-	profile_make = icc-profile-make
-	profile_use = icc-profile-use
-else
-ifeq ($(comp),clang)
-	profile_make = clang-profile-make
-	profile_use = clang-profile-use
-else
-	profile_make = gcc-profile-make
-	profile_use = gcc-profile-use
-endif
-endif
-
 ifeq ($(KERNEL),Darwin)
 	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
 	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
@@ -418,20 +408,30 @@ endif
 # Currently we don't know how to make PGO builds with the NDK yet.
 ifeq ($(COMP),ndk)
 	CXXFLAGS += -stdlib=libc++ -fPIE
+	comp=clang
 	ifeq ($(arch),armv7)
-		comp=armv7a-linux-androideabi16-clang
 		CXX=armv7a-linux-androideabi16-clang++
 		CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
 		STRIP=arm-linux-androideabi-strip
 	endif
 	ifeq ($(arch),armv8)
-		comp=aarch64-linux-android21-clang
 		CXX=aarch64-linux-android21-clang++
 		STRIP=aarch64-linux-android-strip
 	endif
 	LDFLAGS += -static-libstdc++ -pie -lm -latomic
 endif

+ifeq ($(comp),icc)
+	profile_make = icc-profile-make
+	profile_use = icc-profile-use
+else ifeq ($(comp),clang)
+	profile_make = clang-profile-make
+	profile_use = clang-profile-use
+else
+	profile_make = gcc-profile-make
+	profile_use = gcc-profile-use
+endif
+
 ### Travis CI script uses COMPILER to overwrite CXX
 ifdef COMPILER
 	COMPCXX=$(COMPILER)
@@ -622,11 +622,13 @@ endif
 ### needs access to the optimization flags.
 ifeq ($(optimize),yes)
 ifeq ($(debug), no)
-	ifeq ($(COMP),ndk)
-		CXXFLAGS += -flto=thin
-		LDFLAGS += $(CXXFLAGS)
-	else ifeq ($(comp),clang)
+	ifeq ($(comp),clang)
 		CXXFLAGS += -flto=thin
+		ifneq ($(findstring MINGW,$(KERNEL)),)
+			CXXFLAGS += -fuse-ld=lld
+		else ifneq ($(findstring MSYS,$(KERNEL)),)
+			CXXFLAGS += -fuse-ld=lld
+		endif
 		LDFLAGS += $(CXXFLAGS)

 # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
@@ -650,10 +652,12 @@ ifeq ($(debug), no)
 # So, only enable it for a cross from Linux by default.
 	else ifeq ($(comp),mingw)
 	ifeq ($(KERNEL),Linux)
+	ifneq ($(arch),i386)
 		CXXFLAGS += -flto
 		LDFLAGS += $(CXXFLAGS) -flto=jobserver
 	endif
 	endif
+	endif
 endif
 endif

@@ -729,11 +733,12 @@ help:
 	@echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
 	@echo ""
 	@echo "-------------------------------"
-ifeq ($(SUPPORTED_ARCH), true)
+ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true)
 	@echo "The selected architecture $(ARCH) will enable the following configuration: "
 	@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
 else
 	@echo "Specify a supported architecture with the ARCH option for more details"
+	@echo ""
 endif


@@ -741,7 +746,7 @@ endif
        config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
        clang-profile-use clang-profile-make

-build: config-sanity
+build: config-sanity net
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all

 profile-build: net config-sanity objclean profileclean
@@ -768,12 +773,13 @@ install:
 	-cp $(EXE) $(BINDIR)
 	-strip $(BINDIR)/$(EXE)

-#clean all
+# clean all
 clean: objclean profileclean
 	@rm -f .depend *~ core

+# evaluation network (nnue)
 net:
-	$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+	$(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
 	@echo "Default net: $(nnuenet)"
 	$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
 	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
@@ -795,7 +801,6 @@ net:
            echo "shasum / sha256sum not found, skipping net validation"; \
        fi

-
 # clean binaries and objects
 objclean:
 	@rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o
@@ -164,5 +164,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
          ++posCounter;
      }

+  list.emplace_back("setoption name Use NNUE value true");
+
  return list;
 }
@@ -1,22 +0,0 @@
-#ifndef _EVALUATE_COMMON_H_
-#define _EVALUATE_COMMON_H_
-
-// A common header-like function for modern evaluation functions.
-
-#include <string>
-
-namespace Eval
-{
-	// --------------------------
-	// for learning
-	// --------------------------
-
-	// Save the evaluation function parameters to a file.
-	// You can specify the extension added to the end of the file.
-	void save_eval(std::string suffix);
-
-	// Get the current eta.
-	double get_eta();
-}
-
-#endif // _EVALUATE_KPPT_COMMON_H_
@@ -20,22 +20,29 @@
 #include <cassert>
 #include <cstdlib>
 #include <cstring>   // For std::memset
+#include <fstream>
 #include <iomanip>
 #include <sstream>
 #include <iostream>
-#include <set>
+#include <streambuf>
+#include <vector>

 #include "bitboard.h"
 #include "evaluate.h"
 #include "material.h"
+#include "misc.h"
 #include "pawns.h"
 #include "thread.h"
 #include "uci.h"
+#include "incbin/incbin.h"
+
+using namespace std;
+using namespace Eval::NNUE;

 namespace Eval {

  UseNNUEMode useNNUE;
-  std::string eval_file_loaded="None";
+  string eval_file_loaded = "None";

  static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
  {
@@ -49,35 +56,67 @@ namespace Eval {
    return UseNNUEMode::False;
  }

-  void init_NNUE() {
+  void NNUE::init() {

    useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
+    if (useNNUE == UseNNUEMode::False)
+        return;

-    std::string eval_file = std::string(Options["EvalFile"]);
-    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
-        if (Eval::NNUE::load_eval_file(eval_file))
-            eval_file_loaded = eval_file;
+    string eval_file = string(Options["EvalFile"]);
+
+    #if defined(DEFAULT_NNUE_DIRECTORY)
+    #define stringify2(x) #x
+    #define stringify(x) stringify2(x)
+    vector<string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
+    #else
+    vector<string> dirs = { "" , CommandLine::binaryDirectory };
+    #endif
+
+    for (string directory : dirs)
+        if (eval_file_loaded != eval_file)
+        {
+            ifstream stream(directory + eval_file, ios::binary);
+            if (load_eval(eval_file, stream))
+            {
+                sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
+                eval_file_loaded = eval_file;
+            }
+            else
+            {
+                sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
+            }
+        }
  }

-  void verify_NNUE() {
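+  /// NNUE::verify() checks that the net named by the EvalFile option was loaded; if NNUE is enabled and it was not, it prints an error and exits. Otherwise it reports which evaluation is in use.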
+  void NNUE::verify() {

-    std::string eval_file = std::string(Options["EvalFile"]);
-    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)    {
+    string eval_file = string(Options["EvalFile"]);
+
+    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
+    {
        UCI::OptionsMap defaults;
        UCI::init(defaults);

-        sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl;
-        sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl;
-        sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl;
-        sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl;
-        sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl;
-        std::exit(EXIT_FAILURE);
+        string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
+        string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
+        string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
+        string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
+        string msg5 = "The engine will be terminated now.";
+
+        sync_cout << "info string ERROR: " << msg1 << sync_endl;
+        sync_cout << "info string ERROR: " << msg2 << sync_endl;
+        sync_cout << "info string ERROR: " << msg3 << sync_endl;
+        sync_cout << "info string ERROR: " << msg4 << sync_endl;
+        sync_cout << "info string ERROR: " << msg5 << sync_endl;
+
+        exit(EXIT_FAILURE);
    }

    if (useNNUE != UseNNUEMode::False)
-        sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
+        sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
    else
-        sync_cout << "info string classical evaluation enabled." << sync_endl;
+        sync_cout << "info string classical evaluation enabled" << sync_endl;
  }
 }

@@ -165,26 +204,26 @@ namespace {

  // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
  // pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
-  constexpr Score Outpost[] = { S(56, 36), S(30, 23) };
+  constexpr Score Outpost[] = { S(56, 34), S(31, 23) };

  // PassedRank[Rank] contains a bonus according to the rank of a passed pawn
  constexpr Score PassedRank[RANK_NB] = {
-    S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
+    S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260)
  };

  // RookOnFile[semiopen/open] contains bonuses for each rook when there is
  // no (friendly) pawn on the rook file.
-  constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) };
+  constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) };

  // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
  // which piece type attacks which one. Attacks on lesser pieces which are
  // pawn-defended are not considered.
  constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
-    S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161)
+    S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
  };

  constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
-    S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41)
+    S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
  };

  // Assorted bonuses and penalties
@@ -952,17 +991,32 @@ make_v:
 /// evaluation of the position from the point of view of the side to move.

 Value Eval::evaluate(const Position& pos) {
-  if (useNNUE == UseNNUEMode::Pure) {
-      return NNUE::evaluate(pos);
+
+  Value v;
+
+  if (Eval::useNNUE == UseNNUEMode::Pure) {
+      v = NNUE::evaluate(pos);
  }
+  else if (Eval::useNNUE == UseNNUEMode::False)
+      v = Evaluation<NO_TRACE>(pos).value();
+  else
+  {
+      // scale and shift NNUE for compatibility with search and classical evaluation
+      auto  adjusted_NNUE = [&](){ return NNUE::evaluate(pos) * 5 / 4 + Tempo; };

-  bool classical = useNNUE == UseNNUEMode::False
-                || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
-  Value v = classical ? Evaluation<NO_TRACE>(pos).value()
-                      : NNUE::evaluate(pos) * 5 / 4 + Tempo;
+      // if the PSQ imbalance is large use classical eval; if it is smaller (but above a quarter pawn), use it only occasionally, depending on the node count
+      Value psq = Value(abs(eg_value(pos.psq_score())));
+      int   r50 = 16 + pos.rule50_count();
+      bool  largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
+      bool  classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));

-  if (classical && useNNUE != UseNNUEMode::False && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
-      v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
+      v = classical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
+
+      // if the classical eval is small and imbalance large, use NNUE nevertheless.
+      if (   largePsq
+          && abs(v) * 16 < NNUEThreshold2 * r50)
+          v = adjusted_NNUE();
+  }

  // Damp down the evaluation linearly when shuffling
  v = v * (100 - pos.rule50_count()) / 100;
@@ -38,15 +38,18 @@ namespace Eval {

  extern UseNNUEMode useNNUE;
  extern std::string eval_file_loaded;
-  void init_NNUE();
-  void verify_NNUE();
+
+  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
+  // for the build process (profile-build and fishtest) to work. Do not change the
+  // name of the macro, as it is used in the Makefile.
+  #define EvalFileDefaultName   "nn-28e08a9fe2ad.nnue"

  namespace NNUE {

    Value evaluate(const Position& pos);
-    Value compute_eval(const Position& pos);
-    void  update_eval(const Position& pos);
-    bool  load_eval_file(const std::string& evalFile);
+    bool load_eval(std::string name, std::istream& stream);
+    void init();
+    void verify();

  } // namespace NNUE

@@ -0,0 +1,26 @@
+The file "incbin.h" is free and unencumbered software released into
+the public domain by Dale Weiler, see:
+   <https://github.com/graphitemaster/incbin>
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
@@ -0,0 +1,368 @@
+/**
+ * @file incbin.h
+ * @author Dale Weiler
+ * @brief Utility for including binary files
+ *
+ * Facilities for including binary files into the current translation unit and
+ * making use from them externally in other translation units.
+ */
+#ifndef INCBIN_HDR
+#define INCBIN_HDR
+#include <limits.h>
+#if   defined(__AVX512BW__) || \
+      defined(__AVX512CD__) || \
+      defined(__AVX512DQ__) || \
+      defined(__AVX512ER__) || \
+      defined(__AVX512PF__) || \
+      defined(__AVX512VL__) || \
+      defined(__AVX512F__)
+# define INCBIN_ALIGNMENT_INDEX 6
+#elif defined(__AVX__)      || \
+      defined(__AVX2__)
+# define INCBIN_ALIGNMENT_INDEX 5
+#elif defined(__SSE__)      || \
+      defined(__SSE2__)     || \
+      defined(__SSE3__)     || \
+      defined(__SSSE3__)    || \
+      defined(__SSE4_1__)   || \
+      defined(__SSE4_2__)   || \
+      defined(__neon__)
+# define INCBIN_ALIGNMENT_INDEX 4
+#elif ULONG_MAX != 0xffffffffu
+# define INCBIN_ALIGNMENT_INDEX 3
+# else
+# define INCBIN_ALIGNMENT_INDEX 2
+#endif
+
+/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
+#define INCBIN_ALIGN_SHIFT_0 1
+#define INCBIN_ALIGN_SHIFT_1 2
+#define INCBIN_ALIGN_SHIFT_2 4
+#define INCBIN_ALIGN_SHIFT_3 8
+#define INCBIN_ALIGN_SHIFT_4 16
+#define INCBIN_ALIGN_SHIFT_5 32
+#define INCBIN_ALIGN_SHIFT_6 64
+
+/* Actual alignment value */
+#define INCBIN_ALIGNMENT \
+    INCBIN_CONCATENATE( \
+        INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
+        INCBIN_ALIGNMENT_INDEX)
+
+/* Stringize */
+#define INCBIN_STR(X) \
+    #X
+#define INCBIN_STRINGIZE(X) \
+    INCBIN_STR(X)
+/* Concatenate */
+#define INCBIN_CAT(X, Y) \
+    X ## Y
+#define INCBIN_CONCATENATE(X, Y) \
+    INCBIN_CAT(X, Y)
+/* Deferred macro expansion */
+#define INCBIN_EVAL(X) \
+    X
+#define INCBIN_INVOKE(N, ...) \
+    INCBIN_EVAL(N(__VA_ARGS__))
+
+/* Green Hills uses a different directive for including binary data */
+#if defined(__ghs__)
+#  if (__ghs_asm == 2)
+#    define INCBIN_MACRO ".file"
+/* Or consider the ".myrawdata" entry in the ld file */
+#  else
+#    define INCBIN_MACRO "\tINCBIN"
+#  endif
+#else
+#  define INCBIN_MACRO ".incbin"
+#endif
+
+#ifndef _MSC_VER
+#  define INCBIN_ALIGN \
+    __attribute__((aligned(INCBIN_ALIGNMENT)))
+#else
+#  define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
+#endif
+
+#if defined(__arm__) || /* GNU C and RealView */ \
+    defined(__arm) || /* Diab */ \
+    defined(_ARM) /* ImageCraft */
+#  define INCBIN_ARM
+#endif
+
+#ifdef __GNUC__
+/* Utilize .balign where supported */
+#  define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+#  define INCBIN_ALIGN_BYTE ".balign 1\n"
+#elif defined(INCBIN_ARM)
+/*
+ * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
+ * the shift count. This is the value passed to `.align'
+ */
+#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
+#  define INCBIN_ALIGN_BYTE ".align 0\n"
+#else
+/* We assume other inline assemblers treat `.align' as `.balign' */
+#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+#  define INCBIN_ALIGN_BYTE ".align 1\n"
+#endif
+
+/* INCBIN_CONST is used by incbin.c generated files */
+#if defined(__cplusplus)
+#  define INCBIN_EXTERNAL extern "C"
+#  define INCBIN_CONST    extern const
+#else
+#  define INCBIN_EXTERNAL extern
+#  define INCBIN_CONST    const
+#endif
+
+/**
+ * @brief Optionally override the linker section into which data is emitted.
+ *
+ * @warning If you use this facility, you'll have to deal with platform-specific linker output
+ * section naming on your own
+ *
+ * Overriding the default linker output section, e.g for esp8266/Arduino:
+ * @code
+ * #define INCBIN_OUTPUT_SECTION ".irom.text"
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ * // Data is emitted into program memory that never gets copied to RAM
+ * @endcode
+ */
+#if !defined(INCBIN_OUTPUT_SECTION)
+#  if defined(__APPLE__)
+#    define INCBIN_OUTPUT_SECTION         ".const_data"
+#  else
+#    define INCBIN_OUTPUT_SECTION         ".rodata"
+#  endif
+#endif
+
+#if defined(__APPLE__)
+/* The directives are different for Apple branded compilers */
+#  define INCBIN_SECTION         INCBIN_OUTPUT_SECTION "\n"
+#  define INCBIN_GLOBAL(NAME)    ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+#  define INCBIN_INT             ".long "
+#  define INCBIN_MANGLE          "_"
+#  define INCBIN_BYTE            ".byte "
+#  define INCBIN_TYPE(...)
+#else
+#  define INCBIN_SECTION         ".section " INCBIN_OUTPUT_SECTION "\n"
+#  define INCBIN_GLOBAL(NAME)    ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+#  if defined(__ghs__)
+#    define INCBIN_INT           ".word "
+#  else
+#    define INCBIN_INT           ".int "
+#  endif
+#  if defined(__USER_LABEL_PREFIX__)
+#    define INCBIN_MANGLE        INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
+#  else
+#    define INCBIN_MANGLE        ""
+#  endif
+#  if defined(INCBIN_ARM)
+/* On arm assemblers, `@' is used as a line comment token */
+#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
+#  elif defined(__MINGW32__) || defined(__MINGW64__)
+/* Mingw doesn't support this directive either */
+#    define INCBIN_TYPE(NAME)
+#  else
+/* It's safe to use `@' on other architectures */
+#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
+#  endif
+#  define INCBIN_BYTE            ".byte "
+#endif
+
+/* List of style types used for symbol names */
+#define INCBIN_STYLE_CAMEL 0
+#define INCBIN_STYLE_SNAKE 1
+
+/**
+ * @brief Specify the prefix to use for symbol names.
+ *
+ * By default this is `g', producing symbols of the form:
+ * @code
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char gFooData[];
+ * // const unsigned char *const gFooEnd;
+ * // const unsigned int gFooSize;
+ * @endcode
+ *
+ * If however you specify a prefix before including: e.g:
+ * @code
+ * #define INCBIN_PREFIX incbin
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols instead:
+ * // const unsigned char incbinFooData[];
+ * // const unsigned char *const incbinFooEnd;
+ * // const unsigned int incbinFooSize;
+ * @endcode
+ */
+#if !defined(INCBIN_PREFIX)
+#  define INCBIN_PREFIX g
+#endif
+
+/**
+ * @brief Specify the style used for symbol names.
+ *
+ * Possible options are
+ * - INCBIN_STYLE_CAMEL "CamelCase"
+ * - INCBIN_STYLE_SNAKE "snake_case"
+ *
+ * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
+ * @code
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>FooData[];
+ * // const unsigned char *const <prefix>FooEnd;
+ * // const unsigned int <prefix>FooSize;
+ * @endcode
+ *
+ * If however you specify a style before including: e.g:
+ * @code
+ * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
+ * #include "incbin.h"
+ * INCBIN(foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>foo_data[];
+ * // const unsigned char *const <prefix>foo_end;
+ * // const unsigned int <prefix>foo_size;
+ * @endcode
+ */
+#if !defined(INCBIN_STYLE)
+#  define INCBIN_STYLE INCBIN_STYLE_CAMEL
+#endif
+
+/* Style lookup tables */
+#define INCBIN_STYLE_0_DATA Data
+#define INCBIN_STYLE_0_END End
+#define INCBIN_STYLE_0_SIZE Size
+#define INCBIN_STYLE_1_DATA _data
+#define INCBIN_STYLE_1_END _end
+#define INCBIN_STYLE_1_SIZE _size
+
+/* Style lookup: returning identifier */
+#define INCBIN_STYLE_IDENT(TYPE) \
+    INCBIN_CONCATENATE( \
+        INCBIN_STYLE_, \
+        INCBIN_CONCATENATE( \
+            INCBIN_EVAL(INCBIN_STYLE), \
+            INCBIN_CONCATENATE(_, TYPE)))
+
+/* Style lookup: returning string literal */
+#define INCBIN_STYLE_STRING(TYPE) \
+    INCBIN_STRINGIZE( \
+        INCBIN_STYLE_IDENT(TYPE)) \
+
+/* Generate the global labels by indirectly invoking the macro with our style
+ * type and concatenating the name against them. */
+#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
+    INCBIN_INVOKE( \
+        INCBIN_GLOBAL, \
+        INCBIN_CONCATENATE( \
+            NAME, \
+            INCBIN_INVOKE( \
+                INCBIN_STYLE_IDENT, \
+                TYPE))) \
+    INCBIN_INVOKE( \
+        INCBIN_TYPE, \
+        INCBIN_CONCATENATE( \
+            NAME, \
+            INCBIN_INVOKE( \
+                INCBIN_STYLE_IDENT, \
+                TYPE)))
+
+/**
+ * @brief Externally reference binary data included in another translation unit.
+ *
+ * Produces three external symbols that reference the binary data included in
+ * another translation unit.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name given for the binary data
+ *
+ * @code
+ * INCBIN_EXTERN(Foo);
+ *
+ * // Now you have the following symbols:
+ * // extern const unsigned char <prefix>FooData[];
+ * // extern const unsigned char *const <prefix>FooEnd;
+ * // extern const unsigned int <prefix>FooSize;
+ * @endcode
+ */
+#define INCBIN_EXTERN(NAME) \
+    INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
+        INCBIN_CONCATENATE( \
+            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+            INCBIN_STYLE_IDENT(DATA))[]; \
+    INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
+    INCBIN_CONCATENATE( \
+        INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+        INCBIN_STYLE_IDENT(END)); \
+    INCBIN_EXTERNAL const unsigned int \
+        INCBIN_CONCATENATE( \
+            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+            INCBIN_STYLE_IDENT(SIZE))
+
+/**
+ * @brief Include a binary file into the current translation unit.
+ *
+ * Includes a binary file into the current translation unit, producing three symbols
+ * for objects that encode the data and size respectively.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name to associate with this binary data (as an identifier.)
+ * @param FILENAME The file to include (as a string literal.)
+ *
+ * @code
+ * INCBIN(Icon, "icon.png");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>IconData[];
+ * // const unsigned char *const <prefix>IconEnd;
+ * // const unsigned int <prefix>IconSize;
+ * @endcode
+ *
+ * @warning This must be used in global scope
+ * @warning The identifiers may be different if INCBIN_STYLE is not default
+ *
+ * To externally reference the data included by this in another translation unit
+ * please @see INCBIN_EXTERN.
+ */
+#ifdef _MSC_VER
+#define INCBIN(NAME, FILENAME) \
+    INCBIN_EXTERN(NAME)
+#else
+#define INCBIN(NAME, FILENAME) \
+    __asm__(INCBIN_SECTION \
+            INCBIN_GLOBAL_LABELS(NAME, DATA) \
+            INCBIN_ALIGN_HOST \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
+            INCBIN_MACRO " \"" FILENAME "\"\n" \
+            INCBIN_GLOBAL_LABELS(NAME, END) \
+            INCBIN_ALIGN_BYTE \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
+                INCBIN_BYTE "1\n" \
+            INCBIN_GLOBAL_LABELS(NAME, SIZE) \
+            INCBIN_ALIGN_HOST \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
+                INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
+                           INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
+            INCBIN_ALIGN_HOST \
+            ".text\n" \
+    ); \
+    INCBIN_EXTERN(NAME)
+
+#endif
+#endif
@@ -8,9 +8,6 @@
 #include "position.h"
 #include "tt.h"

-// evaluate header for learning
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"

 #include "syzygy/tbprobe.h"
@@ -122,7 +119,7 @@ namespace Learner
                else if (token == "score") {
                    double score;
                    ss >> score;
-                    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
+                    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
                    // Normalize to [0.0, 1.0].
                    score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value);
                    // Scale to [dest_score_min_value, dest_score_max_value].
@@ -480,7 +477,7 @@ namespace Learner
            {
                if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
                    StateInfo si;
-                    tpos.set_from_packed_sfen(p.sfen, &si, th, false);
+                    tpos.set_from_packed_sfen(p.sfen, &si, th);

                    // write as plain text
                    ofs << "fen " << tpos.fen() << std::endl;
@@ -2,6 +2,7 @@

 #include "packed_sfen.h"
 #include "multi_think.h"
+#include "../syzygy/tbprobe.h"

 #include "misc.h"
 #include "position.h"
@@ -9,8 +10,6 @@
 #include "tt.h"
 #include "uci.h"

-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"

 #include "nnue/evaluate_nnue_learner.h"
@@ -392,7 +391,6 @@ namespace Learner
            Position& pos,
            std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
            int ply,
-            int depth,
            vector<Move>& pv);

        // Min and max depths for search during gensfen
@@ -467,18 +465,7 @@ namespace Learner
            return 0;
        }

-        // Initialize the Syzygy Ending Tablebase and sort the moves.
-        Search::RootMoves rootMoves;
-        for (const auto& m : MoveList<LEGAL>(pos))
-        {
-            rootMoves.emplace_back(m);
-        }
-
-        if (!rootMoves.empty())
-        {
-            Tablebases::rank_root_moves(pos, rootMoves);
-        }
-        else
+        if(pos.this_thread()->rootMoves.empty())
        {
            // If there is no legal move
            return pos.checkers()
@@ -749,7 +736,6 @@ namespace Learner
        Position& pos,
        std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
        int ply,
-        int depth,
        vector<Move>& pv)
    {
        auto rootColor = pos.side_to_move();
@@ -763,15 +749,6 @@ namespace Learner
            }

            pos.do_move(m, states[ply++]);
-
-            // Because the difference calculation of evaluate() cannot be
-            // performed unless each node evaluate() is called!
-            // If the depth is 8 or more, it seems
-            // faster not to calculate this difference.
-            if (depth < 8)
-            {
-                Eval::NNUE::update_eval(pos);
-            }
        }

        // Reach leaf
@@ -830,6 +807,8 @@ namespace Learner
            auto& pos = th->rootPos;
            pos.set(StartFEN, false, &si, th);

+            int resign_counter = 0;
+            bool should_resign = prng.rand(10) > 1;
            // Vector for holding the sfens in the current simulated game.
            PSVector a_psv;
            a_psv.reserve(write_maxply + MAX_PLY);
@@ -857,6 +836,11 @@ namespace Learner
                // Current search depth
                const int depth = search_depth_min + (int)prng.rand(search_depth_max - search_depth_min + 1);

+                // Starting search calls init_for_search
+                auto [search_value, search_pv] = search(pos, depth, 1, nodes);
+
+                // This has to be performed after search because it needs to know
+                // rootMoves which are filled in init_for_search.
                const auto result = get_current_game_result(pos, move_hist_scores);
                if (result.has_value())
                {
@@ -864,113 +848,91 @@ namespace Learner
                    break;
                }

+                // Always adjudicate by eval limit.
+                // Also because of this we don't have to check for TB/MATE scores
+                if (abs(search_value) >= eval_limit)
                {
-                    auto [search_value, search_pv] = search(pos, depth, 1, nodes);
-
-                    // Always adjudivate by eval limit.
-                    // Also because of this we don't have to check for TB/MATE scores
-                    if (abs(search_value) >= eval_limit)
-                    {
-                        const auto wdl = (search_value >= eval_limit) ? 1 : -1;
-                        flush_psv(wdl);
+                    resign_counter++;
+                    if ((should_resign && resign_counter >= 4) || abs(search_value) >= 10000) {
+                        flush_psv((search_value >= eval_limit) ? 1 : -1);
                        break;
                    }
+                } else {
+                    resign_counter = 0;
+                }
+                // Verification of a strange move
+                if (search_pv.size() > 0
+                    && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
+                {
+                    // (???)
+                    // MOVE_WIN is checking if it is the declaration victory stage before this
+                    // The declarative winning move should never come back here.
+                    // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)...
+                    cout << "Error! : " << pos.fen() << next_move << search_value << endl;
+                    break;
+                }

-                    // Verification of a strange move
-                    if (search_pv.size() > 0
-                        && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
+                // Save the move score for adjudication.
+                move_hist_scores.push_back(search_value);
+
+                // Discard stuff before write_minply is reached
+                // because it can harm training due to overfitting.
+                // Initial positions would be too common.
+                if (ply < write_minply - 1)
+                {
+                    a_psv.clear();
+                    goto SKIP_SAVE;
+                }
+
+                // Look into the position hashtable to see if the same
+                // position was seen before.
+                // This is a good heuristic to exclude already seen
+                // positions without many false positives.
+                {
+                    auto key = pos.key();
+                    auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
+                    auto old_key = hash[hash_index];
+                    if (key == old_key)
                    {
-                        // (???)
-                        // MOVE_WIN is checking if it is the declaration victory stage before this
-                        // The declarative winning move should never come back here.
-                        // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)...
-                        cout << "Error! : " << pos.fen() << next_move << search_value << endl;
-                        break;
-                    }
-
-                    // Save the move score for adjudication.
-                    move_hist_scores.push_back(search_value);
-
-                    // If depth 0, pv is not obtained, so search again at depth 2.
-                    if (search_depth_min <= 0)
-                    {
-                        auto [research_value, research_pv] = search(pos, 2);
-                        search_pv = research_pv;
-                    }
-
-                    // Discard stuff before write_minply is reached
-                    // because it can harm training due to overfitting.
-                    // Initial positions would be too common.
-                    if (ply < write_minply - 1)
-                    {
-                        a_psv.clear();
                        goto SKIP_SAVE;
                    }
-
-                    // Look into the position hashtable to see if the same
-                    // position was seen before.
-                    // This is a good heuristic to exlude already seen
-                    // positions without many false positives.
+                    else
                    {
-                        auto key = pos.key();
-                        auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
-                        auto old_key = hash[hash_index];
-                        if (key == old_key)
-                        {
-                            a_psv.clear();
-                            goto SKIP_SAVE;
-                        }
-                        else
-                        {
-                            // Replace with the current key.
-                            hash[hash_index] = key;
-                        }
+                        // Replace with the current key.
+                        hash[hash_index] = key;
                    }
-
-                    // Pack the current position into a packed sfen and save it into the buffer.
-                    {
-                        a_psv.emplace_back(PackedSfenValue());
-                        auto& psv = a_psv.back();
-
-                        // Here we only write the position data.
-                        // Result is added after the whole game is done.
-                        pos.sfen_pack(psv.sfen);
-
-                        // Get the value of evaluate() as seen from the
-                        // root color on the leaf node of the PV line.
-                        // I don't know the goodness and badness of using the
-                        // return value of search() as it is.
-                        // TODO: Consider using search value instead of evaluate_leaf.
-                        //       Maybe give it as an option.
-
-                        // Use PV moves to reach the leaf node and use the value
-                        // that evaluated() is called on that leaf node.
-                        const auto leaf_value = evaluate_leaf(pos, states, ply, depth, search_pv);
-
-                        // If for some reason the leaf node couldn't yield an eval
-                        // we fallback to search value.
-                        psv.score = leaf_value == VALUE_NONE ? search_value : leaf_value;
-
-                        psv.gamePly = ply;
-
-                        // Take out the first PV move. This should be present unless depth 0.
-                        assert(search_pv.size() >= 1);
-                        psv.move = search_pv[0];
-                    }
-
-                SKIP_SAVE:;
-
-                    // For some reason, We could not get PV (hit the substitution table etc. and got stuck?)
-                    // so go to the next game. It's a rare case, so you can ignore it.
-                    if (search_pv.size() == 0)
-                    {
-                        break;
-                    }
-
-                    // Update the next move according to best search result.
-                    next_move = search_pv[0];
                }

+                // Pack the current position into a packed sfen and save it into the buffer.
+                {
+                    a_psv.emplace_back(PackedSfenValue());
+                    auto& psv = a_psv.back();
+
+                    // Here we only write the position data.
+                    // Result is added after the whole game is done.
+                    pos.sfen_pack(psv.sfen);
+
+                    psv.score = search_value;
+
+                    psv.gamePly = ply;
+
+                    // Take out the first PV move. This should be present unless depth 0.
+                    assert(search_pv.size() >= 1);
+                    psv.move = search_pv[0];
+                }
+
+            SKIP_SAVE:;
+
+                // For some reason we could not get a PV (hit the transposition table etc. and got stuck?),
+                // so go to the next game. It's a rare case, so it can be ignored.
+                if (search_pv.size() == 0)
+                {
+                    break;
+                }
+
+                // Update the next move according to best search result.
+                next_move = search_pv[0];
+
                // Random move.
                auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
                if (random_move.has_value())
@@ -983,18 +945,11 @@ namespace Learner
                    {
                        break;
                    }
-
-                    // Clear the sfens that were written before the random move.
-                    // (???) why?
-                    a_psv.clear();
                }

                // Do move.
                pos.do_move(next_move, states[ply]);

-                // Call node evaluate() for each difference calculation.
-                Eval::NNUE::update_eval(pos);
-
            } // for (int ply = 0; ; ++ply)

        } // while(!quit)
@@ -1177,10 +1132,28 @@ namespace Learner
            << "  detect_draw_by_insufficient_mating_material = " << detect_draw_by_insufficient_mating_material << endl;

        // Show if the training data generator uses NNUE.
-        Eval::verify_NNUE();
+        Eval::NNUE::verify();

        Threads.main()->ponder = false;

+        // About Search::Limits
+        // Be careful because this member variable is global and affects other threads.
+        {
+          auto& limits = Search::Limits;
+
+          // Make the search equivalent to the "go infinite" command, because time management would only get in the way here.
+          limits.infinite = true;
+
+          // Suppress PV output, since printing it would only get in the way.
+          limits.silent = true;
+
+          // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
+          limits.nodes = 0;
+
+          // depth is also processed by the one passed as an argument of Learner::search().
+          limits.depth = 0;
+        }
+
        // Create and execute threads as many as Options["Threads"].
        {
            SfenWriter sfen_writer(output_file_name, thread_num);
@@ -29,8 +29,6 @@
 #include "uci.h"
 #include "search.h"

-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"

 #include "nnue/evaluate_nnue_learner.h"
@@ -58,6 +56,7 @@
 #include <omp.h>
 #endif

+extern double global_learning_rate;

 using namespace std;

@@ -92,12 +91,6 @@ namespace Learner
    static double dest_score_min_value = 0.0;
    static double dest_score_max_value = 1.0;

-    // Assume teacher signals are the scores of deep searches,
-    // and convert them into winning probabilities in the trainer.
-    // Sometimes we want to use the winning probabilities in the training
-    // data directly. In those cases, we set false to this variable.
-    static bool convert_teacher_signal_to_winning_probability = true;
-
    // Using stockfish's WDL with win rate model instead of sigmoid
    static bool use_wdl = false;

@@ -164,14 +157,6 @@ namespace Learner
        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
    }

-    // A constant used in elmo (WCSC27). Adjustment required.
-    // Since elmo does not internally divide the expression, the value is different.
-    // You can set this value with the learn command.
-    // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27)
-    double ELMO_LAMBDA = 0.33;
-    double ELMO_LAMBDA2 = 0.33;
-    double ELMO_LAMBDA_LIMIT = 32000;
-
    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
    double get_scaled_signal(double signal)
    {
@@ -194,26 +179,7 @@ namespace Learner
    double calculate_p(double teacher_signal, int ply)
    {
        const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
-
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability)
-        {
-            p = winning_percentage(scaled_teacher_signal, ply);
-        }
-
-        return p;
-    }
-
-    double calculate_lambda(double teacher_signal)
-    {
-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT
-        // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda =
-            (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
-            ? ELMO_LAMBDA2
-            : ELMO_LAMBDA;
-
-        return lambda;
+        return winning_percentage(scaled_teacher_signal, ply);
    }

    double calculate_t(int game_result)
@@ -226,32 +192,6 @@ namespace Learner
        return t;
    }

-    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
-    {
-        // elmo (WCSC27) method
-        // Correct with the actual game wins and losses.
-        const double q = winning_percentage(shallow, psv.gamePly);
-        const double p = calculate_p(teacher_signal, psv.gamePly);
-        const double t = calculate_t(psv.game_result);
-        const double lambda = calculate_lambda(teacher_signal);
-
-        double grad;
-        if (use_wdl)
-        {
-            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
-            grad = lambda * dce_p + (1.0 - lambda) * dce_t;
-        }
-        else
-        {
-            // Use the actual win rate as a correction term.
-            // This is the idea of elmo (WCSC27), modern O-parts.
-            grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
-        }
-
-        return grad;
-    }
-
    // Calculate cross entropy during learning
    // The individual cross entropy of the win/loss term and win
    // rate term of the elmo expression is returned
@@ -262,21 +202,16 @@ namespace Learner
        const PackedSfenValue& psv,
        double& cross_entropy_eval,
        double& cross_entropy_win,
-        double& cross_entropy,
        double& entropy_eval,
-        double& entropy_win,
-        double& entropy)
+        double& entropy_win)
    {
        // Teacher winning probability.
        const double q = winning_percentage(shallow, psv.gamePly);
        const double p = calculate_p(teacher_signal, psv.gamePly);
        const double t = calculate_t(psv.game_result);
-        const double lambda = calculate_lambda(teacher_signal);

        constexpr double epsilon = 0.000001;

-        const double m = (1.0 - lambda) * t + lambda * p;
-
        cross_entropy_eval =
            (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
        cross_entropy_win =
@@ -285,17 +220,12 @@ namespace Learner
            (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
        entropy_win =
            (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
-
-        cross_entropy =
-            (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
-        entropy =
-            (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
    }

    // Other objective functions may be considered in the future...
    double calc_grad(Value shallow, const PackedSfenValue& psv)
    {
-        return calc_grad((Value)psv.score, shallow, psv);
+        return (double)(shallow - (Value)psv.score) / 2400.0;
    }

    struct BasicSfenInputStream
@@ -787,15 +717,9 @@ namespace Learner

        std::atomic<bool> stop_flag;

-        // Discount rate
-        double discount_rate;
-
        // Option to exclude early stage from learning
        int reduction_gameply;

-        // Option not to learn kk/kkp/kpp/kppp
-        std::array<bool, 4> freeze;
-
        // If the absolute value of the evaluation value of the deep search
        // of the teacher phase exceeds this value, discard the teacher phase.
        int eval_limit;
@@ -825,7 +749,6 @@ namespace Learner

        uint64_t eval_save_interval;
        uint64_t loss_output_interval;
-        uint64_t mirror_percentage;

        // Loss calculation.
        // done: Number of phases targeted this time
@@ -849,7 +772,6 @@ namespace Learner
        for (size_t i = 0; i < pv.size(); ++i)
        {
            task_pos.do_move(pv[i], states[i]);
-            Eval::NNUE::update_eval(task_pos);
        }

        const Value shallow_value =
@@ -870,20 +792,18 @@ namespace Learner
        // It doesn't matter if you have disabled the substitution table.
        TT.new_search();

-        std::cout << "PROGRESS: " << now_string() << ", ";
-        std::cout << sr.total_done << " sfens";
-        std::cout << ", iteration " << epoch;
-        std::cout << ", eta = " << Eval::get_eta() << ", ";
+        cout << "PROGRESS: " << now_string() << ", ";
+        cout << sr.total_done << " sfens";
+        cout << ", iteration " << epoch;
+        cout << ", learning rate = " << global_learning_rate << ", ";

        // For calculation of verification data loss
-        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
-        atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
+        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
+        atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
        test_sum_cross_entropy_eval = 0;
        test_sum_cross_entropy_win = 0;
-        test_sum_cross_entropy = 0;
        test_sum_entropy_eval = 0;
        test_sum_entropy_win = 0;
-        test_sum_entropy = 0;

        // norm for learning
        atomic<double> sum_norm;
@@ -899,7 +819,7 @@ namespace Learner
        auto& pos = th->rootPos;
        StateInfo si;
        pos.set(StartFEN, false, &si, th);
-        std::cout << "hirate eval = " << Eval::evaluate(pos);
+        cout << "hirate eval = " << Eval::evaluate(pos) << endl;

        // It's better to parallelize here, but it's a bit
        // troublesome because the search before slave has not finished.
@@ -923,10 +843,8 @@ namespace Learner
                    &ps,
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
-                    &test_sum_cross_entropy,
                    &test_sum_entropy_eval,
                    &test_sum_entropy_win,
-                    &test_sum_entropy,
                    &sum_norm,
                    &task_count,
                    &move_accord_count
@@ -954,26 +872,22 @@ namespace Learner
                // For the time being, regarding the win rate and loss terms only in the elmo method
                // Calculate and display the cross entropy.

-                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
-                double test_entropy_eval, test_entropy_win, test_entropy;
+                double test_cross_entropy_eval, test_cross_entropy_win;
+                double test_entropy_eval, test_entropy_win;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    test_cross_entropy_eval,
                    test_cross_entropy_win,
-                    test_cross_entropy,
                    test_entropy_eval,
-                    test_entropy_win,
-                    test_entropy);
+                    test_entropy_win);

                // The total cross entropy need not be abs() by definition.
                test_sum_cross_entropy_eval += test_cross_entropy_eval;
                test_sum_cross_entropy_win += test_cross_entropy_win;
-                test_sum_cross_entropy += test_cross_entropy;
                test_sum_entropy_eval += test_entropy_eval;
                test_sum_entropy_win += test_entropy_win;
-                test_sum_entropy += test_entropy;
                sum_norm += (double)abs(shallow_value);

                // Determine if the teacher's move and the score of the shallow search match
@@ -998,7 +912,7 @@ namespace Learner
        while (task_count)
            sleep(1);

-        latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
+        latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
        latest_loss_count += sr.sfen_for_mse.size();

        // learn_cross_entropy may be called train cross
@@ -1008,27 +922,24 @@ namespace Learner

        if (sr.sfen_for_mse.size() && done)
        {
-            cout
-                << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
+            cout << "INFO: "
+                << "test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
                << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
                << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
                << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
-                << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
-                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                << " , norm = " << sum_norm
-                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
+                << endl;

            if (done != static_cast<uint64_t>(-1))
            {
-                cout
-                    << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
+                cout << "INFO: "
+                    << "learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
                    << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
                    << " , learn_entropy_eval = " << learn_sum_entropy_eval / done
                    << " , learn_entropy_win = " << learn_sum_entropy_win / done
-                    << " , learn_cross_entropy = " << learn_sum_cross_entropy / done
-                    << " , learn_entropy = " << learn_sum_entropy / done;
+                    << endl;
            }
-            cout << endl;
        }
        else
        {
@@ -1038,10 +949,8 @@ namespace Learner
        // Clear 0 for next time.
        learn_sum_cross_entropy_eval = 0.0;
        learn_sum_cross_entropy_win = 0.0;
-        learn_sum_cross_entropy = 0.0;
        learn_sum_entropy_eval = 0.0;
        learn_sum_entropy_win = 0.0;
-        learn_sum_entropy = 0.0;
    }

    void LearnerThink::thread_worker(size_t thread_id)
@@ -1058,7 +967,7 @@ namespace Learner
            // display mse (this is sometimes done only for thread 0)
            // Immediately after being read from the file...

-        // Lock the evaluation function so that it is not used during updating.
+            // Lock the evaluation function so that it is not used during updating.
            shared_lock<shared_timed_mutex> read_lock(nn_mutex, defer_lock);
            if (sr.next_update_weights <= sr.total_done ||
                (thread_id != 0 && !read_lock.try_lock()))
@@ -1090,7 +999,7 @@ namespace Learner

                        // Lock the evaluation function so that it is not used during updating.
                        lock_guard<shared_timed_mutex> write_lock(nn_mutex);
-                        Eval::NNUE::UpdateParameters(epoch);
+                        Eval::NNUE::UpdateParameters();
                    }

                    ++epoch;
@@ -1167,8 +1076,7 @@ namespace Learner
                goto RETRY_READ;

            StateInfo si;
-            const bool mirror = prng.rand(100) < mirror_percentage;
-            if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0)
+            if (pos.set_from_packed_sfen(ps.sfen, &si, th) != 0)
            {
                // I got a strange sfen. Should be debugged!
                // Since it is an illegal sfen, it may not be
@@ -1177,18 +1085,30 @@ namespace Learner
                goto RETRY_READ;
            }

-            // There is a possibility that all the pieces are blocked and stuck.
-            // Also, the declaration win phase is excluded from
-            // learning because you cannot go to leaf with PV moves.
-            // (shouldn't write out such teacher aspect itself,
-            // but may have written it out with an old generation routine)
-            // Skip the position if there are no legal moves (=checkmated or stalemate).
-            if (MoveList<LEGAL>(pos).size() == 0)
-                goto RETRY_READ;
-
            // I can read it, so try displaying it.
            //      cout << pos << value << endl;

+            const auto rootColor = pos.side_to_move();
+
+            int ply = 0;
+            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
+
+            if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
+            {
+                goto RETRY_READ;
+            }
+
+            pos.do_move((Move)ps.move, state[ply++]);
+
+			// The side to move may have no legal moves (every piece blocked).
+			// Positions decided by a declaration win are also excluded from
+			// learning, because the PV cannot be followed to a leaf from them.
+			// (Such training positions should not have been written out at all,
+			// but an older generation routine may have emitted them.)
+			// Skip the position if there are no legal moves (=checkmated or stalemate).
+			if (MoveList<LEGAL>(pos).size() == 0)
+				goto RETRY_READ;
+
            // Evaluation value of shallow search (qsearch)
            const auto [_, pv] = qsearch(pos);

@@ -1199,13 +1119,11 @@ namespace Learner
            // Go to the leaf node as it is, add only to the gradient array,
            // and later try AdaGrad at the time of rmse aggregation.

-            const auto rootColor = pos.side_to_move();

            // If the initial PV is different, it is better not to use it for learning.
            // If it is the result of searching a completely different place, it may become noise.
            // It may be better not to study where the difference in evaluation values is too large.

-            int ply = 0;

            // A helper function that adds the gradient to the current phase.
            auto pos_add_grad = [&]() {
@@ -1224,35 +1142,28 @@ namespace Learner
                    : -Eval::evaluate(pos);

                // Calculate loss for training data
-                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
-                double learn_entropy_eval, learn_entropy_win, learn_entropy;
+                double learn_cross_entropy_eval, learn_cross_entropy_win;
+                double learn_entropy_eval, learn_entropy_win;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    learn_cross_entropy_eval,
                    learn_cross_entropy_win,
-                    learn_cross_entropy,
                    learn_entropy_eval,
-                    learn_entropy_win,
-                    learn_entropy);
+                    learn_entropy_win);

                learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
                learn_sum_cross_entropy_win += learn_cross_entropy_win;
-                learn_sum_cross_entropy += learn_cross_entropy;
                learn_sum_entropy_eval += learn_entropy_eval;
                learn_sum_entropy_win += learn_entropy_win;
-                learn_sum_entropy += learn_entropy;

-                const double example_weight =
-                    (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
-                Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
+                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);

                // Since the processing is completed, the counter of the processed number is incremented
                sr.total_done++;
            };

-            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
            bool illegal_move = false;
            for (auto m : pv)
            {
@@ -1266,29 +1177,16 @@ namespace Learner
                    break;
                }

-                // Processing when adding the gradient to the node on each PV.
-                //If discount_rate is 0, this process is not performed.
-                if (discount_rate != 0)
-                    pos_add_grad();
-
                pos.do_move(m, state[ply++]);
-
-                // Since the value of evaluate in leaf is used, the difference is updated.
-                Eval::NNUE::update_eval(pos);
            }

            if (illegal_move)
            {
-                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
-                continue;
+                goto RETRY_READ;
            }

            // Since we have reached the end phase of PV, add the slope here.
            pos_add_grad();
-
-            // rewind the phase
-            for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                pos.undo_move(*it);
        }

    }
@@ -1303,18 +1201,18 @@ namespace Learner
        {
            // When EVAL_SAVE_ONLY_ONCE is defined,
            // Do not dig a subfolder because I want to save it only once.
-            Eval::save_eval("");
+            Eval::NNUE::save_eval("");
        }
        else if (is_final)
        {
-            Eval::save_eval("final");
+            Eval::NNUE::save_eval("final");
            return true;
        }
        else
        {
            static int dir_number = 0;
            const std::string dir_name = std::to_string(dir_number++);
-            Eval::save_eval(dir_name);
+            Eval::NNUE::save_eval(dir_name);

            if (newbob_decay != 1.0 && latest_loss_count > 0) {
                static int trials = newbob_num_trials;
@@ -1332,25 +1230,17 @@ namespace Learner
                else
                {
                    cout << " >= best (" << best_loss << "), rejected" << endl;
-                    if (best_nn_directory.empty())
-                    {
-                        cout << "WARNING: no improvement from initial model" << endl;
-                    }
-                    else
-                    {
-                        cout << "restoring parameters from " << best_nn_directory << endl;
-                        Eval::NNUE::RestoreParameters(best_nn_directory);
-                    }
+                    best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);

                    if (--trials > 0 && !is_final)
                    {
                        cout
-                            << "reducing learning rate scale from " << newbob_scale
+                            << "reducing learning rate from " << newbob_scale
                            << " to " << (newbob_scale * newbob_decay)
                            << " (" << trials << " more trials)" << endl;

                        newbob_scale *= newbob_decay;
-                        Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
+                        global_learning_rate = newbob_scale;
                    }
                }

@@ -1628,13 +1518,6 @@ namespace Learner

        string target_dir;

-        // If 0, it will be the default value.
-        double eta1 = 0.0;
-        double eta2 = 0.0;
-        double eta3 = 0.0;
-        uint64_t eta1_epoch = 0; // eta2 is not applied by default
-        uint64_t eta2_epoch = 0; // eta3 is not applied by default
-
        // --- Function that only shuffles the teacher aspect

        // normal shuffle
@@ -1675,24 +1558,13 @@ namespace Learner
        // Turn on if you want to pass a pre-shuffled file.
        bool no_shuffle = false;

-        // elmo lambda
-        ELMO_LAMBDA = 0.33;
-        ELMO_LAMBDA2 = 0.33;
-        ELMO_LAMBDA_LIMIT = 32000;
-
-        // Discount rate. If this is set to a value other than 0,
-        // the slope will be added even at other than the PV termination.
-        // (At that time, apply this discount rate)
-        double discount_rate = 0;
+        global_learning_rate = 1.0;

        // if (gamePly <rand(reduction_gameply)) continue;
        // An option to exclude the early stage from the learning target moderately like
        // If set to 1, rand(1)==0, so nothing is excluded.
        int reduction_gameply = 1;

-        // Optional item that does not let you learn KK/KKP/KPP/KPPP
-        array<bool, 4> freeze = {};
-
        uint64_t nn_batch_size = 1000;
        double newbob_decay = 1.0;
        int newbob_num_trials = 2;
@@ -1700,7 +1572,6 @@ namespace Learner

        uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
        uint64_t loss_output_interval = 0;
-        uint64_t mirror_percentage = 0;

        string validation_set_file_name;
        string seed;
@@ -1734,12 +1605,7 @@ namespace Learner
            else if (option == "batchsize") is >> mini_batch_size;

            // learning rate
-            else if (option == "eta")        is >> eta1;
-            else if (option == "eta1")       is >> eta1; // alias
-            else if (option == "eta2")       is >> eta2;
-            else if (option == "eta3")       is >> eta3;
-            else if (option == "eta1_epoch") is >> eta1_epoch;
-            else if (option == "eta2_epoch") is >> eta2_epoch;
+            else if (option == "lr")        is >> global_learning_rate;

            // Accept also the old option name.
            else if (option == "use_draw_in_training"
@@ -1758,22 +1624,9 @@ namespace Learner

            else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;

-            // Discount rate
-            else if (option == "discount_rate") is >> discount_rate;
-
            // Using WDL with win rate model instead of sigmoid
            else if (option == "use_wdl") is >> use_wdl;

-            // No learning of KK/KKP/KPP/KPPP.
-            else if (option == "freeze_kk")    is >> freeze[0];
-            else if (option == "freeze_kkp")   is >> freeze[1];
-            else if (option == "freeze_kpp")   is >> freeze[2];
-
-            // LAMBDA
-            else if (option == "lambda")       is >> ELMO_LAMBDA;
-            else if (option == "lambda2")      is >> ELMO_LAMBDA2;
-            else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
-
            else if (option == "reduction_gameply") is >> reduction_gameply;

            // shuffle related
@@ -1794,7 +1647,6 @@ namespace Learner

            else if (option == "eval_save_interval") is >> eval_save_interval;
            else if (option == "loss_output_interval") is >> loss_output_interval;
-            else if (option == "mirror_percentage") is >> mirror_percentage;
            else if (option == "validation_set_file_name") is >> validation_set_file_name;

            // Rabbit convert related
@@ -1810,7 +1662,6 @@ namespace Learner
            else if (option == "src_score_max_value") is >> src_score_max_value;
            else if (option == "dest_score_min_value") is >> dest_score_min_value;
            else if (option == "dest_score_max_value") is >> dest_score_max_value;
-            else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability;
            else if (option == "seed") is >> seed;
            // Otherwise, it's a filename.
            else
@@ -1884,7 +1735,7 @@ namespace Learner

        if (use_convert_plain)
        {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
            cout << "convert_plain.." << endl;
            convert_plain(filenames, output_file_name);
            return;
@@ -1892,7 +1743,7 @@ namespace Learner

        if (use_convert_bin)
        {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
            cout << "convert_bin.." << endl;
            convert_bin(
                filenames,
@@ -1913,7 +1764,7 @@ namespace Learner

        if (use_convert_bin_from_pgn_extract)
        {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
            cout << "convert_bin_from_pgn-extract.." << endl;
            convert_bin_from_pgn_extract(
                filenames,
@@ -1946,8 +1797,7 @@ namespace Learner
        cout << "nn_batch_size     : " << nn_batch_size << endl;
        cout << "nn_options        : " << nn_options << endl;

-        cout << "learning rate     : " << eta1 << " , " << eta2 << " , " << eta3 << endl;
-        cout << "eta_epoch         : " << eta1_epoch << " , " << eta2_epoch << endl;
+        cout << "learning rate     : " << global_learning_rate << endl;
        cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl;
        cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
        cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
@@ -1960,17 +1810,10 @@ namespace Learner
            cout << "scheduling        : default" << endl;
        }

-        cout << "discount rate     : " << discount_rate << endl;
-
        // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
        reduction_gameply = max(reduction_gameply, 1);
        cout << "reduction_gameply : " << reduction_gameply << endl;

-        cout << "LAMBDA            : " << ELMO_LAMBDA << endl;
-        cout << "LAMBDA2           : " << ELMO_LAMBDA2 << endl;
-        cout << "LAMBDA_LIMIT      : " << ELMO_LAMBDA_LIMIT << endl;
-
-        cout << "mirror_percentage : " << mirror_percentage << endl;
        cout << "eval_save_interval  : " << eval_save_interval << " sfens" << endl;
        cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;

@@ -1981,7 +1824,7 @@ namespace Learner
        cout << "init.." << endl;

        // Read evaluation function parameters
-        Eval::init_NNUE();
+        Eval::NNUE::init();

        Threads.main()->ponder = false;

@@ -2004,12 +1847,12 @@ namespace Learner
        }

        cout << "init_training.." << endl;
-        Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
+        Eval::NNUE::InitializeTraining(seed);
        Eval::NNUE::SetBatchSize(nn_batch_size);
        Eval::NNUE::SetOptions(nn_options);
        if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
            // Save the current net to [EvalSaveDir]\original.
-            Eval::save_eval("original");
+            Eval::NNUE::save_eval("original");

            // Set the folder above to best_nn_directory so that the trainer can
            // resotre the network parameters from the original net file.
@@ -2020,11 +1863,9 @@ namespace Learner
        cout << "init done." << endl;

        // Reflect other option settings.
-        learn_think.discount_rate = discount_rate;
        learn_think.eval_limit = eval_limit;
        learn_think.save_only_once = save_only_once;
        learn_think.sr.no_shuffle = no_shuffle;
-        learn_think.freeze = freeze;
        learn_think.reduction_gameply = reduction_gameply;

        learn_think.newbob_scale = 1.0;
@@ -2033,7 +1874,6 @@ namespace Learner

        learn_think.eval_save_interval = eval_save_interval;
        learn_think.loss_output_interval = loss_output_interval;
-        learn_think.mirror_percentage = mirror_percentage;

        // Start a thread that loads the phase file in the background
        // (If this is not started, mse cannot be calculated.)
@@ -2069,6 +1909,8 @@ namespace Learner
        // Start learning.
        learn_think.go_think();

+        Eval::NNUE::FinalizeNet();
+
        // Save once at the end.
        learn_think.save(true);
    }
@@ -23,11 +23,7 @@ using LearnFloatType = float;
 // configure
 // ======================

-// ----------------------
-// Learning with the method of elmo (WCSC27)
-// ----------------------
-
-#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
+#define LOSS_FUNCTION "cross_entropy_eval"

 // ----------------------
 // Definition of struct used in Learner
@@ -1,18 +0,0 @@
-#include "learning_tools.h"
-
-#include "misc.h"
-
-using namespace Eval;
-
-namespace EvalLearningTools
-{
-
-	// --- static variables
-
-	double Weight::eta;
-	double Weight::eta1;
-	double Weight::eta2;
-	double Weight::eta3;
-	uint64_t Weight::eta1_epoch;
-	uint64_t Weight::eta2_epoch;
-}
@@ -1,99 +0,0 @@
-#ifndef __LEARN_WEIGHT_H__
-#define __LEARN_WEIGHT_H__
-
-// A set of machine learning tools related to the weight array used for machine learning of evaluation functions
-
-#include "learn.h"
-
-#include "misc.h"  // PRNG , my_insertion_sort
-
-#include <array>
-#include <cmath>	// std::sqrt()
-
-namespace EvalLearningTools
-{
-	// -------------------------------------------------
-	//   Array for learning that stores gradients etc.
-	// -------------------------------------------------
-
-#if defined(_MSC_VER)
-#pragma pack(push,2)
-#elif defined(__GNUC__)
-#pragma pack(2)
-#endif
-	struct Weight
-	{
-		// cumulative value of one mini-batch gradient
-		LearnFloatType g = LearnFloatType(0);
-
-		// Learning rate η(eta) such as AdaGrad.
-		// It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called.
-		// The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch.
-		// After eta2_epoch, gradually change from eta2 to eta3.
-		static double eta;
-		static double eta1;
-		static double eta2;
-		static double eta3;
-		static uint64_t eta1_epoch;
-		static uint64_t eta2_epoch;
-
-		// Batch initialization of eta. If 0 is passed, the default value will be set.
-		static void init_eta(double new_eta1, double new_eta2, double new_eta3,
-			uint64_t new_eta1_epoch, uint64_t new_eta2_epoch)
-		{
-			Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0;
-			Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0;
-			Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0;
-			Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0;
-			Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0;
-		}
-
-		// Set eta according to epoch.
-		static void calc_eta(uint64_t epoch)
-		{
-			if (Weight::eta1_epoch == 0) // Exclude eta2
-				Weight::eta = Weight::eta1;
-			else if (epoch < Weight::eta1_epoch)
-				// apportion
-				Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch;
-			else if (Weight::eta2_epoch == 0) // Exclude eta3
-				Weight::eta = Weight::eta2;
-			else if (epoch < Weight::eta2_epoch)
-				Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch);
-			else
-				Weight::eta = Weight::eta3;
-		}
-
-		template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
-
-		// grad setting
-		template <typename T> void set_grad(const T& g_) { g = g_; }
-
-		// Add grad
-		template <typename T> void add_grad(const T& g_) { g += g_; }
-
-		LearnFloatType get_grad() const { return g; }
-	};
-#if defined(_MSC_VER)
-#pragma pack(pop)
-#elif defined(__GNUC__)
-#pragma pack(0)
-#endif
-
-	// Turned weight array
-	// In order to be able to handle it transparently, let's have the same member as Weight.
-	struct Weight2
-	{
-		Weight w[2];
-
-		//Evaluate your turn, eta 1/8.
-		template <typename T> void updateFV(std::array<T, 2>& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); }
-
-		template <typename T> void set_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); }
-		template <typename T> void add_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); }
-
-		std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
-	};
-}
-
-#endif
@@ -9,39 +9,14 @@

 void MultiThink::go_think()
 {
-	// Keep a copy to restore the Options settings later.
-	auto oldOptions = Options;
-
-	// When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is
-	// Since it is not thread safe, it is guaranteed here that it is being completely read in memory.
-	Options["BookOnTheFly"] = std::string("false");
-
 	// Read evaluation function, etc.
 	// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
 	// Skip memory corruption check.
-	Eval::init_NNUE();
+	Eval::NNUE::init();

 	// Call the derived class's init().
 	init();

-        // About Search::Limits
-        // Be careful because this member variable is global and affects other threads.
-        {
-          auto& limits = Search::Limits;
-
-          // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
-          limits.infinite = true;
-
-          // Since PV is an obstacle when displayed, erase it.
-          limits.silent = true;
-
-          // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
-          limits.nodes = 0;
-
-          // depth is also processed by the one passed as an argument of Learner::search().
-          limits.depth = 0;
-        }
-
 	// The loop upper limit is set with set_loop_max().
 	loop_count = 0;
 	done_count = 0;
@@ -123,10 +98,4 @@ void MultiThink::go_think()
 	// The file writing thread etc. are still running only when all threads are finished
 	// Since the work itself may not have completed, output only that all threads have finished.
 	std::cout << "all threads are joined." << std::endl;
-
-	// Restored because Options were rewritten.
-	// Restore the handler because the handler will not start unless you assign a value.
-	for (auto& s : oldOptions)
-		Options[s.first] = std::string(s.second);
-
 }
@@ -259,7 +259,7 @@ namespace Learner {
    return make_piece(c, pr);
  }

-  int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror)
+  int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th)
  {
    SfenPacker packer;
    auto& stream = packer.stream;
@@ -280,16 +280,8 @@ namespace Learner {
    pos.pieceList[B_KING][0] = SQUARE_NB;

    // First the position of the ball
-    if (mirror)
-    {
-      for (auto c : Colors)
-        pos.board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
-    }
-    else
-    {
-      for (auto c : Colors)
-        pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
-    }
+    for (auto c : Colors)
+      pos.board[stream.read_n_bit(6)] = make_piece(c, KING);

    // Piece placement
    for (Rank r = RANK_8; r >= RANK_1; --r)
@@ -297,9 +289,6 @@ namespace Learner {
      for (File f = FILE_A; f <= FILE_H; ++f)
      {
        auto sq = make_square(f, r);
-        if (mirror) {
-          sq = flip_file(sq);
-        }

        // it seems there are already balls
        Piece pc;
@@ -355,9 +344,6 @@ namespace Learner {
    // En passant square. Ignore if no pawn capture is possible
    if (stream.read_one_bit()) {
      Square ep_square = static_cast<Square>(stream.read_n_bit(6));
-      if (mirror) {
-        ep_square = flip_file(ep_square);
-      }
      pos.st->epSquare = ep_square;

      if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
@@ -13,7 +13,7 @@ class Thread;

 namespace Learner {

-    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th);
    PackedSfen sfen_pack(Position& pos);
 }

@@ -35,6 +35,7 @@ int main(int argc, char* argv[]) {

  std::cout << engine_info() << std::endl;

+  CommandLine::init(argc, argv);
  UCI::init(Options);
  Tune::init();
  PSQT::init();
@@ -44,7 +45,7 @@ int main(int argc, char* argv[]) {
  Endgames::init();
  Threads.set(size_t(Options["Threads"]));
  Search::clear(); // After threads are up
-  Eval::init_NNUE();
+  Eval::NNUE::init();

  UCI::loop(argc, argv);

@@ -132,6 +132,7 @@ public:

 } // namespace

+
 /// engine_info() returns the full name of the current Stockfish version. This
 /// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
 /// the program was compiled) or "Stockfish <Version>", depending on whether
@@ -356,27 +357,11 @@ void std_aligned_free(void* ptr) {
 #endif
 }

-/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
-/// The returned pointer is the aligned one, while the mem argument is the one that needs
-/// to be passed to free. With c++17 some of this functionality could be simplified.
+/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.

-#if defined(__linux__) && !defined(__ANDROID__)
+#if defined(_WIN32)

-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
-
-  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
-  size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
-  if (posix_memalign(&mem, alignment, size))
-     mem = nullptr;
-#if defined(MADV_HUGEPAGE)
-  madvise(mem, allocSize, MADV_HUGEPAGE);
-#endif
-  return mem;
-}
-
-#elif defined(_WIN64)
-
-static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
+static void* aligned_large_pages_alloc_win(size_t allocSize) {

  HANDLE hProcessToken { };
  LUID luid { };
@@ -421,12 +406,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
  return mem;
 }

-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+void* aligned_large_pages_alloc(size_t allocSize) {

  static bool firstCall = true;
+  void* mem;

  // Try to allocate large pages
-  mem = aligned_ttmem_alloc_large_pages(allocSize);
+  mem = aligned_large_pages_alloc_win(allocSize);

  // Suppress info strings on the first call. The first call occurs before 'uci'
  // is received and in that case this output confuses some GUIs.
@@ -448,23 +434,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {

 #else

-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+void* aligned_large_pages_alloc(size_t allocSize) {

-  constexpr size_t alignment = 64; // assumed cache line size
-  size_t size = allocSize + alignment - 1; // allocate some extra space
-  mem = malloc(size);
-  void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
-  return ret;
+#if defined(__linux__)
+  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
+#else
+  constexpr size_t alignment = 4096; // assumed small page size
+#endif
+
+  // round up to multiples of alignment
+  size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
+  void *mem = std_aligned_alloc(alignment, size);
+#if defined(MADV_HUGEPAGE)
+  madvise(mem, size, MADV_HUGEPAGE);
+#endif
+  return mem;
 }

 #endif


-/// aligned_ttmem_free() will free the previously allocated ttmem
+/// aligned_large_pages_free() will free memory previously allocated by aligned_large_pages_alloc()

-#if defined(_WIN64)
+#if defined(_WIN32)

-void aligned_ttmem_free(void* mem) {
+void aligned_large_pages_free(void* mem) {

  if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
  {
@@ -477,8 +471,8 @@ void aligned_ttmem_free(void* mem) {

 #else

-void aligned_ttmem_free(void *mem) {
-  free(mem);
+void aligned_large_pages_free(void *mem) {
+  std_aligned_free(mem);
 }

 #endif
@@ -590,6 +584,63 @@ void bindThisThread(size_t idx) {

 } // namespace WinProcGroup

+#ifdef _WIN32
+#include <direct.h>
+#define GETCWD _getcwd
+#else
+#include <unistd.h>
+#define GETCWD getcwd
+#endif
+
+namespace CommandLine {
+
+string argv0;            // path+name of the executable binary, as given by argv[0]
+string binaryDirectory;  // path of the executable directory
+string workingDirectory; // path of the working directory
+string pathSeparator;    // Separator for our current OS
+
+// Fills argv0, pathSeparator, workingDirectory and binaryDirectory from the
+// process arguments and the current working directory.
+void init(int argc, char* argv[]) {
+    // extract the path+name of the executable binary; argv[0] can be missing
+    // (argc == 0), and a std::string must never be built from a null pointer
+    argv0 = (argc > 0 && argv[0] != nullptr) ? argv[0] : "";
+
+#ifdef _WIN32
+    pathSeparator = "\\";
+  #ifdef _MSC_VER
+    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
+    // issues in some windows 10 versions, so check returned values carefully.
+    char* pgmptr = nullptr;
+    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
+        argv0 = pgmptr;
+  #endif
+#else
+    pathSeparator = "/";
+#endif
+
+    // extract the working directory
+    workingDirectory = "";  // stays empty if the lookup below fails
+    char buff[40000];
+    char* cwd = GETCWD(buff, 40000);
+    if (cwd)
+        workingDirectory = cwd;
+
+    // extract the binary directory path from argv0
+    binaryDirectory = argv0;
+    size_t pos = binaryDirectory.find_last_of("\\/");
+    if (pos == std::string::npos)
+        binaryDirectory = "." + pathSeparator;  // argv0 has no directory part
+    else
+        binaryDirectory.resize(pos + 1);        // keep everything up to and including the last separator
+
+    // pattern replacement: "./" at the start of path is replaced by the working directory
+    if (binaryDirectory.find("." + pathSeparator) == 0)
+        binaryDirectory.replace(0, 1, workingDirectory);  // only the leading '.' is swapped; the separator stays
+}
+
+
+} // namespace CommandLine
 // Returns a string that represents the current time. (Used when learning evaluation functions)
 std::string now_string()
 {
@@ -39,8 +39,8 @@ void prefetch(void* addr);
 void start_logger(const std::string& fname);
 void* std_aligned_alloc(size_t alignment, size_t size);
 void std_aligned_free(void* ptr);
-void* aligned_ttmem_alloc(size_t size, void*& mem);
-void aligned_ttmem_free(void* mem); // nop if mem == nullptr
+void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
+void aligned_large_pages_free(void* mem); // nop if mem == nullptr

 void dbg_hit_on(bool b);
 void dbg_hit_on(bool c, bool b);
@@ -48,9 +48,7 @@ void dbg_mean_of(int v);
 void dbg_print();

 typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
-
 static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
-
 inline TimePoint now() {
  return std::chrono::duration_cast<std::chrono::milliseconds>
        (std::chrono::steady_clock::now().time_since_epoch()).count();
@@ -337,4 +335,11 @@ namespace Dependency
  extern bool getline(std::ifstream& fs, std::string& s);
 }

+namespace CommandLine {
+  void init(int argc, char* argv[]);   // fills the two strings below from argv[0] and the current directory
+
+  extern std::string binaryDirectory;  // path of the executable directory
+  extern std::string workingDirectory; // path of the working directory
+}
+
 #endif // #ifndef MISC_H_INCLUDED
@@ -1,7 +1,25 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
 // Definition of input features and network structure used in NNUE evaluation function

-#ifndef HALFKP_CR_EP_256X2_32_32_H
-#define HALFKP_CR_EP_256X2_32_32_H
+#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED

 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
@@ -12,31 +30,28 @@
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"

-namespace Eval {
+namespace Eval::NNUE {

-  namespace NNUE {
+// Input features used in evaluation function
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
+    Features::EnPassant>;

-    // Input features used in evaluation function
-    using RawFeatures = Features::FeatureSet<
-      Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
-      Features::EnPassant>;
+// Number of input feature dimensions after conversion
+constexpr IndexType kTransformedFeatureDimensions = 256;

-    // Number of input feature dimensions after conversion
-    constexpr IndexType kTransformedFeatureDimensions = 256;
+namespace Layers {

-    namespace Layers {
+// Define network structure
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>;

-      // define network structure
-      using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
-      using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
-      using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
-      using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+}  // namespace Layers

-    }  // namespace Layers
+using Network = Layers::OutputLayer;

-    using Network = Layers::OutputLayer;
+}  // namespace Eval::NNUE

-  }  // namespace NNUE
-
-}  // namespace Eval
-#endif // HALFKP_CR_EP_256X2_32_32_H
+#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
@@ -0,0 +1,37 @@
+// Definition of input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
+
+#include "../features/feature_set.h"
+#include "../features/half_kp.h"
+#include "../features/castling_right.h"
+
+#include "../layers/input_slice.h"
+#include "../layers/affine_transform.h"
+#include "../layers/clipped_relu.h"
+
+namespace Eval::NNUE {
+
+// Input features used in the evaluation function: HalfKP (friend side) and CastlingRight
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight>;
+
+// Number of input feature dimensions after transformation (doubled for the input slice below)
+constexpr IndexType kTransformedFeatureDimensions = 256;
+
+namespace Layers {
+
+// Network structure: input slice, two ClippedReLU(AffineTransform) hidden layers, affine output layer
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+
+}  // namespace Layers
+
+using Network = Layers::OutputLayer;  // each layer type nests its predecessor, so the last layer names the whole network
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
@@ -1,4 +1,5 @@
 // Definition of input features and network structure used in NNUE evaluation function
+
 #ifndef K_P_256X2_32_32_H
 #define K_P_256X2_32_32_H

@@ -18,7 +18,6 @@

 // Code for calculating NNUE evaluation function

-#include <fstream>
 #include <iostream>
 #include <set>

@@ -31,7 +30,7 @@

 namespace Eval::NNUE {

-  uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
+  const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
   // convention: W - us, B - them
   // viewed from other side, W and B are reversed
      { PS_NONE,     PS_NONE     },
@@ -53,7 +52,7 @@ namespace Eval::NNUE {
  };

  // Input feature converter
-  AlignedPtr<FeatureTransformer> feature_transformer;
+  LargePagePtr<FeatureTransformer> feature_transformer;

  // Evaluation function
  AlignedPtr<Network> network;
@@ -80,14 +79,22 @@ namespace Eval::NNUE {
    std::memset(pointer.get(), 0, sizeof(T));
  }

+  template <typename T>
+  void Initialize(LargePagePtr<T>& pointer) {
+    // Takes storage for one T from aligned_large_pages_alloc() and zero-fills it; no constructor is run here.
+    static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
+    pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
+    std::memset(pointer.get(), 0, sizeof(T));  // NOTE(review): allocation result is not checked for nullptr before this write — confirm
+  }
+
  // Read evaluation function parameters
  template <typename T>
-  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
+  bool ReadParameters(std::istream& stream, T& reference) {

    std::uint32_t header;
    header = read_little_endian<std::uint32_t>(stream);
    if (!stream || header != T::GetHashValue()) return false;
-    return pointer->ReadParameters(stream);
+    return reference.ReadParameters(stream);
  }

  // write evaluation function parameters
@@ -98,6 +105,13 @@ namespace Eval::NNUE {
    return pointer->WriteParameters(stream);
  }

+  template <typename T>
+  bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {  // LargePagePtr overload: hash header first, then T's own parameters
+    constexpr std::uint32_t header = T::GetHashValue();
+    stream.write(reinterpret_cast<const char*>(&header), sizeof(header));  // NOTE(review): raw host-byte-order write, while ReadParameters() decodes with read_little_endian — confirm for big-endian targets
+    return pointer->WriteParameters(stream);  // result of T::WriteParameters is returned as-is
+  }
+
  }  // namespace Detail

  // Initialize the evaluation function parameters
@@ -139,11 +153,10 @@ namespace Eval::NNUE {
    std::string architecture;
    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
    if (hash_value != kHashValue) return false;
-    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
-    if (!Detail::ReadParameters(stream, network)) return false;
+    if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
+    if (!Detail::ReadParameters(stream, *network)) return false;
    return stream && stream.peek() == std::ios::traits_type::eof();
  }
-
  // write evaluation function parameters
  bool WriteParameters(std::ostream& stream) {
    if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
@@ -151,36 +164,20 @@ namespace Eval::NNUE {
    if (!Detail::WriteParameters(stream, network)) return false;
    return !stream.fail();
  }
-
-  // Proceed with the difference calculation if possible
-  static void UpdateAccumulatorIfPossible(const Position& pos) {
-
-    feature_transformer->UpdateAccumulatorIfPossible(pos);
-  }
-
-  // Calculate the evaluation value
-  static Value ComputeScore(const Position& pos, bool refresh) {
-
-    auto& accumulator = pos.state()->accumulator;
-    if (!refresh && accumulator.computed_score) {
-      return accumulator.score;
-    }
+  // Evaluation function. Perform differential calculation.
+  Value evaluate(const Position& pos) {

    alignas(kCacheLineSize) TransformedFeatureType
        transformed_features[FeatureTransformer::kBufferSize];
-    feature_transformer->Transform(pos, transformed_features, refresh);
+    feature_transformer->Transform(pos, transformed_features);
    alignas(kCacheLineSize) char buffer[Network::kBufferSize];
    const auto output = network->Propagate(transformed_features, buffer);

-    auto score = static_cast<Value>(output[0] / FV_SCALE);
-
-    accumulator.score = score;
-    accumulator.computed_score = true;
-    return accumulator.score;
+    return static_cast<Value>(output[0] / FV_SCALE);
  }

-  // Load the evaluation function file
-  bool load_eval_file(const std::string& evalFile) {
+  // Load eval, from a file stream or a memory stream
+  bool load_eval(std::string name, std::istream& stream) {

    Initialize();

@@ -189,29 +186,8 @@ namespace Eval::NNUE {
      std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
      return true;
    }
-
-    fileName = evalFile;
-
-    std::ifstream stream(evalFile, std::ios::binary);
-
-    const bool result = ReadParameters(stream);
-
-    return result;
-  }
-
-  // Evaluation function. Perform differential calculation.
-  Value evaluate(const Position& pos) {
-    return ComputeScore(pos, false);
-  }
-
-  // Evaluation function. Perform full calculation.
-  Value compute_eval(const Position& pos) {
-    return ComputeScore(pos, true);
-  }
-
-  // Proceed with the difference calculation if possible
-  void update_eval(const Position& pos) {
-    UpdateAccumulatorIfPossible(pos);
+    fileName = name;
+    return ReadParameters(stream);
  }

 } // namespace Eval::NNUE
@@ -40,11 +40,22 @@ namespace Eval::NNUE {
    }
  };

+  template <typename T>
+  struct LargePageDeleter {  // deleter type used by LargePagePtr (a std::unique_ptr alias)
+    void operator()(T* ptr) const {
+      ptr->~T();                      // run the destructor explicitly; the storage is released separately below
+      aligned_large_pages_free(ptr);  // return the storage through the large-page free routine
+    }
+  };
+
  template <typename T>
  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;

+  template <typename T>
+  using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
+
  // Input feature converter
-  extern AlignedPtr<FeatureTransformer> feature_transformer;
+  extern LargePagePtr<FeatureTransformer> feature_transformer;

  // Evaluation function
  extern AlignedPtr<Network> network;
@@ -5,15 +5,12 @@
 #include <filesystem>

 #include "../learn/learn.h"
-#include "../learn/learning_tools.h"

 #include "../position.h"
 #include "../uci.h"
 #include "../misc.h"
 #include "../thread_win32_osx.h"

-#include "../eval/evaluate_common.h"
-
 #include "evaluate_nnue.h"
 #include "evaluate_nnue_learner.h"
 #include "trainer/features/factorizer_feature_set.h"
@@ -24,215 +21,191 @@
 #include "trainer/trainer_clipped_relu.h"
 #include "trainer/trainer_sum.h"

-namespace Eval {
-
-namespace NNUE {
-
-namespace {
-
-// learning data
-std::vector<Example> examples;
-
-// Mutex for exclusive control of examples
-std::mutex examples_mutex;
-
-// number of samples in mini-batch
-uint64_t batch_size;
-
-// random number generator
-std::mt19937 rng;
-
-// learner
-std::shared_ptr<Trainer<Network>> trainer;
-
 // Learning rate scale
-double global_learning_rate_scale;
+double global_learning_rate;

-// Get the learning rate scale
-double GetGlobalLearningRateScale() {
-  return global_learning_rate_scale;
-}
+namespace Eval::NNUE {

-// Tell the learner options such as hyperparameters
-void SendMessages(std::vector<Message> messages) {
-  for (auto& message : messages) {
-    trainer->SendMessage(&message);
-    assert(message.num_receivers > 0);
-  }
-}
+  namespace {

-}  // namespace
+    // learning data
+    std::vector<Example> examples;

-// Initialize learning
-void InitializeTraining(double eta1, uint64_t eta1_epoch,
-                        double eta2, uint64_t eta2_epoch, double eta3) {
-  std::cout << "Initializing NN training for "
-            << GetArchitectureString() << std::endl;
+    // Mutex for exclusive control of examples
+    std::mutex examples_mutex;

-  assert(feature_transformer);
-  assert(network);
-  trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
+    // number of samples in mini-batch
+    uint64_t batch_size;

-  if (Options["SkipLoadingEval"]) {
-    trainer->Initialize(rng);
-  }
+    // random number generator
+    std::mt19937 rng;

-  global_learning_rate_scale = 1.0;
-  EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
-}
+    // learner
+    std::shared_ptr<Trainer<Network>> trainer;

-// set the number of samples in the mini-batch
-void SetBatchSize(uint64_t size) {
-  assert(size > 0);
-  batch_size = size;
-}
-
-// set the learning rate scale
-void SetGlobalLearningRateScale(double scale) {
-  global_learning_rate_scale = scale;
-}
-
-// Set options such as hyperparameters
-void SetOptions(const std::string& options) {
-  std::vector<Message> messages;
-  for (const auto& option : Split(options, ',')) {
-    const auto fields = Split(option, '=');
-    assert(fields.size() == 1 || fields.size() == 2);
-    if (fields.size() == 1) {
-      messages.emplace_back(fields[0]);
-    } else {
-      messages.emplace_back(fields[0], fields[1]);
-    }
-  }
-  SendMessages(std::move(messages));
-}
-
-// Reread the evaluation function parameters for learning from the file
-void RestoreParameters(const std::string& dir_name) {
-  const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
-  std::ifstream stream(file_name, std::ios::binary);
-#ifndef NDEBUG
-  bool result =
-#endif
-  ReadParameters(stream);
-#ifndef NDEBUG
-  assert(result);
-#endif
-
-  SendMessages({{"reset"}});
-}
-
-// Add 1 sample of learning data
-void AddExample(Position& pos, Color rootColor,
-                const Learner::PackedSfenValue& psv, double weight) {
-  Example example;
-  if (rootColor == pos.side_to_move()) {
-    example.sign = 1;
-  } else {
-    example.sign = -1;
-  }
-  example.psv = psv;
-  example.weight = weight;
-
-  Features::IndexList active_indices[2];
-  for (const auto trigger : kRefreshTriggers) {
-    RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
-  }
-  if (pos.side_to_move() != WHITE) {
-    active_indices[0].swap(active_indices[1]);
-  }
-  for (const auto color : Colors) {
-    std::vector<TrainingFeature> training_features;
-    for (const auto base_index : active_indices[color]) {
-      static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
-                    (1 << TrainingFeature::kIndexBits), "");
-      Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
-          base_index, &training_features);
-    }
-    std::sort(training_features.begin(), training_features.end());
-
-    auto& unique_features = example.training_features[color];
-    for (const auto& feature : training_features) {
-      if (!unique_features.empty() &&
-          feature.GetIndex() == unique_features.back().GetIndex()) {
-        unique_features.back() += feature;
-      } else {
-        unique_features.push_back(feature);
+    // Tell the learner options such as hyperparameters
+    void SendMessages(std::vector<Message> messages) {
+      for (auto& message : messages) {
+        trainer->SendMessage(&message);
+        assert(message.num_receivers > 0);
      }
    }
+
+  }  // namespace
+
+  // Initialize learning
+  void InitializeTraining(const std::string& seed) {
+    std::cout << "Initializing NN training for "
+              << GetArchitectureString() << std::endl;
+
+    assert(feature_transformer);
+    assert(network);
+    trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
+    rng.seed(PRNG(seed).rand<uint64_t>());
+
+    if (Options["SkipLoadingEval"]) {
+      trainer->Initialize(rng);
+    }
  }

-  std::lock_guard<std::mutex> lock(examples_mutex);
-  examples.push_back(std::move(example));
-}
+  // Set the number of samples in the mini-batch (non-zero is enforced by assert only)
+  void SetBatchSize(uint64_t size) {
+    assert(size > 0);
+    batch_size = size;  // read by UpdateParameters() to size each batch and to scale the learning rate
+  }
+  
+  // Set options such as hyperparameters: a comma-separated list of "name" or "name=value" entries
+  void SetOptions(const std::string& options) {
+    std::vector<Message> messages;
+    for (const auto& option : Split(options, ',')) {
+      const auto fields = Split(option, '=');
+      assert(fields.size() == 1 || fields.size() == 2);  // malformed entries are only caught in debug builds
+      if (fields.size() == 1) {
+        messages.emplace_back(fields[0]);             // bare name, no value
+      } else {
+        messages.emplace_back(fields[0], fields[1]);  // name and value
+      }
+    }
+    SendMessages(std::move(messages));  // one Message per entry is forwarded to the trainer
+  }

-// update the evaluation function parameters
-void UpdateParameters(uint64_t epoch) {
-  assert(batch_size > 0);
+  // Reread the evaluation function parameters for learning from the file
+  void RestoreParameters(const std::string& dir_name) {
+    const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
+    std::ifstream stream(file_name, std::ios::binary);
+#ifndef NDEBUG
+    bool result =
+#endif
+    ReadParameters(stream);
+#ifndef NDEBUG
+    assert(result);
+#endif

-  EvalLearningTools::Weight::calc_eta(epoch);
-  const auto learning_rate = static_cast<LearnFloatType>(
-      get_eta() / batch_size);
+    SendMessages({{"reset"}});
+  }

-  std::lock_guard<std::mutex> lock(examples_mutex);
-  std::shuffle(examples.begin(), examples.end(), rng);
-  while (examples.size() >= batch_size) {
-    std::vector<Example> batch(examples.end() - batch_size, examples.end());
-    examples.resize(examples.size() - batch_size);
+  void FinalizeNet() {  // sends "clear_unobserved_feature_weights" to the trainer via SendMessages()
+    SendMessages({{"clear_unobserved_feature_weights"}});  // NOTE(review): SendMessages() dereferences trainer unconditionally — confirm callers run InitializeTraining() first
+  }

-    const auto network_output = trainer->Propagate(batch);
+  // Add 1 sample of learning data
+  void AddExample(Position& pos, Color rootColor,
+                  const Learner::PackedSfenValue& psv, double weight) {
+    Example example;
+    if (rootColor == pos.side_to_move()) {
+      example.sign = 1;
+    } else {
+      example.sign = -1;
+    }
+    example.psv = psv;
+    example.weight = weight;

-    std::vector<LearnFloatType> gradients(batch.size());
-    for (std::size_t b = 0; b < batch.size(); ++b) {
-      const auto shallow = static_cast<Value>(Round<std::int32_t>(
-          batch[b].sign * network_output[b] * kPonanzaConstant));
-      const auto& psv = batch[b].psv;
-      const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
-      gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
+    Features::IndexList active_indices[2];
+    for (const auto trigger : kRefreshTriggers) {
+      RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
+    }
+    if (pos.side_to_move() != WHITE) {
+      active_indices[0].swap(active_indices[1]);
+    }
+    for (const auto color : Colors) {
+      std::vector<TrainingFeature> training_features;
+      for (const auto base_index : active_indices[color]) {
+        static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
+                      (1 << TrainingFeature::kIndexBits), "");
+        Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
+            base_index, &training_features);
+      }
+      std::sort(training_features.begin(), training_features.end());
+
+      auto& unique_features = example.training_features[color];
+      for (const auto& feature : training_features) {
+        if (!unique_features.empty() &&
+            feature.GetIndex() == unique_features.back().GetIndex()) {
+          unique_features.back() += feature;
+        } else {
+          unique_features.push_back(feature);
+        }
+      }
    }

-    trainer->Backpropagate(gradients.data(), learning_rate);
-  }
-  SendMessages({{"quantize_parameters"}});
-}
-
-// Check if there are any problems with learning
-void CheckHealth() {
-  SendMessages({{"check_health"}});
-}
-
-}  // namespace NNUE
-
-// save merit function parameters to a file
-void save_eval(std::string dir_name) {
-  auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
-  std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
-
-  // mkdir() will fail if this folder already exists, but
-  // Apart from that. If not, I just want you to make it.
-  // Also, assume that the folders up to EvalSaveDir have been dug.
-  std::filesystem::create_directories(eval_dir);
-
-  if (Options["SkipLoadingEval"] && NNUE::trainer) {
-    NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
+    std::lock_guard<std::mutex> lock(examples_mutex);
+    examples.push_back(std::move(example));
  }

-  const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
-  std::ofstream stream(file_name, std::ios::binary);
+  // Update the evaluation function parameters: consume queued examples in shuffled mini-batches, then quantize
+  void UpdateParameters() {
+    assert(batch_size > 0);
+    // Per-example step size: the global learning rate divided by the mini-batch size.
+    const auto learning_rate = static_cast<LearnFloatType>(
+        global_learning_rate / batch_size);
+    // Hold the examples lock for the whole update; AddExample() takes the same mutex.
+    std::lock_guard<std::mutex> lock(examples_mutex);
+    std::shuffle(examples.begin(), examples.end(), rng);
+    while (examples.size() >= batch_size) {  // fewer than batch_size leftovers stay queued
+      std::vector<Example> batch(examples.end() - batch_size, examples.end());
+      examples.resize(examples.size() - batch_size);
+      // Forward pass over the batch just split off the tail of the queue.
+      const auto network_output = trainer->Propagate(batch);
+      // One gradient per example: Learner::calc_grad() result scaled by the example's sign and weight.
+      std::vector<LearnFloatType> gradients(batch.size());
+      for (std::size_t b = 0; b < batch.size(); ++b) {
+        const auto shallow = static_cast<Value>(Round<std::int32_t>(
+            batch[b].sign * network_output[b] * kPonanzaConstant));
+        const auto& psv = batch[b].psv;
+        const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
+        gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
+      }
+      // Backward pass with the gradients computed above.
+      trainer->Backpropagate(gradients.data(), learning_rate);
+    }
+    SendMessages({{"quantize_parameters"}});
+  }
+
+  // Check for learning problems by sending the trainer a "check_health" message
+  void CheckHealth() {
+    SendMessages({{"check_health"}});
+  }
+
+  // save merit function parameters to a file
+  void save_eval(std::string dir_name) {
+    auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
+    std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
+
+    // mkdir() will fail if this folder already exists, but
+    // Apart from that. If not, I just want you to make it.
+    // Also, assume that the folders up to EvalSaveDir have been dug.
+    std::filesystem::create_directories(eval_dir);
+
+    const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
+    std::ofstream stream(file_name, std::ios::binary);
 #ifndef NDEBUG
-  const bool result =
+    bool result =
 #endif
-  NNUE::WriteParameters(stream);
+    WriteParameters(stream);
 #ifndef NDEBUG
-  assert(result);
+    assert(result);
 #endif

-  std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
-}
-
-// get the current eta
-double get_eta() {
-  return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
-}
-
-}  // namespace Eval
+    std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
+  }
+}  // namespace Eval::NNUE
@@ -5,38 +5,33 @@

 #include "../learn/learn.h"

-namespace Eval {
+namespace Eval::NNUE {

-namespace NNUE {
+  // Initialize learning
+  void InitializeTraining(const std::string& seed);

-// Initialize learning
-void InitializeTraining(double eta1, uint64_t eta1_epoch,
-                        double eta2, uint64_t eta2_epoch, double eta3);
+  // set the number of samples in the mini-batch
+  void SetBatchSize(uint64_t size);

-// set the number of samples in the mini-batch
-void SetBatchSize(uint64_t size);
+  // Set options such as hyperparameters
+  void SetOptions(const std::string& options);

-// set the learning rate scale
-void SetGlobalLearningRateScale(double scale);
-
-// Set options such as hyperparameters
-void SetOptions(const std::string& options);
-
-// Reread the evaluation function parameters for learning from the file
-void RestoreParameters(const std::string& dir_name);
+  // Reread the evaluation function parameters for learning from the file
+  void RestoreParameters(const std::string& dir_name);

 // Add 1 sample of learning data
-void AddExample(Position& pos, Color rootColor,
-                const Learner::PackedSfenValue& psv, double weight);
+  void AddExample(Position& pos, Color rootColor,
+  	const Learner::PackedSfenValue& psv, double weight);

-// update the evaluation function parameters
-void UpdateParameters(uint64_t epoch);
+  // update the evaluation function parameters
+  void UpdateParameters();

-// Check if there are any problems with learning
-void CheckHealth();
+  // Check if there are any problems with learning
+  void CheckHealth();

-}  // namespace NNUE
+  void FinalizeNet();

-}  // namespace Eval
+  void save_eval(std::string suffix);
+}  // namespace Eval::NNUE

 #endif
@@ -1,69 +1,40 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity CastlingRight of NNUE evaluation function

 #include "castling_right.h"
 #include "index_list.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-  namespace NNUE {
+  // Get a list of indices with a value of 1 among the features
+  void CastlingRight::AppendActiveIndices(
+    const Position& pos, Color perspective, IndexList* active) {
+    // do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;

-    namespace Features {
+    int castling_rights = pos.state()->castlingRights;
+    int relative_castling_rights;
+    if (perspective == WHITE) {
+      relative_castling_rights = castling_rights;
+    }
+    else {
+      // Invert the perspective: swap bits 0-1 with bits 2-3 (OR, since the two halves are disjoint).
+      relative_castling_rights = ((castling_rights & 3) << 2)
+        | ((castling_rights >> 2) & 3);
+    }

-      // Get a list of indices with a value of 1 among the features
-      void CastlingRight::AppendActiveIndices(
-        const Position& pos, Color perspective, IndexList* active) {
-        // do nothing if array size is small to avoid compiler warning
-        if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-
-        int castling_rights = pos.state()->castlingRights;
-        int relative_castling_rights;
-        if (perspective == WHITE) {
-          relative_castling_rights = castling_rights;
-        }
-        else {
-          // Invert the perspective.
-          relative_castling_rights = ((castling_rights & 3) << 2)
-            & ((castling_rights >> 2) & 3);
-        }
-
-        for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
-          if (relative_castling_rights & (1 << i)) {
-            active->push_back(i);
-          }
-        }
+    for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
+      if (relative_castling_rights & (1 << i)) {
+        active->push_back(i);
      }
+    }
+  }

-      // Get a list of indices whose values have changed from the previous one in the feature quantity
-      void CastlingRight::AppendChangedIndices(
-        const Position& pos, Color perspective,
-        IndexList* removed, IndexList* /* added */) {
+  // Get a list of indices whose values have changed from the previous one in the feature quantity
+  void CastlingRight::AppendChangedIndices(
+    const Position& /* pos */, Color /* perspective */,
+    IndexList* /* removed */, IndexList* /* added */) {
+    // Not implemented.
+    assert(false);
+  }

-        int previous_castling_rights = pos.state()->previous->castlingRights;
-        int current_castling_rights = pos.state()->castlingRights;
-        int relative_previous_castling_rights;
-        int relative_current_castling_rights;
-        if (perspective == WHITE) {
-          relative_previous_castling_rights = previous_castling_rights;
-          relative_current_castling_rights = current_castling_rights;
-        }
-        else {
-          // Invert the perspective.
-          relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
-            & ((previous_castling_rights >> 2) & 3);
-          relative_current_castling_rights = ((current_castling_rights & 3) << 2)
-            & ((current_castling_rights >> 2) & 3);
-        }
-
-        for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
-          if ((relative_previous_castling_rights & (1 << i)) &&
-            (relative_current_castling_rights & (1 << i)) == 0) {
-            removed->push_back(i);
-          }
-        }
-      }
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features
@@ -1,4 +1,4 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity CastlingRight of NNUE evaluation function

 #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
 #define _NNUE_FEATURES_CASTLING_RIGHT_H_
@@ -6,39 +6,30 @@
 #include "../../evaluate.h"
 #include "features_common.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-  namespace NNUE {
+  class CastlingRight {
+  public:
+    // feature quantity name
+    static constexpr const char* kName = "CastlingRight";
+    // Hash value embedded in the evaluation function file
+    static constexpr std::uint32_t kHashValue = 0x913968AAu;
+    // number of feature dimensions
+    static constexpr IndexType kDimensions = 4;
+    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
+    static constexpr IndexType kMaxActiveDimensions = 4;
+    // Timing of full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;

-    namespace Features {
+    // Get a list of indices with a value of 1 among the features
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+      IndexList* active);

-      // Feature K: Ball position
-      class CastlingRight {
-      public:
-        // feature quantity name
-        static constexpr const char* kName = "CastlingRight";
-        // Hash value embedded in the evaluation function file
-        static constexpr std::uint32_t kHashValue = 0x913968AAu;
-        // number of feature dimensions
-        static constexpr IndexType kDimensions = 4;
-        // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-        static constexpr IndexType kMaxActiveDimensions = 4;
-        // Timing of full calculation instead of difference calculation
-        static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added);
+  };

-        // Get a list of indices with a value of 1 among the features
-        static void AppendActiveIndices(const Position& pos, Color perspective,
-          IndexList* active);
-
-        // Get a list of indices whose values ??have changed from the previous one in the feature quantity
-        static void AppendChangedIndices(const Position& pos, Color perspective,
-          IndexList* removed, IndexList* added);
-      };
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features

 #endif
@@ -1,43 +1,30 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity EnPassant of NNUE evaluation function

 #include "enpassant.h"
 #include "index_list.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-  namespace NNUE {
+  // Get a list of indices with a value of 1 among the features
+  void EnPassant::AppendActiveIndices(
+    const Position& pos, Color /* perspective */, IndexList* active) {
+    // do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;

-    namespace Features {
+    auto epSquare = pos.state()->epSquare;
+    if (epSquare == SQ_NONE) {
+      return;
+    }
+    auto file = file_of(epSquare);
+    active->push_back(file);
+  }

-      // Get a list of indices with a value of 1 among the features
-      void EnPassant::AppendActiveIndices(
-        const Position& pos, Color perspective, IndexList* active) {
-        // do nothing if array size is small to avoid compiler warning
-        if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+  // Get a list of indices whose values have changed from the previous one in the feature quantity
+  void EnPassant::AppendChangedIndices(
+    const Position& /* pos */, Color /* perspective */,
+    IndexList* /* removed */, IndexList* /* added */) {
+    // Not implemented.
+    assert(false);
+  }

-        auto epSquare = pos.state()->epSquare;
-        if (epSquare == SQ_NONE) {
-          return;
-        }
-
-        if (perspective == BLACK) {
-          epSquare = rotate180(epSquare);
-        }
-
-        auto file = file_of(epSquare);
-        active->push_back(file);
-      }
-
-      // Get a list of indices whose values ??have changed from the previous one in the feature quantity
-      void EnPassant::AppendChangedIndices(
-        const Position& /* pos */, Color /* perspective */,
-        IndexList* /* removed */, IndexList* /* added */) {
-        // Not implemented.
-        assert(false);
-      }
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features
@@ -1,4 +1,4 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity EnPassant of NNUE evaluation function

 #ifndef _NNUE_FEATURES_ENPASSANT_H_
 #define _NNUE_FEATURES_ENPASSANT_H_
@@ -6,39 +6,30 @@
 #include "../../evaluate.h"
 #include "features_common.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-  namespace NNUE {
+  class EnPassant {
+  public:
+    // feature quantity name
+    static constexpr const char* kName = "EnPassant";
+    // Hash value embedded in the evaluation function file
+    static constexpr std::uint32_t kHashValue = 0x02924F91u;
+    // number of feature dimensions
+    static constexpr IndexType kDimensions = 8;
+    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
+    static constexpr IndexType kMaxActiveDimensions = 1;
+    // Timing of full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;

-    namespace Features {
+    // Get a list of indices with a value of 1 among the features
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+      IndexList* active);

-      // Feature K: Ball position
-      class EnPassant {
-      public:
-        // feature quantity name
-        static constexpr const char* kName = "EnPassant";
-        // Hash value embedded in the evaluation function file
-        static constexpr std::uint32_t kHashValue = 0x02924F91u;
-        // number of feature dimensions
-        static constexpr IndexType kDimensions = 8;
-        // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-        static constexpr IndexType kMaxActiveDimensions = 1;
-        // Timing of full calculation instead of difference calculation
-        static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added);
+  };

-        // Get a list of indices with a value of 1 among the features
-        static void AppendActiveIndices(const Position& pos, Color perspective,
-          IndexList* active);
-
-        // Get a list of indices whose values ??have changed from the previous one in the feature quantity
-        static void AppendChangedIndices(const Position& pos, Color perspective,
-          IndexList* removed, IndexList* added);
-      };
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features

 #endif
@@ -105,9 +105,20 @@ namespace Eval::NNUE::Features {
      for (Color perspective : { WHITE, BLACK }) {
        reset[perspective] = false;
        switch (trigger) {
+          case TriggerEvent::kNone:
+            break;
          case TriggerEvent::kFriendKingMoved:
            reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
            break;
+          case TriggerEvent::kEnemyKingMoved:
+              reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
+            break;
+          case TriggerEvent::kAnyKingMoved:
+            reset[perspective] = type_of(dp.piece[0]) == KING;
+            break;
+          case TriggerEvent::kAnyPieceMoved:
+            reset[perspective] = true;
+            break;
          default:
            assert(false);
            break;
@@ -34,10 +34,10 @@ namespace Eval::NNUE::Features {
  // Trigger to perform full calculations instead of difference only
  enum class TriggerEvent {
    kNone, // Calculate the difference whenever possible
-    kFriendKingMoved, // calculate all when own ball moves
-    kEnemyKingMoved, // do all calculations when enemy balls move
-    kAnyKingMoved, // do all calculations if either ball moves
-    kAnyPieceMoved, // always do all calculations
+    kFriendKingMoved, // calculate full evaluation when own king moves
+    kEnemyKingMoved, // calculate full evaluation when opponent king moves
+    kAnyKingMoved, // calculate full evaluation when any king moves
+    kAnyPieceMoved, // always calculate full evaluation
  };

  enum class Side {
@@ -23,9 +23,9 @@

 namespace Eval::NNUE::Features {

-  // Orient a square according to perspective (rotates by 180 for black)
+  // Orient a square according to perspective (flip rank for black)
  inline Square orient(Color perspective, Square s) {
-    return Square(int(s) ^ (bool(perspective) * 63));
+    return Square(int(s) ^ (bool(perspective) * SQ_A8));
  }

  // Find the index of the feature quantity from the king position and PieceSquare
@@ -9,9 +9,9 @@ namespace NNUE {

 namespace Features {

-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }

 // Find the index of the feature quantity from the ball position and PieceSquare
@@ -9,9 +9,9 @@ namespace NNUE {

 namespace Features {

-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }

 // Index of a feature for a given king position.
@@ -32,19 +32,11 @@ void K::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
  const auto& dp = pos.state()->dirtyPiece;
-  Color king_color;
-  if (dp.piece[0] == Piece::W_KING) {
-    king_color = WHITE;
+  if (type_of(dp.piece[0]) == KING)
+  {
+    removed->push_back(MakeIndex(perspective, dp.from[0], color_of(dp.piece[0])));
+    added->push_back(MakeIndex(perspective, dp.to[0], color_of(dp.piece[0])));
  }
-  else if (dp.piece[0] == Piece::B_KING) {
-    king_color = BLACK;
-  }
-  else {
-    return;
-  }
-
-  removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
-  added->push_back(MakeIndex(perspective, dp.to[0], king_color));
 }

 }  // namespace Features
@@ -9,9 +9,9 @@ namespace NNUE {

 namespace Features {

-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }

 // Find the index of the feature quantity from the king position and PieceSquare
@@ -29,9 +29,7 @@ namespace Eval::NNUE {
  struct alignas(kCacheLineSize) Accumulator {
    std::int16_t
        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-    Value score;
    bool computed_accumulation;
-    bool computed_score;
  };

 }  // namespace Eval::NNUE
@@ -22,7 +22,7 @@
 #define NNUE_ARCHITECTURE_H_INCLUDED

 // Defines the network structure
-#include "architectures/halfkp_256x2-32-32.h"
+#include "architectures/halfkp-cr-ep_256x2-32-32.h"

 namespace Eval::NNUE {

@@ -69,7 +69,7 @@
 namespace Eval::NNUE {

  // Version of the evaluation file
-  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+  constexpr std::uint32_t kVersion = 0x7AF32F17u;

  // Constant used in evaluation value calculation
  constexpr int FV_SCALE = 16;
@@ -113,7 +113,7 @@ namespace Eval::NNUE {
    PS_END2     = 12 * SQUARE_NB + 1
  };

-  extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
+  extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB];

  // Type of input feature after conversion
  using TransformedFeatureType = std::uint8_t;
@@ -1,4 +1,4 @@
-/*
+/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

@@ -29,6 +29,61 @@

 namespace Eval::NNUE {

+  // If vector instructions are enabled, we update and refresh the
+  // accumulator tile by tile such that each tile fits in the CPU's
+  // vector registers.
+  #define TILING
+
+  #ifdef USE_AVX512
+  typedef __m512i vec_t;
+  #define vec_load(a) _mm512_loadA_si512(a)
+  #define vec_store(a,b) _mm512_storeA_si512(a,b)
+  #define vec_add_16(a,b) _mm512_add_epi16(a,b)
+  #define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
+  #define vec_zero _mm512_setzero_si512()
+  static constexpr IndexType kNumRegs = 8; // only 8 are needed
+
+  #elif USE_AVX2
+  typedef __m256i vec_t;
+  #define vec_load(a) _mm256_loadA_si256(a)
+  #define vec_store(a,b) _mm256_storeA_si256(a,b)
+  #define vec_add_16(a,b) _mm256_add_epi16(a,b)
+  #define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
+  #define vec_zero _mm256_setzero_si256()
+  static constexpr IndexType kNumRegs = 16;
+
+  #elif USE_SSE2
+  typedef __m128i vec_t;
+  #define vec_load(a) (*(a))
+  #define vec_store(a,b) *(a)=(b)
+  #define vec_add_16(a,b) _mm_add_epi16(a,b)
+  #define vec_sub_16(a,b) _mm_sub_epi16(a,b)
+  #define vec_zero _mm_setzero_si128()
+  static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
+
+  #elif USE_MMX
+  typedef __m64 vec_t;
+  #define vec_load(a) (*(a))
+  #define vec_store(a,b) *(a)=(b)
+  #define vec_add_16(a,b) _mm_add_pi16(a,b)
+  #define vec_sub_16(a,b) _mm_sub_pi16(a,b)
+  #define vec_zero _mm_setzero_si64()
+  static constexpr IndexType kNumRegs = 8;
+
+  #elif USE_NEON
+  typedef int16x8_t vec_t;
+  #define vec_load(a) (*(a))
+  #define vec_store(a,b) *(a)=(b)
+  #define vec_add_16(a,b) vaddq_s16(a,b)
+  #define vec_sub_16(a,b) vsubq_s16(a,b)
+  #define vec_zero {0}
+  static constexpr IndexType kNumRegs = 16;
+
+  #else
+  #undef TILING
+
+  #endif
+
  // Input feature converter
  class FeatureTransformer {

@@ -36,6 +91,11 @@ namespace Eval::NNUE {
    // Number of output dimensions for one side
    static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;

+    #ifdef TILING
+    static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
+    static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
+    #endif
+
   public:
    // Output type
    using OutputType = TransformedFeatureType;
@@ -50,6 +110,7 @@ namespace Eval::NNUE {

    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t GetHashValue() {
+
      return RawFeatures::kHashValue ^ kOutputDimensions;
    }

@@ -62,6 +123,7 @@ namespace Eval::NNUE {

    // Read network parameters
    bool ReadParameters(std::istream& stream) {
+
      for (std::size_t i = 0; i < kHalfDimensions; ++i)
        biases_[i] = read_little_endian<BiasType>(stream);
      for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
@@ -80,23 +142,26 @@ namespace Eval::NNUE {

    // Proceed with the difference calculation if possible
    bool UpdateAccumulatorIfPossible(const Position& pos) const {
+
      const auto now = pos.state();
-      if (now->accumulator.computed_accumulation) {
+      if (now->accumulator.computed_accumulation)
        return true;
-      }
+
      const auto prev = now->previous;
      if (prev && prev->accumulator.computed_accumulation) {
        UpdateAccumulator(pos);
        return true;
      }
+
      return false;
    }

    // Convert input features
-    void Transform(const Position& pos, OutputType* output, bool refresh) const {
-      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+    void Transform(const Position& pos, OutputType* output) const {
+
+      if (!UpdateAccumulatorIfPossible(pos))
        RefreshAccumulator(pos);
-      }
+
      const auto& accumulation = pos.state()->accumulator.accumulation;

  #if defined(USE_AVX2)
@@ -133,6 +198,12 @@ namespace Eval::NNUE {
              &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m256i sum1 = _mm256_loadA_si256(
            &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 0]);
+            sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 1]);
+          }
          _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
              _mm256_packs_epi16(sum0, sum1), kZero), kControl));
        }
@@ -144,6 +215,12 @@ namespace Eval::NNUE {
              accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
              accumulation[perspectives[p]][0])[j * 2 + 1]);
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 0]);
+            sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 1]);
+          }
      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);

          _mm_store_si128(&out[j],
@@ -164,6 +241,12 @@ namespace Eval::NNUE {
              accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m64 sum1 = *(&reinterpret_cast<const __m64*>(
              accumulation[perspectives[p]][0])[j * 2 + 1]);
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
+                accumulation[perspectives[p]][i])[j * 2 + 0]);
+            sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
+                accumulation[perspectives[p]][i])[j * 2 + 1]);
+          }
          const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
          out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
        }
@@ -173,12 +256,19 @@ namespace Eval::NNUE {
        for (IndexType j = 0; j < kNumChunks; ++j) {
          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
              accumulation[perspectives[p]][0])[j];
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
+                accumulation[perspectives[p]][i])[j]);
+          }
          out[j] = vmax_s8(vqmovn_s16(sum), kZero);
        }

  #else
        for (IndexType j = 0; j < kHalfDimensions; ++j) {
          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum += accumulation[static_cast<int>(perspectives[p])][i][j];
+          }
          output[offset + j] = static_cast<OutputType>(
              std::max<int>(0, std::min<int>(127, sum)));
        }
@@ -193,192 +283,162 @@ namespace Eval::NNUE {
   private:
    // Calculate cumulative value without using difference calculation
    void RefreshAccumulator(const Position& pos) const {
+      // Rebuilds every accumulation slice of the current state from the active feature indices.
      auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
-      Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
-                                       active_indices);
-      for (Color perspective : { WHITE, BLACK }) {
-        std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                   kHalfDimensions * sizeof(BiasType));
-        for (const auto index : active_indices[perspective]) {
-          const IndexType offset = kHalfDimensions * index;
-  #if defined(USE_AVX512)
-          auto accumulation = reinterpret_cast<__m512i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
+      for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {  // one slice per refresh trigger
+        Features::IndexList active_indices[2];
+        RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+                                         active_indices);
+        for (Color perspective : { WHITE, BLACK }) {
+    #ifdef TILING
+          for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
+            auto accTile = reinterpret_cast<vec_t*>(
+                &accumulator.accumulation[perspective][i][j * kTileHeight]);
+            vec_t acc[kNumRegs];

-  #elif defined(USE_AVX2)
-          auto accumulation = reinterpret_cast<__m256i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
+            if (i == 0) {  // only slice 0 starts from the biases; the others start at zero
+              auto biasesTile = reinterpret_cast<const vec_t*>(
+                  &biases_[j * kTileHeight]);
+              for (unsigned k = 0; k < kNumRegs; ++k)
+                acc[k] = biasesTile[k];
+            } else {
+              for (unsigned k = 0; k < kNumRegs; ++k)
+                acc[k] = vec_zero;
+            }
+            for (const auto index : active_indices[perspective]) {
+              const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+              auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);

-  #elif defined(USE_SSE2)
-          auto accumulation = reinterpret_cast<__m128i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+              for (unsigned k = 0; k < kNumRegs; ++k)
+                acc[k] = vec_add_16(acc[k], column[k]);
+            }

-  #elif defined(USE_MMX)
-          auto accumulation = reinterpret_cast<__m64*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j) {
-            accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
+            for (unsigned k = 0; k < kNumRegs; k++)  // write the finished tile back once
+              vec_store(&accTile[k], acc[k]);
+          }
+    #else
+          if (i == 0) {  // only slice 0 starts from the biases; the others start at zero
+            std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                        kHalfDimensions * sizeof(BiasType));
+          } else {
+            std::memset(accumulator.accumulation[perspective][i], 0,
+                        kHalfDimensions * sizeof(BiasType));
          }

-  #elif defined(USE_NEON)
-          auto accumulation = reinterpret_cast<int16x8_t*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-
-  #else
-          for (IndexType j = 0; j < kHalfDimensions; ++j)
-            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
-  #endif
+          for (const auto index : active_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;

+            for (IndexType j = 0; j < kHalfDimensions; ++j)
+              accumulator.accumulation[perspective][i][j] += weights_[offset + j];
+          }
+    #endif
        }
+
      }
+
  #if defined(USE_MMX)
      _mm_empty();
  #endif

      accumulator.computed_accumulation = true;
-      accumulator.computed_score = false;
    }

    // Calculate cumulative value using difference calculation
    void UpdateAccumulator(const Position& pos) const {
+      // Derives each slice from the previous state's accumulator, or restarts it when reset is set.
-      const auto prev_accumulator = pos.state()->previous->accumulator;
+      const auto& prev_accumulator = pos.state()->previous->accumulator;  // read-only: bind by reference, no copy
      auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
-      Features::IndexList removed_indices[2], added_indices[2];
-      bool reset[2];
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
-                                        removed_indices, added_indices, reset);
-      for (Color perspective : { WHITE, BLACK }) {
+      for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {  // one slice per refresh trigger
+        Features::IndexList removed_indices[2], added_indices[2];
+        bool reset[2];
+        RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+                                          removed_indices, added_indices, reset);

-  #if defined(USE_AVX2)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m256i*>(
-            &accumulator.accumulation[perspective][i][0]);
+    #ifdef TILING
+        for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
+          for (Color perspective : { WHITE, BLACK }) {
+            auto accTile = reinterpret_cast<vec_t*>(
+                &accumulator.accumulation[perspective][i][j * kTileHeight]);
+            vec_t acc[kNumRegs];

-  #elif defined(USE_SSE2)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m128i*>(
-            &accumulator.accumulation[perspective][i][0]);
+            if (reset[perspective]) {  // start over: biases for slice 0, zero for the others
+              if (i == 0) {
+                auto biasesTile = reinterpret_cast<const vec_t*>(
+                    &biases_[j * kTileHeight]);
+                for (unsigned k = 0; k < kNumRegs; ++k)
+                  acc[k] = biasesTile[k];
+              } else {
+                for (unsigned k = 0; k < kNumRegs; ++k)
+                  acc[k] = vec_zero;
+              }
+            } else {  // incremental: start from the previous state's values
+              auto prevAccTile = reinterpret_cast<const vec_t*>(
+                  &prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
+              for (IndexType k = 0; k < kNumRegs; ++k)
+                acc[k] = vec_load(&prevAccTile[k]);

-  #elif defined(USE_MMX)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m64*>(
-            &accumulator.accumulation[perspective][i][0]);
+              // Difference calculation for the deactivated features
+              for (const auto index : removed_indices[perspective]) {
+                const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+                auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);

-  #elif defined(USE_NEON)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<int16x8_t*>(
-            &accumulator.accumulation[perspective][i][0]);
-  #endif
+                for (IndexType k = 0; k < kNumRegs; ++k)
+                  acc[k] = vec_sub_16(acc[k], column[k]);
+              }
+            }
+            { // Difference calculation for the activated features
+              for (const auto index : added_indices[perspective]) {
+                const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+                auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);

-        if (reset[perspective]) {
-          std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                      kHalfDimensions * sizeof(BiasType));
-        } else {
-          std::memcpy(accumulator.accumulation[perspective][i],
-                      prev_accumulator.accumulation[perspective][i],
-                      kHalfDimensions * sizeof(BiasType));
-          // Difference calculation for the deactivated features
-          for (const auto index : removed_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index;
-
-  #if defined(USE_AVX2)
-            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
+                for (IndexType k = 0; k < kNumRegs; ++k)
+                  acc[k] = vec_add_16(acc[k], column[k]);
+              }
            }

-  #elif defined(USE_SSE2)
-            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
-            }
-
-  #elif defined(USE_MMX)
-            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
-            }
-
-  #elif defined(USE_NEON)
-            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
-            }
-
-  #else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] -=
-                  weights_[offset + j];
-            }
-  #endif
-
+            for (IndexType k = 0; k < kNumRegs; ++k)  // write the finished tile back once
+              vec_store(&accTile[k], acc[k]);
          }
        }
-        { // Difference calculation for the activated features
-          for (const auto index : added_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index;
+    #if defined(USE_MMX)
+        _mm_empty();
+    #endif

-  #if defined(USE_AVX2)
-            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
+    #else
+        for (Color perspective : { WHITE, BLACK }) {
+
+          if (reset[perspective]) {  // start over: biases for slice 0, zero for the others
+            if (i == 0) {
+              std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                          kHalfDimensions * sizeof(BiasType));
+            } else {
+              std::memset(accumulator.accumulation[perspective][i], 0,
+                          kHalfDimensions * sizeof(BiasType));
            }
+          } else {  // incremental: start from the previous state's values
+            std::memcpy(accumulator.accumulation[perspective][i],
+                        prev_accumulator.accumulation[perspective][i],
+                        kHalfDimensions * sizeof(BiasType));
+            // Difference calculation for the deactivated features
+            for (const auto index : removed_indices[perspective]) {
+              const IndexType offset = kHalfDimensions * index;

-  #elif defined(USE_SSE2)
-            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+              for (IndexType j = 0; j < kHalfDimensions; ++j)
+                accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
            }
+          }
+          { // Difference calculation for the activated features
+            for (const auto index : added_indices[perspective]) {
+              const IndexType offset = kHalfDimensions * index;

-  #elif defined(USE_MMX)
-            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
+              for (IndexType j = 0; j < kHalfDimensions; ++j)
+                accumulator.accumulation[perspective][i][j] += weights_[offset + j];
            }
-
-  #elif defined(USE_NEON)
-            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-            }
-
-  #else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] +=
-                  weights_[offset + j];
-            }
-  #endif
-
          }
        }
+    #endif
      }
-  #if defined(USE_MMX)
-      _mm_empty();
-  #endif
-
      accumulator.computed_accumulation = true;
-      accumulator.computed_score = false;
    }

    using BiasType = std::int16_t;
@@ -194,7 +194,7 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
      weights_(),
      biases_diff_(),
      weights_diff_(),
-      momentum_(0.0),
+      momentum_(0.2),
      learning_rate_scale_(1.0) {
    DequantizeParameters();
  }
@@ -232,7 +232,7 @@ class Trainer<FeatureTransformer> {
      biases_(),
      weights_(),
      biases_diff_(),
-      momentum_(0.0),
+      momentum_(0.2),
      learning_rate_scale_(1.0) {
    min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
    max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
@@ -707,7 +707,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {

  // Used by NNUE
  st->accumulator.computed_accumulation = false;
-  st->accumulator.computed_score = false;
  auto& dp = st->dirtyPiece;
  dp.dirty_num = 1;

@@ -1003,7 +1002,6 @@ void Position::do_null_move(StateInfo& newSt) {
  if (Eval::useNNUE != Eval::UseNNUEMode::False)
  {
      std::memcpy(&newSt, st, sizeof(StateInfo));
-      st->accumulator.computed_score = false;
  }
  else
      std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
@@ -1353,9 +1351,9 @@ bool Position::pos_is_ok() const {
 // Add a function that directly unpacks for speed. It's pretty tough.
 // Write it by combining packer::unpack() and Position::set().
 // If there is a problem with the passed phase and there is an error, non-zero is returned.
-int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th, bool mirror)
+int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th)
 {
-  return Learner::set_from_packed_sfen(*this, sfen, si, th, mirror);
+  return Learner::set_from_packed_sfen(*this, sfen, si, th);
 }

 // Give the board, hand piece, and turn, and return the sfen.
@@ -177,7 +177,7 @@ public:

  // --sfenization helper

-  friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+  friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);

  // Get the packed sfen. Returns to the buffer specified in the argument.
  // Do not include gamePly in pack.
@@ -187,7 +187,7 @@ public:
  // Equivalent to pos.set(sfen_unpack(data),si,th);.
  // If there is a problem with the passed phase and there is an error, non-zero is returned.
  // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
-  int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
+  int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);

  void clear() { std::memset(this, 0, sizeof(Position)); }

@@ -40,21 +40,11 @@ namespace Search {
  LimitsType Limits;
 }

-namespace Tablebases {
-
-  int Cardinality;
-  bool RootInTB;
-  bool UseRule50;
-  Depth ProbeDepth;
-}
-
-namespace TB = Tablebases;
-
 using std::string;
 using Eval::evaluate;
 using namespace Search;

-bool Search::prune_at_shallow_depth_on_pv_node = true;
+bool Search::prune_at_shallow_depth = true;

 namespace {

@@ -227,7 +217,7 @@ void MainThread::search() {
  Time.init(Limits, us, rootPos.game_ply());
  TT.new_search();

-  Eval::verify_NNUE();
+  Eval::NNUE::verify();

  if (rootMoves.empty())
  {
@@ -464,10 +454,7 @@ void Thread::search() {
                  ++failedHighCnt;
              }
              else
-              {
-                  ++rootMoves[pvIdx].bestMoveCount;
                  break;
-              }

              delta += delta / 4 + 5;

@@ -522,7 +509,7 @@ void Thread::search() {
              totBestMoveChanges += th->bestMoveChanges;
              th->bestMoveChanges = 0;
          }
-          double bestMoveInstability = 1 + totBestMoveChanges / Threads.size();
+          double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size();

          double totalTime = rootMoves.size() == 1 ? 0 :
                             Time.optimum() * fallingEval * reduction * bestMoveInstability;
@@ -599,7 +586,7 @@ namespace {
    Move ttMove, move, excludedMove, bestMove;
    Depth extension, newDepth;
    Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
-    bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture;
+    bool formerPv, givesCheck, improving, didLMR, priorCapture;
    bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
         ttCapture, singularQuietLMR;
    Piece movedPiece;
@@ -646,6 +633,7 @@ namespace {
    assert(0 <= ss->ply && ss->ply < MAX_PLY);

    (ss+1)->ply = ss->ply + 1;
+    (ss+1)->ttPv = false;
    (ss+1)->excludedMove = bestMove = MOVE_NONE;
    (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE;
    Square prevSq = to_sq((ss-1)->currentMove);
@@ -655,9 +643,7 @@ namespace {
    // starts with statScore = 0. Later grandchildren start with the last calculated
    // statScore of the previous grandchild. This influences the reduction rules in
    // LMR which are based on the statScore of parent position.
-    if (rootNode)
-        (ss+4)->statScore = 0;
-    else
+    if (!rootNode)
        (ss+2)->statScore = 0;

    // Step 4. Transposition table lookup. We don't want the score of a partial
@@ -665,14 +651,15 @@ namespace {
    // position key in case of an excluded move.
    excludedMove = ss->excludedMove;
    posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove);
-    tte = TT.probe(posKey, ttHit);
-    ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+    tte = TT.probe(posKey, ss->ttHit);
+    ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
    ttMove =  rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
-            : ttHit    ? tte->move() : MOVE_NONE;
-    ttPv = PvNode || (ttHit && tte->is_pv());
-    formerPv = ttPv && !PvNode;
+            : ss->ttHit    ? tte->move() : MOVE_NONE;
+    if (!excludedMove)
+        ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
+    formerPv = ss->ttPv && !PvNode;

-    if (   ttPv
+    if (   ss->ttPv
        && depth > 12
        && ss->ply - 1 < MAX_LPH
        && !priorCapture
@@ -681,11 +668,11 @@ namespace {

    // thisThread->ttHitAverage can be used to approximate the running average of ttHit
    thisThread->ttHitAverage =   (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow
-                                + TtHitAverageResolution * ttHit;
+                                + TtHitAverageResolution * ss->ttHit;

    // At non-PV nodes we check for an early TT cutoff
    if (  !PvNode
-        && ttHit
+        && ss->ttHit
        && tte->depth() >= depth
        && ttValue != VALUE_NONE // Possible in case of TT access race
        && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
@@ -717,27 +704,27 @@ namespace {
    }

    // Step 5. Tablebases probe
-    if (!rootNode && TB::Cardinality)
+    if (!rootNode && thisThread->Cardinality)
    {
        int piecesCount = pos.count<ALL_PIECES>();

-        if (    piecesCount <= TB::Cardinality
-            && (piecesCount <  TB::Cardinality || depth >= TB::ProbeDepth)
+        if (    piecesCount <= thisThread->Cardinality
+            && (piecesCount <  thisThread->Cardinality || depth >= thisThread->ProbeDepth)
            &&  pos.rule50_count() == 0
            && !pos.can_castle(ANY_CASTLING))
        {
-            TB::ProbeState err;
-            TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
+            Tablebases::ProbeState err;
+            Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &err);

            // Force check of time on the next occasion
            if (thisThread == Threads.main())
                static_cast<MainThread*>(thisThread)->callsCnt = 0;

-            if (err != TB::ProbeState::FAIL)
+            if (err != Tablebases::ProbeState::FAIL)
            {
                thisThread->tbHits.fetch_add(1, std::memory_order_relaxed);

-                int drawScore = TB::UseRule50 ? 1 : 0;
+                int drawScore = thisThread->UseRule50 ? 1 : 0;

                // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score
                value =  wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1
@@ -750,7 +737,7 @@ namespace {
                if (    b == BOUND_EXACT
                    || (b == BOUND_LOWER ? value >= beta : value <= alpha))
                {
-                    tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b,
+                    tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b,
                              std::min(MAX_PLY - 1, depth + 6),
                              MOVE_NONE, VALUE_NONE);

@@ -778,7 +765,7 @@ namespace {
        improving = false;
        goto moves_loop;
    }
-    else if (ttHit)
+    else if (ss->ttHit)
    {
        // Never assume anything about values stored in TT
        ss->staticEval = eval = tte->eval();
@@ -800,7 +787,7 @@ namespace {
        else
            ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;

-        tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
+        tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
    }

    // Step 7. Razoring (~1 Elo)
@@ -826,7 +813,7 @@ namespace {
        && (ss-1)->statScore < 22977
        &&  eval >= beta
        &&  eval >= ss->staticEval
-        &&  ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182
+        &&  ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182
        && !excludedMove
        &&  pos.non_pawn_material(us)
        && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -882,14 +869,14 @@ namespace {
        // there and in further interactions with transposition table cutoff depth is set to depth - 3
        // because probCut search has depth set to depth - 4 but we also do a move before it
        // so effective depth is equal to depth - 3
-        && !(   ttHit
+        && !(   ss->ttHit
             && tte->depth() >= depth - 3
             && ttValue != VALUE_NONE
             && ttValue < probCutBeta))
    {
        // if ttMove is a capture and value from transposition table is good enough produce probCut
        // cutoff without digging into actual probCut search
-        if (   ttHit
+        if (   ss->ttHit
            && tte->depth() >= depth - 3
            && ttValue != VALUE_NONE
            && ttValue >= probCutBeta
@@ -900,6 +887,8 @@ namespace {
        assert(probCutBeta < VALUE_INFINITE);
        MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
        int probCutCount = 0;
+        bool ttPv = ss->ttPv;
+        ss->ttPv = false;

        while (   (move = mp.next_move()) != MOVE_NONE
               && probCutCount < 2 + 2 * cutNode)
@@ -931,7 +920,7 @@ namespace {
                if (value >= probCutBeta)
                {
                    // if transposition table doesn't have equal or more deep info write probCut data into it
-                    if ( !(ttHit
+                    if ( !(ss->ttHit
                       && tte->depth() >= depth - 3
                       && ttValue != VALUE_NONE))
                        tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
@@ -940,8 +929,15 @@ namespace {
                    return value;
                }
            }
+         ss->ttPv = ttPv;
    }

+    // Step 11. At PV nodes with depth >= 6 and no TT move, decrease depth by 2
+    if (   PvNode
+        && depth >= 6
+        && !ttMove)
+        depth -= 2;
+
 moves_loop: // When in check, search starts from here

    const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -965,7 +961,7 @@ moves_loop: // When in check, search starts from here
    // Mark this node as being searched
    ThreadHolding th(thisThread, posKey, ss->ply);

-    // Step 11. Loop through all pseudo-legal moves until no moves remain
+    // Step 12. Loop through all pseudo-legal moves until no moves remain
    // or a beta cutoff occurs.
    while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE)
    {
@@ -1005,9 +1001,9 @@ moves_loop: // When in check, search starts from here
      // Calculate new depth for this move
      newDepth = depth - 1;

-      // Step 12. Pruning at shallow depth (~200 Elo)
+      // Step 13. Pruning at shallow depth (~200 Elo)
      if (  !rootNode
-          && (PvNode ? prune_at_shallow_depth_on_pv_node : true)
+          && (PvNode ? prune_at_shallow_depth : true)
          && pos.non_pawn_material(us)
          && bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
      {
@@ -1052,7 +1048,6 @@ moves_loop: // When in check, search starts from here
              if (   !givesCheck
                  && lmrDepth < 6
                  && !(PvNode && abs(bestValue) < 2)
-                  && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
                  && !ss->inCheck
                  && ss->staticEval + 169 + 244 * lmrDepth
                     + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
@@ -1064,7 +1059,7 @@ moves_loop: // When in check, search starts from here
          }
      }

-      // Step 13. Extensions (~75 Elo)
+      // Step 14. Extensions (~75 Elo)

      // Singular extension search (~70 Elo). If all moves but one fail low on a
      // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
@@ -1123,11 +1118,6 @@ moves_loop: // When in check, search starts from here
               && pos.non_pawn_material() <= 2 * RookValueMg)
          extension = 1;

-      // Castling extension
-      if (   type_of(move) == CASTLING
-          && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
-          extension = 1;
-
      // Late irreversible move extension
      if (   move == ttMove
          && pos.rule50_count() > 80
@@ -1147,14 +1137,13 @@ moves_loop: // When in check, search starts from here
                                                                [movedPiece]
                                                                [to_sq(move)];

-      // Step 14. Make the move
+      // Step 15. Make the move
      pos.do_move(move, st, givesCheck);

-      // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
+      // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
      // re-searched at full depth.
      if (    depth >= 3
-          &&  moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
-          && (!rootNode || thisThread->best_move_count(move) == 0)
+          &&  moveCount > 1 + 2 * rootNode
          && (  !captureOrPromotion
              || moveCountPruning
              || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
@@ -1163,13 +1152,6 @@ moves_loop: // When in check, search starts from here
      {
          Depth r = reduction(improving, depth, moveCount);

-          // Decrease reduction at non-check cut nodes for second move at low depths
-          if (   cutNode
-              && depth <= 10
-              && moveCount <= 2
-              && !ss->inCheck)
-              r--;
-
          // Decrease reduction if the ttHit running average is large
          if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
              r--;
@@ -1179,7 +1161,7 @@ moves_loop: // When in check, search starts from here
              r++;

          // Decrease reduction if position is or has been on the PV (~10 Elo)
-          if (ttPv)
+          if (ss->ttPv)
              r -= 2;

          if (moveCountPruning && !formerPv)
@@ -1191,7 +1173,7 @@ moves_loop: // When in check, search starts from here

          // Decrease reduction if ttMove has been singularly extended (~3 Elo)
          if (singularQuietLMR)
-              r -= 1 + formerPv;
+              r--;

          if (!captureOrPromotion)
          {
@@ -1208,7 +1190,7 @@ moves_loop: // When in check, search starts from here
              // hence break make_move(). (~2 Elo)
              else if (    type_of(move) == NORMAL
                       && !pos.see_ge(reverse_move(move)))
-                  r -= 2 + ttPv - (type_of(movedPiece) == PAWN);
+                  r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN);

              ss->statScore =  thisThread->mainHistory[us][from_to(move)]
                             + (*contHist[0])[movedPiece][to_sq(move)]
@@ -1228,14 +1210,14 @@ moves_loop: // When in check, search starts from here
          }
          else
          {
-            // Increase reduction for captures/promotions if late move and at low depth
-            if (depth < 8 && moveCount > 2)
-                r++;
+              // Increase reduction for captures/promotions if late move and at low depth
+              if (depth < 8 && moveCount > 2)
+                  r++;

-            // Unless giving check, this capture is likely bad
-            if (   !givesCheck
-                && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
-                r++;
+              // Unless giving check, this capture is likely bad
+              if (   !givesCheck
+                  && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
+                  r++;
          }

          Depth d = std::clamp(newDepth - r, 1, newDepth);
@@ -1253,7 +1235,7 @@ moves_loop: // When in check, search starts from here
          didLMR = false;
      }

-      // Step 16. Full depth search when LMR is skipped or fails high
+      // Step 17. Full depth search when LMR is skipped or fails high
      if (doFullDepthSearch)
      {
          value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
@@ -1281,12 +1263,12 @@ moves_loop: // When in check, search starts from here
          value = -search<PV>(pos, ss+1, -beta, -alpha, newDepth, false);
      }

-      // Step 17. Undo move
+      // Step 18. Undo move
      pos.undo_move(move);

      assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);

-      // Step 18. Check for a new best move
+      // Step 19. Check for a new best move
      // Finished searching the move. If a stop occurred, the return value of
      // the search cannot be trusted, and we return immediately without
      // updating best move, PV and TT.
@@ -1363,7 +1345,7 @@ moves_loop: // When in check, search starts from here
        return VALUE_DRAW;
    */

-    // Step 19. Check for mate and stalemate
+    // Step 20. Check for mate and stalemate
    // All legal moves have been searched and if there are no legal moves, it
    // must be a mate or a stalemate. If we are in a singular extension search then
    // return a fail low score.
@@ -1386,8 +1368,17 @@ moves_loop: // When in check, search starts from here
    if (PvNode)
        bestValue = std::min(bestValue, maxValue);

+    // If no good move is found and the previous position was ttPv, then the previous
+    // opponent move is probably good and the new position is added to the search tree.
+    if (bestValue <= alpha)
+        ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3);
+    // Otherwise, a counter move has been found and if the position is the last leaf
+    // in the search tree, remove the position from the search tree.
+    else if (depth > 3)
+        ss->ttPv = ss->ttPv && (ss+1)->ttPv;
+
    if (!excludedMove && !(rootNode && thisThread->pvIdx))
-        tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv,
+        tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
                  bestValue >= beta ? BOUND_LOWER :
                  PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER,
                  depth, bestMove, ss->staticEval);
@@ -1416,7 +1407,7 @@ moves_loop: // When in check, search starts from here
    Move ttMove, move, bestMove;
    Depth ttDepth;
    Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha;
-    bool ttHit, pvHit, givesCheck, captureOrPromotion;
+    bool pvHit, givesCheck, captureOrPromotion;
    int moveCount;

    if (PvNode)
@@ -1446,13 +1437,13 @@ moves_loop: // When in check, search starts from here
                                                  : DEPTH_QS_NO_CHECKS;
    // Transposition table lookup
    posKey = pos.key();
-    tte = TT.probe(posKey, ttHit);
-    ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
-    ttMove = ttHit ? tte->move() : MOVE_NONE;
-    pvHit = ttHit && tte->is_pv();
+    tte = TT.probe(posKey, ss->ttHit);
+    ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+    ttMove = ss->ttHit ? tte->move() : MOVE_NONE;
+    pvHit = ss->ttHit && tte->is_pv();

    if (  !PvNode
-        && ttHit
+        && ss->ttHit
        && tte->depth() >= ttDepth
        && ttValue != VALUE_NONE // Only in case of TT access race
        && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
@@ -1467,7 +1458,7 @@ moves_loop: // When in check, search starts from here
    }
    else
    {
-        if (ttHit)
+        if (ss->ttHit)
        {
            // Never assume anything about values stored in TT
            if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE)
@@ -1486,7 +1477,7 @@ moves_loop: // When in check, search starts from here
        // Stand pat. Return immediately if static value is at least beta
        if (bestValue >= beta)
        {
-            if (!ttHit)
+            if (!ss->ttHit)
                tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
                          DEPTH_NONE, MOVE_NONE, ss->staticEval);

@@ -1524,6 +1515,7 @@ moves_loop: // When in check, search starts from here

      // Futility pruning
      if (   !ss->inCheck
+          && Search::prune_at_shallow_depth
          && !givesCheck
          &&  futilityBase > -VALUE_KNOWN_WIN
          && !pos.advanced_pawn_push(move))
@@ -1550,18 +1542,17 @@ moves_loop: // When in check, search starts from here
      }

      // Do not search moves with negative SEE values
-      if (  !ss->inCheck && !pos.see_ge(move))
+      if (   !ss->inCheck
+          && Search::prune_at_shallow_depth
+          && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move))
+          && !pos.see_ge(move))
          continue;

      // Speculative prefetch as early as possible
      prefetch(TT.first_entry(pos.key_after(move)));

      // Check for legality just before making the move
-      if (
-        // HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning.
-        !pos.pseudo_legal(move) ||
-        !pos.legal(move)
-        )
+      if (!pos.legal(move))
      {
          moveCount--;
          continue;
@@ -1573,8 +1564,10 @@ moves_loop: // When in check, search starts from here
                                                                [pos.moved_piece(move)]
                                                                [to_sq(move)];

+      // CounterMove based pruning
      if (  !captureOrPromotion
-          && moveCount >= abs(depth) + 1
+          && Search::prune_at_shallow_depth
+          && moveCount
          && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
          && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
          continue;
@@ -1706,8 +1699,8 @@ moves_loop: // When in check, search starts from here
    else
        captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1;

-    // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted
-    if (   ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0]))
+    // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted
+    if (   ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0]))
        && !pos.captured_piece())
            update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1);

@@ -1844,19 +1837,22 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
  size_t pvIdx = pos.this_thread()->pvIdx;
  size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size());
  uint64_t nodesSearched = Threads.nodes_searched();
-  uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0);
+  uint64_t tbHits = Threads.tb_hits() + (pos.this_thread()->rootInTB ? rootMoves.size() : 0);

  for (size_t i = 0; i < multiPV; ++i)
  {
      bool updated = rootMoves[i].score != -VALUE_INFINITE;

-      if (depth == 1 && !updated)
+      if (depth == 1 && !updated && i > 0)
          continue;

-      Depth d = updated ? depth : depth - 1;
+      Depth d = updated ? depth : std::max(1, depth - 1);
      Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore;

-      bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
+      if (v == -VALUE_INFINITE)
+          v = VALUE_ZERO;
+
+      bool tb = pos.this_thread()->rootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
      v = tb ? rootMoves[i].tbScore : v;

      if (ss.rdbuf()->in_avail()) // Not at first line
@@ -1923,42 +1919,42 @@ bool RootMove::extract_ponder_from_tt(Position& pos) {

 void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {

-    RootInTB = false;
-    UseRule50 = bool(Options["Syzygy50MoveRule"]);
-    ProbeDepth = int(Options["SyzygyProbeDepth"]);
-    Cardinality = int(Options["SyzygyProbeLimit"]);
+    auto& rootInTB = pos.this_thread()->rootInTB;
+    auto& cardinality = pos.this_thread()->Cardinality;
+    auto& probeDepth = pos.this_thread()->ProbeDepth;
+    rootInTB = false;
    bool dtz_available = true;

    // Tables with fewer pieces than SyzygyProbeLimit are searched with
    // ProbeDepth == DEPTH_ZERO
-    if (Cardinality > MaxCardinality)
+    if (cardinality > Tablebases::MaxCardinality)
    {
-        Cardinality = MaxCardinality;
-        ProbeDepth = 0;
+        cardinality = Tablebases::MaxCardinality;
+        probeDepth = 0;
    }

-    if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
+    if (cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
    {
        // Rank moves using DTZ tables
-        RootInTB = root_probe(pos, rootMoves);
+        rootInTB = root_probe(pos, rootMoves);

-        if (!RootInTB)
+        if (!rootInTB)
        {
            // DTZ tables are missing; try to rank moves using WDL tables
            dtz_available = false;
-            RootInTB = root_probe_wdl(pos, rootMoves);
+            rootInTB = root_probe_wdl(pos, rootMoves);
        }
    }

-    if (RootInTB)
+    if (rootInTB)
    {
        // Sort moves according to TB rank
-        std::sort(rootMoves.begin(), rootMoves.end(),
+        std::stable_sort(rootMoves.begin(), rootMoves.end(),
                  [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } );

        // Probe during search only if DTZ is not available and we are winning
        if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW)
-            Cardinality = 0;
+            cardinality = 0;
    }
    else
    {
@@ -1966,6 +1962,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
        for (auto& m : rootMoves)
            m.tbRank = 0;
    }
+
 }

 // --- expose the functions such as fixed depth search used for learning to the outside
@@ -1998,7 +1995,7 @@ namespace Learner
      th->nmpMinPly = th->bestMoveChanges = 0;
      th->ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;

-	  // Zero initialization of the number of search nodes
+      // Zero initialization of the number of search nodes
      th->nodes = 0;

      // Clear all history types. This initialization takes a little time and the accuracy of the search is rather low anyway, so it is unclear whether clearing them is beneficial.
@@ -2022,7 +2019,7 @@ namespace Learner
      for (int i = 7; i > 0; i--)
          (ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel

- // set rootMoves
+      // set rootMoves
      auto& rootMoves = th->rootMoves;

      rootMoves.clear();
@@ -2030,6 +2027,20 @@ namespace Learner
        rootMoves.push_back(Search::RootMove(m));

      assert(!rootMoves.empty());
+
+      th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
+      th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
+      th->Cardinality = int(Options["SyzygyProbeLimit"]);
+
+      // Tables with fewer pieces than SyzygyProbeLimit are searched with
+      // ProbeDepth == DEPTH_ZERO
+      if (th->Cardinality > Tablebases::MaxCardinality)
+      {
+          th->Cardinality = Tablebases::MaxCardinality;
+          th->ProbeDepth = 0;
+      }
+
+      Tablebases::rank_root_moves(pos, rootMoves);
    }
  }

@@ -2050,8 +2061,8 @@ namespace Learner
  // As it has a bad effect, I decided to stop allowing the window range to be specified.
  ValueAndPV qsearch(Position& pos)
  {
-    Stack stack[MAX_PLY + 10], * ss = stack + 7;
-    Move pv[MAX_PLY + 1];
+    Stack stack[MAX_PLY+10], *ss = stack+7;
+    Move  pv[MAX_PLY+1];

    init_for_search(pos, ss);
    ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer.
@@ -2070,7 +2081,7 @@ namespace Learner

    auto bestValue = ::qsearch<PV>(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0);

-  // Returns the PV obtained.
+    // Returns the PV obtained.
    std::vector<Move> pvs;
    for (Move* p = &ss->pv[0]; is_ok(*p); ++p)
      pvs.push_back(*p);
@@ -2136,7 +2147,7 @@ namespace Learner
    Value bestValue = -VALUE_INFINITE;

    while ((rootDepth += 1) <= depth
-	  // exit this loop even if the node limit is exceeded
+      // also exit this loop once the node limit has been reached
      // The number of search nodes is passed in the argument of this function.
      && !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit)
      )
@@ -2158,46 +2169,36 @@ namespace Learner
              break;
        }

-	    // selDepth output with USI info for each depth and PV line
+        // selDepth output with USI info for each depth and PV line
        selDepth = 0;

        // Switch to aspiration search once rootDepth reaches the threshold checked below.
-        if (rootDepth >= 5 * 1)
+        if (rootDepth >= 4)
        {
-          delta = Value(20);
-
-          Value p = rootMoves[pvIdx].previousScore;
-
-          alpha = std::max(p - delta, -VALUE_INFINITE);
-          beta = std::min(p + delta, VALUE_INFINITE);
+            Value prev = rootMoves[pvIdx].previousScore;
+            delta = Value(17);
+            alpha = std::max(prev - delta,-VALUE_INFINITE);
+            beta  = std::min(prev + delta, VALUE_INFINITE);
        }

-        // aspiration search
-        int failedHighCnt = 0;
        while (true)
        {
-          Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt * 1);
+          Depth adjustedDepth = std::max(1, rootDepth);
          bestValue = ::search<PV>(pos, ss, alpha, beta, adjustedDepth, false);

          stable_sort(rootMoves.begin() + pvIdx, rootMoves.end());
          //my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx);

-		  // Expand aspiration window for fail low/high.
+          // Expand aspiration window for fail low/high.
          // However, if it is the value specified by the argument, it will be treated as fail low/high and break.
          if (bestValue <= alpha)
          {
            beta = (alpha + beta) / 2;
            alpha = std::max(bestValue - delta, -VALUE_INFINITE);
-
-            failedHighCnt = 0;
-            //if (mainThread)
-            //    mainThread->stopOnPonderhit = false;
-
          }
          else if (bestValue >= beta)
          {
            beta = std::min(bestValue + delta, VALUE_INFINITE);
-            ++failedHighCnt;
          }
          else
            break;
@@ -2218,7 +2219,6 @@ namespace Learner
    }

    // Filter this PV through is_ok(), because there may be a NULL_MOVE in the middle of it.
-    // ¨ PV should not be NULL_MOVE because it is PV
    // MOVE_WIN has never been put into the PV. (For now)
    for (Move move : rootMoves[0].pv)
    {
@@ -24,6 +24,7 @@
 #include "misc.h"
 #include "movepick.h"
 #include "types.h"
+#include "uci.h"

 class Position;

@@ -32,7 +33,7 @@ namespace Search {
 /// Threshold used for countermoves based pruning
 constexpr int CounterMovePruneThreshold = 0;

-extern bool prune_at_shallow_depth_on_pv_node;
+extern bool prune_at_shallow_depth;

 /// Stack struct keeps track of the information we need to remember from nodes
 /// shallower and deeper in the tree during the search. Each search thread has
@@ -49,6 +50,8 @@ struct Stack {
  int statScore;
  int moveCount;
  bool inCheck;
+  bool ttPv;
+  bool ttHit;
 };


@@ -70,7 +73,6 @@ struct RootMove {
  Value previousScore = -VALUE_INFINITE;
  int selDepth = 0;
  int tbRank = 0;
-  int bestMoveCount = 0;
  Value tbScore;
  std::vector<Move> pv;
 };
@@ -52,7 +52,7 @@

 using namespace Tablebases;

-int Tablebases::MaxCardinality;
+int Tablebases::MaxCardinality = 0;

 namespace {

@@ -223,7 +223,9 @@ public:

        *mapping = statbuf.st_size;
        *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
+#if defined(MADV_RANDOM)
        madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
+#endif
        ::close(fd);

        if (*baseAddress == MAP_FAILED)
@@ -758,7 +760,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
    if (entry->hasPawns) {
        idx = LeadPawnIdx[leadPawnsCnt][squares[0]];

-        std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
+        std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);

        for (int i = 1; i < leadPawnsCnt; ++i)
            idx += Binomial[i][MapPawns[squares[i]]];
@@ -859,7 +861,7 @@ encode_remaining:

    while (d->groupLen[++next])
    {
-        std::sort(groupSq, groupSq + d->groupLen[next]);
+        std::stable_sort(groupSq, groupSq + d->groupLen[next]);
        uint64_t n = 0;

        // Map down a square if "comes later" than a square in the previous
@@ -25,6 +25,8 @@

 namespace Tablebases {

+extern int MaxCardinality;
+
 enum WDLScore {
    WDLLoss        = -2, // Loss
    WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
@@ -43,8 +45,6 @@ enum ProbeState {
    ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };

-extern int MaxCardinality;
-
 void init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
 int probe_dtz(Position& pos, ProbeState* result);
@@ -51,17 +51,6 @@ Thread::~Thread() {
 }


-/// Thread::bestMoveCount(Move move) return best move counter for the given root move
-
-int Thread::best_move_count(Move move) const {
-
-  auto rm = std::find(rootMoves.begin() + pvIdx,
-                      rootMoves.begin() + pvLast, move);
-
-  return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0;
-}
-
-
 /// Thread::clear() reset histories, usually before a new game

 void Thread::clear() {
@@ -192,9 +181,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
          || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
          rootMoves.emplace_back(m);

-  if (!rootMoves.empty())
-      Tablebases::rank_root_moves(pos, rootMoves);
-
  // After ownership transfer 'states' becomes empty, so if we stop the search
  // and call 'go' again without setting a new position states.get() == NULL.
  assert(states.get() || setupStates.get());
@@ -214,6 +200,21 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
      th->rootMoves = rootMoves;
      th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
      th->rootState = setupStates->back();
+      th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
+      th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
+      th->Cardinality = int(Options["SyzygyProbeLimit"]);
+
+      // Tables with fewer pieces than SyzygyProbeLimit are searched with
+      // ProbeDepth == DEPTH_ZERO
+      if (th->Cardinality > Tablebases::MaxCardinality)
+      {
+          th->Cardinality = Tablebases::MaxCardinality;
+          th->ProbeDepth = 0;
+      }
+
+      if (!rootMoves.empty())
+          Tablebases::rank_root_moves(pos, rootMoves);
+
  }

  main()->start_searching();
@@ -235,16 +236,16 @@ Thread* ThreadPool::get_best_thread() const {
        votes[th->rootMoves[0].pv[0]] +=
            (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);

-          if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
-          {
-              // Make sure we pick the shortest mate / TB conversion or stave off mate the longest
-              if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
-                  bestThread = th;
-          }
-          else if (   th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
-                   || (   th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
-                       && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
-              bestThread = th;
+        if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
+        {
+            // Make sure we pick the shortest mate / TB conversion or stave off mate the longest
+            if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
+                bestThread = th;
+        }
+        else if (   th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
+                 || (   th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
+                     && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
+            bestThread = th;
    }

    return bestThread;
@@ -54,7 +54,6 @@ public:
  void idle_loop();
  void start_searching();
  void wait_for_search_finished();
-  int best_move_count(Move move) const;

  Pawns::Table pawnsTable;
  Material::Table materialTable;
@@ -74,6 +73,11 @@ public:
  CapturePieceToHistory captureHistory;
  ContinuationHistory continuationHistory[2][2];
  Score contempt;
+  bool rootInTB;
+  int Cardinality;
+  bool UseRule50;
+  Depth ProbeDepth;
+
 };


@@ -35,6 +35,9 @@ bool TranspositionTable::enable_transposition_table = true;

 void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {

+  if (!TranspositionTable::enable_transposition_table) {
+      return;
+  }
  // Preserve any existing move for the same position
  if (m || (uint16_t)k != key16)
      move16 = (uint16_t)m;
@@ -64,11 +67,12 @@ void TranspositionTable::resize(size_t mbSize) {

  Threads.main()->wait_for_search_finished();

-  aligned_ttmem_free(mem);
+  aligned_large_pages_free(table);

  clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
-  table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
-  if (!mem)
+
+  table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
+  if (!table)
  {
      std::cerr << "Failed to allocate " << mbSize
                << "MB for transposition table." << std::endl;
@@ -73,7 +73,7 @@ class TranspositionTable {
  static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");

 public:
- ~TranspositionTable() { aligned_ttmem_free(mem); }
+ ~TranspositionTable() { aligned_large_pages_free(table); }
  void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound
  TTEntry* probe(const Key key, bool& found) const;
  int hashfull() const;
@@ -91,7 +91,6 @@ private:

  size_t clusterCount;
  Cluster* table;
-  void* mem;
  uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
 };

@@ -47,7 +47,7 @@ const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
 void test_cmd(Position& pos, istringstream& is)
 {
    // Initialize as it may be searched.
-    Eval::init_NNUE();
+    Eval::NNUE::init();

    std::string param;
    is >> param;
@@ -100,7 +100,7 @@ namespace {
    Position p;
    p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main());

-    Eval::verify_NNUE();
+    Eval::NNUE::verify();

    sync_cout << "\n" << Eval::trace(p) << sync_endl;
  }
@@ -185,7 +185,7 @@ namespace {

        if (token == "go" || token == "eval")
        {
-            cerr << "\nPosition: " << cnt++ << '/' << num << endl;
+            cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl;
            if (token == "go")
            {
               go(pos, is, states);
@@ -210,15 +210,15 @@ namespace {
         << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
  }

-  // The win rate model returns the probability (per mille) of winning given an eval
-  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
-  int win_rate_model(Value v, int ply) {
-     // Return win rate in per mille (rounded to nearest)
-     return int(0.5 + UCI::win_rate_model_double(v, ply));
-  }
-
 } // namespace

+// The win rate model returns the probability (per mille) of winning given an eval
+// and a game-ply. The model fits rather accurately the LTC fishtest statistics.
+int UCI::win_rate_model(Value v, int ply) {
+   // Return win rate in per mille (rounded to nearest)
+   return int(0.5 + win_rate_model_double(v, ply));
+}
+
 // Unrounded (double) variant of the win rate model: returns the probability (per mille) of
 // winning given an eval and a game-ply. The model fits the LTC fishtest statistics rather accurately.
 double UCI::win_rate_model_double(double v, int ply) {
@@ -72,6 +72,7 @@ std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
 std::string wdl(Value v, int ply);
+int win_rate_model(Value v, int ply);
 double win_rate_model_double(double v, int ply);
 Move to_move(const Position& pos, std::string& str);

@@ -21,6 +21,7 @@
 #include <ostream>
 #include <sstream>

+#include "evaluate.h"
 #include "misc.h"
 #include "search.h"
 #include "thread.h"
@@ -40,10 +41,10 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
 void on_logger(const Option& o) { start_logger(o); }
 void on_threads(const Option& o) { Threads.set(size_t(o)); }
 void on_tb_path(const Option& o) { Tablebases::init(o); }
-void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
-void on_eval_file(const Option& ) { Eval::init_NNUE(); }
-void on_prune_at_shallow_depth_on_pv_node(const Option& o) {
-    Search::prune_at_shallow_depth_on_pv_node = o;
+void on_use_NNUE(const Option& ) { Eval::NNUE::init(); }
+void on_eval_file(const Option& ) { Eval::NNUE::init(); }
+void on_prune_at_shallow_depth(const Option& o) {
+    Search::prune_at_shallow_depth = o;
 }
 void on_enable_transposition_table(const Option& o) {
    TranspositionTable::enable_transposition_table = o;
@@ -85,23 +86,19 @@ void init(OptionsMap& o) {
  o["Syzygy50MoveRule"]      << Option(true);
  o["SyzygyProbeLimit"]      << Option(7, 0, 7);
  o["Use NNUE"]              << Option("true var true var false var pure", "true", on_use_NNUE);
-  // The default must follow the format nn-[SHA256 first 12 digits].nnue
-  // for the build process (profile-build and fishtest) to work.
-  o["EvalFile"]              << Option("nn-82215d0fd0df.nnue", on_eval_file);
+  o["EvalFile"]              << Option(EvalFileDefaultName, on_eval_file);
  // The evaluation function is loaded when ucinewgame is received. To convert to a new
  // evaluation function we want to run the "test eval convert" command, but because the new
  // evaluation function does not exist yet, the program ends abnormally before this command can be executed.
  // This hidden option therefore suppresses loading of the evaluation function on ucinewgame,
  // so that the "test eval convert" command can be run.
  o["SkipLoadingEval"]       << Option(false);
-  // how many moves to use a fixed move
-  // o["BookMoves"] << Option(16, 0, 10000);
  // When learning the evaluation function, you can change the folder to save the evaluation function.
  // Evalsave by default. This folder shall be prepared in advance.
  // Automatically create a folder under this folder like "0/", "1/", ... and save the evaluation function file there.
  o["EvalSaveDir"] << Option("evalsave");
  // Enable pruning at shallow depth. False is recommended when using fixed depth search.
-  o["PruneAtShallowDepthOnPvNode"] << Option(true, on_prune_at_shallow_depth_on_pv_node);
+  o["PruneAtShallowDepth"] << Option(true, on_prune_at_shallow_depth);
  // Enable transposition table.
  o["EnableTranspositionTable"] << Option(true, on_enable_transposition_table);
 }
@@ -78,11 +78,11 @@ cat << EOF > gensfen01.exp
 send "setoption name Threads value $threads\n"
 send "setoption name Use NNUE value false\n"
 send "isready\n"
- send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin use_raw_nnue_eval 0 sfen_format bin\n"
+ send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin sfen_format bin\n"
 expect "gensfen finished."
 send "learn training_data/training_data.bin convert_plain output_file_name training_data.txt\n"
 expect "all done"
- send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n"
+ send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack sfen_format binpack\n"
 expect "gensfen finished."

 send "quit\n"
@@ -104,9 +104,9 @@ cat << EOF > gensfen02.exp
 send "setoption name Threads value $threads\n"
 send "setoption name Use NNUE value true\n"
 send "isready\n"
- send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/valdidation_data.bin use_raw_nnue_eval 0 sfen_format bin\n"
+ send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/valdidation_data.bin sfen_format bin\n"
 expect "gensfen finished."
- send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n"
+ send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack sfen_format binpack\n"
 expect "gensfen finished."

 send "quit\n"
@@ -127,7 +127,7 @@ cat << EOF > learn01.exp
 send "setoption name Use NNUE value true\n"
 send "setoption name Threads value $threads\n"
 send "isready\n"
- send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 mirror_percentage 50 validation_set_file_name validation_data/validation_data.bin\n"
+ send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 lr 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 validation_set_file_name validation_data/validation_data.bin\n"

 expect "save_eval() finished."