diff --git a/AUTHORS b/AUTHORS
index c96f870a..198dfa5a 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -36,10 +36,11 @@ Bryan Cross (crossbr)
 candirufish
 Chess13234
 Chris Cain (ceebo)
+Dale Weiler (graphitemaster)
 Dan Schmidt (dfannius)
 Daniel Axtens (daxtens)
 Daniel Dugovic (ddugovic)
-Dariusz Orzechowski
+Dariusz Orzechowski (dorzechowski)
 David Zar
 Daylen Yang (daylen)
 DiscanX
@@ -62,6 +63,7 @@ Gary Heckman (gheckman)
 George Sobala (gsobala)
 gguliash
 Gian-Carlo Pascutto (gcp)
+Deshawn Mohan-Smith (GoldenRare)
 Gontran Lemaire (gonlem)
 Goodkov Vasiliy Aleksandrovich (goodkov)
 Gregor Cramer
diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt
index 0ea5ac72..482e9000 100644
--- a/Top CPU Contributors.txt	
+++ b/Top CPU Contributors.txt	
@@ -1,154 +1,173 @@
-Contributors with >10,000 CPU hours as of January 7, 2020
+Contributors with >10,000 CPU hours as of Sept 2, 2020
 Thank you!
 
 Username                  CPU Hours   Games played
 --------------------------------------------------
-noobpwnftw                  9305707      695548021
-mlang                        780050       61648867
-dew                          621626       43921547
-mibere                       524702       42238645
-crunchy                      354587       27344275
-cw                           354495       27274181
-fastgm                       332801       22804359
-JojoM                        295750       20437451
-CSU_Dynasty                  262015       21828122
-Fisherman                    232181       18939229
-ctoks                        218866       17622052
-glinscott                    201989       13780820
-tvijlbrief                   201204       15337115
-velislav                     188630       14348485
-gvreuls                      187164       15149976
-bking_US                     180289       11876016
-nordlandia                   172076       13467830
-leszek                       157152       11443978
-Thanar                       148021       12365359
-spams                        141975       10319326
-drabel                       138073       11121749
-vdv                          137850        9394330
-mgrabiak                     133578       10454324
-TueRens                      132485       10878471
-bcross                       129683       11557084
-marrco                       126078        9356740
-sqrt2                        125830        9724586
-robal                        122873        9593418
-vdbergh                      120766        8926915
-malala                       115926        8002293
-CoffeeOne                    114241        5004100
-dsmith                       113189        7570238
-BrunoBanani                  104644        7436849
-Data                          92328        8220352
-mhoram                        89333        6695109
-davar                         87924        7009424
-xoto                          81094        6869316
-ElbertoOne                    80899        7023771
-grandphish2                   78067        6160199
-brabos                        77212        6186135
-psk                           75733        5984901
-BRAVONE                       73875        5054681
-sunu                          70771        5597972
-sterni1971                    70605        5590573
-MaZePallas                    66886        5188978
-Vizvezdenec                   63708        4967313
-nssy                          63462        5259388
-jromang                       61634        4940891
-teddybaer                     61231        5407666
-Pking_cda                     60099        5293873
-solarlight                    57469        5028306
-dv8silencer                   56913        3883992
-tinker                        54936        4086118
-renouve                       49732        3501516
-Freja                         49543        3733019
-robnjr                        46972        4053117
-rap                           46563        3219146
-Bobo1239                      46036        3817196
-ttruscott                     45304        3649765
-racerschmacer                 44881        3975413
-finfish                       44764        3370515
-eva42                         41783        3599691
-biffhero                      40263        3111352
-bigpen0r                      39817        3291647
-mhunt                         38871        2691355
-ronaldjerum                   38820        3240695
-Antihistamine                 38785        2761312
-pb00067                       38038        3086320
-speedycpu                     37591        3003273
-rkl                           37207        3289580
-VoyagerOne                    37050        3441673
-jbwiebe                       35320        2805433
-cuistot                       34191        2146279
-homyur                        33927        2850481
-manap                         32873        2327384
-gri                           32538        2515779
-oryx                          31267        2899051
-EthanOConnor                  30959        2090311
-SC                            30832        2730764
-csnodgrass                    29505        2688994
-jmdana                        29458        2205261
-strelock                      28219        2067805
-jkiiski                       27832        1904470
-Pyafue                        27533        1902349
-Garf                          27515        2747562
-eastorwest                    27421        2317535
-slakovv                       26903        2021889
-Prcuvu                        24835        2170122
-anst                          24714        2190091
-hyperbolic.tom                24319        2017394
-Patrick_G                     23687        1801617
-Sharaf_DG                     22896        1786697
-nabildanial                   22195        1519409
-chriswk                       21931        1868317
-achambord                     21665        1767323
-Zirie                         20887        1472937
-team-oh                       20217        1636708
-Isidor                        20096        1680691
-ncfish1                       19931        1520927
-nesoneg                       19875        1463031
-Spprtr                        19853        1548165
-JanErik                       19849        1703875
-agg177                        19478        1395014
-SFTUser                       19231        1567999
-xor12                         19017        1680165
-sg4032                        18431        1641865
-rstoesser                     18118        1293588
-MazeOfGalious                 17917        1629593
-j3corre                       17743         941444
-cisco2015                     17725        1690126
-ianh2105                      17706        1632562
-dex                           17678        1467203
-jundery                       17194        1115855
-iisiraider                    17019        1101015
-horst.prack                   17012        1465656
-Adrian.Schmidt123             16563        1281436
-purplefishies                 16342        1092533
-wei                           16274        1745989
-ville                         16144        1384026
-eudhan                        15712        1283717
-OuaisBla                      15581         972000
-DragonLord                    15559        1162790
-dju                           14716         875569
-chris                         14479        1487385
-0xB00B1ES                     14079        1001120
-OssumOpossum                  13776        1007129
-enedene                       13460         905279
-bpfliegel                     13346         884523
-Ente                          13198        1156722
-IgorLeMasson                  13087        1147232
-jpulman                       13000         870599
-ako027ako                     12775        1173203
-Nikolay.IT                    12352        1068349
-Andrew Grant                  12327         895539
-joster                        12008         950160
-AdrianSA                      11996         804972
-Nesa92                        11455        1111993
-fatmurphy                     11345         853210
-Dark_wizzie                   11108        1007152
-modolief                      10869         896470
-mschmidt                      10757         803401
-infinity                      10594         727027
-mabichito                     10524         749391
-Thomas A. Anderson            10474         732094
-thijsk                        10431         719357
-Flopzee                       10339         894821
-crocogoat                     10104        1013854
-SapphireBrand                 10104         969604
-stocky                        10017         699440
+noobpwnftw                 19352969     1231459677
+mlang                        957168       61657446
+dew                          949885       56893432
+mibere                       703817       46865007
+crunchy                      427035       27344275
+cw                           416006       27521077
+JojoM                        415904       24479564
+fastgm                       404873       23953472
+CSU_Dynasty                  335774       22850550
+tvijlbrief                   335199       21871270
+Fisherman                    325053       21786603
+gvreuls                      311480       20751516
+ctoks                        275877       18710423
+velislav                     241267       15596372
+glinscott                    217799       13780820
+nordlandia                   211692       13484886
+bcross                       206213       14934233
+bking_US                     198894       11876016
+leszek                       189170       11446821
+mgrabiak                     183896       11778092
+drabel                       181408       12489478
+TueRens                      181349       12192000
+Thanar                       179852       12365359
+vdv                          175171        9881246
+robal                        166948       10702862
+spams                        157128       10319326
+marrco                       149947        9376421
+sqrt2                        147963        9724586
+vdbergh                      137041        8926915
+CoffeeOne                    136294        5004100
+malala                       136182        8002293
+mhoram                       128934        8177193
+davar                        122092        7960001
+dsmith                       122059        7570238
+xoto                         119696        8222144
+grandphish2                  116481        7582197
+Data                         113305        8220352
+BrunoBanani                  112960        7436849
+ElbertoOne                    99028        7023771
+MaZePallas                    98571        6362619
+brabos                        92118        6186135
+psk                           89957        5984901
+sunu                          88463        6007033
+sterni1971                    86948        5613788
+Vizvezdenec                   83752        5343724
+BRAVONE                       81239        5054681
+nssy                          76497        5259388
+teddybaer                     75125        5407666
+Pking_cda                     73776        5293873
+jromang                       70695        4940891
+solarlight                    70517        5028306
+dv8silencer                   70287        3883992
+Bobo1239                      68515        4652287
+racerschmacer                 67468        4935996
+manap                         66273        4121774
+tinker                        63458        4213726
+linrock                       59082        4516053
+robnjr                        57262        4053117
+Freja                         56938        3733019
+ttruscott                     56005        3679485
+renouve                       53811        3501516
+cuistot                       52532        3014920
+finfish                       51360        3370515
+eva42                         51272        3599691
+rkl                           50759        3840947
+rap                           49985        3219146
+pb00067                       49727        3298270
+ronaldjerum                   47654        3240695
+bigpen0r                      47278        3291647
+biffhero                      46564        3111352
+VoyagerOne                    45386        3445881
+speedycpu                     43842        3003273
+jbwiebe                       43305        2805433
+Antihistamine                 41788        2761312
+mhunt                         41735        2691355
+eastorwest                    40387        2812173
+homyur                        39893        2850481
+gri                           39871        2515779
+oryx                          38228        2941656
+0x3C33                        37773        2529097
+SC                            37290        2731014
+csnodgrass                    36207        2688994
+jmdana                        36108        2205261
+strelock                      34716        2074055
+Garf                          33800        2747562
+EthanOConnor                  33370        2090311
+slakovv                       32915        2021889
+Spprtr                        32591        2139601
+Prcuvu                        30377        2170122
+anst                          30301        2190091
+jkiiski                       30136        1904470
+hyperbolic.tom                29840        2017394
+Pyafue                        29650        1902349
+OuaisBla                      27629        1578000
+chriswk                       26902        1868317
+achambord                     26582        1767323
+Patrick_G                     26276        1801617
+yorkman                       26193        1992080
+SFTUser                       25182        1675689
+nabildanial                   24942        1519409
+Sharaf_DG                     24765        1786697
+ncfish1                       24411        1520927
+agg177                        23890        1395014
+JanErik                       23408        1703875
+Isidor                        23388        1680691
+Norabor                       22976        1587862
+cisco2015                     22880        1759669
+Zirie                         22542        1472937
+team-oh                       22272        1636708
+MazeOfGalious                 21978        1629593
+sg4032                        21945        1643065
+ianh2105                      21725        1632562
+xor12                         21628        1680365
+dex                           21612        1467203
+nesoneg                       21494        1463031
+horst.prack                   20878        1465656
+0xB00B1ES                     20590        1208666
+j3corre                       20405         941444
+Adrian.Schmidt123             20316        1281436
+wei                           19973        1745989
+rstoesser                     19569        1293588
+eudhan                        19274        1283717
+Ente                          19070        1373058
+jundery                       18445        1115855
+iisiraider                    18247        1101015
+ville                         17883        1384026
+chris                         17698        1487385
+purplefishies                 17595        1092533
+DragonLord                    17014        1162790
+dju                           16515         929427
+IgorLeMasson                  16064        1147232
+ako027ako                     15671        1173203
+Nikolay.IT                    15154        1068349
+Andrew Grant                  15114         895539
+yurikvelo                     15027        1165616
+OssumOpossum                  14857        1007129
+enedene                       14476         905279
+bpfliegel                     14298         884523
+jpulman                       13982         870599
+joster                        13794         950160
+Nesa92                        13786        1114691
+Dark_wizzie                   13422        1007152
+Hjax                          13350         900887
+Fifis                         13313         965473
+mabichito                     12903         749391
+thijsk                        12886         722107
+crocogoat                     12876        1048802
+AdrianSA                      12860         804972
+Flopzee                       12698         894821
+fatmurphy                     12547         853210
+SapphireBrand                 12416         969604
+modolief                      12386         896470
+scuzzi                        12362         833465
+pgontarz                      12151         848794
+stocky                        11954         699440
+mschmidt                      11941         803401
+infinity                      11470         727027
+torbjo                        11387         728873
+Thomas A. Anderson            11372         732094
+snicolet                      11106         869170
+amicic                        10779         733593
+rpngn                         10712         688203
+d64                           10680         771144
+basepi                        10637         744851
+jjoshua2                      10559         670905
+dzjp                          10343         732529
+ols                           10259         570669
+lbraesch                      10252         647825
diff --git a/appveyor.yml b/appveyor.yml
index a3732a23..ab608409 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -63,7 +63,7 @@ build_script:
   - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
   - ps: |
       # Download default NNUE net from fishtest
-      $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
+      $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
       $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)"
       $nnuenet = $Matches.nnuenet
       Write-Host "Default net:" $nnuenet
diff --git a/src/Makefile b/src/Makefile
index 69517c3c..0b2f99ed 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -60,7 +60,6 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
 	learn/learn.cpp \
 	learn/gensfen.cpp \
 	learn/convert.cpp \
-	learn/learning_tools.cpp \
 	learn/multi_think.cpp
 
 OBJS = $(notdir $(SRCS:.cpp=.o))
@@ -101,12 +100,17 @@ VPATH = syzygy:nnue:nnue/features:eval:extra:learn
 
 ### 2.1. General and architecture defaults
 
+ifeq ($(ARCH),)
+   ARCH = x86-64-modern
+   help_skip_sanity = yes
+endif
 # explicitly check for the list of supported architectures (as listed with make help),
 # the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
-ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
-                               x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
-                               x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
-                               armv7 armv7-neon armv8 apple-silicon general-64 general-32))
+ifeq ($(ARCH), $(filter $(ARCH), \
+                 x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
+                 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
+                 x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
+                 armv7 armv7-neon armv8 apple-silicon general-64 general-32))
    SUPPORTED_ARCH=true
 else
    SUPPORTED_ARCH=false
@@ -130,7 +134,6 @@ avx512 = no
 vnni256 = no
 vnni512 = no
 neon = no
-ARCH = x86-64-modern
 STRIP = strip
 
 ### 2.2 Architecture specific
@@ -394,19 +397,6 @@ ifeq ($(COMP),clang)
 	endif
 endif
 
-ifeq ($(comp),icc)
-	profile_make = icc-profile-make
-	profile_use = icc-profile-use
-else
-ifeq ($(comp),clang)
-	profile_make = clang-profile-make
-	profile_use = clang-profile-use
-else
-	profile_make = gcc-profile-make
-	profile_use = gcc-profile-use
-endif
-endif
-
 ifeq ($(KERNEL),Darwin)
 	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
 	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
@@ -418,20 +408,30 @@ endif
 # Currently we don't know how to make PGO builds with the NDK yet.
 ifeq ($(COMP),ndk)
 	CXXFLAGS += -stdlib=libc++ -fPIE
+	comp=clang
 	ifeq ($(arch),armv7)
-		comp=armv7a-linux-androideabi16-clang
 		CXX=armv7a-linux-androideabi16-clang++
 		CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
 		STRIP=arm-linux-androideabi-strip
 	endif
 	ifeq ($(arch),armv8)
-		comp=aarch64-linux-android21-clang
 		CXX=aarch64-linux-android21-clang++
 		STRIP=aarch64-linux-android-strip
 	endif
 	LDFLAGS += -static-libstdc++ -pie -lm -latomic
 endif
 
+ifeq ($(comp),icc)
+	profile_make = icc-profile-make
+	profile_use = icc-profile-use
+else ifeq ($(comp),clang)
+	profile_make = clang-profile-make
+	profile_use = clang-profile-use
+else
+	profile_make = gcc-profile-make
+	profile_use = gcc-profile-use
+endif
+
 ### Travis CI script uses COMPILER to overwrite CXX
 ifdef COMPILER
 	COMPCXX=$(COMPILER)
@@ -622,11 +622,13 @@ endif
 ### needs access to the optimization flags.
 ifeq ($(optimize),yes)
 ifeq ($(debug), no)
-	ifeq ($(COMP),ndk)
-		CXXFLAGS += -flto=thin
-		LDFLAGS += $(CXXFLAGS)
-	else ifeq ($(comp),clang)
+	ifeq ($(comp),clang)
 		CXXFLAGS += -flto=thin
+		ifneq ($(findstring MINGW,$(KERNEL)),)
+			CXXFLAGS += -fuse-ld=lld
+		else ifneq ($(findstring MSYS,$(KERNEL)),)
+			CXXFLAGS += -fuse-ld=lld
+		endif
 		LDFLAGS += $(CXXFLAGS)
 
 # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
@@ -650,10 +652,12 @@ ifeq ($(debug), no)
 # So, only enable it for a cross from Linux by default.
 	else ifeq ($(comp),mingw)
 	ifeq ($(KERNEL),Linux)
+	ifneq ($(arch),i386)
 		CXXFLAGS += -flto
 		LDFLAGS += $(CXXFLAGS) -flto=jobserver
 	endif
 	endif
+	endif
 endif
 endif
 
@@ -729,11 +733,12 @@ help:
 	@echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
 	@echo ""
 	@echo "-------------------------------"
-ifeq ($(SUPPORTED_ARCH), true)
+ifeq ($(SUPPORTED_ARCH)$(help_skip_sanity), true)
 	@echo "The selected architecture $(ARCH) will enable the following configuration: "
 	@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
 else
 	@echo "Specify a supported architecture with the ARCH option for more details"
+	@echo ""
 endif
 
 
@@ -741,7 +746,7 @@ endif
         config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
         clang-profile-use clang-profile-make
 
-build: config-sanity
+build: config-sanity net
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
 
 profile-build: net config-sanity objclean profileclean
@@ -768,12 +773,13 @@ install:
 	-cp $(EXE) $(BINDIR)
 	-strip $(BINDIR)/$(EXE)
 
-#clean all
+# clean all
 clean: objclean profileclean
 	@rm -f .depend *~ core
 
+# evaluation network (nnue): resolve the default net name from evaluate.h
 net:
-	$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+	$(eval nnuenet := $(shell grep EvalFileDefaultName evaluate.h | grep define | sed 's/.*\(nn-[a-z0-9]\{12\}\.nnue\).*/\1/'))
 	@echo "Default net: $(nnuenet)"
 	$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
 	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
@@ -795,7 +801,6 @@ net:
             echo "shasum / sha256sum not found, skipping net validation"; \
         fi
 
-
 # clean binaries and objects
 objclean:
 	@rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o
diff --git a/src/benchmark.cpp b/src/benchmark.cpp
index 806e9840..ffb631a2 100644
--- a/src/benchmark.cpp
+++ b/src/benchmark.cpp
@@ -164,5 +164,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
           ++posCounter;
       }
 
+  list.emplace_back("setoption name Use NNUE value true");
+
   return list;
 }
diff --git a/src/eval/evaluate_common.h b/src/eval/evaluate_common.h
deleted file mode 100644
index 47e69a44..00000000
--- a/src/eval/evaluate_common.h
+++ /dev/null
@@ -1,22 +0,0 @@
-﻿#ifndef _EVALUATE_COMMON_H_
-#define _EVALUATE_COMMON_H_
-
-// A common header-like function for modern evaluation functions.
-
-#include <string>
-
-namespace Eval
-{
-	// --------------------------
-	// for learning
-	// --------------------------
-
-	// Save the evaluation function parameters to a file.
-	// You can specify the extension added to the end of the file.
-	void save_eval(std::string suffix);
-
-	// Get the current eta.
-	double get_eta();
-}
-
-#endif // _EVALUATE_KPPT_COMMON_H_
diff --git a/src/evaluate.cpp b/src/evaluate.cpp
index e619a747..aa9bbd67 100644
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -20,22 +20,29 @@
 #include <cassert>
 #include <cstdlib>
 #include <cstring>   // For std::memset
+#include <fstream>
 #include <iomanip>
 #include <sstream>
 #include <iostream>
-#include <set>
+#include <streambuf>
+#include <vector>
 
 #include "bitboard.h"
 #include "evaluate.h"
 #include "material.h"
+#include "misc.h"
 #include "pawns.h"
 #include "thread.h"
 #include "uci.h"
+#include "incbin/incbin.h"
+
+using namespace std;
+using namespace Eval::NNUE;
 
 namespace Eval {
 
   UseNNUEMode useNNUE;
-  std::string eval_file_loaded="None";
+  string eval_file_loaded = "None";
 
   static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
   {
@@ -49,35 +56,67 @@ namespace Eval {
     return UseNNUEMode::False;
   }
 
-  void init_NNUE() {
+  /// NNUE::init() refreshes useNNUE from the "Use NNUE" option and, unless NNUE is
+  /// disabled, tries to load the "EvalFile" net from each candidate directory in turn.
+  void NNUE::init() {
 
     useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
+    if (useNNUE == UseNNUEMode::False)
+        return;
 
-    std::string eval_file = std::string(Options["EvalFile"]);
-    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
-        if (Eval::NNUE::load_eval_file(eval_file))
-            eval_file_loaded = eval_file;
+    string eval_file = string(Options["EvalFile"]);
+
+    #if defined(DEFAULT_NNUE_DIRECTORY)
+    #define stringify2(x) #x
+    #define stringify(x) stringify2(x)
+    vector<string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
+    #else
+    vector<string> dirs = { "" , CommandLine::binaryDirectory };
+    #endif
+
+    for (const string& directory : dirs)
+        if (eval_file_loaded != eval_file)
+        {
+            ifstream stream(directory + eval_file, ios::binary);
+            if (load_eval(eval_file, stream))
+            {
+                sync_cout << "info string Loaded eval file " << directory + eval_file << sync_endl;
+                eval_file_loaded = eval_file;
+            }
+            else
+                sync_cout << "info string ERROR: failed to load eval file " << directory + eval_file << sync_endl;
+        }
   }
 
-  void verify_NNUE() {
+  /// NNUE::verify() checks that the EvalFile net was loaded; if not (and NNUE is enabled) it reports why and exits
+  void NNUE::verify() {
 
-    std::string eval_file = std::string(Options["EvalFile"]);
-    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)    {
+    string eval_file = string(Options["EvalFile"]);
+
+    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
+    {
         UCI::OptionsMap defaults;
         UCI::init(defaults);
 
-        sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl;
-        sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl;
-        sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl;
-        sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl;
-        sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl;
-        std::exit(EXIT_FAILURE);
+        string msg1 = "If the UCI option \"Use NNUE\" is set to true, network evaluation parameters compatible with the engine must be available.";
+        string msg2 = "The option is set to true, but the network file " + eval_file + " was not loaded successfully.";
+        string msg3 = "The UCI option EvalFile might need to specify the full path, including the directory name, to the network file.";
+        string msg4 = "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/" + string(defaults["EvalFile"]);
+        string msg5 = "The engine will be terminated now.";
+
+        sync_cout << "info string ERROR: " << msg1 << sync_endl;
+        sync_cout << "info string ERROR: " << msg2 << sync_endl;
+        sync_cout << "info string ERROR: " << msg3 << sync_endl;
+        sync_cout << "info string ERROR: " << msg4 << sync_endl;
+        sync_cout << "info string ERROR: " << msg5 << sync_endl;
+
+        exit(EXIT_FAILURE);
     }
 
     if (useNNUE != UseNNUEMode::False)
-        sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
+        sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
     else
-        sync_cout << "info string classical evaluation enabled." << sync_endl;
+        sync_cout << "info string classical evaluation enabled" << sync_endl;
   }
 }
 
@@ -165,26 +204,26 @@ namespace {
 
   // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a
   // pawn protected square on rank 4 to 6 which is also safe from a pawn attack.
-  constexpr Score Outpost[] = { S(56, 36), S(30, 23) };
+  constexpr Score Outpost[] = { S(56, 34), S(31, 23) };
 
   // PassedRank[Rank] contains a bonus according to the rank of a passed pawn
   constexpr Score PassedRank[RANK_NB] = {
-    S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260)
+    S(0, 0), S(9, 28), S(15, 31), S(17, 39), S(64, 70), S(171, 177), S(277, 260)
   };
 
   // RookOnFile[semiopen/open] contains bonuses for each rook when there is
   // no (friendly) pawn on the rook file.
-  constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) };
+  constexpr Score RookOnFile[] = { S(19, 7), S(48, 27) };
 
   // ThreatByMinor/ByRook[attacked PieceType] contains bonuses according to
   // which piece type attacks which one. Attacks on lesser pieces which are
   // pawn-defended are not considered.
   constexpr Score ThreatByMinor[PIECE_TYPE_NB] = {
-    S(0, 0), S(5, 32), S(57, 41), S(77, 56), S(88, 119), S(79, 161)
+    S(0, 0), S(5, 32), S(55, 41), S(77, 56), S(89, 119), S(79, 162)
   };
 
   constexpr Score ThreatByRook[PIECE_TYPE_NB] = {
-    S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41)
+    S(0, 0), S(3, 44), S(37, 68), S(42, 60), S(0, 39), S(58, 43)
   };
 
   // Assorted bonuses and penalties
@@ -952,17 +991,32 @@ make_v:
 /// evaluation of the position from the point of view of the side to move.
 
 Value Eval::evaluate(const Position& pos) {
-  if (useNNUE == UseNNUEMode::Pure) {
-      return NNUE::evaluate(pos);
+
+  Value v;
+
+  if (Eval::useNNUE == UseNNUEMode::Pure) {
+      v = NNUE::evaluate(pos);
   }
+  else if (Eval::useNNUE == UseNNUEMode::False)
+      v = Evaluation<NO_TRACE>(pos).value();
+  else
+  {
+      // scale and shift NNUE for compatibility with search and classical evaluation
+      auto  adjusted_NNUE = [&](){ return NNUE::evaluate(pos) * 5 / 4 + Tempo; };
 
-  bool classical = useNNUE == UseNNUEMode::False
-                || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
-  Value v = classical ? Evaluation<NO_TRACE>(pos).value()
-                      : NNUE::evaluate(pos) * 5 / 4 + Tempo;
+      // if there is PSQ imbalance use classical eval, with small probability if it is small
+      Value psq = Value(abs(eg_value(pos.psq_score())));
+      int   r50 = 16 + pos.rule50_count();
+      bool  largePsq = psq * 16 > (NNUEThreshold1 + pos.non_pawn_material() / 64) * r50;
+      bool  classical = largePsq || (psq > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
 
-  if (classical && useNNUE != UseNNUEMode::False && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
-      v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
+      v = classical ? Evaluation<NO_TRACE>(pos).value() : adjusted_NNUE();
+
+      // if the classical eval is small and imbalance large, use NNUE nevertheless.
+      if (   largePsq
+          && abs(v) * 16 < NNUEThreshold2 * r50)
+          v = adjusted_NNUE();
+  }
 
   // Damp down the evaluation linearly when shuffling
   v = v * (100 - pos.rule50_count()) / 100;
diff --git a/src/evaluate.h b/src/evaluate.h
index 900a77fc..ac67494d 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -38,15 +38,18 @@ namespace Eval {
 
   extern UseNNUEMode useNNUE;
   extern std::string eval_file_loaded;
-  void init_NNUE();
-  void verify_NNUE();
+
+  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
+  // for the build process (profile-build and fishtest) to work. Do not change the
+  // name of the macro, as it is used in the Makefile.
+  #define EvalFileDefaultName   "nn-28e08a9fe2ad.nnue"
 
   namespace NNUE {
 
     Value evaluate(const Position& pos);
-    Value compute_eval(const Position& pos);
-    void  update_eval(const Position& pos);
-    bool  load_eval_file(const std::string& evalFile);
+    bool load_eval(std::string name, std::istream& stream);
+    void init();
+    void verify();
 
   } // namespace NNUE
 
diff --git a/src/incbin/UNLICENCE b/src/incbin/UNLICENCE
new file mode 100644
index 00000000..32484ab5
--- /dev/null
+++ b/src/incbin/UNLICENCE
@@ -0,0 +1,26 @@
+The file "incbin.h" is free and unencumbered software released into
+the public domain by Dale Weiler, see:
+   <https://github.com/graphitemaster/incbin>
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h
new file mode 100755
index 00000000..c19684d7
--- /dev/null
+++ b/src/incbin/incbin.h
@@ -0,0 +1,368 @@
+/**
+ * @file incbin.h
+ * @author Dale Weiler
+ * @brief Utility for including binary files
+ *
+ * Facilities for including binary files into the current translation unit and
+ * making use of them externally in other translation units.
+ */
+#ifndef INCBIN_HDR
+#define INCBIN_HDR
+#include <limits.h>
+#if   defined(__AVX512BW__) || \
+      defined(__AVX512CD__) || \
+      defined(__AVX512DQ__) || \
+      defined(__AVX512ER__) || \
+      defined(__AVX512PF__) || \
+      defined(__AVX512VL__) || \
+      defined(__AVX512F__)
+# define INCBIN_ALIGNMENT_INDEX 6
+#elif defined(__AVX__)      || \
+      defined(__AVX2__)
+# define INCBIN_ALIGNMENT_INDEX 5
+#elif defined(__SSE__)      || \
+      defined(__SSE2__)     || \
+      defined(__SSE3__)     || \
+      defined(__SSSE3__)    || \
+      defined(__SSE4_1__)   || \
+      defined(__SSE4_2__)   || \
+      defined(__neon__)
+# define INCBIN_ALIGNMENT_INDEX 4
+#elif ULONG_MAX != 0xffffffffu
+# define INCBIN_ALIGNMENT_INDEX 3
+# else
+# define INCBIN_ALIGNMENT_INDEX 2
+#endif
+
+/* Lookup table of (1 << n) where `n' is `INCBIN_ALIGNMENT_INDEX' */
+#define INCBIN_ALIGN_SHIFT_0 1
+#define INCBIN_ALIGN_SHIFT_1 2
+#define INCBIN_ALIGN_SHIFT_2 4
+#define INCBIN_ALIGN_SHIFT_3 8
+#define INCBIN_ALIGN_SHIFT_4 16
+#define INCBIN_ALIGN_SHIFT_5 32
+#define INCBIN_ALIGN_SHIFT_6 64
+
+/* Actual alignment value */
+#define INCBIN_ALIGNMENT \
+    INCBIN_CONCATENATE( \
+        INCBIN_CONCATENATE(INCBIN_ALIGN_SHIFT, _), \
+        INCBIN_ALIGNMENT_INDEX)
+
+/* Stringize */
+#define INCBIN_STR(X) \
+    #X
+#define INCBIN_STRINGIZE(X) \
+    INCBIN_STR(X)
+/* Concatenate */
+#define INCBIN_CAT(X, Y) \
+    X ## Y
+#define INCBIN_CONCATENATE(X, Y) \
+    INCBIN_CAT(X, Y)
+/* Deferred macro expansion */
+#define INCBIN_EVAL(X) \
+    X
+#define INCBIN_INVOKE(N, ...) \
+    INCBIN_EVAL(N(__VA_ARGS__))
+
+/* Green Hills uses a different directive for including binary data */
+#if defined(__ghs__)
+#  if (__ghs_asm == 2)
+#    define INCBIN_MACRO ".file"
+/* Or consider the ".myrawdata" entry in the ld file */
+#  else
+#    define INCBIN_MACRO "\tINCBIN"
+#  endif
+#else
+#  define INCBIN_MACRO ".incbin"
+#endif
+
+#ifndef _MSC_VER
+#  define INCBIN_ALIGN \
+    __attribute__((aligned(INCBIN_ALIGNMENT)))
+#else
+#  define INCBIN_ALIGN __declspec(align(INCBIN_ALIGNMENT))
+#endif
+
+#if defined(__arm__) || /* GNU C and RealView */ \
+    defined(__arm) || /* Diab */ \
+    defined(_ARM) /* ImageCraft */
+#  define INCBIN_ARM
+#endif
+
+#ifdef __GNUC__
+/* Utilize .balign where supported */
+#  define INCBIN_ALIGN_HOST ".balign " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+#  define INCBIN_ALIGN_BYTE ".balign 1\n"
+#elif defined(INCBIN_ARM)
+/*
+ * On arm assemblers, the alignment value is calculated as (1 << n) where `n' is
+ * the shift count. This is the value passed to `.align'
+ */
+#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT_INDEX) "\n"
+#  define INCBIN_ALIGN_BYTE ".align 0\n"
+#else
+/* We assume other inline assemblers treat `.align' as `.balign' */
+#  define INCBIN_ALIGN_HOST ".align " INCBIN_STRINGIZE(INCBIN_ALIGNMENT) "\n"
+#  define INCBIN_ALIGN_BYTE ".align 1\n"
+#endif
+
+/* INCBIN_CONST is used by incbin.c generated files */
+#if defined(__cplusplus)
+#  define INCBIN_EXTERNAL extern "C"
+#  define INCBIN_CONST    extern const
+#else
+#  define INCBIN_EXTERNAL extern
+#  define INCBIN_CONST    const
+#endif
+
+/**
+ * @brief Optionally override the linker section into which data is emitted.
+ *
+ * @warning If you use this facility, you'll have to deal with platform-specific linker output
+ * section naming on your own
+ *
+ * Overriding the default linker output section, e.g for esp8266/Arduino:
+ * @code
+ * #define INCBIN_OUTPUT_SECTION ".irom.text"
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ * // Data is emitted into program memory that never gets copied to RAM
+ * @endcode
+ */
+#if !defined(INCBIN_OUTPUT_SECTION)
+#  if defined(__APPLE__)
+#    define INCBIN_OUTPUT_SECTION         ".const_data"
+#  else
+#    define INCBIN_OUTPUT_SECTION         ".rodata"
+#  endif
+#endif
+
+#if defined(__APPLE__)
+/* The directives are different for Apple branded compilers */
+#  define INCBIN_SECTION         INCBIN_OUTPUT_SECTION "\n"
+#  define INCBIN_GLOBAL(NAME)    ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+#  define INCBIN_INT             ".long "
+#  define INCBIN_MANGLE          "_"
+#  define INCBIN_BYTE            ".byte "
+#  define INCBIN_TYPE(...)
+#else
+#  define INCBIN_SECTION         ".section " INCBIN_OUTPUT_SECTION "\n"
+#  define INCBIN_GLOBAL(NAME)    ".global " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
+#  if defined(__ghs__)
+#    define INCBIN_INT           ".word "
+#  else
+#    define INCBIN_INT           ".int "
+#  endif
+#  if defined(__USER_LABEL_PREFIX__)
+#    define INCBIN_MANGLE        INCBIN_STRINGIZE(__USER_LABEL_PREFIX__)
+#  else
+#    define INCBIN_MANGLE        ""
+#  endif
+#  if defined(INCBIN_ARM)
+/* On arm assemblers, `@' is used as a line comment token */
+#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", %object\n"
+#  elif defined(__MINGW32__) || defined(__MINGW64__)
+/* Mingw doesn't support this directive either */
+#    define INCBIN_TYPE(NAME)
+#  else
+/* It's safe to use `@' on other architectures */
+#    define INCBIN_TYPE(NAME)    ".type " INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME ", @object\n"
+#  endif
+#  define INCBIN_BYTE            ".byte "
+#endif
+
+/* List of style types used for symbol names */
+#define INCBIN_STYLE_CAMEL 0
+#define INCBIN_STYLE_SNAKE 1
+
+/**
+ * @brief Specify the prefix to use for symbol names.
+ *
+ * By default this is `g', producing symbols of the form:
+ * @code
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char gFooData[];
+ * // const unsigned char *const gFooEnd;
+ * // const unsigned int gFooSize;
+ * @endcode
+ *
+ * If however you specify a prefix before including: e.g:
+ * @code
+ * #define INCBIN_PREFIX incbin
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols instead:
+ * // const unsigned char incbinFooData[];
+ * // const unsigned char *const incbinFooEnd;
+ * // const unsigned int incbinFooSize;
+ * @endcode
+ */
+#if !defined(INCBIN_PREFIX)
+#  define INCBIN_PREFIX g
+#endif
+
+/**
+ * @brief Specify the style used for symbol names.
+ *
+ * Possible options are
+ * - INCBIN_STYLE_CAMEL "CamelCase"
+ * - INCBIN_STYLE_SNAKE "snake_case"
+ *
+ * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
+ * @code
+ * #include "incbin.h"
+ * INCBIN(Foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>FooData[];
+ * // const unsigned char *const <prefix>FooEnd;
+ * // const unsigned int <prefix>FooSize;
+ * @endcode
+ *
+ * If however you specify a style before including: e.g:
+ * @code
+ * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
+ * #include "incbin.h"
+ * INCBIN(foo, "foo.txt");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>foo_data[];
+ * // const unsigned char *const <prefix>foo_end;
+ * // const unsigned int <prefix>foo_size;
+ * @endcode
+ */
+#if !defined(INCBIN_STYLE)
+#  define INCBIN_STYLE INCBIN_STYLE_CAMEL
+#endif
+
+/* Style lookup tables */
+#define INCBIN_STYLE_0_DATA Data
+#define INCBIN_STYLE_0_END End
+#define INCBIN_STYLE_0_SIZE Size
+#define INCBIN_STYLE_1_DATA _data
+#define INCBIN_STYLE_1_END _end
+#define INCBIN_STYLE_1_SIZE _size
+
+/* Style lookup: returning identifier */
+#define INCBIN_STYLE_IDENT(TYPE) \
+    INCBIN_CONCATENATE( \
+        INCBIN_STYLE_, \
+        INCBIN_CONCATENATE( \
+            INCBIN_EVAL(INCBIN_STYLE), \
+            INCBIN_CONCATENATE(_, TYPE)))
+
+/* Style lookup: returning string literal */
+#define INCBIN_STYLE_STRING(TYPE) \
+    INCBIN_STRINGIZE( \
+        INCBIN_STYLE_IDENT(TYPE)) \
+
+/* Generate the global labels by indirectly invoking the macro with our style
+ * type and concatenating the name against them. */
+#define INCBIN_GLOBAL_LABELS(NAME, TYPE) \
+    INCBIN_INVOKE( \
+        INCBIN_GLOBAL, \
+        INCBIN_CONCATENATE( \
+            NAME, \
+            INCBIN_INVOKE( \
+                INCBIN_STYLE_IDENT, \
+                TYPE))) \
+    INCBIN_INVOKE( \
+        INCBIN_TYPE, \
+        INCBIN_CONCATENATE( \
+            NAME, \
+            INCBIN_INVOKE( \
+                INCBIN_STYLE_IDENT, \
+                TYPE)))
+
+/**
+ * @brief Externally reference binary data included in another translation unit.
+ *
+ * Produces three external symbols that reference the binary data included in
+ * another translation unit.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name given for the binary data
+ *
+ * @code
+ * INCBIN_EXTERN(Foo);
+ *
+ * // Now you have the following symbols:
+ * // extern const unsigned char <prefix>FooData[];
+ * // extern const unsigned char *const <prefix>FooEnd;
+ * // extern const unsigned int <prefix>FooSize;
+ * @endcode
+ */
+#define INCBIN_EXTERN(NAME) \
+    INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
+        INCBIN_CONCATENATE( \
+            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+            INCBIN_STYLE_IDENT(DATA))[]; \
+    INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
+    INCBIN_CONCATENATE( \
+        INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+        INCBIN_STYLE_IDENT(END)); \
+    INCBIN_EXTERNAL const unsigned int \
+        INCBIN_CONCATENATE( \
+            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
+            INCBIN_STYLE_IDENT(SIZE))
+
+/**
+ * @brief Include a binary file into the current translation unit.
+ *
+ * Includes a binary file into the current translation unit, producing three symbols
+ * for objects that encode the data and size respectively.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name to associate with this binary data (as an identifier.)
+ * @param FILENAME The file to include (as a string literal.)
+ *
+ * @code
+ * INCBIN(Icon, "icon.png");
+ *
+ * // Now you have the following symbols:
+ * // const unsigned char <prefix>IconData[];
+ * // const unsigned char *const <prefix>IconEnd;
+ * // const unsigned int <prefix>IconSize;
+ * @endcode
+ *
+ * @warning This must be used in global scope
+ * @warning The identifiers may be different if INCBIN_STYLE is not default
+ *
+ * To externally reference the data included by this in another translation unit
+ * please @see INCBIN_EXTERN.
+ */
+#ifdef _MSC_VER
+#define INCBIN(NAME, FILENAME) \
+    INCBIN_EXTERN(NAME)
+#else
+#define INCBIN(NAME, FILENAME) \
+    __asm__(INCBIN_SECTION \
+            INCBIN_GLOBAL_LABELS(NAME, DATA) \
+            INCBIN_ALIGN_HOST \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
+            INCBIN_MACRO " \"" FILENAME "\"\n" \
+            INCBIN_GLOBAL_LABELS(NAME, END) \
+            INCBIN_ALIGN_BYTE \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
+                INCBIN_BYTE "1\n" \
+            INCBIN_GLOBAL_LABELS(NAME, SIZE) \
+            INCBIN_ALIGN_HOST \
+            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(SIZE) ":\n" \
+                INCBIN_INT INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) " - " \
+                           INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) "\n" \
+            INCBIN_ALIGN_HOST \
+            ".text\n" \
+    ); \
+    INCBIN_EXTERN(NAME)
+
+#endif
+#endif
diff --git a/src/learn/convert.cpp b/src/learn/convert.cpp
index 483296a1..59111dcf 100644
--- a/src/learn/convert.cpp
+++ b/src/learn/convert.cpp
@@ -8,9 +8,6 @@
 #include "position.h"
 #include "tt.h"
 
-// evaluate header for learning
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"
 
 #include "syzygy/tbprobe.h"
@@ -122,7 +119,7 @@ namespace Learner
                 else if (token == "score") {
                     double score;
                     ss >> score;
-                    // Training Formula � Issue #71 � nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
+                    // Training Formula - Issue #71 - nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
                     // Normalize to [0.0, 1.0].
                     score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value);
                     // Scale to [dest_score_min_value, dest_score_max_value].
@@ -480,7 +477,7 @@ namespace Learner
             {
                 if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
                     StateInfo si;
-                    tpos.set_from_packed_sfen(p.sfen, &si, th, false);
+                    tpos.set_from_packed_sfen(p.sfen, &si, th);
 
                     // write as plain text
                     ofs << "fen " << tpos.fen() << std::endl;
diff --git a/src/learn/gensfen.cpp b/src/learn/gensfen.cpp
index f7cc5669..7e931726 100644
--- a/src/learn/gensfen.cpp
+++ b/src/learn/gensfen.cpp
@@ -2,6 +2,7 @@
 
 #include "packed_sfen.h"
 #include "multi_think.h"
+#include "../syzygy/tbprobe.h"
 
 #include "misc.h"
 #include "position.h"
@@ -9,8 +10,6 @@
 #include "tt.h"
 #include "uci.h"
 
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"
 
 #include "nnue/evaluate_nnue_learner.h"
@@ -392,7 +391,6 @@ namespace Learner
             Position& pos,
             std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
             int ply,
-            int depth,
             vector<Move>& pv);
 
         // Min and max depths for search during gensfen
@@ -467,18 +465,7 @@ namespace Learner
             return 0;
         }
 
-        // Initialize the Syzygy Ending Tablebase and sort the moves.
-        Search::RootMoves rootMoves;
-        for (const auto& m : MoveList<LEGAL>(pos))
-        {
-            rootMoves.emplace_back(m);
-        }
-
-        if (!rootMoves.empty())
-        {
-            Tablebases::rank_root_moves(pos, rootMoves);
-        }
-        else
+        if(pos.this_thread()->rootMoves.empty())
         {
             // If there is no legal move
             return pos.checkers()
@@ -749,7 +736,6 @@ namespace Learner
         Position& pos,
         std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
         int ply,
-        int depth,
         vector<Move>& pv)
     {
         auto rootColor = pos.side_to_move();
@@ -763,15 +749,6 @@ namespace Learner
             }
 
             pos.do_move(m, states[ply++]);
-
-            // Because the difference calculation of evaluate() cannot be
-            // performed unless each node evaluate() is called!
-            // If the depth is 8 or more, it seems
-            // faster not to calculate this difference.
-            if (depth < 8)
-            {
-                Eval::NNUE::update_eval(pos);
-            }
         }
 
         // Reach leaf
@@ -830,6 +807,8 @@ namespace Learner
             auto& pos = th->rootPos;
             pos.set(StartFEN, false, &si, th);
 
+            int resign_counter = 0;
+            bool should_resign = prng.rand(10) > 1;
             // Vector for holding the sfens in the current simulated game.
             PSVector a_psv;
             a_psv.reserve(write_maxply + MAX_PLY);
@@ -857,6 +836,11 @@ namespace Learner
                 // Current search depth
                 const int depth = search_depth_min + (int)prng.rand(search_depth_max - search_depth_min + 1);
 
+                // Starting search calls init_for_search
+                auto [search_value, search_pv] = search(pos, depth, 1, nodes);
+
+                // This has to be performed after search because it needs to know
+                // rootMoves which are filled in init_for_search.
                 const auto result = get_current_game_result(pos, move_hist_scores);
                 if (result.has_value())
                 {
@@ -864,113 +848,91 @@ namespace Learner
                     break;
                 }
 
+                // Always adjudicate by eval limit.
+                // Also because of this we don't have to check for TB/MATE scores
+                if (abs(search_value) >= eval_limit)
                 {
-                    auto [search_value, search_pv] = search(pos, depth, 1, nodes);
-
-                    // Always adjudivate by eval limit.
-                    // Also because of this we don't have to check for TB/MATE scores
-                    if (abs(search_value) >= eval_limit)
-                    {
-                        const auto wdl = (search_value >= eval_limit) ? 1 : -1;
-                        flush_psv(wdl);
+                    resign_counter++;
+                    if ((should_resign && resign_counter >= 4) || abs(search_value) >= 10000) {
+                        flush_psv((search_value >= eval_limit) ? 1 : -1);
                         break;
                     }
+                } else {
+                    resign_counter = 0;
+                }
+                // Verification of a strange move
+                if (search_pv.size() > 0
+                    && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
+                {
+                    // (???)
+                    // MOVE_WIN is checking if it is the declaration victory stage before this
+                    // The declarative winning move should never come back here.
+                    // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)...
+                    cout << "Error! : " << pos.fen() << next_move << search_value << endl;
+                    break;
+                }
 
-                    // Verification of a strange move
-                    if (search_pv.size() > 0
-                        && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
+                // Save the move score for adjudication.
+                move_hist_scores.push_back(search_value);
+
+                // Discard stuff before write_minply is reached
+                // because it can harm training due to overfitting.
+                // Initial positions would be too common.
+                if (ply < write_minply - 1)
+                {
+                    a_psv.clear();
+                    goto SKIP_SAVE;
+                }
+
+                // Look into the position hashtable to see if the same
+                // position was seen before.
+                // This is a good heuristic to exclude already seen
+                // positions without many false positives.
+                {
+                    auto key = pos.key();
+                    auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
+                    auto old_key = hash[hash_index];
+                    if (key == old_key)
                     {
-                        // (???)
-                        // MOVE_WIN is checking if it is the declaration victory stage before this
-                        // The declarative winning move should never come back here.
-                        // Also, when MOVE_RESIGN, search_value is a one-stop score, which should be the minimum value of eval_limit (-31998)...
-                        cout << "Error! : " << pos.fen() << next_move << search_value << endl;
-                        break;
-                    }
-
-                    // Save the move score for adjudication.
-                    move_hist_scores.push_back(search_value);
-
-                    // If depth 0, pv is not obtained, so search again at depth 2.
-                    if (search_depth_min <= 0)
-                    {
-                        auto [research_value, research_pv] = search(pos, 2);
-                        search_pv = research_pv;
-                    }
-
-                    // Discard stuff before write_minply is reached
-                    // because it can harm training due to overfitting.
-                    // Initial positions would be too common.
-                    if (ply < write_minply - 1)
-                    {
-                        a_psv.clear();
                         goto SKIP_SAVE;
                     }
-
-                    // Look into the position hashtable to see if the same
-                    // position was seen before.
-                    // This is a good heuristic to exlude already seen
-                    // positions without many false positives.
+                    else
                     {
-                        auto key = pos.key();
-                        auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
-                        auto old_key = hash[hash_index];
-                        if (key == old_key)
-                        {
-                            a_psv.clear();
-                            goto SKIP_SAVE;
-                        }
-                        else
-                        {
-                            // Replace with the current key.
-                            hash[hash_index] = key;
-                        }
+                        // Replace with the current key.
+                        hash[hash_index] = key;
                     }
-
-                    // Pack the current position into a packed sfen and save it into the buffer.
-                    {
-                        a_psv.emplace_back(PackedSfenValue());
-                        auto& psv = a_psv.back();
-
-                        // Here we only write the position data.
-                        // Result is added after the whole game is done.
-                        pos.sfen_pack(psv.sfen);
-
-                        // Get the value of evaluate() as seen from the
-                        // root color on the leaf node of the PV line.
-                        // I don't know the goodness and badness of using the
-                        // return value of search() as it is.
-                        // TODO: Consider using search value instead of evaluate_leaf.
-                        //       Maybe give it as an option.
-
-                        // Use PV moves to reach the leaf node and use the value
-                        // that evaluated() is called on that leaf node.
-                        const auto leaf_value = evaluate_leaf(pos, states, ply, depth, search_pv);
-
-                        // If for some reason the leaf node couldn't yield an eval
-                        // we fallback to search value.
-                        psv.score = leaf_value == VALUE_NONE ? search_value : leaf_value;
-
-                        psv.gamePly = ply;
-
-                        // Take out the first PV move. This should be present unless depth 0.
-                        assert(search_pv.size() >= 1);
-                        psv.move = search_pv[0];
-                    }
-
-                SKIP_SAVE:;
-
-                    // For some reason, We could not get PV (hit the substitution table etc. and got stuck?)
-                    // so go to the next game. It's a rare case, so you can ignore it.
-                    if (search_pv.size() == 0)
-                    {
-                        break;
-                    }
-
-                    // Update the next move according to best search result.
-                    next_move = search_pv[0];
                 }
 
+                // Pack the current position into a packed sfen and save it into the buffer.
+                {
+                    a_psv.emplace_back(PackedSfenValue());
+                    auto& psv = a_psv.back();
+
+                    // Here we only write the position data.
+                    // Result is added after the whole game is done.
+                    pos.sfen_pack(psv.sfen);
+
+                    psv.score = search_value;
+
+                    psv.gamePly = ply;
+
+                    // Take out the first PV move. This should be present unless depth 0.
+                    assert(search_pv.size() >= 1);
+                    psv.move = search_pv[0];
+                }
+
+            SKIP_SAVE:;
+
+                // For some reason we could not get a PV (e.g. the search was cut short by a
+                // transposition table hit?), so go to the next game. This is rare and can be ignored.
+                if (search_pv.size() == 0)
+                {
+                    break;
+                }
+
+                // Update the next move according to best search result.
+                next_move = search_pv[0];
+
                 // Random move.
                 auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
                 if (random_move.has_value())
@@ -983,18 +945,11 @@ namespace Learner
                     {
                         break;
                     }
-
-                    // Clear the sfens that were written before the random move.
-                    // (???) why?
-                    a_psv.clear();
                 }
 
                 // Do move.
                 pos.do_move(next_move, states[ply]);
 
-                // Call node evaluate() for each difference calculation.
-                Eval::NNUE::update_eval(pos);
-
             } // for (int ply = 0; ; ++ply)
 
         } // while(!quit)
@@ -1177,10 +1132,28 @@ namespace Learner
             << "  detect_draw_by_insufficient_mating_material = " << detect_draw_by_insufficient_mating_material << endl;
 
         // Show if the training data generator uses NNUE.
-        Eval::verify_NNUE();
+        Eval::NNUE::verify();
 
         Threads.main()->ponder = false;
 
+        // About Search::Limits
+        // Be careful because this member variable is global and affects other threads.
+        {
+          auto& limits = Search::Limits;
+
+          // Make the search equivalent to the "go infinite" command, so that time management does not interfere.
+          limits.infinite = true;
+
+          // Suppress the PV output; printing it would only get in the way.
+          limits.silent = true;
+
+          // Leave this unset: it would be compared with the accumulated node count of each thread.
+          limits.nodes = 0;
+
+          // The depth is likewise controlled by the argument passed to Learner::search().
+          limits.depth = 0;
+        }
+
         // Create and execute threads as many as Options["Threads"].
         {
             SfenWriter sfen_writer(output_file_name, thread_num);
diff --git a/src/learn/learn.cpp b/src/learn/learn.cpp
index ba904e9d..e2d9af1b 100644
--- a/src/learn/learn.cpp
+++ b/src/learn/learn.cpp
@@ -29,8 +29,6 @@
 #include "uci.h"
 #include "search.h"
 
-#include "eval/evaluate_common.h"
-
 #include "extra/nnue_data_binpack_format.h"
 
 #include "nnue/evaluate_nnue_learner.h"
@@ -58,6 +56,7 @@
 #include <omp.h>
 #endif
 
+extern double global_learning_rate;
 
 using namespace std;
 
@@ -92,12 +91,6 @@ namespace Learner
     static double dest_score_min_value = 0.0;
     static double dest_score_max_value = 1.0;
 
-    // Assume teacher signals are the scores of deep searches,
-    // and convert them into winning probabilities in the trainer.
-    // Sometimes we want to use the winning probabilities in the training
-    // data directly. In those cases, we set false to this variable.
-    static bool convert_teacher_signal_to_winning_probability = true;
-
     // Using stockfish's WDL with win rate model instead of sigmoid
     static bool use_wdl = false;
 
@@ -164,14 +157,6 @@ namespace Learner
         return ((y2 - y1) / epsilon) / winning_probability_coefficient;
     }
 
-    // A constant used in elmo (WCSC27). Adjustment required.
-    // Since elmo does not internally divide the expression, the value is different.
-    // You can set this value with the learn command.
-    // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27)
-    double ELMO_LAMBDA = 0.33;
-    double ELMO_LAMBDA2 = 0.33;
-    double ELMO_LAMBDA_LIMIT = 32000;
-
     // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
     double get_scaled_signal(double signal)
     {
@@ -194,26 +179,7 @@ namespace Learner
     double calculate_p(double teacher_signal, int ply)
     {
         const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
-
-        double p = scaled_teacher_signal;
-        if (convert_teacher_signal_to_winning_probability)
-        {
-            p = winning_percentage(scaled_teacher_signal, ply);
-        }
-
-        return p;
-    }
-
-    double calculate_lambda(double teacher_signal)
-    {
-        // If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT
-        // then apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
-        const double lambda =
-            (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
-            ? ELMO_LAMBDA2
-            : ELMO_LAMBDA;
-
-        return lambda;
+        return winning_percentage(scaled_teacher_signal, ply);
     }
 
     double calculate_t(int game_result)
@@ -226,32 +192,6 @@ namespace Learner
         return t;
     }
 
-    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
-    {
-        // elmo (WCSC27) method
-        // Correct with the actual game wins and losses.
-        const double q = winning_percentage(shallow, psv.gamePly);
-        const double p = calculate_p(teacher_signal, psv.gamePly);
-        const double t = calculate_t(psv.game_result);
-        const double lambda = calculate_lambda(teacher_signal);
-
-        double grad;
-        if (use_wdl)
-        {
-            const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
-            const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
-            grad = lambda * dce_p + (1.0 - lambda) * dce_t;
-        }
-        else
-        {
-            // Use the actual win rate as a correction term.
-            // This is the idea of ​​elmo (WCSC27), modern O-parts.
-            grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
-        }
-
-        return grad;
-    }
-
     // Calculate cross entropy during learning
     // The individual cross entropy of the win/loss term and win
     // rate term of the elmo expression is returned
@@ -262,21 +202,16 @@ namespace Learner
         const PackedSfenValue& psv,
         double& cross_entropy_eval,
         double& cross_entropy_win,
-        double& cross_entropy,
         double& entropy_eval,
-        double& entropy_win,
-        double& entropy)
+        double& entropy_win)
     {
         // Teacher winning probability.
         const double q = winning_percentage(shallow, psv.gamePly);
         const double p = calculate_p(teacher_signal, psv.gamePly);
         const double t = calculate_t(psv.game_result);
-        const double lambda = calculate_lambda(teacher_signal);
 
         constexpr double epsilon = 0.000001;
 
-        const double m = (1.0 - lambda) * t + lambda * p;
-
         cross_entropy_eval =
             (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
         cross_entropy_win =
@@ -285,17 +220,12 @@ namespace Learner
             (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
         entropy_win =
             (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
-
-        cross_entropy =
-            (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
-        entropy =
-            (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
     }
 
     // Other objective functions may be considered in the future...
     double calc_grad(Value shallow, const PackedSfenValue& psv)
     {
-        return calc_grad((Value)psv.score, shallow, psv);
+        return (double)(shallow - (Value)psv.score) / 2400.0;
     }
 
     struct BasicSfenInputStream
@@ -787,15 +717,9 @@ namespace Learner
 
         std::atomic<bool> stop_flag;
 
-        // Discount rate
-        double discount_rate;
-
         // Option to exclude early stage from learning
         int reduction_gameply;
 
-        // Option not to learn kk/kkp/kpp/kppp
-        std::array<bool, 4> freeze;
-
         // If the absolute value of the evaluation value of the deep search
         // of the teacher phase exceeds this value, discard the teacher phase.
         int eval_limit;
@@ -825,7 +749,6 @@ namespace Learner
 
         uint64_t eval_save_interval;
         uint64_t loss_output_interval;
-        uint64_t mirror_percentage;
 
         // Loss calculation.
         // done: Number of phases targeted this time
@@ -849,7 +772,6 @@ namespace Learner
         for (size_t i = 0; i < pv.size(); ++i)
         {
             task_pos.do_move(pv[i], states[i]);
-            Eval::NNUE::update_eval(task_pos);
         }
 
         const Value shallow_value =
@@ -870,20 +792,18 @@ namespace Learner
         // It doesn't matter if you have disabled the substitution table.
         TT.new_search();
 
-        std::cout << "PROGRESS: " << now_string() << ", ";
-        std::cout << sr.total_done << " sfens";
-        std::cout << ", iteration " << epoch;
-        std::cout << ", eta = " << Eval::get_eta() << ", ";
+        cout << "PROGRESS: " << now_string() << ", ";
+        cout << sr.total_done << " sfens";
+        cout << ", iteration " << epoch;
+        cout << ", learning rate = " << global_learning_rate << ", ";
 
         // For calculation of verification data loss
-        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
-        atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
+        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
+        atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
         test_sum_cross_entropy_eval = 0;
         test_sum_cross_entropy_win = 0;
-        test_sum_cross_entropy = 0;
         test_sum_entropy_eval = 0;
         test_sum_entropy_win = 0;
-        test_sum_entropy = 0;
 
         // norm for learning
         atomic<double> sum_norm;
@@ -899,7 +819,7 @@ namespace Learner
         auto& pos = th->rootPos;
         StateInfo si;
         pos.set(StartFEN, false, &si, th);
-        std::cout << "hirate eval = " << Eval::evaluate(pos);
+        cout << "hirate eval = " << Eval::evaluate(pos) << endl;
 
         // It's better to parallelize here, but it's a bit
         // troublesome because the search before slave has not finished.
@@ -923,10 +843,8 @@ namespace Learner
                     &ps,
                     &test_sum_cross_entropy_eval,
                     &test_sum_cross_entropy_win,
-                    &test_sum_cross_entropy,
                     &test_sum_entropy_eval,
                     &test_sum_entropy_win,
-                    &test_sum_entropy,
                     &sum_norm,
                     &task_count,
                     &move_accord_count
@@ -954,26 +872,22 @@ namespace Learner
                 // For the time being, regarding the win rate and loss terms only in the elmo method
                 // Calculate and display the cross entropy.
 
-                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
-                double test_entropy_eval, test_entropy_win, test_entropy;
+                double test_cross_entropy_eval, test_cross_entropy_win;
+                double test_entropy_eval, test_entropy_win;
                 calc_cross_entropy(
                     deep_value,
                     shallow_value,
                     ps,
                     test_cross_entropy_eval,
                     test_cross_entropy_win,
-                    test_cross_entropy,
                     test_entropy_eval,
-                    test_entropy_win,
-                    test_entropy);
+                    test_entropy_win);
 
                 // The total cross entropy need not be abs() by definition.
                 test_sum_cross_entropy_eval += test_cross_entropy_eval;
                 test_sum_cross_entropy_win += test_cross_entropy_win;
-                test_sum_cross_entropy += test_cross_entropy;
                 test_sum_entropy_eval += test_entropy_eval;
                 test_sum_entropy_win += test_entropy_win;
-                test_sum_entropy += test_entropy;
                 sum_norm += (double)abs(shallow_value);
 
                 // Determine if the teacher's move and the score of the shallow search match
@@ -998,7 +912,7 @@ namespace Learner
         while (task_count)
             sleep(1);
 
-        latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
+        latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
         latest_loss_count += sr.sfen_for_mse.size();
 
         // learn_cross_entropy may be called train cross
@@ -1008,27 +922,24 @@ namespace Learner
 
         if (sr.sfen_for_mse.size() && done)
         {
-            cout
-                << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
+            cout << "INFO: "
+                << "test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
                 << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
                 << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
                 << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
-                << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
-                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                 << " , norm = " << sum_norm
-                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
+                << endl;
 
             if (done != static_cast<uint64_t>(-1))
             {
-                cout
-                    << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
+                cout << "INFO: "
+                    << "learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
                     << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
                     << " , learn_entropy_eval = " << learn_sum_entropy_eval / done
                     << " , learn_entropy_win = " << learn_sum_entropy_win / done
-                    << " , learn_cross_entropy = " << learn_sum_cross_entropy / done
-                    << " , learn_entropy = " << learn_sum_entropy / done;
+                    << endl;
             }
-            cout << endl;
         }
         else
         {
@@ -1038,10 +949,8 @@ namespace Learner
         // Clear 0 for next time.
         learn_sum_cross_entropy_eval = 0.0;
         learn_sum_cross_entropy_win = 0.0;
-        learn_sum_cross_entropy = 0.0;
         learn_sum_entropy_eval = 0.0;
         learn_sum_entropy_win = 0.0;
-        learn_sum_entropy = 0.0;
     }
 
     void LearnerThink::thread_worker(size_t thread_id)
@@ -1058,7 +967,7 @@ namespace Learner
             // display mse (this is sometimes done only for thread 0)
             // Immediately after being read from the file...
 
-        // Lock the evaluation function so that it is not used during updating.
+            // Lock the evaluation function so that it is not used during updating.
             shared_lock<shared_timed_mutex> read_lock(nn_mutex, defer_lock);
             if (sr.next_update_weights <= sr.total_done ||
                 (thread_id != 0 && !read_lock.try_lock()))
@@ -1090,7 +999,7 @@ namespace Learner
 
                         // Lock the evaluation function so that it is not used during updating.
                         lock_guard<shared_timed_mutex> write_lock(nn_mutex);
-                        Eval::NNUE::UpdateParameters(epoch);
+                        Eval::NNUE::UpdateParameters();
                     }
 
                     ++epoch;
@@ -1167,8 +1076,7 @@ namespace Learner
                 goto RETRY_READ;
 
             StateInfo si;
-            const bool mirror = prng.rand(100) < mirror_percentage;
-            if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0)
+            if (pos.set_from_packed_sfen(ps.sfen, &si, th) != 0)
             {
                 // I got a strange sfen. Should be debugged!
                 // Since it is an illegal sfen, it may not be
@@ -1177,18 +1085,30 @@ namespace Learner
                 goto RETRY_READ;
             }
 
-            // There is a possibility that all the pieces are blocked and stuck.
-            // Also, the declaration win phase is excluded from
-            // learning because you cannot go to leaf with PV moves.
-            // (shouldn't write out such teacher aspect itself,
-            // but may have written it out with an old generation routine)
-            // Skip the position if there are no legal moves (=checkmated or stalemate).
-            if (MoveList<LEGAL>(pos).size() == 0)
-                goto RETRY_READ;
-
             // I can read it, so try displaying it.
             //      cout << pos << value << endl;
 
+            const auto rootColor = pos.side_to_move();
+
+            int ply = 0;
+            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
+
+            if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
+            {
+                goto RETRY_READ;
+            }
+
+            pos.do_move((Move)ps.move, state[ply++]);
+
+			// It is possible that every piece is blocked and no move is available.
+			// Positions won by declaration are also excluded from learning,
+			// because the leaf cannot be reached by following PV moves.
+			// (Such training positions should not have been written out at all,
+			// but an old generation routine may have produced them.)
+			// Skip the position if there are no legal moves (= checkmate or stalemate).
+			if (MoveList<LEGAL>(pos).size() == 0)
+				goto RETRY_READ;
+
             // Evaluation value of shallow search (qsearch)
             const auto [_, pv] = qsearch(pos);
 
@@ -1199,13 +1119,11 @@ namespace Learner
             // Go to the leaf node as it is, add only to the gradient array,
             // and later try AdaGrad at the time of rmse aggregation.
 
-            const auto rootColor = pos.side_to_move();
 
             // If the initial PV is different, it is better not to use it for learning.
             // If it is the result of searching a completely different place, it may become noise.
             // It may be better not to study where the difference in evaluation values ​​is too large.
 
-            int ply = 0;
 
             // A helper function that adds the gradient to the current phase.
             auto pos_add_grad = [&]() {
@@ -1224,35 +1142,28 @@ namespace Learner
                     : -Eval::evaluate(pos);
 
                 // Calculate loss for training data
-                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
-                double learn_entropy_eval, learn_entropy_win, learn_entropy;
+                double learn_cross_entropy_eval, learn_cross_entropy_win;
+                double learn_entropy_eval, learn_entropy_win;
                 calc_cross_entropy(
                     deep_value,
                     shallow_value,
                     ps,
                     learn_cross_entropy_eval,
                     learn_cross_entropy_win,
-                    learn_cross_entropy,
                     learn_entropy_eval,
-                    learn_entropy_win,
-                    learn_entropy);
+                    learn_entropy_win);
 
                 learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
                 learn_sum_cross_entropy_win += learn_cross_entropy_win;
-                learn_sum_cross_entropy += learn_cross_entropy;
                 learn_sum_entropy_eval += learn_entropy_eval;
                 learn_sum_entropy_win += learn_entropy_win;
-                learn_sum_entropy += learn_entropy;
 
-                const double example_weight =
-                    (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
-                Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
+                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
 
                 // Since the processing is completed, the counter of the processed number is incremented
                 sr.total_done++;
             };
 
-            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
             bool illegal_move = false;
             for (auto m : pv)
             {
@@ -1266,29 +1177,16 @@ namespace Learner
                     break;
                 }
 
-                // Processing when adding the gradient to the node on each PV.
-                //If discount_rate is 0, this process is not performed.
-                if (discount_rate != 0)
-                    pos_add_grad();
-
                 pos.do_move(m, state[ply++]);
-
-                // Since the value of evaluate in leaf is used, the difference is updated.
-                Eval::NNUE::update_eval(pos);
             }
 
             if (illegal_move)
             {
-                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
-                continue;
+                goto RETRY_READ;
             }
 
             // Since we have reached the end phase of PV, add the slope here.
             pos_add_grad();
-
-            // rewind the phase
-            for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                pos.undo_move(*it);
         }
 
     }
@@ -1303,18 +1201,18 @@ namespace Learner
         {
             // When EVAL_SAVE_ONLY_ONCE is defined,
             // Do not dig a subfolder because I want to save it only once.
-            Eval::save_eval("");
+            Eval::NNUE::save_eval("");
         }
         else if (is_final)
         {
-            Eval::save_eval("final");
+            Eval::NNUE::save_eval("final");
             return true;
         }
         else
         {
             static int dir_number = 0;
             const std::string dir_name = std::to_string(dir_number++);
-            Eval::save_eval(dir_name);
+            Eval::NNUE::save_eval(dir_name);
 
             if (newbob_decay != 1.0 && latest_loss_count > 0) {
                 static int trials = newbob_num_trials;
@@ -1332,25 +1230,17 @@ namespace Learner
                 else
                 {
                     cout << " >= best (" << best_loss << "), rejected" << endl;
-                    if (best_nn_directory.empty())
-                    {
-                        cout << "WARNING: no improvement from initial model" << endl;
-                    }
-                    else
-                    {
-                        cout << "restoring parameters from " << best_nn_directory << endl;
-                        Eval::NNUE::RestoreParameters(best_nn_directory);
-                    }
+                    best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);
 
                     if (--trials > 0 && !is_final)
                     {
                         cout
-                            << "reducing learning rate scale from " << newbob_scale
+                            << "reducing learning rate from " << newbob_scale
                             << " to " << (newbob_scale * newbob_decay)
                             << " (" << trials << " more trials)" << endl;
 
                         newbob_scale *= newbob_decay;
-                        Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
+                        global_learning_rate = newbob_scale;
                     }
                 }
 
@@ -1628,13 +1518,6 @@ namespace Learner
 
         string target_dir;
 
-        // If 0, it will be the default value.
-        double eta1 = 0.0;
-        double eta2 = 0.0;
-        double eta3 = 0.0;
-        uint64_t eta1_epoch = 0; // eta2 is not applied by default
-        uint64_t eta2_epoch = 0; // eta3 is not applied by default
-
         // --- Function that only shuffles the teacher aspect
 
         // normal shuffle
@@ -1675,24 +1558,13 @@ namespace Learner
         // Turn on if you want to pass a pre-shuffled file.
         bool no_shuffle = false;
 
-        // elmo lambda
-        ELMO_LAMBDA = 0.33;
-        ELMO_LAMBDA2 = 0.33;
-        ELMO_LAMBDA_LIMIT = 32000;
-
-        // Discount rate. If this is set to a value other than 0,
-        // the slope will be added even at other than the PV termination.
-        // (At that time, apply this discount rate)
-        double discount_rate = 0;
+        global_learning_rate = 1.0;
 
         // if (gamePly <rand(reduction_gameply)) continue;
         // An option to exclude the early stage from the learning target moderately like
         // If set to 1, rand(1)==0, so nothing is excluded.
         int reduction_gameply = 1;
 
-        // Optional item that does not let you learn KK/KKP/KPP/KPPP
-        array<bool, 4> freeze = {};
-
         uint64_t nn_batch_size = 1000;
         double newbob_decay = 1.0;
         int newbob_num_trials = 2;
@@ -1700,7 +1572,6 @@ namespace Learner
 
         uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
         uint64_t loss_output_interval = 0;
-        uint64_t mirror_percentage = 0;
 
         string validation_set_file_name;
         string seed;
@@ -1734,12 +1605,7 @@ namespace Learner
             else if (option == "batchsize") is >> mini_batch_size;
 
             // learning rate
-            else if (option == "eta")        is >> eta1;
-            else if (option == "eta1")       is >> eta1; // alias
-            else if (option == "eta2")       is >> eta2;
-            else if (option == "eta3")       is >> eta3;
-            else if (option == "eta1_epoch") is >> eta1_epoch;
-            else if (option == "eta2_epoch") is >> eta2_epoch;
+            else if (option == "lr")        is >> global_learning_rate;
 
             // Accept also the old option name.
             else if (option == "use_draw_in_training"
@@ -1758,22 +1624,9 @@ namespace Learner
 
             else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
 
-            // Discount rate
-            else if (option == "discount_rate") is >> discount_rate;
-
             // Using WDL with win rate model instead of sigmoid
             else if (option == "use_wdl") is >> use_wdl;
 
-            // No learning of KK/KKP/KPP/KPPP.
-            else if (option == "freeze_kk")    is >> freeze[0];
-            else if (option == "freeze_kkp")   is >> freeze[1];
-            else if (option == "freeze_kpp")   is >> freeze[2];
-
-            // LAMBDA
-            else if (option == "lambda")       is >> ELMO_LAMBDA;
-            else if (option == "lambda2")      is >> ELMO_LAMBDA2;
-            else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
-
             else if (option == "reduction_gameply") is >> reduction_gameply;
 
             // shuffle related
@@ -1794,7 +1647,6 @@ namespace Learner
 
             else if (option == "eval_save_interval") is >> eval_save_interval;
             else if (option == "loss_output_interval") is >> loss_output_interval;
-            else if (option == "mirror_percentage") is >> mirror_percentage;
             else if (option == "validation_set_file_name") is >> validation_set_file_name;
 
             // Rabbit convert related
@@ -1810,7 +1662,6 @@ namespace Learner
             else if (option == "src_score_max_value") is >> src_score_max_value;
             else if (option == "dest_score_min_value") is >> dest_score_min_value;
             else if (option == "dest_score_max_value") is >> dest_score_max_value;
-            else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability;
             else if (option == "seed") is >> seed;
             // Otherwise, it's a filename.
             else
@@ -1884,7 +1735,7 @@ namespace Learner
 
         if (use_convert_plain)
         {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
             cout << "convert_plain.." << endl;
             convert_plain(filenames, output_file_name);
             return;
@@ -1892,7 +1743,7 @@ namespace Learner
 
         if (use_convert_bin)
         {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
             cout << "convert_bin.." << endl;
             convert_bin(
                 filenames,
@@ -1913,7 +1764,7 @@ namespace Learner
 
         if (use_convert_bin_from_pgn_extract)
         {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
             cout << "convert_bin_from_pgn-extract.." << endl;
             convert_bin_from_pgn_extract(
                 filenames,
@@ -1946,8 +1797,7 @@ namespace Learner
         cout << "nn_batch_size     : " << nn_batch_size << endl;
         cout << "nn_options        : " << nn_options << endl;
 
-        cout << "learning rate     : " << eta1 << " , " << eta2 << " , " << eta3 << endl;
-        cout << "eta_epoch         : " << eta1_epoch << " , " << eta2_epoch << endl;
+        cout << "learning rate     : " << global_learning_rate << endl;
         cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl;
         cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
         cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
@@ -1960,17 +1810,10 @@ namespace Learner
             cout << "scheduling        : default" << endl;
         }
 
-        cout << "discount rate     : " << discount_rate << endl;
-
         // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
         reduction_gameply = max(reduction_gameply, 1);
         cout << "reduction_gameply : " << reduction_gameply << endl;
 
-        cout << "LAMBDA            : " << ELMO_LAMBDA << endl;
-        cout << "LAMBDA2           : " << ELMO_LAMBDA2 << endl;
-        cout << "LAMBDA_LIMIT      : " << ELMO_LAMBDA_LIMIT << endl;
-
-        cout << "mirror_percentage : " << mirror_percentage << endl;
         cout << "eval_save_interval  : " << eval_save_interval << " sfens" << endl;
         cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
 
@@ -1981,7 +1824,7 @@ namespace Learner
         cout << "init.." << endl;
 
         // Read evaluation function parameters
-        Eval::init_NNUE();
+        Eval::NNUE::init();
 
         Threads.main()->ponder = false;
 
@@ -2004,12 +1847,12 @@ namespace Learner
         }
 
         cout << "init_training.." << endl;
-        Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
+        Eval::NNUE::InitializeTraining(seed);
         Eval::NNUE::SetBatchSize(nn_batch_size);
         Eval::NNUE::SetOptions(nn_options);
         if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
             // Save the current net to [EvalSaveDir]\original.
-            Eval::save_eval("original");
+            Eval::NNUE::save_eval("original");
 
             // Set the folder above to best_nn_directory so that the trainer can
             // resotre the network parameters from the original net file.
@@ -2020,11 +1863,9 @@ namespace Learner
         cout << "init done." << endl;
 
         // Reflect other option settings.
-        learn_think.discount_rate = discount_rate;
         learn_think.eval_limit = eval_limit;
         learn_think.save_only_once = save_only_once;
         learn_think.sr.no_shuffle = no_shuffle;
-        learn_think.freeze = freeze;
         learn_think.reduction_gameply = reduction_gameply;
 
         learn_think.newbob_scale = 1.0;
@@ -2033,7 +1874,6 @@ namespace Learner
 
         learn_think.eval_save_interval = eval_save_interval;
         learn_think.loss_output_interval = loss_output_interval;
-        learn_think.mirror_percentage = mirror_percentage;
 
         // Start a thread that loads the phase file in the background
         // (If this is not started, mse cannot be calculated.)
@@ -2069,6 +1909,8 @@ namespace Learner
         // Start learning.
         learn_think.go_think();
 
+        Eval::NNUE::FinalizeNet();
+
         // Save once at the end.
         learn_think.save(true);
     }
diff --git a/src/learn/learn.h b/src/learn/learn.h
index 4b09f825..c76d76c5 100644
--- a/src/learn/learn.h
+++ b/src/learn/learn.h
@@ -23,11 +23,7 @@ using LearnFloatType = float;
 // configure
 // ======================
 
-// ----------------------
-// Learning with the method of elmo (WCSC27)
-// ----------------------
-
-#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
+#define LOSS_FUNCTION "cross_entropy_eval"
 
 // ----------------------
 // Definition of struct used in Learner
diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp
deleted file mode 100644
index 925905c6..00000000
--- a/src/learn/learning_tools.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-﻿#include "learning_tools.h"
-
-#include "misc.h"
-
-using namespace Eval;
-
-namespace EvalLearningTools
-{
-
-	// --- static variables
-
-	double Weight::eta;
-	double Weight::eta1;
-	double Weight::eta2;
-	double Weight::eta3;
-	uint64_t Weight::eta1_epoch;
-	uint64_t Weight::eta2_epoch;
-}
diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h
deleted file mode 100644
index dcb2c4aa..00000000
--- a/src/learn/learning_tools.h
+++ /dev/null
@@ -1,99 +0,0 @@
-﻿#ifndef __LEARN_WEIGHT_H__
-#define __LEARN_WEIGHT_H__
-
-// A set of machine learning tools related to the weight array used for machine learning of evaluation functions
-
-#include "learn.h"
-
-#include "misc.h"  // PRNG , my_insertion_sort
-
-#include <array>
-#include <cmath>	// std::sqrt()
-
-namespace EvalLearningTools
-{
-	// -------------------------------------------------
-	//   Array for learning that stores gradients etc.
-	// -------------------------------------------------
-
-#if defined(_MSC_VER)
-#pragma pack(push,2)
-#elif defined(__GNUC__)
-#pragma pack(2)
-#endif
-	struct Weight
-	{
-		// cumulative value of one mini-batch gradient
-		LearnFloatType g = LearnFloatType(0);
-
-		// Learning rate η(eta) such as AdaGrad.
-		// It is assumed that eta1,2,3,eta1_epoch,eta2_epoch have been set by the time updateFV() is called.
-		// The epoch of update_weights() gradually changes from eta1 to eta2 until eta1_epoch.
-		// After eta2_epoch, gradually change from eta2 to eta3.
-		static double eta;
-		static double eta1;
-		static double eta2;
-		static double eta3;
-		static uint64_t eta1_epoch;
-		static uint64_t eta2_epoch;
-
-		// Batch initialization of eta. If 0 is passed, the default value will be set.
-		static void init_eta(double new_eta1, double new_eta2, double new_eta3,
-			uint64_t new_eta1_epoch, uint64_t new_eta2_epoch)
-		{
-			Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0;
-			Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0;
-			Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0;
-			Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0;
-			Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0;
-		}
-
-		// Set eta according to epoch.
-		static void calc_eta(uint64_t epoch)
-		{
-			if (Weight::eta1_epoch == 0) // Exclude eta2
-				Weight::eta = Weight::eta1;
-			else if (epoch < Weight::eta1_epoch)
-				// apportion
-				Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch;
-			else if (Weight::eta2_epoch == 0) // Exclude eta3
-				Weight::eta = Weight::eta2;
-			else if (epoch < Weight::eta2_epoch)
-				Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch);
-			else
-				Weight::eta = Weight::eta3;
-		}
-
-		template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
-
-		// grad setting
-		template <typename T> void set_grad(const T& g_) { g = g_; }
-
-		// Add grad
-		template <typename T> void add_grad(const T& g_) { g += g_; }
-
-		LearnFloatType get_grad() const { return g; }
-	};
-#if defined(_MSC_VER)
-#pragma pack(pop)
-#elif defined(__GNUC__)
-#pragma pack(0)
-#endif
-
-	// Turned weight array
-	// In order to be able to handle it transparently, let's have the same member as Weight.
-	struct Weight2
-	{
-		Weight w[2];
-
-		//Evaluate your turn, eta 1/8.
-		template <typename T> void updateFV(std::array<T, 2>& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); }
-
-		template <typename T> void set_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); }
-		template <typename T> void add_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); }
-
-		std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
-	};
-}
-
-#endif
diff --git a/src/learn/multi_think.cpp b/src/learn/multi_think.cpp
index 7c389d40..80bc72b5 100644
--- a/src/learn/multi_think.cpp
+++ b/src/learn/multi_think.cpp
@@ -9,39 +9,14 @@
 
 void MultiThink::go_think()
 {
-	// Keep a copy to restore the Options settings later.
-	auto oldOptions = Options;
-
-	// When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is
-	// Since it is not thread safe, it is guaranteed here that it is being completely read in memory.
-	Options["BookOnTheFly"] = std::string("false");
-
 	// Read evaluation function, etc.
 	// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
 	// Skip memory corruption check.
-	Eval::init_NNUE();
+	Eval::NNUE::init();
 
 	// Call the derived class's init().
 	init();
 
-        // About Search::Limits
-        // Be careful because this member variable is global and affects other threads.
-        {
-          auto& limits = Search::Limits;
-
-          // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
-          limits.infinite = true;
-
-          // Since PV is an obstacle when displayed, erase it.
-          limits.silent = true;
-
-          // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
-          limits.nodes = 0;
-
-          // depth is also processed by the one passed as an argument of Learner::search().
-          limits.depth = 0;
-        }
-
 	// The loop upper limit is set with set_loop_max().
 	loop_count = 0;
 	done_count = 0;
@@ -123,10 +98,4 @@ void MultiThink::go_think()
 	// The file writing thread etc. are still running only when all threads are finished
 	// Since the work itself may not have completed, output only that all threads have finished.
 	std::cout << "all threads are joined." << std::endl;
-
-	// Restored because Options were rewritten.
-	// Restore the handler because the handler will not start unless you assign a value.
-	for (auto& s : oldOptions)
-		Options[s.first] = std::string(s.second);
-
 }
diff --git a/src/learn/sfen_packer.cpp b/src/learn/sfen_packer.cpp
index 734a477b..19c745ad 100644
--- a/src/learn/sfen_packer.cpp
+++ b/src/learn/sfen_packer.cpp
@@ -259,7 +259,7 @@ namespace Learner {
     return make_piece(c, pr);
   }
 
-  int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror)
+  int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th)
   {
     SfenPacker packer;
     auto& stream = packer.stream;
@@ -280,16 +280,8 @@ namespace Learner {
     pos.pieceList[B_KING][0] = SQUARE_NB;
 
     // First the position of the ball
-    if (mirror)
-    {
-      for (auto c : Colors)
-        pos.board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
-    }
-    else
-    {
-      for (auto c : Colors)
-        pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
-    }
+    for (auto c : Colors)
+      pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
 
     // Piece placement
     for (Rank r = RANK_8; r >= RANK_1; --r)
@@ -297,9 +289,6 @@ namespace Learner {
       for (File f = FILE_A; f <= FILE_H; ++f)
       {
         auto sq = make_square(f, r);
-        if (mirror) {
-          sq = flip_file(sq);
-        }
 
         // it seems there are already balls
         Piece pc;
@@ -355,9 +344,6 @@ namespace Learner {
     // En passant square. Ignore if no pawn capture is possible
     if (stream.read_one_bit()) {
       Square ep_square = static_cast<Square>(stream.read_n_bit(6));
-      if (mirror) {
-        ep_square = flip_file(ep_square);
-      }
       pos.st->epSquare = ep_square;
 
       if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
diff --git a/src/learn/sfen_packer.h b/src/learn/sfen_packer.h
index 533d3fc9..5f232fed 100644
--- a/src/learn/sfen_packer.h
+++ b/src/learn/sfen_packer.h
@@ -13,7 +13,7 @@ class Thread;
 
 namespace Learner {
 
-    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th);
     PackedSfen sfen_pack(Position& pos);
 }
 
diff --git a/src/main.cpp b/src/main.cpp
index fbad6622..e6dff918 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -35,6 +35,7 @@ int main(int argc, char* argv[]) {
 
   std::cout << engine_info() << std::endl;
 
+  CommandLine::init(argc, argv);
   UCI::init(Options);
   Tune::init();
   PSQT::init();
@@ -44,7 +45,7 @@ int main(int argc, char* argv[]) {
   Endgames::init();
   Threads.set(size_t(Options["Threads"]));
   Search::clear(); // After threads are up
-  Eval::init_NNUE();
+  Eval::NNUE::init();
 
   UCI::loop(argc, argv);
 
diff --git a/src/misc.cpp b/src/misc.cpp
index 5ef5ecdc..d31538fa 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -132,6 +132,7 @@ public:
 
 } // namespace
 
+
 /// engine_info() returns the full name of the current Stockfish version. This
 /// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
 /// the program was compiled) or "Stockfish <Version>", depending on whether
@@ -356,27 +357,11 @@ void std_aligned_free(void* ptr) {
 #endif
 }
 
-/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
-/// The returned pointer is the aligned one, while the mem argument is the one that needs
-/// to be passed to free. With c++17 some of this functionality could be simplified.
+/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
 
-#if defined(__linux__) && !defined(__ANDROID__)
+#if defined(_WIN32)
 
-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
-
-  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
-  size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
-  if (posix_memalign(&mem, alignment, size))
-     mem = nullptr;
-#if defined(MADV_HUGEPAGE)
-  madvise(mem, allocSize, MADV_HUGEPAGE);
-#endif
-  return mem;
-}
-
-#elif defined(_WIN64)
-
-static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
+static void* aligned_large_pages_alloc_win(size_t allocSize) {
 
   HANDLE hProcessToken { };
   LUID luid { };
@@ -421,12 +406,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
   return mem;
 }
 
-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+void* aligned_large_pages_alloc(size_t allocSize) {
 
   static bool firstCall = true;
+  void* mem;
 
   // Try to allocate large pages
-  mem = aligned_ttmem_alloc_large_pages(allocSize);
+  mem = aligned_large_pages_alloc_win(allocSize);
 
   // Suppress info strings on the first call. The first call occurs before 'uci'
   // is received and in that case this output confuses some GUIs.
@@ -448,23 +434,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
 
 #else
 
-void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
+void* aligned_large_pages_alloc(size_t allocSize) {
 
-  constexpr size_t alignment = 64; // assumed cache line size
-  size_t size = allocSize + alignment - 1; // allocate some extra space
-  mem = malloc(size);
-  void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
-  return ret;
+#if defined(__linux__)
+  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
+#else
+  constexpr size_t alignment = 4096; // assumed small page size
+#endif
+
+  // round up to multiples of alignment
+  size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
+  void *mem = std_aligned_alloc(alignment, size);
+#if defined(MADV_HUGEPAGE)
+  madvise(mem, size, MADV_HUGEPAGE);
+#endif
+  return mem;
 }
 
 #endif
 
 
-/// aligned_ttmem_free() will free the previously allocated ttmem
+/// aligned_large_pages_free() will free the memory previously allocated by aligned_large_pages_alloc()
 
-#if defined(_WIN64)
+#if defined(_WIN32)
 
-void aligned_ttmem_free(void* mem) {
+void aligned_large_pages_free(void* mem) {
 
   if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
   {
@@ -477,8 +471,8 @@ void aligned_ttmem_free(void* mem) {
 
 #else
 
-void aligned_ttmem_free(void *mem) {
-  free(mem);
+void aligned_large_pages_free(void *mem) {
+  std_aligned_free(mem);
 }
 
 #endif
@@ -590,6 +584,63 @@ void bindThisThread(size_t idx) {
 
 } // namespace WinProcGroup
 
+#ifdef _WIN32
+#include <direct.h>
+#define GETCWD _getcwd
+#else
+#include <unistd.h>
+#define GETCWD getcwd
+#endif
+
+namespace CommandLine {
+
+string argv0;            // path+name of the executable binary, as given by argv[0]
+string binaryDirectory;  // path of the executable directory
+string workingDirectory; // path of the working directory
+string pathSeparator;    // Separator for our current OS
+
+// Captures argv[0], the working directory and the directory holding the executable.
+void init(int argc, char* argv[]) {
+
+    // extract the path+name of the executable binary. argv[0] can be missing
+    // (argc == 0 or a null entry) and std::string must not be built from nullptr.
+    argv0 = (argc > 0 && argv[0] != nullptr) ? argv[0] : "";
+
+#ifdef _WIN32
+    pathSeparator = "\\";
+  #ifdef _MSC_VER
+    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
+    // issues in some windows 10 versions, so check returned values carefully.
+    char* pgmptr = nullptr;
+    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
+        argv0 = pgmptr;
+  #endif
+#else
+    pathSeparator = "/";
+#endif
+
+    // extract the working directory
+    workingDirectory = "";
+    char buff[40000];
+    char* cwd = GETCWD(buff, 40000);
+    if (cwd)
+        workingDirectory = cwd;
+
+    // extract the binary directory path from argv0
+    binaryDirectory = argv0;
+    size_t pos = binaryDirectory.find_last_of("\\/");
+    if (pos == std::string::npos)
+        binaryDirectory = "." + pathSeparator;
+    else
+        binaryDirectory.resize(pos + 1);
+
+    // pattern replacement: "./" at the start of path is replaced by the working directory
+    if (binaryDirectory.find("." + pathSeparator) == 0)
+        binaryDirectory.replace(0, 1, workingDirectory);
+}
+
+
+} // namespace CommandLine
 // Returns a string that represents the current time. (Used when learning evaluation functions)
 std::string now_string()
 {
diff --git a/src/misc.h b/src/misc.h
index 5b7c8870..6696b0a8 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -39,8 +39,8 @@ void prefetch(void* addr);
 void start_logger(const std::string& fname);
 void* std_aligned_alloc(size_t alignment, size_t size);
 void std_aligned_free(void* ptr);
-void* aligned_ttmem_alloc(size_t size, void*& mem);
-void aligned_ttmem_free(void* mem); // nop if mem == nullptr
+void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
+void aligned_large_pages_free(void* mem); // nop if mem == nullptr
 
 void dbg_hit_on(bool b);
 void dbg_hit_on(bool c, bool b);
@@ -48,9 +48,7 @@ void dbg_mean_of(int v);
 void dbg_print();
 
 typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
-
 static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
-
 inline TimePoint now() {
   return std::chrono::duration_cast<std::chrono::milliseconds>
         (std::chrono::steady_clock::now().time_since_epoch()).count();
@@ -337,4 +335,11 @@ namespace Dependency
   extern bool getline(std::ifstream& fs, std::string& s);
 }
 
+namespace CommandLine {
+  void init(int argc, char* argv[]);
+
+  extern std::string binaryDirectory;  // path of the executable directory
+  extern std::string workingDirectory; // path of the working directory
+}
+
 #endif // #ifndef MISC_H_INCLUDED
diff --git a/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h b/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h
index 37b155d5..a90de8e6 100644
--- a/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h
+++ b/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h
@@ -1,7 +1,25 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
 // Definition of input features and network structure used in NNUE evaluation function
 
-#ifndef HALFKP_CR_EP_256X2_32_32_H
-#define HALFKP_CR_EP_256X2_32_32_H
+#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
 
 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
@@ -12,31 +30,28 @@
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"
 
-namespace Eval {
+namespace Eval::NNUE {
 
-  namespace NNUE {
+// Input features used in evaluation function
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
+    Features::EnPassant>;
 
-    // Input features used in evaluation function
-    using RawFeatures = Features::FeatureSet<
-      Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
-      Features::EnPassant>;
+// Number of input feature dimensions after conversion
+constexpr IndexType kTransformedFeatureDimensions = 256;
 
-    // Number of input feature dimensions after conversion
-    constexpr IndexType kTransformedFeatureDimensions = 256;
+namespace Layers {
 
-    namespace Layers {
+// Define network structure
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>;
 
-      // define network structure
-      using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
-      using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
-      using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
-      using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+}  // namespace Layers
 
-    }  // namespace Layers
+using Network = Layers::OutputLayer;
 
-    using Network = Layers::OutputLayer;
+}  // namespace Eval::NNUE
 
-  }  // namespace NNUE
-
-}  // namespace Eval
-#endif // HALFKP_CR_EP_256X2_32_32_H
+#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
diff --git a/src/nnue/architectures/halfkp-cr_256x2-32-32.h b/src/nnue/architectures/halfkp-cr_256x2-32-32.h
new file mode 100644
index 00000000..df14f499
--- /dev/null
+++ b/src/nnue/architectures/halfkp-cr_256x2-32-32.h
@@ -0,0 +1,37 @@
+// Definition of input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
+
+#include "../features/feature_set.h"
+#include "../features/half_kp.h"
+#include "../features/castling_right.h"
+
+#include "../layers/input_slice.h"
+#include "../layers/affine_transform.h"
+#include "../layers/clipped_relu.h"
+
+namespace Eval::NNUE {
+
+// Input features used in evaluation function
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight>;
+
+// Number of input feature dimensions after conversion
+constexpr IndexType kTransformedFeatureDimensions = 256;
+
+namespace Layers {
+
+// Define network structure
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+
+}  // namespace Layers
+
+using Network = Layers::OutputLayer;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
diff --git a/src/nnue/architectures/k-p_256x2-32-32.h b/src/nnue/architectures/k-p_256x2-32-32.h
index 00b14d47..0f340dee 100644
--- a/src/nnue/architectures/k-p_256x2-32-32.h
+++ b/src/nnue/architectures/k-p_256x2-32-32.h
@@ -1,4 +1,5 @@
 ﻿// Definition of input features and network structure used in NNUE evaluation function
+
 #ifndef K_P_256X2_32_32_H
 #define K_P_256X2_32_32_H
 
diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
index a2845c96..28c86feb 100644
--- a/src/nnue/evaluate_nnue.cpp
+++ b/src/nnue/evaluate_nnue.cpp
@@ -18,7 +18,6 @@
 
 // Code for calculating NNUE evaluation function
 
-#include <fstream>
 #include <iostream>
 #include <set>
 
@@ -31,7 +30,7 @@
 
 namespace Eval::NNUE {
 
-  uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
+  const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
    // convention: W - us, B - them
    // viewed from other side, W and B are reversed
       { PS_NONE,     PS_NONE     },
@@ -53,7 +52,7 @@ namespace Eval::NNUE {
   };
 
   // Input feature converter
-  AlignedPtr<FeatureTransformer> feature_transformer;
+  LargePagePtr<FeatureTransformer> feature_transformer;
 
   // Evaluation function
   AlignedPtr<Network> network;
@@ -80,14 +79,22 @@ namespace Eval::NNUE {
     std::memset(pointer.get(), 0, sizeof(T));
   }
 
+  template <typename T>
+  void Initialize(LargePagePtr<T>& pointer) {
+    // Allocate storage for one T via aligned_large_pages_alloc() and zero-fill it.
+    static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
+    pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
+    std::memset(pointer.get(), 0, sizeof(T));  // NOTE(review): the allocation result is not checked for nullptr before this memset
+  }
+
   // Read evaluation function parameters
   template <typename T>
-  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
+  bool ReadParameters(std::istream& stream, T& reference) {
 
     std::uint32_t header;
     header = read_little_endian<std::uint32_t>(stream);
     if (!stream || header != T::GetHashValue()) return false;
-    return pointer->ReadParameters(stream);
+    return reference.ReadParameters(stream);
   }
 
   // write evaluation function parameters
@@ -98,6 +105,13 @@ namespace Eval::NNUE {
     return pointer->WriteParameters(stream);
   }
 
+  template <typename T>
+  bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {  // LargePagePtr overload
+    constexpr std::uint32_t header = T::GetHashValue();  // hash value of T, emitted first as a 32-bit header
+    stream.write(reinterpret_cast<const char*>(&header), sizeof(header));  // NOTE(review): written in host byte order, while ReadParameters() reads the header via read_little_endian — confirm for big-endian targets
+    return pointer->WriteParameters(stream);  // then the pointed-to object writes its own parameters
+  }
+
   }  // namespace Detail
 
   // Initialize the evaluation function parameters
@@ -139,11 +153,10 @@ namespace Eval::NNUE {
     std::string architecture;
     if (!ReadHeader(stream, &hash_value, &architecture)) return false;
     if (hash_value != kHashValue) return false;
-    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
-    if (!Detail::ReadParameters(stream, network)) return false;
+    if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
+    if (!Detail::ReadParameters(stream, *network)) return false;
     return stream && stream.peek() == std::ios::traits_type::eof();
   }
-
   // write evaluation function parameters
   bool WriteParameters(std::ostream& stream) {
     if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
@@ -151,36 +164,20 @@ namespace Eval::NNUE {
     if (!Detail::WriteParameters(stream, network)) return false;
     return !stream.fail();
   }
-
-  // Proceed with the difference calculation if possible
-  static void UpdateAccumulatorIfPossible(const Position& pos) {
-
-    feature_transformer->UpdateAccumulatorIfPossible(pos);
-  }
-
-  // Calculate the evaluation value
-  static Value ComputeScore(const Position& pos, bool refresh) {
-
-    auto& accumulator = pos.state()->accumulator;
-    if (!refresh && accumulator.computed_score) {
-      return accumulator.score;
-    }
+  // Evaluation function. Perform differential calculation.
+  Value evaluate(const Position& pos) {
 
     alignas(kCacheLineSize) TransformedFeatureType
         transformed_features[FeatureTransformer::kBufferSize];
-    feature_transformer->Transform(pos, transformed_features, refresh);
+    feature_transformer->Transform(pos, transformed_features);
     alignas(kCacheLineSize) char buffer[Network::kBufferSize];
     const auto output = network->Propagate(transformed_features, buffer);
 
-    auto score = static_cast<Value>(output[0] / FV_SCALE);
-
-    accumulator.score = score;
-    accumulator.computed_score = true;
-    return accumulator.score;
+    return static_cast<Value>(output[0] / FV_SCALE);
   }
 
-  // Load the evaluation function file
-  bool load_eval_file(const std::string& evalFile) {
+  // Load eval, from a file stream or a memory stream
+  bool load_eval(std::string name, std::istream& stream) {
 
     Initialize();
 
@@ -189,29 +186,8 @@ namespace Eval::NNUE {
       std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
       return true;
     }
-
-    fileName = evalFile;
-
-    std::ifstream stream(evalFile, std::ios::binary);
-
-    const bool result = ReadParameters(stream);
-
-    return result;
-  }
-
-  // Evaluation function. Perform differential calculation.
-  Value evaluate(const Position& pos) {
-    return ComputeScore(pos, false);
-  }
-
-  // Evaluation function. Perform full calculation.
-  Value compute_eval(const Position& pos) {
-    return ComputeScore(pos, true);
-  }
-
-  // Proceed with the difference calculation if possible
-  void update_eval(const Position& pos) {
-    UpdateAccumulatorIfPossible(pos);
+    fileName = name;
+    return ReadParameters(stream);
   }
 
 } // namespace Eval::NNUE
diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h
index 75700d03..68153cac 100644
--- a/src/nnue/evaluate_nnue.h
+++ b/src/nnue/evaluate_nnue.h
@@ -40,11 +40,22 @@ namespace Eval::NNUE {
     }
   };
 
+  template <typename T>
+  struct LargePageDeleter {  // deleter used by LargePagePtr (a std::unique_ptr alias)
+    void operator()(T* ptr) const {
+      ptr->~T();                      // run the destructor explicitly; the storage is released separately below
+      aligned_large_pages_free(ptr);  // release the storage through the large-page free routine
+    }
+  };
+
   template <typename T>
   using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
 
+  template <typename T>
+  using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
+
   // Input feature converter
-  extern AlignedPtr<FeatureTransformer> feature_transformer;
+  extern LargePagePtr<FeatureTransformer> feature_transformer;
 
   // Evaluation function
   extern AlignedPtr<Network> network;
diff --git a/src/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
index ea680e31..2d6c6db3 100644
--- a/src/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -5,15 +5,12 @@
 #include <filesystem>
 
 #include "../learn/learn.h"
-#include "../learn/learning_tools.h"
 
 #include "../position.h"
 #include "../uci.h"
 #include "../misc.h"
 #include "../thread_win32_osx.h"
 
-#include "../eval/evaluate_common.h"
-
 #include "evaluate_nnue.h"
 #include "evaluate_nnue_learner.h"
 #include "trainer/features/factorizer_feature_set.h"
@@ -24,215 +21,191 @@
 #include "trainer/trainer_clipped_relu.h"
 #include "trainer/trainer_sum.h"
 
-namespace Eval {
-
-namespace NNUE {
-
-namespace {
-
-// learning data
-std::vector<Example> examples;
-
-// Mutex for exclusive control of examples
-std::mutex examples_mutex;
-
-// number of samples in mini-batch
-uint64_t batch_size;
-
-// random number generator
-std::mt19937 rng;
-
-// learner
-std::shared_ptr<Trainer<Network>> trainer;
-
 // Learning rate scale
-double global_learning_rate_scale;
+double global_learning_rate;
 
-// Get the learning rate scale
-double GetGlobalLearningRateScale() {
-  return global_learning_rate_scale;
-}
+namespace Eval::NNUE {
 
-// Tell the learner options such as hyperparameters
-void SendMessages(std::vector<Message> messages) {
-  for (auto& message : messages) {
-    trainer->SendMessage(&message);
-    assert(message.num_receivers > 0);
-  }
-}
+  namespace {
 
-}  // namespace
+    // learning data
+    std::vector<Example> examples;
 
-// Initialize learning
-void InitializeTraining(double eta1, uint64_t eta1_epoch,
-                        double eta2, uint64_t eta2_epoch, double eta3) {
-  std::cout << "Initializing NN training for "
-            << GetArchitectureString() << std::endl;
+    // Mutex for exclusive control of examples
+    std::mutex examples_mutex;
 
-  assert(feature_transformer);
-  assert(network);
-  trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
+    // number of samples in mini-batch
+    uint64_t batch_size;
 
-  if (Options["SkipLoadingEval"]) {
-    trainer->Initialize(rng);
-  }
+    // random number generator
+    std::mt19937 rng;
 
-  global_learning_rate_scale = 1.0;
-  EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
-}
+    // learner
+    std::shared_ptr<Trainer<Network>> trainer;
 
-// set the number of samples in the mini-batch
-void SetBatchSize(uint64_t size) {
-  assert(size > 0);
-  batch_size = size;
-}
-
-// set the learning rate scale
-void SetGlobalLearningRateScale(double scale) {
-  global_learning_rate_scale = scale;
-}
-
-// Set options such as hyperparameters
-void SetOptions(const std::string& options) {
-  std::vector<Message> messages;
-  for (const auto& option : Split(options, ',')) {
-    const auto fields = Split(option, '=');
-    assert(fields.size() == 1 || fields.size() == 2);
-    if (fields.size() == 1) {
-      messages.emplace_back(fields[0]);
-    } else {
-      messages.emplace_back(fields[0], fields[1]);
-    }
-  }
-  SendMessages(std::move(messages));
-}
-
-// Reread the evaluation function parameters for learning from the file
-void RestoreParameters(const std::string& dir_name) {
-  const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
-  std::ifstream stream(file_name, std::ios::binary);
-#ifndef NDEBUG
-  bool result =
-#endif
-  ReadParameters(stream);
-#ifndef NDEBUG
-  assert(result);
-#endif
-
-  SendMessages({{"reset"}});
-}
-
-// Add 1 sample of learning data
-void AddExample(Position& pos, Color rootColor,
-                const Learner::PackedSfenValue& psv, double weight) {
-  Example example;
-  if (rootColor == pos.side_to_move()) {
-    example.sign = 1;
-  } else {
-    example.sign = -1;
-  }
-  example.psv = psv;
-  example.weight = weight;
-
-  Features::IndexList active_indices[2];
-  for (const auto trigger : kRefreshTriggers) {
-    RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
-  }
-  if (pos.side_to_move() != WHITE) {
-    active_indices[0].swap(active_indices[1]);
-  }
-  for (const auto color : Colors) {
-    std::vector<TrainingFeature> training_features;
-    for (const auto base_index : active_indices[color]) {
-      static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
-                    (1 << TrainingFeature::kIndexBits), "");
-      Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
-          base_index, &training_features);
-    }
-    std::sort(training_features.begin(), training_features.end());
-
-    auto& unique_features = example.training_features[color];
-    for (const auto& feature : training_features) {
-      if (!unique_features.empty() &&
-          feature.GetIndex() == unique_features.back().GetIndex()) {
-        unique_features.back() += feature;
-      } else {
-        unique_features.push_back(feature);
+    // Tell the learner options such as hyperparameters
+    void SendMessages(std::vector<Message> messages) {
+      for (auto& message : messages) {
+        trainer->SendMessage(&message);
+        assert(message.num_receivers > 0);
       }
     }
+
+  }  // namespace
+
+  // Initialize learning
+  void InitializeTraining(const std::string& seed) {
+    std::cout << "Initializing NN training for "
+              << GetArchitectureString() << std::endl;
+
+    assert(feature_transformer);
+    assert(network);
+    trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
+    rng.seed(PRNG(seed).rand<uint64_t>());
+
+    if (Options["SkipLoadingEval"]) {
+      trainer->Initialize(rng);
+    }
   }
 
-  std::lock_guard<std::mutex> lock(examples_mutex);
-  examples.push_back(std::move(example));
-}
+  // set the number of samples in the mini-batch
+  void SetBatchSize(uint64_t size) {
+    assert(size > 0);
+    batch_size = size;
+  }
+  
+  // Set options such as hyperparameters
+  void SetOptions(const std::string& options) {
+    std::vector<Message> messages;
+    for (const auto& option : Split(options, ',')) {
+      const auto fields = Split(option, '=');
+      assert(fields.size() == 1 || fields.size() == 2);
+      if (fields.size() == 1) {
+        messages.emplace_back(fields[0]);
+      } else {
+        messages.emplace_back(fields[0], fields[1]);
+      }
+    }
+    SendMessages(std::move(messages));
+  }
 
-// update the evaluation function parameters
-void UpdateParameters(uint64_t epoch) {
-  assert(batch_size > 0);
+  // Reread the evaluation function parameters for learning from the file
+  void RestoreParameters(const std::string& dir_name) {
+    const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
+    std::ifstream stream(file_name, std::ios::binary);
+#ifndef NDEBUG
+    bool result =
+#endif
+    ReadParameters(stream);
+#ifndef NDEBUG
+    assert(result);
+#endif
 
-  EvalLearningTools::Weight::calc_eta(epoch);
-  const auto learning_rate = static_cast<LearnFloatType>(
-      get_eta() / batch_size);
+    SendMessages({{"reset"}});
+  }
 
-  std::lock_guard<std::mutex> lock(examples_mutex);
-  std::shuffle(examples.begin(), examples.end(), rng);
-  while (examples.size() >= batch_size) {
-    std::vector<Example> batch(examples.end() - batch_size, examples.end());
-    examples.resize(examples.size() - batch_size);
+  void FinalizeNet() {
+    SendMessages({{"clear_unobserved_feature_weights"}});
+  }
 
-    const auto network_output = trainer->Propagate(batch);
+  // Add 1 sample of learning data
+  void AddExample(Position& pos, Color rootColor,
+                  const Learner::PackedSfenValue& psv, double weight) {
+    Example example;
+    if (rootColor == pos.side_to_move()) {
+      example.sign = 1;
+    } else {
+      example.sign = -1;
+    }
+    example.psv = psv;
+    example.weight = weight;
 
-    std::vector<LearnFloatType> gradients(batch.size());
-    for (std::size_t b = 0; b < batch.size(); ++b) {
-      const auto shallow = static_cast<Value>(Round<std::int32_t>(
-          batch[b].sign * network_output[b] * kPonanzaConstant));
-      const auto& psv = batch[b].psv;
-      const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
-      gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
+    Features::IndexList active_indices[2];
+    for (const auto trigger : kRefreshTriggers) {
+      RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
+    }
+    if (pos.side_to_move() != WHITE) {
+      active_indices[0].swap(active_indices[1]);
+    }
+    for (const auto color : Colors) {
+      std::vector<TrainingFeature> training_features;
+      for (const auto base_index : active_indices[color]) {
+        static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
+                      (1 << TrainingFeature::kIndexBits), "");
+        Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
+            base_index, &training_features);
+      }
+      std::sort(training_features.begin(), training_features.end());
+
+      auto& unique_features = example.training_features[color];
+      for (const auto& feature : training_features) {
+        if (!unique_features.empty() &&
+            feature.GetIndex() == unique_features.back().GetIndex()) {
+          unique_features.back() += feature;
+        } else {
+          unique_features.push_back(feature);
+        }
+      }
     }
 
-    trainer->Backpropagate(gradients.data(), learning_rate);
-  }
-  SendMessages({{"quantize_parameters"}});
-}
-
-// Check if there are any problems with learning
-void CheckHealth() {
-  SendMessages({{"check_health"}});
-}
-
-}  // namespace NNUE
-
-// save merit function parameters to a file
-void save_eval(std::string dir_name) {
-  auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
-  std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
-
-  // mkdir() will fail if this folder already exists, but
-  // Apart from that. If not, I just want you to make it.
-  // Also, assume that the folders up to EvalSaveDir have been dug.
-  std::filesystem::create_directories(eval_dir);
-
-  if (Options["SkipLoadingEval"] && NNUE::trainer) {
-    NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
+    std::lock_guard<std::mutex> lock(examples_mutex);
+    examples.push_back(std::move(example));
   }
 
-  const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
-  std::ofstream stream(file_name, std::ios::binary);
+  // update the evaluation function parameters
+  void UpdateParameters() {
+    assert(batch_size > 0);
+
+    const auto learning_rate = static_cast<LearnFloatType>(
+        global_learning_rate / batch_size);
+
+    std::lock_guard<std::mutex> lock(examples_mutex);
+    std::shuffle(examples.begin(), examples.end(), rng);
+    while (examples.size() >= batch_size) {
+      std::vector<Example> batch(examples.end() - batch_size, examples.end());
+      examples.resize(examples.size() - batch_size);
+
+      const auto network_output = trainer->Propagate(batch);
+
+      std::vector<LearnFloatType> gradients(batch.size());
+      for (std::size_t b = 0; b < batch.size(); ++b) {
+        const auto shallow = static_cast<Value>(Round<std::int32_t>(
+            batch[b].sign * network_output[b] * kPonanzaConstant));
+        const auto& psv = batch[b].psv;
+        const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
+        gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
+      }
+
+      trainer->Backpropagate(gradients.data(), learning_rate);
+    }
+    SendMessages({{"quantize_parameters"}});
+  }
+
+  // Check if there are any problems with learning
+  void CheckHealth() {
+    SendMessages({{"check_health"}});
+  }
+
+  // Save the evaluation function parameters to a file
+  void save_eval(std::string dir_name) {
+    auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
+    std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
+
+    // Make sure the output folder exists before writing into it.
+    // It is not an error if the folder already exists; we only need it
+    // to be there. Missing parent folders are created as well.
+    std::filesystem::create_directories(eval_dir);
+
+    const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
+    std::ofstream stream(file_name, std::ios::binary);
 #ifndef NDEBUG
-  const bool result =
+    bool result =
 #endif
-  NNUE::WriteParameters(stream);
+    WriteParameters(stream);
 #ifndef NDEBUG
-  assert(result);
+    assert(result);
 #endif
 
-  std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
-}
-
-// get the current eta
-double get_eta() {
-  return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
-}
-
-}  // namespace Eval
+    std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
+  }
+}  // namespace Eval::NNUE
\ No newline at end of file
diff --git a/src/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
index e9bd2fd2..c41d8d6b 100644
--- a/src/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -5,38 +5,33 @@
 
 #include "../learn/learn.h"
 
-namespace Eval {
+namespace Eval::NNUE {
 
-namespace NNUE {
+  // Initialize learning
+  void InitializeTraining(const std::string& seed);
 
-// Initialize learning
-void InitializeTraining(double eta1, uint64_t eta1_epoch,
-                        double eta2, uint64_t eta2_epoch, double eta3);
+  // set the number of samples in the mini-batch
+  void SetBatchSize(uint64_t size);
 
-// set the number of samples in the mini-batch
-void SetBatchSize(uint64_t size);
+  // Set options such as hyperparameters
+  void SetOptions(const std::string& options);
 
-// set the learning rate scale
-void SetGlobalLearningRateScale(double scale);
-
-// Set options such as hyperparameters
-void SetOptions(const std::string& options);
-
-// Reread the evaluation function parameters for learning from the file
-void RestoreParameters(const std::string& dir_name);
+  // Reread the evaluation function parameters for learning from the file
+  void RestoreParameters(const std::string& dir_name);
 
 // Add 1 sample of learning data
-void AddExample(Position& pos, Color rootColor,
-                const Learner::PackedSfenValue& psv, double weight);
+  void AddExample(Position& pos, Color rootColor,
+  	const Learner::PackedSfenValue& psv, double weight);
 
-// update the evaluation function parameters
-void UpdateParameters(uint64_t epoch);
+  // update the evaluation function parameters
+  void UpdateParameters();
 
-// Check if there are any problems with learning
-void CheckHealth();
+  // Check if there are any problems with learning
+  void CheckHealth();
 
-}  // namespace NNUE
+  void FinalizeNet();
 
-}  // namespace Eval
+  void save_eval(std::string suffix);
+}  // namespace Eval::NNUE
 
 #endif
diff --git a/src/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp
index ee2c88cf..2d7f563a 100644
--- a/src/nnue/features/castling_right.cpp
+++ b/src/nnue/features/castling_right.cpp
@@ -1,69 +1,40 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity CastlingRight of NNUE evaluation function
 
 #include "castling_right.h"
 #include "index_list.h"
 
-namespace Eval {
+namespace Eval::NNUE::Features {
 
-  namespace NNUE {
+  // Get a list of indices with a value of 1 among the features
+  void CastlingRight::AppendActiveIndices(
+    const Position& pos, Color perspective, IndexList* active) {
+    // do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
 
-    namespace Features {
+    int castling_rights = pos.state()->castlingRights;
+    int relative_castling_rights;
+    if (perspective == WHITE) {
+      relative_castling_rights = castling_rights;
+    }
+    else {
+      // Invert the perspective: swap bits 0-1 with bits 2-3. The two halves are disjoint, so OR them (AND is always 0).
+      relative_castling_rights = ((castling_rights & 3) << 2)
+        | ((castling_rights >> 2) & 3);
+    }
 
-      // Get a list of indices with a value of 1 among the features
-      void CastlingRight::AppendActiveIndices(
-        const Position& pos, Color perspective, IndexList* active) {
-        // do nothing if array size is small to avoid compiler warning
-        if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-
-        int castling_rights = pos.state()->castlingRights;
-        int relative_castling_rights;
-        if (perspective == WHITE) {
-          relative_castling_rights = castling_rights;
-        }
-        else {
-          // Invert the perspective.
-          relative_castling_rights = ((castling_rights & 3) << 2)
-            & ((castling_rights >> 2) & 3);
-        }
-
-        for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
-          if (relative_castling_rights & (1 << i)) {
-            active->push_back(i);
-          }
-        }
+    for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
+      if (relative_castling_rights & (1 << i)) {
+        active->push_back(i);
       }
+    }
+  }
 
-      // Get a list of indices whose values ​​have changed from the previous one in the feature quantity
-      void CastlingRight::AppendChangedIndices(
-        const Position& pos, Color perspective,
-        IndexList* removed, IndexList* /* added */) {
+  // Get a list of indices whose values have changed from the previous one in the feature quantity
+  void CastlingRight::AppendChangedIndices(
+    const Position& /* pos */, Color /* perspective */,
+    IndexList* /* removed */, IndexList* /* added */) {
+    // Not implemented.
+    assert(false);
+  }
 
-        int previous_castling_rights = pos.state()->previous->castlingRights;
-        int current_castling_rights = pos.state()->castlingRights;
-        int relative_previous_castling_rights;
-        int relative_current_castling_rights;
-        if (perspective == WHITE) {
-          relative_previous_castling_rights = previous_castling_rights;
-          relative_current_castling_rights = current_castling_rights;
-        }
-        else {
-          // Invert the perspective.
-          relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
-            & ((previous_castling_rights >> 2) & 3);
-          relative_current_castling_rights = ((current_castling_rights & 3) << 2)
-            & ((current_castling_rights >> 2) & 3);
-        }
-
-        for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
-          if ((relative_previous_castling_rights & (1 << i)) &&
-            (relative_current_castling_rights & (1 << i)) == 0) {
-            removed->push_back(i);
-          }
-        }
-      }
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features
diff --git a/src/nnue/features/castling_right.h b/src/nnue/features/castling_right.h
index 27074080..3a09e14b 100644
--- a/src/nnue/features/castling_right.h
+++ b/src/nnue/features/castling_right.h
@@ -1,4 +1,4 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity CastlingRight of NNUE evaluation function
 
 #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
 #define _NNUE_FEATURES_CASTLING_RIGHT_H_
@@ -6,39 +6,30 @@
 #include "../../evaluate.h"
 #include "features_common.h"
 
-namespace Eval {
+namespace Eval::NNUE::Features {
 
-  namespace NNUE {
+  class CastlingRight {
+  public:
+    // feature quantity name
+    static constexpr const char* kName = "CastlingRight";
+    // Hash value embedded in the evaluation function file
+    static constexpr std::uint32_t kHashValue = 0x913968AAu;
+    // number of feature dimensions
+    static constexpr IndexType kDimensions = 4;
+    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
+    static constexpr IndexType kMaxActiveDimensions = 4;
+    // Timing of full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
 
-    namespace Features {
+    // Get a list of indices with a value of 1 among the features
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+      IndexList* active);
 
-      // Feature K: Ball position
-      class CastlingRight {
-      public:
-        // feature quantity name
-        static constexpr const char* kName = "CastlingRight";
-        // Hash value embedded in the evaluation function file
-        static constexpr std::uint32_t kHashValue = 0x913968AAu;
-        // number of feature dimensions
-        static constexpr IndexType kDimensions = 4;
-        // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-        static constexpr IndexType kMaxActiveDimensions = 4;
-        // Timing of full calculation instead of difference calculation
-        static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added);
+  };
 
-        // Get a list of indices with a value of 1 among the features
-        static void AppendActiveIndices(const Position& pos, Color perspective,
-          IndexList* active);
-
-        // Get a list of indices whose values ??have changed from the previous one in the feature quantity
-        static void AppendChangedIndices(const Position& pos, Color perspective,
-          IndexList* removed, IndexList* added);
-      };
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features
 
 #endif
diff --git a/src/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp
index 386bd907..d771a85c 100644
--- a/src/nnue/features/enpassant.cpp
+++ b/src/nnue/features/enpassant.cpp
@@ -1,43 +1,30 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity EnPassant of NNUE evaluation function
 
 #include "enpassant.h"
 #include "index_list.h"
 
-namespace Eval {
+namespace Eval::NNUE::Features {
 
-  namespace NNUE {
+  // Get a list of indices with a value of 1 among the features
+  void EnPassant::AppendActiveIndices(
+    const Position& pos, Color /* perspective */, IndexList* active) {
+    // do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
 
-    namespace Features {
+    auto epSquare = pos.state()->epSquare;
+    if (epSquare == SQ_NONE) {
+      return;
+    }
+    auto file = file_of(epSquare);
+    active->push_back(file);
+  }
 
-      // Get a list of indices with a value of 1 among the features
-      void EnPassant::AppendActiveIndices(
-        const Position& pos, Color perspective, IndexList* active) {
-        // do nothing if array size is small to avoid compiler warning
-        if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+  // Get a list of indices whose values have changed from the previous one in the feature quantity
+  void EnPassant::AppendChangedIndices(
+    const Position& /* pos */, Color /* perspective */,
+    IndexList* /* removed */, IndexList* /* added */) {
+    // Not implemented.
+    assert(false);
+  }
 
-        auto epSquare = pos.state()->epSquare;
-        if (epSquare == SQ_NONE) {
-          return;
-        }
-
-        if (perspective == BLACK) {
-          epSquare = rotate180(epSquare);
-        }
-
-        auto file = file_of(epSquare);
-        active->push_back(file);
-      }
-
-      // Get a list of indices whose values ??have changed from the previous one in the feature quantity
-      void EnPassant::AppendChangedIndices(
-        const Position& /* pos */, Color /* perspective */,
-        IndexList* /* removed */, IndexList* /* added */) {
-        // Not implemented.
-        assert(false);
-      }
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features
diff --git a/src/nnue/features/enpassant.h b/src/nnue/features/enpassant.h
index 70a8eb5a..efa5eae9 100644
--- a/src/nnue/features/enpassant.h
+++ b/src/nnue/features/enpassant.h
@@ -1,4 +1,4 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity EnPassant of NNUE evaluation function
 
 #ifndef _NNUE_FEATURES_ENPASSANT_H_
 #define _NNUE_FEATURES_ENPASSANT_H_
@@ -6,39 +6,30 @@
 #include "../../evaluate.h"
 #include "features_common.h"
 
-namespace Eval {
+namespace Eval::NNUE::Features {
 
-  namespace NNUE {
+  class EnPassant {
+  public:
+    // feature quantity name
+    static constexpr const char* kName = "EnPassant";
+    // Hash value embedded in the evaluation function file
+    static constexpr std::uint32_t kHashValue = 0x02924F91u;
+    // number of feature dimensions
+    static constexpr IndexType kDimensions = 8;
+    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
+    static constexpr IndexType kMaxActiveDimensions = 1;
+    // Timing of full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
 
-    namespace Features {
+    // Get a list of indices with a value of 1 among the features
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+      IndexList* active);
 
-      // Feature K: Ball position
-      class EnPassant {
-      public:
-        // feature quantity name
-        static constexpr const char* kName = "EnPassant";
-        // Hash value embedded in the evaluation function file
-        static constexpr std::uint32_t kHashValue = 0x02924F91u;
-        // number of feature dimensions
-        static constexpr IndexType kDimensions = 8;
-        // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-        static constexpr IndexType kMaxActiveDimensions = 1;
-        // Timing of full calculation instead of difference calculation
-        static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added);
+  };
 
-        // Get a list of indices with a value of 1 among the features
-        static void AppendActiveIndices(const Position& pos, Color perspective,
-          IndexList* active);
-
-        // Get a list of indices whose values ??have changed from the previous one in the feature quantity
-        static void AppendChangedIndices(const Position& pos, Color perspective,
-          IndexList* removed, IndexList* added);
-      };
-
-    }  // namespace Features
-
-  }  // namespace NNUE
-
-}  // namespace Eval
+}  // namespace Eval::NNUE::Features
 
 #endif
diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h
index b933d2d9..2ef92e8e 100644
--- a/src/nnue/features/feature_set.h
+++ b/src/nnue/features/feature_set.h
@@ -105,9 +105,20 @@ namespace Eval::NNUE::Features {
       for (Color perspective : { WHITE, BLACK }) {
         reset[perspective] = false;
         switch (trigger) {
+          case TriggerEvent::kNone:
+            break;
           case TriggerEvent::kFriendKingMoved:
             reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
             break;
+          case TriggerEvent::kEnemyKingMoved:
+              reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
+            break;
+          case TriggerEvent::kAnyKingMoved:
+            reset[perspective] = type_of(dp.piece[0]) == KING;
+            break;
+          case TriggerEvent::kAnyPieceMoved:
+            reset[perspective] = true;
+            break;
           default:
             assert(false);
             break;
diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h
index 3377cd8f..656502a3 100644
--- a/src/nnue/features/features_common.h
+++ b/src/nnue/features/features_common.h
@@ -34,10 +34,10 @@ namespace Eval::NNUE::Features {
   // Trigger to perform full calculations instead of difference only
   enum class TriggerEvent {
     kNone, // Calculate the difference whenever possible
-    kFriendKingMoved, // calculate all when own ball moves
-    kEnemyKingMoved, // do all calculations when enemy balls move
-    kAnyKingMoved, // do all calculations if either ball moves
-    kAnyPieceMoved, // always do all calculations
+    kFriendKingMoved, // calculate full evaluation when own king moves
+    kEnemyKingMoved, // calculate full evaluation when opponent king moves
+    kAnyKingMoved, // calculate full evaluation when any king moves
+    kAnyPieceMoved, // always calculate full evaluation
   };
 
   enum class Side {
diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp
index 88e384a3..ff20a00a 100644
--- a/src/nnue/features/half_kp.cpp
+++ b/src/nnue/features/half_kp.cpp
@@ -23,9 +23,9 @@
 
 namespace Eval::NNUE::Features {
 
-  // Orient a square according to perspective (rotates by 180 for black)
+  // Orient a square according to perspective (flip rank for black)
   inline Square orient(Color perspective, Square s) {
-    return Square(int(s) ^ (bool(perspective) * 63));
+    return Square(int(s) ^ (bool(perspective) * SQ_A8));
   }
 
   // Find the index of the feature quantity from the king position and PieceSquare
diff --git a/src/nnue/features/half_relative_kp.cpp b/src/nnue/features/half_relative_kp.cpp
index 597d65fb..8a61bada 100644
--- a/src/nnue/features/half_relative_kp.cpp
+++ b/src/nnue/features/half_relative_kp.cpp
@@ -9,9 +9,9 @@ namespace NNUE {
 
 namespace Features {
 
-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }
 
 // Find the index of the feature quantity from the ball position and PieceSquare
diff --git a/src/nnue/features/k.cpp b/src/nnue/features/k.cpp
index 38ec9997..bd8d7dd0 100644
--- a/src/nnue/features/k.cpp
+++ b/src/nnue/features/k.cpp
@@ -9,9 +9,9 @@ namespace NNUE {
 
 namespace Features {
 
-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }
 
 // Index of a feature for a given king position.
@@ -32,19 +32,11 @@ void K::AppendChangedIndices(
     const Position& pos, Color perspective,
     IndexList* removed, IndexList* added) {
   const auto& dp = pos.state()->dirtyPiece;
-  Color king_color;
-  if (dp.piece[0] == Piece::W_KING) {
-    king_color = WHITE;
+  if (type_of(dp.piece[0]) == KING)
+  {
+    removed->push_back(MakeIndex(perspective, dp.from[0], color_of(dp.piece[0])));
+    added->push_back(MakeIndex(perspective, dp.to[0], color_of(dp.piece[0])));
   }
-  else if (dp.piece[0] == Piece::B_KING) {
-    king_color = BLACK;
-  }
-  else {
-    return;
-  }
-
-  removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
-  added->push_back(MakeIndex(perspective, dp.to[0], king_color));
 }
 
 }  // namespace Features
diff --git a/src/nnue/features/p.cpp b/src/nnue/features/p.cpp
index 0c1b7d50..012311ac 100644
--- a/src/nnue/features/p.cpp
+++ b/src/nnue/features/p.cpp
@@ -9,9 +9,9 @@ namespace NNUE {
 
 namespace Features {
 
-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }
 
 // Find the index of the feature quantity from the king position and PieceSquare
diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h
index 69dfaad2..26370710 100644
--- a/src/nnue/nnue_accumulator.h
+++ b/src/nnue/nnue_accumulator.h
@@ -29,9 +29,7 @@ namespace Eval::NNUE {
   struct alignas(kCacheLineSize) Accumulator {
     std::int16_t
         accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-    Value score;
     bool computed_accumulation;
-    bool computed_score;
   };
 
 }  // namespace Eval::NNUE
diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h
index 91cdc4bd..c395d515 100644
--- a/src/nnue/nnue_architecture.h
+++ b/src/nnue/nnue_architecture.h
@@ -22,7 +22,7 @@
 #define NNUE_ARCHITECTURE_H_INCLUDED
 
 // Defines the network structure
-#include "architectures/halfkp_256x2-32-32.h"
+#include "architectures/halfkp-cr-ep_256x2-32-32.h"
 
 namespace Eval::NNUE {
 
diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
index d7ffa21a..319f005b 100644
--- a/src/nnue/nnue_common.h
+++ b/src/nnue/nnue_common.h
@@ -69,7 +69,7 @@
 namespace Eval::NNUE {
 
   // Version of the evaluation file
-  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+  constexpr std::uint32_t kVersion = 0x7AF32F17u;
 
   // Constant used in evaluation value calculation
   constexpr int FV_SCALE = 16;
@@ -113,7 +113,7 @@ namespace Eval::NNUE {
     PS_END2     = 12 * SQUARE_NB + 1
   };
 
-  extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
+  extern const uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
 
   // Type of input feature after conversion
   using TransformedFeatureType = std::uint8_t;
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
index e1bc2ab8..e3f4b1c6 100644
--- a/src/nnue/nnue_feature_transformer.h
+++ b/src/nnue/nnue_feature_transformer.h
@@ -1,4 +1,4 @@
-/*
+/*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
   Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
@@ -29,6 +29,61 @@
 
 namespace Eval::NNUE {
 
+  // If vector instructions are enabled, we update and refresh the
+  // accumulator tile by tile such that each tile fits in the CPU's
+  // vector registers.
+  #define TILING
+
+  #ifdef USE_AVX512
+  typedef __m512i vec_t;
+  #define vec_load(a) _mm512_loadA_si512(a)
+  #define vec_store(a,b) _mm512_storeA_si512(a,b)
+  #define vec_add_16(a,b) _mm512_add_epi16(a,b)
+  #define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
+  #define vec_zero _mm512_setzero_si512()
+  static constexpr IndexType kNumRegs = 8; // only 8 are needed
+
+  #elif USE_AVX2
+  typedef __m256i vec_t;
+  #define vec_load(a) _mm256_loadA_si256(a)
+  #define vec_store(a,b) _mm256_storeA_si256(a,b)
+  #define vec_add_16(a,b) _mm256_add_epi16(a,b)
+  #define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
+  #define vec_zero _mm256_setzero_si256()
+  static constexpr IndexType kNumRegs = 16;
+
+  #elif USE_SSE2
+  typedef __m128i vec_t;
+  #define vec_load(a) (*(a))
+  #define vec_store(a,b) *(a)=(b)
+  #define vec_add_16(a,b) _mm_add_epi16(a,b)
+  #define vec_sub_16(a,b) _mm_sub_epi16(a,b)
+  #define vec_zero _mm_setzero_si128()
+  static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
+
+  #elif USE_MMX
+  typedef __m64 vec_t;
+  #define vec_load(a) (*(a))
+  #define vec_store(a,b) *(a)=(b)
+  #define vec_add_16(a,b) _mm_add_pi16(a,b)
+  #define vec_sub_16(a,b) _mm_sub_pi16(a,b)
+  #define vec_zero _mm_setzero_si64()
+  static constexpr IndexType kNumRegs = 8;
+
+  #elif USE_NEON
+  typedef int16x8_t vec_t;
+  #define vec_load(a) (*(a))
+  #define vec_store(a,b) *(a)=(b)
+  #define vec_add_16(a,b) vaddq_s16(a,b)
+  #define vec_sub_16(a,b) vsubq_s16(a,b)
+  #define vec_zero {0}
+  static constexpr IndexType kNumRegs = 16;
+
+  #else
+  #undef TILING
+
+  #endif
+
   // Input feature converter
   class FeatureTransformer {
 
@@ -36,6 +91,11 @@ namespace Eval::NNUE {
     // Number of output dimensions for one side
     static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
 
+    #ifdef TILING
+    static constexpr IndexType kTileHeight = kNumRegs * sizeof(vec_t) / 2;
+    static_assert(kHalfDimensions % kTileHeight == 0, "kTileHeight must divide kHalfDimensions");
+    #endif
+
    public:
     // Output type
     using OutputType = TransformedFeatureType;
@@ -50,6 +110,7 @@ namespace Eval::NNUE {
 
     // Hash value embedded in the evaluation file
     static constexpr std::uint32_t GetHashValue() {
+
       return RawFeatures::kHashValue ^ kOutputDimensions;
     }
 
@@ -62,6 +123,7 @@ namespace Eval::NNUE {
 
     // Read network parameters
     bool ReadParameters(std::istream& stream) {
+
       for (std::size_t i = 0; i < kHalfDimensions; ++i)
         biases_[i] = read_little_endian<BiasType>(stream);
       for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
@@ -80,23 +142,26 @@ namespace Eval::NNUE {
 
     // Proceed with the difference calculation if possible
     bool UpdateAccumulatorIfPossible(const Position& pos) const {
+
       const auto now = pos.state();
-      if (now->accumulator.computed_accumulation) {
+      if (now->accumulator.computed_accumulation)  // already computed for this state: nothing to update
         return true;
-      }
+
       const auto prev = now->previous;
       if (prev && prev->accumulator.computed_accumulation) {
         UpdateAccumulator(pos);
         return true;
       }
+
       return false;
     }
 
     // Convert input features
-    void Transform(const Position& pos, OutputType* output, bool refresh) const {
-      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+    void Transform(const Position& pos, OutputType* output) const {
+
+      if (!UpdateAccumulatorIfPossible(pos))
         RefreshAccumulator(pos);
-      }
+
       const auto& accumulation = pos.state()->accumulator.accumulation;
 
   #if defined(USE_AVX2)
@@ -133,6 +198,12 @@ namespace Eval::NNUE {
               &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
           __m256i sum1 = _mm256_loadA_si256(
             &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 0]);
+            sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 1]);
+          }
           _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
               _mm256_packs_epi16(sum0, sum1), kZero), kControl));
         }
@@ -144,6 +215,12 @@ namespace Eval::NNUE {
               accumulation[perspectives[p]][0])[j * 2 + 0]);
           __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
               accumulation[perspectives[p]][0])[j * 2 + 1]);
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 0]);
+            sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
+                accumulation[perspectives[p]][i])[j * 2 + 1]);
+          }
       const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
 
           _mm_store_si128(&out[j],
@@ -164,6 +241,12 @@ namespace Eval::NNUE {
               accumulation[perspectives[p]][0])[j * 2 + 0]);
           __m64 sum1 = *(&reinterpret_cast<const __m64*>(
               accumulation[perspectives[p]][0])[j * 2 + 1]);
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
+                accumulation[perspectives[p]][i])[j * 2 + 0]);
+            sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
+                accumulation[perspectives[p]][i])[j * 2 + 1]);
+          }
           const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
           out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
         }
@@ -173,12 +256,19 @@ namespace Eval::NNUE {
         for (IndexType j = 0; j < kNumChunks; ++j) {
           int16x8_t sum = reinterpret_cast<const int16x8_t*>(
               accumulation[perspectives[p]][0])[j];
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
+                accumulation[perspectives[p]][i])[j]);
+          }
           out[j] = vmax_s8(vqmovn_s16(sum), kZero);
         }
 
   #else
         for (IndexType j = 0; j < kHalfDimensions; ++j) {
           BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
+          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
+            sum += accumulation[static_cast<int>(perspectives[p])][i][j];
+          }
           output[offset + j] = static_cast<OutputType>(
               std::max<int>(0, std::min<int>(127, sum)));
         }
@@ -193,192 +283,162 @@ namespace Eval::NNUE {
    private:
     // Calculate cumulative value without using difference calculation
     void RefreshAccumulator(const Position& pos) const {
+
       auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
-      Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
-                                       active_indices);
-      for (Color perspective : { WHITE, BLACK }) {
-        std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                   kHalfDimensions * sizeof(BiasType));
-        for (const auto index : active_indices[perspective]) {
-          const IndexType offset = kHalfDimensions * index;
-  #if defined(USE_AVX512)
-          auto accumulation = reinterpret_cast<__m512i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
+      for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {  // one accumulator slice per refresh trigger
+        Features::IndexList active_indices[2];
+        RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+                                         active_indices);
+        for (Color perspective : { WHITE, BLACK }) {
+    #ifdef TILING
+          for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {  // process kTileHeight values per pass
+            auto accTile = reinterpret_cast<vec_t*>(
+                &accumulator.accumulation[perspective][i][j * kTileHeight]);
+            vec_t acc[kNumRegs];  // the tile is summed here and stored to accTile once, after all features
 
-  #elif defined(USE_AVX2)
-          auto accumulation = reinterpret_cast<__m256i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
+            if (i == 0) {  // only trigger 0 starts from the biases; the others start from zero
+              auto biasesTile = reinterpret_cast<const vec_t*>(
+                  &biases_[j * kTileHeight]);
+              for (unsigned k = 0; k < kNumRegs; ++k)
+                acc[k] = biasesTile[k];
+            } else {
+              for (unsigned k = 0; k < kNumRegs; ++k)
+                acc[k] = vec_zero;
+            }
+            for (const auto index : active_indices[perspective]) {
+              const IndexType offset = kHalfDimensions * index + j * kTileHeight;  // this tile's slice of the feature's weight column
+              auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
 
-  #elif defined(USE_SSE2)
-          auto accumulation = reinterpret_cast<__m128i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+              for (unsigned k = 0; k < kNumRegs; ++k)
+                acc[k] = vec_add_16(acc[k], column[k]);
+            }
 
-  #elif defined(USE_MMX)
-          auto accumulation = reinterpret_cast<__m64*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j) {
-            accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
+            for (unsigned k = 0; k < kNumRegs; k++)
+              vec_store(&accTile[k], acc[k]);
+          }
+    #else  // TILING not defined: scalar element-wise path
+          if (i == 0) {
+            std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                        kHalfDimensions * sizeof(BiasType));
+          } else {
+            std::memset(accumulator.accumulation[perspective][i], 0,
+                        kHalfDimensions * sizeof(BiasType));
           }
 
-  #elif defined(USE_NEON)
-          auto accumulation = reinterpret_cast<int16x8_t*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j)
-            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-
-  #else
-          for (IndexType j = 0; j < kHalfDimensions; ++j)
-            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
-  #endif
+          for (const auto index : active_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;
 
+            for (IndexType j = 0; j < kHalfDimensions; ++j)
+              accumulator.accumulation[perspective][i][j] += weights_[offset + j];
+          }
+    #endif
         }
+
       }
+
   #if defined(USE_MMX)
       _mm_empty();
   #endif
 
       accumulator.computed_accumulation = true;
-      accumulator.computed_score = false;
     }
 
     // Calculate cumulative value using difference calculation
     void UpdateAccumulator(const Position& pos) const {
+
-      const auto prev_accumulator = pos.state()->previous->accumulator;
+      const auto& prev_accumulator = pos.state()->previous->accumulator;  // bind by reference: only read below, so no need to copy the whole accumulator
       auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
-      Features::IndexList removed_indices[2], added_indices[2];
-      bool reset[2];
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
-                                        removed_indices, added_indices, reset);
-      for (Color perspective : { WHITE, BLACK }) {
+      for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {  // one accumulator slice per refresh trigger
+        Features::IndexList removed_indices[2], added_indices[2];
+        bool reset[2];
+        RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+                                          removed_indices, added_indices, reset);
 
-  #if defined(USE_AVX2)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m256i*>(
-            &accumulator.accumulation[perspective][i][0]);
+    #ifdef TILING
+        for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {  // process kTileHeight values per pass
+          for (Color perspective : { WHITE, BLACK }) {
+            auto accTile = reinterpret_cast<vec_t*>(
+                &accumulator.accumulation[perspective][i][j * kTileHeight]);
+            vec_t acc[kNumRegs];  // the tile is built here and stored to accTile once, at the end
 
-  #elif defined(USE_SSE2)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m128i*>(
-            &accumulator.accumulation[perspective][i][0]);
+            if (reset[perspective]) {  // previous values are not reused: seed with biases (trigger 0) or zero
+              if (i == 0) {
+                auto biasesTile = reinterpret_cast<const vec_t*>(
+                    &biases_[j * kTileHeight]);
+                for (unsigned k = 0; k < kNumRegs; ++k)
+                  acc[k] = biasesTile[k];
+              } else {
+                for (unsigned k = 0; k < kNumRegs; ++k)
+                  acc[k] = vec_zero;
+              }
+            } else {
+              auto prevAccTile = reinterpret_cast<const vec_t*>(
+                  &prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
+              for (IndexType k = 0; k < kNumRegs; ++k)
+                acc[k] = vec_load(&prevAccTile[k]);  // start from the previous state's values
 
-  #elif defined(USE_MMX)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m64*>(
-            &accumulator.accumulation[perspective][i][0]);
+              // Difference calculation for the deactivated features
+              for (const auto index : removed_indices[perspective]) {
+                const IndexType offset = kHalfDimensions * index + j * kTileHeight;  // this tile's slice of the feature's weight column
+                auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
 
-  #elif defined(USE_NEON)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<int16x8_t*>(
-            &accumulator.accumulation[perspective][i][0]);
-  #endif
+                for (IndexType k = 0; k < kNumRegs; ++k)
+                  acc[k] = vec_sub_16(acc[k], column[k]);
+              }
+            }
+            { // Difference calculation for the activated features
+              for (const auto index : added_indices[perspective]) {
+                const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+                auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
 
-        if (reset[perspective]) {
-          std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                      kHalfDimensions * sizeof(BiasType));
-        } else {
-          std::memcpy(accumulator.accumulation[perspective][i],
-                      prev_accumulator.accumulation[perspective][i],
-                      kHalfDimensions * sizeof(BiasType));
-          // Difference calculation for the deactivated features
-          for (const auto index : removed_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index;
-
-  #if defined(USE_AVX2)
-            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
+                for (IndexType k = 0; k < kNumRegs; ++k)
+                  acc[k] = vec_add_16(acc[k], column[k]);
+              }
             }
 
-  #elif defined(USE_SSE2)
-            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
-            }
-
-  #elif defined(USE_MMX)
-            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
-            }
-
-  #elif defined(USE_NEON)
-            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
-            }
-
-  #else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] -=
-                  weights_[offset + j];
-            }
-  #endif
-
+            for (IndexType k = 0; k < kNumRegs; ++k)
+              vec_store(&accTile[k], acc[k]);
           }
         }
-        { // Difference calculation for the activated features
-          for (const auto index : added_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index;
+    #if defined(USE_MMX)
+        _mm_empty();  // empty the MMX state after the MMX tile loops so x87 code can run again
+    #endif
 
-  #if defined(USE_AVX2)
-            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
+    #else  // TILING not defined: scalar element-wise path
+        for (Color perspective : { WHITE, BLACK }) {
+
+          if (reset[perspective]) {  // previous values are not reused: seed with biases (trigger 0) or zero
+            if (i == 0) {
+              std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                          kHalfDimensions * sizeof(BiasType));
+            } else {
+              std::memset(accumulator.accumulation[perspective][i], 0,
+                          kHalfDimensions * sizeof(BiasType));
             }
+          } else {
+            std::memcpy(accumulator.accumulation[perspective][i],
+                        prev_accumulator.accumulation[perspective][i],
+                        kHalfDimensions * sizeof(BiasType));
+            // Difference calculation for the deactivated features
+            for (const auto index : removed_indices[perspective]) {
+              const IndexType offset = kHalfDimensions * index;
 
-  #elif defined(USE_SSE2)
-            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+              for (IndexType j = 0; j < kHalfDimensions; ++j)
+                accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
             }
+          }
+          { // Difference calculation for the activated features
+            for (const auto index : added_indices[perspective]) {
+              const IndexType offset = kHalfDimensions * index;
 
-  #elif defined(USE_MMX)
-            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
+              for (IndexType j = 0; j < kHalfDimensions; ++j)
+                accumulator.accumulation[perspective][i][j] += weights_[offset + j];
             }
-
-  #elif defined(USE_NEON)
-            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-            }
-
-  #else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] +=
-                  weights_[offset + j];
-            }
-  #endif
-
           }
         }
+    #endif
       }
-  #if defined(USE_MMX)
-      _mm_empty();
-  #endif
-
       accumulator.computed_accumulation = true;
-      accumulator.computed_score = false;
     }
 
     using BiasType = std::int16_t;
diff --git a/src/nnue/trainer/trainer_affine_transform.h b/src/nnue/trainer/trainer_affine_transform.h
index 50751ffe..415b7dc8 100644
--- a/src/nnue/trainer/trainer_affine_transform.h
+++ b/src/nnue/trainer/trainer_affine_transform.h
@@ -194,7 +194,7 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
       weights_(),
       biases_diff_(),
       weights_diff_(),
-      momentum_(0.0),
+      momentum_(0.2),
       learning_rate_scale_(1.0) {
     DequantizeParameters();
   }
diff --git a/src/nnue/trainer/trainer_feature_transformer.h b/src/nnue/trainer/trainer_feature_transformer.h
index 190e009a..225c91fc 100644
--- a/src/nnue/trainer/trainer_feature_transformer.h
+++ b/src/nnue/trainer/trainer_feature_transformer.h
@@ -232,7 +232,7 @@ class Trainer<FeatureTransformer> {
       biases_(),
       weights_(),
       biases_diff_(),
-      momentum_(0.0),
+      momentum_(0.2),
       learning_rate_scale_(1.0) {
     min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
     max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
diff --git a/src/position.cpp b/src/position.cpp
index 38ac7c5c..52c47f66 100644
--- a/src/position.cpp
+++ b/src/position.cpp
@@ -707,7 +707,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
 
   // Used by NNUE
   st->accumulator.computed_accumulation = false;
-  st->accumulator.computed_score = false;
   auto& dp = st->dirtyPiece;
   dp.dirty_num = 1;
 
@@ -1003,7 +1002,6 @@ void Position::do_null_move(StateInfo& newSt) {
   if (Eval::useNNUE != Eval::UseNNUEMode::False)
   {
       std::memcpy(&newSt, st, sizeof(StateInfo));
-      st->accumulator.computed_score = false;
   }
   else
       std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
@@ -1353,9 +1351,9 @@ bool Position::pos_is_ok() const {
 // Add a function that directly unpacks for speed. It's pretty tough.
 // Write it by combining packer::unpack() and Position::set().
 // If there is a problem with the passed phase and there is an error, non-zero is returned.
-int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th, bool mirror)
+int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th)
 {
-  return Learner::set_from_packed_sfen(*this, sfen, si, th, mirror);
+  return Learner::set_from_packed_sfen(*this, sfen, si, th);
 }
 
 // Give the board, hand piece, and turn, and return the sfen.
diff --git a/src/position.h b/src/position.h
index 2163dca3..e7513eb1 100644
--- a/src/position.h
+++ b/src/position.h
@@ -177,7 +177,7 @@ public:
 
   // --sfenization helper
 
-  friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+  friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
 
   // Get the packed sfen. Returns to the buffer specified in the argument.
   // Do not include gamePly in pack.
@@ -187,7 +187,7 @@ public:
   // Equivalent to pos.set(sfen_unpack(data),si,th);.
   // If there is a problem with the passed phase and there is an error, non-zero is returned.
   // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
-  int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
+  int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
 
   void clear() { std::memset(this, 0, sizeof(Position)); }
 
diff --git a/src/search.cpp b/src/search.cpp
index e1616c5c..647f0fd7 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -40,21 +40,11 @@ namespace Search {
   LimitsType Limits;
 }
 
-namespace Tablebases {
-
-  int Cardinality;
-  bool RootInTB;
-  bool UseRule50;
-  Depth ProbeDepth;
-}
-
-namespace TB = Tablebases;
-
 using std::string;
 using Eval::evaluate;
 using namespace Search;
 
-bool Search::prune_at_shallow_depth_on_pv_node = true;
+bool Search::prune_at_shallow_depth = true;
 
 namespace {
 
@@ -227,7 +217,7 @@ void MainThread::search() {
   Time.init(Limits, us, rootPos.game_ply());
   TT.new_search();
 
-  Eval::verify_NNUE();
+  Eval::NNUE::verify();
 
   if (rootMoves.empty())
   {
@@ -464,10 +454,7 @@ void Thread::search() {
                   ++failedHighCnt;
               }
               else
-              {
-                  ++rootMoves[pvIdx].bestMoveCount;
                   break;
-              }
 
               delta += delta / 4 + 5;
 
@@ -522,7 +509,7 @@ void Thread::search() {
               totBestMoveChanges += th->bestMoveChanges;
               th->bestMoveChanges = 0;
           }
-          double bestMoveInstability = 1 + totBestMoveChanges / Threads.size();
+          double bestMoveInstability = 1 + 2 * totBestMoveChanges / Threads.size();
 
           double totalTime = rootMoves.size() == 1 ? 0 :
                              Time.optimum() * fallingEval * reduction * bestMoveInstability;
@@ -599,7 +586,7 @@ namespace {
     Move ttMove, move, excludedMove, bestMove;
     Depth extension, newDepth;
     Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
-    bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture;
+    bool formerPv, givesCheck, improving, didLMR, priorCapture;
     bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
          ttCapture, singularQuietLMR;
     Piece movedPiece;
@@ -646,6 +633,7 @@ namespace {
     assert(0 <= ss->ply && ss->ply < MAX_PLY);
 
     (ss+1)->ply = ss->ply + 1;
+    (ss+1)->ttPv = false;
     (ss+1)->excludedMove = bestMove = MOVE_NONE;
     (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE;
     Square prevSq = to_sq((ss-1)->currentMove);
@@ -655,9 +643,7 @@ namespace {
     // starts with statScore = 0. Later grandchildren start with the last calculated
     // statScore of the previous grandchild. This influences the reduction rules in
     // LMR which are based on the statScore of parent position.
-    if (rootNode)
-        (ss+4)->statScore = 0;
-    else
+    if (!rootNode)
         (ss+2)->statScore = 0;
 
     // Step 4. Transposition table lookup. We don't want the score of a partial
@@ -665,14 +651,15 @@ namespace {
     // position key in case of an excluded move.
     excludedMove = ss->excludedMove;
     posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove);
-    tte = TT.probe(posKey, ttHit);
-    ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+    tte = TT.probe(posKey, ss->ttHit);
+    ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
     ttMove =  rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
-            : ttHit    ? tte->move() : MOVE_NONE;
-    ttPv = PvNode || (ttHit && tte->is_pv());
-    formerPv = ttPv && !PvNode;
+            : ss->ttHit    ? tte->move() : MOVE_NONE;
+    if (!excludedMove)
+        ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
+    formerPv = ss->ttPv && !PvNode;
 
-    if (   ttPv
+    if (   ss->ttPv
         && depth > 12
         && ss->ply - 1 < MAX_LPH
         && !priorCapture
@@ -681,11 +668,11 @@ namespace {
 
     // thisThread->ttHitAverage can be used to approximate the running average of ttHit
     thisThread->ttHitAverage =   (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow
-                                + TtHitAverageResolution * ttHit;
+                                + TtHitAverageResolution * ss->ttHit;
 
     // At non-PV nodes we check for an early TT cutoff
     if (  !PvNode
-        && ttHit
+        && ss->ttHit
         && tte->depth() >= depth
         && ttValue != VALUE_NONE // Possible in case of TT access race
         && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
@@ -717,27 +704,27 @@ namespace {
     }
 
     // Step 5. Tablebases probe
-    if (!rootNode && TB::Cardinality)
+    if (!rootNode && thisThread->Cardinality)
     {
         int piecesCount = pos.count<ALL_PIECES>();
 
-        if (    piecesCount <= TB::Cardinality
-            && (piecesCount <  TB::Cardinality || depth >= TB::ProbeDepth)
+        if (    piecesCount <= thisThread->Cardinality
+            && (piecesCount <  thisThread->Cardinality || depth >= thisThread->ProbeDepth)
             &&  pos.rule50_count() == 0
             && !pos.can_castle(ANY_CASTLING))
         {
-            TB::ProbeState err;
-            TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
+            Tablebases::ProbeState err;
+            Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
 
             // Force check of time on the next occasion
             if (thisThread == Threads.main())
                 static_cast<MainThread*>(thisThread)->callsCnt = 0;
 
-            if (err != TB::ProbeState::FAIL)
+            if (err != Tablebases::ProbeState::FAIL)
             {
                 thisThread->tbHits.fetch_add(1, std::memory_order_relaxed);
 
-                int drawScore = TB::UseRule50 ? 1 : 0;
+                int drawScore = thisThread->UseRule50 ? 1 : 0;
 
                 // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score
                 value =  wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1
@@ -750,7 +737,7 @@ namespace {
                 if (    b == BOUND_EXACT
                     || (b == BOUND_LOWER ? value >= beta : value <= alpha))
                 {
-                    tte->save(posKey, value_to_tt(value, ss->ply), ttPv, b,
+                    tte->save(posKey, value_to_tt(value, ss->ply), ss->ttPv, b,
                               std::min(MAX_PLY - 1, depth + 6),
                               MOVE_NONE, VALUE_NONE);
 
@@ -778,7 +765,7 @@ namespace {
         improving = false;
         goto moves_loop;
     }
-    else if (ttHit)
+    else if (ss->ttHit)
     {
         // Never assume anything about values stored in TT
         ss->staticEval = eval = tte->eval();
@@ -800,7 +787,7 @@ namespace {
         else
             ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
 
-        tte->save(posKey, VALUE_NONE, ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
+        tte->save(posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval);
     }
 
     // Step 7. Razoring (~1 Elo)
@@ -826,7 +813,7 @@ namespace {
         && (ss-1)->statScore < 22977
         &&  eval >= beta
         &&  eval >= ss->staticEval
-        &&  ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182
+        &&  ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ss->ttPv + 182
         && !excludedMove
         &&  pos.non_pawn_material(us)
         && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -882,14 +869,14 @@ namespace {
         // there and in further interactions with transposition table cutoff depth is set to depth - 3
         // because probCut search has depth set to depth - 4 but we also do a move before it
         // so effective depth is equal to depth - 3
-        && !(   ttHit
+        && !(   ss->ttHit
              && tte->depth() >= depth - 3
              && ttValue != VALUE_NONE
              && ttValue < probCutBeta))
     {
         // if ttMove is a capture and value from transposition table is good enough produce probCut
         // cutoff without digging into actual probCut search
-        if (   ttHit
+        if (   ss->ttHit
             && tte->depth() >= depth - 3
             && ttValue != VALUE_NONE
             && ttValue >= probCutBeta
@@ -900,6 +887,8 @@ namespace {
         assert(probCutBeta < VALUE_INFINITE);
         MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
         int probCutCount = 0;
+        bool ttPv = ss->ttPv;
+        ss->ttPv = false;
 
         while (   (move = mp.next_move()) != MOVE_NONE
                && probCutCount < 2 + 2 * cutNode)
@@ -931,7 +920,7 @@ namespace {
                 if (value >= probCutBeta)
                 {
                     // if transposition table doesn't have equal or more deep info write probCut data into it
-                    if ( !(ttHit
+                    if ( !(ss->ttHit
                        && tte->depth() >= depth - 3
                        && ttValue != VALUE_NONE))
                         tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
@@ -940,8 +929,15 @@ namespace {
                     return value;
                 }
             }
+         ss->ttPv = ttPv;
     }
 
+    // Step 11. At PV nodes with depth >= 6 and no TT move, decrease depth by 2
+    if (   PvNode
+        && depth >= 6
+        && !ttMove)
+        depth -= 2;
+
 moves_loop: // When in check, search starts from here
 
     const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -965,7 +961,7 @@ moves_loop: // When in check, search starts from here
     // Mark this node as being searched
     ThreadHolding th(thisThread, posKey, ss->ply);
 
-    // Step 11. Loop through all pseudo-legal moves until no moves remain
+    // Step 12. Loop through all pseudo-legal moves until no moves remain
     // or a beta cutoff occurs.
     while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE)
     {
@@ -1005,9 +1001,9 @@ moves_loop: // When in check, search starts from here
       // Calculate new depth for this move
       newDepth = depth - 1;
 
-      // Step 12. Pruning at shallow depth (~200 Elo)
+      // Step 13. Pruning at shallow depth (~200 Elo)
       if (  !rootNode
-          && (PvNode ? prune_at_shallow_depth_on_pv_node : true)
+          && (PvNode ? prune_at_shallow_depth : true)
           && pos.non_pawn_material(us)
           && bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
       {
@@ -1052,7 +1048,6 @@ moves_loop: // When in check, search starts from here
               if (   !givesCheck
                   && lmrDepth < 6
                   && !(PvNode && abs(bestValue) < 2)
-                  && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
                   && !ss->inCheck
                   && ss->staticEval + 169 + 244 * lmrDepth
                      + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
@@ -1064,7 +1059,7 @@ moves_loop: // When in check, search starts from here
           }
       }
 
-      // Step 13. Extensions (~75 Elo)
+      // Step 14. Extensions (~75 Elo)
 
       // Singular extension search (~70 Elo). If all moves but one fail low on a
       // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
@@ -1123,11 +1118,6 @@ moves_loop: // When in check, search starts from here
                && pos.non_pawn_material() <= 2 * RookValueMg)
           extension = 1;
 
-      // Castling extension
-      if (   type_of(move) == CASTLING
-          && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
-          extension = 1;
-
       // Late irreversible move extension
       if (   move == ttMove
           && pos.rule50_count() > 80
@@ -1147,14 +1137,13 @@ moves_loop: // When in check, search starts from here
                                                                 [movedPiece]
                                                                 [to_sq(move)];
 
-      // Step 14. Make the move
+      // Step 15. Make the move
       pos.do_move(move, st, givesCheck);
 
-      // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
+      // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
       // re-searched at full depth.
       if (    depth >= 3
-          &&  moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
-          && (!rootNode || thisThread->best_move_count(move) == 0)
+          &&  moveCount > 1 + 2 * rootNode
           && (  !captureOrPromotion
               || moveCountPruning
               || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
@@ -1163,13 +1152,6 @@ moves_loop: // When in check, search starts from here
       {
           Depth r = reduction(improving, depth, moveCount);
 
-          // Decrease reduction at non-check cut nodes for second move at low depths
-          if (   cutNode
-              && depth <= 10
-              && moveCount <= 2
-              && !ss->inCheck)
-              r--;
-
           // Decrease reduction if the ttHit running average is large
           if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
               r--;
@@ -1179,7 +1161,7 @@ moves_loop: // When in check, search starts from here
               r++;
 
           // Decrease reduction if position is or has been on the PV (~10 Elo)
-          if (ttPv)
+          if (ss->ttPv)
               r -= 2;
 
           if (moveCountPruning && !formerPv)
@@ -1191,7 +1173,7 @@ moves_loop: // When in check, search starts from here
 
           // Decrease reduction if ttMove has been singularly extended (~3 Elo)
           if (singularQuietLMR)
-              r -= 1 + formerPv;
+              r--;
 
           if (!captureOrPromotion)
           {
@@ -1208,7 +1190,7 @@ moves_loop: // When in check, search starts from here
               // hence break make_move(). (~2 Elo)
               else if (    type_of(move) == NORMAL
                        && !pos.see_ge(reverse_move(move)))
-                  r -= 2 + ttPv - (type_of(movedPiece) == PAWN);
+                  r -= 2 + ss->ttPv - (type_of(movedPiece) == PAWN);
 
               ss->statScore =  thisThread->mainHistory[us][from_to(move)]
                              + (*contHist[0])[movedPiece][to_sq(move)]
@@ -1228,14 +1210,14 @@ moves_loop: // When in check, search starts from here
           }
           else
           {
-            // Increase reduction for captures/promotions if late move and at low depth
-            if (depth < 8 && moveCount > 2)
-                r++;
+              // Increase reduction for captures/promotions if late move and at low depth
+              if (depth < 8 && moveCount > 2)
+                  r++;
 
-            // Unless giving check, this capture is likely bad
-            if (   !givesCheck
-                && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
-                r++;
+              // Unless giving check, this capture is likely bad
+              if (   !givesCheck
+                  && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
+                  r++;
           }
 
           Depth d = std::clamp(newDepth - r, 1, newDepth);
@@ -1253,7 +1235,7 @@ moves_loop: // When in check, search starts from here
           didLMR = false;
       }
 
-      // Step 16. Full depth search when LMR is skipped or fails high
+      // Step 17. Full depth search when LMR is skipped or fails high
       if (doFullDepthSearch)
       {
           value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
@@ -1281,12 +1263,12 @@ moves_loop: // When in check, search starts from here
           value = -search<PV>(pos, ss+1, -beta, -alpha, newDepth, false);
       }
 
-      // Step 17. Undo move
+      // Step 18. Undo move
       pos.undo_move(move);
 
       assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
 
-      // Step 18. Check for a new best move
+      // Step 19. Check for a new best move
       // Finished searching the move. If a stop occurred, the return value of
       // the search cannot be trusted, and we return immediately without
       // updating best move, PV and TT.
@@ -1363,7 +1345,7 @@ moves_loop: // When in check, search starts from here
         return VALUE_DRAW;
     */
 
-    // Step 19. Check for mate and stalemate
+    // Step 20. Check for mate and stalemate
     // All legal moves have been searched and if there are no legal moves, it
     // must be a mate or a stalemate. If we are in a singular extension search then
     // return a fail low score.
@@ -1386,8 +1368,17 @@ moves_loop: // When in check, search starts from here
     if (PvNode)
         bestValue = std::min(bestValue, maxValue);
 
+    // If no good move is found and the previous position was ttPv, then the previous
+    // opponent move is probably good and the new position is added to the search tree.
+    if (bestValue <= alpha)
+        ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3);
+    // Otherwise, a counter move has been found and if the position is the last leaf
+    // in the search tree, remove the position from the search tree.
+    else if (depth > 3)
+        ss->ttPv = ss->ttPv && (ss+1)->ttPv;
+
     if (!excludedMove && !(rootNode && thisThread->pvIdx))
-        tte->save(posKey, value_to_tt(bestValue, ss->ply), ttPv,
+        tte->save(posKey, value_to_tt(bestValue, ss->ply), ss->ttPv,
                   bestValue >= beta ? BOUND_LOWER :
                   PvNode && bestMove ? BOUND_EXACT : BOUND_UPPER,
                   depth, bestMove, ss->staticEval);
@@ -1416,7 +1407,7 @@ moves_loop: // When in check, search starts from here
     Move ttMove, move, bestMove;
     Depth ttDepth;
     Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha;
-    bool ttHit, pvHit, givesCheck, captureOrPromotion;
+    bool pvHit, givesCheck, captureOrPromotion;
     int moveCount;
 
     if (PvNode)
@@ -1446,13 +1437,13 @@ moves_loop: // When in check, search starts from here
                                                   : DEPTH_QS_NO_CHECKS;
     // Transposition table lookup
     posKey = pos.key();
-    tte = TT.probe(posKey, ttHit);
-    ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
-    ttMove = ttHit ? tte->move() : MOVE_NONE;
-    pvHit = ttHit && tte->is_pv();
+    tte = TT.probe(posKey, ss->ttHit);
+    ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+    ttMove = ss->ttHit ? tte->move() : MOVE_NONE;
+    pvHit = ss->ttHit && tte->is_pv();
 
     if (  !PvNode
-        && ttHit
+        && ss->ttHit
         && tte->depth() >= ttDepth
         && ttValue != VALUE_NONE // Only in case of TT access race
         && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
@@ -1467,7 +1458,7 @@ moves_loop: // When in check, search starts from here
     }
     else
     {
-        if (ttHit)
+        if (ss->ttHit)
         {
             // Never assume anything about values stored in TT
             if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE)
@@ -1486,7 +1477,7 @@ moves_loop: // When in check, search starts from here
         // Stand pat. Return immediately if static value is at least beta
         if (bestValue >= beta)
         {
-            if (!ttHit)
+            if (!ss->ttHit)
                 tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
                           DEPTH_NONE, MOVE_NONE, ss->staticEval);
 
@@ -1524,6 +1515,7 @@ moves_loop: // When in check, search starts from here
 
       // Futility pruning
       if (   !ss->inCheck
+          && Search::prune_at_shallow_depth
           && !givesCheck
           &&  futilityBase > -VALUE_KNOWN_WIN
           && !pos.advanced_pawn_push(move))
@@ -1550,18 +1542,17 @@ moves_loop: // When in check, search starts from here
       }
 
       // Do not search moves with negative SEE values
-      if (  !ss->inCheck && !pos.see_ge(move))
+      if (   !ss->inCheck
+          && Search::prune_at_shallow_depth
+          && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move))
+          && !pos.see_ge(move))
           continue;
 
       // Speculative prefetch as early as possible
       prefetch(TT.first_entry(pos.key_after(move)));
 
       // Check for legality just before making the move
-      if (
-        // HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning.
-        !pos.pseudo_legal(move) ||
-        !pos.legal(move)
-        )
+      if (!pos.legal(move))
       {
           moveCount--;
           continue;
@@ -1573,8 +1564,10 @@ moves_loop: // When in check, search starts from here
                                                                 [pos.moved_piece(move)]
                                                                 [to_sq(move)];
 
+      // CounterMove based pruning
       if (  !captureOrPromotion
-          && moveCount >= abs(depth) + 1
+          && Search::prune_at_shallow_depth
+          && moveCount
           && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
           && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
           continue;
@@ -1706,8 +1699,8 @@ moves_loop: // When in check, search starts from here
     else
         captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1;
 
-    // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted
-    if (   ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0]))
+    // Extra penalty for the previous ply's quiet move when it gets refuted, if it was an early move (moveCount == 1 + ttHit) or the main killer
+    if (   ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0]))
         && !pos.captured_piece())
             update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1);
 
@@ -1844,19 +1837,22 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
   size_t pvIdx = pos.this_thread()->pvIdx;
   size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size());
   uint64_t nodesSearched = Threads.nodes_searched();
-  uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0);
+  uint64_t tbHits = Threads.tb_hits() + (pos.this_thread()->rootInTB ? rootMoves.size() : 0);
 
   for (size_t i = 0; i < multiPV; ++i)
   {
       bool updated = rootMoves[i].score != -VALUE_INFINITE;
 
-      if (depth == 1 && !updated)
+      if (depth == 1 && !updated && i > 0)
           continue;
 
-      Depth d = updated ? depth : depth - 1;
+      Depth d = updated ? depth : std::max(1, depth - 1);
       Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore;
 
-      bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
+      if (v == -VALUE_INFINITE)
+          v = VALUE_ZERO;
+
+      bool tb = pos.this_thread()->rootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
       v = tb ? rootMoves[i].tbScore : v;
 
       if (ss.rdbuf()->in_avail()) // Not at first line
@@ -1923,42 +1919,42 @@ bool RootMove::extract_ponder_from_tt(Position& pos) {
 
 void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
 
-    RootInTB = false;
-    UseRule50 = bool(Options["Syzygy50MoveRule"]);
-    ProbeDepth = int(Options["SyzygyProbeDepth"]);
-    Cardinality = int(Options["SyzygyProbeLimit"]);
+    auto& rootInTB = pos.this_thread()->rootInTB;
+    auto& cardinality = pos.this_thread()->Cardinality;
+    auto& probeDepth = pos.this_thread()->ProbeDepth;
+    rootInTB = false;
     bool dtz_available = true;
 
     // Tables with fewer pieces than SyzygyProbeLimit are searched with
     // ProbeDepth == DEPTH_ZERO
-    if (Cardinality > MaxCardinality)
+    if (cardinality > Tablebases::MaxCardinality)
     {
-        Cardinality = MaxCardinality;
-        ProbeDepth = 0;
+        cardinality = Tablebases::MaxCardinality;
+        probeDepth = 0;
     }
 
-    if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
+    if (cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
     {
         // Rank moves using DTZ tables
-        RootInTB = root_probe(pos, rootMoves);
+        rootInTB = root_probe(pos, rootMoves);
 
-        if (!RootInTB)
+        if (!rootInTB)
         {
             // DTZ tables are missing; try to rank moves using WDL tables
             dtz_available = false;
-            RootInTB = root_probe_wdl(pos, rootMoves);
+            rootInTB = root_probe_wdl(pos, rootMoves);
         }
     }
 
-    if (RootInTB)
+    if (rootInTB)
     {
         // Sort moves according to TB rank
-        std::sort(rootMoves.begin(), rootMoves.end(),
+        std::stable_sort(rootMoves.begin(), rootMoves.end(),
                   [](const RootMove &a, const RootMove &b) { return a.tbRank > b.tbRank; } );
 
         // Probe during search only if DTZ is not available and we are winning
         if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW)
-            Cardinality = 0;
+            cardinality = 0;
     }
     else
     {
@@ -1966,6 +1962,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
         for (auto& m : rootMoves)
             m.tbRank = 0;
     }
+
 }
 
 // --- expose the functions such as fixed depth search used for learning to the outside
@@ -1998,7 +1995,7 @@ namespace Learner
       th->nmpMinPly = th->bestMoveChanges = 0;
       th->ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
 
-	  // Zero initialization of the number of search nodes
+      // Zero initialization of the number of search nodes
       th->nodes = 0;
 
       // Clear all history types. This initialization takes a little time, and the accuracy of the search is rather low, so the good and bad are not well understood.
@@ -2022,7 +2019,7 @@ namespace Learner
       for (int i = 7; i > 0; i--)
           (ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel
 
- // set rootMoves
+      // set rootMoves
       auto& rootMoves = th->rootMoves;
 
       rootMoves.clear();
@@ -2030,6 +2027,20 @@ namespace Learner
         rootMoves.push_back(Search::RootMove(m));
 
       assert(!rootMoves.empty());
+
+      th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
+      th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
+      th->Cardinality = int(Options["SyzygyProbeLimit"]);
+
+      // Tables with fewer pieces than SyzygyProbeLimit are searched with
+      // ProbeDepth == DEPTH_ZERO
+      if (th->Cardinality > Tablebases::MaxCardinality)
+      {
+          th->Cardinality = Tablebases::MaxCardinality;
+          th->ProbeDepth = 0;
+      }
+
+      Tablebases::rank_root_moves(pos, rootMoves);
     }
   }
 
@@ -2050,8 +2061,8 @@ namespace Learner
   // As it has a bad effect, I decided to stop allowing the window range to be specified.
   ValueAndPV qsearch(Position& pos)
   {
-    Stack stack[MAX_PLY + 10], * ss = stack + 7;
-    Move pv[MAX_PLY + 1];
+    Stack stack[MAX_PLY+10], *ss = stack+7;
+    Move  pv[MAX_PLY+1];
 
     init_for_search(pos, ss);
     ss->pv = pv; // For the time being, it must be a dummy and somewhere with a buffer.
@@ -2070,7 +2081,7 @@ namespace Learner
 
     auto bestValue = ::qsearch<PV>(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0);
 
-  // Returns the PV obtained.
+    // Returns the PV obtained.
     std::vector<Move> pvs;
     for (Move* p = &ss->pv[0]; is_ok(*p); ++p)
       pvs.push_back(*p);
@@ -2136,7 +2147,7 @@ namespace Learner
     Value bestValue = -VALUE_INFINITE;
 
     while ((rootDepth += 1) <= depth
-	  // exit this loop even if the node limit is exceeded
+      // exit this loop even if the node limit is exceeded
       // The number of search nodes is passed in the argument of this function.
       && !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit)
       )
@@ -2158,46 +2169,36 @@ namespace Learner
               break;
         }
 
-	    // selDepth output with USI info for each depth and PV line
+        // selDepth output with USI info for each depth and PV line
         selDepth = 0;
 
         // Switch to aspiration search for depth 5 and above.
-        if (rootDepth >= 5 * 1)
+        if (rootDepth >= 4)
         {
-          delta = Value(20);
-
-          Value p = rootMoves[pvIdx].previousScore;
-
-          alpha = std::max(p - delta, -VALUE_INFINITE);
-          beta = std::min(p + delta, VALUE_INFINITE);
+            Value prev = rootMoves[pvIdx].previousScore;
+            delta = Value(17);
+            alpha = std::max(prev - delta,-VALUE_INFINITE);
+            beta  = std::min(prev + delta, VALUE_INFINITE);
         }
 
-        // aspiration search
-        int failedHighCnt = 0;
         while (true)
         {
-          Depth adjustedDepth = std::max(1, rootDepth - failedHighCnt * 1);
+          Depth adjustedDepth = std::max(1, rootDepth);
           bestValue = ::search<PV>(pos, ss, alpha, beta, adjustedDepth, false);
 
           stable_sort(rootMoves.begin() + pvIdx, rootMoves.end());
           //my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx);
 
-		  // Expand aspiration window for fail low/high.
+          // Expand aspiration window for fail low/high.
           // However, if it is the value specified by the argument, it will be treated as fail low/high and break.
           if (bestValue <= alpha)
           {
             beta = (alpha + beta) / 2;
             alpha = std::max(bestValue - delta, -VALUE_INFINITE);
-
-            failedHighCnt = 0;
-            //if (mainThread)
-            //    mainThread->stopOnPonderhit = false;
-
           }
           else if (bestValue >= beta)
           {
             beta = std::min(bestValue + delta, VALUE_INFINITE);
-            ++failedHighCnt;
           }
           else
             break;
@@ -2218,7 +2219,6 @@ namespace Learner
     }
 
     // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle.
-    // ¨ PV should not be NULL_MOVE because it is PV
     // MOVE_WIN has never been thrust. (For now)
     for (Move move : rootMoves[0].pv)
     {
diff --git a/src/search.h b/src/search.h
index 20dfe909..ab832ee2 100644
--- a/src/search.h
+++ b/src/search.h
@@ -24,6 +24,7 @@
 #include "misc.h"
 #include "movepick.h"
 #include "types.h"
+#include "uci.h"
 
 class Position;
 
@@ -32,7 +33,7 @@ namespace Search {
 /// Threshold used for countermoves based pruning
 constexpr int CounterMovePruneThreshold = 0;
 
-extern bool prune_at_shallow_depth_on_pv_node;
+extern bool prune_at_shallow_depth;
 
 /// Stack struct keeps track of the information we need to remember from nodes
 /// shallower and deeper in the tree during the search. Each search thread has
@@ -49,6 +50,8 @@ struct Stack {
   int statScore;
   int moveCount;
   bool inCheck;
+  bool ttPv;
+  bool ttHit;
 };
 
 
@@ -70,7 +73,6 @@ struct RootMove {
   Value previousScore = -VALUE_INFINITE;
   int selDepth = 0;
   int tbRank = 0;
-  int bestMoveCount = 0;
   Value tbScore;
   std::vector<Move> pv;
 };
diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp
index 20215b96..f4b9447f 100644
--- a/src/syzygy/tbprobe.cpp
+++ b/src/syzygy/tbprobe.cpp
@@ -52,7 +52,7 @@
 
 using namespace Tablebases;
 
-int Tablebases::MaxCardinality;
+int Tablebases::MaxCardinality = 0;
 
 namespace {
 
@@ -223,7 +223,9 @@ public:
 
         *mapping = statbuf.st_size;
         *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
+#if defined(MADV_RANDOM)
         madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
+#endif
         ::close(fd);
 
         if (*baseAddress == MAP_FAILED)
@@ -758,7 +760,7 @@ Ret do_probe_table(const Position& pos, T* entry, WDLScore wdl, ProbeState* resu
     if (entry->hasPawns) {
         idx = LeadPawnIdx[leadPawnsCnt][squares[0]];
 
-        std::sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
+        std::stable_sort(squares + 1, squares + leadPawnsCnt, pawns_comp);
 
         for (int i = 1; i < leadPawnsCnt; ++i)
             idx += Binomial[i][MapPawns[squares[i]]];
@@ -859,7 +861,7 @@ encode_remaining:
 
     while (d->groupLen[++next])
     {
-        std::sort(groupSq, groupSq + d->groupLen[next]);
+        std::stable_sort(groupSq, groupSq + d->groupLen[next]);
         uint64_t n = 0;
 
         // Map down a square if "comes later" than a square in the previous
diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h
index b998989b..5f97c746 100644
--- a/src/syzygy/tbprobe.h
+++ b/src/syzygy/tbprobe.h
@@ -25,6 +25,8 @@
 
 namespace Tablebases {
 
+extern int MaxCardinality;
+
 enum WDLScore {
     WDLLoss        = -2, // Loss
     WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
@@ -43,8 +45,6 @@ enum ProbeState {
     ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };
 
-extern int MaxCardinality;
-
 void init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
 int probe_dtz(Position& pos, ProbeState* result);
diff --git a/src/thread.cpp b/src/thread.cpp
index 1aa66a81..c81ac43d 100644
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -51,17 +51,6 @@ Thread::~Thread() {
 }
 
 
-/// Thread::bestMoveCount(Move move) return best move counter for the given root move
-
-int Thread::best_move_count(Move move) const {
-
-  auto rm = std::find(rootMoves.begin() + pvIdx,
-                      rootMoves.begin() + pvLast, move);
-
-  return rm != rootMoves.begin() + pvLast ? rm->bestMoveCount : 0;
-}
-
-
 /// Thread::clear() reset histories, usually before a new game
 
 void Thread::clear() {
@@ -192,9 +181,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
           || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
           rootMoves.emplace_back(m);
 
-  if (!rootMoves.empty())
-      Tablebases::rank_root_moves(pos, rootMoves);
-
   // After ownership transfer 'states' becomes empty, so if we stop the search
   // and call 'go' again without setting a new position states.get() == NULL.
   assert(states.get() || setupStates.get());
@@ -214,6 +200,21 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
       th->rootMoves = rootMoves;
       th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
       th->rootState = setupStates->back();
+      th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
+      th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
+      th->Cardinality = int(Options["SyzygyProbeLimit"]);
+
+      // Tables with fewer pieces than SyzygyProbeLimit are searched with
+      // ProbeDepth == DEPTH_ZERO
+      if (th->Cardinality > Tablebases::MaxCardinality)
+      {
+          th->Cardinality = Tablebases::MaxCardinality;
+          th->ProbeDepth = 0;
+      }
+
+      // Rank this thread's own copy: rank_root_moves() keeps its TB state in pos.this_thread()
+      if (!th->rootMoves.empty())
+          Tablebases::rank_root_moves(th->rootPos, th->rootMoves);
   }
 
   main()->start_searching();
@@ -235,16 +236,16 @@ Thread* ThreadPool::get_best_thread() const {
         votes[th->rootMoves[0].pv[0]] +=
             (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);
 
-          if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
-          {
-              // Make sure we pick the shortest mate / TB conversion or stave off mate the longest
-              if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
-                  bestThread = th;
-          }
-          else if (   th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
-                   || (   th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
-                       && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
-              bestThread = th;
+        if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
+        {
+            // Make sure we pick the shortest mate / TB conversion or stave off mate the longest
+            if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
+                bestThread = th;
+        }
+        else if (   th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
+                 || (   th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
+                     && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
+            bestThread = th;
     }
 
     return bestThread;
diff --git a/src/thread.h b/src/thread.h
index 042bc2e9..501a6042 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -54,7 +54,6 @@ public:
   void idle_loop();
   void start_searching();
   void wait_for_search_finished();
-  int best_move_count(Move move) const;
 
   Pawns::Table pawnsTable;
   Material::Table materialTable;
@@ -74,6 +73,11 @@ public:
   CapturePieceToHistory captureHistory;
   ContinuationHistory continuationHistory[2][2];
   Score contempt;
+  bool rootInTB;
+  int Cardinality;
+  bool UseRule50;
+  Depth ProbeDepth;
+
 };
 
 
diff --git a/src/tt.cpp b/src/tt.cpp
index c64670ac..718587a8 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -35,6 +35,9 @@ bool TranspositionTable::enable_transposition_table = true;
 
 void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
 
+  if (!TranspositionTable::enable_transposition_table) {
+      return;
+  }
   // Preserve any existing move for the same position
   if (m || (uint16_t)k != key16)
       move16 = (uint16_t)m;
@@ -64,11 +67,12 @@ void TranspositionTable::resize(size_t mbSize) {
 
   Threads.main()->wait_for_search_finished();
 
-  aligned_ttmem_free(mem);
+  aligned_large_pages_free(table);
 
   clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
-  table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
-  if (!mem)
+
+  table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
+  if (!table)
   {
       std::cerr << "Failed to allocate " << mbSize
                 << "MB for transposition table." << std::endl;
diff --git a/src/tt.h b/src/tt.h
index 29072bd8..d817f26d 100644
--- a/src/tt.h
+++ b/src/tt.h
@@ -73,7 +73,7 @@ class TranspositionTable {
   static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
 
 public:
- ~TranspositionTable() { aligned_ttmem_free(mem); }
+ ~TranspositionTable() { aligned_large_pages_free(table); }
   void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound
   TTEntry* probe(const Key key, bool& found) const;
   int hashfull() const;
@@ -91,7 +91,6 @@ private:
 
   size_t clusterCount;
   Cluster* table;
-  void* mem;
   uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
 };
 
diff --git a/src/uci.cpp b/src/uci.cpp
index 1128d4d9..a123bbc0 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -47,7 +47,7 @@ const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
 void test_cmd(Position& pos, istringstream& is)
 {
     // Initialize as it may be searched.
-    Eval::init_NNUE();
+    Eval::NNUE::init();
 
     std::string param;
     is >> param;
@@ -100,7 +100,7 @@ namespace {
     Position p;
     p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main());
 
-    Eval::verify_NNUE();
+    Eval::NNUE::verify();
 
     sync_cout << "\n" << Eval::trace(p) << sync_endl;
   }
@@ -185,7 +185,7 @@ namespace {
 
         if (token == "go" || token == "eval")
         {
-            cerr << "\nPosition: " << cnt++ << '/' << num << endl;
+            cerr << "\nPosition: " << cnt++ << '/' << num << " (" << pos.fen() << ")" << endl;
             if (token == "go")
             {
                go(pos, is, states);
@@ -210,15 +210,15 @@ namespace {
          << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
   }
 
-  // The win rate model returns the probability (per mille) of winning given an eval
-  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
-  int win_rate_model(Value v, int ply) {
-     // Return win rate in per mille (rounded to nearest)
-     return int(0.5 + UCI::win_rate_model_double(v, ply));
-  }
-
 } // namespace
 
+// Integer win rate model: the probability (per mille) of winning given an eval and a
+// game-ply, i.e. win_rate_model_double() rounded to nearest (fits LTC fishtest statistics).
+int UCI::win_rate_model(Value v, int ply) {
+   // Return win rate in per mille (rounded to nearest)
+   return int(0.5 + win_rate_model_double(v, ply));
+}
+
 // The win rate model returns the probability (per mille) of winning given an eval
 // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
 double UCI::win_rate_model_double(double v, int ply) {
diff --git a/src/uci.h b/src/uci.h
index c0e8372f..2e0f5c11 100644
--- a/src/uci.h
+++ b/src/uci.h
@@ -72,6 +72,7 @@ std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
 std::string wdl(Value v, int ply);
+int win_rate_model(Value v, int ply);
 double win_rate_model_double(double v, int ply);
 Move to_move(const Position& pos, std::string& str);
 
diff --git a/src/ucioption.cpp b/src/ucioption.cpp
index dde3844a..099ca2ae 100644
--- a/src/ucioption.cpp
+++ b/src/ucioption.cpp
@@ -21,6 +21,7 @@
 #include <ostream>
 #include <sstream>
 
+#include "evaluate.h"
 #include "misc.h"
 #include "search.h"
 #include "thread.h"
@@ -40,10 +41,10 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
 void on_logger(const Option& o) { start_logger(o); }
 void on_threads(const Option& o) { Threads.set(size_t(o)); }
 void on_tb_path(const Option& o) { Tablebases::init(o); }
-void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
-void on_eval_file(const Option& ) { Eval::init_NNUE(); }
-void on_prune_at_shallow_depth_on_pv_node(const Option& o) {
-    Search::prune_at_shallow_depth_on_pv_node = o;
+void on_use_NNUE(const Option& ) { Eval::NNUE::init(); }
+void on_eval_file(const Option& ) { Eval::NNUE::init(); }
+void on_prune_at_shallow_depth(const Option& o) {
+    Search::prune_at_shallow_depth = o;
 }
 void on_enable_transposition_table(const Option& o) {
     TranspositionTable::enable_transposition_table = o;
@@ -85,23 +86,19 @@ void init(OptionsMap& o) {
   o["Syzygy50MoveRule"]      << Option(true);
   o["SyzygyProbeLimit"]      << Option(7, 0, 7);
   o["Use NNUE"]              << Option("true var true var false var pure", "true", on_use_NNUE);
-  // The default must follow the format nn-[SHA256 first 12 digits].nnue
-  // for the build process (profile-build and fishtest) to work.
-  o["EvalFile"]              << Option("nn-82215d0fd0df.nnue", on_eval_file);
+  o["EvalFile"]              << Option(EvalFileDefaultName, on_eval_file);
   // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function.
   // I want to hit the test eval convert command, but there is no new evaluation function
   // It ends abnormally before executing this command.
   // Therefore, with this hidden option, you can suppress the loading of the evaluation function when ucinewgame,
   // Hit the test eval convert command.
   o["SkipLoadingEval"]       << Option(false);
-  // how many moves to use a fixed move
-  // o["BookMoves"] << Option(16, 0, 10000);
   // When learning the evaluation function, you can change the folder to save the evaluation function.
   // Evalsave by default. This folder shall be prepared in advance.
   // Automatically create a folder under this folder like "0/", "1/", ... and save the evaluation function file there.
   o["EvalSaveDir"] << Option("evalsave");
   // Prune at shallow depth on PV nodes. False is recommended when using fixed depth search.
-  o["PruneAtShallowDepthOnPvNode"] << Option(true, on_prune_at_shallow_depth_on_pv_node);
+  o["PruneAtShallowDepth"] << Option(true, on_prune_at_shallow_depth);
   // Enable transposition table.
   o["EnableTranspositionTable"] << Option(true, on_enable_transposition_table);
 }
diff --git a/tests/instrumented_learn.sh b/tests/instrumented_learn.sh
index 7f76fd76..ce1fc429 100755
--- a/tests/instrumented_learn.sh
+++ b/tests/instrumented_learn.sh
@@ -78,11 +78,11 @@ cat << EOF > gensfen01.exp
  send "setoption name Threads value $threads\n"
  send "setoption name Use NNUE value false\n"
  send "isready\n"
- send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin use_raw_nnue_eval 0 sfen_format bin\n"
+ send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.bin sfen_format bin\n"
  expect "gensfen finished."
  send "learn training_data/training_data.bin convert_plain output_file_name training_data.txt\n"
  expect "all done"
- send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n"
+ send "gensfen depth 3 loop 100 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name training_data/training_data.binpack sfen_format binpack\n"
  expect "gensfen finished."
 
  send "quit\n"
@@ -104,9 +104,9 @@ cat << EOF > gensfen02.exp
  send "setoption name Threads value $threads\n"
  send "setoption name Use NNUE value true\n"
  send "isready\n"
- send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/valdidation_data.bin use_raw_nnue_eval 0 sfen_format bin\n"
+ send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.bin sfen_format bin\n"
  expect "gensfen finished."
- send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack use_raw_nnue_eval 0 sfen_format binpack\n"
+ send "gensfen depth 4 loop 50 use_draw_in_training_data_generation 1 eval_limit 32000 output_file_name validation_data/validation_data.binpack sfen_format binpack\n"
  expect "gensfen finished."
 
  send "quit\n"
@@ -127,7 +127,7 @@ cat << EOF > learn01.exp
  send "setoption name Use NNUE value true\n"
  send "setoption name Threads value $threads\n"
  send "isready\n"
- send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 mirror_percentage 50 validation_set_file_name validation_data/validation_data.bin\n"
+ send "learn targetdir training_data loop 2 batchsize 100 use_draw_in_training 1 use_draw_in_validation 1 lr 1 eval_limit 32000 nn_batch_size 30 newbob_decay 0.5 eval_save_interval 30 loss_output_interval 10 validation_set_file_name validation_data/validation_data.bin\n"
 
  expect "save_eval() finished."