diff --git a/.travis.yml b/.travis.yml
index e2ae61be..d563a1e1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,5 @@
 language: cpp
-dist: xenial
+dist: bionic
 
 matrix:
   include:
@@ -7,7 +7,6 @@ matrix:
       compiler: gcc
       addons:
         apt:
-          sources: ['ubuntu-toolchain-r-test']
           packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl']
       env:
         - COMPILER=g++-8
@@ -17,23 +16,23 @@ matrix:
       compiler: clang
       addons:
         apt:
-          sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-xenial-6.0']
-          packages: ['clang-6.0', 'llvm-6.0-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
+          packages: ['clang-10', 'llvm-10-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
       env:
-        - COMPILER=clang++-6.0
+        - COMPILER=clang++-10
         - COMP=clang
-        - LDFLAGS=-fuse-ld=lld
 
     - os: osx
+      osx_image: xcode12
       compiler: gcc
       env:
         - COMPILER=g++
         - COMP=gcc
 
     - os: osx
+      osx_image: xcode12
       compiler: clang
       env:
-        - COMPILER=clang++ V='Apple LLVM 9.4.1' # Apple LLVM version 9.1.0 (clang-902.0.39.2)
+        - COMPILER=clang++
         - COMP=clang
 
 branches:
@@ -48,26 +47,34 @@ script:
   - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
   - export benchref=$(cat git_sig)
   - echo "Reference bench:" $benchref
+
+  #
+  # Compiler version string
+  - $COMPILER -v
+
   #
   # Verify bench number against various builds
   - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
   - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
 
   #
   # Check perft and reproducible search
+  - export CXXFLAGS="-Werror"
+  - make clean && make -j2 ARCH=x86-64 build
   - ../tests/perft.sh
   - ../tests/reprosearch.sh
+
   #
   # Valgrind
   #
   - export CXXFLAGS="-O1 -fno-inline"
   - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
   - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
+
   #
   # Sanitizer
   #
-  # Use g++-8 as a proxy for having sanitizers, might need revision as they become available for more recent versions of clang/gcc
-  - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
-  - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
diff --git a/AUTHORS b/AUTHORS
index f08d71d3..21ef3e50 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,10 +1,17 @@
-# List of authors for Stockfish, as of March 30, 2020
+# List of authors for Stockfish, as of August 4, 2020
 
+# Founders of the Stockfish project and fishtest infrastructure
 Tord Romstad (romstad)
 Marco Costalba (mcostalba)
 Joona Kiiski (zamar)
 Gary Linscott (glinscott)
 
+# Authors and inventors of NNUE, training, NNUE port
+Yu Nasu (ynasu87)
+Motohiro Isozaki (yaneurao)
+Hisayori Noda (nodchip)
+
+# all other authors of the code in alphabetical order
 Aditya (absimaldata)
 Adrian Petrescu (apetresc)
 Ajith Chandy Jose (ajithcj)
@@ -36,6 +43,7 @@ Dariusz Orzechowski
 David Zar
 Daylen Yang (daylen)
 DiscanX
+Dominik Schlösser (domschl)
 double-beep
 Eduardo Cáceres (eduherminio)
 Eelco de Groot (KingDefender)
@@ -71,6 +79,7 @@ Jean Gauthier (OuaisBla)
 Jean-Francois Romang (jromang)
 Jekaa
 Jerry Donald Watson (jerrydonaldwatson)
+jjoshua2
 Jonathan Calovski (Mysseno)
 Jonathan Dumale (SFisGOD)
 Joost VandeVondele (vondele)
@@ -115,7 +124,8 @@ Nick Pelling (nickpelling)
 Nicklas Persson (NicklasPersson)
 Niklas Fiekas (niklasf)
 Nikolay Kostov (NikolayIT)
-Nguyen Pham
+Nguyen Pham (nguyenpham)
+Norman Schmidt (FireFather)
 Ondrej Mosnáček (WOnder93)
 Oskar Werkelin Ahlin
 Pablo Vazquez
@@ -135,14 +145,17 @@ Richard Lloyd
 Rodrigo Exterckötter Tjäder
 Ron Britvich (Britvich)
 Ronald de Man (syzygy1, syzygy)
+rqs
 Ryan Schmitt
 Ryan Takker
 Sami Kiminki (skiminki)
 Sebastian Buchwald (UniQP)
 Sergei Antonov (saproj)
 Sergei Ivanov (svivanov72)
+Sergio Vieri (sergiovieri)
 sf-x
 Shane Booth (shane31)
+Shawn Varghese (xXH4CKST3RXx)
 Stefan Geschwentner (locutus2)
 Stefano Cardanobile (Stefano80)
 Steinar Gunderson (sesse)
@@ -155,9 +168,11 @@ Tom Vijlbrief (tomtor)
 Tomasz Sobczyk (Sopel97)
 Torsten Franz (torfranz, tfranzer)
 Tracey Emery (basepr1me)
+tttak
 Unai Corzo (unaiic)
 Uri Blass (uriblass)
 Vince Negri (cuddlestmonkey)
+zz4032
 
 
 # Additionally, we acknowledge the authors and maintainers of fishtest,
diff --git a/README.md b/README.md
index 73eec1fb..7a237480 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,10 @@ Stockfish NNUE is a port of a shogi neural network named NNUE (efficiently updat
 
 ## Training Guide
 ### Generating Training Data
-Use the "no-nnue.nnue-gen-sfen-from-original-eval" binary. The given example is generation in its simplest form. There are more commands. 
+To generate training data from the classic eval, use the gensfen command with the "Use NNUE" option set to "false". The given example is generation in its simplest form. There are more commands.
 ```
 uci
+setoption name Use NNUE value false
 setoption name Threads value x
 setoption name Hash value y
 setoption name SyzygyPath value path
@@ -27,10 +28,11 @@ This will save a file named "generated_kifu.bin" in the same folder as the binar
 ### Generating Validation Data
 The process is the same as the generation of training data, except for the fact that you need to set loop to 1 million, because you don't need a lot of validation data. The depth should be the same as before or slightly higher than the depth of the training data. After generation rename the validation data file to val.bin and drop it in a folder named "validationdata" in the same directory to make it easier. 
 ### Training a Completely New Network
-Use the "avx2.halfkp_256x2-32-32.nnue-learn.2020-07-11" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
+Use the "learn" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
 ```
 uci
 setoption name SkipLoadingEval value true
+setoption name Use NNUE value true
 setoption name Threads value x
 isready
 learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 mirror_percentage 50 validation_set_file_name validationdata\val.bin
@@ -42,7 +44,7 @@ Nets get saved in the "evalsave" folder.
 - lambda is the amount of weight it puts to eval of learning data vs win/draw/loss results. 1 puts all weight on eval, lambda 0 puts all weight on WDL results.
 
 ### Reinforcement Learning
-If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries. Make sure that your previously trained network is in the eval folder. Use the commands specified above. Make sure `SkipLoadingEval` is set to false so that the data generated is using the neural net's eval by typing the command `uci setoption name SkipLoadingEval value false` before typing the `isready` command. You should aim to generate less positions than the first run, around 1/10 of the number of positions generated in the first run. The depth should be higher as well. You should also do the same for validation data, with the depth being higher than the last run.
+If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries with the `Use NNUE` option set to true. Make sure that your previously trained network is in the eval folder. Use the commands specified above. Make sure `SkipLoadingEval` is set to false so that the data generated is using the neural net's eval by typing the command `setoption name SkipLoadingEval value false` after `uci` and before typing the `isready` command. You should aim to generate fewer positions than the first run, around 1/10 of the number of positions generated in the first run. The depth should be higher as well. You should also do the same for validation data, with the depth being higher than the last run.
 
 After you have generated the training data, you must move it into your training data folder and delete the older data so that the binary does not accidentally train on the same data again. Do the same for the validation data and name it to val-1.bin to make it less confusing. Make sure the evalsave folder is empty. Then, using the same binary, type in the training commands shown above. Do __NOT__ set `SkipLoadingEval` to true, it must be false or you will get a completely new network, instead of a network trained with reinforcement learning. You should also set eval_save_interval to a number that is lower than the amount of positions in your training data, perhaps also 1/10 of the original value. The validation file should be set to the new validation data, not the old data.
 
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 00000000..7a237480
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,65 @@
+<p align="center">
+  <img src="https://cdn.discordapp.com/attachments/724700045525647420/729135226365804594/SFNNUE2.png">
+</p>
+
+<h1 align="center">Stockfish NNUE</h1>
+
+## Overview
+Stockfish NNUE is a port of a shogi neural network named NNUE (efficiently updateable neural network backwards) to Stockfish 11. To learn more about the Stockfish chess engine, look [here](stockfish.md) for an overview and [here](https://github.com/official-stockfish/Stockfish) for the official repository.
+
+## Training Guide
+### Generating Training Data
+To generate training data from the classic eval, use the gensfen command with the "Use NNUE" option set to "false". The given example is generation in its simplest form. There are more commands.
+```
+uci
+setoption name Use NNUE value false
+setoption name Threads value x
+setoption name Hash value y
+setoption name SyzygyPath value path
+isready
+gensfen depth a loop b use_draw_in_training_data_generation 1 eval_limit 32000
+```
+Specify how many threads and how much memory you would like to use with the x and y values. The option SyzygyPath is not necessary, but if you would like to use it, you must first have Syzygy endgame tablebases on your computer, which you can find [here](http://oics.olympuschess.com/tracker/index.php). You will need to have a torrent client to download these tablebases, as that is probably the fastest way to obtain them. The path is the path to the folder containing those tablebases. It does not have to be surrounded in quotes.
+
+This will save a file named "generated_kifu.bin" in the same folder as the binary. Once generation is done, rename the file to something like "1billiondepth12.bin" to remember the depth and quantity of the positions and move it to a folder named "trainingdata" in the same directory as the binaries.
+#### Generation Parameters
+- Depth is the searched depth per move, or how far the engine looks forward. This value is an integer.
+- Loop is the number of positions generated. This value is also an integer.
+### Generating Validation Data
+The process is the same as the generation of training data, except for the fact that you need to set loop to 1 million, because you don't need a lot of validation data. The depth should be the same as before or slightly higher than the depth of the training data. After generation rename the validation data file to val.bin and drop it in a folder named "validationdata" in the same directory to make it easier. 
+### Training a Completely New Network
+Use the "learn" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
+```
+uci
+setoption name SkipLoadingEval value true
+setoption name Use NNUE value true
+setoption name Threads value x
+isready
+learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 mirror_percentage 50 validation_set_file_name validationdata\val.bin
+```
+Nets get saved in the "evalsave" folder. 
+
+#### Training Parameters
+- eta is the learning rate
+- lambda is the amount of weight it puts to eval of learning data vs win/draw/loss results. 1 puts all weight on eval, lambda 0 puts all weight on WDL results.
+
+### Reinforcement Learning
+If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries with the `Use NNUE` option set to true. Make sure that your previously trained network is in the eval folder. Use the commands specified above. Make sure `SkipLoadingEval` is set to false so that the data generated is using the neural net's eval by typing the command `setoption name SkipLoadingEval value false` after `uci` and before typing the `isready` command. You should aim to generate fewer positions than the first run, around 1/10 of the number of positions generated in the first run. The depth should be higher as well. You should also do the same for validation data, with the depth being higher than the last run.
+
+After you have generated the training data, you must move it into your training data folder and delete the older data so that the binary does not accidentally train on the same data again. Do the same for the validation data and name it to val-1.bin to make it less confusing. Make sure the evalsave folder is empty. Then, using the same binary, type in the training commands shown above. Do __NOT__ set `SkipLoadingEval` to true, it must be false or you will get a completely new network, instead of a network trained with reinforcement learning. You should also set eval_save_interval to a number that is lower than the amount of positions in your training data, perhaps also 1/10 of the original value. The validation file should be set to the new validation data, not the old data.
+
+After training is finished, your new net should be located in the "final" folder under the "evalsave" directory. You should test this new network against the older network to see if there are any improvements.
+
+## Using Your Trained Net
+If you want to use your generated net, copy the net located in the "final" folder under the "evalsave" directory and move it into a new folder named "eval" under the directory with the binaries. You can then use the halfkp_256x2 binaries pertaining to your CPU with a standard chess GUI, such as Cutechess. Refer to the [releases page](https://github.com/nodchip/Stockfish/releases) to find out which binary is best for your CPU.
+
+If the engine does not load any net file, or shows "Error! *** not found or wrong format", please try to specify the net with the full file path with the "EvalFile" option by typing the command `setoption name EvalFile value path` where path is the full file path.
+
+## Resources
+- [Stockfish NNUE Wiki](https://www.qhapaq.org/shogi/shogiwiki/stockfish-nnue/)
+- [Training instructions](https://twitter.com/mktakizawa/status/1273042640280252416) from the creator of the Elmo shogi engine
+- [Original Talkchess thread](http://talkchess.com/forum3/viewtopic.php?t=74059) discussing Stockfish NNUE
+- [Guide to Stockfish NNUE](http://yaneuraou.yaneu.com/2020/06/19/stockfish-nnue-the-complete-guide/) 
+- [Unofficial Stockfish Discord](https://discord.gg/nv8gDtt)
+
+A more updated list can be found in the #sf-nnue-resources channel in the Discord.
diff --git a/appveyor.yml b/appveyor.yml
index 21f3bbe3..d356ba2f 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -4,10 +4,9 @@ clone_depth: 50
 branches:
   only:
     - master
-    - appveyor
 
 # Operating system (build VM template)
-os: Visual Studio 2017
+os: Visual Studio 2019
 
 # Build platform, i.e. x86, x64, AnyCPU. This setting is optional.
 platform:
@@ -36,8 +35,11 @@ before_build:
       $src = $src.Replace("\", "/")
 
       # Build CMakeLists.txt
-      $t = 'cmake_minimum_required(VERSION 3.8)',
+      $t = 'cmake_minimum_required(VERSION 3.17)',
            'project(Stockfish)',
+           'set(CMAKE_CXX_STANDARD 17)',
+           'set(CMAKE_CXX_STANDARD_REQUIRED ON)',
+           'set (CMAKE_CXX_EXTENSIONS OFF)',
            'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)',
            'set(source_files', $src, ')',
            'add_executable(stockfish ${source_files})'
@@ -51,10 +53,11 @@ before_build:
       $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1
       $bench = $b -match '\D+(\d+)' | % { $matches[1] }
       Write-Host "Reference bench:" $bench
-      $g = "Visual Studio 15 2017"
-      If (${env:PLATFORM} -eq 'x64') { $g = $g + ' Win64' }
-      cmake -G "${g}" .
-      Write-Host "Generated files for: " $g
+      $g = "Visual Studio 16 2019"
+      If (${env:PLATFORM} -eq 'x64') { $a = "x64" }
+      If (${env:PLATFORM} -eq 'x86') { $a = "Win32" }
+      cmake -G "${g}" -A ${a} .
+      Write-Host "Generated files for: " $g $a
 
 build_script:
   - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
diff --git a/src/Makefile b/src/Makefile
index 2e6c415d..e871f267 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -40,24 +40,24 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
 	material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
 	search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
 	eval/evaluate_mir_inv_tools.cpp \
-	eval/nnue/evaluate_nnue.cpp \
-	eval/nnue/evaluate_nnue_learner.cpp \
-	eval/nnue/features/half_kp.cpp \
-	eval/nnue/features/half_relative_kp.cpp \
-	eval/nnue/features/k.cpp \
-	eval/nnue/features/p.cpp \
-	eval/nnue/features/castling_right.cpp \
-	eval/nnue/features/enpassant.cpp \
-	eval/nnue/nnue_test_command.cpp \
+	nnue/evaluate_nnue.cpp \
+	nnue/evaluate_nnue_learner.cpp \
+	nnue/features/half_kp.cpp \
+	nnue/features/half_relative_kp.cpp \
+	nnue/features/k.cpp \
+	nnue/features/p.cpp \
+	nnue/features/castling_right.cpp \
+	nnue/features/enpassant.cpp \
+	nnue/nnue_test_command.cpp \
 	extra/sfen_packer.cpp \
 	learn/gensfen2019.cpp \
 	learn/learner.cpp \
 	learn/learning_tools.cpp \
 	learn/multi_think.cpp
 
-OBJS = $(SRCS:.cpp=.o)
+OBJS = $(notdir $(SRCS:.cpp=.o))
 
-VPATH = syzygy
+VPATH = syzygy:nnue:nnue/features:eval:extra:learn
 
 ### Establish the operating system name
 KERNEL = $(shell uname -s)
@@ -82,12 +82,14 @@ endif
 # prefetch = yes/no   --- -DUSE_PREFETCH   --- Use prefetch asm-instruction
 # popcnt = yes/no     --- -DUSE_POPCNT     --- Use popcnt asm-instruction
 # sse = yes/no        --- -msse            --- Use Intel Streaming SIMD Extensions
+# sse3 = yes/no       --- -msse3           --- Use Intel Streaming SIMD Extensions 3
 # ssse3 = yes/no      --- -mssse3          --- Use Intel Supplemental Streaming SIMD Extensions 3
 # sse41 = yes/no      --- -msse4.1         --- Use Intel Streaming SIMD Extensions 4.1
 # sse42 = yes/no      --- -msse4.2         --- Use Intel Streaming SIMD Extensions 4.2
 # avx2 = yes/no       --- -mavx2           --- Use Intel Advanced Vector Extensions 2
 # pext = yes/no       --- -DUSE_PEXT       --- Use pext x86_64 asm-instruction
-# avx512 = yes/no     --- -mavx512vbmi     --- Use Intel Advanced Vector Extensions 512
+# avx512 = yes/no     --- -mavx512bw       --- Use Intel Advanced Vector Extensions 512
+# neon = yes/no       --- -DUSE_NEON       --- Use ARM SIMD architecture
 #
 # Note that Makefile is space sensitive, so when adding new architectures
 # or modifying existing flags, you have to make sure there are no extra spaces
@@ -108,6 +110,8 @@ sse42 = no
 avx2 = no
 pext = no
 avx512 = no
+neon = no
+ARCH = x86-64-modern
 
 ### 2.2 Architecture specific
 ifeq ($(ARCH),general-32)
@@ -142,16 +146,14 @@ ifeq ($(ARCH),x86-64-sse3)
 	prefetch = yes
 	sse = yes
 	sse3 = yes
-	ssse3 = yes
 endif
 
 ifeq ($(ARCH),x86-64-sse3-popcnt)
 	arch = x86_64
 	prefetch = yes
-	popcnt = yes
 	sse = yes
 	sse3 = yes
-	ssse3 = yes
+	popcnt = yes
 endif
 
 ifeq ($(ARCH),x86-64-ssse3)
@@ -165,6 +167,17 @@ endif
 ifeq ($(ARCH),x86-64-sse41)
 	arch = x86_64
 	prefetch = yes
+	popcnt = yes
+	sse = yes
+	sse3 = yes
+	ssse3 = yes
+	sse41 = yes
+endif
+
+ifeq ($(ARCH),x86-64-modern)
+	arch = x86_64
+	prefetch = yes
+	popcnt = yes
 	sse = yes
 	sse3 = yes
 	ssse3 = yes
@@ -184,7 +197,6 @@ endif
 
 ifeq ($(ARCH),x86-64-avx2)
 	arch = x86_64
-	bits = 64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
@@ -210,7 +222,6 @@ endif
 
 ifeq ($(ARCH),x86-64-avx512)
 	arch = x86_64
-	bits = 64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
@@ -233,6 +244,14 @@ ifeq ($(ARCH),armv8)
 	arch = armv8-a
 	prefetch = yes
 	popcnt = yes
+	neon = yes
+endif
+
+ifeq ($(ARCH),apple-silicon)
+	arch = arm64
+	prefetch = yes
+	popcnt = yes
+	neon = yes
 endif
 
 ifeq ($(ARCH),ppc-32)
@@ -251,7 +270,7 @@ endif
 ### ==========================================================================
 
 ### 3.1 Selecting compiler (default = gcc)
-CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(NNUECXXFLAGS)
+CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS)
 DEPENDFLAGS += -std=c++17
 LDFLAGS += $(EXTRALDFLAGS)
 
@@ -277,6 +296,9 @@ ifeq ($(COMP),gcc)
 	ifneq ($(KERNEL),Darwin)
 	   LDFLAGS += -Wl,--no-as-needed
 	endif
+	
+	gccversion = $(shell $(CXX) --version)
+	gccisclang = $(findstring clang,$(gccversion))
 endif
 
 ifeq ($(COMP),mingw)
@@ -332,28 +354,6 @@ ifeq ($(COMP),clang)
 	endif
 endif
 
-ifeq ($(COMP),msys2)
-	comp=gcc
-	CXX=g++
-	CXXFLAGS += -pedantic -Wextra -Wshadow
-
-	ifeq ($(ARCH),armv7)
-		ifeq ($(OS),Android)
-			CXXFLAGS += -m$(bits)
-			LDFLAGS += -m$(bits)
-		endif
-	else
-		CXXFLAGS += -m$(bits)
-		LDFLAGS += -m$(bits)
-	endif
-
-	ifneq ($(KERNEL),Darwin)
-	   LDFLAGS += -Wl,--no-as-needed
-	endif
-
-	LDFLAGS += -static -Wl,-s
-endif
-
 ifeq ($(comp),icc)
 	profile_make = icc-profile-make
 	profile_use = icc-profile-use
@@ -368,8 +368,8 @@ endif
 endif
 
 ifeq ($(KERNEL),Darwin)
-	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.9
-	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.9
+	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
+	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
 endif
 
 ### Travis CI script uses COMPILER to overwrite CXX
@@ -402,8 +402,8 @@ endif
 
 ### 3.2.2 Debugging with undefined behavior sanitizers
 ifneq ($(sanitize),no)
-        CXXFLAGS += -g3 -fsanitize=$(sanitize) -fuse-ld=gold
-        LDFLAGS += -fsanitize=$(sanitize) -fuse-ld=gold
+        CXXFLAGS += -g3 -fsanitize=$(sanitize)
+        LDFLAGS += -fsanitize=$(sanitize)
 endif
 
 ### 3.3 Optimization
@@ -441,56 +441,61 @@ endif
 
 ### 3.6 popcnt
 ifeq ($(popcnt),yes)
-	CXXFLAGS += -DUSE_POPCNT
-	ifneq ($(arch),$(filter $(arch),ppc64 armv8-a))
-		ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
-			CXXFLAGS += -mpopcnt
-		endif
+	ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64))
+		CXXFLAGS += -DUSE_POPCNT
+	else ifeq ($(comp),icc)
+		CXXFLAGS += -msse3 -DUSE_POPCNT
+	else
+		CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT
 	endif
 endif
 
 ifeq ($(avx2),yes)
 	CXXFLAGS += -DUSE_AVX2
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mavx2
 	endif
 endif
 
 ifeq ($(avx512),yes)
 	CXXFLAGS += -DUSE_AVX512
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mavx512bw
 	endif
 endif
 
 ifeq ($(sse42),yes)
 	CXXFLAGS += -DUSE_SSE42
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -msse4.2
 	endif
 endif
 
 ifeq ($(sse41),yes)
 	CXXFLAGS += -DUSE_SSE41
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -msse4.1
 	endif
 endif
 
 ifeq ($(ssse3),yes)
 	CXXFLAGS += -DUSE_SSSE3
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mssse3
 	endif
 endif
 
 ifeq ($(sse3),yes)
 	CXXFLAGS += -DUSE_SSE3
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -msse3
 	endif
 endif
 
+ifeq ($(neon),yes)
+	CXXFLAGS += -DUSE_NEON
+endif
+
 ifeq ($(arch),x86_64)
 	CXXFLAGS += -DUSE_SSE2
 endif
@@ -498,7 +503,7 @@ endif
 ### 3.7 pext
 ifeq ($(pext),yes)
 	CXXFLAGS += -DUSE_PEXT
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mbmi2
 	endif
 endif
@@ -508,18 +513,28 @@ endif
 ### needs access to the optimization flags.
 ifeq ($(optimize),yes)
 ifeq ($(debug), no)
-	ifeq ($(comp),$(filter $(comp),gcc clang))
+	ifeq ($(comp),clang)
+		CXXFLAGS += -flto=thin
+		LDFLAGS += $(CXXFLAGS)
+
+# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
+# GCC on some systems.
+	else ifeq ($(comp),gcc)
+	ifeq ($(gccisclang),)
 		CXXFLAGS += -flto
+		LDFLAGS += $(CXXFLAGS) -flto=jobserver
+	else
+		CXXFLAGS += -flto=thin
 		LDFLAGS += $(CXXFLAGS)
 	endif
 
 # To use LTO and static linking on windows, the tool chain requires a recent gcc:
 # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not.
 # So, only enable it for a cross from Linux by default.
-	ifeq ($(comp),mingw)
+	else ifeq ($(comp),mingw)
 	ifeq ($(KERNEL),Linux)
 		CXXFLAGS += -flto
-		LDFLAGS += $(CXXFLAGS)
+		LDFLAGS += $(CXXFLAGS) -flto=jobserver
 	endif
 	endif
 endif
@@ -544,20 +559,12 @@ help:
 	@echo ""
 	@echo "Supported targets:"
 	@echo ""
-	@echo "build                   > Standard (without NNUE) build"
+	@echo "build                   > Standard build"
 	@echo "profile-build           > Standard build with PGO"
-	@echo "nnue                    > NNUE-enabled build"
-	@echo "profile-nnue            > NNUE-enabled build with PGO"
-	@echo "nnue-learn              > Produces or refines a NNUE parameter set."
-	@echo "                            Requires training data that can be"
-	@echo "                            generated by itself using an existing"
-	@echo "                            parameter set, or with the next tool"
-	@echo "nnue-gen-sfen-from-original-eval"
-	@echo "                        > Produces training data for 'nnue-learn'"
-	@echo "                        >   without using a NNUE parameter set"
 	@echo "strip                   > Strip executable"
 	@echo "install                 > Install executable"
 	@echo "clean                   > Clean up"
+	@echo "net                     > Download the default nnue net"
 	@echo ""
 	@echo "Supported archs:"
 	@echo ""
@@ -565,10 +572,11 @@ help:
 	@echo "x86-64-bmi2             > x86 64-bit with bmi2 support"
 	@echo "x86-64-avx2             > x86 64-bit with avx2 support"
 	@echo "x86-64-sse42            > x86 64-bit with sse42 support"
+	@echo "x86-64-modern           > x86 64-bit with sse41 support (x86-64-sse41)"
 	@echo "x86-64-sse41            > x86 64-bit with sse41 support"
 	@echo "x86-64-ssse3            > x86 64-bit with ssse3 support"
-	@echo "x86-64-sse3-popcnt      > x86 64-bit with ssse3 and popcnt support"
-	@echo "x86-64-sse3             > x86 64-bit with ssse3 support"
+	@echo "x86-64-sse3-popcnt      > x86 64-bit with sse3 and popcnt support"
+	@echo "x86-64-sse3             > x86 64-bit with sse3 support"
 	@echo "x86-64                  > x86 64-bit generic"
 	@echo "x86-32                  > x86 32-bit (also enables SSE)"
 	@echo "x86-32-old              > x86 32-bit fall back for old hardware"
@@ -576,6 +584,7 @@ help:
 	@echo "ppc-32                  > PPC 32-bit"
 	@echo "armv7                   > ARMv7 32-bit"
 	@echo "armv8                   > ARMv8 64-bit"
+	@echo "apple-silicon           > Apple silicon ARM64"
 	@echo "general-64              > unspecified 64-bit"
 	@echo "general-32              > unspecified 32-bit"
 	@echo ""
@@ -585,21 +594,23 @@ help:
 	@echo "mingw                   > Gnu compiler with MinGW under Windows"
 	@echo "clang                   > LLVM Clang compiler"
 	@echo "icc                     > Intel compiler"
-	@echo "msys2                   > MSYS2"
 	@echo ""
 	@echo "Simple examples. If you don't know what to do, you likely want to run: "
 	@echo ""
-	@echo "make build ARCH=x86-64    (This is for 64-bit systems)"
-	@echo "make build ARCH=x86-32    (This is for 32-bit systems)"
+	@echo "make -j build ARCH=x86-64    (This is for 64-bit systems)"
+	@echo "make -j build ARCH=x86-32    (This is for 32-bit systems)"
 	@echo ""
 	@echo "Advanced examples, for experienced users: "
 	@echo ""
-	@echo "make build ARCH=x86-64 COMP=clang"
-	@echo "make profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
+	@echo "make -j build ARCH=x86-64-modern COMP=clang"
+	@echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
 	@echo ""
+	@echo "The selected architecture $(ARCH) enables the following configuration: "
+	@echo ""
+	@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
 
 
-.PHONY: help build profile-build strip install clean objclean profileclean \
+.PHONY: help build profile-build strip install clean net objclean profileclean \
         config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
         clang-profile-use clang-profile-make
 
@@ -633,14 +644,21 @@ install:
 clean: objclean profileclean
 	@rm -f .depend *~ core
 
+net:
+	$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
+	@echo "Default net: $(nnuenet)"
+	$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
+	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
+	@if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi
+
 # clean binaries and objects
 objclean:
-	@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./eval/nnue/*.o ./eval/nnue/features/*.o
+	@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./nnue/*.o ./nnue/features/*.o
 
 # clean auxiliary profiling files
 profileclean:
 	@rm -rf profdir
-	@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./eval/nnue/*.gcda ./eval/nnue/features/*.gcda
+	@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda
 	@rm -f stockfish.profdata *.profraw
 
 default:
@@ -672,6 +690,7 @@ config-sanity:
 	@echo "avx2: '$(avx2)'"
 	@echo "pext: '$(pext)'"
 	@echo "avx512: '$(avx512)'"
+	@echo "neon: '$(neon)'"
 	@echo ""
 	@echo "Flags:"
 	@echo "CXX: $(CXX)"
@@ -685,7 +704,7 @@ config-sanity:
 	@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
 	@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
 	 test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
-	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a"
+	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64"
 	@test "$(bits)" = "32" || test "$(bits)" = "64"
 	@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
 	@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
@@ -697,10 +716,11 @@ config-sanity:
 	@test "$(avx2)" = "yes" || test "$(avx2)" = "no"
 	@test "$(pext)" = "yes" || test "$(pext)" = "no"
 	@test "$(avx512)" = "yes" || test "$(avx512)" = "no"
+	@test "$(neon)" = "yes" || test "$(neon)" = "no"
 	@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
 
 $(EXE): $(OBJS)
-	$(CXX) -o $@ $(OBJS) $(LDFLAGS)
+	+$(CXX) -o $@ $(OBJS) $(LDFLAGS)
 
 clang-profile-make:
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
@@ -738,20 +758,10 @@ icc-profile-use:
 	EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
 	all
 
-nnue: config-sanity
-	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_NNUE -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build
-
-profile-nnue: export NNUECXXFLAGS = -DEVAL_NNUE -DENABLE_TEST_CMD
-profile-nnue: config-sanity
-	$(MAKE) profile-build
-
-nnue-gen-sfen-from-original-eval: config-sanity
-	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DUSE_EVAL_HASH -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build
-
-nnue-learn: config-sanity
-	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DEVAL_NNUE -DUSE_EVAL_HASH -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp' LDFLAGS='$(LDFLAGS) -lopenblas -fopenmp' build
+learn: config-sanity
+	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp' LDFLAGS='$(LDFLAGS) -lopenblas -fopenmp' build
 
 .depend:
-	-@$(CXX) $(DEPENDFLAGS) -MM $(OBJS:.o=.cpp) > $@ 2> /dev/null
+	-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
 
 -include .depend
diff --git a/src/benchmark.cpp b/src/benchmark.cpp
index 3299f373..6041d642 100644
--- a/src/benchmark.cpp
+++ b/src/benchmark.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/bitbase.cpp b/src/bitbase.cpp
index 7e27eb96..bbe8e9a7 100644
--- a/src/bitbase.cpp
+++ b/src/bitbase.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/bitboard.cpp b/src/bitboard.cpp
index 0bf7eef9..f531010c 100644
--- a/src/bitboard.cpp
+++ b/src/bitboard.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/bitboard.h b/src/bitboard.h
index 15ec4153..a899d879 100644
--- a/src/bitboard.h
+++ b/src/bitboard.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -130,12 +128,6 @@ constexpr bool more_than_one(Bitboard b) {
   return b & (b - 1);
 }
 
-/// Counts the occupation of the bitboard depending on the occupation of SQ_A1
-/// as in `b & (1ULL << SQ_A1) ? more_than_two(b) : more_than_one(b)`
-
-constexpr bool conditional_more_than_two(Bitboard b) {
-  return b & (b - 1) & (b - 2);
-}
 
 constexpr bool opposite_colors(Square s1, Square s2) {
   return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1;
diff --git a/src/endgame.cpp b/src/endgame.cpp
index 40f49dce..c8be2198 100644
--- a/src/endgame.cpp
+++ b/src/endgame.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -589,8 +587,8 @@ ScaleFactor Endgame<KPsK>::operator()(const Position& pos) const {
   Bitboard strongPawns = pos.pieces(strongSide, PAWN);
 
   // If all pawns are ahead of the king on a single rook file, it's a draw.
-  if (!((strongPawns & ~FileABB) || (strongPawns & ~FileHBB)) &&
-      !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
+  if (   !(strongPawns & ~(FileABB | FileHBB))
+      && !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
       return SCALE_FACTOR_DRAW;
 
   return SCALE_FACTOR_NONE;
diff --git a/src/endgame.h b/src/endgame.h
index fd1aba2d..1351d88a 100644
--- a/src/endgame.h
+++ b/src/endgame.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/eval/evaluate_mir_inv_tools.cpp b/src/eval/evaluate_mir_inv_tools.cpp
index 3b5d3a36..3667b9f5 100644
--- a/src/eval/evaluate_mir_inv_tools.cpp
+++ b/src/eval/evaluate_mir_inv_tools.cpp
@@ -7,22 +7,22 @@ namespace Eval
 
 	// --- tables
 
-	// Value when a certain BonaPiece is seen from the other side
+	// Value when a certain PieceSquare is seen from the other side
 	// BONA_PIECE_INIT is -1, so it must be a signed type.
-	// Even if KPPT is expanded, BonaPiece will not exceed 2^15 for the time being, so int16_t is good.
-	int16_t inv_piece_[Eval::fe_end];
+	// Even if KPPT is expanded, PieceSquare will not exceed 2^15 for the time being, so int16_t is good.
+	int16_t inv_piece_[PieceSquare::PS_END];
 
-	// Returns the one at the position where a BonaPiece on the board is mirrored.
-	int16_t mir_piece_[Eval::fe_end];
+	// Returns the one at the position where a PieceSquare on the board is mirrored.
+	int16_t mir_piece_[PieceSquare::PS_END];
 
 
 	// --- methods
 
-// Returns the value when a certain BonaPiece is seen from the other side
-	Eval::BonaPiece inv_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)inv_piece_[p]; }
+// Returns the value when a certain PieceSquare is seen from the other side
+	PieceSquare inv_piece(PieceSquare p) { return (PieceSquare)inv_piece_[p]; }
 
-	// Returns the one at the position where a BonaPiece on the board is mirrored.
-	Eval::BonaPiece mir_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)mir_piece_[p]; }
+	// Returns the one at the position where a PieceSquare on the board is mirrored.
+	PieceSquare mir_piece(PieceSquare p) { return (PieceSquare)mir_piece_[p]; }
 
 	std::function<void()> mir_piece_init_function;
 
@@ -37,23 +37,23 @@ namespace Eval
 
 		// exchange f and e
 		int t[] = {
-			f_pawn             , e_pawn            ,
-			f_knight           , e_knight          ,
-			f_bishop           , e_bishop          ,
-			f_rook             , e_rook            ,
-			f_queen            , e_queen           ,
+			PieceSquare::PS_W_PAWN             , PieceSquare::PS_B_PAWN            ,
+			PieceSquare::PS_W_KNIGHT           , PieceSquare::PS_B_KNIGHT          ,
+			PieceSquare::PS_W_BISHOP           , PieceSquare::PS_B_BISHOP          ,
+			PieceSquare::PS_W_ROOK             , PieceSquare::PS_B_ROOK            ,
+			PieceSquare::PS_W_QUEEN            , PieceSquare::PS_B_QUEEN           ,
 		};
 
 		// Insert uninitialized value.
-		for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
+		for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
 		{
-			inv_piece_[p] = BONA_PIECE_NOT_INIT;
+			inv_piece_[p] = PieceSquare::PS_NOT_INIT;
 
 			// mirror does not work for hand pieces. Just return the original value.
-			mir_piece_[p] = (p < f_pawn) ? p : BONA_PIECE_NOT_INIT;
+			mir_piece_[p] = (p < PieceSquare::PS_W_PAWN) ? p : PieceSquare::PS_NOT_INIT;
 		}
 
-		for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
+		for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
 		{
 			for (int i = 0; i < 32 /* t.size() */; i += 2)
 			{
@@ -62,13 +62,13 @@ namespace Eval
 					Square sq = (Square)(p - t[i]);
 
 					// found!!
-					BonaPiece q = (p < fe_hand_end) ? BonaPiece(sq + t[i + 1]) : (BonaPiece)(Inv(sq) + t[i + 1]);
+					PieceSquare q = (p < PieceSquare::PS_W_PAWN) ? PieceSquare(sq + t[i + 1]) : (PieceSquare)(rotate180(sq) + t[i + 1]);
 					inv_piece_[p] = q;
 					inv_piece_[q] = p;
 
 					/*
 					It's a bit tricky, but regarding p
-										p >= fe_hand_end
+										p >= PieceSquare::PS_W_PAWN
 										When.
 
 					For this p, let n be an integer (i in the above code can only be an even number),
@@ -76,20 +76,20 @@ namespace Eval
 					b) When t[2n + 1] <= p <t[2n + 2], the back piece
 					Is.
 
-					Therefore, if p in the range of a) is set to q = Inv(p-t[2n+0]) + t[2n+1], it becomes the back piece in the box rotated 180 degrees.
+					Therefore, if p in the range of a) is set to q = rotate180(p-t[2n+0]) + t[2n+1], it becomes the back piece in the box rotated 180 degrees.
 					So inv_piece[] is initialized by swapping p and q.
 					*/
 
 					// There is no mirror for hand pieces.
-					if (p < fe_hand_end)
+					if (p < PieceSquare::PS_W_PAWN)
 						continue;
 
-					BonaPiece r1 = (BonaPiece)(Mir(sq) + t[i]);
+					PieceSquare r1 = (PieceSquare)(flip_file(sq) + t[i]);
 					mir_piece_[p] = r1;
 					mir_piece_[r1] = p;
 
-					BonaPiece p2 = (BonaPiece)(sq + t[i + 1]);
-					BonaPiece r2 = (BonaPiece)(Mir(sq) + t[i + 1]);
+					PieceSquare p2 = (PieceSquare)(sq + t[i + 1]);
+					PieceSquare r2 = (PieceSquare)(flip_file(sq) + t[i + 1]);
 					mir_piece_[p2] = r2;
 					mir_piece_[r2] = p2;
 
@@ -101,11 +101,11 @@ namespace Eval
 		if (mir_piece_init_function)
 			mir_piece_init_function();
 
-		for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
+		for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
 		{
 			// It remains uninitialized. The initialization code in the table above is incorrect.
-			assert(mir_piece_[p] != BONA_PIECE_NOT_INIT && mir_piece_[p] < fe_end);
-			assert(inv_piece_[p] != BONA_PIECE_NOT_INIT && inv_piece_[p] < fe_end);
+			assert(mir_piece_[p] != PieceSquare::PS_NOT_INIT && mir_piece_[p] < PieceSquare::PS_END);
+			assert(inv_piece_[p] != PieceSquare::PS_NOT_INIT && inv_piece_[p] < PieceSquare::PS_END);
 
 			// mir and inv return to their original coordinates after being applied twice.
 			assert(mir_piece_[mir_piece_[p]] == p);
@@ -126,7 +126,7 @@ namespace Eval
 		// Apery's WCSC26 evaluation function, kpp p1==0 or p1==20 (0th step on the back)
 		// There is dust in it, and if you don't avoid it, it will get caught in the assert.
 
-		std::unordered_set<BonaPiece> s;
+		std::unordered_set<PieceSquare> s;
 		vector<int> a = {
 			f_hand_pawn - 1,e_hand_pawn - 1,
 			f_hand_lance - 1, e_hand_lance - 1,
@@ -137,7 +137,7 @@ namespace Eval
 			f_hand_rook - 1, e_hand_rook - 1,
 		};
 		for (auto b : a)
-			s.insert((BonaPiece)b);
+			s.insert((PieceSquare)b);
 
 		// Excludes walks, incense, and katsura on the board that do not appear further (Apery also contains garbage here)
 		for (Rank r = RANK_1; r <= RANK_2; ++r)
@@ -146,18 +146,18 @@ namespace Eval
 				if (r == RANK_1)
 				{
 					// first step
-					BonaPiece b1 = BonaPiece(f_pawn + (f | r));
+					PieceSquare b1 = PieceSquare(PieceSquare::PS_W_PAWN + (f | r));
 					s.insert(b1);
 					s.insert(inv_piece[b1]);
 
 					// 1st stage incense
-					BonaPiece b2 = BonaPiece(f_lance + (f | r));
+					PieceSquare b2 = PieceSquare(f_lance + (f | r));
 					s.insert(b2);
 					s.insert(inv_piece[b2]);
 				}
 
 				// Katsura on the 1st and 2nd steps
-				BonaPiece b = BonaPiece(f_knight + (f | r));
+				PieceSquare b = PieceSquare(PieceSquare::PS_W_KNIGHT + (f | r));
 				s.insert(b);
 				s.insert(inv_piece[b]);
 			}
@@ -166,8 +166,8 @@ namespace Eval
 		for (auto sq : SQ)
 		{
 			cout << sq << ' ';
-			for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1)
-				for (BonaPiece p2 = BONA_PIECE_ZERO; p2 < fe_end; ++p2)
+			for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
+				for (PieceSquare p2 = PieceSquare::PS_NONE; p2 < PieceSquare::PS_END; ++p2)
 					if (!s.count(p1) && !s.count(p2))
 						kpp_write(sq, p1, p2, kpp[sq][p1][p2]);
 		}
@@ -177,7 +177,7 @@ namespace Eval
 		{
 			cout << sq1 << ' ';
 			for (auto sq2 : SQ)
-				for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1)
+				for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
 					if (!s.count(p1))
 						kkp_write(sq1, sq2, p1, kkp[sq1][sq2][p1]);
 		}
diff --git a/src/eval/evaluate_mir_inv_tools.h b/src/eval/evaluate_mir_inv_tools.h
index 826164bf..1f193b17 100644
--- a/src/eval/evaluate_mir_inv_tools.h
+++ b/src/eval/evaluate_mir_inv_tools.h
@@ -3,7 +3,7 @@
 
 #if defined(EVAL_NNUE) || defined(EVAL_LEARN)
 
-// BonaPiece's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces.
+// PieceSquare's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces.
 
 #include "../types.h"
 #include "../evaluate.h"
@@ -15,18 +15,18 @@ namespace Eval
 	//                  tables
 	// -------------------------------------------------
 
-	// --- Provide Mirror and Inverse to BonaPiece.
+	// --- Provide Mirror and Inverse to PieceSquare.
 
 	// These arrays are initialized by calling init() or init_mir_inv_tables();.
 	// If you want to use only this table from the evaluation function,
 	// Call init_mir_inv_tables().
 	// These arrays are referenced from the KK/KKP/KPP classes below.
 
-	// Returns the value when a certain BonaPiece is seen from the other side
-	extern Eval::BonaPiece inv_piece(Eval::BonaPiece p);
+	// Returns the value when a certain PieceSquare is seen from the other side
+	extern PieceSquare inv_piece(PieceSquare p);
 
-	// Returns the one at the position where a BonaPiece on the board is mirrored.
-	extern Eval::BonaPiece mir_piece(Eval::BonaPiece p);
+	// Returns the one at the position where a PieceSquare on the board is mirrored.
+	extern PieceSquare mir_piece(PieceSquare p);
 
 
 	// callback called when initializing mir_piece/inv_piece
@@ -35,8 +35,8 @@ namespace Eval
 	// At the timing when mir_piece_init_function is called, until fe_old_end
 	// It is guaranteed that these tables have been initialized.
 	extern std::function<void()> mir_piece_init_function;
-	extern int16_t mir_piece_[Eval::fe_end];
-	extern int16_t inv_piece_[Eval::fe_end];
+	extern int16_t mir_piece_[PieceSquare::PS_END];
+	extern int16_t inv_piece_[PieceSquare::PS_END];
 
 	// The table above will be initialized when you call this function explicitly or call init().
 	extern void init_mir_inv_tables();
diff --git a/src/eval/nnue/architectures/halfkp_256x2-32-32.h b/src/eval/nnue/architectures/halfkp_256x2-32-32.h
deleted file mode 100644
index 467d0222..00000000
--- a/src/eval/nnue/architectures/halfkp_256x2-32-32.h
+++ /dev/null
@@ -1,39 +0,0 @@
-﻿// Definition of input features and network structure used in NNUE evaluation function
-
-#ifndef HALFKP_256X2_32_32_H
-#define HALFKP_256X2_32_32_H
-
-#include "../features/feature_set.h"
-#include "../features/half_kp.h"
-
-#include "../layers/input_slice.h"
-#include "../layers/affine_transform.h"
-#include "../layers/clipped_relu.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-// Input features used in evaluation function
-using RawFeatures = Features::FeatureSet<
-    Features::HalfKP<Features::Side::kFriend>>;
-
-// Number of input feature dimensions after conversion
-constexpr IndexType kTransformedFeatureDimensions = 256;
-
-namespace Layers {
-
-// define network structure
-using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
-using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
-using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
-using OutputLayer = AffineTransform<HiddenLayer2, 1>;
-
-}  // namespace Layers
-
-using Network = Layers::OutputLayer;
-
-}  // namespace NNUE
-
-}  // namespace Eval
-#endif // HALFKP_256X2_32_32_H
diff --git a/src/eval/nnue/evaluate_nnue.cpp b/src/eval/nnue/evaluate_nnue.cpp
deleted file mode 100644
index 55e627d0..00000000
--- a/src/eval/nnue/evaluate_nnue.cpp
+++ /dev/null
@@ -1,326 +0,0 @@
-﻿// Code for calculating NNUE evaluation function
-
-#if defined(EVAL_NNUE)
-
-#include <fstream>
-#include <iostream>
-
-#include "../../evaluate.h"
-#include "../../position.h"
-#include "../../misc.h"
-#include "../../uci.h"
-
-#include "evaluate_nnue.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-// Input feature converter
-AlignedPtr<FeatureTransformer> feature_transformer;
-
-// Evaluation function
-AlignedPtr<Network> network;
-
-// Evaluation function file name
-std::string fileName = "nn.bin";
-
-// Saved evaluation function file name
-std::string savedfileName = "nn.bin";
-
-// Get a string that represents the structure of the evaluation function
-std::string GetArchitectureString() {
-  return "Features=" + FeatureTransformer::GetStructureString() +
-      ",Network=" + Network::GetStructureString();
-}
-
-namespace {
-
-namespace Detail {
-
-// Initialize the evaluation function parameters
-template <typename T>
-void Initialize(AlignedPtr<T>& pointer) {
-  pointer.reset(reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T))));
-  std::memset(pointer.get(), 0, sizeof(T));
-}
-
-// read evaluation function parameters
-template <typename T>
-bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
-  std::uint32_t header;
-  stream.read(reinterpret_cast<char*>(&header), sizeof(header));
-  if (!stream || header != T::GetHashValue()) return false;
-  return pointer->ReadParameters(stream);
-}
-
-// write evaluation function parameters
-template <typename T>
-bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
-  constexpr std::uint32_t header = T::GetHashValue();
-  stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
-  return pointer->WriteParameters(stream);
-}
-
-}  // namespace Detail
-
-// Initialize the evaluation function parameters
-void Initialize() {
-  Detail::Initialize(feature_transformer);
-  Detail::Initialize(network);
-}
-
-}  // namespace
-
-// read the header
-bool ReadHeader(std::istream& stream,
-  std::uint32_t* hash_value, std::string* architecture) {
-  std::uint32_t version, size;
-  stream.read(reinterpret_cast<char*>(&version), sizeof(version));
-  stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
-  stream.read(reinterpret_cast<char*>(&size), sizeof(size));
-  if (!stream || version != kVersion) return false;
-  architecture->resize(size);
-  stream.read(&(*architecture)[0], size);
-  return !stream.fail();
-}
-
-// write the header
-bool WriteHeader(std::ostream& stream,
-  std::uint32_t hash_value, const std::string& architecture) {
-  stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
-  stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
-  const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
-  stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
-  stream.write(architecture.data(), size);
-  return !stream.fail();
-}
-
-// read evaluation function parameters
-bool ReadParameters(std::istream& stream) {
-  std::uint32_t hash_value;
-  std::string architecture;
-  if (!ReadHeader(stream, &hash_value, &architecture)) return false;
-  if (hash_value != kHashValue) return false;
-  if (!Detail::ReadParameters(stream, feature_transformer)) return false;
-  if (!Detail::ReadParameters(stream, network)) return false;
-  return stream && stream.peek() == std::ios::traits_type::eof();
-}
-
-// write evaluation function parameters
-bool WriteParameters(std::ostream& stream) {
-  if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
-  if (!Detail::WriteParameters(stream, feature_transformer)) return false;
-  if (!Detail::WriteParameters(stream, network)) return false;
-  return !stream.fail();
-}
-
-// proceed if you can calculate the difference
-static void UpdateAccumulatorIfPossible(const Position& pos) {
-  feature_transformer->UpdateAccumulatorIfPossible(pos);
-}
-
-// Calculate the evaluation value
-static Value ComputeScore(const Position& pos, bool refresh = false) {
-  auto& accumulator = pos.state()->accumulator;
-  if (!refresh && accumulator.computed_score) {
-    return accumulator.score;
-  }
-
-  alignas(kCacheLineSize) TransformedFeatureType
-      transformed_features[FeatureTransformer::kBufferSize];
-  feature_transformer->Transform(pos, transformed_features, refresh);
-  alignas(kCacheLineSize) char buffer[Network::kBufferSize];
-  const auto output = network->Propagate(transformed_features, buffer);
-
-  // When a value larger than VALUE_MAX_EVAL is returned, aspiration search fails high
-  // It should be guaranteed that it is less than VALUE_MAX_EVAL because the search will not end.
-
-  // Even if this phenomenon occurs, if the seconds are fixed when playing, the search will be aborted there, so
-  // The best move in the previous iteration is pointed to as bestmove, so apparently
-  // no problem. The situation in which this VALUE_MAX_EVAL is returned is almost at a dead end,
-  // Since such a jamming phase often appears at the end, there is a big difference in the situation
-  // Doesn't really affect the outcome.
-
-  // However, when searching with a fixed depth such as when creating a teacher, it will not return from the search
-  // Waste the computation time for that thread. Also, it will be timed out with fixed depth game.
-
-  auto score = static_cast<Value>(output[0] / FV_SCALE);
-
-  // 1) I feel that if I clip too poorly, it will have an effect on my learning...
-  // 2) Since accumulator.score is not used at the time of difference calculation, it can be rewritten without any problem.
-  score = Math::clamp(score , -VALUE_MAX_EVAL , VALUE_MAX_EVAL);
-
-  accumulator.score = score;
-  accumulator.computed_score = true;
-  return accumulator.score;
-}
-
-} // namespace NNUE
-
-#if defined(USE_EVAL_HASH)
-// Class used to store evaluation values ​​in HashTable
-struct alignas(16) ScoreKeyValue {
-#if defined(USE_SSE2)
-  ScoreKeyValue() = default;
-  ScoreKeyValue(const ScoreKeyValue& other) {
-    static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i),
-                  "sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)");
-    _mm_store_si128(&as_m128i, other.as_m128i);
-  }
-  ScoreKeyValue& operator=(const ScoreKeyValue& other) {
-    _mm_store_si128(&as_m128i, other.as_m128i);
-    return *this;
-  }
-#endif
-
-  // It is necessary to be able to operate atomically with evaluate hash, so the manipulator for that
-  void encode() {
-#if defined(USE_SSE2)
-    // ScoreKeyValue is copied to atomic, so if the key matches, the data matches.
-#else
-    key ^= score;
-#endif
-  }
-  // decode() is the reverse conversion of encode(), but since it is xor, the reverse conversion is the same.
-  void decode() { encode(); }
-
-  union {
-    struct {
-      std::uint64_t key;
-      std::uint64_t score;
-    };
-#if defined(USE_SSE2)
-    __m128i as_m128i;
-#endif
-  };
-};
-
-// Simple HashTable implementation.
-// Size is a power of 2.
-template <typename T, size_t Size>
-struct HashTable {
-  HashTable() { clear(); }
-  T* operator [] (const Key k) { return entries_ + (static_cast<size_t>(k) & (Size - 1)); }
-  void clear() { memset(entries_, 0, sizeof(T)*Size); }
-
-  // Check that Size is a power of 2
-  static_assert((Size & (Size - 1)) == 0, "");
-
- private:
-  T entries_[Size];
-};
-
-//HashTable to save the evaluated ones (following ehash)
-
-#if !defined(USE_LARGE_EVAL_HASH)
-// 134MB (setting other than witch's AVX2)
-struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x800000> {};
-#else
-// If you have prefetch, it's better to have a big one...
-// → It doesn't change much and the memory is wasteful, so is it okay to set ↑ by default?
-// 1GB (setting for witch's AVX2)
-struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x4000000> {};
-#endif
-
-EvaluateHashTable g_evalTable;
-
-// Prepare a function to prefetch.
-void prefetch_evalhash(const Key key) {
-  constexpr auto mask = ~((uint64_t)0x1f);
-  prefetch((void*)((uint64_t)g_evalTable[key] & mask));
-}
-#endif
-
-// read the evaluation function file
-// Save and restore Options with bench command etc., so EvalDir is changed at this time,
-// This function may be called twice to flag that the evaluation function needs to be reloaded.
-void load_eval() {
-
-  // Must be done!
-  NNUE::Initialize();
-
-  if (Options["SkipLoadingEval"])
-  {
-      std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
-      return;
-  }
-
-  const std::string file_name = Options["EvalFile"];
-  NNUE::fileName = file_name;
-
-  std::ifstream stream(file_name, std::ios::binary);
-  const bool result = NNUE::ReadParameters(stream);
-
-  if (!result)
-      // It's a problem if it doesn't finish when there is a read error.
-      std::cout << "Error! " << NNUE::fileName << " not found or wrong format" << std::endl;
-
-  else
-      std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl;
-}
-
-// Initialization
-void init() {
-}
-
-// Evaluation function. Perform full calculation instead of difference calculation.
-// Called only once with Position::set(). (The difference calculation after that)
-// Note that the evaluation value seen from the turn side is returned. (Design differs from other evaluation functions in this respect)
-// Since, we will not try to optimize this function.
-Value compute_eval(const Position& pos) {
-  return NNUE::ComputeScore(pos, true);
-}
-
-// Evaluation function
-Value evaluate(const Position& pos) {
-  const auto& accumulator = pos.state()->accumulator;
-  if (accumulator.computed_score) {
-    return accumulator.score;
-  }
-
-#if defined(USE_GLOBAL_OPTIONS)
-  // If Global Options is set not to use eval hash
-  // Skip the query to the eval hash.
-  if (!GlobalOptions.use_eval_hash) {
-    ASSERT_LV5(pos.state()->materialValue == Eval::material(pos));
-    return NNUE::ComputeScore(pos);
-  }
-#endif
-
-#if defined(USE_EVAL_HASH)
-  // May be in the evaluate hash table.
-  const Key key = pos.key();
-  ScoreKeyValue entry = *g_evalTable[key];
-  entry.decode();
-  if (entry.key == key) {
-    // there were!
-    return Value(entry.score);
-  }
-#endif
-
-  Value score = NNUE::ComputeScore(pos);
-#if defined(USE_EVAL_HASH)
-  // Since it was calculated carefully, save it in the evaluate hash table.
-  entry.key = key;
-  entry.score = score;
-  entry.encode();
-  *g_evalTable[key] = entry;
-#endif
-
-  return score;
-}
-
-// proceed if you can calculate the difference
-void evaluate_with_no_return(const Position& pos) {
-  NNUE::UpdateAccumulatorIfPossible(pos);
-}
-
-// display the breakdown of the evaluation value of the current phase
-void print_eval_stat(Position& /*pos*/) {
-  std::cout << "--- EVAL STAT: not implemented" << std::endl;
-}
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
diff --git a/src/eval/nnue/evaluate_nnue.h b/src/eval/nnue/evaluate_nnue.h
deleted file mode 100644
index d474a8ae..00000000
--- a/src/eval/nnue/evaluate_nnue.h
+++ /dev/null
@@ -1,67 +0,0 @@
-﻿// header used in NNUE evaluation function
-
-#ifndef _EVALUATE_NNUE_H_
-#define _EVALUATE_NNUE_H_
-
-#if defined(EVAL_NNUE)
-
-#include "nnue_feature_transformer.h"
-#include "nnue_architecture.h"
-
-#include <memory>
-
-namespace Eval {
-
-namespace NNUE {
-
-// hash value of evaluation function structure
-constexpr std::uint32_t kHashValue =
-    FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
-
-// Deleter for automating release of memory area
-template <typename T>
-struct AlignedDeleter {
-  void operator()(T* ptr) const {
-    ptr->~T();
-    aligned_free(ptr);
-  }
-};
-template <typename T>
-using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
-
-// Input feature converter
-extern AlignedPtr<FeatureTransformer> feature_transformer;
-
-// Evaluation function
-extern AlignedPtr<Network> network;
-
-// Evaluation function file name
-extern std::string fileName;
-
-// Saved evaluation function file name
-extern std::string savedfileName;
-
-// Get a string that represents the structure of the evaluation function
-std::string GetArchitectureString();
-
-// read the header
-bool ReadHeader(std::istream& stream,
-    std::uint32_t* hash_value, std::string* architecture);
-
-// write the header
-bool WriteHeader(std::ostream& stream,
-    std::uint32_t hash_value, const std::string& architecture);
-
-// read evaluation function parameters
-bool ReadParameters(std::istream& stream);
-
-// write evaluation function parameters
-bool WriteParameters(std::ostream& stream);
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/features/feature_set.h b/src/eval/nnue/features/feature_set.h
deleted file mode 100644
index 0430ebfe..00000000
--- a/src/eval/nnue/features/feature_set.h
+++ /dev/null
@@ -1,249 +0,0 @@
-﻿// A class template that represents the input feature set of the NNUE evaluation function
-
-#ifndef _NNUE_FEATURE_SET_H_
-#define _NNUE_FEATURE_SET_H_
-
-#if defined(EVAL_NNUE)
-
-#include "features_common.h"
-#include <array>
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Features {
-
-// A class template that represents a list of values
-template <typename T, T... Values>
-struct CompileTimeList;
-template <typename T, T First, T... Remaining>
-struct CompileTimeList<T, First, Remaining...> {
-  static constexpr bool Contains(T value) {
-    return value == First || CompileTimeList<T, Remaining...>::Contains(value);
-  }
-  static constexpr std::array<T, sizeof...(Remaining) + 1>
-      kValues = {{First, Remaining...}};
-};
-template <typename T, T First, T... Remaining>
-constexpr std::array<T, sizeof...(Remaining) + 1>
-    CompileTimeList<T, First, Remaining...>::kValues;
-template <typename T>
-struct CompileTimeList<T> {
-  static constexpr bool Contains(T /*value*/) {
-    return false;
-  }
-  static constexpr std::array<T, 0> kValues = {{}};
-};
-
-// Class template that adds to the beginning of the list
-template <typename T, typename ListType, T Value>
-struct AppendToList;
-template <typename T, T... Values, T AnotherValue>
-struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
-  using Result = CompileTimeList<T, AnotherValue, Values...>;
-};
-
-// Class template for adding to a sorted, unique list
-template <typename T, typename ListType, T Value>
-struct InsertToSet;
-template <typename T, T First, T... Remaining, T AnotherValue>
-struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
-  using Result = std::conditional_t<
-      CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
-      CompileTimeList<T, First, Remaining...>,
-      std::conditional_t<(AnotherValue <First),
-          CompileTimeList<T, AnotherValue, First, Remaining...>,
-          typename AppendToList<T, typename InsertToSet<
-              T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
-              First>::Result>>;
-};
-template <typename T, T Value>
-struct InsertToSet<T, CompileTimeList<T>, Value> {
-  using Result = CompileTimeList<T, Value>;
-};
-
-// Base class of feature set
-template <typename Derived>
-class FeatureSetBase {
- public:
-  // Get a list of indices with a value of 1 among the features
-  template <typename IndexListType>
-  static void AppendActiveIndices(
-      const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
-    for (const auto perspective :Colors) {
-      Derived::CollectActiveIndices(
-          pos, trigger, perspective, &active[perspective]);
-    }
-  }
-
-  // Get a list of indices whose values ​​have changed from the previous one in the feature quantity
-  template <typename PositionType, typename IndexListType>
-  static void AppendChangedIndices(
-      const PositionType& pos, TriggerEvent trigger,
-      IndexListType removed[2], IndexListType added[2], bool reset[2]) {
-    const auto& dp = pos.state()->dirtyPiece;
-    if (dp.dirty_num == 0) return;
-
-    for (const auto perspective :Colors) {
-      reset[perspective] = false;
-      switch (trigger) {
-        case TriggerEvent::kNone:
-          break;
-        case TriggerEvent::kFriendKingMoved:
-          reset[perspective] =
-              dp.pieceNo[0] == PIECE_NUMBER_KING + perspective;
-          break;
-        case TriggerEvent::kEnemyKingMoved:
-          reset[perspective] =
-              dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective;
-          break;
-        case TriggerEvent::kAnyKingMoved:
-          reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING;
-          break;
-        case TriggerEvent::kAnyPieceMoved:
-          reset[perspective] = true;
-          break;
-        default:
-          assert(false);
-          break;
-      }
-      if (reset[perspective]) {
-        Derived::CollectActiveIndices(
-            pos, trigger, perspective, &added[perspective]);
-      } else {
-        Derived::CollectChangedIndices(
-            pos, trigger, perspective,
-            &removed[perspective], &added[perspective]);
-      }
-    }
-  }
-};
-
-// Class template that represents the feature set
-// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
-template <typename FirstFeatureType, typename... RemainingFeatureTypes>
-class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
-    public FeatureSetBase<
-        FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
- private:
-  using Head = FirstFeatureType;
-  using Tail = FeatureSet<RemainingFeatureTypes...>;
-
- public:
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t kHashValue =
-      Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
-  // number of feature dimensions
-  static constexpr IndexType kDimensions =
-      Head::kDimensions + Tail::kDimensions;
-  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions =
-      Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
-  // List of timings to perform all calculations instead of difference calculation
-  using SortedTriggerSet = typename InsertToSet<TriggerEvent,
-      typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
-  static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
-
-  // Get the feature quantity name
-  static std::string GetName() {
-    return std::string(Head::kName) + "+" + Tail::GetName();
-  }
-
- private:
-  // Get a list of indices with a value of 1 among the features
-  template <typename IndexListType>
-  static void CollectActiveIndices(
-      const Position& pos, const TriggerEvent trigger, const Color perspective,
-      IndexListType* const active) {
-    Tail::CollectActiveIndices(pos, trigger, perspective, active);
-    if (Head::kRefreshTrigger == trigger) {
-      const auto start = active->size();
-      Head::AppendActiveIndices(pos, perspective, active);
-      for (auto i = start; i < active->size(); ++i) {
-        (*active)[i] += Tail::kDimensions;
-      }
-    }
-  }
-
-  // Get a list of indices whose values ​​have changed from the previous one in the feature quantity
-  template <typename IndexListType>
-  static void CollectChangedIndices(
-      const Position& pos, const TriggerEvent trigger, const Color perspective,
-      IndexListType* const removed, IndexListType* const added) {
-    Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
-    if (Head::kRefreshTrigger == trigger) {
-      const auto start_removed = removed->size();
-      const auto start_added = added->size();
-      Head::AppendChangedIndices(pos, perspective, removed, added);
-      for (auto i = start_removed; i < removed->size(); ++i) {
-        (*removed)[i] += Tail::kDimensions;
-      }
-      for (auto i = start_added; i < added->size(); ++i) {
-        (*added)[i] += Tail::kDimensions;
-      }
-    }
-  }
-
-  // Make the base class and the class template that recursively uses itself a friend
-  friend class FeatureSetBase<FeatureSet>;
-  template <typename... FeatureTypes>
-  friend class FeatureSet;
-};
-
-// Class template that represents the feature set
-// Specialization with one template argument
-template <typename FeatureType>
-class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
- public:
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
-  // number of feature dimensions
-  static constexpr IndexType kDimensions = FeatureType::kDimensions;
-  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions =
-      FeatureType::kMaxActiveDimensions;
-  // List of timings to perform all calculations instead of difference calculation
-  using SortedTriggerSet =
-      CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
-  static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
-
-  // Get the feature quantity name
-  static std::string GetName() {
-    return FeatureType::kName;
-  }
-
- private:
-  // Get a list of indices with a value of 1 among the features
-  static void CollectActiveIndices(
-      const Position& pos, const TriggerEvent trigger, const Color perspective,
-      IndexList* const active) {
-    if (FeatureType::kRefreshTrigger == trigger) {
-      FeatureType::AppendActiveIndices(pos, perspective, active);
-    }
-  }
-
-  // Get a list of indices whose values ​​have changed from the previous one in the feature quantity
-  static void CollectChangedIndices(
-      const Position& pos, const TriggerEvent trigger, const Color perspective,
-      IndexList* const removed, IndexList* const added) {
-    if (FeatureType::kRefreshTrigger == trigger) {
-      FeatureType::AppendChangedIndices(pos, perspective, removed, added);
-    }
-  }
-
-  // Make the base class and the class template that recursively uses itself a friend
-  friend class FeatureSetBase<FeatureSet>;
-  template <typename... FeatureTypes>
-  friend class FeatureSet;
-};
-
-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/features/features_common.h b/src/eval/nnue/features/features_common.h
deleted file mode 100644
index 8d2ca4a2..00000000
--- a/src/eval/nnue/features/features_common.h
+++ /dev/null
@@ -1,47 +0,0 @@
-﻿//Common header of input features of NNUE evaluation function
-
-#ifndef _NNUE_FEATURES_COMMON_H_
-#define _NNUE_FEATURES_COMMON_H_
-
-#if defined(EVAL_NNUE)
-
-#include "../../../evaluate.h"
-#include "../nnue_common.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Features {
-
-// Index list type
-class IndexList;
-
-// Class template that represents the feature set
-template <typename... FeatureTypes>
-class FeatureSet;
-
-// Type of timing to perform all calculations instead of difference calculation
-enum class TriggerEvent {
-  kNone, // Calculate the difference whenever possible
-  kFriendKingMoved, // calculate all when own ball moves
-  kEnemyKingMoved, // do all calculations when enemy balls move
-  kAnyKingMoved, // do all calculations if either ball moves
-  kAnyPieceMoved, // always do all calculations
-};
-
-// turn side or other side
-enum class Side {
-  kFriend, // turn side
-  kEnemy, // opponent
-};
-
-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/features/half_kp.cpp b/src/eval/nnue/features/half_kp.cpp
deleted file mode 100644
index cba2c9cd..00000000
--- a/src/eval/nnue/features/half_kp.cpp
+++ /dev/null
@@ -1,84 +0,0 @@
-﻿//Definition of input features HalfKP of NNUE evaluation function
-
-#if defined(EVAL_NNUE)
-
-#include "half_kp.h"
-#include "index_list.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Features {
-
-// Find the index of the feature quantity from the ball position and BonaPiece
-template <Side AssociatedKing>
-inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, BonaPiece p) {
-  return static_cast<IndexType>(fe_end) * static_cast<IndexType>(sq_k) + p;
-}
-
-// Get the piece information
-template <Side AssociatedKing>
-inline void HalfKP<AssociatedKing>::GetPieces(
-    const Position& pos, Color perspective,
-    BonaPiece** pieces, Square* sq_target_k) {
-  *pieces = (perspective == BLACK) ?
-      pos.eval_list()->piece_list_fb() :
-      pos.eval_list()->piece_list_fw();
-  const PieceNumber target = (AssociatedKing == Side::kFriend) ?
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
-  *sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
-}
-
-// Get a list of indices with a value of 1 among the features
-template <Side AssociatedKing>
-void HalfKP<AssociatedKing>::AppendActiveIndices(
-    const Position& pos, Color perspective, IndexList* active) {
-  // do nothing if array size is small to avoid compiler warning
-  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-
-  BonaPiece* pieces;
-  Square sq_target_k;
-  GetPieces(pos, perspective, &pieces, &sq_target_k);
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
-    if (pieces[i] != Eval::BONA_PIECE_ZERO) {
-      active->push_back(MakeIndex(sq_target_k, pieces[i]));
-    }
-  }
-}
-
-// Get a list of indices whose values ​​have changed from the previous one in the feature quantity
-template <Side AssociatedKing>
-void HalfKP<AssociatedKing>::AppendChangedIndices(
-    const Position& pos, Color perspective,
-    IndexList* removed, IndexList* added) {
-  BonaPiece* pieces;
-  Square sq_target_k;
-  GetPieces(pos, perspective, &pieces, &sq_target_k);
-  const auto& dp = pos.state()->dirtyPiece;
-  for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
-    const auto old_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].old_piece.from[perspective]);
-    if (old_p != Eval::BONA_PIECE_ZERO) {
-      removed->push_back(MakeIndex(sq_target_k, old_p));
-    }
-    const auto new_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].new_piece.from[perspective]);
-    if (new_p != Eval::BONA_PIECE_ZERO) {
-      added->push_back(MakeIndex(sq_target_k, new_p));
-    }
-  }
-}
-
-template class HalfKP<Side::kFriend>;
-template class HalfKP<Side::kEnemy>;
-
-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
diff --git a/src/eval/nnue/features/half_kp.h b/src/eval/nnue/features/half_kp.h
deleted file mode 100644
index cc9cd660..00000000
--- a/src/eval/nnue/features/half_kp.h
+++ /dev/null
@@ -1,62 +0,0 @@
-﻿//Definition of input features HalfKP of NNUE evaluation function
-
-#ifndef _NNUE_FEATURES_HALF_KP_H_
-#define _NNUE_FEATURES_HALF_KP_H_
-
-#if defined(EVAL_NNUE)
-
-#include "../../../evaluate.h"
-#include "features_common.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Features {
-
-// Feature HalfKP: Combination of the position of own ball or enemy ball and the position of pieces other than balls
-template <Side AssociatedKing>
-class HalfKP {
- public:
-  // feature quantity name
-  static constexpr const char* kName =
-      (AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t kHashValue =
-      0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
-  // number of feature dimensions
-  static constexpr IndexType kDimensions =
-      static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(fe_end);
-  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
-  // Timing of full calculation instead of difference calculation
-  static constexpr TriggerEvent kRefreshTrigger =
-      (AssociatedKing == Side::kFriend) ?
-      TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
-
-  // Get a list of indices with a value of 1 among the features
-  static void AppendActiveIndices(const Position& pos, Color perspective,
-                                  IndexList* active);
-
-  // Get a list of indices whose values ​​have changed from the previous one in the feature quantity
-  static void AppendChangedIndices(const Position& pos, Color perspective,
-                                   IndexList* removed, IndexList* added);
-
-  // Find the index of the feature quantity from the ball position and BonaPiece
-  static IndexType MakeIndex(Square sq_k, BonaPiece p);
-
- private:
-  // Get the piece information
-  static void GetPieces(const Position& pos, Color perspective,
-                        BonaPiece** pieces, Square* sq_target_k);
-};
-
-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/features/index_list.h b/src/eval/nnue/features/index_list.h
deleted file mode 100644
index 39e66a09..00000000
--- a/src/eval/nnue/features/index_list.h
+++ /dev/null
@@ -1,55 +0,0 @@
-﻿// Definition of index list of input features
-
-#ifndef _NNUE_FEATURES_INDEX_LIST_H_
-#define _NNUE_FEATURES_INDEX_LIST_H_
-
-#if defined(EVAL_NNUE)
-
-#include "../../../position.h"
-#include "../nnue_architecture.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Features {
-
-// Class template used for feature index list
-template <typename T, std::size_t MaxSize>
-class ValueList {
- public:
-  std::size_t size() const { return size_; }
-  void resize(std::size_t size) { size_ = size; }
-  void push_back(const T& value) { values_[size_++] = value; }
-  T& operator[](std::size_t index) { return values_[index]; }
-  T* begin() { return values_; }
-  T* end() { return values_ + size_; }
-  const T& operator[](std::size_t index) const { return values_[index]; }
-  const T* begin() const { return values_; }
-  const T* end() const { return values_ + size_; }
-  void swap(ValueList& other) {
-    const std::size_t max_size = std::max(size_, other.size_);
-    for (std::size_t i = 0; i < max_size; ++i) {
-      std::swap(values_[i], other.values_[i]);
-    }
-    std::swap(size_, other.size_);
-  }
- private:
-  T values_[MaxSize];
-  std::size_t size_ = 0;
-};
-
-//Type of feature index list
-class IndexList
-    : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
-};
-
-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/layers/affine_transform.h b/src/eval/nnue/layers/affine_transform.h
deleted file mode 100644
index 2db7f731..00000000
--- a/src/eval/nnue/layers/affine_transform.h
+++ /dev/null
@@ -1,217 +0,0 @@
-﻿// Definition of layer AffineTransform of NNUE evaluation function
-
-#ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_
-#define _NNUE_LAYERS_AFFINE_TRANSFORM_H_
-
-#if defined(EVAL_NNUE)
-
-#include "../nnue_common.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Layers {
-
-// affine transformation layer
-template <typename PreviousLayer, IndexType OutputDimensions>
-class AffineTransform {
- public:
-  // Input/output type
-  using InputType = typename PreviousLayer::OutputType;
-  using OutputType = std::int32_t;
-  static_assert(std::is_same<InputType, std::uint8_t>::value, "");
-
-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions =
-      PreviousLayer::kOutputDimensions;
-  static constexpr IndexType kOutputDimensions = OutputDimensions;
-  static constexpr IndexType kPaddedInputDimensions =
-      CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
-
-  // Size of forward propagation buffer used in this layer
-  static constexpr std::size_t kSelfBufferSize =
-      CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
-
-  // Size of the forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t kBufferSize =
-      PreviousLayer::kBufferSize + kSelfBufferSize;
-
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    std::uint32_t hash_value = 0xCC03DAE4u;
-    hash_value += kOutputDimensions;
-    hash_value ^= PreviousLayer::GetHashValue() >> 1;
-    hash_value ^= PreviousLayer::GetHashValue() << 31;
-    return hash_value;
-  }
-
-  // A string that represents the structure from the input layer to this layer
-  static std::string GetStructureString() {
-    return "AffineTransform[" +
-        std::to_string(kOutputDimensions) + "<-" +
-        std::to_string(kInputDimensions) + "](" +
-        PreviousLayer::GetStructureString() + ")";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& stream) {
-    if (!previous_layer_.ReadParameters(stream)) return false;
-    stream.read(reinterpret_cast<char*>(biases_),
-                kOutputDimensions * sizeof(BiasType));
-    stream.read(reinterpret_cast<char*>(weights_),
-                kOutputDimensions * kPaddedInputDimensions *
-                sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& stream) const {
-    if (!previous_layer_.WriteParameters(stream)) return false;
-    stream.write(reinterpret_cast<const char*>(biases_),
-                 kOutputDimensions * sizeof(BiasType));
-    stream.write(reinterpret_cast<const char*>(weights_),
-                 kOutputDimensions * kPaddedInputDimensions *
-                 sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // forward propagation
-  const OutputType* Propagate(
-      const TransformedFeatureType* transformed_features, char* buffer) const {
-    const auto input = previous_layer_.Propagate(
-        transformed_features, buffer + kSelfBufferSize);
-    const auto output = reinterpret_cast<OutputType*>(buffer);
-#if defined(USE_AVX512)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
-    const __m512i kOnes = _mm512_set1_epi16(1);
-    const auto input_vector = reinterpret_cast<const __m512i*>(input);
-#elif defined(USE_AVX2)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-    const __m256i kOnes = _mm256_set1_epi16(1);
-    const auto input_vector = reinterpret_cast<const __m256i*>(input);
-#elif defined(USE_SSSE3)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-    const __m128i kOnes = _mm_set1_epi16(1);
-    const auto input_vector = reinterpret_cast<const __m128i*>(input);
-#elif defined(IS_ARM)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-    const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
-#endif
-    for (IndexType i = 0; i < kOutputDimensions; ++i) {
-      const IndexType offset = i * kPaddedInputDimensions;
-#if defined(USE_AVX512)
-      __m512i sum = _mm512_setzero_si512();
-      const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
-#else
-          __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
-#endif
-          product = _mm512_madd_epi16(product, kOnes);
-          sum = _mm512_add_epi32(sum, product);
-      }
-      output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
-      
-      // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
-      // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
-      // and we have to do one more 256bit chunk.
-      if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
-      {
-          const auto iv_256  = reinterpret_cast<const __m256i*>(input);
-          const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
-          int j = kNumChunks * 2;
-#if defined(__MINGW32__) || defined(__MINGW64__)  // See HACK comment below in AVX2.
-          __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
-#else
-          __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
-#endif
-          sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
-
-          sum256 = _mm256_hadd_epi32(sum256, sum256);
-          sum256 = _mm256_hadd_epi32(sum256, sum256);
-          const __m128i lo = _mm256_extracti128_si256(sum256, 0);
-          const __m128i hi = _mm256_extracti128_si256(sum256, 1);
-          output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
-      }
-#elif defined(USE_AVX2)
-      __m256i sum = _mm256_setzero_si256();
-      const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m256i product = _mm256_maddubs_epi16(
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
-          //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
-          //       even though alignas is specified.
-          _mm256_loadu_si256
-#else
-          _mm256_load_si256
-#endif
-          (&input_vector[j]), _mm256_load_si256(&row[j]));
-        product = _mm256_madd_epi16(product, kOnes);
-        sum = _mm256_add_epi32(sum, product);
-      }
-      sum = _mm256_hadd_epi32(sum, sum);
-      sum = _mm256_hadd_epi32(sum, sum);
-      const __m128i lo = _mm256_extracti128_si256(sum, 0);
-      const __m128i hi = _mm256_extracti128_si256(sum, 1);
-      output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
-#elif defined(USE_SSSE3)
-      __m128i sum = _mm_cvtsi32_si128(biases_[i]);
-      const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m128i product = _mm_maddubs_epi16(
-            _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
-        product = _mm_madd_epi16(product, kOnes);
-        sum = _mm_add_epi32(sum, product);
-      }
-      sum = _mm_hadd_epi32(sum, sum);
-      sum = _mm_hadd_epi32(sum, sum);
-      output[i] = _mm_cvtsi128_si32(sum);
-#elif defined(IS_ARM)
-      int32x4_t sum = {biases_[i]};
-      const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
-        product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
-        sum = vpadalq_s16(sum, product);
-      }
-      output[i] = sum[0] + sum[1] + sum[2] + sum[3];
-#else
-      OutputType sum = biases_[i];
-      for (IndexType j = 0; j < kInputDimensions; ++j) {
-        sum += weights_[offset + j] * input[j];
-      }
-      output[i] = sum;
-#endif
-    }
-    return output;
-  }
-
- private:
-  // parameter type
-  using BiasType = OutputType;
-  using WeightType = std::int8_t;
-
-  // Make the learning class a friend
-  friend class Trainer<AffineTransform>;
-
-  // the layer immediately before this layer
-  PreviousLayer previous_layer_;
-
-  // parameter
-  alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
-  alignas(kCacheLineSize)
-      WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
-};
-
-}  // namespace Layers
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/layers/clipped_relu.h b/src/eval/nnue/layers/clipped_relu.h
deleted file mode 100644
index fe4bedaa..00000000
--- a/src/eval/nnue/layers/clipped_relu.h
+++ /dev/null
@@ -1,177 +0,0 @@
-﻿// Definition of layer ClippedReLU of NNUE evaluation function
-
-#ifndef _NNUE_LAYERS_CLIPPED_RELU_H_
-#define _NNUE_LAYERS_CLIPPED_RELU_H_
-
-#if defined(EVAL_NNUE)
-
-#include "../nnue_common.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Layers {
-
-// Clipped ReLU
-template <typename PreviousLayer>
-class ClippedReLU {
- public:
-  // Input/output type
-  using InputType = typename PreviousLayer::OutputType;
-  using OutputType = std::uint8_t;
-  static_assert(std::is_same<InputType, std::int32_t>::value, "");
-
-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions =
-      PreviousLayer::kOutputDimensions;
-  static constexpr IndexType kOutputDimensions = kInputDimensions;
-
-  // Size of forward propagation buffer used in this layer
-  static constexpr std::size_t kSelfBufferSize =
-      CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
-
-  // Size of the forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t kBufferSize =
-      PreviousLayer::kBufferSize + kSelfBufferSize;
-
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    std::uint32_t hash_value = 0x538D24C7u;
-    hash_value += PreviousLayer::GetHashValue();
-    return hash_value;
-  }
-
-  // A string that represents the structure from the input layer to this layer
-  static std::string GetStructureString() {
-    return "ClippedReLU[" +
-        std::to_string(kOutputDimensions) + "](" +
-        PreviousLayer::GetStructureString() + ")";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& stream) {
-    return previous_layer_.ReadParameters(stream);
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& stream) const {
-    return previous_layer_.WriteParameters(stream);
-  }
-
-  // forward propagation
-  const OutputType* Propagate(
-      const TransformedFeatureType* transformed_features, char* buffer) const {
-    const auto input = previous_layer_.Propagate(
-        transformed_features, buffer + kSelfBufferSize);
-    const auto output = reinterpret_cast<OutputType*>(buffer);
-#if defined(USE_AVX2)
-    constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
-    const __m256i kZero = _mm256_setzero_si256();
-    const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
-    const auto in = reinterpret_cast<const __m256i*>(input);
-    const auto out = reinterpret_cast<__m256i*>(output);
-    for (IndexType i = 0; i < kNumChunks; ++i) {
-      const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
-        //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
-        //       even though alignas is specified.
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 0]),
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 1])), kWeightScaleBits);
-      const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 2]),
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 3])), kWeightScaleBits);
-#if defined(__MINGW32__) || defined(__MINGW64__)
-      _mm256_storeu_si256
-#else
-      _mm256_store_si256
-#endif
-        (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
-          _mm256_packs_epi16(words0, words1), kZero), kOffsets));
-    }
-    constexpr IndexType kStart = kNumChunks * kSimdWidth;
-#elif defined(USE_SSSE3)
-    constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
-    const __m128i kZero = _mm_setzero_si128();
-#ifndef USE_SSE41
-    const __m128i k0x80s = _mm_set1_epi8(-128);
-#endif
-    const auto in = reinterpret_cast<const __m128i*>(input);
-    const auto out = reinterpret_cast<__m128i*>(output);
-    for (IndexType i = 0; i < kNumChunks; ++i) {
-      const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
-          _mm_load_si128(&in[i * 4 + 0]),
-          _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
-      const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
-          _mm_load_si128(&in[i * 4 + 2]),
-          _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
-      const __m128i packedbytes = _mm_packs_epi16(words0, words1);
-      _mm_store_si128(&out[i], 
-#ifdef USE_SSE41
-        _mm_max_epi8(packedbytes, kZero)
-#else
-        _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
-#endif
-      );
-    }
-    constexpr IndexType kStart = kNumChunks * kSimdWidth;
-#elif defined(IS_ARM)
-    constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
-    const int8x8_t kZero = {0};
-    const auto in = reinterpret_cast<const int32x4_t*>(input);
-    const auto out = reinterpret_cast<int8x8_t*>(output);
-    for (IndexType i = 0; i < kNumChunks; ++i) {
-      int16x8_t shifted;
-      const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
-      pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
-      pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
-      out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
-    }
-    constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
-#else
-    constexpr IndexType kStart = 0;
-#endif
-    for (IndexType i = kStart; i < kInputDimensions; ++i) {
-      output[i] = static_cast<OutputType>(
-          std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
-    }
-    return output;
-  }
-
- private:
-  // Make the learning class a friend
-  friend class Trainer<ClippedReLU>;
-
-  // the layer immediately before this layer
-  PreviousLayer previous_layer_;
-};
-
-}  // namespace Layers
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/layers/input_slice.h b/src/eval/nnue/layers/input_slice.h
deleted file mode 100644
index ec7627d2..00000000
--- a/src/eval/nnue/layers/input_slice.h
+++ /dev/null
@@ -1,74 +0,0 @@
-﻿// NNUE evaluation function layer InputSlice definition
-
-#ifndef _NNUE_LAYERS_INPUT_SLICE_H_
-#define _NNUE_LAYERS_INPUT_SLICE_H_
-
-#if defined(EVAL_NNUE)
-
-#include "../nnue_common.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-namespace Layers {
-
-// input layer
-template <IndexType OutputDimensions, IndexType Offset = 0>
-class InputSlice {
- public:
-  // need to maintain alignment
-  static_assert(Offset % kMaxSimdWidth == 0, "");
-
-  // output type
-  using OutputType = TransformedFeatureType;
-
-  // output dimensionality
-  static constexpr IndexType kOutputDimensions = OutputDimensions;
-
-  // Size of the forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t kBufferSize = 0;
-
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    std::uint32_t hash_value = 0xEC42E90Du;
-    hash_value ^= kOutputDimensions ^ (Offset << 10);
-    return hash_value;
-  }
-
-  // A string that represents the structure from the input layer to this layer
-  static std::string GetStructureString() {
-    return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
-        std::to_string(Offset) + ":" +
-        std::to_string(Offset + kOutputDimensions) + ")]";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& /*stream*/) {
-    return true;
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& /*stream*/) const {
-    return true;
-  }
-
-  // forward propagation
-  const OutputType* Propagate(
-      const TransformedFeatureType* transformed_features,
-      char* /*buffer*/) const {
-    return transformed_features + Offset;
-  }
-
- private:
-};
-
-}  // namespace Layers
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/nnue_accumulator.h b/src/eval/nnue/nnue_accumulator.h
deleted file mode 100644
index 07f4f183..00000000
--- a/src/eval/nnue/nnue_accumulator.h
+++ /dev/null
@@ -1,30 +0,0 @@
-﻿// Class for difference calculation of NNUE evaluation function
-
-#ifndef _NNUE_ACCUMULATOR_H_
-#define _NNUE_ACCUMULATOR_H_
-
-#if defined(EVAL_NNUE)
-
-#include "nnue_architecture.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-// Class that holds the result of affine transformation of input features
-// Keep the evaluation value that is the final output together
-struct alignas(32) Accumulator {
-  std::int16_t
-      accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-  Value score = VALUE_ZERO;
-  bool computed_accumulation = false;
-  bool computed_score = false;
-};
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/nnue_architecture.h b/src/eval/nnue/nnue_architecture.h
deleted file mode 100644
index cb53e4f9..00000000
--- a/src/eval/nnue/nnue_architecture.h
+++ /dev/null
@@ -1,33 +0,0 @@
-﻿// Input features and network structure used in NNUE evaluation function
-
-#ifndef _NNUE_ARCHITECTURE_H_
-#define _NNUE_ARCHITECTURE_H_
-
-#if defined(EVAL_NNUE)
-
-// include a header that defines the input features and network structure
-//#include "architectures/k-p_256x2-32-32.h"
-//#include "architectures/k-p-cr_256x2-32-32.h"
-//#include "architectures/k-p-cr-ep_256x2-32-32.h"
-#include "architectures/halfkp_256x2-32-32.h"
-//#include "architectures/halfkp-cr-ep_256x2-32-32.h"
-//#include "architectures/halfkp_384x2-32-32.h"
-
-namespace Eval {
-
-namespace NNUE {
-
-static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
-static_assert(Network::kOutputDimensions == 1, "");
-static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
-
-// List of timings to perform all calculations instead of difference calculation
-constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/nnue_common.h b/src/eval/nnue/nnue_common.h
deleted file mode 100644
index cffb0098..00000000
--- a/src/eval/nnue/nnue_common.h
+++ /dev/null
@@ -1,64 +0,0 @@
-﻿// Constants used in NNUE evaluation function
-
-#ifndef _NNUE_COMMON_H_
-#define _NNUE_COMMON_H_
-
-#if defined(EVAL_NNUE)
-
-#if defined(USE_AVX2)
-#include <immintrin.h>
-#elif defined(USE_SSE41)
-#include <smmintrin.h>
-#elif defined(USE_SSSE3)
-#include <tmmintrin.h>
-#elif defined(USE_SSE2)
-#include <emmintrin.h>
-#endif
-
-namespace Eval {
-
-namespace NNUE {
-
-// A constant that represents the version of the evaluation function file
-constexpr std::uint32_t kVersion = 0x7AF32F16u;
-
-// Constant used in evaluation value calculation
-constexpr int FV_SCALE = 16;
-constexpr int kWeightScaleBits = 6;
-
-// Size of cache line (in bytes)
-constexpr std::size_t kCacheLineSize = 64;
-
-// SIMD width (in bytes)
-#if defined(USE_AVX2)
-constexpr std::size_t kSimdWidth = 32;
-#elif defined(USE_SSE2)
-constexpr std::size_t kSimdWidth = 16;
-#elif defined(IS_ARM)
-constexpr std::size_t kSimdWidth = 16;
-#endif
-constexpr std::size_t kMaxSimdWidth = 32;
-
-// Type of input feature after conversion
-using TransformedFeatureType = std::uint8_t;
-
-// index type
-using IndexType = std::uint32_t;
-
-// Forward declaration of learning class template
-template <typename Layer>
-class Trainer;
-
-// find the smallest multiple of n and above
-template <typename IntType>
-constexpr IntType CeilToMultiple(IntType n, IntType base) {
-  return (n + base - 1) / base * base;
-}
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/eval/nnue/nnue_feature_transformer.h b/src/eval/nnue/nnue_feature_transformer.h
deleted file mode 100644
index bb1a50bc..00000000
--- a/src/eval/nnue/nnue_feature_transformer.h
+++ /dev/null
@@ -1,357 +0,0 @@
-﻿// A class that converts the input features of the NNUE evaluation function
-
-#ifndef _NNUE_FEATURE_TRANSFORMER_H_
-#define _NNUE_FEATURE_TRANSFORMER_H_
-
-#if defined(EVAL_NNUE)
-
-#include "nnue_common.h"
-#include "nnue_architecture.h"
-#include "features/index_list.h"
-
-#include <cstring> // std::memset()
-
-namespace Eval {
-
-namespace NNUE {
-
-// Input feature converter
-class FeatureTransformer {
- private:
-  // number of output dimensions for one side
-  static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
-
- public:
-  // output type
-  using OutputType = TransformedFeatureType;
-
-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
-  static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
-
-  // size of forward propagation buffer
-  static constexpr std::size_t kBufferSize =
-      kOutputDimensions * sizeof(OutputType);
-
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    return RawFeatures::kHashValue ^ kOutputDimensions;
-  }
-
-  // a string representing the structure
-  static std::string GetStructureString() {
-    return RawFeatures::GetName() + "[" +
-        std::to_string(kInputDimensions) + "->" +
-        std::to_string(kHalfDimensions) + "x2]";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& stream) {
-    stream.read(reinterpret_cast<char*>(biases_),
-                kHalfDimensions * sizeof(BiasType));
-    stream.read(reinterpret_cast<char*>(weights_),
-                kHalfDimensions * kInputDimensions * sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& stream) const {
-    stream.write(reinterpret_cast<const char*>(biases_),
-                 kHalfDimensions * sizeof(BiasType));
-    stream.write(reinterpret_cast<const char*>(weights_),
-                 kHalfDimensions * kInputDimensions * sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // proceed with the difference calculation if possible
-  bool UpdateAccumulatorIfPossible(const Position& pos) const {
-    const auto now = pos.state();
-    if (now->accumulator.computed_accumulation) {
-      return true;
-    }
-    const auto prev = now->previous;
-    if (prev && prev->accumulator.computed_accumulation) {
-      UpdateAccumulator(pos);
-      return true;
-    }
-    return false;
-  }
-
-  // convert input features
-  void Transform(const Position& pos, OutputType* output, bool refresh) const {
-    if (refresh || !UpdateAccumulatorIfPossible(pos)) {
-      RefreshAccumulator(pos);
-    }
-    const auto& accumulation = pos.state()->accumulator.accumulation;
-#if defined(USE_AVX2)
-    constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
-    constexpr int kControl = 0b11011000;
-    const __m256i kZero = _mm256_setzero_si256();
-#elif defined(USE_SSSE3)
-    constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
-    const __m128i kZero = _mm_setzero_si128();
-#ifndef USE_SSE41
-    const __m128i k0x80s = _mm_set1_epi8(-128);
-#endif
-#elif defined(IS_ARM)
-    constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-    const int8x8_t kZero = {0};
-#endif
-    const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
-    for (IndexType p = 0; p < 2; ++p) {
-      const IndexType offset = kHalfDimensions * p;
-#if defined(USE_AVX2)
-      auto out = reinterpret_cast<__m256i*>(&output[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m256i sum0 =
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
-          //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
-          //       even though alignas is specified.
-          _mm256_loadu_si256
-#else
-          _mm256_load_si256
-#endif
-          (&reinterpret_cast<const __m256i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 0]);
-        __m256i sum1 =
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          _mm256_loadu_si256
-#else
-          _mm256_load_si256
-#endif
-          (&reinterpret_cast<const __m256i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 1]);
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 0]);
-          sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 1]);
-        }
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_storeu_si256
-#else
-        _mm256_store_si256
-#endif
-        (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
-            _mm256_packs_epi16(sum0, sum1), kZero), kControl));
-      }
-#elif defined(USE_SSSE3)
-      auto out = reinterpret_cast<__m128i*>(&output[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 0]);
-        __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 1]);
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 0]);
-          sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 1]);
-        }
-  	const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
- 
-        _mm_store_si128(&out[j],
-#ifdef USE_SSE41
-          _mm_max_epi8(packedbytes, kZero)
-#else
-          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
-#endif
-        );
-      }
-#elif defined(IS_ARM)
-      const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        int16x8_t sum = reinterpret_cast<const int16x8_t*>(
-            accumulation[perspectives[p]][0])[j];
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
-              accumulation[perspectives[p]][i])[j]);
-        }
-        out[j] = vmax_s8(vqmovn_s16(sum), kZero);
-      }
-#else
-      for (IndexType j = 0; j < kHalfDimensions; ++j) {
-        BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum += accumulation[static_cast<int>(perspectives[p])][i][j];
-        }
-        output[offset + j] = static_cast<OutputType>(
-            std::max<int>(0, std::min<int>(127, sum)));
-      }
-#endif
-    }
-  }
-
- private:
-  // Calculate cumulative value without using difference calculation
-  void RefreshAccumulator(const Position& pos) const {
-    auto& accumulator = pos.state()->accumulator;
-    for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
-      Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
-                                       active_indices);
-      for (const auto perspective : Colors) {
-        if (i == 0) {
-          std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                      kHalfDimensions * sizeof(BiasType));
-        } else {
-          std::memset(accumulator.accumulation[perspective][i], 0,
-                      kHalfDimensions * sizeof(BiasType));
-        }
-        for (const auto index : active_indices[perspective]) {
-          const IndexType offset = kHalfDimensions * index;
-#if defined(USE_AVX2)
-          auto accumulation = reinterpret_cast<__m256i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j) {
-#if defined(__MINGW32__) || defined(__MINGW64__)
-            _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
-#else
-            accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
-#endif
-          }
-#elif defined(USE_SSE2)
-          auto accumulation = reinterpret_cast<__m128i*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j) {
-            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
-          }
-#elif defined(IS_ARM)
-          auto accumulation = reinterpret_cast<int16x8_t*>(
-              &accumulator.accumulation[perspective][i][0]);
-          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j) {
-            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-          }
-#else
-          for (IndexType j = 0; j < kHalfDimensions; ++j) {
-            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
-          }
-#endif
-        }
-      }
-    }
-
-    accumulator.computed_accumulation = true;
-    accumulator.computed_score = false;
-  }
-
-  // Calculate cumulative value using difference calculation
-  void UpdateAccumulator(const Position& pos) const {
-    const auto prev_accumulator = pos.state()->previous->accumulator;
-    auto& accumulator = pos.state()->accumulator;
-    for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
-      Features::IndexList removed_indices[2], added_indices[2];
-      bool reset[2];
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
-                                        removed_indices, added_indices, reset);
-      for (const auto perspective : Colors) {
-#if defined(USE_AVX2)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m256i*>(
-            &accumulator.accumulation[perspective][i][0]);
-#elif defined(USE_SSE2)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<__m128i*>(
-            &accumulator.accumulation[perspective][i][0]);
-#elif defined(IS_ARM)
-        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-        auto accumulation = reinterpret_cast<int16x8_t*>(
-            &accumulator.accumulation[perspective][i][0]);
-#endif
-        if (reset[perspective]) {
-          if (i == 0) {
-            std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                        kHalfDimensions * sizeof(BiasType));
-          } else {
-            std::memset(accumulator.accumulation[perspective][i], 0,
-                        kHalfDimensions * sizeof(BiasType));
-          }
-        } else {// Difference calculation for the feature amount changed from 1 to 0
-          std::memcpy(accumulator.accumulation[perspective][i],
-                      prev_accumulator.accumulation[perspective][i],
-                      kHalfDimensions * sizeof(BiasType));
-          for (const auto index : removed_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index;
-#if defined(USE_AVX2)
-            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
-            }
-#elif defined(USE_SSE2)
-            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
-            }
-#elif defined(IS_ARM)
-            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
-            }
-#else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] -=
-                  weights_[offset + j];
-            }
-#endif
-          }
-        }
-        {// Difference calculation for features that changed from 0 to 1
-          for (const auto index : added_indices[perspective]) {
-            const IndexType offset = kHalfDimensions * index;
-#if defined(USE_AVX2)
-            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
-            }
-#elif defined(USE_SSE2)
-            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
-            }
-#elif defined(IS_ARM)
-            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
-              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-            }
-#else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] +=
-                  weights_[offset + j];
-            }
-#endif
-          }
-        }
-      }
-    }
-
-    accumulator.computed_accumulation = true;
-    accumulator.computed_score = false;
-  }
-
-  // parameter type
-  using BiasType = std::int16_t;
-  using WeightType = std::int16_t;
-
-  // Make the learning class a friend
-  friend class Trainer<FeatureTransformer>;
-
-  // parameter
-  alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
-  alignas(kCacheLineSize)
-      WeightType weights_[kHalfDimensions * kInputDimensions];
-};
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
diff --git a/src/evaluate.cpp b/src/evaluate.cpp
index 12ecff00..9b3b58c3 100644
--- a/src/evaluate.cpp
+++ b/src/evaluate.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -20,17 +18,55 @@
 
 #include <algorithm>
 #include <cassert>
+#include <cstdlib>
 #include <cstring>   // For std::memset
 #include <iomanip>
-#include <set>
 #include <sstream>
+#include <iostream>
+#include <set>
 
 #include "bitboard.h"
 #include "evaluate.h"
 #include "material.h"
 #include "pawns.h"
 #include "thread.h"
-#include "eval/nnue/evaluate_nnue.h"
+#include "uci.h"
+
+namespace Eval {
+
+  bool useNNUE;                         // refreshed by init_NNUE() from the "Use NNUE" option
+  std::string eval_file_loaded="None";  // updated by init_NNUE() after a successful load; "None" until then
+
+  void init_NNUE() {
+    // Re-read the NNUE options and load the "EvalFile" net unless it is already the loaded one
+    useNNUE = Options["Use NNUE"];
+    std::string eval_file = std::string(Options["EvalFile"]);
+    if (useNNUE && eval_file_loaded != eval_file)
+        if (Eval::NNUE::load_eval_file(eval_file))
+            eval_file_loaded = eval_file; // recorded only on success, so verify_NNUE() can detect a failed load
+  }
+
+  void verify_NNUE() {
+    // If NNUE is enabled but the requested net is not the loaded one, report on stderr and exit
+    std::string eval_file = std::string(Options["EvalFile"]);
+    if (useNNUE && eval_file_loaded != eval_file)
+    {
+        UCI::OptionsMap defaults;
+        UCI::init(defaults);
+        // 'defaults' is read only for its "EvalFile" entry in the URL below (presumably the default net name)
+        std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. "
+                  << "These network evaluation parameters must be available, and compatible with this version of the code. "
+                  << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. "
+                  << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl;
+        std::exit(EXIT_FAILURE);
+    }
+
+    if (useNNUE) // otherwise just announce which evaluation is in effect
+        sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl;
+    else
+        sync_cout << "info string classical evaluation enabled." << sync_endl;
+  }
+}
 
 namespace Trace {
 
@@ -76,8 +112,10 @@ using namespace Trace;
 namespace {
 
   // Threshold for lazy and space evaluation
-  constexpr Value LazyThreshold  = Value(1400);
+  constexpr Value LazyThreshold1 =  Value(1400); // first lazy_skip() check, before the main evaluation terms
+  constexpr Value LazyThreshold2 =  Value(1300); // second lazy_skip() check, after the king and passed terms
   constexpr Value SpaceThreshold = Value(12222);
+  constexpr Value NNUEThreshold  =   Value(460); // base bound on |psq endgame score| for using NNUE in Eval::evaluate()
 
   // KingAttackWeights[PieceType] contains king attack weights by piece type
   constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -148,7 +186,6 @@ namespace {
   constexpr Score MinorBehindPawn     = S( 18,  3);
   constexpr Score PassedFile          = S( 11,  8);
   constexpr Score PawnlessFlank       = S( 17, 95);
-  constexpr Score QueenInfiltration   = S( -2, 14);
   constexpr Score ReachableOutpost    = S( 31, 22);
   constexpr Score RestrictedPiece     = S(  7,  7);
   constexpr Score RookOnKingRing      = S( 16,  0);
@@ -311,13 +348,16 @@ namespace {
 
         if (Pt == BISHOP || Pt == KNIGHT)
         {
-            // Bonus if piece is on an outpost square or can reach one
-            bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them);
+            // Bonus if the piece is on an outpost square or can reach one
+            // Reduced bonus for knights (BadOutpost) if few relevant targets
+            bb = OutpostRanks & (attackedBy[Us][PAWN] | shift<Down>(pos.pieces(PAWN)))
+                              & ~pe->pawn_attacks_span(Them);
+            Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN);
+
             if (   Pt == KNIGHT
-                && bb & s & ~CenterFiles
-                && !(b & pos.pieces(Them) & ~pos.pieces(PAWN))
-                && !conditional_more_than_two(
-                      pos.pieces(Them) & ~pos.pieces(PAWN) & (s & QueenSide ? QueenSide : KingSide)))
+                && bb & s & ~CenterFiles // on a side outpost
+                && !(b & targets)        // no relevant attacks
+                && (!more_than_one(targets & (s & QueenSide ? QueenSide : KingSide))))
                 score += BadOutpost;
             else if (bb & s)
                 score += Outpost[Pt == BISHOP];
@@ -388,10 +428,6 @@ namespace {
             Bitboard queenPinners;
             if (pos.slider_blockers(pos.pieces(Them, ROOK, BISHOP), s, queenPinners))
                 score -= WeakQueen;
-
-            // Bonus for queen on weak square in enemy camp
-            if (relative_rank(Us, s) > RANK_4 && (~pe->pawn_attacks_span(Them) & s))
-                score += QueenInfiltration;
         }
     }
     if (T)
@@ -578,17 +614,21 @@ namespace {
     // Bonus for threats on the next moves against enemy queen
     if (pos.count<QUEEN>(Them) == 1)
     {
+        bool queenImbalance = pos.count<QUEEN>() == 1;
+
         Square s = pos.square<QUEEN>(Them);
-        safe = mobilityArea[Us] & ~stronglyProtected;
+        safe =   mobilityArea[Us]
+              & ~pos.pieces(Us, PAWN)
+              & ~stronglyProtected;
 
         b = attackedBy[Us][KNIGHT] & attacks_bb<KNIGHT>(s);
 
-        score += KnightOnQueen * popcount(b & safe);
+        score += KnightOnQueen * popcount(b & safe) * (1 + queenImbalance);
 
         b =  (attackedBy[Us][BISHOP] & attacks_bb<BISHOP>(s, pos.pieces()))
            | (attackedBy[Us][ROOK  ] & attacks_bb<ROOK  >(s, pos.pieces()));
 
-        score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]);
+        score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]) * (1 + queenImbalance);
     }
 
     if (T)
@@ -788,7 +828,7 @@ namespace {
                 && pos.non_pawn_material(BLACK) == RookValueMg
                 && pos.count<PAWN>(strongSide) - pos.count<PAWN>(~strongSide) <= 1
                 && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN))
-                && (attackedBy[~strongSide][KING] & pos.pieces(~strongSide, PAWN)))
+                && (attacks_bb<KING>(pos.square<KING>(~strongSide)) & pos.pieces(~strongSide, PAWN)))
             sf = 36;
         else if (pos.count<QUEEN>() == 1)
             sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
@@ -839,9 +879,12 @@ namespace {
     score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK);
 
     // Early exit if score is high
-    Value v = (mg_value(score) + eg_value(score)) / 2;
-    if (abs(v) > LazyThreshold + pos.non_pawn_material() / 64)
-       return pos.side_to_move() == WHITE ? v : -v;
+    auto lazy_skip = [&](Value lazyThreshold) { // true when |mg + eg| / 2 of the score so far exceeds the threshold
+        return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64; // bound grows with non-pawn material
+    };
+
+    if (lazy_skip(LazyThreshold1))
+        goto make_v;
 
     // Main evaluation begins here
     initialize<WHITE>();
@@ -858,12 +901,17 @@ namespace {
 
     // More complex interactions that require fully populated attack bitboards
     score +=  king<   WHITE>() - king<   BLACK>()
-            + threats<WHITE>() - threats<BLACK>()
-            + passed< WHITE>() - passed< BLACK>()
+            + passed< WHITE>() - passed< BLACK>();
+
+    if (lazy_skip(LazyThreshold2))
+        goto make_v;
+
+    score +=  threats<WHITE>() - threats<BLACK>()
             + space<  WHITE>() - space<  BLACK>();
 
+make_v:
     // Derive single value from mg and eg parts of score
-    v = winnable(score);
+    Value v = winnable(score);
 
     // In case of tracing add all remaining individual evaluation terms
     if (T)
@@ -892,187 +940,168 @@ namespace {
 /// evaluate() is the evaluator for the outer world. It returns a static
 /// evaluation of the position from the point of view of the side to move.
 
-#if !defined(EVAL_NNUE)
 Value Eval::evaluate(const Position& pos) {
+  // With NNUE enabled, pre-screen the position using the endgame part of the PSQ score
+  if (Eval::useNNUE)
+  {
+      Value v = eg_value(pos.psq_score());
+      // Use NNUE only while |v| < NNUEThreshold + 20 per pawn on the board; otherwise fall through to the classical eval
+      if (abs(v) < NNUEThreshold + 20 * pos.count<PAWN>())
+         return NNUE::evaluate(pos) + Tempo;
+  }
   return Evaluation<NO_TRACE>(pos).value();
 }
-#endif  // defined(EVAL_NNUE)
-
 
 /// trace() is like evaluate(), but instead of returning a value, it returns
 /// a string (suitable for outputting to stdout) that contains the detailed
 /// descriptions and values of each evaluation term. Useful for debugging.
+/// Trace scores are from white's point of view
 
 std::string Eval::trace(const Position& pos) {
 
   if (pos.checkers())
-      return "Total evaluation: none (in check)";
-
-  std::memset(scores, 0, sizeof(scores));
-
-  pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
-
-  Value v = Evaluation<TRACE>(pos).value();
-
-  v = pos.side_to_move() == WHITE ? v : -v; // Trace scores are from white's point of view
+      return "Final evaluation: none (in check)";
 
   std::stringstream ss;
-  ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
-     << "     Term    |    White    |    Black    |    Total   \n"
-     << "             |   MG    EG  |   MG    EG  |   MG    EG \n"
-     << " ------------+-------------+-------------+------------\n"
-     << "    Material | " << Term(MATERIAL)
-     << "   Imbalance | " << Term(IMBALANCE)
-     << "       Pawns | " << Term(PAWN)
-     << "     Knights | " << Term(KNIGHT)
-     << "     Bishops | " << Term(BISHOP)
-     << "       Rooks | " << Term(ROOK)
-     << "      Queens | " << Term(QUEEN)
-     << "    Mobility | " << Term(MOBILITY)
-     << " King safety | " << Term(KING)
-     << "     Threats | " << Term(THREAT)
-     << "      Passed | " << Term(PASSED)
-     << "       Space | " << Term(SPACE)
-     << "    Winnable | " << Term(WINNABLE)
-     << " ------------+-------------+-------------+------------\n"
-     << "       Total | " << Term(TOTAL);
+  ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
+
+  Value v;
+  // NNUE mode only computes the final value; classical mode also prints the per-term table
+  if (Eval::useNNUE)
+  {
+      v = NNUE::evaluate(pos);
+  }
+  else
+  {
+      std::memset(scores, 0, sizeof(scores));
+
+      pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
+
+      v = Evaluation<TRACE>(pos).value();
+
+      ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
+         << "     Term    |    White    |    Black    |    Total   \n"
+         << "             |   MG    EG  |   MG    EG  |   MG    EG \n"
+         << " ------------+-------------+-------------+------------\n"
+         << "    Material | " << Term(MATERIAL)
+         << "   Imbalance | " << Term(IMBALANCE)
+         << "       Pawns | " << Term(PAWN)
+         << "     Knights | " << Term(KNIGHT)
+         << "     Bishops | " << Term(BISHOP)
+         << "       Rooks | " << Term(ROOK)
+         << "      Queens | " << Term(QUEEN)
+         << "    Mobility | " << Term(MOBILITY)
+         << " King safety | " << Term(KING)
+         << "     Threats | " << Term(THREAT)
+         << "      Passed | " << Term(PASSED)
+         << "       Space | " << Term(SPACE)
+         << "    Winnable | " << Term(WINNABLE)
+         << " ------------+-------------+-------------+------------\n"
+         << "       Total | " << Term(TOTAL);
+  }
+  // Negate v when black is to move so the value printed below is labelled correctly as white's side
+  v = pos.side_to_move() == WHITE ? v : -v;
 
   ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
 
   return ss.str();
 }
 
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
-namespace Eval {
-ExtBonaPiece kpp_board_index[PIECE_NB] = {
-    { BONA_PIECE_ZERO, BONA_PIECE_ZERO },
-    { f_pawn, e_pawn },
-    { f_knight, e_knight },
-    { f_bishop, e_bishop },
-    { f_rook, e_rook },
-    { f_queen, e_queen },
-    { f_king, e_king },
-    { BONA_PIECE_ZERO, BONA_PIECE_ZERO },
-
-    // When viewed from behind. f and e are exchanged.
-    { BONA_PIECE_ZERO, BONA_PIECE_ZERO },
-    { e_pawn, f_pawn },
-    { e_knight, f_knight },
-    { e_bishop, f_bishop },
-    { e_rook, f_rook },
-    { e_queen, f_queen },
-    { e_king, f_king },
-    { BONA_PIECE_ZERO, BONA_PIECE_ZERO }, // no money
-};
-
-// Check whether the pieceListFw[] held internally is a correct BonaPiece.
+// Check that pieceListFw[] and piece_id_list are consistent with the pieces on the board in pos.
 // Note: For debugging. slow.
 bool EvalList::is_valid(const Position& pos)
 {
-  std::set<PieceNumber> piece_numbers;
-  for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) {
-    auto piece_number = piece_no_of_board(sq);
-    if (piece_number == PIECE_NUMBER_NB) {
-      continue;
-    }
-    assert(!piece_numbers.count(piece_number));
-    piece_numbers.insert(piece_number);
-  }
-
-  for (int i = 0; i < length(); ++i)
-  {
-    BonaPiece fw = pieceListFw[i];
-    // Go to the Position class to see if this fw really exists.
-
-    if (fw == Eval::BONA_PIECE_ZERO) {
-      continue;
+    std::set<PieceId> piece_numbers; // ids seen so far; the assert below rejects an id used on two squares
+    for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) {
+        auto piece_number = piece_id_list[sq];
+        if (piece_number == PieceId::PIECE_ID_NONE) {
+            continue;
+        }
+        assert(!piece_numbers.count(piece_number));
+        piece_numbers.insert(piece_number);
     }
 
-    // Out of range
-    if (!(0 <= fw && fw < fe_end))
-      return false;
-
-    // Since it is a piece on the board, I will check if this piece really exists.
-    for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc)
+    for (int i = 0; i < PieceId::PIECE_ID_KING; ++i)
     {
-      auto pt = type_of(pc);
-      if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece
-        continue;
+        PieceSquare fw = pieceListFw[i];
+        // Verify against the Position that this entry describes a piece that really exists.
 
-      // BonaPiece start number of piece pc
-      auto s = BonaPiece(kpp_board_index[pc].fw);
-      if (s <= fw && fw < s + SQUARE_NB)
-      {
-        // Since it was found, check if this piece is at sq.
-        Square sq = (Square)(fw - s);
-        Piece pc2 = pos.piece_on(sq);
+        if (fw == PieceSquare::PS_NONE) {
+            continue;
+        }
 
-        if (pc2 != pc)
-          return false;
+        // Out of range
+        if (!(0 <= fw && fw < PieceSquare::PS_END))
+            return false;
 
-        goto Found;
-      }
-    }
-    // It was a piece that did not exist for some reason..
-    return false;
-  Found:;
-  }
+        // Find the piece whose PieceSquare range contains fw, then confirm it stands on that square.
+        for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc)
+        {
+            auto pt = type_of(pc);
+            if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece
+                continue;
 
-  // Validate piece_no_list_board
-  for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) {
-    Piece expected_piece = pos.piece_on(sq);
-    PieceNumber piece_number = piece_no_list_board[sq];
-    if (piece_number == PIECE_NUMBER_NB) {
-      assert(expected_piece == NO_PIECE);
-      if (expected_piece != NO_PIECE) {
+            // First PieceSquare index of piece pc, taken from the Color::WHITE entry of kpp_board_index
+            auto s = PieceSquare(kpp_board_index[pc].from[Color::WHITE]);
+            if (s <= fw && fw < s + SQUARE_NB)
+            {
+                // Range matched: the offset within the range is the square to check.
+                Square sq = (Square)(fw - s);
+                Piece pc2 = pos.piece_on(sq);
+
+                if (pc2 != pc)
+                    return false;
+
+                goto Found;
+            }
+        }
+        // fw fell into no piece's range, so it does not describe a real piece.
         return false;
-      }
-      continue;
+    Found:;
     }
 
-    BonaPiece bona_piece_white = pieceListFw[piece_number];
-    Piece actual_piece;
-    for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) {
-      if (kpp_board_index[actual_piece].fw == BONA_PIECE_ZERO) {
-        continue;
-      }
+    // Validate piece_id_list: each square's id must map back to the piece standing on that square
+    for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) {
+        Piece expected_piece = pos.piece_on(sq);
+        PieceId piece_number = piece_id_list[sq];
+        if (piece_number == PieceId::PIECE_ID_NONE) {
+            assert(expected_piece == NO_PIECE);
+            if (expected_piece != NO_PIECE) {
+                return false;
+            }
+            continue;
+        }
 
-      if (kpp_board_index[actual_piece].fw <= bona_piece_white
-        && bona_piece_white < kpp_board_index[actual_piece].fw + SQUARE_NB) {
-        break;
-      }
+        PieceSquare bona_piece_white = pieceListFw[piece_number];
+        Piece actual_piece;
+        for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) {
+            if (kpp_board_index[actual_piece].from[Color::WHITE] == PieceSquare::PS_NONE) {
+                continue;
+            }
+
+            if (kpp_board_index[actual_piece].from[Color::WHITE] <= bona_piece_white
+                && bona_piece_white < kpp_board_index[actual_piece].from[Color::WHITE] + SQUARE_NB) {
+                break;
+            }
+        }
+
+        assert(actual_piece != PIECE_NB);
+        if (actual_piece == PIECE_NB) {
+            return false;
+        }
+
+        assert(actual_piece == expected_piece);
+        if (actual_piece != expected_piece) {
+            return false;
+        }
+
+        Square actual_square = static_cast<Square>(
+            bona_piece_white - kpp_board_index[actual_piece].from[Color::WHITE]);
+        assert(sq == actual_square);
+        if (sq != actual_square) {
+            return false;
+        }
     }
 
-    assert(actual_piece != PIECE_NB);
-    if (actual_piece == PIECE_NB) {
-      return false;
-    }
-
-    assert(actual_piece == expected_piece);
-    if (actual_piece != expected_piece) {
-      return false;
-    }
-
-    Square actual_square = static_cast<Square>(
-      bona_piece_white - kpp_board_index[actual_piece].fw);
-    assert(sq == actual_square);
-    if (sq != actual_square) {
-      return false;
-    }
-  }
-
-  return true;
+    return true;
 }
-}
-#endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
-
-#if !defined(EVAL_NNUE)
-namespace Eval {
-void evaluate_with_no_return(const Position& pos) {}
-void update_weights(uint64_t epoch, const std::array<bool, 4> & freeze) {}
-void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3) {}
-void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4> & freeze) {}
-void save_eval(std::string suffix) {}
-double get_eta() { return 0.0; }
-}
-#endif  // defined(EVAL_NNUE)
diff --git a/src/evaluate.h b/src/evaluate.h
index 0301f455..e808068d 100644
--- a/src/evaluate.h
+++ b/src/evaluate.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -29,194 +27,23 @@ class Position;
 
 namespace Eval {
 
-std::string trace(const Position& pos);
+  std::string trace(const Position& pos);
+  Value evaluate(const Position& pos);
 
-Value evaluate(const Position& pos);
+  extern bool useNNUE;
+  extern std::string eval_file_loaded;
+  void init_NNUE();
+  void verify_NNUE();
 
-void evaluate_with_no_return(const Position& pos);
+  namespace NNUE {
 
-Value compute_eval(const Position& pos);
+    Value evaluate(const Position& pos);
+    Value compute_eval(const Position& pos);
+    void  update_eval(const Position& pos);
+    bool  load_eval_file(const std::string& evalFile);
 
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
-// Read the evaluation function file.
-// This is only called once in response to the "is_ready" command. It is not supposed to be called twice.
-// (However, if isready is sent again after EvalDir (evaluation function folder) has been changed, read it again.)
-void load_eval();
+  } // namespace NNUE
 
-static uint64_t calc_check_sum() {return 0;}
-
-static void print_softname(uint64_t check_sum) {}
-
-// --- enum corresponding to P of constant KPP (ball and arbitrary 2 pieces) used in evaluation function
-
-// (BonaPiece wants to define freely in experiment of evaluation function, so I don't define it here.)
-
-
-// A type that represents P(Piece) when calling KKP/KPP in Bonanza.
-// When you ask for �� KPP, you need a unique number for each box �~ piece type, like the step at 39 points.
-enum BonaPiece : int32_t
-{
-	// Meaning of f = friend (��first move). Meaning of e = enemy (��rear)
-
-	// Value when uninitialized
-	BONA_PIECE_NOT_INIT = -1,
-
-	// Invalid piece. When you drop a piece, move unnecessary pieces here.
-	BONA_PIECE_ZERO = 0,
-
-	fe_hand_end = BONA_PIECE_ZERO + 1,
-
-	// Don't pack the numbers of unrealistic walks and incense on the board like Bonanza.
-	// Reason 1) When learning, there are times when the incense is on the first stage in relative PP, and it is difficult to display it correctly in the inverse transformation.
-	// Reason 2) It is difficult to convert from Square with vertical Bitboard.
-
-	// --- Pieces on the board
-	f_pawn = fe_hand_end,
-	e_pawn = f_pawn + SQUARE_NB,
-	f_knight = e_pawn + SQUARE_NB,
-	e_knight = f_knight + SQUARE_NB,
-	f_bishop = e_knight + SQUARE_NB,
-	e_bishop = f_bishop + SQUARE_NB,
-	f_rook = e_bishop + SQUARE_NB,
-	e_rook = f_rook + SQUARE_NB,
-	f_queen = e_rook + SQUARE_NB,
-	e_queen = f_queen + SQUARE_NB,
-	fe_end = e_queen + SQUARE_NB,
-	f_king = fe_end,
-	e_king = f_king + SQUARE_NB,
-	fe_end2 = e_king + SQUARE_NB, // Last number including balls.
-};
-
-#define ENABLE_INCR_OPERATORS_ON(T)                                \
-inline T& operator++(T& d) { return d = T(int(d) + 1); }           \
-inline T& operator--(T& d) { return d = T(int(d) - 1); }
-
-ENABLE_INCR_OPERATORS_ON(BonaPiece)
-
-#undef ENABLE_INCR_OPERATORS_ON
-
-// The number when you look at BonaPiece from the back (the number of steps from the previous 39 to the number 71 from the back)
-// Let's call the paired one the ExtBonaPiece type.
-union ExtBonaPiece
-{
-	struct {
-		BonaPiece fw; // from white
-		BonaPiece fb; // from black
-	};
-	BonaPiece from[2];
-
-	ExtBonaPiece() {}
-	ExtBonaPiece(BonaPiece fw_, BonaPiece fb_) : fw(fw_), fb(fb_) {}
-};
-
-// Information about where the piece has moved from where to by this move.
-// Assume the piece is an ExtBonaPiece expression.
-struct ChangedBonaPiece
-{
-	ExtBonaPiece old_piece;
-	ExtBonaPiece new_piece;
-};
-
-// An array for finding the BonaPiece corresponding to the piece pc on the board of the KPP table.
-// example)
-// BonaPiece fb = kpp_board_index[pc].fb + sq; // BonaPiece corresponding to pc in sq seen from the front
-// BonaPiece fw = kpp_board_index[pc].fw + sq; // BonaPiece corresponding to pc in sq seen from behind
-extern ExtBonaPiece kpp_board_index[PIECE_NB];
-
-// List of pieces used in the evaluation function. A structure holding which piece (PieceNumber) is where (BonaPiece)
-struct EvalList
-{
-	// List of frame numbers used in evaluation function (FV38 type)
-	BonaPiece* piece_list_fw() const { return const_cast<BonaPiece*>(pieceListFw); }
-	BonaPiece* piece_list_fb() const { return const_cast<BonaPiece*>(pieceListFb); }
-
-	// Convert the specified piece_no piece to ExtBonaPiece type and return it.
-	ExtBonaPiece bona_piece(PieceNumber piece_no) const
-	{
-		ExtBonaPiece bp;
-		bp.fw = pieceListFw[piece_no];
-		bp.fb = pieceListFb[piece_no];
-		return bp;
-	}
-
-	// Place the piece_no pc piece in the sq box on the board
-	void put_piece(PieceNumber piece_no, Square sq, Piece pc) {
-		set_piece_on_board(piece_no, BonaPiece(kpp_board_index[pc].fw + sq), BonaPiece(kpp_board_index[pc].fb + Inv(sq)), sq);
-	}
-
-	// Returns the PieceNumber corresponding to a box on the board.
-	PieceNumber piece_no_of_board(Square sq) const { return piece_no_list_board[sq]; }
-
-	// Initialize the pieceList.
-	// Set the value of unused pieces to BONA_PIECE_ZERO in case you want to deal with dropped pieces.
-	// A normal evaluation function can be used as an evaluation function for missing frames.
-	// piece_no_list is initialized with PIECE_NUMBER_NB to facilitate debugging.
-	void clear()
-	{
-
-		for (auto& p: pieceListFw)
-			p = BONA_PIECE_ZERO;
-
-		for (auto& p: pieceListFb)
-			p = BONA_PIECE_ZERO;
-
-		for (auto& v :piece_no_list_board)
-			v = PIECE_NUMBER_NB;
-	}
-
-	// Check whether the pieceListFw[] held internally is a correct BonaPiece.
-	// Note: For debugging. slow.
-	bool is_valid(const Position& pos);
-
-	// Set that the BonaPiece of the piece_no piece on the board sq is fb,fw.
-	inline void set_piece_on_board(PieceNumber piece_no, BonaPiece fw, BonaPiece fb, Square sq)
-	{
-		assert(is_ok(piece_no));
-		pieceListFw[piece_no] = fw;
-		pieceListFb[piece_no] = fb;
-		piece_no_list_board[sq] = piece_no;
-	}
-
-	// Piece list. Piece Number Shows how many pieces are in place (Bona Piece). Used in FV38 etc.
-
-	// Length of piece list
-  // 38 fixed
-public:
-	int length() const { return PIECE_NUMBER_KING; }
-
-	// Must be a multiple of 4 to use VPGATHERDD.
-	// In addition, the KPPT type evaluation function, etc. is based on the assumption that the 39th and 40th elements are zero.
-	// Please note that there is a part that is accessed.
-	static const int MAX_LENGTH = 32;
-
-  // An array that holds the piece number (PieceNumber) for the pieces on the board
-  // Hold up to +1 for when the ball is moving to SQUARE_NB,
-  // SQUARE_NB balls are not moved, so this value should never be used.
-  PieceNumber piece_no_list_board[SQUARE_NB_PLUS1];
-private:
-
-	BonaPiece pieceListFw[MAX_LENGTH];
-	BonaPiece pieceListFb[MAX_LENGTH];
-};
-
-// For management of evaluation value difference calculation
-// A structure for managing the number of pieces that have moved from the previous stage
-// Up to 2 moving pieces.
-struct DirtyPiece
-{
-	// What changed from the piece with that piece number
-	Eval::ChangedBonaPiece changed_piece[2];
-
-	// The number of dirty pieces
-	PieceNumber pieceNo[2];
-
-	// The number of dirty files.
-	// It can be 0 for null move.
-	// Up to 2 moving pieces and taken pieces.
-	int dirty_num;
-
-};
-#endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
-}
+} // namespace Eval
 
 #endif // #ifndef EVALUATE_H_INCLUDED
diff --git a/src/extra/sfen_packer.cpp b/src/extra/sfen_packer.cpp
index b3404542..68576c82 100644
--- a/src/extra/sfen_packer.cpp
+++ b/src/extra/sfen_packer.cpp
@@ -281,7 +281,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
 
 	// In updating the PieceList, we have to set which piece is where,
 	// A counter of how much each piece has been used
-  PieceNumber next_piece_number = PIECE_NUMBER_ZERO;
+  PieceId next_piece_number = PieceId::PIECE_ID_ZERO;
 
   pieceList[W_KING][0] = SQUARE_NB;
   pieceList[B_KING][0] = SQUARE_NB;
@@ -290,7 +290,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
 	if (mirror)
 	{
 		for (auto c : Colors)
-			board[Mir((Square)stream.read_n_bit(6))] = make_piece(c, KING);
+			board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
 	}
 	else
 	{
@@ -305,7 +305,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
     {
       auto sq = make_square(f, r);
       if (mirror) {
-        sq = Mir(sq);
+        sq = flip_file(sq);
       }
 
       // it seems there are already balls
@@ -328,9 +328,9 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
       put_piece(Piece(pc), sq);
 
       // update evalList
-      PieceNumber piece_no =
-        (pc == B_KING) ?PIECE_NUMBER_BKING :// Move ball
-        (pc == W_KING) ?PIECE_NUMBER_WKING :// Backing ball
+      PieceId piece_no =
+        (pc == B_KING) ?PieceId::PIECE_ID_BKING :// black king: dedicated id
+        (pc == W_KING) ?PieceId::PIECE_ID_WKING :// white king: dedicated id
         next_piece_number++; // otherwise
 
       evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box
@@ -372,7 +372,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
   if (stream.read_one_bit()) {
     Square ep_square = static_cast<Square>(stream.read_n_bit(6));
     if (mirror) {
-      ep_square = Mir(ep_square);
+      ep_square = flip_file(ep_square);
     }
     st->epSquare = ep_square;
 
diff --git a/src/learn/learner.cpp b/src/learn/learner.cpp
index e343fde5..afc82447 100644
--- a/src/learn/learner.cpp
+++ b/src/learn/learner.cpp
@@ -81,7 +81,7 @@
 #include "multi_think.h"
 
 #if defined(EVAL_NNUE)
-#include "../eval/nnue/evaluate_nnue_learner.h"
+#include "../nnue/evaluate_nnue_learner.h"
 #include <shared_mutex>
 #endif
 
@@ -627,7 +627,7 @@ void MultiThinkGenSfen::thread_worker(size_t thread_id)
 						// If the depth is 8 or more, it seems faster not to calculate this difference.
 #if defined(EVAL_NNUE)
             if (depth < 8)
-              Eval::evaluate_with_no_return(pos);
+              Eval::NNUE::update_eval(pos);
 #endif  // defined(EVAL_NNUE)
 					}
 
@@ -635,10 +635,10 @@ void MultiThinkGenSfen::thread_worker(size_t thread_id)
 					// cout << pos;
 
 					auto v = Eval::evaluate(pos);
-					// evaluate() returns the evaluation value on the turn side, so
-					// If it's a turn different from root_color, you must invert v and return it.
-					if (rootColor != pos.side_to_move())
-						v = -v;
+						// evaluate() returns the value from the side to move's point of view, so
+						// negate v when the side to move differs from rootColor.
+						if (rootColor != pos.side_to_move())
+							v = -v;
 
 					// Rewind.
 					// Is it C++x14, and isn't there even foreach to turn in reverse?
@@ -825,7 +825,7 @@ void MultiThinkGenSfen::thread_worker(size_t thread_id)
 			pos.do_move(m, states[ply]);
 
 			// Call node evaluate() for each difference calculation.
-			Eval::evaluate_with_no_return(pos);
+			Eval::NNUE::update_eval(pos);
 
 		} // for (int ply = 0; ; ++ply)
 
@@ -979,7 +979,7 @@ void gen_sfen(Position&, istringstream& is)
 		<< "  loop_max = " << loop_max << endl
 		<< "  eval_limit = " << eval_limit << endl
 		<< "  thread_num (set by USI setoption) = " << thread_num << endl
-		<< "  book_moves (set by USI setoption) = " << Options["BookMoves"] << endl
+		//<< "  book_moves (set by USI setoption) = " << Options["BookMoves"] << endl
 		<< "  random_move_minply     = " << random_move_minply << endl
 		<< "  random_move_maxply     = " << random_move_maxply << endl
 		<< "  random_move_count      = " << random_move_count << endl
@@ -994,6 +994,9 @@ void gen_sfen(Position&, istringstream& is)
 		<< "  save_every             = " << save_every << endl
 		<< "  random_file_name       = " << random_file_name << endl;
 
+	// Show if the training data generator uses NNUE.
+	Eval::verify_NNUE();
+
 	// Create and execute threads as many as Options["Threads"].
 	{
 		SfenWriter sw(output_file_name, thread_num);
@@ -1697,7 +1700,7 @@ void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
 				for (size_t i = 0; i < pv.size(); ++i)
 				{
 					pos.do_move(pv[i], states[i]);
-					Eval::evaluate_with_no_return(pos);
+					Eval::NNUE::update_eval(pos);
 				}
 				shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
 				for (auto it = pv.rbegin(); it != pv.rend(); ++it)
@@ -2106,7 +2109,7 @@ void LearnerThink::thread_worker(size_t thread_id)
 			pos.do_move(m, state[ply++]);
 
 			// Since the value of evaluate in leaf is used, the difference is updated.
-			Eval::evaluate_with_no_return(pos);
+			Eval::NNUE::update_eval(pos);
 		}
 
 		if (illegal_move) {
@@ -2135,9 +2138,6 @@ void LearnerThink::thread_worker(size_t thread_id)
 // Write evaluation function file.
 bool LearnerThink::save(bool is_final)
 {
-	// Calculate and output check sum before saving. (To check if it matches the next time)
-	std::cout << "Check Sum = "<< std::hex << Eval::calc_check_sum() << std::dec << std::endl;
-
 	// Each time you save, change the extension part of the file name like "0","1","2",..
 	// (Because I want to compare the winning rate for each evaluation function parameter later)
 
@@ -3089,14 +3089,14 @@ void learn(Position&, istringstream& is)
 	}
 	if (use_convert_plain)
 	{
-		init_nnue(true);
+		Eval::init_NNUE();
 		cout << "convert_plain.." << endl;
 		convert_plain(filenames, output_file_name);
 		return;
 	}
 	if (use_convert_bin)
 	{
-	  	init_nnue(true);
+		Eval::init_NNUE();
 		cout << "convert_bin.." << endl;
 		convert_bin(filenames,output_file_name, ply_minimum, ply_maximum, interpolate_eval);
 		return;
@@ -3104,7 +3104,7 @@ void learn(Position&, istringstream& is)
 	}
 	if (use_convert_bin_from_pgn_extract)
 	{
-		init_nnue(true);
+		Eval::init_NNUE();
 		cout << "convert_bin_from_pgn-extract.." << endl;
 		convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move);
 		return;
@@ -3170,7 +3170,7 @@ void learn(Position&, istringstream& is)
 	cout << "init.." << endl;
 
 	// Read evaluation function parameters
-	init_nnue(true);
+	Eval::init_NNUE();
 
 #if !defined(EVAL_NNUE)
 	cout << "init_grad.." << endl;
diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp
index 4bcecab8..c97b4910 100644
--- a/src/learn/learning_tools.cpp
+++ b/src/learn/learning_tools.cpp
@@ -28,17 +28,17 @@ namespace EvalLearningTools
 	void init_min_index_flag()
 	{
 		// Initialization of mir_piece and inv_piece must be completed.
-		assert(mir_piece(Eval::f_pawn) == Eval::e_pawn);
+		assert(Eval::mir_piece(PieceSquare::PS_W_PAWN) == PieceSquare::PS_B_PAWN);
 
 		// Initialize the flag array for dimension reduction
 		// Not involved in KPPP.
 
 		KK g_kk;
-		g_kk.set(SQUARE_NB, Eval::fe_end, 0);
+		g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0);
 		KKP g_kkp;
-		g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index());
+		g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index());
 		KPP g_kpp;
-		g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index());
+		g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index());
 
 		uint64_t size = g_kpp.max_index();
 		min_index_flag.resize(size);
@@ -123,22 +123,22 @@ namespace EvalLearningTools
 		// Determine if it is correct.
 
 		KK g_kk;
-		g_kk.set(SQUARE_NB, Eval::fe_end, 0);
+		g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0);
 		KKP g_kkp;
-		g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index());
+		g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index());
 		KPP g_kpp;
-		g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index());
+		g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index());
 
 		std::vector<bool> f;
 		f.resize(g_kpp.max_index() - g_kpp.min_index());
 
 		for(auto k = SQUARE_ZERO ; k < SQUARE_NB ; ++k)
-			for(auto p0 = BonaPiece::BONA_PIECE_ZERO; p0 < fe_end ; ++p0)
-				for (auto p1 = BonaPiece::BONA_PIECE_ZERO; p1 < fe_end; ++p1)
+			for(auto p0 = PieceSquare::PS_NONE; p0 < PieceSquare::PS_END ; ++p0)
+				for (auto p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
 				{
 					KPP kpp_org = g_kpp.fromKPP(k,p0,p1);
 					KPP kpp0;
-					KPP kpp1 = g_kpp.fromKPP(Mir(k), mir_piece(p0), mir_piece(p1));
+					KPP kpp1 = g_kpp.fromKPP(flip_file(k), mir_piece(p0), mir_piece(p1));
 					KPP kpp_array[2];
 
 					auto index = kpp_org.toIndex();
@@ -172,7 +172,7 @@ namespace EvalLearningTools
 		// Test for missing KPPP calculations
 
 		KPPP g_kppp;
-		g_kppp.set(15, Eval::fe_end,0);
+		g_kppp.set(15, PieceSquare::PS_END,0);
 		uint64_t min_index = g_kppp.min_index();
 		uint64_t max_index = g_kppp.max_index();
 
@@ -214,7 +214,7 @@ namespace EvalLearningTools
 			for (int i = 0; i<10000; ++i) // As a test, assuming a large fe_end, try turning at 10000.
 				for (int j = 0; j < i; ++j)
 				{
-					auto kkpp = g_kkpp.fromKKPP(k, (BonaPiece)i, (BonaPiece)j);
+					auto kkpp = g_kkpp.fromKKPP(k, (PieceSquare)i, (PieceSquare)j);
 					auto r = kkpp.toRawIndex();
 					assert(n++ == r);
 					auto kkpp2 = g_kkpp.fromIndex(r + g_kkpp.min_index());
diff --git a/src/learn/learning_tools.h b/src/learn/learning_tools.h
index a1de03dd..becd8db4 100644
--- a/src/learn/learning_tools.h
+++ b/src/learn/learning_tools.h
@@ -281,7 +281,7 @@ namespace EvalLearningTools
 		// The number of balls to support (normally SQUARE_NB)
 		int max_king_sq_;
 
-		// Maximum BonaPiece value supported
+		// Maximum PieceSquare value supported
 		uint64_t fe_end_;
 
 	};
@@ -341,10 +341,10 @@ namespace EvalLearningTools
 		void toLowerDimensions(/*out*/KK kk_[KK_LOWER_COUNT]) const {
 			kk_[0] = fromKK(king0_, king1_,false);
 #if defined(USE_KK_MIRROR_WRITE)
-			kk_[1] = fromKK(Mir(king0_),Mir(king1_),false);
+			kk_[1] = fromKK(flip_file(king0_),flip_file(king1_),false);
 #if defined(USE_KK_INVERSE_WRITE)
-			kk_[2] = fromKK(Inv(king1_), Inv(king0_),true);
-			kk_[3] = fromKK(Inv(Mir(king1_)) , Inv(Mir(king0_)),true);
+			kk_[2] = fromKK(rotate180(king1_), rotate180(king0_),true);
+			kk_[3] = fromKK(rotate180(flip_file(king1_)) , rotate180(flip_file(king0_)),true);
 #endif
 #endif
 		}
@@ -386,8 +386,8 @@ namespace EvalLearningTools
 	struct KKP : public SerializerBase
 	{
 	protected:
-		KKP(Square king0, Square king1, Eval::BonaPiece p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {}
-		KKP(Square king0, Square king1, Eval::BonaPiece p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {}
+		KKP(Square king0, Square king1, PieceSquare p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {}
+		KKP(Square king0, Square king1, PieceSquare p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {}
 	public:
 		KKP() {}
 
@@ -399,27 +399,27 @@ namespace EvalLearningTools
 		// A builder that creates a KKP object from raw_index (a number that starts from 0, not a serial number)
 		KKP fromRawIndex(uint64_t raw_index) const
 		{
-			int piece = (int)(raw_index % Eval::fe_end);
-			raw_index /= Eval::fe_end;
+			int piece = (int)(raw_index % PieceSquare::PS_END);
+			raw_index /= PieceSquare::PS_END;
 			int king1 = (int)(raw_index % SQUARE_NB);
 			raw_index /= SQUARE_NB;
 			int king0 = (int)(raw_index  /* % SQUARE_NB */);
 			assert(king0 < SQUARE_NB);
-			return fromKKP((Square)king0, (Square)king1, (Eval::BonaPiece)piece,false);
+			return fromKKP((Square)king0, (Square)king1, (PieceSquare)piece,false);
 		}
 
-		KKP fromKKP(Square king0, Square king1, Eval::BonaPiece p, bool inverse) const
+		KKP fromKKP(Square king0, Square king1, PieceSquare p, bool inverse) const
 		{
 			KKP my_kkp(king0, king1, p, inverse);
 			my_kkp.set(max_king_sq_,fe_end_,min_index());
 			return my_kkp;
 		}
-		KKP fromKKP(Square king0, Square king1, Eval::BonaPiece p) const { return fromKKP(king0, king1, p, false); }
+		KKP fromKKP(Square king0, Square king1, PieceSquare p) const { return fromKKP(king0, king1, p, false); }
 
 		// When you construct this object using fromIndex(), you can get information with the following accessors.
 		Square king0() const { return king0_; }
 		Square king1() const { return king1_; }
-		Eval::BonaPiece piece() const { return piece_; }
+		PieceSquare piece() const { return piece_; }
 
 		// Number of KKP dimension reductions
 #if defined(USE_KKP_INVERSE_WRITE)
@@ -442,10 +442,10 @@ namespace EvalLearningTools
 		void toLowerDimensions(/*out*/ KKP kkp_[KKP_LOWER_COUNT]) const {
 			kkp_[0] = fromKKP(king0_, king1_, piece_,false);
 #if defined(USE_KKP_MIRROR_WRITE)
-			kkp_[1] = fromKKP(Mir(king0_), Mir(king1_), mir_piece(piece_),false);
+			kkp_[1] = fromKKP(flip_file(king0_), flip_file(king1_), Eval::mir_piece(piece_),false);
 #if defined(USE_KKP_INVERSE_WRITE)
-			kkp_[2] = fromKKP( Inv(king1_), Inv(king0_), inv_piece(piece_),true);
-			kkp_[3] = fromKKP( Inv(Mir(king1_)), Inv(Mir(king0_)) , inv_piece(mir_piece(piece_)),true);
+			kkp_[2] = fromKKP( rotate180(king1_), rotate180(king0_), Eval::inv_piece(piece_),true);
+			kkp_[3] = fromKKP( rotate180(flip_file(king1_)), rotate180(flip_file(king0_)) , Eval::inv_piece(Eval::mir_piece(piece_)),true);
 #endif
 #endif
 		}
@@ -473,7 +473,7 @@ namespace EvalLearningTools
 
 	private:
 		Square king0_, king1_;
-		Eval::BonaPiece piece_;
+		PieceSquare piece_;
 		bool inverse_sign;
 	};
 
@@ -489,7 +489,7 @@ namespace EvalLearningTools
 	struct KPP : public SerializerBase
 	{
 	protected:
-		KPP(Square king, Eval::BonaPiece p0, Eval::BonaPiece p1) : king_(king), piece0_(p0), piece1_(p1) {}
+		KPP(Square king, PieceSquare p0, PieceSquare p1) : king_(king), piece0_(p0), piece1_(p1) {}
 
 	public:
 		KPP() {}
@@ -534,7 +534,7 @@ namespace EvalLearningTools
 			// From the solution formula of the quadratic equation i = (sqrt(8*index2+1)-1) / 2.
 			// After i is converted into an integer, j can be calculated as j = index2-i * (i + 1) / 2.
 
-			// BonaPiece assumes 32bit (may not fit in 16bit), so this multiplication must be 64bit.
+			// PieceSquare is assumed to be 32 bits wide (its values may not fit in 16 bits), so this multiplication must be done in 64 bits.
 			int piece1 = int(sqrt(8 * index2 + 1) - 1) / 2;
 			int piece0 = int(index2 - (uint64_t)piece1*((uint64_t)piece1 + 1) / 2);
 
@@ -546,10 +546,10 @@ namespace EvalLearningTools
 #endif
 			int king = (int)(raw_index  /* % SQUARE_NB */);
 			assert(king < max_king_sq_);
-			return fromKPP((Square)king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1);
+			return fromKPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1);
 		}
 
-		KPP fromKPP(Square king, Eval::BonaPiece p0, Eval::BonaPiece p1) const
+		KPP fromKPP(Square king, PieceSquare p0, PieceSquare p1) const
 		{
 			KPP my_kpp(king, p0, p1);
 			my_kpp.set(max_king_sq_,fe_end_,min_index());
@@ -558,8 +558,8 @@ namespace EvalLearningTools
 
 		// When you construct this object using fromIndex(), you can get information with the following accessors.
 		Square king() const { return king_; }
-		Eval::BonaPiece piece0() const { return piece0_; }
-		Eval::BonaPiece piece1() const { return piece1_; }
+		PieceSquare piece0() const { return piece0_; }
+		PieceSquare piece1() const { return piece1_; }
 
 
 // number of dimension reductions
@@ -584,7 +584,7 @@ namespace EvalLearningTools
 			// Note that if you use a triangular array, the swapped piece0 and piece1 will not be returned.
 			kpp_[0] = fromKPP(king_, piece0_, piece1_);
 #if defined(USE_KPP_MIRROR_WRITE)
-			kpp_[1] = fromKPP(Mir(king_), mir_piece(piece0_), mir_piece(piece1_));
+			kpp_[1] = fromKPP(flip_file(king_), Eval::mir_piece(piece0_), Eval::mir_piece(piece1_));
 #endif
 
 #else
@@ -592,8 +592,8 @@ namespace EvalLearningTools
 			kpp_[0] = fromKPP(king_, piece0_, piece1_);
 			kpp_[1] = fromKPP(king_, piece1_, piece0_);
 #if defined(USE_KPP_MIRROR_WRITE)
-			kpp_[2] = fromKPP(Mir(king_), mir_piece(piece0_), mir_piece(piece1_));
-			kpp_[3] = fromKPP(Mir(king_), mir_piece(piece1_), mir_piece(piece0_));
+			kpp_[2] = fromKPP(flip_file(king_), Eval::mir_piece(piece0_), Eval::mir_piece(piece1_)); // mirrored copy
+			kpp_[3] = fromKPP(flip_file(king_), Eval::mir_piece(piece1_), Eval::mir_piece(piece0_)); // mirrored copy, pieces swapped
 #endif
 #endif
 		}
@@ -607,14 +607,14 @@ namespace EvalLearningTools
 
 #else
 			// Macro similar to that used in Bonanza 6.0
-			auto PcPcOnSq = [&](Square k, Eval::BonaPiece i, Eval::BonaPiece j)
+			auto PcPcOnSq = [&](Square k, PieceSquare i, PieceSquare j)
 			{
 
 				// (i,j) in this triangular array is the element in the i-th row and the j-th column.
 				// 1st row + 2 + ... + i = i * (i+1) / 2 because the i-th row and 0th column is the total of the elements up to that point
 				// The i-th row and the j-th column is j plus this. i*(i+1)/2+j
 
-				// BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
+				// PieceSquare is assumed to be 32 bits wide, so the multiplication below is done in uint64_t to avoid overflow.
 				return (uint64_t)k * triangle_fe_end + (uint64_t)(uint64_t(i)*(uint64_t(i)+1) / 2 + uint64_t(j));
 			};
 
@@ -646,7 +646,7 @@ namespace EvalLearningTools
 
 	private:
 		Square king_;
-		Eval::BonaPiece piece0_, piece1_;
+		PieceSquare piece0_, piece1_;
 
 		uint64_t triangle_fe_end; // = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2;
 	};
@@ -672,7 +672,7 @@ namespace EvalLearningTools
 	struct KPPP : public SerializerBase
 	{
 	protected:
-		KPPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1, Eval::BonaPiece p2) :
+		KPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) :
 			king_(king), piece0_(p0), piece1_(p1), piece2_(p2)
 		{
 			assert(piece0_ > piece1_ && piece1_ > piece2_);
@@ -716,9 +716,9 @@ namespace EvalLearningTools
 			kppp_[0] = fromKPPP(king_, piece0_, piece1_,piece2_);
 #if KPPP_LOWER_COUNT > 1
 			// If mir_piece is done, it will be in a state not sorted. Need code to sort.
-			Eval::BonaPiece p_list[3] = { mir_piece(piece2_), mir_piece(piece1_), mir_piece(piece0_) };
+			PieceSquare p_list[3] = { Eval::mir_piece(piece2_), Eval::mir_piece(piece1_), Eval::mir_piece(piece0_) };
 			my_insertion_sort(p_list, 0, 3);
-			kppp_[1] = fromKPPP((int)Mir((Square)king_), p_list[2] , p_list[1], p_list[0]);
+			kppp_[1] = fromKPPP((int)flip_file((Square)king_), p_list[2] , p_list[1], p_list[0]);
 #endif
 		}
 
@@ -797,12 +797,12 @@ namespace EvalLearningTools
 			assert(king < max_king_sq_);
 
 			// Propagate king_sq and fe_end.
-			return fromKPPP((Square)king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1 , (Eval::BonaPiece)piece2);
+			return fromKPPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1 , (PieceSquare)piece2);
 		}
 
 		// Specify k,p0,p1,p2 to build KPPP instance.
 		// The king_sq and fe_end passed by set() which is internally retained are inherited.
-		KPPP fromKPPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1, Eval::BonaPiece p2) const
+		KPPP fromKPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) const
 		{
 			KPPP kppp(king, p0, p1, p2);
 			kppp.set(max_king_sq_, fe_end_,min_index());
@@ -815,7 +815,7 @@ namespace EvalLearningTools
 			// Macro similar to the one used in Bonanza 6.0
 			// Precondition) i> j> k.
 			// NG in case of i==j,j==k.
-			auto PcPcPcOnSq = [this](int king, Eval::BonaPiece i, Eval::BonaPiece j , Eval::BonaPiece k)
+			auto PcPcPcOnSq = [this](int king, PieceSquare i, PieceSquare j , PieceSquare k)
 			{
 				// (i,j,k) in this triangular array is the element in the i-th row and the j-th column.
 				// 0th row 0th column 0th is the sum of the elements up to that point, so 0 + 0 + 1 + 3 + 6 + ... + (i)*(i-1)/2 = i*( i-1)*(i-2)/6
@@ -823,7 +823,7 @@ namespace EvalLearningTools
 				// i-th row, j-th column and k-th row is k plus it. + k
 				assert(i > j && j > k);
 
-				// BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
+				// PieceSquare is assumed to be 32 bits wide, so the multiplication below is done in uint64_t to avoid overflow.
 				return (uint64_t)king * triangle_fe_end + (uint64_t)(
 						  uint64_t(i)*(uint64_t(i) - 1) * (uint64_t(i) - 2) / 6
 						+ uint64_t(j)*(uint64_t(j) - 1) / 2
@@ -836,9 +836,9 @@ namespace EvalLearningTools
 
 		// When you construct this object using fromIndex(), you can get information with the following accessors.
 		int king() const { return king_; }
-		Eval::BonaPiece piece0() const { return piece0_; }
-		Eval::BonaPiece piece1() const { return piece1_; }
-		Eval::BonaPiece piece2() const { return piece2_; }
+		PieceSquare piece0() const { return piece0_; }
+		PieceSquare piece1() const { return piece1_; }
+		PieceSquare piece2() const { return piece2_; }
 		// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
 		// Prepared to match KK, KKP and interface. This method always returns false for this KPPP class.
 		bool is_inverse() const {
@@ -859,14 +859,14 @@ namespace EvalLearningTools
 	private:
 
 		int king_;
-		Eval::BonaPiece piece0_, piece1_,piece2_;
+		PieceSquare piece0_, piece1_,piece2_;
 
 		// The part of the square array of [fe_end][fe_end][fe_end] of kppp[king_sq][fe_end][fe_end][fe_end] is made into a triangular array.
 		// If kppp[king_sq][triangle_fe_end], the number of elements from the 0th row of this triangular array is 0,0,1,3,..., The nth row is n(n-1)/2.
 		// therefore,
 		// triangle_fe_end = Σn(n-1)/2 , n=0..fe_end-1
 		//                 =  fe_end * (fe_end - 1) * (fe_end - 2) / 6
-		uint64_t triangle_fe_end; // ((uint64_t)Eval::fe_end)*((uint64_t)Eval::fe_end - 1)*((uint64_t)Eval::fe_end - 2) / 6;
+		uint64_t triangle_fe_end; // ((uint64_t)PieceSquare::PS_END)*((uint64_t)PieceSquare::PS_END - 1)*((uint64_t)PieceSquare::PS_END - 2) / 6;
 	};
 
 	// Output for debugging.
@@ -885,12 +885,12 @@ namespace EvalLearningTools
 	// piece0() >piece1()
 	// It is, and it is necessary to keep this constraint even when passing piece0,1 in the constructor.
 	//
-	// Due to this constraint, BonaPieceZero cannot be assigned to piece0 and piece1 at the same time and passed.
+	// Because of this constraint, piece0 and piece1 cannot both be passed as PieceSquareZero at the same time.
 	// If you want to support learning of dropped frames, you need to devise with evaluate().
 	struct KKPP: SerializerBase
 	{
 	protected:
-		KKPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1) :
+		KKPP(int king, PieceSquare p0, PieceSquare p1) :
 			king_(king), piece0_(p0), piece1_(p1)
 		{
 			assert(piece0_ > piece1_);
@@ -956,12 +956,12 @@ namespace EvalLearningTools
 			assert(king < max_king_sq_);
 
 			// Propagate king_sq and fe_end.
-			return fromKKPP(king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1);
+			return fromKKPP(king, (PieceSquare)piece0, (PieceSquare)piece1);
 		}
 
 		// Specify k,p0,p1 to build KKPP instance.
 		// The king_sq and fe_end passed by set() which is internally retained are inherited.
-		KKPP fromKKPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1) const
+		KKPP fromKKPP(int king, PieceSquare p0, PieceSquare p1) const
 		{
 			KKPP kkpp(king, p0, p1);
 			kkpp.set(max_king_sq_, fe_end_,min_index());
@@ -974,11 +974,11 @@ namespace EvalLearningTools
 			// Macro similar to the one used in Bonanza 6.0
 			// Precondition) i> j.
 			// NG in case of i==j,j==k.
-			auto PcPcOnSq = [this](int king, Eval::BonaPiece i, Eval::BonaPiece j)
+			auto PcPcOnSq = [this](int king, PieceSquare i, PieceSquare j)
 			{
 				assert(i > j);
 
-				// BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
+				// PieceSquare is assumed to be 32 bits wide, so the multiplication below is done in uint64_t to avoid overflow.
 				return (uint64_t)king * triangle_fe_end + (uint64_t)(
 					+ uint64_t(i)*(uint64_t(i) - 1) / 2
 					+ uint64_t(j)
@@ -990,8 +990,8 @@ namespace EvalLearningTools
 
 		// When you construct this object using fromIndex(), fromKKPP(), you can get information with the following accessors.
 		int king() const { return king_; }
-		Eval::BonaPiece piece0() const { return piece0_; }
-		Eval::BonaPiece piece1() const { return piece1_; }
+		PieceSquare piece0() const { return piece0_; }
+		PieceSquare piece1() const { return piece1_; }
 
 		// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
 		// Prepared to match KK, KKP and interface. In this KKPP class, this method always returns false.
@@ -1013,7 +1013,7 @@ namespace EvalLearningTools
 	private:
 
 		int king_;
-		Eval::BonaPiece piece0_, piece1_;
+		PieceSquare piece0_, piece1_;
 
 		// Triangularize the square array part of [fe_end][fe_end] of kppp[king_sq][fe_end][fe_end].
 		uint64_t triangle_fe_end = 0;
diff --git a/src/learn/multi_think.cpp b/src/learn/multi_think.cpp
index ba2c47d4..82ebeabb 100644
--- a/src/learn/multi_think.cpp
+++ b/src/learn/multi_think.cpp
@@ -20,7 +20,7 @@ void MultiThink::go_think()
 	// Read evaluation function, etc.
 	// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
 	// Skip memory corruption check.
-	init_nnue(true);
+	Eval::init_NNUE();
 
 	// Call the derived class's init().
 	init();
diff --git a/src/learn/multi_think.h b/src/learn/multi_think.h
index 55edb049..c22b7e8d 100644
--- a/src/learn/multi_think.h
+++ b/src/learn/multi_think.h
@@ -4,6 +4,7 @@
 #if defined(EVAL_LEARN)
 
 #include <functional>
+#include <mutex>
 
 #include "../misc.h"
 #include "../learn/learn.h"
diff --git a/src/main.cpp b/src/main.cpp
index 6001432d..fbad6622 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -20,15 +18,6 @@
 
 #include <iostream>
 
-#ifdef _WIN32
-#include <filesystem>
-
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif
-#include <Windows.h>
-#endif
-
 #include "bitboard.h"
 #include "endgame.h"
 #include "position.h"
@@ -43,17 +32,6 @@ namespace PSQT {
 }
 
 int main(int argc, char* argv[]) {
-  // Change the current working directory to the binary directory.  So that a
-  // net file path can be specified with a relative path from the binary
-  // directory.
-  // TODO(someone): Implement the logic for other OS.
-#ifdef _WIN32
-  TCHAR filename[_MAX_PATH];
-  ::GetModuleFileName(NULL, filename, sizeof(filename) / sizeof(filename[0]));
-  std::filesystem::path current_path = filename;
-  current_path.remove_filename();
-  std::filesystem::current_path(current_path);
-#endif
 
   std::cout << engine_info() << std::endl;
 
@@ -66,6 +44,7 @@ int main(int argc, char* argv[]) {
   Endgames::init();
   Threads.set(size_t(Options["Threads"]));
   Search::clear(); // After threads are up
+  Eval::init_NNUE();
 
   UCI::loop(argc, argv);
 
diff --git a/src/material.cpp b/src/material.cpp
index bb25d3ca..0ef9926f 100644
--- a/src/material.cpp
+++ b/src/material.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/material.h b/src/material.h
index 21647f23..80d01655 100644
--- a/src/material.h
+++ b/src/material.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/misc.cpp b/src/misc.cpp
index 865e21fb..725450c2 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -42,11 +40,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
 #endif
 
 #include <fstream>
-#include <functional>
 #include <iomanip>
 #include <iostream>
 #include <sstream>
 #include <vector>
+#include <cstdlib>
 
 #if defined(__linux__) && !defined(__ANDROID__)
 #include <stdlib.h>
@@ -140,7 +138,7 @@ const string engine_info(bool to_uci) {
   string month, day, year;
   stringstream ss, date(__DATE__); // From compiler, format is "Sep 21 2008"
 
-  ss << "Stockfish+NNUE " << Version << setfill('0');
+  ss << "Stockfish " << Version << setfill('0');
 
   if (Version.empty())
   {
@@ -148,10 +146,8 @@ const string engine_info(bool to_uci) {
       ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2);
   }
 
-  ss << (Is64Bit ? " 64" : "")
-     << (HasPext ? " BMI2" : (HasPopCnt ? " POPCNT" : ""))
-     << (to_uci  ? "\nid author ": " by ")
-     << "T. Romstad, M. Costalba, J. Kiiski, G. Linscott, H. Noda, Y. Nasu, M. Isozaki";
+  ss << (to_uci  ? "\nid author ": " by ")
+     << "the Stockfish developers (see AUTHORS file)";
 
   return ss.str();
 }
@@ -216,7 +212,33 @@ const std::string compiler_info() {
      compiler += " on unknown system";
   #endif
 
-  compiler += "\n __VERSION__ macro expands to: ";
+  compiler += "\nCompilation settings include: ";
+  compiler += (Is64Bit ? " 64bit" : " 32bit");
+  #if defined(USE_AVX512)
+    compiler += " AVX512";
+  #endif
+  #if defined(USE_AVX2)
+    compiler += " AVX2";
+  #endif
+  #if defined(USE_SSE42)
+    compiler += " SSE42";
+  #endif
+  #if defined(USE_SSE41)
+    compiler += " SSE41";
+  #endif
+  #if defined(USE_SSSE3)
+    compiler += " SSSE3";
+  #endif
+  #if defined(USE_SSE3)
+    compiler += " SSE3";
+  #endif
+    compiler += (HasPext ? " BMI2" : "");
+    compiler += (HasPopCnt ? " POPCNT" : "");
+  #if !defined(NDEBUG)
+    compiler += " DEBUG";
+  #endif
+
+  compiler += "\n__VERSION__ macro expands to: ";
   #ifdef __VERSION__
      compiler += __VERSION__;
   #else
@@ -294,6 +316,29 @@ void prefetch(void* addr) {
 
 #endif
 
+/// std_aligned_alloc() and std_aligned_free() are wrappers for systems where the C++17
+/// implementation doesn't guarantee the availability of std::aligned_alloc. Memory allocated
+/// with std_aligned_alloc() must be freed with std_aligned_free(), not with a plain free().
+
+void* std_aligned_alloc(size_t alignment, size_t size) {
+#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
+  return aligned_alloc(alignment, size); // unqualified aligned_alloc, not the std:: one
+#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
+  return _mm_malloc(size, alignment); // note: _mm_malloc takes (size, alignment), the reverse order
+#else
+  return std::aligned_alloc(alignment, size);
+#endif
+}
+
+void std_aligned_free(void* ptr) {
+#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
+  free(ptr); // counterpart of the aligned_alloc() branch of std_aligned_alloc()
+#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
+  _mm_free(ptr); // counterpart of the _mm_malloc() branch of std_aligned_alloc()
+#else
+  free(ptr); // counterpart of the std::aligned_alloc() branch of std_aligned_alloc()
+#endif
+}
 
 /// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
 /// The returned pointer is the aligned one, while the mem argument is the one that needs
@@ -371,8 +416,8 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
   {
       if (mem)
           sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
-      //else
-          //sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
+      else
+          sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
   }
   firstCall = false;
 
@@ -530,99 +575,99 @@ void bindThisThread(size_t idx) {
 // Returns a string that represents the current time. (Used when learning evaluation functions)
 std::string now_string()
 {
-  // Using std::ctime(), localtime() gives a warning that MSVC is not secure.
-  // This shouldn't happen in the C++ standard, but...
+    // MSVC warns that std::ctime() and localtime() are not secure.
+    // That shouldn't happen for functions from the C++ standard, but...
 
 #if defined(_MSC_VER)
   // C4996 : 'ctime' : This function or variable may be unsafe.Consider using ctime_s instead.
 #pragma warning(disable : 4996)
 #endif
 
-  auto now = std::chrono::system_clock::now();
-  auto tp = std::chrono::system_clock::to_time_t(now);
-  auto result = string(std::ctime(&tp));
+    auto now = std::chrono::system_clock::now();
+    auto tp = std::chrono::system_clock::to_time_t(now);
+    auto result = string(std::ctime(&tp)); // textual form of the current time, as produced by std::ctime()
 
-  // remove line endings if they are included at the end
-  while (*result.rbegin() == '\n' || (*result.rbegin() == '\r'))
-    result.pop_back();
-  return result;
+    // strip any trailing line-ending characters ('\n' or '\r')
+    while (*result.rbegin() == '\n' || (*result.rbegin() == '\r'))
+        result.pop_back();
+    return result;
 }
 
 void sleep(int ms)
 {
-	std::this_thread::sleep_for(std::chrono::milliseconds(ms));
+    std::this_thread::sleep_for(std::chrono::milliseconds(ms)); // suspend the calling thread for ms milliseconds
 }
 
 void* aligned_malloc(size_t size, size_t align)
 {
-	void* p = _mm_malloc(size, align);
-	if (p == nullptr)
-	{
-		std::cout << "info string can't allocate memory. sise = " << size << std::endl;
-		exit(1);
-	}
-	return p;
+    void* p = _mm_malloc(size, align); // request size bytes aligned to align
+    if (p == nullptr)
+    {
+        std::cout << "info string can't allocate memory. size = " << size << std::endl;
+        exit(1); // allocation failure is treated as fatal: report it and terminate
+    }
+    return p; // never nullptr: the failure path above does not return
 }
 
 int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
 {
-  fstream fs(filename, ios::in | ios::binary);
-  if (fs.fail())
-    return 1;
-
-  fs.seekg(0, fstream::end);
-  uint64_t eofPos = (uint64_t)fs.tellg();
-  fs.clear(); // Otherwise the next seek may fail.
-  fs.seekg(0, fstream::beg);
-  uint64_t begPos = (uint64_t)fs.tellg();
-  uint64_t file_size = eofPos - begPos;
-  //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
-
-  // I know the file size, so call callback_func to get a buffer for this,
-  // Get the pointer.
-  void* ptr = callback_func(file_size);
-
-  // If the buffer could not be secured, or if the file size is different from the expected file size,
-  // It is supposed to return nullptr. At this time, reading is interrupted and an error is returned.
-  if (ptr == nullptr)
-    return 2;
-
-  // read in pieces
-
-  const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to read in one read (1GB)
-  for (uint64_t pos = 0; pos < file_size; pos += block_size)
-  {
-    // size to read this time
-    uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos);
-    fs.read((char*)ptr + pos, read_size);
-
-    // Read error occurred in the middle of the file.
+    fstream fs(filename, ios::in | ios::binary);
     if (fs.fail())
-      return 2;
+        return 1; // the file could not be opened
 
-    //cout << ".";
-  }
-  fs.close();
+    fs.seekg(0, fstream::end);
+    uint64_t eofPos = (uint64_t)fs.tellg(); // NOTE(review): the tellg() result is not checked for failure
+    fs.clear(); // Otherwise the next seek may fail.
+    fs.seekg(0, fstream::beg);
+    uint64_t begPos = (uint64_t)fs.tellg();
+    uint64_t file_size = eofPos - begPos;
+    //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
 
-  return 0;
+    // The file size is now known, so pass it to callback_func to obtain
+    // a pointer to the destination buffer.
+    void* ptr = callback_func(file_size);
+
+    // callback_func is expected to return nullptr if the buffer could not be allocated or if the
+    // file size differs from what it expects. In that case reading is aborted and an error is returned.
+    if (ptr == nullptr)
+        return 2;
+
+    // read the file in blocks
+
+    const uint64_t block_size = 1024 * 1024 * 1024; // number of bytes to read per read() call (1 GiB)
+    for (uint64_t pos = 0; pos < file_size; pos += block_size)
+    {
+        // number of bytes to read in this iteration
+        uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos);
+        fs.read((char*)ptr + pos, read_size);
+
+        // A read error occurred partway through the file.
+        if (fs.fail())
+            return 2;
+
+        //cout << ".";
+    }
+    fs.close();
+
+    return 0; // success
 }
 
 int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
 {
-  fstream fs(filename, ios::out | ios::binary);
-  if (fs.fail())
-    return 1;
+    fstream fs(filename, ios::out | ios::binary);
+    if (fs.fail())
+        return 1; // the file could not be opened for writing
 
-  const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to write in one write (1GB)
-  for (uint64_t pos = 0; pos < size; pos += block_size)
-  {
-    // Memory size to write this time
-    uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos);
-    fs.write((char*)ptr + pos, write_size);
-    //cout << ".";
-  }
-  fs.close();
-  return 0;
+    const uint64_t block_size = 1024 * 1024 * 1024; // number of bytes to write per write() call (1 GiB)
+    for (uint64_t pos = 0; pos < size; pos += block_size)
+    {
+        // number of bytes to write in this iteration
+        uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos);
+        fs.write((char*)ptr + pos, write_size);
+        //cout << ".";
+    }
+    fs.close();
+    return fs.fail() ? 2 : 0; // 2 = a write or the final close() failed; 0 = success
 }
 
 // ----------------------------
@@ -642,22 +687,22 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
 #include <locale> // This is required for wstring_convert.
 
 namespace Dependency {
-  int mkdir(std::string dir_name)
-  {
-    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv;
-    return _wmkdir(cv.from_bytes(dir_name).c_str());
-    //	::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
-  }
+    int mkdir(std::string dir_name)
+    {
+        std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv; // converts the UTF-8 dir_name to a wide string
+        return _wmkdir(cv.from_bytes(dir_name).c_str()); // the result of _wmkdir() is returned unchanged
+        //	::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
+    }
 }
 
 #elif defined(__GNUC__) 
 
 #include <direct.h>
 namespace Dependency {
-  int mkdir(std::string dir_name)
-  {
-    return _mkdir(dir_name.c_str());
-  }
+    int mkdir(std::string dir_name)
+    {
+        return _mkdir(dir_name.c_str()); // forwards to _mkdir() and returns its result unchanged
+    }
 }
 
 #endif
@@ -669,10 +714,10 @@ namespace Dependency {
 #include "sys/stat.h"
 
 namespace Dependency {
-  int mkdir(std::string dir_name)
-  {
-    return ::mkdir(dir_name.c_str(), 0777);
-  }
+    int mkdir(std::string dir_name)
+    {
+        return ::mkdir(dir_name.c_str(), 0777); // global ::mkdir with requested mode 0777; its result is returned unchanged
+    }
 }
 #else
 
@@ -680,10 +725,10 @@ namespace Dependency {
 // The function to dig a folder on linux is good for the time being... Only used to save the evaluation function file...
 
 namespace Dependency {
-  int mkdir(std::string dir_name)
-  {
-    return 0;
-  }
+    int mkdir(std::string dir_name)
+    {
+        return 0; // stub: no directory is created here, success is always reported
+    }
 }
 
 #endif
diff --git a/src/misc.h b/src/misc.h
index 0e2e8403..ecef028f 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -21,7 +19,6 @@
 #ifndef MISC_H_INCLUDED
 #define MISC_H_INCLUDED
 
-#include <algorithm>
 #include <cassert>
 #include <chrono>
 #include <functional>
@@ -29,17 +26,15 @@
 #include <ostream>
 #include <string>
 #include <vector>
-#ifndef _MSC_VER
-#include <mm_malloc.h>
-#endif
 
 #include "types.h"
-#include "thread_win32_osx.h"
 
 const std::string engine_info(bool to_uci = false);
 const std::string compiler_info();
 void prefetch(void* addr);
 void start_logger(const std::string& fname);
+void* std_aligned_alloc(size_t alignment, size_t size);
+void std_aligned_free(void* ptr);
 void* aligned_ttmem_alloc(size_t size, void*& mem);
 void aligned_ttmem_free(void* mem); // nop if mem == nullptr
 
@@ -158,13 +153,6 @@ extern void sleep(int ms);
 // Returns a string that represents the current time. (Used for log output when learning evaluation function)
 std::string now_string();
 
-// wrapper for end processing on the way
-static void my_exit()
-{
-	sleep(3000); // It is bad to finish before the error message is output, so put wait.
-	exit(EXIT_FAILURE);
-}
-
 // When compiled with gcc/clang such as msys2, Windows Subsystem for Linux,
 // In C++ std::ifstream, ::read() is a wrapper for that because it is not possible to read and write files larger than 2GB in one shot.
 //
@@ -265,9 +253,6 @@ struct Path
 	}
 };
 
-extern void* aligned_malloc(size_t size, size_t align);
-static void aligned_free(void* ptr) { _mm_free(ptr); }
-
 // It is ignored when new even though alignas is specified & because it is ignored when the STL container allocates memory,
 // A custom allocator used for that.
 template <typename T>
@@ -281,8 +266,8 @@ public:
 
   template <typename U> AlignedAllocator(const AlignedAllocator<U>&) {}
 
-  T* allocate(std::size_t n) { return (T*)aligned_malloc(n * sizeof(T), alignof(T)); }
-  void deallocate(T* p, std::size_t n) { aligned_free(p); }
+  T* allocate(std::size_t n) { return (T*)std_aligned_alloc(alignof(T), n * sizeof(T)); } // NOTE(review): the result is not checked for nullptr here
+  void deallocate(T* p, std::size_t n) { std_aligned_free(p); } // n is unused; pairs with std_aligned_alloc() in allocate()
 };
 
 // --------------------
diff --git a/src/movegen.cpp b/src/movegen.cpp
index 4ff12fc6..d74df4c3 100644
--- a/src/movegen.cpp
+++ b/src/movegen.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/movegen.h b/src/movegen.h
index d5f82f16..675b7698 100644
--- a/src/movegen.h
+++ b/src/movegen.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/movepick.cpp b/src/movepick.cpp
index 5775f810..96a44449 100644
--- a/src/movepick.cpp
+++ b/src/movepick.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/movepick.h b/src/movepick.h
index aaff388f..f080935a 100644
--- a/src/movepick.h
+++ b/src/movepick.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/eval/nnue/architectures/halfkp-cr-ep_256x2-32-32.h b/src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h
similarity index 100%
rename from src/eval/nnue/architectures/halfkp-cr-ep_256x2-32-32.h
rename to src/nnue/architectures/halfkp-cr-ep_256x2-32-32.h
diff --git a/src/nnue/architectures/halfkp_256x2-32-32.h b/src/nnue/architectures/halfkp_256x2-32-32.h
new file mode 100644
index 00000000..9216bd41
--- /dev/null
+++ b/src/nnue/architectures/halfkp_256x2-32-32.h
@@ -0,0 +1,54 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
+
+#include "../features/feature_set.h"
+#include "../features/half_kp.h"
+
+#include "../layers/input_slice.h"
+#include "../layers/affine_transform.h"
+#include "../layers/clipped_relu.h"
+
+namespace Eval::NNUE {
+
+// Input features used in evaluation function
+using RawFeatures = Features::FeatureSet<
+    Features::HalfKP<Features::Side::kFriend>>;
+
+// Number of input feature dimensions after conversion
+constexpr IndexType kTransformedFeatureDimensions = 256;
+
+namespace Layers {
+
+// Define network structure
+using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
+using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
+using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
+using OutputLayer = AffineTransform<HiddenLayer2, 1>;
+
+}  // namespace Layers
+
+using Network = Layers::OutputLayer;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
diff --git a/src/eval/nnue/architectures/halfkp_384x2-32-32.h b/src/nnue/architectures/halfkp_384x2-32-32.h
similarity index 100%
rename from src/eval/nnue/architectures/halfkp_384x2-32-32.h
rename to src/nnue/architectures/halfkp_384x2-32-32.h
diff --git a/src/eval/nnue/architectures/k-p-cr-ep_256x2-32-32.h b/src/nnue/architectures/k-p-cr-ep_256x2-32-32.h
similarity index 100%
rename from src/eval/nnue/architectures/k-p-cr-ep_256x2-32-32.h
rename to src/nnue/architectures/k-p-cr-ep_256x2-32-32.h
diff --git a/src/eval/nnue/architectures/k-p-cr_256x2-32-32.h b/src/nnue/architectures/k-p-cr_256x2-32-32.h
similarity index 100%
rename from src/eval/nnue/architectures/k-p-cr_256x2-32-32.h
rename to src/nnue/architectures/k-p-cr_256x2-32-32.h
diff --git a/src/eval/nnue/architectures/k-p_256x2-32-32.h b/src/nnue/architectures/k-p_256x2-32-32.h
similarity index 100%
rename from src/eval/nnue/architectures/k-p_256x2-32-32.h
rename to src/nnue/architectures/k-p_256x2-32-32.h
diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp
new file mode 100644
index 00000000..a28a4573
--- /dev/null
+++ b/src/nnue/evaluate_nnue.cpp
@@ -0,0 +1,221 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Code for calculating NNUE evaluation function
+
+#include <fstream>
+#include <iostream>
+#include <set>
+
+#include "../evaluate.h"
+#include "../position.h"
+#include "../misc.h"
+#include "../uci.h"
+
+#include "evaluate_nnue.h"
+
+ExtPieceSquare kpp_board_index[PIECE_NB] = {
+ // convention: W - us, B - them
+ // viewed from other side, W and B are reversed
+    { PS_NONE,     PS_NONE     },
+    { PS_W_PAWN,   PS_B_PAWN   },
+    { PS_W_KNIGHT, PS_B_KNIGHT },
+    { PS_W_BISHOP, PS_B_BISHOP },
+    { PS_W_ROOK,   PS_B_ROOK   },
+    { PS_W_QUEEN,  PS_B_QUEEN  },
+    { PS_W_KING,   PS_B_KING   },
+    { PS_NONE,     PS_NONE     },
+    { PS_NONE,     PS_NONE     },
+    { PS_B_PAWN,   PS_W_PAWN   },
+    { PS_B_KNIGHT, PS_W_KNIGHT },
+    { PS_B_BISHOP, PS_W_BISHOP },
+    { PS_B_ROOK,   PS_W_ROOK   },
+    { PS_B_QUEEN,  PS_W_QUEEN  },
+    { PS_B_KING,   PS_W_KING   },
+    { PS_NONE,     PS_NONE     }
+};
+
+
+namespace Eval::NNUE {
+
+  // Input feature converter
+  AlignedPtr<FeatureTransformer> feature_transformer;
+
+  // Evaluation function
+  AlignedPtr<Network> network;
+
+  // Evaluation function file name
+  std::string fileName;
+
+  // Saved evaluation function file name
+  std::string savedfileName = "nn.bin";
+
+  // Get a string that represents the structure of the evaluation function
+  std::string GetArchitectureString() {
+    return "Features=" + FeatureTransformer::GetStructureString() +
+      ",Network=" + Network::GetStructureString();
+  }
+
+  namespace Detail {
+
+  // Initialize the evaluation function parameters
+  template <typename T>
+  void Initialize(AlignedPtr<T>& pointer) {
+
+    pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
+    std::memset(pointer.get(), 0, sizeof(T));
+  }
+
+  // Read evaluation function parameters
+  template <typename T>
+  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
+
+    std::uint32_t header;
+    stream.read(reinterpret_cast<char*>(&header), sizeof(header));
+    if (!stream || header != T::GetHashValue()) return false;
+    return pointer->ReadParameters(stream);
+  }
+
+  // write evaluation function parameters
+  template <typename T>
+  bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
+    constexpr std::uint32_t header = T::GetHashValue();
+    stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
+    return pointer->WriteParameters(stream);
+  }
+
+  }  // namespace Detail
+
+  // Initialize the evaluation function parameters
+  void Initialize() {
+
+    Detail::Initialize(feature_transformer);
+    Detail::Initialize(network);
+  }
+
+  // Read network header
+  bool ReadHeader(std::istream& stream,
+    std::uint32_t* hash_value, std::string* architecture) {
+
+    std::uint32_t version, size;
+    stream.read(reinterpret_cast<char*>(&version), sizeof(version));
+    stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
+    stream.read(reinterpret_cast<char*>(&size), sizeof(size));
+    if (!stream || version != kVersion) return false;
+    architecture->resize(size);
+    stream.read(&(*architecture)[0], size);
+    return !stream.fail();
+  }
+
+  // write the header
+  bool WriteHeader(std::ostream& stream,
+    std::uint32_t hash_value, const std::string& architecture) {
+    stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
+    stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
+    const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
+    stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
+    stream.write(architecture.data(), size);
+    return !stream.fail();
+  }
+
+  // Read network parameters
+  bool ReadParameters(std::istream& stream) {
+
+    std::uint32_t hash_value;
+    std::string architecture;
+    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
+    if (hash_value != kHashValue) return false;
+    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
+    if (!Detail::ReadParameters(stream, network)) return false;
+    return stream && stream.peek() == std::ios::traits_type::eof();
+  }
+
+  // write evaluation function parameters
+  bool WriteParameters(std::ostream& stream) {
+    if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
+    if (!Detail::WriteParameters(stream, feature_transformer)) return false;
+    if (!Detail::WriteParameters(stream, network)) return false;
+    return !stream.fail();
+  }
+
+  // Update the accumulator by difference calculation if possible
+  static void UpdateAccumulatorIfPossible(const Position& pos) {
+
+    feature_transformer->UpdateAccumulatorIfPossible(pos);
+  }
+
+  // Calculate the evaluation value
+  static Value ComputeScore(const Position& pos, bool refresh) {
+
+    auto& accumulator = pos.state()->accumulator;
+    if (!refresh && accumulator.computed_score) {
+      return accumulator.score;
+    }
+
+    alignas(kCacheLineSize) TransformedFeatureType
+        transformed_features[FeatureTransformer::kBufferSize];
+    feature_transformer->Transform(pos, transformed_features, refresh);
+    alignas(kCacheLineSize) char buffer[Network::kBufferSize];
+    const auto output = network->Propagate(transformed_features, buffer);
+
+    auto score = static_cast<Value>(output[0] / FV_SCALE);
+
+    accumulator.score = score;
+    accumulator.computed_score = true;
+    return accumulator.score;
+  }
+
+  // Load the evaluation function file
+  bool load_eval_file(const std::string& evalFile) {
+
+    Initialize();
+
+    if (Options["SkipLoadingEval"])
+    {
+      std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
+      return true;
+    }
+
+    fileName = evalFile;
+
+    std::ifstream stream(evalFile, std::ios::binary);
+
+    const bool result = ReadParameters(stream);
+
+    return result;
+  }
+
+  // Evaluation function. Perform differential calculation.
+  Value evaluate(const Position& pos) {
+    Value v = ComputeScore(pos, false);
+    v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
+
+    return v;
+  }
+
+  // Evaluation function. Perform full calculation.
+  Value compute_eval(const Position& pos) {
+    return ComputeScore(pos, true);
+  }
+
+  // Update the accumulator by difference calculation if possible
+  void update_eval(const Position& pos) {
+    UpdateAccumulatorIfPossible(pos);
+  }
+
+} // namespace Eval::NNUE
diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h
new file mode 100644
index 00000000..75700d03
--- /dev/null
+++ b/src/nnue/evaluate_nnue.h
@@ -0,0 +1,77 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// header used in NNUE evaluation function
+
+#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
+#define NNUE_EVALUATE_NNUE_H_INCLUDED
+
+#include "nnue_feature_transformer.h"
+
+#include <memory>
+
+namespace Eval::NNUE {
+
+  // Hash value of evaluation function structure
+  constexpr std::uint32_t kHashValue =
+      FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
+
+  // Deleter for automating release of memory area
+  template <typename T>
+  struct AlignedDeleter {
+    void operator()(T* ptr) const {
+      ptr->~T();
+      std_aligned_free(ptr);
+    }
+  };
+
+  template <typename T>
+  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
+
+  // Input feature converter
+  extern AlignedPtr<FeatureTransformer> feature_transformer;
+
+  // Evaluation function
+  extern AlignedPtr<Network> network;
+
+  // Evaluation function file name
+  extern std::string fileName;
+
+  // Saved evaluation function file name
+  extern std::string savedfileName;
+
+  // Get a string that represents the structure of the evaluation function
+  std::string GetArchitectureString();
+
+  // read the header
+  bool ReadHeader(std::istream& stream,
+    std::uint32_t* hash_value, std::string* architecture);
+
+  // write the header
+  bool WriteHeader(std::ostream& stream,
+    std::uint32_t hash_value, const std::string& architecture);
+
+  // read evaluation function parameters
+  bool ReadParameters(std::istream& stream);
+
+  // write evaluation function parameters
+  bool WriteParameters(std::ostream& stream);
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
diff --git a/src/eval/nnue/evaluate_nnue_learner.cpp b/src/nnue/evaluate_nnue_learner.cpp
similarity index 96%
rename from src/eval/nnue/evaluate_nnue_learner.cpp
rename to src/nnue/evaluate_nnue_learner.cpp
index 3297037d..650f443e 100644
--- a/src/eval/nnue/evaluate_nnue_learner.cpp
+++ b/src/nnue/evaluate_nnue_learner.cpp
@@ -5,15 +5,15 @@
 #include <random>
 #include <fstream>
 
-#include "../../learn/learn.h"
-#include "../../learn/learning_tools.h"
+#include "../learn/learn.h"
+#include "../learn/learning_tools.h"
 
-#include "../../position.h"
-#include "../../uci.h"
-#include "../../misc.h"
-#include "../../thread_win32_osx.h"
+#include "../position.h"
+#include "../uci.h"
+#include "../misc.h"
+#include "../thread_win32_osx.h"
 
-#include "../evaluate_common.h"
+#include "../eval/evaluate_common.h"
 
 #include "evaluate_nnue.h"
 #include "evaluate_nnue_learner.h"
diff --git a/src/eval/nnue/evaluate_nnue_learner.h b/src/nnue/evaluate_nnue_learner.h
similarity index 97%
rename from src/eval/nnue/evaluate_nnue_learner.h
rename to src/nnue/evaluate_nnue_learner.h
index ace66524..1e4a463e 100644
--- a/src/eval/nnue/evaluate_nnue_learner.h
+++ b/src/nnue/evaluate_nnue_learner.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
 
-#include "../../learn/learn.h"
+#include "../learn/learn.h"
 
 namespace Eval {
 
diff --git a/src/eval/nnue/features/castling_right.cpp b/src/nnue/features/castling_right.cpp
similarity index 100%
rename from src/eval/nnue/features/castling_right.cpp
rename to src/nnue/features/castling_right.cpp
diff --git a/src/eval/nnue/features/castling_right.h b/src/nnue/features/castling_right.h
similarity index 98%
rename from src/eval/nnue/features/castling_right.h
rename to src/nnue/features/castling_right.h
index 709d4688..3af5b074 100644
--- a/src/eval/nnue/features/castling_right.h
+++ b/src/nnue/features/castling_right.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_NNUE)
 
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 
 namespace Eval {
diff --git a/src/eval/nnue/features/enpassant.cpp b/src/nnue/features/enpassant.cpp
similarity index 96%
rename from src/eval/nnue/features/enpassant.cpp
rename to src/nnue/features/enpassant.cpp
index 82a4158e..ea70529a 100644
--- a/src/eval/nnue/features/enpassant.cpp
+++ b/src/nnue/features/enpassant.cpp
@@ -23,7 +23,7 @@ namespace Eval {
         }
 
         if (perspective == BLACK) {
-          epSquare = Inv(epSquare);
+          epSquare = rotate180(epSquare);
         }
 
         auto file = file_of(epSquare);
diff --git a/src/eval/nnue/features/enpassant.h b/src/nnue/features/enpassant.h
similarity index 98%
rename from src/eval/nnue/features/enpassant.h
rename to src/nnue/features/enpassant.h
index 51880bb4..f77f9c4f 100644
--- a/src/eval/nnue/features/enpassant.h
+++ b/src/nnue/features/enpassant.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_NNUE)
 
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 
 namespace Eval {
diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h
new file mode 100644
index 00000000..ec34a486
--- /dev/null
+++ b/src/nnue/features/feature_set.h
@@ -0,0 +1,249 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class template that represents the input feature set of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_SET_H_INCLUDED
+#define NNUE_FEATURE_SET_H_INCLUDED
+
+#include "features_common.h"
+#include <array>
+
+namespace Eval::NNUE::Features {
+
+  // Class template that represents a list of values
+  template <typename T, T... Values>
+  struct CompileTimeList;
+
+  template <typename T, T First, T... Remaining>
+  struct CompileTimeList<T, First, Remaining...> {
+    static constexpr bool Contains(T value) {
+      return value == First || CompileTimeList<T, Remaining...>::Contains(value);
+    }
+    static constexpr std::array<T, sizeof...(Remaining) + 1>
+        kValues = {{First, Remaining...}};
+  };
+
+  template <typename T, T First, T... Remaining>
+  constexpr std::array<T, sizeof...(Remaining) + 1>
+    CompileTimeList<T, First, Remaining...>::kValues;
+  template <typename T>
+  struct CompileTimeList<T> {
+    static constexpr bool Contains(T /*value*/) {
+      return false;
+    }
+    static constexpr std::array<T, 0> kValues = { {} };
+  };
+
+  // Class template that adds to the beginning of the list
+  template <typename T, typename ListType, T Value>
+  struct AppendToList;
+  template <typename T, T... Values, T AnotherValue>
+  struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
+    using Result = CompileTimeList<T, AnotherValue, Values...>;
+  };
+
+  // Class template for adding to a sorted, unique list
+  template <typename T, typename ListType, T Value>
+  struct InsertToSet;
+  template <typename T, T First, T... Remaining, T AnotherValue>
+  struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
+    using Result = std::conditional_t<
+      CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
+      CompileTimeList<T, First, Remaining...>,
+      std::conditional_t<(AnotherValue < First),
+      CompileTimeList<T, AnotherValue, First, Remaining...>,
+      typename AppendToList<T, typename InsertToSet<
+      T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
+      First>::Result>>;
+  };
+  template <typename T, T Value>
+  struct InsertToSet<T, CompileTimeList<T>, Value> {
+    using Result = CompileTimeList<T, Value>;
+  };
+
+  // Base class of feature set
+  template <typename Derived>
+  class FeatureSetBase {
+
+   public:
+    // Get a list of indices for active features
+    template <typename IndexListType>
+    static void AppendActiveIndices(
+        const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
+
+      for (Color perspective : { WHITE, BLACK }) {
+        Derived::CollectActiveIndices(
+            pos, trigger, perspective, &active[perspective]);
+      }
+    }
+
+    // Get a list of indices for recently changed features
+    template <typename PositionType, typename IndexListType>
+    static void AppendChangedIndices(
+        const PositionType& pos, TriggerEvent trigger,
+        IndexListType removed[2], IndexListType added[2], bool reset[2]) {
+
+      const auto& dp = pos.state()->dirtyPiece;
+      if (dp.dirty_num == 0) return;
+
+      for (Color perspective : { WHITE, BLACK }) {
+        reset[perspective] = false;
+        switch (trigger) {
+          case TriggerEvent::kFriendKingMoved:
+            reset[perspective] =
+                dp.pieceId[0] == PIECE_ID_KING + perspective;
+            break;
+          default:
+            assert(false);
+            break;
+        }
+        if (reset[perspective]) {
+          Derived::CollectActiveIndices(
+              pos, trigger, perspective, &added[perspective]);
+        } else {
+          Derived::CollectChangedIndices(
+              pos, trigger, perspective,
+              &removed[perspective], &added[perspective]);
+        }
+      }
+    }
+  };
+
+  // Class template that represents the feature set
+  // Processes the template arguments in reverse order internally so that the runtime cost is linear
+  template <typename FirstFeatureType, typename... RemainingFeatureTypes>
+  class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
+    public FeatureSetBase<
+    FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
+  private:
+    using Head = FirstFeatureType;
+    using Tail = FeatureSet<RemainingFeatureTypes...>;
+
+  public:
+    // Hash value embedded in the evaluation function file
+    static constexpr std::uint32_t kHashValue =
+      Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
+    // number of feature dimensions
+    static constexpr IndexType kDimensions =
+      Head::kDimensions + Tail::kDimensions;
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions =
+      Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
+    // Sorted set of triggers for full calculation instead of difference calculation
+    using SortedTriggerSet = typename InsertToSet<TriggerEvent,
+      typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
+    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
+
+    // Get the feature name
+    static std::string GetName() {
+      return std::string(Head::kName) + "+" + Tail::GetName();
+    }
+
+  private:
+    // Get a list of indices for active features
+    template <typename IndexListType>
+    static void CollectActiveIndices(
+      const Position& pos, const TriggerEvent trigger, const Color perspective,
+      IndexListType* const active) {
+      Tail::CollectActiveIndices(pos, trigger, perspective, active);
+      if (Head::kRefreshTrigger == trigger) {
+        const auto start = active->size();
+        Head::AppendActiveIndices(pos, perspective, active);
+        for (auto i = start; i < active->size(); ++i) {
+          (*active)[i] += Tail::kDimensions;
+        }
+      }
+    }
+
+    // Get a list of indices for recently changed features
+    template <typename IndexListType>
+    static void CollectChangedIndices(
+      const Position& pos, const TriggerEvent trigger, const Color perspective,
+      IndexListType* const removed, IndexListType* const added) {
+      Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
+      if (Head::kRefreshTrigger == trigger) {
+        const auto start_removed = removed->size();
+        const auto start_added = added->size();
+        Head::AppendChangedIndices(pos, perspective, removed, added);
+        for (auto i = start_removed; i < removed->size(); ++i) {
+          (*removed)[i] += Tail::kDimensions;
+        }
+        for (auto i = start_added; i < added->size(); ++i) {
+          (*added)[i] += Tail::kDimensions;
+        }
+      }
+    }
+
+    // Make the base class and the class template that recursively uses itself a friend
+    friend class FeatureSetBase<FeatureSet>;
+    template <typename... FeatureTypes>
+    friend class FeatureSet;
+  };
+
+  // Class template that represents the feature set
+  template <typename FeatureType>
+  class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
+
+   public:
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
+    // Number of feature dimensions
+    static constexpr IndexType kDimensions = FeatureType::kDimensions;
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions =
+        FeatureType::kMaxActiveDimensions;
+    // Trigger for full calculation instead of difference calculation
+    using SortedTriggerSet =
+        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
+    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
+
+    // Get the feature name
+    static std::string GetName() {
+      return FeatureType::kName;
+    }
+
+   private:
+    // Get a list of indices for active features
+    static void CollectActiveIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const active) {
+      if (FeatureType::kRefreshTrigger == trigger) {
+        FeatureType::AppendActiveIndices(pos, perspective, active);
+      }
+    }
+
+    // Get a list of indices for recently changed features
+    static void CollectChangedIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const removed, IndexList* const added) {
+
+      if (FeatureType::kRefreshTrigger == trigger) {
+        FeatureType::AppendChangedIndices(pos, perspective, removed, added);
+      }
+    }
+
+    // Make the base class and the class template that recursively uses itself a friend
+    friend class FeatureSetBase<FeatureSet>;
+    template <typename... FeatureTypes>
+    friend class FeatureSet;
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h
new file mode 100644
index 00000000..3377cd8f
--- /dev/null
+++ b/src/nnue/features/features_common.h
@@ -0,0 +1,50 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+//Common header of input features of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
+#define NNUE_FEATURES_COMMON_H_INCLUDED
+
+#include "../../evaluate.h"
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Features {
+
+  class IndexList;
+
+  template <typename... FeatureTypes>
+  class FeatureSet;
+
+  // Trigger to perform full calculations instead of difference only
+  enum class TriggerEvent {
+    kNone, // Calculate the difference whenever possible
+    kFriendKingMoved, // Calculate all when own king moves
+    kEnemyKingMoved, // Calculate all when enemy king moves
+    kAnyKingMoved, // Calculate all when either king moves
+    kAnyPieceMoved, // Always calculate all
+  };
+
+  enum class Side {
+    kFriend, // same color as the perspective (not necessarily the side to move)
+    kEnemy, // opposite color to the perspective
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp
new file mode 100644
index 00000000..628add6e
--- /dev/null
+++ b/src/nnue/features/half_kp.cpp
@@ -0,0 +1,92 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+//Definition of input features HalfKP of NNUE evaluation function
+
+#include "half_kp.h"
+#include "index_list.h"
+
+namespace Eval::NNUE::Features {
+
+  // Compute the feature index from the king square and PieceSquare
+  template <Side AssociatedKing>
+  inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
+    return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
+  }
+
+  // Get pieces information
+  template <Side AssociatedKing>
+  inline void HalfKP<AssociatedKing>::GetPieces(
+      const Position& pos, Color perspective,
+      PieceSquare** pieces, Square* sq_target_k) {
+
+    *pieces = (perspective == BLACK) ?
+        pos.eval_list()->piece_list_fb() :
+        pos.eval_list()->piece_list_fw();
+    const PieceId target = (AssociatedKing == Side::kFriend) ?
+        static_cast<PieceId>(PIECE_ID_KING + perspective) :
+        static_cast<PieceId>(PIECE_ID_KING + ~perspective);
+    *sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
+  }
+
+  // Get a list of indices for active features
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendActiveIndices(
+      const Position& pos, Color perspective, IndexList* active) {
+
+    // Do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+
+    PieceSquare* pieces;
+    Square sq_target_k;
+    GetPieces(pos, perspective, &pieces, &sq_target_k);
+    for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
+      if (pieces[i] != PS_NONE) {
+        active->push_back(MakeIndex(sq_target_k, pieces[i]));
+      }
+    }
+  }
+
+  // Get a list of indices for recently changed features
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendChangedIndices(
+      const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added) {
+
+    PieceSquare* pieces;
+    Square sq_target_k;
+    GetPieces(pos, perspective, &pieces, &sq_target_k);
+    const auto& dp = pos.state()->dirtyPiece;
+    for (int i = 0; i < dp.dirty_num; ++i) {
+      if (dp.pieceId[i] >= PIECE_ID_KING) continue;
+      const auto old_p = static_cast<PieceSquare>(
+          dp.old_piece[i].from[perspective]);
+      if (old_p != PS_NONE) {
+        removed->push_back(MakeIndex(sq_target_k, old_p));
+      }
+      const auto new_p = static_cast<PieceSquare>(
+          dp.new_piece[i].from[perspective]);
+      if (new_p != PS_NONE) {
+        added->push_back(MakeIndex(sq_target_k, new_p));
+      }
+    }
+  }
+
+  template class HalfKP<Side::kFriend>;
+
+}  // namespace Eval::NNUE::Features
diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h
new file mode 100644
index 00000000..99842eea
--- /dev/null
+++ b/src/nnue/features/half_kp.h
@@ -0,0 +1,67 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features HalfKP of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
+#define NNUE_FEATURES_HALF_KP_H_INCLUDED
+
+#include "../../evaluate.h"
+#include "features_common.h"
+
+namespace Eval::NNUE::Features {
+
+  // Feature HalfKP: Combination of the position of own king
+  // and the position of pieces other than kings
+  template <Side AssociatedKing>
+  class HalfKP {
+
+   public:
+    // Feature name (a fixed string: it does not vary with AssociatedKing)
+    static constexpr const char* kName = "HalfKP(Friend)";
+    // Hash value embedded in the evaluation file; the lowest bit is flipped when AssociatedKing is Side::kFriend
+    static constexpr std::uint32_t kHashValue =
+        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
+    // Number of feature dimensions: SQUARE_NB * PS_END, i.e. one per (king square, PieceSquare) pair
+    static constexpr IndexType kDimensions =
+        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
+    // Maximum number of simultaneously active features (one per piece id below PIECE_ID_KING)
+    static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
+    // Trigger for full calculation instead of difference calculation (also fixed, independent of AssociatedKing)
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
+
+    // Append the indices of all currently active features, seen from `perspective`, to `active`
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+                                    IndexList* active);
+
+    // Append the indices of recently changed features to `removed` (old) and `added` (new)
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+                                     IndexList* removed, IndexList* added);
+
+    // Index of a feature for a given king position and another piece on some square
+    static IndexType MakeIndex(Square sq_k, PieceSquare p);
+
+   private:
+    // Get pieces information: outputs the piece list and the associated king's square through the pointer arguments
+    static void GetPieces(const Position& pos, Color perspective,
+                          PieceSquare** pieces, Square* sq_target_k);
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
diff --git a/src/eval/nnue/features/half_relative_kp.cpp b/src/nnue/features/half_relative_kp.cpp
similarity index 65%
rename from src/eval/nnue/features/half_relative_kp.cpp
rename to src/nnue/features/half_relative_kp.cpp
index 623b839c..7f15ff39 100644
--- a/src/eval/nnue/features/half_relative_kp.cpp
+++ b/src/nnue/features/half_relative_kp.cpp
@@ -11,14 +11,14 @@ namespace NNUE {
 
 namespace Features {
 
-// Find the index of the feature quantity from the ball position and BonaPiece
+// Find the index of the feature quantity from the king position and PieceSquare
 template <Side AssociatedKing>
 inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
-    Square sq_k, BonaPiece p) {
+    Square sq_k, PieceSquare p) {
   constexpr IndexType W = kBoardWidth;
   constexpr IndexType H = kBoardHeight;
-  const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB;
-  const Square sq_p = static_cast<Square>((p - fe_hand_end) % SQUARE_NB);
+  const IndexType piece_index = (p - PieceSquare::PS_W_PAWN) / SQUARE_NB;
+  const Square sq_p = static_cast<Square>((p - PieceSquare::PS_W_PAWN) % SQUARE_NB);
   const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
   const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
   return H * W * piece_index + H * relative_file + relative_rank;
@@ -28,14 +28,14 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
 template <Side AssociatedKing>
 inline void HalfRelativeKP<AssociatedKing>::GetPieces(
     const Position& pos, Color perspective,
-    BonaPiece** pieces, Square* sq_target_k) {
+    PieceSquare** pieces, Square* sq_target_k) {
   *pieces = (perspective == BLACK) ?
       pos.eval_list()->piece_list_fb() :
       pos.eval_list()->piece_list_fw();
-  const PieceNumber target = (AssociatedKing == Side::kFriend) ?
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
-  *sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
+  const PieceId target = (AssociatedKing == Side::kFriend) ?
+      static_cast<PieceId>(PieceId::PIECE_ID_KING + perspective) :
+      static_cast<PieceId>(PieceId::PIECE_ID_KING + ~perspective);
+  *sq_target_k = static_cast<Square>(((*pieces)[target] - PieceSquare::PS_W_KING) % SQUARE_NB);
 }
 
 // Get a list of indices with a value of 1 among the features
@@ -45,12 +45,12 @@ void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
   // do nothing if array size is small to avoid compiler warning
   if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
 
-  BonaPiece* pieces;
+  PieceSquare* pieces;
   Square sq_target_k;
   GetPieces(pos, perspective, &pieces, &sq_target_k);
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
-    if (pieces[i] >= fe_hand_end) {
-      if (pieces[i] != Eval::BONA_PIECE_ZERO) {
+  for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
+    if (pieces[i] >= PieceSquare::PS_W_PAWN) {
+      if (pieces[i] != PieceSquare::PS_NONE) {
         active->push_back(MakeIndex(sq_target_k, pieces[i]));
       }
     }
@@ -62,23 +62,23 @@ template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
     const Position& pos, Color perspective,
     IndexList* removed, IndexList* added) {
-  BonaPiece* pieces;
+  PieceSquare* pieces;
   Square sq_target_k;
   GetPieces(pos, perspective, &pieces, &sq_target_k);
   const auto& dp = pos.state()->dirtyPiece;
   for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
-    const auto old_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].old_piece.from[perspective]);
-    if (old_p >= fe_hand_end) {
-      if (old_p != Eval::BONA_PIECE_ZERO) {
+    if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
+    const auto old_p = static_cast<PieceSquare>(
+        dp.old_piece[i].from[perspective]);
+    if (old_p >= PieceSquare::PS_W_PAWN) {
+      if (old_p != PieceSquare::PS_NONE) {
         removed->push_back(MakeIndex(sq_target_k, old_p));
       }
     }
-    const auto new_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].new_piece.from[perspective]);
-    if (new_p >= fe_hand_end) {
-      if (new_p != Eval::BONA_PIECE_ZERO) {
+    const auto new_p = static_cast<PieceSquare>(
+        dp.new_piece[i].from[perspective]);
+    if (new_p >= PieceSquare::PS_W_PAWN) {
+      if (new_p != PieceSquare::PS_NONE) {
         added->push_back(MakeIndex(sq_target_k, new_p));
       }
     }
diff --git a/src/eval/nnue/features/half_relative_kp.h b/src/nnue/features/half_relative_kp.h
similarity index 86%
rename from src/eval/nnue/features/half_relative_kp.h
rename to src/nnue/features/half_relative_kp.h
index 2f967745..9561ab91 100644
--- a/src/eval/nnue/features/half_relative_kp.h
+++ b/src/nnue/features/half_relative_kp.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_NNUE)
 
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 
 namespace Eval {
@@ -25,7 +25,7 @@ class HalfRelativeKP {
   static constexpr std::uint32_t kHashValue =
       0xF9180919u ^ (AssociatedKing == Side::kFriend);
   // Piece type excluding balls
-  static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB;
+  static constexpr IndexType kNumPieceKinds = (PieceSquare::PS_END - PieceSquare::PS_W_PAWN) / SQUARE_NB;
   // width of the virtual board with the ball in the center
   static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
   // height of a virtual board with balls in the center
@@ -34,7 +34,7 @@ class HalfRelativeKP {
   static constexpr IndexType kDimensions =
       kNumPieceKinds * kBoardHeight * kBoardWidth;
   // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
+  static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
   // Timing of full calculation instead of difference calculation
   static constexpr TriggerEvent kRefreshTrigger =
       (AssociatedKing == Side::kFriend) ?
@@ -48,13 +48,13 @@ class HalfRelativeKP {
   static void AppendChangedIndices(const Position& pos, Color perspective,
                                    IndexList* removed, IndexList* added);
 
-  // Find the index of the feature quantity from the ball position and BonaPiece
-  static IndexType MakeIndex(Square sq_k, BonaPiece p);
+  // Find the index of the feature quantity from the king position and PieceSquare
+  static IndexType MakeIndex(Square sq_k, PieceSquare p);
 
  private:
   // Get the piece information
   static void GetPieces(const Position& pos, Color perspective,
-                        BonaPiece** pieces, Square* sq_target_k);
+                        PieceSquare** pieces, Square* sq_target_k);
 };
 
 }  // namespace Features
diff --git a/src/nnue/features/index_list.h b/src/nnue/features/index_list.h
new file mode 100644
index 00000000..d9ad680a
--- /dev/null
+++ b/src/nnue/features/index_list.h
@@ -0,0 +1,64 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of index list of input features
+
+#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+
+#include "../../position.h"
+#include "../nnue_architecture.h"
+
+namespace Eval::NNUE::Features {
+
+  // Fixed-capacity list backed by an in-object array of MaxSize elements of type T; used for feature index lists
+  template <typename T, std::size_t MaxSize>
+  class ValueList {
+
+   public:
+    std::size_t size() const { return size_; }  // number of values currently stored
+    void resize(std::size_t size) { size_ = size; }  // only changes the count; elements are left untouched
+    void push_back(const T& value) { values_[size_++] = value; }  // no bounds check: caller must keep size_ < MaxSize
+    T& operator[](std::size_t index) { return values_[index]; }  // unchecked access
+    T* begin() { return values_; }
+    T* end() { return values_ + size_; }  // one past the last stored value, not past the whole array
+    const T& operator[](std::size_t index) const { return values_[index]; }
+    const T* begin() const { return values_; }
+    const T* end() const { return values_ + size_; }
+
+    void swap(ValueList& other) {  // exchange contents and sizes with `other`
+      const std::size_t max_size = std::max(size_, other.size_);  // slots beyond this are unused in both lists
+      for (std::size_t i = 0; i < max_size; ++i) {
+        std::swap(values_[i], other.values_[i]);
+      }
+      std::swap(size_, other.size_);
+    }
+
+   private:
+    T values_[MaxSize];  // storage; only the first size_ entries are meaningful
+    std::size_t size_ = 0;
+  };
+
+  // Type of feature index list: a ValueList of IndexType with capacity RawFeatures::kMaxActiveDimensions
+  class IndexList
+      : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
+  };
+
+}  // namespace Eval::NNUE::Features
+
+#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
diff --git a/src/eval/nnue/features/k.cpp b/src/nnue/features/k.cpp
similarity index 66%
rename from src/eval/nnue/features/k.cpp
rename to src/nnue/features/k.cpp
index dc01eb92..001e4b98 100644
--- a/src/eval/nnue/features/k.cpp
+++ b/src/nnue/features/k.cpp
@@ -17,13 +17,13 @@ void K::AppendActiveIndices(
   // do nothing if array size is small to avoid compiler warning
   if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
 
-  const BonaPiece* pieces = (perspective == BLACK) ?
+  const PieceSquare* pieces = (perspective == BLACK) ?
       pos.eval_list()->piece_list_fb() :
       pos.eval_list()->piece_list_fw();
-  assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO);
-  assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO);
-  for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) {
-    active->push_back(pieces[i] - fe_end);
+  assert(pieces[PieceId::PIECE_ID_BKING] != PieceSquare::PS_NONE);
+  assert(pieces[PieceId::PIECE_ID_WKING] != PieceSquare::PS_NONE);
+  for (PieceId i = PieceId::PIECE_ID_KING; i < PieceId::PIECE_ID_NONE; ++i) {
+    active->push_back(pieces[i] - PieceSquare::PS_END);
   }
 }
 
@@ -32,11 +32,11 @@ void K::AppendChangedIndices(
     const Position& pos, Color perspective,
     IndexList* removed, IndexList* added) {
   const auto& dp = pos.state()->dirtyPiece;
-  if (dp.pieceNo[0] >= PIECE_NUMBER_KING) {
+  if (dp.pieceId[0] >= PieceId::PIECE_ID_KING) {
     removed->push_back(
-        dp.changed_piece[0].old_piece.from[perspective] - fe_end);
+        dp.old_piece[0].from[perspective] - PieceSquare::PS_END);
     added->push_back(
-        dp.changed_piece[0].new_piece.from[perspective] - fe_end);
+        dp.new_piece[0].from[perspective] - PieceSquare::PS_END);
   }
 }
 
diff --git a/src/eval/nnue/features/k.h b/src/nnue/features/k.h
similarity index 97%
rename from src/eval/nnue/features/k.h
rename to src/nnue/features/k.h
index d7a6f4aa..28431010 100644
--- a/src/eval/nnue/features/k.h
+++ b/src/nnue/features/k.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_NNUE)
 
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 
 namespace Eval {
diff --git a/src/eval/nnue/features/p.cpp b/src/nnue/features/p.cpp
similarity index 65%
rename from src/eval/nnue/features/p.cpp
rename to src/nnue/features/p.cpp
index 68527119..8b24f544 100644
--- a/src/eval/nnue/features/p.cpp
+++ b/src/nnue/features/p.cpp
@@ -17,11 +17,11 @@ void P::AppendActiveIndices(
   // do nothing if array size is small to avoid compiler warning
   if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
 
-  const BonaPiece* pieces = (perspective == BLACK) ?
+  const PieceSquare* pieces = (perspective == BLACK) ?
       pos.eval_list()->piece_list_fb() :
       pos.eval_list()->piece_list_fw();
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
-    if (pieces[i] != Eval::BONA_PIECE_ZERO) {
+  for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
+    if (pieces[i] != PieceSquare::PS_NONE) {
       active->push_back(pieces[i]);
     }
   }
@@ -33,12 +33,12 @@ void P::AppendChangedIndices(
     IndexList* removed, IndexList* added) {
   const auto& dp = pos.state()->dirtyPiece;
   for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
-    if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
-      removed->push_back(dp.changed_piece[i].old_piece.from[perspective]);
+    if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
+    if (dp.old_piece[i].from[perspective] != PieceSquare::PS_NONE) {
+      removed->push_back(dp.old_piece[i].from[perspective]);
     }
-    if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
-      added->push_back(dp.changed_piece[i].new_piece.from[perspective]);
+    if (dp.new_piece[i].from[perspective] != PieceSquare::PS_NONE) {
+      added->push_back(dp.new_piece[i].from[perspective]);
     }
   }
 }
diff --git a/src/eval/nnue/features/p.h b/src/nnue/features/p.h
similarity index 85%
rename from src/eval/nnue/features/p.h
rename to src/nnue/features/p.h
index 27a944fa..2a83c4ad 100644
--- a/src/eval/nnue/features/p.h
+++ b/src/nnue/features/p.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_NNUE)
 
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 
 namespace Eval {
@@ -14,7 +14,7 @@ namespace NNUE {
 
 namespace Features {
 
-// Feature P: BonaPiece of pieces other than balls
+// Feature P: PieceSquare of pieces other than kings
 class P {
  public:
   // feature quantity name
@@ -22,9 +22,9 @@ class P {
   // Hash value embedded in the evaluation function file
   static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
   // number of feature dimensions
-  static constexpr IndexType kDimensions = fe_end;
+  static constexpr IndexType kDimensions = PieceSquare::PS_END;
   // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
+  static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
   // Timing of full calculation instead of difference calculation
   static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
 
diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h
new file mode 100644
index 00000000..7336be52
--- /dev/null
+++ b/src/nnue/layers/affine_transform.h
@@ -0,0 +1,218 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer AffineTransform of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+
+#include <iostream>
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+  // Affine transformation layer: output[i] = biases_[i] + sum_j weights_[i][j] * input[j] (8-bit inputs and weights, 32-bit outputs)
+  template <typename PreviousLayer, IndexType OutputDimensions>
+  class AffineTransform {
+   public:
+    // Input/output type
+    using InputType = typename PreviousLayer::OutputType;
+    using OutputType = std::int32_t;
+    static_assert(std::is_same<InputType, std::uint8_t>::value, "");
+
+    // Number of input/output dimensions (each weight row is padded to a multiple of kMaxSimdWidth)
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = OutputDimensions;
+    static constexpr IndexType kPaddedInputDimensions =
+        CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
+
+    // Size of forward propagation buffer used in this layer (rounded up to whole cache lines)
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;
+
+    // Hash value embedded in the evaluation file; mixes this layer's output size with the previous layer's hash
+    static constexpr std::uint32_t GetHashValue() {
+      std::uint32_t hash_value = 0xCC03DAE4u;
+      hash_value += kOutputDimensions;
+      hash_value ^= PreviousLayer::GetHashValue() >> 1;
+      hash_value ^= PreviousLayer::GetHashValue() << 31;
+      return hash_value;
+    }
+
+    // A string that represents the structure from the input layer to this layer
+    static std::string GetStructureString() {
+      return "AffineTransform[" +
+        std::to_string(kOutputDimensions) + "<-" +
+        std::to_string(kInputDimensions) + "](" +
+        PreviousLayer::GetStructureString() + ")";
+    }
+
+    // Read network parameters: previous layers first, then this layer's biases, then its padded weight rows
+    bool ReadParameters(std::istream& stream) {
+      if (!previous_layer_.ReadParameters(stream)) return false;
+      stream.read(reinterpret_cast<char*>(biases_),
+                  kOutputDimensions * sizeof(BiasType));
+      stream.read(reinterpret_cast<char*>(weights_),
+                  kOutputDimensions * kPaddedInputDimensions *
+                  sizeof(WeightType));
+      return !stream.fail();
+    }
+
+    // Write network parameters in the same order ReadParameters reads them
+    bool WriteParameters(std::ostream& stream) const {
+      if (!previous_layer_.WriteParameters(stream)) return false;
+      stream.write(reinterpret_cast<const char*>(biases_),
+        kOutputDimensions * sizeof(BiasType));
+      stream.write(reinterpret_cast<const char*>(weights_),
+        kOutputDimensions * kPaddedInputDimensions *
+        sizeof(WeightType));
+      return !stream.fail();
+    }
+
+    // Forward propagation: this layer's output occupies the start of `buffer`; previous layers use the part after kSelfBufferSize
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);
+      const auto output = reinterpret_cast<OutputType*>(buffer);
+
+  #if defined(USE_AVX512)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
+      const __m512i kOnes = _mm512_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m512i*>(input);
+
+  #elif defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m256i kOnes = _mm256_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m256i*>(input);
+
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m128i kOnes = _mm_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m128i*>(input);
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
+  #endif
+
+      for (IndexType i = 0; i < kOutputDimensions; ++i) {  // one dot product per output neuron
+        const IndexType offset = i * kPaddedInputDimensions;  // start of weight row i
+
+  #if defined(USE_AVX512)
+        __m512i sum = _mm512_setzero_si512();
+        const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+            __m512i product = _mm512_maddubs_epi16(
+              _mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+            product = _mm512_madd_epi16(product, kOnes);  // add adjacent 16-bit pairs into 32-bit lanes
+            sum = _mm512_add_epi32(sum, product);
+        }
+        output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
+
+        // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
+        // As a result kPaddedInputDimensions may not be a multiple of 64 (512 bits),
+        // in which case one more 256-bit chunk has to be processed.
+        if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
+        {
+            const auto iv_256  = reinterpret_cast<const __m256i*>(input);
+            const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            int j = kNumChunks * 2;  // index of the leftover chunk, counted in 256-bit units
+
+            __m256i sum256 = _mm256_maddubs_epi16(
+              _mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+            sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
+            sum256 = _mm256_hadd_epi32(sum256, sum256);
+            sum256 = _mm256_hadd_epi32(sum256, sum256);
+            const __m128i lo = _mm256_extracti128_si256(sum256, 0);
+            const __m128i hi = _mm256_extracti128_si256(sum256, 1);
+            output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
+        }
+
+  #elif defined(USE_AVX2)
+        __m256i sum = _mm256_setzero_si256();
+        const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i product = _mm256_maddubs_epi16(
+            _mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
+          product = _mm256_madd_epi16(product, kOnes);  // add adjacent 16-bit pairs into 32-bit lanes
+          sum = _mm256_add_epi32(sum, product);
+        }
+        sum = _mm256_hadd_epi32(sum, sum);  // two horizontal adds reduce each 128-bit half to one total
+        sum = _mm256_hadd_epi32(sum, sum);
+        const __m128i lo = _mm256_extracti128_si256(sum, 0);
+        const __m128i hi = _mm256_extracti128_si256(sum, 1);
+        output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
+
+  #elif defined(USE_SSSE3)
+        __m128i sum = _mm_cvtsi32_si128(biases_[i]);  // bias seeds the lowest lane of the accumulator
+        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i product = _mm_maddubs_epi16(
+              _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
+          product = _mm_madd_epi16(product, kOnes);
+          sum = _mm_add_epi32(sum, product);
+        }
+        sum = _mm_hadd_epi32(sum, sum);
+        sum = _mm_hadd_epi32(sum, sum);
+        output[i] = _mm_cvtsi128_si32(sum);
+
+  #elif defined(USE_NEON)
+        int32x4_t sum = {biases_[i]};  // bias in lane 0, remaining lanes zero-initialized
+        const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
+          product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
+          sum = vpadalq_s16(sum, product);
+        }
+        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
+
+  #else
+        OutputType sum = biases_[i];  // scalar fallback: iterates the unpadded input dimensions only
+        for (IndexType j = 0; j < kInputDimensions; ++j) {
+          sum += weights_[offset + j] * input[j];
+        }
+        output[i] = sum;
+  #endif
+
+      }
+      return output;
+    }
+
+   private:
+    using BiasType = OutputType;
+    using WeightType = std::int8_t;
+
+    // Make the learning class a friend so it can access the parameters below
+    friend class Trainer<AffineTransform>;
+
+    PreviousLayer previous_layer_;
+
+    alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
+    alignas(kCacheLineSize)
+        WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
+  };
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h
new file mode 100644
index 00000000..9b5a5f5f
--- /dev/null
+++ b/src/nnue/layers/clipped_relu.h
@@ -0,0 +1,164 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer ClippedReLU of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+  // Clipped ReLU: output[i] = clamp(input[i] >> kWeightScaleBits, 0, 127), stored as 8-bit values
+  template <typename PreviousLayer>
+  class ClippedReLU {
+   public:
+    // Input/output type
+    using InputType = typename PreviousLayer::OutputType;
+    using OutputType = std::uint8_t;
+    static_assert(std::is_same<InputType, std::int32_t>::value, "");
+
+    // Number of input/output dimensions (element-wise layer, so they are equal)
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = kInputDimensions;
+
+    // Size of forward propagation buffer used in this layer (rounded up to whole cache lines)
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;
+
+    // Hash value embedded in the evaluation file; a layer constant plus the previous layer's hash
+    static constexpr std::uint32_t GetHashValue() {
+      std::uint32_t hash_value = 0x538D24C7u;
+      hash_value += PreviousLayer::GetHashValue();
+      return hash_value;
+    }
+
+    // A string that represents the structure from the input layer to this layer
+    static std::string GetStructureString() {
+      return "ClippedReLU[" +
+        std::to_string(kOutputDimensions) + "](" +
+        PreviousLayer::GetStructureString() + ")";
+    }
+
+    // Read network parameters (this layer stores none; delegates to the previous layer)
+    bool ReadParameters(std::istream& stream) {
+      return previous_layer_.ReadParameters(stream);
+    }
+
+    // Write network parameters (this layer stores none; delegates to the previous layer)
+    bool WriteParameters(std::ostream& stream) const {
+      return previous_layer_.WriteParameters(stream);
+    }
+
+    // Forward propagation: this layer's output occupies the start of `buffer`; previous layers use the part after kSelfBufferSize
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);
+      const auto output = reinterpret_cast<OutputType*>(buffer);
+
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+      const __m256i kZero = _mm256_setzero_si256();
+      const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);  // dword order that undoes the per-128-bit-lane interleaving of the packs
+      const auto in = reinterpret_cast<const __m256i*>(input);
+      const auto out = reinterpret_cast<__m256i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {  // four 256-bit input vectors produce one output vector
+        const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
+          _mm256_load_si256(&in[i * 4 + 0]),
+          _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
+        const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
+          _mm256_load_si256(&in[i * 4 + 2]),
+          _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
+        _mm256_store_si256(
+            &out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
+            _mm256_packs_epi16(words0, words1), kZero), kOffsets));
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;  // first element not covered by the SIMD loop
+
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);
+  #endif
+
+      const auto in = reinterpret_cast<const __m128i*>(input);
+      const auto out = reinterpret_cast<__m128i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {  // four 128-bit input vectors produce one output vector
+        const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 0]),
+            _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
+        const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 2]),
+            _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
+        const __m128i packedbytes = _mm_packs_epi16(words0, words1);
+        _mm_store_si128(&out[i],
+
+  #ifdef USE_SSE41
+          _mm_max_epi8(packedbytes, kZero)
+  #else
+          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)  // max(x, 0) via saturating add/sub of -128, used when USE_SSE41 is not defined
+  #endif
+
+        );
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;  // first element not covered by the SIMD loop
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+      const auto in = reinterpret_cast<const int32x4_t*>(input);
+      const auto out = reinterpret_cast<int8x8_t*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {  // two 4-lane input vectors produce one 8-byte output vector
+        int16x8_t shifted;
+        const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
+        pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
+        pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
+        out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
+      }
+      constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);  // first element not covered by the SIMD loop
+  #else
+      constexpr IndexType kStart = 0;  // no SIMD: the scalar loop below handles everything
+  #endif
+
+      for (IndexType i = kStart; i < kInputDimensions; ++i) {  // scalar path for the elements from kStart onwards
+        output[i] = static_cast<OutputType>(
+            std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
+      }
+      return output;
+    }
+
+   private:
+     // Make the learning class a friend so it can access previous_layer_
+     friend class Trainer<ClippedReLU>;
+
+    PreviousLayer previous_layer_;
+  };
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
diff --git a/src/nnue/layers/input_slice.h b/src/nnue/layers/input_slice.h
new file mode 100644
index 00000000..78756a39
--- /dev/null
+++ b/src/nnue/layers/input_slice.h
@@ -0,0 +1,80 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// NNUE evaluation function layer InputSlice definition
+
+#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+
+#include "../nnue_common.h"
+
+namespace Eval::NNUE::Layers {
+
+// Input layer: exposes a window of the transformed features starting at Offset
+template <IndexType OutputDimensions, IndexType Offset = 0>
+class InputSlice {
+ public:
+  // Need to maintain alignment: Propagate() hands out (input + Offset) as-is
+  static_assert(Offset % kMaxSimdWidth == 0, "Offset must be SIMD-aligned");
+
+  // Output type
+  using OutputType = TransformedFeatureType;
+
+  // Output dimensionality
+  static constexpr IndexType kOutputDimensions = OutputDimensions;
+
+  // Size of forward propagation buffer used from the input layer to this layer
+  static constexpr std::size_t kBufferSize = 0;
+
+  // Hash value embedded in the evaluation file (mixes in size and offset)
+  static constexpr std::uint32_t GetHashValue() {
+    std::uint32_t hash_value = 0xEC42E90Du;
+    hash_value ^= kOutputDimensions ^ (Offset << 10);
+    return hash_value;
+  }
+
+  // Structure string of the form "InputSlice[<dims>(<begin>:<end>)]"
+  static std::string GetStructureString() {
+    return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
+      std::to_string(Offset) + ":" +
+      std::to_string(Offset + kOutputDimensions) + ")]";
+  }
+
+  // Read network parameters: nothing is read, always reports success
+  bool ReadParameters(std::istream& /*stream*/) {
+    return true;
+  }
+
+  // Write network parameters: nothing is written, always reports success
+  bool WriteParameters(std::ostream& /*stream*/) const {
+    return true;
+  }
+
+  // Forward propagation: returns the input advanced by Offset, buffer unused
+  const OutputType* Propagate(
+      const TransformedFeatureType* transformed_features,
+      char* /*buffer*/) const {
+    return transformed_features + Offset;
+  }
+
+ private:
+};
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
diff --git a/src/eval/nnue/layers/sum.h b/src/nnue/layers/sum.h
similarity index 100%
rename from src/eval/nnue/layers/sum.h
rename to src/nnue/layers/sum.h
diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h
new file mode 100644
index 00000000..2a354a3c
--- /dev/null
+++ b/src/nnue/nnue_accumulator.h
@@ -0,0 +1,39 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Class for difference calculation of NNUE evaluation function
+
+#ifndef NNUE_ACCUMULATOR_H_INCLUDED
+#define NNUE_ACCUMULATOR_H_INCLUDED
+
+#include "nnue_architecture.h"
+
+namespace Eval::NNUE {
+
+  // Feature-transformer sums, indexed [perspective][refresh trigger][dimension]
+  struct alignas(kMaxSimdWidth) Accumulator {  // SIMD code uses aligned loads
+    std::int16_t
+        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
+    Value score;                 // NOTE(review): presumably the cached eval
+    bool computed_accumulation;  // set once `accumulation` has been filled in
+    bool computed_score;         // cleared whenever `accumulation` is rebuilt
+  };
+
+}  // namespace Eval::NNUE
+
+#endif // NNUE_ACCUMULATOR_H_INCLUDED
diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h
new file mode 100644
index 00000000..91cdc4bd
--- /dev/null
+++ b/src/nnue/nnue_architecture.h
@@ -0,0 +1,38 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_ARCHITECTURE_H_INCLUDED
+#define NNUE_ARCHITECTURE_H_INCLUDED
+
+// Defines the network structure
+#include "architectures/halfkp_256x2-32-32.h"
+
+namespace Eval::NNUE {
+
+  static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0);
+  static_assert(Network::kOutputDimensions == 1);
+  static_assert(std::is_same_v<Network::OutputType, std::int32_t>);
+
+  // Triggers that force a full recalculation instead of a difference update
+  constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h
new file mode 100644
index 00000000..36fda7d7
--- /dev/null
+++ b/src/nnue/nnue_common.h
@@ -0,0 +1,81 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Constants used in NNUE evaluation function
+
+#ifndef NNUE_COMMON_H_INCLUDED
+#define NNUE_COMMON_H_INCLUDED
+
+#if defined(USE_AVX2)
+#include <immintrin.h>
+
+#elif defined(USE_SSE41)
+#include <smmintrin.h>
+
+#elif defined(USE_SSSE3)
+#include <tmmintrin.h>
+
+#elif defined(USE_SSE2)
+#include <emmintrin.h>
+
+#elif defined(USE_NEON)
+#include <arm_neon.h>
+#endif
+
+namespace Eval::NNUE {
+
+  // Version of the evaluation file
+  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+
+  // Constant used in evaluation value calculation
+  constexpr int FV_SCALE = 16;
+  constexpr int kWeightScaleBits = 6;  // ClippedReLU shifts inputs right by this
+
+  // Size of cache line (in bytes)
+  constexpr std::size_t kCacheLineSize = 64;
+
+  // SIMD width (in bytes); left undefined when no SIMD macro below is set
+  #if defined(USE_AVX2)
+  constexpr std::size_t kSimdWidth = 32;
+
+  #elif defined(USE_SSE2)
+  constexpr std::size_t kSimdWidth = 16;
+
+  #elif defined(USE_NEON)
+  constexpr std::size_t kSimdWidth = 16;
+  #endif
+
+  constexpr std::size_t kMaxSimdWidth = 32;  // largest kSimdWidth value above
+
+  // Type of input feature after conversion
+  using TransformedFeatureType = std::uint8_t;
+  using IndexType = std::uint32_t;  // dimensions, offsets and loop counters
+
+  // Forward declaration of learning class template
+  template <typename Layer>
+  class Trainer;
+
+  // Smallest multiple of base that is not below n (n >= 0, base > 0)
+  template <typename T>
+  constexpr T CeilToMultiple(T n, T base) {
+    return ((n + base - 1) / base) * base;
+  }
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_COMMON_H_INCLUDED
diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h
new file mode 100644
index 00000000..29e6db6e
--- /dev/null
+++ b/src/nnue/nnue_feature_transformer.h
@@ -0,0 +1,346 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class that converts the input features of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
+#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
+
+#include "nnue_common.h"
+#include "nnue_architecture.h"
+#include "features/index_list.h"
+
+#include <cstring> // std::memcpy()
+
+namespace Eval::NNUE {
+
+  // Input feature converter
+  class FeatureTransformer {
+
+   private:
+    // Number of output dimensions for one side
+    static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
+
+   public:
+    // Output type
+    using OutputType = TransformedFeatureType;
+
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
+    static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
+
+    // Size of forward propagation buffer
+    static constexpr std::size_t kBufferSize =
+        kOutputDimensions * sizeof(OutputType);
+
+    // Evaluation-file hash: the feature set's hash mixed with the output width
+    static constexpr std::uint32_t GetHashValue() {
+      return kOutputDimensions ^ RawFeatures::kHashValue;
+    }
+
+    // Structure string of the form "<feature set>[<inputs>-><half dims>x2]"
+    static std::string GetStructureString() {
+      const std::string inputs = std::to_string(kInputDimensions);
+      const std::string halves = std::to_string(kHalfDimensions);
+      return RawFeatures::GetName() + "[" + inputs + "->" + halves + "x2]";
+    }
+
+    // Load biases, then weights, as raw bytes; returns false on stream failure
+    bool ReadParameters(std::istream& stream) {
+      auto* const bias_bytes = reinterpret_cast<char*>(biases_);
+      auto* const weight_bytes = reinterpret_cast<char*>(weights_);
+      stream.read(bias_bytes, sizeof(biases_));
+      stream.read(weight_bytes, sizeof(weights_));
+      return !stream.fail();
+    }
+
+    // Store biases, then weights, as raw bytes; returns false on stream failure
+    bool WriteParameters(std::ostream& stream) const {
+      const auto* const bias_bytes = reinterpret_cast<const char*>(biases_);
+      const auto* const weight_bytes = reinterpret_cast<const char*>(weights_);
+      stream.write(bias_bytes, sizeof(biases_));
+      stream.write(weight_bytes, sizeof(weights_));
+      return !stream.fail();
+    }
+
+    // Make the current accumulator valid without a full refresh, if possible.
+    // Returns true when it already was, or was derived from the parent state.
+    bool UpdateAccumulatorIfPossible(const Position& pos) const {
+      const auto current = pos.state();
+      if (current->accumulator.computed_accumulation)
+        return true;
+      // A difference update needs a parent state with computed sums
+      const auto parent = current->previous;
+      if (!parent || !parent->accumulator.computed_accumulation)
+        return false;
+      UpdateAccumulator(pos);
+      return true;
+    }
+
+    // Convert input features: write both perspectives, clamped to [0, 127]
+    void Transform(const Position& pos, OutputType* output, bool refresh) const {
+      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+        RefreshAccumulator(pos);
+      }
+      const auto& accumulation = pos.state()->accumulator.accumulation;
+      // Per-ISA constants: vector count per perspective and the clamp operands
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+      constexpr int kControl = 0b11011000;  // 64-bit quarters in order 0,2,1,3
+      const __m256i kZero = _mm256_setzero_si256();
+
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);  // for the max(x, 0) emulation
+  #endif
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+  #endif
+      // Side to move fills the first half of output, the opponent the second
+      const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
+      for (IndexType p = 0; p < 2; ++p) {
+        const IndexType offset = kHalfDimensions * p;
+        // SIMD paths: saturating int16->int8 narrowing, then max with 0
+  #if defined(USE_AVX2)
+        auto out = reinterpret_cast<__m256i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i sum0 =
+            _mm256_load_si256(&reinterpret_cast<const __m256i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m256i sum1 =
+            _mm256_load_si256(&reinterpret_cast<const __m256i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+          _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
+              _mm256_packs_epi16(sum0, sum1), kZero), kControl));
+        }  // the permute undoes the per-128-bit-lane interleave of packs
+
+  #elif defined(USE_SSSE3)
+        auto out = reinterpret_cast<__m128i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
+
+          _mm_store_si128(&out[j],
+            // Pre-SSE4.1: saturating add then subtract of -128 gives max(x, 0)
+  #ifdef USE_SSE41
+            _mm_max_epi8(packedbytes, kZero)
+  #else
+            _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
+  #endif
+
+          );
+        }
+
+  #elif defined(USE_NEON)
+        const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
+              accumulation[perspectives[p]][0])[j];
+          out[j] = vmax_s8(vqmovn_s16(sum), kZero);
+        }
+
+  #else
+        for (IndexType j = 0; j < kHalfDimensions; ++j) {
+          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
+          output[offset + j] = static_cast<OutputType>(
+              std::max<int>(0, std::min<int>(127, sum)));
+        }
+  #endif
+
+      }
+    }
+
+   private:
+    // Full rebuild: biases plus the weight column of every active feature
+    void RefreshAccumulator(const Position& pos) const {
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;  // only refresh trigger 0 is handled
+      Features::IndexList active_indices[2];
+      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+                                       active_indices);
+      for (Color perspective : { WHITE, BLACK }) {
+        std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                   kHalfDimensions * sizeof(BiasType));
+        for (const auto index : active_indices[perspective]) {
+          const IndexType offset = kHalfDimensions * index;
+          // Add this feature's weight column; a vector holds kSimdWidth / 2 int16
+  #if defined(USE_AVX2)
+          auto accumulation = reinterpret_cast<__m256i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+            accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
+          }
+
+  #elif defined(USE_SSE2)
+          auto accumulation = reinterpret_cast<__m128i*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+          }
+
+  #elif defined(USE_NEON)
+          auto accumulation = reinterpret_cast<int16x8_t*>(
+              &accumulator.accumulation[perspective][i][0]);
+          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+          for (IndexType j = 0; j < kNumChunks; ++j) {
+            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
+          }
+
+  #else
+          for (IndexType j = 0; j < kHalfDimensions; ++j) {
+            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
+          }
+  #endif
+
+        }
+      }
+
+      accumulator.computed_accumulation = true;  // sums are filled in now
+      accumulator.computed_score = false;  // no score computed from them yet
+    }
+
+    // Difference update from the parent's sums; needs a non-null previous state
+    void UpdateAccumulator(const Position& pos) const {
+      const auto& prev_accumulator = pos.state()->previous->accumulator;
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;  // only refresh trigger 0 is handled
+      Features::IndexList removed_indices[2], added_indices[2];
+      bool reset[2];  // presumably filled in by AppendChangedIndices below
+      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+                                        removed_indices, added_indices, reset);
+      for (Color perspective : { WHITE, BLACK }) {
+        // SIMD view of this perspective's sums (kSimdWidth / 2 int16 per vector)
+  #if defined(USE_AVX2)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<__m256i*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_SSE2)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<__m128i*>(
+            &accumulator.accumulation[perspective][i][0]);
+
+  #elif defined(USE_NEON)
+        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+        auto accumulation = reinterpret_cast<int16x8_t*>(
+            &accumulator.accumulation[perspective][i][0]);
+  #endif
+
+        if (reset[perspective]) {  // start over from the biases
+          std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                      kHalfDimensions * sizeof(BiasType));
+        } else {  // start from the parent's sums, read in place (no copy)
+          std::memcpy(accumulator.accumulation[perspective][i],
+                      prev_accumulator.accumulation[perspective][i],
+                      kHalfDimensions * sizeof(BiasType));
+          // Difference calculation for the deactivated features
+          for (const auto index : removed_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;
+
+  #if defined(USE_AVX2)
+            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_SSE2)
+            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_NEON)
+            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
+            }
+
+  #else
+            for (IndexType j = 0; j < kHalfDimensions; ++j) {
+              accumulator.accumulation[perspective][i][j] -=
+                  weights_[offset + j];
+            }
+  #endif
+
+          }
+        }
+        { // Difference calculation for the activated features (also after reset)
+          for (const auto index : added_indices[perspective]) {
+            const IndexType offset = kHalfDimensions * index;
+
+  #if defined(USE_AVX2)
+            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_SSE2)
+            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
+            }
+
+  #elif defined(USE_NEON)
+            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
+            for (IndexType j = 0; j < kNumChunks; ++j) {
+              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
+            }
+
+  #else
+            for (IndexType j = 0; j < kHalfDimensions; ++j) {
+              accumulator.accumulation[perspective][i][j] +=
+                  weights_[offset + j];
+            }
+  #endif
+
+          }
+        }
+      }
+
+      accumulator.computed_accumulation = true;  // sums are filled in now
+      accumulator.computed_score = false;  // no score computed from them yet
+    }
+
+    using BiasType = std::int16_t;
+    using WeightType = std::int16_t;
+
+    // Make the learning class a friend
+    friend class Trainer<FeatureTransformer>;
+
+    alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
+    alignas(kCacheLineSize)
+        WeightType weights_[kHalfDimensions * kInputDimensions];
+  };
+
+}  // namespace Eval::NNUE
+
+#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
diff --git a/src/eval/nnue/nnue_test_command.cpp b/src/nnue/nnue_test_command.cpp
similarity index 99%
rename from src/eval/nnue/nnue_test_command.cpp
rename to src/nnue/nnue_test_command.cpp
index b0c57d4c..311c5ded 100644
--- a/src/eval/nnue/nnue_test_command.cpp
+++ b/src/nnue/nnue_test_command.cpp
@@ -2,8 +2,8 @@
 
 #if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
 
-#include "../../thread.h"
-#include "../../uci.h"
+#include "../thread.h"
+#include "../uci.h"
 #include "evaluate_nnue.h"
 #include "nnue_test_command.h"
 
diff --git a/src/eval/nnue/nnue_test_command.h b/src/nnue/nnue_test_command.h
similarity index 100%
rename from src/eval/nnue/nnue_test_command.h
rename to src/nnue/nnue_test_command.h
diff --git a/src/eval/nnue/trainer/features/factorizer.h b/src/nnue/trainer/features/factorizer.h
similarity index 100%
rename from src/eval/nnue/trainer/features/factorizer.h
rename to src/nnue/trainer/features/factorizer.h
diff --git a/src/eval/nnue/trainer/features/factorizer_feature_set.h b/src/nnue/trainer/features/factorizer_feature_set.h
similarity index 100%
rename from src/eval/nnue/trainer/features/factorizer_feature_set.h
rename to src/nnue/trainer/features/factorizer_feature_set.h
diff --git a/src/eval/nnue/trainer/features/factorizer_half_kp.h b/src/nnue/trainer/features/factorizer_half_kp.h
similarity index 93%
rename from src/eval/nnue/trainer/features/factorizer_half_kp.h
rename to src/nnue/trainer/features/factorizer_half_kp.h
index a5363771..48a99797 100644
--- a/src/eval/nnue/trainer/features/factorizer_half_kp.h
+++ b/src/nnue/trainer/features/factorizer_half_kp.h
@@ -62,8 +62,8 @@ class Factorizer<HalfKP<AssociatedKing>> {
     IndexType index_offset = AppendBaseFeature<FeatureType>(
         kProperties[kFeaturesHalfKP], base_index, training_features);
 
-    const auto sq_k = static_cast<Square>(base_index / fe_end);
-    const auto p = static_cast<BonaPiece>(base_index % fe_end);
+    const auto sq_k = static_cast<Square>(base_index / PieceSquare::PS_END);
+    const auto p = static_cast<PieceSquare>(base_index % PieceSquare::PS_END);
     // kFeaturesHalfK
     {
       const auto& properties = kProperties[kFeaturesHalfK];
@@ -76,7 +76,7 @@ class Factorizer<HalfKP<AssociatedKing>> {
     index_offset += InheritFeaturesIfRequired<P>(
         index_offset, kProperties[kFeaturesP], p, training_features);
     // kFeaturesHalfRelativeKP
-    if (p >= fe_hand_end) {
+    if (p >= PieceSquare::PS_W_PAWN) {
       index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
           index_offset, kProperties[kFeaturesHalfRelativeKP],
           HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
diff --git a/src/eval/nnue/trainer/trainer.h b/src/nnue/trainer/trainer.h
similarity index 98%
rename from src/eval/nnue/trainer/trainer.h
rename to src/nnue/trainer/trainer.h
index 49400bbe..4b467041 100644
--- a/src/eval/nnue/trainer/trainer.h
+++ b/src/nnue/trainer/trainer.h
@@ -111,7 +111,7 @@ IntType Round(double value) {
 // make_shared with alignment
 template <typename T, typename... ArgumentTypes>
 std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
-  const auto ptr = new(aligned_malloc(sizeof(T), alignof(T)))
+  const auto ptr = new(std_aligned_alloc(alignof(T), sizeof(T)))
       T(std::forward<ArgumentTypes>(arguments)...);
   return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
 }
diff --git a/src/eval/nnue/trainer/trainer_affine_transform.h b/src/nnue/trainer/trainer_affine_transform.h
similarity index 99%
rename from src/eval/nnue/trainer/trainer_affine_transform.h
rename to src/nnue/trainer/trainer_affine_transform.h
index f5b208a3..db56c1c0 100644
--- a/src/eval/nnue/trainer/trainer_affine_transform.h
+++ b/src/nnue/trainer/trainer_affine_transform.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
 
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/affine_transform.h"
 #include "trainer.h"
 
diff --git a/src/eval/nnue/trainer/trainer_clipped_relu.h b/src/nnue/trainer/trainer_clipped_relu.h
similarity index 99%
rename from src/eval/nnue/trainer/trainer_clipped_relu.h
rename to src/nnue/trainer/trainer_clipped_relu.h
index 566ed777..fd7b1a07 100644
--- a/src/eval/nnue/trainer/trainer_clipped_relu.h
+++ b/src/nnue/trainer/trainer_clipped_relu.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
 
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/clipped_relu.h"
 #include "trainer.h"
 
diff --git a/src/eval/nnue/trainer/trainer_feature_transformer.h b/src/nnue/trainer/trainer_feature_transformer.h
similarity index 99%
rename from src/eval/nnue/trainer/trainer_feature_transformer.h
rename to src/nnue/trainer/trainer_feature_transformer.h
index 0139d534..97dbeff4 100644
--- a/src/eval/nnue/trainer/trainer_feature_transformer.h
+++ b/src/nnue/trainer/trainer_feature_transformer.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
 
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../nnue_feature_transformer.h"
 #include "trainer.h"
 #include "features/factorizer_feature_set.h"
diff --git a/src/eval/nnue/trainer/trainer_input_slice.h b/src/nnue/trainer/trainer_input_slice.h
similarity index 99%
rename from src/eval/nnue/trainer/trainer_input_slice.h
rename to src/nnue/trainer/trainer_input_slice.h
index f5b263d3..33e39244 100644
--- a/src/eval/nnue/trainer/trainer_input_slice.h
+++ b/src/nnue/trainer/trainer_input_slice.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
 
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/input_slice.h"
 #include "trainer.h"
 
diff --git a/src/eval/nnue/trainer/trainer_sum.h b/src/nnue/trainer/trainer_sum.h
similarity index 99%
rename from src/eval/nnue/trainer/trainer_sum.h
rename to src/nnue/trainer/trainer_sum.h
index 2efdff67..fb5b1532 100644
--- a/src/eval/nnue/trainer/trainer_sum.h
+++ b/src/nnue/trainer/trainer_sum.h
@@ -5,7 +5,7 @@
 
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
 
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/sum.h"
 #include "trainer.h"
 
diff --git a/src/pawns.cpp b/src/pawns.cpp
index 7f8d451a..868d0c8e 100644
--- a/src/pawns.cpp
+++ b/src/pawns.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -32,21 +30,21 @@ namespace {
   #define S(mg, eg) make_score(mg, eg)
 
   // Pawn penalties
-  constexpr Score Backward      = S( 9, 24);
-  constexpr Score Doubled       = S(11, 56);
-  constexpr Score Isolated      = S( 5, 15);
-  constexpr Score WeakLever     = S( 0, 56);
-  constexpr Score WeakUnopposed = S(13, 27);
+  constexpr Score Backward      = S( 8, 27);
+  constexpr Score Doubled       = S(11, 55);
+  constexpr Score Isolated      = S( 5, 17);
+  constexpr Score WeakLever     = S( 2, 54);
+  constexpr Score WeakUnopposed = S(15, 25);
 
   // Bonus for blocked pawns at 5th or 6th rank
-  constexpr Score BlockedPawn[2] = { S(-11, -4), S(-3, 4) };
+  constexpr Score BlockedPawn[2] = { S(-13, -4), S(-4, 3) };
 
   constexpr Score BlockedStorm[RANK_NB] = {
     S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)
   };
 
   // Connected pawn bonus
-  constexpr int Connected[RANK_NB] = { 0, 7, 8, 12, 29, 48, 86 };
+  constexpr int Connected[RANK_NB] = { 0, 7, 8, 11, 24, 45, 85 };
 
   // Strength of pawn shelter for our king by [distance from edge][rank].
   // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
diff --git a/src/pawns.h b/src/pawns.h
index e6098069..5499826e 100644
--- a/src/pawns.h
+++ b/src/pawns.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/position.cpp b/src/position.cpp
index 90677337..46e5d78b 100644
--- a/src/position.cpp
+++ b/src/position.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -200,14 +198,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
   std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
   st = si;
 
-#if defined(EVAL_NNUE)
-  // clear evalList. It is cleared when memset is cleared to zero above...
-  evalList.clear();
-
-  // In updating the PieceList, we have to set which piece is where,
-  // A counter of how much each piece has been used
-  PieceNumber next_piece_number = PIECE_NUMBER_ZERO;
-#endif  // defined(EVAL_NNUE)
+  // Each piece on board gets a unique ID used to track the piece later
+  PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
 
   ss >> std::noskipws;
 
@@ -225,13 +217,15 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
           auto pc = Piece(idx);
           put_piece(pc, sq);
 
-#if defined(EVAL_NNUE)
-          PieceNumber piece_no =
-            (idx == W_KING) ?PIECE_NUMBER_WKING : //
-            (idx == B_KING) ?PIECE_NUMBER_BKING : // back ball
-            next_piece_number++; // otherwise
-          evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box
-#endif  // defined(EVAL_NNUE)
+          if (Eval::useNNUE)
+          {
+              // Kings get a fixed ID, other pieces get ID in order of placement
+              piece_id =
+                (idx == W_KING) ? PIECE_ID_WKING :
+                (idx == B_KING) ? PIECE_ID_BKING :
+                next_piece_id++;
+              evalList.put_piece(piece_id, sq, pc);
+          }
 
           ++sq;
       }
@@ -303,9 +297,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
   set_state(st);
 
   assert(pos_is_ok());
-#if defined(EVAL_NNUE)
-  assert(evalList.is_valid(*this));
-#endif  // defined(EVAL_NNUE)
 
   return *this;
 }
@@ -727,10 +718,13 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
   ++st->rule50;
   ++st->pliesFromNull;
 
-#if defined(EVAL_NNUE)
+  // Used by NNUE
   st->accumulator.computed_accumulation = false;
   st->accumulator.computed_score = false;
-#endif  // defined(EVAL_NNUE)
+  PieceId dp0 = PIECE_ID_NONE;
+  PieceId dp1 = PIECE_ID_NONE;
+  auto& dp = st->dirtyPiece;
+  dp.dirty_num = 1;
 
   Color us = sideToMove;
   Color them = ~us;
@@ -739,20 +733,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
   Piece pc = piece_on(from);
   Piece captured = type_of(m) == ENPASSANT ? make_piece(them, PAWN) : piece_on(to);
 
-#if defined(EVAL_NNUE)
-  PieceNumber piece_no0 = PIECE_NUMBER_NB;
-  PieceNumber piece_no1 = PIECE_NUMBER_NB;
-#endif  // defined(EVAL_NNUE)
-
   assert(color_of(pc) == us);
   assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us));
   assert(type_of(captured) != KING);
 
-#if defined(EVAL_NNUE)
-  auto& dp = st->dirtyPiece;
-  dp.dirty_num = 1;
-#endif  // defined(EVAL_NNUE)
-
   if (type_of(m) == CASTLING)
   {
       assert(pc == make_piece(us, KING));
@@ -782,30 +766,21 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
               assert(relative_rank(us, to) == RANK_6);
               assert(piece_on(to) == NO_PIECE);
               assert(piece_on(capsq) == make_piece(them, PAWN));
-
-#if defined(EVAL_NNUE)
-              piece_no1 = piece_no_of(capsq);
-#endif  // defined(EVAL_NNUE)
-
-              //board[capsq] = NO_PIECE; // Not done by remove_piece()
-#if defined(EVAL_NNUE)
-              evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB;
-#endif  // defined(EVAL_NNUE)
-          }
-          else {
-#if defined(EVAL_NNUE)
-            piece_no1 = piece_no_of(capsq);
-#endif  // defined(EVAL_NNUE)
           }
 
           st->pawnKey ^= Zobrist::psq[captured][capsq];
       }
-      else {
+      else
           st->nonPawnMaterial[them] -= PieceValue[MG][captured];
 
-#if defined(EVAL_NNUE)
-          piece_no1 = piece_no_of(capsq);
-#endif  // defined(EVAL_NNUE)
+      if (Eval::useNNUE)
+      {
+          dp.dirty_num = 2; // 2 pieces moved
+          dp1 = piece_id_on(capsq);
+          dp.pieceId[1] = dp1;
+          dp.old_piece[1] = evalList.piece_with_id(dp1);
+          evalList.put_piece(dp1, capsq, NO_PIECE);
+          dp.new_piece[1] = evalList.piece_with_id(dp1);
       }
 
       // Update board and piece lists
@@ -821,21 +796,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
 
       // Reset rule 50 counter
       st->rule50 = 0;
-
-#if defined(EVAL_NNUE)
-      dp.dirty_num = 2; // 2 pieces moved
-
-      dp.pieceNo[1] = piece_no1;
-      dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1);
-      // Do not use Eval::EvalList::put_piece() because the piece is removed
-      // from the game, and the corresponding elements of the piece lists
-      // needs to be Eval::BONA_PIECE_ZERO.
-      evalList.set_piece_on_board(piece_no1, Eval::BONA_PIECE_ZERO, Eval::BONA_PIECE_ZERO, capsq);
-      // Set PIECE_NUMBER_NB to piece_no_of_board[capsq] directly because it
-      // will not be overritten to pc if the move type is enpassant.
-      evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB;
-      dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1);
-#endif  // defined(EVAL_NNUE)
   }
 
   // Update hash key
@@ -857,20 +817,18 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
   }
 
   // Move the piece. The tricky Chess960 castling is handled earlier
-  if (type_of(m) != CASTLING) {
-#if defined(EVAL_NNUE)
-    piece_no0 = piece_no_of(from);
-#endif  // defined(EVAL_NNUE)
+  if (type_of(m) != CASTLING)
+  {
+      if (Eval::useNNUE)
+      {
+          dp0 = piece_id_on(from);
+          dp.pieceId[0] = dp0;
+          dp.old_piece[0] = evalList.piece_with_id(dp0);
+          evalList.put_piece(dp0, to, pc);
+          dp.new_piece[0] = evalList.piece_with_id(dp0);
+      }
 
-    move_piece(from, to);
-
-#if defined(EVAL_NNUE)
-    dp.pieceNo[0] = piece_no0;
-    dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
-    evalList.piece_no_list_board[from] = PIECE_NUMBER_NB;
-    evalList.put_piece(piece_no0, to, pc);
-    dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
-#endif  // defined(EVAL_NNUE)
+      move_piece(from, to);
   }
 
   // If the moving piece is a pawn do some special extra work
@@ -894,14 +852,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
           remove_piece(to);
           put_piece(promotion, to);
 
-#if defined(EVAL_NNUE)
-          piece_no0 = piece_no_of(to);
-          //dp.pieceNo[0] = piece_no0;
-          //dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
-          assert(evalList.piece_no_list_board[from] == PIECE_NUMBER_NB);
-          evalList.put_piece(piece_no0, to, promotion);
-          dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
-#endif  // defined(EVAL_NNUE)
+          if (Eval::useNNUE)
+          {
+              dp0 = piece_id_on(to);
+              evalList.put_piece(dp0, to, promotion);
+              dp.new_piece[0] = evalList.piece_with_id(dp0);
+          }
 
           // Update hash keys
           k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to];
@@ -953,12 +909,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
       }
   }
 
-  //std::cout << *this << std::endl;
-
   assert(pos_is_ok());
-#if defined(EVAL_NNUE)
-  assert(evalList.is_valid(*this));
-#endif  // defined(EVAL_NNUE)
 }
 
 
@@ -988,11 +939,6 @@ void Position::undo_move(Move m) {
       remove_piece(to);
       pc = make_piece(us, PAWN);
       put_piece(pc, to);
-
-#if defined(EVAL_NNUE)
-      PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0];
-      evalList.put_piece(piece_no0, to, pc);
-#endif  // defined(EVAL_NNUE)
   }
 
   if (type_of(m) == CASTLING)
@@ -1002,14 +948,13 @@ void Position::undo_move(Move m) {
   }
   else
   {
-      
       move_piece(to, from); // Put the piece back at the source square
 
-#if defined(EVAL_NNUE)
-      PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0];
-      evalList.put_piece(piece_no0, from, pc);
-      evalList.piece_no_list_board[to] = PIECE_NUMBER_NB;
-#endif  // defined(EVAL_NNUE)
+      if (Eval::useNNUE)
+      {
+          PieceId dp0 = st->dirtyPiece.pieceId[0];
+          evalList.put_piece(dp0, from, pc);
+      }
 
       if (st->capturedPiece)
       {
@@ -1028,12 +973,13 @@ void Position::undo_move(Move m) {
 
           put_piece(st->capturedPiece, capsq); // Restore the captured piece
 
-#if defined(EVAL_NNUE)
-          PieceNumber piece_no1 = st->dirtyPiece.pieceNo[1];
-          assert(evalList.bona_piece(piece_no1).fw == Eval::BONA_PIECE_ZERO);
-          assert(evalList.bona_piece(piece_no1).fb == Eval::BONA_PIECE_ZERO);
-          evalList.put_piece(piece_no1, capsq, st->capturedPiece);
-#endif  // defined(EVAL_NNUE)
+          if (Eval::useNNUE)
+          {
+              PieceId dp1 = st->dirtyPiece.pieceId[1];
+              assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
+              assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
+              evalList.put_piece(dp1, capsq, st->capturedPiece);
+          }
       }
   }
 
@@ -1042,9 +988,6 @@ void Position::undo_move(Move m) {
   --gamePly;
 
   assert(pos_is_ok());
-#if defined(EVAL_NNUE)
-  assert(evalList.is_valid(*this));
-#endif  // defined(EVAL_NNUE)
 }
 
 
@@ -1052,31 +995,39 @@ void Position::undo_move(Move m) {
 /// is a bit tricky in Chess960 where from/to squares can overlap.
 template<bool Do>
 void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) {
-#if defined(EVAL_NNUE)
-  auto& dp = st->dirtyPiece;
-   // Record the moved pieces in StateInfo for difference calculation.
-   dp.dirty_num = 2; // 2 pieces moved
-
-  PieceNumber piece_no0;
-  PieceNumber piece_no1;
-
-  if (Do) {
-    piece_no0 = piece_no_of(from);
-    piece_no1 = piece_no_of(to);
-  }
-#endif  // defined(EVAL_NNUE)
 
   bool kingSide = to > from;
   rfrom = to; // Castling is encoded as "king captures friendly rook"
   rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
   to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
 
-#if defined(EVAL_NNUE)
-  if (!Do) {
-    piece_no0 = piece_no_of(to);
-    piece_no1 = piece_no_of(rto);
+  if (Eval::useNNUE)
+  {
+      PieceId dp0, dp1;
+      auto& dp = st->dirtyPiece;
+      dp.dirty_num = 2; // 2 pieces moved
+
+      if (Do)
+      {
+          dp0 = piece_id_on(from);
+          dp1 = piece_id_on(rfrom);
+          dp.pieceId[0] = dp0;
+          dp.old_piece[0] = evalList.piece_with_id(dp0);
+          evalList.put_piece(dp0, to, make_piece(us, KING));
+          dp.new_piece[0] = evalList.piece_with_id(dp0);
+          dp.pieceId[1] = dp1;
+          dp.old_piece[1] = evalList.piece_with_id(dp1);
+          evalList.put_piece(dp1, rto, make_piece(us, ROOK));
+          dp.new_piece[1] = evalList.piece_with_id(dp1);
+      }
+      else
+      {
+          dp0 = piece_id_on(to);
+          dp1 = piece_id_on(rto);
+          evalList.put_piece(dp0, from, make_piece(us, KING));
+          evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
+      }
   }
-#endif  // defined(EVAL_NNUE)
 
   // Remove both pieces first since squares could overlap in Chess960
   remove_piece(Do ? from : to);
@@ -1084,28 +1035,6 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
   board[Do ? from : to] = board[Do ? rfrom : rto] = NO_PIECE; // Since remove_piece doesn't do this for us
   put_piece(make_piece(us, KING), Do ? to : from);
   put_piece(make_piece(us, ROOK), Do ? rto : rfrom);
-
-#if defined(EVAL_NNUE)
-  if (Do) {
-    dp.pieceNo[0] = piece_no0;
-    dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
-    evalList.piece_no_list_board[from] = PIECE_NUMBER_NB;
-    evalList.put_piece(piece_no0, to, make_piece(us, KING));
-    dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
-
-    dp.pieceNo[1] = piece_no1;
-    dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1);
-    evalList.piece_no_list_board[rfrom] = PIECE_NUMBER_NB;
-    evalList.put_piece(piece_no1, rto, make_piece(us, ROOK));
-    dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1);
-  }
-  else {
-    evalList.piece_no_list_board[to] = PIECE_NUMBER_NB;
-    evalList.put_piece(piece_no0, from, make_piece(us, KING));
-    evalList.piece_no_list_board[rto] = PIECE_NUMBER_NB;
-    evalList.put_piece(piece_no1, rfrom, make_piece(us, ROOK));
-  }
-#endif  // defined(EVAL_NNUE)
 }
 
 
@@ -1117,7 +1046,14 @@ void Position::do_null_move(StateInfo& newSt) {
   assert(!checkers());
   assert(&newSt != st);
 
-  std::memcpy(&newSt, st, sizeof(StateInfo));
+  if (Eval::useNNUE)
+  {
+      std::memcpy(&newSt, st, sizeof(StateInfo));
+      st->accumulator.computed_score = false;
+  }
+  else
+      std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
+
   newSt.previous = st;
   st = &newSt;
 
@@ -1130,10 +1066,6 @@ void Position::do_null_move(StateInfo& newSt) {
   st->key ^= Zobrist::side;
   prefetch(TT.first_entry(st->key));
 
-#if defined(EVAL_NNUE)
-  st->accumulator.computed_score = false;
-#endif
-
   ++st->rule50;
   st->pliesFromNull = 0;
 
@@ -1463,13 +1395,3 @@ bool Position::pos_is_ok() const {
 
   return true;
 }
-
-#if defined(EVAL_NNUE)
-PieceNumber Position::piece_no_of(Square sq) const
-{
-  assert(piece_on(sq) != NO_PIECE);
-  PieceNumber n = evalList.piece_no_of_board(sq);
-  assert(is_ok(n));
-  return n;
-}
-#endif  // defined(EVAL_NNUE)
diff --git a/src/position.h b/src/position.h
index 725be527..b5dbaf59 100644
--- a/src/position.h
+++ b/src/position.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -23,16 +21,14 @@
 
 #include <cassert>
 #include <deque>
-#include <iostream>
 #include <memory> // For std::unique_ptr
 #include <string>
 
 #include "bitboard.h"
 #include "evaluate.h"
-#include "misc.h"
 #include "types.h"
 
-#include "eval/nnue/nnue_accumulator.h"
+#include "nnue/nnue_accumulator.h"
 
 
 /// StateInfo struct stores information needed to restore a Position object to
@@ -60,12 +56,9 @@ struct StateInfo {
   Bitboard   checkSquares[PIECE_TYPE_NB];
   int        repetition;
 
-#if defined(EVAL_NNUE)
+  // Used by NNUE
   Eval::NNUE::Accumulator accumulator;
-
-   // For management of evaluation value difference calculation
-  Eval::DirtyPiece dirtyPiece;
-#endif  // defined(EVAL_NNUE)
+  DirtyPiece dirtyPiece;
 };
 
 
@@ -83,7 +76,7 @@ typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
 class Thread;
 
 // packed sfen
-struct PackedSfen { uint8_t data[32]; };
+struct PackedSfen { uint8_t data[32]; }; 
 
 class Position {
 public:
@@ -178,16 +171,9 @@ public:
   bool pos_is_ok() const;
   void flip();
 
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
-  // --- StateInfo
-
-  // Returns the StateInfo corresponding to the current situation.
-  // For example, if state()->capturedPiece, the pieces captured in the previous phase are stored.
-  StateInfo* state() const { return st; }
-
-  // Information such as where and which piece number is used for the evaluation function.
-  const Eval::EvalList* eval_list() const { return &evalList; }
-#endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
+  // Used by NNUE
+  StateInfo* state() const;
+  const EvalList* eval_list() const;
 
 #if defined(EVAL_LEARN)
   // --sfenization helper
@@ -196,7 +182,7 @@ public:
   // Do not include gamePly in pack.
   void sfen_pack(PackedSfen& sfen);
 
-  // �� It is slow to go through sfen, so I made a function to set packed sfen directly.
+  // It is slow to go through sfen, so I made a function to set packed sfen directly.
   // Equivalent to pos.set(sfen_unpack(data),si,th);.
   // If there is a problem with the passed phase and there is an error, non-zero is returned.
   // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
@@ -222,10 +208,8 @@ private:
   template<bool Do>
   void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
 
-#if defined(EVAL_NNUE)
-  // Returns the PieceNumber of the piece in the sq box on the board.
-  PieceNumber piece_no_of(Square sq) const;
-#endif  // defined(EVAL_NNUE)
+  // ID of a piece on a given square
+  PieceId piece_id_on(Square sq) const;
 
   // Data members
   Piece board[SQUARE_NB];
@@ -244,10 +228,8 @@ private:
   StateInfo* st;
   bool chess960;
 
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
-  // List of pieces used in the evaluation function
-  Eval::EvalList evalList;
-#endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
+  // List of pieces used in NNUE evaluation function
+  EvalList evalList;
 };
 
 namespace PSQT {
@@ -482,4 +464,25 @@ inline void Position::do_move(Move m, StateInfo& newSt) {
   do_move(m, newSt, gives_check(m));
 }
 
+inline StateInfo* Position::state() const {
+
+  return st;
+}
+
+inline const EvalList* Position::eval_list() const {
+
+  return &evalList;
+}
+
+inline PieceId Position::piece_id_on(Square sq) const
+{
+
+  assert(piece_on(sq) != NO_PIECE);
+
+  PieceId pid = evalList.piece_id_list[sq];
+  assert(is_ok(pid));
+
+  return pid;
+}
+
 #endif // #ifndef POSITION_H_INCLUDED
diff --git a/src/psqt.cpp b/src/psqt.cpp
index 5e8dd2c7..eb36e75e 100644
--- a/src/psqt.cpp
+++ b/src/psqt.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/search.cpp b/src/search.cpp
index 3ccc1d89..fe1771a3 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -227,6 +225,8 @@ void MainThread::search() {
   Time.init(Limits, us, rootPos.game_ply());
   TT.new_search();
 
+  Eval::verify_NNUE();
+
   if (rootMoves.empty())
   {
       rootMoves.emplace_back(MOVE_NONE);
@@ -816,7 +816,7 @@ namespace {
 
     // Step 8. Futility pruning: child node (~50 Elo)
     if (   !PvNode
-        &&  depth < 6
+        &&  depth < 8
         &&  eval - futility_margin(depth, improving) >= beta
         &&  eval < VALUE_KNOWN_WIN) // Do not return unproven wins
         return eval;
@@ -827,7 +827,7 @@ namespace {
         && (ss-1)->statScore < 23824
         &&  eval >= beta
         &&  eval >= ss->staticEval
-        &&  ss->staticEval >= beta - 33 * depth - 33 * improving + 112 * ttPv + 311
+        &&  ss->staticEval >= beta - 28 * depth - 28 * improving + 94 * ttPv + 200
         && !excludedMove
         &&  pos.non_pawn_material(us)
         && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -925,9 +925,12 @@ namespace {
 
                 if (value >= probcutBeta)
                 {
-                    tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
-                        BOUND_LOWER,
-                        depth - 3, move, ss->staticEval);
+                    if ( !(ttHit
+                       && tte->depth() >= depth - 3
+                       && ttValue != VALUE_NONE))
+                        tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
+                            BOUND_LOWER,
+                            depth - 3, move, ss->staticEval);
                     return value;
                 }
             }
@@ -983,9 +986,17 @@ moves_loop: // When in check, search starts from here
                                   thisThread->rootMoves.begin() + thisThread->pvLast, move))
           continue;
 
+      // Check for legality
+      if (!rootNode && !pos.legal(move))
+          continue;
+
       ss->moveCount = ++moveCount;
 
-      if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000 && !Limits.silent)
+      if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000
+#if defined(EVAL_LEARN)
+          && !Limits.silent
+#endif
+          )
           sync_cout << "info depth " << depth
                     << " currmove " << UCI::move(move, pos.is_chess960())
                     << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl;
@@ -1021,7 +1032,7 @@ moves_loop: // When in check, search starts from here
                   continue;
 
               // Futility pruning: parent node (~5 Elo)
-              if (   lmrDepth < 6
+              if (   lmrDepth < 8
                   && !ss->inCheck
                   && ss->staticEval + 284 + 188 * lmrDepth <= alpha
                   &&  (*contHist[0])[movedPiece][to_sq(move)]
@@ -1048,7 +1059,7 @@ moves_loop: // When in check, search starts from here
                   && !(PvNode && abs(bestValue) < 2)
                   && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
                   && !ss->inCheck
-                  && ss->staticEval + 267 + 391 * lmrDepth
+                  && ss->staticEval + 178 + 261 * lmrDepth
                      + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
                   continue;
 
@@ -1064,16 +1075,15 @@ moves_loop: // When in check, search starts from here
       // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
       // then that move is singular and should be extended. To verify this we do
       // a reduced search on all the other moves but the ttMove and if the
-      // result is lower than ttValue minus a margin then we will extend the ttMove.
-      if (    depth >= 6
+      // result is lower than ttValue minus a margin, then we will extend the ttMove.
+      if (    depth >= 7
           &&  move == ttMove
           && !rootNode
           && !excludedMove // Avoid recursive singular search
        /* &&  ttValue != VALUE_NONE Already implicit in the next condition */
           &&  abs(ttValue) < VALUE_KNOWN_WIN
           && (tte->bound() & BOUND_LOWER)
-          &&  tte->depth() >= depth - 3
-          &&  pos.legal(move))
+          &&  tte->depth() >= depth - 3)
       {
           Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2;
           Depth singularDepth = (depth - 1 + 3 * formerPv) / 2;
@@ -1140,13 +1150,6 @@ moves_loop: // When in check, search starts from here
       // Speculative prefetch as early as possible
       prefetch(TT.first_entry(pos.key_after(move)));
 
-      // Check for legality just before making the move
-      if (!rootNode && !pos.legal(move))
-      {
-          ss->moveCount = --moveCount;
-          continue;
-      }
-
       // Update the current move (this must be done after singular extension search)
       ss->currentMove = move;
       ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck]
@@ -1160,7 +1163,7 @@ moves_loop: // When in check, search starts from here
       // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
       // re-searched at full depth.
       if (    depth >= 3
-          &&  moveCount > 1 + 2 * rootNode
+          &&  moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
           && (!rootNode || thisThread->best_move_count(move) == 0)
           && (  !captureOrPromotion
               || moveCountPruning
@@ -1170,6 +1173,13 @@ moves_loop: // When in check, search starts from here
       {
           Depth r = reduction(improving, depth, moveCount);
 
+          // Decrease reduction at non-check cut nodes for second move at low depths
+          if (   cutNode
+              && depth <= 10
+              && moveCount <= 2
+              && !ss->inCheck)
+              r--;
+
           // Decrease reduction if the ttHit running average is large
           if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024)
               r--;
@@ -2060,10 +2070,10 @@ namespace Learner
       // Increase the generation of the substitution table for this thread because it is a new search.
             //TT.new_search(th->thread_id());
 
-            // ↑ If you call new_search here, it may be a loss because you can't use the previous search result.
+            // ↑ If you call new_search here, it may be a loss because you can't use the previous search result.
             // Do not do this here, but caller should do TT.new_search(th->thread_id()) for each station ...
 
-            // →Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers.
+            // →Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers.
       //#endif
     }
   }
@@ -2253,7 +2263,7 @@ namespace Learner
     }
 
     // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle.
-    // → PV should not be NULL_MOVE because it is PV
+    // → PV should not be NULL_MOVE because it is PV
     // MOVE_WIN has never been thrust. (For now)
     for (Move move : rootMoves[0].pv)
     {
diff --git a/src/search.h b/src/search.h
index eae1cafc..01d8a4c1 100644
--- a/src/search.h
+++ b/src/search.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -88,7 +86,9 @@ struct LimitsType {
     time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
     movestogo = depth = mate = perft = infinite = 0;
     nodes = 0;
+#if defined (EVAL_LEARN)
     silent = false;
+#endif
   }
 
   bool use_time_management() const {
@@ -99,9 +99,11 @@ struct LimitsType {
   TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
   int movestogo, depth, mate, perft, infinite;
   int64_t nodes;
+#if defined (EVAL_LEARN)
   // Silent mode that does not output to the screen (for continuous self-play in process)
   // Do not output PV at this time.
   bool silent;
+#endif
 };
 
 extern LimitsType Limits;
diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp
index 95d58945..20215b96 100644
--- a/src/syzygy/tbprobe.cpp
+++ b/src/syzygy/tbprobe.cpp
@@ -1,7 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (c) 2013 Ronald de Man
-  Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h
index df3ca4fe..b998989b 100644
--- a/src/syzygy/tbprobe.h
+++ b/src/syzygy/tbprobe.h
@@ -1,7 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (c) 2013 Ronald de Man
-  Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/thread.cpp b/src/thread.cpp
index a0ee2b25..1aa66a81 100644
--- a/src/thread.cpp
+++ b/src/thread.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -206,21 +204,18 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
 
   // We use Position::set() to set root position across threads. But there are
   // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
-  // be deduced from a fen string, so set() clears them and to not lose the info
-  // we need to backup and later restore setupStates->back(). Note that setupStates
-  // is shared by threads but is accessed in read-only mode.
-  StateInfo tmp = setupStates->back();
-
+  // be deduced from a fen string, so set() clears them and they are set from
+  // setupStates->back() later. The rootState is per thread, earlier states are shared
+  // since they are read-only.
   for (Thread* th : *this)
   {
       th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
       th->rootDepth = th->completedDepth = 0;
       th->rootMoves = rootMoves;
-      th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th);
+      th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
+      th->rootState = setupStates->back();
   }
 
-  setupStates->back() = tmp;
-
   main()->start_searching();
 }
 
diff --git a/src/thread.h b/src/thread.h
index a69e1d10..042bc2e9 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -67,6 +65,7 @@ public:
   std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
 
   Position rootPos;
+  StateInfo rootState;
   Search::RootMoves rootMoves;
   Depth rootDepth, completedDepth;
   CounterMoveHistory counterMoves;
diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h
index 0ef5c981..c4b55a48 100644
--- a/src/thread_win32_osx.h
+++ b/src/thread_win32_osx.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/timeman.cpp b/src/timeman.cpp
index 546eadd2..df4ba9b2 100644
--- a/src/timeman.cpp
+++ b/src/timeman.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/timeman.h b/src/timeman.h
index 9301dc94..5ad72b32 100644
--- a/src/timeman.h
+++ b/src/timeman.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tt.cpp b/src/tt.cpp
index cfbb2ae6..d494c27d 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -116,9 +114,6 @@ void TranspositionTable::clear() {
 /// TTEntry t2 if its replace value is greater than that of t2.
 
 TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
-#if defined(DISABLE_TT)
-  return found = false, first_entry(0);
-#else
 
   TTEntry* const tte = first_entry(key);
   const uint16_t key16 = (uint16_t)key;  // Use the low 16 bits as key inside the cluster
@@ -143,7 +138,6 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
           replace = &tte[i];
 
   return found = false, replace;
-#endif
 }
 
 
diff --git a/src/tt.h b/src/tt.h
index e18db8ce..c177ca52 100644
--- a/src/tt.h
+++ b/src/tt.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tune.cpp b/src/tune.cpp
index c1b1c76b..e94f67f8 100644
--- a/src/tune.cpp
+++ b/src/tune.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/tune.h b/src/tune.h
index 27c3f961..1489fa32 100644
--- a/src/tune.h
+++ b/src/tune.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2017 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2018 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
diff --git a/src/types.h b/src/types.h
index 773d9247..ce4c2dbb 100644
--- a/src/types.h
+++ b/src/types.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -40,7 +38,6 @@
 
 #include <cassert>
 #include <cctype>
-#include <climits>
 #include <cstdint>
 #include <cstdlib>
 #include <algorithm>
@@ -183,17 +180,14 @@ enum Value : int {
   VALUE_MATE_IN_MAX_PLY  =  VALUE_MATE - MAX_PLY,
   VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY,
 
-  PawnValueMg   = 124,   PawnValueEg   = 206,
+  PawnValueMg   = 126,   PawnValueEg   = 208,
   KnightValueMg = 781,   KnightValueEg = 854,
   BishopValueMg = 825,   BishopValueEg = 915,
   RookValueMg   = 1276,  RookValueEg   = 1380,
   QueenValueMg  = 2538,  QueenValueEg  = 2682,
   Tempo = 28,
 
-  MidgameLimit  = 15258, EndgameLimit  = 3915,
-
-// Maximum value returned by the evaluation function (I want it to be around 2**14..)
-  VALUE_MAX_EVAL = 27000,
+  MidgameLimit  = 15258, EndgameLimit  = 3915
 };
 
 enum PieceType {
@@ -209,6 +203,22 @@ enum Piece {
   PIECE_NB = 16
 };
 
+// An ID used to track the pieces. Max. 32 pieces on board.
+enum PieceId {
+  PIECE_ID_ZERO   = 0,
+  PIECE_ID_KING   = 30,
+  PIECE_ID_WKING  = 30,
+  PIECE_ID_BKING  = 31,
+  PIECE_ID_NONE   = 32
+};
+
+inline PieceId operator++(PieceId& d, int) {
+
+  PieceId x = d;
+  d = PieceId(int(d) + 1);
+  return x;  // post-increment: the value before the increment is returned
+}
+
 constexpr Value PieceValue[PHASE_NB][PIECE_NB] = {
   { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO,
     VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO },
@@ -238,8 +248,8 @@ enum Square : int {
   SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8,
   SQ_NONE,
 
-  SQUARE_ZERO = 0, SQUARE_NB = 64,
-  SQUARE_NB_PLUS1 = SQUARE_NB + 1, // If there are no balls, it is treated as having moved to SQUARE_NB, so it may be necessary to secure the array with SQUARE_NB+1, so this constant is used.
+  SQUARE_ZERO = 0,
+  SQUARE_NB   = 64
 };
 
 enum Direction : int {
@@ -262,6 +272,119 @@ enum Rank : int {
   RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB
 };
 
+// unique number for each piece type on each square
+enum PieceSquare : uint32_t {
+  PS_NONE     =  0,
+  PS_W_PAWN   =  1,
+  PS_B_PAWN   =  1 * SQUARE_NB + 1,
+  PS_W_KNIGHT =  2 * SQUARE_NB + 1,
+  PS_B_KNIGHT =  3 * SQUARE_NB + 1,
+  PS_W_BISHOP =  4 * SQUARE_NB + 1,
+  PS_B_BISHOP =  5 * SQUARE_NB + 1,
+  PS_W_ROOK   =  6 * SQUARE_NB + 1,
+  PS_B_ROOK   =  7 * SQUARE_NB + 1,
+  PS_W_QUEEN  =  8 * SQUARE_NB + 1,
+  PS_B_QUEEN  =  9 * SQUARE_NB + 1,
+  PS_W_KING   = 10 * SQUARE_NB + 1,
+  PS_END      = PS_W_KING, // pieces without kings (pawns included)
+  PS_B_KING   = 11 * SQUARE_NB + 1,
+  PS_END2     = 12 * SQUARE_NB + 1,
+
+  PS_NOT_INIT = PS_END2 + 1,
+};
+
+struct ExtPieceSquare {
+  PieceSquare from[COLOR_NB];
+};
+
+// Array for finding the PieceSquare corresponding to the piece on the board
+extern ExtPieceSquare kpp_board_index[PIECE_NB];
+
+constexpr bool is_ok(PieceId pid);
+constexpr Square rotate180(Square sq);
+
+class Position;
+
+// Structure holding which tracked piece (PieceId) is where (PieceSquare)
+class EvalList {
+
+public:
+  // Max. number of pieces without kings is 30, but the array length must be a multiple of 4 for AVX2
+  static const int MAX_LENGTH = 32;
+
+  // Array that holds the piece id for the pieces on the board
+  PieceId piece_id_list[SQUARE_NB];
+
+  // Lists of pieces, one from White's and one from Black's point of view
+  PieceSquare* piece_list_fw() const { return const_cast<PieceSquare*>(pieceListFw); }
+  PieceSquare* piece_list_fb() const { return const_cast<PieceSquare*>(pieceListFb); }
+
+  // Place the piece pc with piece_id on the square sq on the board
+  void put_piece(PieceId piece_id, Square sq, Piece pc)
+  {
+      assert(is_ok(piece_id));
+      if (pc != NO_PIECE)
+      {
+          pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq);
+          pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq));
+          piece_id_list[sq] = piece_id;
+      }
+      else
+      {
+          pieceListFw[piece_id] = PS_NONE;
+          pieceListFb[piece_id] = PS_NONE;
+          piece_id_list[sq] = piece_id;
+      }
+  }
+
+  // Return the white and black point-of-view entries of the piece with the given piece_id as an ExtPieceSquare
+  ExtPieceSquare piece_with_id(PieceId piece_id) const
+  {
+      ExtPieceSquare eps;
+      eps.from[WHITE] = pieceListFw[piece_id];
+      eps.from[BLACK] = pieceListFb[piece_id];
+      return eps;
+  }
+
+  // Initialize the pieceList.
+  // Set the value of unused pieces to PieceSquare::PS_NONE so that positions with missing pieces can be handled.
+  // This way the normal evaluation function can also be used for positions with fewer pieces.
+  // piece_id_list is initialized with PieceId::PIECE_ID_NONE to facilitate debugging.
+  void clear()
+  {
+
+      for (auto& p : pieceListFw)
+          p = PieceSquare::PS_NONE;
+
+      for (auto& p : pieceListFb)
+          p = PieceSquare::PS_NONE;
+
+      for (auto& v : piece_id_list)
+          v = PieceId::PIECE_ID_NONE;
+  }
+
+  // Check whether the internally held pieceListFw[] contains valid PieceSquare values.
+  // Note: for debugging only; slow.
+  bool is_valid(const Position& pos);
+
+private:
+  PieceSquare pieceListFw[MAX_LENGTH];
+  PieceSquare pieceListFb[MAX_LENGTH];
+};
+
+// For differential evaluation of pieces that changed since last turn
+struct DirtyPiece {
+
+  // Number of changed pieces
+  int dirty_num;
+
+  // The ids of changed pieces, max. 2 pieces can change in one move
+  PieceId pieceId[2];
+
+  // The previous and the new ExtPieceSquare of each changed piece, indexed like pieceId[]
+  ExtPieceSquare old_piece[2];
+  ExtPieceSquare new_piece[2];
+};
 
 /// Score enum stores a middlegame and an endgame value in a single integer (enum).
 /// The least significant 16 bits are used to store the middlegame value and the
@@ -287,10 +410,10 @@ inline Value mg_value(Score s) {
 }
 
 #define ENABLE_BASE_OPERATORS_ON(T)                                \
-constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \
-constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \
+constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); }    \
+constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); }    \
 constexpr T operator-(T d) { return T(-int(d)); }                  \
-inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; }         \
+inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; }       \
 inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; }
 
 #define ENABLE_INCR_OPERATORS_ON(T)                                \
@@ -309,8 +432,10 @@ inline T& operator/=(T& d, int i) { return d = T(int(d) / i); }
 ENABLE_FULL_OPERATORS_ON(Value)
 ENABLE_FULL_OPERATORS_ON(Direction)
 
-ENABLE_INCR_OPERATORS_ON(PieceType)
 ENABLE_INCR_OPERATORS_ON(Piece)
+ENABLE_INCR_OPERATORS_ON(PieceSquare)
+ENABLE_INCR_OPERATORS_ON(PieceId)
+ENABLE_INCR_OPERATORS_ON(PieceType)
 ENABLE_INCR_OPERATORS_ON(Square)
 ENABLE_INCR_OPERATORS_ON(File)
 ENABLE_INCR_OPERATORS_ON(Rank)
@@ -398,6 +523,10 @@ inline Color color_of(Piece pc) {
   return Color(pc >> 3);
 }
 
+constexpr bool is_ok(PieceId pid) {
+  return pid < PIECE_ID_NONE;
+}
+
 constexpr bool is_ok(Square s) {
   return s >= SQ_A1 && s <= SQ_H8;
 }
@@ -434,6 +563,11 @@ constexpr Square to_sq(Move m) {
   return Square(m & 0x3F);
 }
 
+// Return relative square when turning the board 180 degrees
+constexpr Square rotate180(Square sq) {
+  return (Square)(sq ^ 0x3F);  // for squares 0..63 this equals 63 - sq
+}
+
 constexpr int from_to(Move m) {
  return m & 0xFFF;
 }
@@ -463,44 +597,6 @@ constexpr bool is_ok(Move m) {
   return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE
 }
 
-// Return squares when turning the board 180��
-constexpr Square Inv(Square sq) { return (Square)((SQUARE_NB - 1) - sq); }
-
-// Return squares when mirroring the board
-constexpr Square Mir(Square sq) { return make_square(File(7 - (int)file_of(sq)), rank_of(sq)); }
-
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
-// --------------------
-// 		piece box
-// --------------------
-
-// A number used to manage the piece list (which piece is where) used in the Position class.
-enum PieceNumber : uint8_t
-{
-	PIECE_NUMBER_PAWN = 0,
-	PIECE_NUMBER_KNIGHT = 16,
-	PIECE_NUMBER_BISHOP = 20,
-	PIECE_NUMBER_ROOK = 24,
-	PIECE_NUMBER_QUEEN = 28,
-	PIECE_NUMBER_KING = 30,
-	PIECE_NUMBER_WKING = 30,
-	PIECE_NUMBER_BKING = 31, // Use this if you need the numbers of the first and second balls
-	PIECE_NUMBER_ZERO = 0,
-	PIECE_NUMBER_NB = 32,
-};
-
-inline PieceNumber& operator++(PieceNumber& d) { return d = PieceNumber(int8_t(d) + 1); }
-inline PieceNumber operator++(PieceNumber& d, int) {
-  PieceNumber x = d;
-  d = PieceNumber(int8_t(d) + 1);
-  return x;
-}
-inline PieceNumber& operator--(PieceNumber& d) { return d = PieceNumber(int8_t(d) - 1); }
-
-// Piece Number integrity check. for assert.
-constexpr bool is_ok(PieceNumber pn) { return pn < PIECE_NUMBER_NB; }
-#endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
-
 /// Based on a congruential pseudo random number generator
 constexpr Key make_key(uint64_t seed) {
   return seed * 6364136223846793005ULL + 1442695040888963407ULL;
diff --git a/src/uci.cpp b/src/uci.cpp
index c775f333..8972cec9 100644
--- a/src/uci.cpp
+++ b/src/uci.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -35,7 +33,7 @@
 #include "syzygy/tbprobe.h"
 
 #if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
-#include "eval/nnue/nnue_test_command.h"
+#include "nnue/nnue_test_command.h"
 #endif
 
 using namespace std;
@@ -73,7 +71,7 @@ namespace Learner
 void test_cmd(Position& pos, istringstream& is)
 {
     // Initialize as it may be searched.
-    init_nnue();
+    Eval::init_NNUE();
 
     std::string param;
     is >> param;
@@ -83,6 +81,7 @@ void test_cmd(Position& pos, istringstream& is)
 #endif
 
 namespace {
+
   // position() is called when engine receives the "position" UCI command.
   // The function sets up the position described in the given FEN string ("fen")
   // or the starting position ("startpos") and then makes the moves given in the
@@ -117,6 +116,20 @@ namespace {
     }
   }
 
+  // trace_eval() prints the evaluation for the current position, consistent with the UCI
+  // options set so far. The evaluation runs on a copy of pos rebuilt from its FEN string.
+
+  void trace_eval(Position& pos) {
+
+    StateListPtr states(new std::deque<StateInfo>(1));
+    Position p;
+    p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main());
+
+    Eval::verify_NNUE();
+
+    sync_cout << "\n" << Eval::trace(p) << sync_endl;
+  }
+
 
   // setoption() is called when engine receives the "setoption" UCI command. The
   // function updates the UCI option ("name") to the given value ("value").
@@ -205,18 +218,11 @@ namespace {
                nodes += Threads.nodes_searched();
             }
             else
-               sync_cout << "\n" << Eval::trace(pos) << sync_endl;
+               trace_eval(pos);
         }
         else if (token == "setoption")  setoption(is);
         else if (token == "position")   position(pos, is, states);
-        else if (token == "ucinewgame")
-        {
-#if defined(EVAL_NNUE)
-            init_nnue();
-#endif
-            Search::clear();
-            elapsed = now(); // Search::clear() may take some while
-        }
+        else if (token == "ucinewgame") { Search::clear(); elapsed = now(); } // Search::clear() may take some while
     }
 
     elapsed = now() - elapsed + 1; // Ensure positivity to avoid a 'divide by zero'
@@ -251,48 +257,8 @@ namespace {
      return int(0.5 + 1000 / (1 + std::exp((a - x) / b)));
   }
 
-// When you calculate check sum, save it and check the consistency later.
-  uint64_t eval_sum;
 } // namespace
 
-// Make is_ready_cmd() callable from outside. (Because I want to call it from the bench command etc.)
-// Note that the phase is not initialized.
-void init_nnue(bool skipCorruptCheck)
-{
-#if defined(EVAL_NNUE)
-  // After receiving "isready", modify so that a line feed is sent every 5 seconds until "readyok" is returned. (keep alive processing)
-  // From USI 2.0 specifications.
-  // -The time out time after "is ready" is about 30 seconds. Beyond this, if you want to initialize the evaluation function and secure the hash table,
-  // You should send some kind of message (breakable) from the thinking engine side.
-  // -Shogi GUI already does so, so MyShogi will follow along.
-  //-Also, the engine side of Yaneura King modifies it so that after "isready" is received, a line feed is sent every 5 seconds until "readyok" is returned.
-
-  // Perform processing that may take time, such as reading the evaluation function, at this timing.
-  // If you do a time-consuming process at startup, Shogi place will make a timeout judgment and retire the recognition as a thinking engine.
-  if (!UCI::load_eval_finished)
-  {
-      // Read evaluation function
-      Eval::load_eval();
-
-      // Calculate and save checksum (to check for subsequent memory corruption)
-      eval_sum = Eval::calc_check_sum();
-
-      // display soft name
-      Eval::print_softname(eval_sum);
-
-      UCI::load_eval_finished = true;
-  }
-  else
-  {
-      // Check the checksum every time to see if the memory has been corrupted.
-      // It seems that the time is a little wasteful, but it is good because it is about 0.1 seconds.
-      if (!skipCorruptCheck && eval_sum != Eval::calc_check_sum())
-          sync_cout << "Error! : EVAL memory is corrupted" << sync_endl;
-  }
-#endif  // defined(EVAL_NNUE)
-}
-
-
 // --------------------
 // Call qsearch(),search() directly for testing
 // --------------------
@@ -376,25 +342,15 @@ void UCI::loop(int argc, char* argv[]) {
       else if (token == "setoption")  setoption(is);
       else if (token == "go")         go(pos, is, states);
       else if (token == "position")   position(pos, is, states);
-      else if (token == "ucinewgame")
-      {
-#if defined(EVAL_NNUE)
-          init_nnue();
-#endif
-          Search::clear();
-      }
-      else if (token == "isready") {
-#if defined(EVAL_NNUE)
-          init_nnue(true);
-#endif
-          sync_cout << "readyok" << sync_endl;
-      }
+      else if (token == "ucinewgame") Search::clear();
+      else if (token == "isready")    sync_cout << "readyok" << sync_endl;
+
       // Additional custom non-UCI commands, mainly for debugging.
       // Do not use these commands during a search!
       else if (token == "flip")     pos.flip();
       else if (token == "bench")    bench(pos, is, states);
       else if (token == "d")        sync_cout << pos << sync_endl;
-      else if (token == "eval")     sync_cout << Eval::trace(pos) << sync_endl;
+      else if (token == "eval")     trace_eval(pos);
       else if (token == "compiler") sync_cout << compiler_info() << sync_endl;
 #if defined (EVAL_LEARN)
       else if (token == "gensfen") Learner::gen_sfen(pos, is);
@@ -410,10 +366,6 @@ void UCI::loop(int argc, char* argv[]) {
 
 #endif
 
-#if defined(EVAL_NNUE)
-      else if (token == "eval_nnue") sync_cout << "eval_nnue = " << Eval::compute_eval(pos) << sync_endl;
-#endif
-
 #if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
       // test command
       else if (token == "test") test_cmd(pos, is);
diff --git a/src/uci.h b/src/uci.h
index 6529f90c..27a50fb9 100644
--- a/src/uci.h
+++ b/src/uci.h
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -76,19 +74,10 @@ std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
 std::string wdl(Value v, int ply);
 Move to_move(const Position& pos, std::string& str);
 
-// Flag that read the evaluation function. This is set to false when evaldir is changed.
-extern bool load_eval_finished; // = false;
 } // namespace UCI
 
 extern UCI::OptionsMap Options;
 
-// Processing when USI "isready" command is called. At this time, the evaluation function is read.
-// Used when you want to load the evaluation function when "isready" does not come in handler of benchmark command etc.
-// If skipCorruptCheck == true, skip memory corruption check by check sum when reading the evaluation function a second time.
-// * This function is inconvenient if it is not available in Stockfish, so add it.
-
-void init_nnue(bool skipCorruptCheck = false);
-
 extern const char* StartFEN;
 
 #endif // #ifndef UCI_H_INCLUDED
diff --git a/src/ucioption.cpp b/src/ucioption.cpp
index e145c34b..168e73a9 100644
--- a/src/ucioption.cpp
+++ b/src/ucioption.cpp
@@ -1,8 +1,6 @@
 /*
   Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
-  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
-  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
 
   Stockfish is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
@@ -42,8 +40,8 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
 void on_logger(const Option& o) { start_logger(o); }
 void on_threads(const Option& o) { Threads.set(size_t(o)); }
 void on_tb_path(const Option& o) { Tablebases::init(o); }
-void on_eval_file(const Option& o) { load_eval_finished = false; init_nnue(); }
-
+void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
+void on_eval_file(const Option& ) { Eval::init_NNUE(); }
 
 /// Our case insensitive less() function as required by UCI protocol
 bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {
@@ -80,10 +78,9 @@ void init(OptionsMap& o) {
   o["SyzygyProbeDepth"]      << Option(1, 1, 100);
   o["Syzygy50MoveRule"]      << Option(true);
   o["SyzygyProbeLimit"]      << Option(7, 0, 7);
+  o["Use NNUE"]              << Option(false, on_use_NNUE);
+  o["EvalFile"]              << Option("nn-9931db908a9b.nnue", on_eval_file);
 #ifdef EVAL_NNUE
-  // Evaluation function file name. When this is changed, it is necessary to reread the evaluation function at the next ucinewgame timing.
-  // Without the preceding "./", some GUIs can not load he net file.
-  o["EvalFile"]              << Option("./eval/nn.bin", on_eval_file);
   // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function.
   // I want to hit the test eval convert command, but there is no new evaluation function
   // It ends abnormally before executing this command.
@@ -207,6 +204,4 @@ Option& Option::operator=(const string& v) {
   return *this;
 }
 
-// Flag that read the evaluation function. This is set to false when evaldir is changed.
-bool load_eval_finished = false;
 } // namespace UCI