Merge pull request #79 from nodchip/nnue-player-merge

Merge Stockfish master to nodchip's repository
2026-05-20 14:27:45 +00:00 · 2020-08-10 09:51:44 +09:00
parent c0e1235fef 4260ed0c7f
commit 53d15e5ec2
112 changed files with 3013 additions and 3179 deletions
@@ -1,5 +1,5 @@
 language: cpp
-dist: xenial
+dist: bionic
 matrix:
  include:
@@ -7,7 +7,6 @@ matrix:
      compiler: gcc
      addons:
        apt:
          sources: ['ubuntu-toolchain-r-test']
          packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl']
      env:
        - COMPILER=g++-8
@@ -17,23 +16,23 @@ matrix:
      compiler: clang
      addons:
        apt:
-          sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-xenial-6.0']
+          packages: ['clang-10', 'llvm-10-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
          packages: ['clang-6.0', 'llvm-6.0-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
      env:
-        - COMPILER=clang++-6.0
+        - COMPILER=clang++-10
        - COMP=clang
        - LDFLAGS=-fuse-ld=lld
    - os: osx
      osx_image: xcode12
      compiler: gcc
      env:
        - COMPILER=g++
        - COMP=gcc
    - os: osx
      osx_image: xcode12
      compiler: clang
      env:
-        - COMPILER=clang++ V='Apple LLVM 9.4.1' # Apple LLVM version 9.1.0 (clang-902.0.39.2)
+        - COMPILER=clang++
        - COMP=clang
 branches:
@@ -48,26 +47,34 @@ script:
  - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
  - export benchref=$(cat git_sig)
  - echo "Reference bench:" $benchref
  #
  # Compiler version string
  - $COMPILER -v
  #
  # Verify bench number against various builds
  - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
  - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
-  - make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
  #
  # Check perft and reproducible search
  - export CXXFLAGS="-Werror"
  - make clean && make -j2 ARCH=x86-64 build
  - ../tests/perft.sh
  - ../tests/reprosearch.sh
  #
  # Valgrind
  #
  - export CXXFLAGS="-O1 -fno-inline"
  - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
  - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
  #
  # Sanitizer
  #
-  # Use g++-8 as a proxy for having sanitizers, might need revision as they become available for more recent versions of clang/gcc
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
-  - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
+  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
  - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
@@ -1,10 +1,17 @@
-# List of authors for Stockfish, as of March 30, 2020
+# List of authors for Stockfish, as of August 4, 2020
 # Founders of the Stockfish project and fishtest infrastructure
 Tord Romstad (romstad)
 Marco Costalba (mcostalba)
 Joona Kiiski (zamar)
 Gary Linscott (glinscott)
 # Authors and inventors of NNUE, training, NNUE port
 Yu Nasu (ynasu87)
 Motohiro Isozaki (yaneurao)
 Hisayori Noda (nodchip)
 # all other authors of the code in alphabetical order
 Aditya (absimaldata)
 Adrian Petrescu (apetresc)
 Ajith Chandy Jose (ajithcj)
@@ -36,6 +43,7 @@ Dariusz Orzechowski
 David Zar
 Daylen Yang (daylen)
 DiscanX
 Dominik Schlösser (domschl)
 double-beep
 Eduardo Cáceres (eduherminio)
 Eelco de Groot (KingDefender)
@@ -71,6 +79,7 @@ Jean Gauthier (OuaisBla)
 Jean-Francois Romang (jromang)
 Jekaa
 Jerry Donald Watson (jerrydonaldwatson)
 jjoshua2
 Jonathan Calovski (Mysseno)
 Jonathan Dumale (SFisGOD)
 Joost VandeVondele (vondele)
@@ -115,7 +124,8 @@ Nick Pelling (nickpelling)
 Nicklas Persson (NicklasPersson)
 Niklas Fiekas (niklasf)
 Nikolay Kostov (NikolayIT)
-Nguyen Pham
+Nguyen Pham (nguyenpham)
 Norman Schmidt (FireFather)
 Ondrej Mosnáček (WOnder93)
 Oskar Werkelin Ahlin
 Pablo Vazquez
@@ -135,14 +145,17 @@ Richard Lloyd
 Rodrigo Exterckötter Tjäder
 Ron Britvich (Britvich)
 Ronald de Man (syzygy1, syzygy)
 rqs
 Ryan Schmitt
 Ryan Takker
 Sami Kiminki (skiminki)
 Sebastian Buchwald (UniQP)
 Sergei Antonov (saproj)
 Sergei Ivanov (svivanov72)
 Sergio Vieri (sergiovieri)
 sf-x
 Shane Booth (shane31)
 Shawn Varghese (xXH4CKST3RXx)
 Stefan Geschwentner (locutus2)
 Stefano Cardanobile (Stefano80)
 Steinar Gunderson (sesse)
@@ -155,9 +168,11 @@ Tom Vijlbrief (tomtor)
 Tomasz Sobczyk (Sopel97)
 Torsten Franz (torfranz, tfranzer)
 Tracey Emery (basepr1me)
 tttak
 Unai Corzo (unaiic)
 Uri Blass (uriblass)
 Vince Negri (cuddlestmonkey)
 zz4032
 # Additionally, we acknowledge the authors and maintainers of fishtest,
@@ -9,9 +9,10 @@ Stockfish NNUE is a port of a shogi neural network named NNUE (efficiently updat
 ## Training Guide
 ### Generating Training Data
-Use the "no-nnue.nnue-gen-sfen-from-original-eval" binary. The given example is generation in its simplest form. There are more commands. 
+To generate training data from the classic eval, use gensfen command with setting "Use NNUE" to "false". The given example is generation in its simplest form. There are more commands. 
 ```
 uci
 setoption name Use NNUE value false
 setoption name Threads value x
 setoption name Hash value y
 setoption name SyzygyPath value path
@@ -27,10 +28,11 @@ This will save a file named "generated_kifu.bin" in the same folder as the binar
 ### Generating Validation Data
 The process is the same as the generation of training data, except for the fact that you need to set loop to 1 million, because you don't need a lot of validation data. The depth should be the same as before or slightly higher than the depth of the training data. After generation rename the validation data file to val.bin and drop it in a folder named "validationdata" in the same directory to make it easier. 
 ### Training a Completely New Network
-Use the "avx2.halfkp_256x2-32-32.nnue-learn.2020-07-11" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
+Use the "learn" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
 ```
 uci
 setoption name SkipLoadingEval value true
 setoption name Use NNUE value true
 setoption name Threads value x
 isready
 learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 mirror_percentage 50 validation_set_file_name validationdata\val.bin
@@ -42,7 +44,7 @@ Nets get saved in the "evalsave" folder.
 - lambda is the amount of weight it puts to eval of learning data vs win/draw/loss results. 1 puts all weight on eval, lambda 0 puts all weight on WDL results.
 ### Reinforcement Learning
-If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries. Make sure that your previously trained network is in the eval folder. Use the commands specified above. Make sure `SkipLoadingEval` is set to false so that the data generated is using the neural net's eval by typing the command `uci setoption name SkipLoadingEval value false` before typing the `isready` command. You should aim to generate less positions than the first run, around 1/10 of the number of positions generated in the first run. The depth should be higher as well. You should also do the same for validation data, with the depth being higher than the last run.
+If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries with setting `Use NNUE` to true. Make sure that your previously trained network is in the eval folder. Use the commands specified above. Make sure `SkipLoadingEval` is set to false so that the data generated is using the neural net's eval by typing the command `uci setoption name SkipLoadingEval value false` before typing the `isready` command. You should aim to generate less positions than the first run, around 1/10 of the number of positions generated in the first run. The depth should be higher as well. You should also do the same for validation data, with the depth being higher than the last run.
 After you have generated the training data, you must move it into your training data folder and delete the older data so that the binary does not accidentally train on the same data again. Do the same for the validation data and name it to val-1.bin to make it less confusing. Make sure the evalsave folder is empty. Then, using the same binary, type in the training commands shown above. Do __NOT__ set `SkipLoadingEval` to true, it must be false or you will get a completely new network, instead of a network trained with reinforcement learning. You should also set eval_save_interval to a number that is lower than the amount of positions in your training data, perhaps also 1/10 of the original value. The validation file should be set to the new validation data, not the old data.
@@ -0,0 +1,65 @@
 <p align="center">
  <img src="https://cdn.discordapp.com/attachments/724700045525647420/729135226365804594/SFNNUE2.png">
 </p>
 <h1 align="center">Stockfish NNUE</h1>
 ## Overview
 Stockfish NNUE is a port of a shogi neural network named NNUE (efficiently updateable neural network backwards) to Stockfish 11. To learn more about the Stockfish chess engine, look [here](stockfish.md) for an overview and [here](https://github.com/official-stockfish/Stockfish) for the official repository.
 ## Training Guide
 ### Generating Training Data
 To generate training data from the classic eval, use gensfen command with setting "Use NNUE" to "false". The given example is generation in its simplest form. There are more commands. 
 ```
 uci
 setoption name Use NNUE value false
 setoption name Threads value x
 setoption name Hash value y
 setoption name SyzygyPath value path
 isready
 gensfen depth a loop b use_draw_in_training_data_generation 1 eval_limit 32000
 ```
 Specify how many threads and how much memory you would like to use with the x and y values. The option SyzygyPath is not necessary, but if you would like to use it, you must first have Syzygy endgame tablebases on your computer, which you can find [here](http://oics.olympuschess.com/tracker/index.php). You will need to have a torrent client to download these tablebases, as that is probably the fastest way to obtain them. The path is the path to the folder containing those tablebases. It does not have to be surrounded in quotes.
 This will save a file named "generated_kifu.bin" in the same folder as the binary. Once generation is done, rename the file to something like "1billiondepth12.bin" to remember the depth and quantity of the positions and move it to a folder named "trainingdata" in the same directory as the binaries.
 #### Generation Parameters
 - Depth is the searched depth per move, or how far the engine looks forward. This value is an integer.
 - Loop is the number of positions generated. This value is also an integer.
 ### Generating Validation Data
 The process is the same as the generation of training data, except for the fact that you need to set loop to 1 million, because you don't need a lot of validation data. The depth should be the same as before or slightly higher than the depth of the training data. After generation rename the validation data file to val.bin and drop it in a folder named "validationdata" in the same directory to make it easier. 
 ### Training a Completely New Network
 Use the "learn" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
 ```
 uci
 setoption name SkipLoadingEval value true
 setoption name Use NNUE value true
 setoption name Threads value x
 isready
 learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 mirror_percentage 50 validation_set_file_name validationdata\val.bin
 ```
 Nets get saved in the "evalsave" folder. 
 #### Training Parameters
 - eta is the learning rate
 - lambda is the amount of weight it puts to eval of learning data vs win/draw/loss results. 1 puts all weight on eval, lambda 0 puts all weight on WDL results.
 ### Reinforcement Learning
 If you would like to do some reinforcement learning on your original network, you must first generate training data using the learn binaries with setting `Use NNUE` to true. Make sure that your previously trained network is in the eval folder. Use the commands specified above. Make sure `SkipLoadingEval` is set to false so that the data generated is using the neural net's eval by typing the command `uci setoption name SkipLoadingEval value false` before typing the `isready` command. You should aim to generate fewer positions than the first run, around 1/10 of the number of positions generated in the first run. The depth should be higher as well. You should also do the same for validation data, with the depth being higher than the last run.
 After you have generated the training data, you must move it into your training data folder and delete the older data so that the binary does not accidentally train on the same data again. Do the same for the validation data and name it to val-1.bin to make it less confusing. Make sure the evalsave folder is empty. Then, using the same binary, type in the training commands shown above. Do __NOT__ set `SkipLoadingEval` to true, it must be false or you will get a completely new network, instead of a network trained with reinforcement learning. You should also set eval_save_interval to a number that is lower than the amount of positions in your training data, perhaps also 1/10 of the original value. The validation file should be set to the new validation data, not the old data.
 After training is finished, your new net should be located in the "final" folder under the "evalsave" directory. You should test this new network against the older network to see if there are any improvements.
 ## Using Your Trained Net
 If you want to use your generated net, copy the net located in the "final" folder under the "evalsave" directory and move it into a new folder named "eval" under the directory with the binaries. You can then use the halfkp_256x2 binaries pertaining to your CPU with a standard chess GUI, such as Cutechess. Refer to the [releases page](https://github.com/nodchip/Stockfish/releases) to find out which binary is best for your CPU.
 If the engine does not load any net file, or shows "Error! *** not found or wrong format", please try to specify the net with the full file path with the "EvalFile" option by typing the command `setoption name EvalFile value path` where path is the full file path.
 ## Resources
 - [Stockfish NNUE Wiki](https://www.qhapaq.org/shogi/shogiwiki/stockfish-nnue/)
 - [Training instructions](https://twitter.com/mktakizawa/status/1273042640280252416) from the creator of the Elmo shogi engine
 - [Original Talkchess thread](http://talkchess.com/forum3/viewtopic.php?t=74059) discussing Stockfish NNUE
 - [Guide to Stockfish NNUE](http://yaneuraou.yaneu.com/2020/06/19/stockfish-nnue-the-complete-guide/) 
 - [Unofficial Stockfish Discord](https://discord.gg/nv8gDtt)
 A more updated list can be found in the #sf-nnue-resources channel in the Discord.
@@ -4,10 +4,9 @@ clone_depth: 50
 branches:
  only:
    - master
    - appveyor
 # Operating system (build VM template)
-os: Visual Studio 2017
+os: Visual Studio 2019
 # Build platform, i.e. x86, x64, AnyCPU. This setting is optional.
 platform:
@@ -36,8 +35,11 @@ before_build:
      $src = $src.Replace("\", "/")
      # Build CMakeLists.txt
-      $t = 'cmake_minimum_required(VERSION 3.8)',
+      $t = 'cmake_minimum_required(VERSION 3.17)',
           'project(Stockfish)',
           'set(CMAKE_CXX_STANDARD 17)',
           'set(CMAKE_CXX_STANDARD_REQUIRED ON)',
           'set (CMAKE_CXX_EXTENSIONS OFF)',
           'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)',
           'set(source_files', $src, ')',
           'add_executable(stockfish ${source_files})'
@@ -51,10 +53,11 @@ before_build:
      $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1
      $bench = $b -match '\D+(\d+)' | % { $matches[1] }
      Write-Host "Reference bench:" $bench
-      $g = "Visual Studio 15 2017"
+      $g = "Visual Studio 16 2019"
-      If (${env:PLATFORM} -eq 'x64') { $g = $g + ' Win64' }
+      If (${env:PLATFORM} -eq 'x64') { $a = "x64" }
-      cmake -G "${g}" .
+      If (${env:PLATFORM} -eq 'x86') { $a = "Win32" }
-      Write-Host "Generated files for: " $g
+      cmake -G "${g}" -A ${a} .
      Write-Host "Generated files for: " $g $a
 build_script:
  - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
@@ -40,24 +40,24 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
 	material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
 	search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
 	eval/evaluate_mir_inv_tools.cpp \
-	eval/nnue/evaluate_nnue.cpp \
+	nnue/evaluate_nnue.cpp \
-	eval/nnue/evaluate_nnue_learner.cpp \
+	nnue/evaluate_nnue_learner.cpp \
-	eval/nnue/features/half_kp.cpp \
+	nnue/features/half_kp.cpp \
-	eval/nnue/features/half_relative_kp.cpp \
+	nnue/features/half_relative_kp.cpp \
-	eval/nnue/features/k.cpp \
+	nnue/features/k.cpp \
-	eval/nnue/features/p.cpp \
+	nnue/features/p.cpp \
-	eval/nnue/features/castling_right.cpp \
+	nnue/features/castling_right.cpp \
-	eval/nnue/features/enpassant.cpp \
+	nnue/features/enpassant.cpp \
-	eval/nnue/nnue_test_command.cpp \
+	nnue/nnue_test_command.cpp \
 	extra/sfen_packer.cpp \
 	learn/gensfen2019.cpp \
 	learn/learner.cpp \
 	learn/learning_tools.cpp \
 	learn/multi_think.cpp
-OBJS = $(SRCS:.cpp=.o)
+OBJS = $(notdir $(SRCS:.cpp=.o))
-VPATH = syzygy
+VPATH = syzygy:nnue:nnue/features:eval:extra:learn
 ### Establish the operating system name
 KERNEL = $(shell uname -s)
@@ -82,12 +82,14 @@ endif
 # prefetch = yes/no   --- -DUSE_PREFETCH   --- Use prefetch asm-instruction
 # popcnt = yes/no     --- -DUSE_POPCNT     --- Use popcnt asm-instruction
 # sse = yes/no        --- -msse            --- Use Intel Streaming SIMD Extensions
 # sse3 = yes/no       --- -msse3           --- Use Intel Streaming SIMD Extensions 3
 # ssse3 = yes/no      --- -mssse3          --- Use Intel Supplemental Streaming SIMD Extensions 3
 # sse41 = yes/no      --- -msse4.1         --- Use Intel Streaming SIMD Extensions 4.1
 # sse42 = yes/no      --- -msse4.2         --- Use Intel Streaming SIMD Extensions 4.2
 # avx2 = yes/no       --- -mavx2           --- Use Intel Advanced Vector Extensions 2
 # pext = yes/no       --- -DUSE_PEXT       --- Use pext x86_64 asm-instruction
-# avx512 = yes/no     --- -mavx512vbmi     --- Use Intel Advanced Vector Extensions 512
+# avx512 = yes/no     --- -mavx512bw       --- Use Intel Advanced Vector Extensions 512
 # neon = yes/no       --- -DUSE_NEON       --- Use ARM SIMD architecture
 #
 # Note that Makefile is space sensitive, so when adding new architectures
 # or modifying existing flags, you have to make sure there are no extra spaces
@@ -108,6 +110,8 @@ sse42 = no
 avx2 = no
 pext = no
 avx512 = no
 neon = no
 ARCH = x86-64-modern
 ### 2.2 Architecture specific
 ifeq ($(ARCH),general-32)
@@ -142,16 +146,14 @@ ifeq ($(ARCH),x86-64-sse3)
 	prefetch = yes
 	sse = yes
 	sse3 = yes
 	ssse3 = yes
 endif
 ifeq ($(ARCH),x86-64-sse3-popcnt)
 	arch = x86_64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
 	sse3 = yes
-	ssse3 = yes
+	popcnt = yes
 endif
 ifeq ($(ARCH),x86-64-ssse3)
@@ -165,6 +167,17 @@ endif
 ifeq ($(ARCH),x86-64-sse41)
 	arch = x86_64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
 	sse3 = yes
 	ssse3 = yes
 	sse41 = yes
 endif
 ifeq ($(ARCH),x86-64-modern)
 	arch = x86_64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
 	sse3 = yes
 	ssse3 = yes
@@ -184,7 +197,6 @@ endif
 ifeq ($(ARCH),x86-64-avx2)
 	arch = x86_64
 	bits = 64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
@@ -210,7 +222,6 @@ endif
 ifeq ($(ARCH),x86-64-avx512)
 	arch = x86_64
 	bits = 64
 	prefetch = yes
 	popcnt = yes
 	sse = yes
@@ -233,6 +244,14 @@ ifeq ($(ARCH),armv8)
 	arch = armv8-a
 	prefetch = yes
 	popcnt = yes
 	neon = yes
 endif
 ifeq ($(ARCH),apple-silicon)
 	arch = arm64
 	prefetch = yes
 	popcnt = yes
 	neon = yes
 endif
 ifeq ($(ARCH),ppc-32)
@@ -251,7 +270,7 @@ endif
 ### ==========================================================================
 ### 3.1 Selecting compiler (default = gcc)
-CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) $(NNUECXXFLAGS)
+CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS)
 DEPENDFLAGS += -std=c++17
 LDFLAGS += $(EXTRALDFLAGS)
@@ -277,6 +296,9 @@ ifeq ($(COMP),gcc)
 	ifneq ($(KERNEL),Darwin)
 	   LDFLAGS += -Wl,--no-as-needed
 	endif
 	gccversion = $(shell $(CXX) --version)
 	gccisclang = $(findstring clang,$(gccversion))
 endif
 ifeq ($(COMP),mingw)
@@ -332,28 +354,6 @@ ifeq ($(COMP),clang)
 	endif
 endif
 ifeq ($(COMP),msys2)
 	comp=gcc
 	CXX=g++
 	CXXFLAGS += -pedantic -Wextra -Wshadow
 	ifeq ($(ARCH),armv7)
 		ifeq ($(OS),Android)
 			CXXFLAGS += -m$(bits)
 			LDFLAGS += -m$(bits)
 		endif
 	else
 		CXXFLAGS += -m$(bits)
 		LDFLAGS += -m$(bits)
 	endif
 	ifneq ($(KERNEL),Darwin)
 	   LDFLAGS += -Wl,--no-as-needed
 	endif
 	LDFLAGS += -static -Wl,-s
 endif
 ifeq ($(comp),icc)
 	profile_make = icc-profile-make
 	profile_use = icc-profile-use
@@ -368,8 +368,8 @@ endif
 endif
 ifeq ($(KERNEL),Darwin)
-	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.9
+	CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
-	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.9
+	LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
 endif
 ### Travis CI script uses COMPILER to overwrite CXX
@@ -402,8 +402,8 @@ endif
 ### 3.2.2 Debugging with undefined behavior sanitizers
 ifneq ($(sanitize),no)
-        CXXFLAGS += -g3 -fsanitize=$(sanitize) -fuse-ld=gold
+        CXXFLAGS += -g3 -fsanitize=$(sanitize)
-        LDFLAGS += -fsanitize=$(sanitize) -fuse-ld=gold
+        LDFLAGS += -fsanitize=$(sanitize)
 endif
 ### 3.3 Optimization
@@ -441,56 +441,61 @@ endif
 ### 3.6 popcnt
 ifeq ($(popcnt),yes)
-	CXXFLAGS += -DUSE_POPCNT
+	ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64))
-	ifneq ($(arch),$(filter $(arch),ppc64 armv8-a))
+		CXXFLAGS += -DUSE_POPCNT
-		ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	else ifeq ($(comp),icc)
-			CXXFLAGS += -mpopcnt
+		CXXFLAGS += -msse3 -DUSE_POPCNT
-		endif
+	else
 		CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT
 	endif
 endif
 ifeq ($(avx2),yes)
 	CXXFLAGS += -DUSE_AVX2
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mavx2
 	endif
 endif
 ifeq ($(avx512),yes)
 	CXXFLAGS += -DUSE_AVX512
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mavx512bw
 	endif
 endif
 ifeq ($(sse42),yes)
 	CXXFLAGS += -DUSE_SSE42
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -msse4.2
 	endif
 endif
 ifeq ($(sse41),yes)
 	CXXFLAGS += -DUSE_SSE41
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -msse4.1
 	endif
 endif
 ifeq ($(ssse3),yes)
 	CXXFLAGS += -DUSE_SSSE3
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mssse3
 	endif
 endif
 ifeq ($(sse3),yes)
 	CXXFLAGS += -DUSE_SSE3
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -msse3
 	endif
 endif
 ifeq ($(neon),yes)
 	CXXFLAGS += -DUSE_NEON
 endif
 ifeq ($(arch),x86_64)
 	CXXFLAGS += -DUSE_SSE2
 endif
@@ -498,7 +503,7 @@ endif
 ### 3.7 pext
 ifeq ($(pext),yes)
 	CXXFLAGS += -DUSE_PEXT
-	ifeq ($(comp),$(filter $(comp),gcc clang mingw msys2))
+	ifeq ($(comp),$(filter $(comp),gcc clang mingw))
 		CXXFLAGS += -mbmi2
 	endif
 endif
@@ -508,18 +513,28 @@ endif
 ### needs access to the optimization flags.
 ifeq ($(optimize),yes)
 ifeq ($(debug), no)
-	ifeq ($(comp),$(filter $(comp),gcc clang))
+	ifeq ($(comp),clang)
 		CXXFLAGS += -flto=thin
 		LDFLAGS += $(CXXFLAGS)
 # GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be
 # GCC on some systems.
 	else ifeq ($(comp),gcc)
 	ifeq ($(gccisclang),)
 		CXXFLAGS += -flto
 		LDFLAGS += $(CXXFLAGS) -flto=jobserver
 	else
 		CXXFLAGS += -flto=thin
 		LDFLAGS += $(CXXFLAGS)
 	endif
 # To use LTO and static linking on windows, the tool chain requires a recent gcc:
 # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not.
 # So, only enable it for a cross from Linux by default.
-	ifeq ($(comp),mingw)
+	else ifeq ($(comp),mingw)
 	ifeq ($(KERNEL),Linux)
 		CXXFLAGS += -flto
-		LDFLAGS += $(CXXFLAGS)
+		LDFLAGS += $(CXXFLAGS) -flto=jobserver
 	endif
 	endif
 endif
@@ -544,20 +559,12 @@ help:
 	@echo ""
 	@echo "Supported targets:"
 	@echo ""
-	@echo "build                   > Standard (without NNUE) build"
+	@echo "build                   > Standard build"
 	@echo "profile-build           > Standard build with PGO"
 	@echo "nnue                    > NNUE-enabled build"
 	@echo "profile-nnue            > NNUE-enabled build with PGO"
 	@echo "nnue-learn              > Produces or refines a NNUE parameter set."
 	@echo "                            Requires training data that can be"
 	@echo "                            generated by itself using an existing"
 	@echo "                            parameter set, or with the next tool"
 	@echo "nnue-gen-sfen-from-original-eval"
 	@echo "                        > Produces training data for 'nnue-learn'"
 	@echo "                        >   without using a NNUE parameter set"
 	@echo "strip                   > Strip executable"
 	@echo "install                 > Install executable"
 	@echo "clean                   > Clean up"
 	@echo "net                     > Download the default nnue net"
 	@echo ""
 	@echo "Supported archs:"
 	@echo ""
@@ -565,10 +572,11 @@ help:
 	@echo "x86-64-bmi2             > x86 64-bit with bmi2 support"
 	@echo "x86-64-avx2             > x86 64-bit with avx2 support"
 	@echo "x86-64-sse42            > x86 64-bit with sse42 support"
 	@echo "x86-64-modern           > x86 64-bit with sse41 support (x86-64-sse41)"
 	@echo "x86-64-sse41            > x86 64-bit with sse41 support"
 	@echo "x86-64-ssse3            > x86 64-bit with ssse3 support"
-	@echo "x86-64-sse3-popcnt      > x86 64-bit with ssse3 and popcnt support"
+	@echo "x86-64-sse3-popcnt      > x86 64-bit with sse3 and popcnt support"
-	@echo "x86-64-sse3             > x86 64-bit with ssse3 support"
+	@echo "x86-64-sse3             > x86 64-bit with sse3 support"
 	@echo "x86-64                  > x86 64-bit generic"
 	@echo "x86-32                  > x86 32-bit (also enables SSE)"
 	@echo "x86-32-old              > x86 32-bit fall back for old hardware"
@@ -576,6 +584,7 @@ help:
 	@echo "ppc-32                  > PPC 32-bit"
 	@echo "armv7                   > ARMv7 32-bit"
 	@echo "armv8                   > ARMv8 64-bit"
 	@echo "apple-silicon           > Apple silicon ARM64"
 	@echo "general-64              > unspecified 64-bit"
 	@echo "general-32              > unspecified 32-bit"
 	@echo ""
@@ -585,21 +594,23 @@ help:
 	@echo "mingw                   > Gnu compiler with MinGW under Windows"
 	@echo "clang                   > LLVM Clang compiler"
 	@echo "icc                     > Intel compiler"
 	@echo "msys2                   > MSYS2"
 	@echo ""
 	@echo "Simple examples. If you don't know what to do, you likely want to run: "
 	@echo ""
-	@echo "make build ARCH=x86-64    (This is for 64-bit systems)"
+	@echo "make -j build ARCH=x86-64    (This is for 64-bit systems)"
-	@echo "make build ARCH=x86-32    (This is for 32-bit systems)"
+	@echo "make -j build ARCH=x86-32    (This is for 32-bit systems)"
 	@echo ""
 	@echo "Advanced examples, for experienced users: "
 	@echo ""
-	@echo "make build ARCH=x86-64 COMP=clang"
+	@echo "make -j build ARCH=x86-64-modern COMP=clang"
-	@echo "make profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
+	@echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8"
 	@echo ""
 	@echo "The selected architecture $(ARCH) enables the following configuration: "
 	@echo ""
 	@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
-.PHONY: help build profile-build strip install clean objclean profileclean \
+.PHONY: help build profile-build strip install clean net objclean profileclean \
        config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
        clang-profile-use clang-profile-make
@@ -633,14 +644,21 @@ install:
 clean: objclean profileclean
 	@rm -f .depend *~ core
 net:
 	$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
 	@echo "Default net: $(nnuenet)"
 	$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
 	$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
 	@if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi
 # clean binaries and objects
 objclean:
-	@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./eval/nnue/*.o ./eval/nnue/features/*.o
+	@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./nnue/*.o ./nnue/features/*.o
 # clean auxiliary profiling files
 profileclean:
 	@rm -rf profdir
-	@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./eval/nnue/*.gcda ./eval/nnue/features/*.gcda
+	@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda
 	@rm -f stockfish.profdata *.profraw
 default:
@@ -672,6 +690,7 @@ config-sanity:
 	@echo "avx2: '$(avx2)'"
 	@echo "pext: '$(pext)'"
 	@echo "avx512: '$(avx512)'"
 	@echo "neon: '$(neon)'"
 	@echo ""
 	@echo "Flags:"
 	@echo "CXX: $(CXX)"
@@ -685,7 +704,7 @@ config-sanity:
 	@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
 	@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
 	 test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
-	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a"
+	 test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64"
 	@test "$(bits)" = "32" || test "$(bits)" = "64"
 	@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
 	@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
@@ -697,10 +716,11 @@ config-sanity:
 	@test "$(avx2)" = "yes" || test "$(avx2)" = "no"
 	@test "$(pext)" = "yes" || test "$(pext)" = "no"
 	@test "$(avx512)" = "yes" || test "$(avx512)" = "no"
 	@test "$(neon)" = "yes" || test "$(neon)" = "no"
 	@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
 $(EXE): $(OBJS)
-	$(CXX) -o $@ $(OBJS) $(LDFLAGS)
+	+$(CXX) -o $@ $(OBJS) $(LDFLAGS)
 clang-profile-make:
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
@@ -738,20 +758,10 @@ icc-profile-use:
 	EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
 	all
-nnue: config-sanity
+learn: config-sanity
-	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_NNUE -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build
+	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp' LDFLAGS='$(LDFLAGS) -lopenblas -fopenmp' build
 profile-nnue: export NNUECXXFLAGS = -DEVAL_NNUE -DENABLE_TEST_CMD
 profile-nnue: config-sanity
 	$(MAKE) profile-build
 nnue-gen-sfen-from-original-eval: config-sanity
 	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DUSE_EVAL_HASH -DENABLE_TEST_CMD -fopenmp' LDFLAGS='$(LDFLAGS) -fopenmp' build
 nnue-learn: config-sanity
 	$(MAKE) CXXFLAGS='$(CXXFLAGS) -DEVAL_LEARN -DEVAL_NNUE -DUSE_EVAL_HASH -DENABLE_TEST_CMD -DUSE_BLAS -I/mingw64/include/OpenBLAS -fopenmp' LDFLAGS='$(LDFLAGS) -lopenblas -fopenmp' build
 .depend:
-	-@$(CXX) $(DEPENDFLAGS) -MM $(OBJS:.o=.cpp) > $@ 2> /dev/null
+	-@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null
 -include .depend
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -130,12 +128,6 @@ constexpr bool more_than_one(Bitboard b) {
  return b & (b - 1);
 }
 /// Counts the occupation of the bitboard depending on the occupation of SQ_A1
 /// as in `b & (1ULL << SQ_A1) ? more_than_two(b) : more_than_one(b)`
 constexpr bool conditional_more_than_two(Bitboard b) {
  return b & (b - 1) & (b - 2);
 }
 constexpr bool opposite_colors(Square s1, Square s2) {
  return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1;
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -589,8 +587,8 @@ ScaleFactor Endgame<KPsK>::operator()(const Position& pos) const {
  Bitboard strongPawns = pos.pieces(strongSide, PAWN);
  // If all pawns are ahead of the king on a single rook file, it's a draw.
-  if (!((strongPawns & ~FileABB) || (strongPawns & ~FileHBB)) &&
+  if (   !(strongPawns & ~(FileABB | FileHBB))
-      !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
+      && !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
      return SCALE_FACTOR_DRAW;
  return SCALE_FACTOR_NONE;
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -7,22 +7,22 @@ namespace Eval
 	// --- tables
-	// Value when a certain BonaPiece is seen from the other side
+	// Value when a certain PieceSquare is seen from the other side
 	// BONA_PIECE_INIT is -1, so it must be a signed type.
-	// Even if KPPT is expanded, BonaPiece will not exceed 2^15 for the time being, so int16_t is good.
+	// Even if KPPT is expanded, PieceSquare will not exceed 2^15 for the time being, so int16_t is good.
-	int16_t inv_piece_[Eval::fe_end];
+	int16_t inv_piece_[PieceSquare::PS_END];
-	// Returns the one at the position where a BonaPiece on the board is mirrored.
+	// Returns the one at the position where a PieceSquare on the board is mirrored.
-	int16_t mir_piece_[Eval::fe_end];
+	int16_t mir_piece_[PieceSquare::PS_END];
 	// --- methods
-// Returns the value when a certain BonaPiece is seen from the other side
+// Returns the value when a certain PieceSquare is seen from the other side
-	Eval::BonaPiece inv_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)inv_piece_[p]; }
+	PieceSquare inv_piece(PieceSquare p) { return (PieceSquare)inv_piece_[p]; }
-	// Returns the one at the position where a BonaPiece on the board is mirrored.
+	// Returns the one at the position where a PieceSquare on the board is mirrored.
-	Eval::BonaPiece mir_piece(Eval::BonaPiece p) { return (Eval::BonaPiece)mir_piece_[p]; }
+	PieceSquare mir_piece(PieceSquare p) { return (PieceSquare)mir_piece_[p]; }
 	std::function<void()> mir_piece_init_function;
@@ -37,23 +37,23 @@ namespace Eval
 		// exchange f and e
 		int t[] = {
-			f_pawn             , e_pawn            ,
+			PieceSquare::PS_W_PAWN             , PieceSquare::PS_B_PAWN            ,
-			f_knight           , e_knight          ,
+			PieceSquare::PS_W_KNIGHT           , PieceSquare::PS_B_KNIGHT          ,
-			f_bishop           , e_bishop          ,
+			PieceSquare::PS_W_BISHOP           , PieceSquare::PS_B_BISHOP          ,
-			f_rook             , e_rook            ,
+			PieceSquare::PS_W_ROOK             , PieceSquare::PS_B_ROOK            ,
-			f_queen            , e_queen           ,
+			PieceSquare::PS_W_QUEEN            , PieceSquare::PS_B_QUEEN           ,
 		};
 		// Insert uninitialized value.
-		for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
+		for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
 		{
-			inv_piece_[p] = BONA_PIECE_NOT_INIT;
+			inv_piece_[p] = PieceSquare::PS_NOT_INIT;
 			// mirror does not work for hand pieces. Just return the original value.
-			mir_piece_[p] = (p < f_pawn) ? p : BONA_PIECE_NOT_INIT;
+			mir_piece_[p] = (p < PieceSquare::PS_W_PAWN) ? p : PieceSquare::PS_NOT_INIT;
 		}
-		for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
+		for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
 		{
 			for (int i = 0; i < 32 /* t.size() */; i += 2)
 			{
@@ -62,13 +62,13 @@ namespace Eval
 					Square sq = (Square)(p - t[i]);
 					// found!!
-					BonaPiece q = (p < fe_hand_end) ? BonaPiece(sq + t[i + 1]) : (BonaPiece)(Inv(sq) + t[i + 1]);
+					PieceSquare q = (p < PieceSquare::PS_W_PAWN) ? PieceSquare(sq + t[i + 1]) : (PieceSquare)(rotate180(sq) + t[i + 1]);
 					inv_piece_[p] = q;
 					inv_piece_[q] = p;
 					/*
 					It's a bit tricky, but regarding p
-										p >= fe_hand_end
+										p >= PieceSquare::PS_W_PAWN
 										When.
 					For this p, let n be an integer (i in the above code can only be an even number),
@@ -76,20 +76,20 @@ namespace Eval
 					b) When t[2n + 1] <= p <t[2n + 2], the back piece
 					Is.
-					Therefore, if p in the range of a) is set to q = Inv(p-t[2n+0]) + t[2n+1], it becomes the back piece in the box rotated 180 degrees.
+					Therefore, if p in the range of a) is set to q = rotate180(p-t[2n+0]) + t[2n+1], it becomes the back piece in the box rotated 180 degrees.
 					So inv_piece[] is initialized by swapping p and q.
 					*/
 					// There is no mirror for hand pieces.
-					if (p < fe_hand_end)
+					if (p < PieceSquare::PS_W_PAWN)
 						continue;
-					BonaPiece r1 = (BonaPiece)(Mir(sq) + t[i]);
+					PieceSquare r1 = (PieceSquare)(flip_file(sq) + t[i]);
 					mir_piece_[p] = r1;
 					mir_piece_[r1] = p;
-					BonaPiece p2 = (BonaPiece)(sq + t[i + 1]);
+					PieceSquare p2 = (PieceSquare)(sq + t[i + 1]);
-					BonaPiece r2 = (BonaPiece)(Mir(sq) + t[i + 1]);
+					PieceSquare r2 = (PieceSquare)(flip_file(sq) + t[i + 1]);
 					mir_piece_[p2] = r2;
 					mir_piece_[r2] = p2;
@@ -101,11 +101,11 @@ namespace Eval
 		if (mir_piece_init_function)
 			mir_piece_init_function();
-		for (BonaPiece p = BONA_PIECE_ZERO; p < fe_end; ++p)
+		for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
 		{
 			// It remains uninitialized. The initialization code in the table above is incorrect.
-			assert(mir_piece_[p] != BONA_PIECE_NOT_INIT && mir_piece_[p] < fe_end);
+			assert(mir_piece_[p] != PieceSquare::PS_NOT_INIT && mir_piece_[p] < PieceSquare::PS_END);
-			assert(inv_piece_[p] != BONA_PIECE_NOT_INIT && inv_piece_[p] < fe_end);
+			assert(inv_piece_[p] != PieceSquare::PS_NOT_INIT && inv_piece_[p] < PieceSquare::PS_END);
 			// mir and inv return to their original coordinates after being applied twice.
 			assert(mir_piece_[mir_piece_[p]] == p);
@@ -126,7 +126,7 @@ namespace Eval
 		// Apery's WCSC26 evaluation function, kpp p1==0 or p1==20 (0th step on the back)
 		// There is dust in it, and if you don't avoid it, it will get caught in the assert.
-		std::unordered_set<BonaPiece> s;
+		std::unordered_set<PieceSquare> s;
 		vector<int> a = {
 			f_hand_pawn - 1,e_hand_pawn - 1,
 			f_hand_lance - 1, e_hand_lance - 1,
@@ -137,7 +137,7 @@ namespace Eval
 			f_hand_rook - 1, e_hand_rook - 1,
 		};
 		for (auto b : a)
-			s.insert((BonaPiece)b);
+			s.insert((PieceSquare)b);
 		// Excludes walks, incense, and katsura on the board that do not appear further (Apery also contains garbage here)
 		for (Rank r = RANK_1; r <= RANK_2; ++r)
@@ -146,18 +146,18 @@ namespace Eval
 				if (r == RANK_1)
 				{
 					// first step
-					BonaPiece b1 = BonaPiece(f_pawn + (f | r));
+					PieceSquare b1 = PieceSquare(PieceSquare::PS_W_PAWN + (f | r));
 					s.insert(b1);
 					s.insert(inv_piece[b1]);
 					// 1st stage incense
-					BonaPiece b2 = BonaPiece(f_lance + (f | r));
+					PieceSquare b2 = PieceSquare(f_lance + (f | r));
 					s.insert(b2);
 					s.insert(inv_piece[b2]);
 				}
 				// Katsura on the 1st and 2nd steps
-				BonaPiece b = BonaPiece(f_knight + (f | r));
+				PieceSquare b = PieceSquare(PieceSquare::PS_W_KNIGHT + (f | r));
 				s.insert(b);
 				s.insert(inv_piece[b]);
 			}
@@ -166,8 +166,8 @@ namespace Eval
 		for (auto sq : SQ)
 		{
 			cout << sq << ' ';
-			for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1)
+			for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
-				for (BonaPiece p2 = BONA_PIECE_ZERO; p2 < fe_end; ++p2)
+				for (PieceSquare p2 = PieceSquare::PS_NONE; p2 < PieceSquare::PS_END; ++p2)
 					if (!s.count(p1) && !s.count(p2))
 						kpp_write(sq, p1, p2, kpp[sq][p1][p2]);
 		}
@@ -177,7 +177,7 @@ namespace Eval
 		{
 			cout << sq1 << ' ';
 			for (auto sq2 : SQ)
-				for (BonaPiece p1 = BONA_PIECE_ZERO; p1 < fe_end; ++p1)
+				for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
 					if (!s.count(p1))
 						kkp_write(sq1, sq2, p1, kkp[sq1][sq2][p1]);
 		}
@@ -3,7 +3,7 @@
 #if defined(EVAL_NNUE) || defined(EVAL_LEARN)
-// BonaPiece's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces.
+// PieceSquare's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces.
 #include "../types.h"
 #include "../evaluate.h"
@@ -15,18 +15,18 @@ namespace Eval
 	//                  tables
 	// -------------------------------------------------
-	// --- Provide Mirror and Inverse to BonaPiece.
+	// --- Provide Mirror and Inverse to PieceSquare.
 	// These arrays are initialized by calling init() or init_mir_inv_tables();.
 	// If you want to use only this table from the evaluation function,
 	// Call init_mir_inv_tables().
 	// These arrays are referenced from the KK/KKP/KPP classes below.
-	// Returns the value when a certain BonaPiece is seen from the other side
+	// Returns the value when a certain PieceSquare is seen from the other side
-	extern Eval::BonaPiece inv_piece(Eval::BonaPiece p);
+	extern PieceSquare inv_piece(PieceSquare p);
-	// Returns the one at the position where a BonaPiece on the board is mirrored.
+	// Returns the one at the position where a PieceSquare on the board is mirrored.
-	extern Eval::BonaPiece mir_piece(Eval::BonaPiece p);
+	extern PieceSquare mir_piece(PieceSquare p);
 	// callback called when initializing mir_piece/inv_piece
@@ -35,8 +35,8 @@ namespace Eval
 	// At the timing when mir_piece_init_function is called, until fe_old_end
 	// It is guaranteed that these tables have been initialized.
 	extern std::function<void()> mir_piece_init_function;
-	extern int16_t mir_piece_[Eval::fe_end];
+	extern int16_t mir_piece_[PieceSquare::PS_END];
-	extern int16_t inv_piece_[Eval::fe_end];
+	extern int16_t inv_piece_[PieceSquare::PS_END];
 	// The table above will be initialized when you call this function explicitly or call init().
 	extern void init_mir_inv_tables();
@@ -1,39 +0,0 @@
 // Definition of input features and network structure used in NNUE evaluation function
 #ifndef HALFKP_256X2_32_32_H
 #define HALFKP_256X2_32_32_H
 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
 #include "../layers/input_slice.h"
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"
 // Network architecture definition: selects the input feature set and wires
 // up the layer stack that the rest of the NNUE code refers to as `Network`.
 namespace Eval {
 namespace NNUE {
 // Input features used in evaluation function: a feature set made of a single
 // HalfKP feature parameterized with Side::kFriend.
 using RawFeatures = Features::FeatureSet<
    Features::HalfKP<Features::Side::kFriend>>;
 // Number of input feature dimensions after conversion
 constexpr IndexType kTransformedFeatureDimensions = 256;
 namespace Layers {
 // Network structure, input to output:
 //   InputSlice (2 * kTransformedFeatureDimensions = 512 inputs;
 //   NOTE(review): the factor 2 presumably covers both perspectives - confirm)
 //   -> affine 32 + clipped ReLU -> affine 32 + clipped ReLU -> affine 1.
 using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
 using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
 using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
 using OutputLayer = AffineTransform<HiddenLayer2, 1>;
 }  // namespace Layers
 // The complete network type is the type of its last layer.
 using Network = Layers::OutputLayer;
 }  // namespace NNUE
 }  // namespace Eval
 #endif // HALFKP_256X2_32_32_H
@@ -1,326 +0,0 @@
 // Code for calculating NNUE evaluation function
 #if defined(EVAL_NNUE)
 #include <fstream>
 #include <iostream>
 #include "../../evaluate.h"
 #include "../../position.h"
 #include "../../misc.h"
 #include "../../uci.h"
 #include "evaluate_nnue.h"
 namespace Eval {
 namespace NNUE {
 // Input feature converter
 AlignedPtr<FeatureTransformer> feature_transformer;
 // Evaluation function
 AlignedPtr<Network> network;
 // Evaluation function file name
 std::string fileName = "nn.bin";
 // Saved evaluation function file name
 std::string savedfileName = "nn.bin";
 // Build the textual description of the evaluation function's structure:
 // "Features=<feature transformer structure>,Network=<network structure>".
 std::string GetArchitectureString() {
  std::string description = "Features=";
  description += FeatureTransformer::GetStructureString();
  description += ",Network=";
  description += Network::GetStructureString();
  return description;
 }
 namespace {
 namespace Detail {
 // Allocate storage for one parameter object with aligned_malloc (sized and
 // aligned for T), hand ownership to `pointer`, and zero-fill the storage.
 // NOTE(review): the allocation result is not checked before the memset.
 template <typename T>
 void Initialize(AlignedPtr<T>& pointer) {
  T* const storage =
      reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T)));
  pointer.reset(storage);
  std::memset(pointer.get(), 0, sizeof(T));
 }
 // Read one parameter section from `stream`: a 32-bit header that must equal
 // T::GetHashValue(), followed by the payload, which the pointed-to object
 // reads itself. Returns false if the header is unreadable or mismatched.
 template <typename T>
 bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
  std::uint32_t stored_hash;
  stream.read(reinterpret_cast<char*>(&stored_hash), sizeof(stored_hash));
  if (stream && stored_hash == T::GetHashValue())
    return pointer->ReadParameters(stream);
  return false;
 }
 // Write one parameter section to `stream`: the 32-bit T::GetHashValue()
 // header first, then the payload, which the pointed-to object writes itself.
 template <typename T>
 bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
  constexpr std::uint32_t section_hash = T::GetHashValue();
  const char* const header_bytes =
      reinterpret_cast<const char*>(&section_hash);
  stream.write(header_bytes, sizeof(section_hash));
  return pointer->WriteParameters(stream);
 }
 }  // namespace Detail
 // Set up both global parameter objects: runs Detail::Initialize on the
 // feature transformer and then on the network.
 void Initialize() {
  Detail::Initialize(feature_transformer);
  Detail::Initialize(network);
 }
 }  // namespace
 // read the header
 bool ReadHeader(std::istream& stream,
  std::uint32_t* hash_value, std::string* architecture) {
  std::uint32_t version, size;
  stream.read(reinterpret_cast<char*>(&version), sizeof(version));
  stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
  stream.read(reinterpret_cast<char*>(&size), sizeof(size));
  if (!stream || version != kVersion) return false;
  architecture->resize(size);
  stream.read(&(*architecture)[0], size);
  return !stream.fail();
 }
 // Write the file header to `stream`: kVersion, `hash_value`, the length of
 // `architecture` as a 32-bit value, then the architecture string itself.
 // Returns false if the stream is in a failed state afterwards.
 bool WriteHeader(std::ostream& stream,
  std::uint32_t hash_value, const std::string& architecture) {
  const auto write_bytes = [&stream](const void* source, std::size_t count) {
    stream.write(static_cast<const char*>(source), count);
  };
  const std::uint32_t description_length =
      static_cast<std::uint32_t>(architecture.size());
  write_bytes(&kVersion, sizeof(kVersion));
  write_bytes(&hash_value, sizeof(hash_value));
  write_bytes(&description_length, sizeof(description_length));
  write_bytes(architecture.data(), description_length);
  return !stream.fail();
 }
 // read evaluation function parameters
 bool ReadParameters(std::istream& stream) {
  std::uint32_t hash_value;
  std::string architecture;
  if (!ReadHeader(stream, &hash_value, &architecture)) return false;
  if (hash_value != kHashValue) return false;
  if (!Detail::ReadParameters(stream, feature_transformer)) return false;
  if (!Detail::ReadParameters(stream, network)) return false;
  return stream && stream.peek() == std::ios::traits_type::eof();
 }
 // Save the complete evaluation function to `stream`: the file header, the
 // feature transformer section and the network section, in that order.
 // Stops at the first step that fails; otherwise reports the stream state.
 bool WriteParameters(std::ostream& stream) {
  const bool sections_ok =
      WriteHeader(stream, kHashValue, GetArchitectureString())
      && Detail::WriteParameters(stream, feature_transformer)
      && Detail::WriteParameters(stream, network);
  return sections_ok && !stream.fail();
 }
 // Forward to the feature transformer, which updates the accumulator for
 // `pos` when a difference calculation is possible.
 static void UpdateAccumulatorIfPossible(const Position& pos) {
  feature_transformer->UpdateAccumulatorIfPossible(pos);
 }
 // Compute the evaluation value for `pos` and cache it in the position's
 // accumulator. A cached score is returned directly unless `refresh` is set.
 static Value ComputeScore(const Position& pos, bool refresh = false) {
  auto& acc = pos.state()->accumulator;
  const bool use_cached = !refresh && acc.computed_score;
  if (!use_cached) {
    // Feature transformation followed by network propagation, each with its
    // own cache-line-aligned scratch buffer.
    alignas(kCacheLineSize) TransformedFeatureType
        features[FeatureTransformer::kBufferSize];
    feature_transformer->Transform(pos, features, refresh);
    alignas(kCacheLineSize) char scratch[Network::kBufferSize];
    const auto output = network->Propagate(features, scratch);
    const auto unclamped = static_cast<Value>(output[0] / FV_SCALE);
    // Keep the result within +/-VALUE_MAX_EVAL. Per the original author's
    // note, a larger value makes aspiration search keep failing high; in
    // timed games the search is simply aborted, but fixed-depth searches
    // (e.g. when generating training data) would never return.
    // The author also notes that clipping may influence learning, and that
    // overwriting accumulator.score is safe because it is not used during
    // difference calculation.
    acc.score = Math::clamp(unclamped, -VALUE_MAX_EVAL, VALUE_MAX_EVAL);
    acc.computed_score = true;
  }
  return acc.score;
 }
 } // namespace NNUE
 #if defined(USE_EVAL_HASH)
 // Entry type of the evaluation hash table: a 64-bit key and a 64-bit score
 // packed into one 16-byte-aligned object.
 struct alignas(16) ScoreKeyValue {
 #if defined(USE_SSE2)
  // With SSE2, copies are performed as a single 128-bit store of the whole
  // entry through the __m128i view of the union.
  ScoreKeyValue() = default;
  ScoreKeyValue(const ScoreKeyValue& other) {
    static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i),
                  "sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)");
    _mm_store_si128(&as_m128i, other.as_m128i);
  }
  ScoreKeyValue& operator=(const ScoreKeyValue& other) {
    _mm_store_si128(&as_m128i, other.as_m128i);
    return *this;
  }
 #endif
  // Transform applied before an entry is stored in the evaluation hash.
  // With SSE2 it does nothing (the original note says the entry is copied
  // atomically, so a matching key implies matching data). Without SSE2 the
  // key is XORed with the score.
  // NOTE(review): presumably so that a partially written entry fails the key
  // comparison after decode() - confirm.
  void encode() {
 #if defined(USE_SSE2)
    // ScoreKeyValue is copied to atomic, so if the key matches, the data matches.
 #else
    key ^= score;
 #endif
  }
  // Inverse of encode(); XOR is its own inverse, so the same code is reused.
  void decode() { encode(); }
  // Anonymous union: the {key, score} pair and, with SSE2, the same 16 bytes
  // viewed as one __m128i.
  union {
    struct {
      std::uint64_t key;
      std::uint64_t score;
    };
 #if defined(USE_SSE2)
    __m128i as_m128i;
 #endif
  };
 };
 // Minimal fixed-size hash table: `Size` entries of type T stored inline and
 // addressed by the low bits of the key. Size must be a power of two so that
 // masking with (Size - 1) selects a slot.
 template <typename T, size_t Size>
 struct HashTable {
  // Check that Size is a power of 2
  static_assert((Size & (Size - 1)) == 0, "");
  // A new table starts with all bytes zero.
  HashTable() { clear(); }
  // Zero every byte of the entry storage.
  void clear() { memset(entries_, 0, sizeof(entries_)); }
  // Pointer to the slot selected by the low bits of `k`.
  T* operator [] (const Key k) {
    const size_t slot = static_cast<size_t>(k) & (Size - 1);
    return &entries_[slot];
  }
 private:
  T entries_[Size];
 };
 // Hash table that caches already computed evaluation values.
 // Its entry count is chosen at compile time by USE_LARGE_EVAL_HASH.
 #if !defined(USE_LARGE_EVAL_HASH)
 // Default: 0x800000 entries of 16 bytes each, about 134 MB.
 struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x800000> {};
 #else
 // Large variant: 0x4000000 entries of 16 bytes each, 1 GB.
 // Original note: a bigger table may pay off when prefetching is available,
 // but the gain is small and the memory cost is high, so the smaller table
 // above may be the better default.
 struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x4000000> {};
 #endif
 // The single global evaluation hash table instance.
 EvaluateHashTable g_evalTable;
 // Prefetch the evaluation hash slot that `key` maps to.
 // The slot address is rounded down to a 32-byte boundary (low five bits
 // cleared) before being handed to prefetch().
 // The address arithmetic uses std::uintptr_t so that the pointer/integer
 // round trip has matching width on both 32-bit and 64-bit targets.
 void prefetch_evalhash(const Key key) {
  constexpr auto mask = ~static_cast<std::uintptr_t>(0x1f);
  const auto address = reinterpret_cast<std::uintptr_t>(g_evalTable[key]);
  prefetch(reinterpret_cast<void*>(address & mask));
 }
 #endif
 // Load the evaluation function file named by the "EvalFile" option.
 // Original note: options are saved and restored by commands such as bench,
 // so this function may be called more than once to reload the network.
 // The parameter objects are always (re)initialized first; loading is then
 // skipped entirely when the "SkipLoadingEval" option is set.
 void load_eval() {
  // Must happen before anything else, even when loading is skipped.
  NNUE::Initialize();
  if (Options["SkipLoadingEval"]) {
    std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
    return;
  }
  const std::string eval_path = Options["EvalFile"];
  NNUE::fileName = eval_path;
  std::ifstream net_file(eval_path, std::ios::binary);
  if (NNUE::ReadParameters(net_file)) {
    std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl;
    return;
  }
  // A read failure is only reported; execution continues afterwards.
  std::cout << "Error! " << NNUE::fileName << " not found or wrong format" << std::endl;
 }
 // Initialization hook of the evaluation module; currently does nothing.
 void init() {
 }
 // Evaluate `pos` with a full recalculation instead of a difference
 // calculation, by calling ComputeScore with refresh = true.
 // Per the original note, this is meant to be called only once, from
 // Position::set(), with difference calculation used afterwards; the value
 // returned is from the side to move's point of view (unlike other
 // evaluation functions), and this function is not a target for optimization.
 Value compute_eval(const Position& pos) {
  return NNUE::ComputeScore(pos, true);
 }
 // Evaluation function: returns the evaluation value of `pos`.
 // Lookup order: the score cached in the position's accumulator, then (when
 // USE_EVAL_HASH is defined) the evaluation hash table, and finally a fresh
 // NNUE computation whose result is stored back into the hash table.
 Value evaluate(const Position& pos) {
  const auto& accumulator = pos.state()->accumulator;
  // Fast path: the score for this position has already been computed.
  if (accumulator.computed_score) {
    return accumulator.score;
  }
 #if defined(USE_GLOBAL_OPTIONS)
  // When the global options disable the eval hash, skip the hash lookup
  // and compute the score directly.
  if (!GlobalOptions.use_eval_hash) {
    ASSERT_LV5(pos.state()->materialValue == Eval::material(pos));
    return NNUE::ComputeScore(pos);
  }
 #endif
 #if defined(USE_EVAL_HASH)
  // The position may already be in the evaluation hash table: copy the slot,
  // decode it and compare keys.
  const Key key = pos.key();
  ScoreKeyValue entry = *g_evalTable[key];
  entry.decode();
  if (entry.key == key) {
    // Hit: reuse the stored score.
    return Value(entry.score);
  }
 #endif
  Value score = NNUE::ComputeScore(pos);
 #if defined(USE_EVAL_HASH)
  // Miss: store the freshly computed score (encoded) in the hash table slot.
  entry.key = key;
  entry.score = score;
  entry.encode();
  *g_evalTable[key] = entry;
 #endif
  return score;
 }
 // Update the accumulator for `pos` when a difference calculation is
 // possible, without computing or returning a score.
 void evaluate_with_no_return(const Position& pos) {
  NNUE::UpdateAccumulatorIfPossible(pos);
 }
 // Meant to display a breakdown of the evaluation value of the current
 // position; not implemented, so it only prints a fixed notice. The position
 // argument is unused.
 void print_eval_stat(Position& /*pos*/) {
  std::cout << "--- EVAL STAT: not implemented" << std::endl;
 }
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
@@ -1,67 +0,0 @@
 // header used in NNUE evaluation function
 #ifndef _EVALUATE_NNUE_H_
 #define _EVALUATE_NNUE_H_
 #if defined(EVAL_NNUE)
 #include "nnue_feature_transformer.h"
 #include "nnue_architecture.h"
 #include <memory>
 namespace Eval {
 namespace NNUE {
 // hash value of evaluation function structure
 constexpr std::uint32_t kHashValue =
    FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
 // Deleter used by AlignedPtr to release objects automatically: runs the
 // object's destructor explicitly and then frees the storage with
 // aligned_free.
 template <typename T>
 struct AlignedDeleter {
  void operator()(T* ptr) const {
    ptr->~T();          // destroy the object in place
    aligned_free(ptr);  // then release its storage
  }
 };
 template <typename T>
 using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
 // Input feature converter
 extern AlignedPtr<FeatureTransformer> feature_transformer;
 // Evaluation function
 extern AlignedPtr<Network> network;
 // Evaluation function file name
 extern std::string fileName;
 // Saved evaluation function file name
 extern std::string savedfileName;
 // Get a string that represents the structure of the evaluation function
 std::string GetArchitectureString();
 // read the header
 bool ReadHeader(std::istream& stream,
    std::uint32_t* hash_value, std::string* architecture);
 // write the header
 bool WriteHeader(std::ostream& stream,
    std::uint32_t hash_value, const std::string& architecture);
 // read evaluation function parameters
 bool ReadParameters(std::istream& stream);
 // write evaluation function parameters
 bool WriteParameters(std::ostream& stream);
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,249 +0,0 @@
 // A class template that represents the input feature set of the NNUE evaluation function
 #ifndef _NNUE_FEATURE_SET_H_
 #define _NNUE_FEATURE_SET_H_
 #if defined(EVAL_NNUE)
 #include "features_common.h"
 #include <array>
 namespace Eval {
 namespace NNUE {
 namespace Features {
 // A class template that represents a list of values of type T known at
 // compile time. The values are carried as non-type template arguments.
 template <typename T, T... Values>
 struct CompileTimeList;
 // Non-empty list: First followed by Remaining...
 template <typename T, T First, T... Remaining>
 struct CompileTimeList<T, First, Remaining...> {
  // Returns true if value equals any element of the list.
  // Elements are compared front to back by recursing into the tail.
  static constexpr bool Contains(T value) {
    return value == First || CompileTimeList<T, Remaining...>::Contains(value);
  }
  // The elements of the list, in template-argument order
  static constexpr std::array<T, sizeof...(Remaining) + 1>
      kValues = {{First, Remaining...}};
 };
 // Out-of-class definition so that kValues can be odr-used (e.g. bound to a
 // reference or iterated over) when compiling as C++11/14.
 template <typename T, T First, T... Remaining>
 constexpr std::array<T, sizeof...(Remaining) + 1>
    CompileTimeList<T, First, Remaining...>::kValues;
 // Empty list: contains nothing and exposes a zero-length array
 template <typename T>
 struct CompileTimeList<T> {
  static constexpr bool Contains(T /*value*/) {
    return false;
  }
  static constexpr std::array<T, 0> kValues = {{}};
 };
 // Matching out-of-class definition for the empty list, for the same reason
 // as the non-empty specialization above.
 template <typename T>
 constexpr std::array<T, 0> CompileTimeList<T>::kValues;
 // Class template that adds a value to the beginning of a CompileTimeList.
 // Despite the name, Result places AnotherValue in front of the existing
 // values (i.e. it prepends).
 template <typename T, typename ListType, T Value>
 struct AppendToList;
 template <typename T, T... Values, T AnotherValue>
 struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
  using Result = CompileTimeList<T, AnotherValue, Values...>;
 };
 // Class template for adding a value to a sorted list without duplicates.
 // Result is the list type after the insertion.
 template <typename T, typename ListType, T Value>
 struct InsertToSet;
 // Non-empty list:
 //  - if the value is already contained, the list is returned unchanged;
 //  - else if the value is smaller than First, it is placed in front;
 //  - otherwise it is inserted into the remaining elements recursively and
 //    First is put back in front of that result.
 // Note: std::conditional_t names both alternatives, so the recursive
 // InsertToSet is instantiated even when its result is not selected.
 template <typename T, T First, T... Remaining, T AnotherValue>
 struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
  using Result = std::conditional_t<
      CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
      CompileTimeList<T, First, Remaining...>,
      std::conditional_t<(AnotherValue <First),
          CompileTimeList<T, AnotherValue, First, Remaining...>,
          typename AppendToList<T, typename InsertToSet<
              T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
              First>::Result>>;
 };
 // Empty list: the result is the single-element list holding Value
 template <typename T, T Value>
 struct InsertToSet<T, CompileTimeList<T>, Value> {
  using Result = CompileTimeList<T, Value>;
 };
 // Base class of feature set (CRTP).
 // Derived must provide the static member functions CollectActiveIndices()
 // and CollectChangedIndices(); this base runs them once per perspective.
 template <typename Derived>
 class FeatureSetBase {
 public:
  // Get a list of indices with a value of 1 among the features.
  // active[] is indexed by perspective; indices are appended to each list.
  template <typename IndexListType>
  static void AppendActiveIndices(
      const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
    for (const auto perspective : Colors) {
      Derived::CollectActiveIndices(
          pos, trigger, perspective, &active[perspective]);
    }
  }
  // Get a list of indices whose values have changed from the previous one in the feature quantity.
  // For each perspective, reset[perspective] is set to true when the given
  // trigger fired, in which case added[perspective] receives the complete
  // list of active indices; otherwise removed[]/added[] receive only the
  // changed indices and reset[perspective] is false.
  template <typename PositionType, typename IndexListType>
  static void AppendChangedIndices(
      const PositionType& pos, TriggerEvent trigger,
      IndexListType removed[2], IndexListType added[2], bool reset[2]) {
    // Always give the output flags a defined value, including on the early
    // return below. Previously they were left untouched when no piece was
    // dirty, so a caller passing an uninitialized array would go on to read
    // indeterminate values.
    for (const auto perspective : Colors) {
      reset[perspective] = false;
    }
    const auto& dp = pos.state()->dirtyPiece;
    // Nothing moved: there is nothing to remove, add or reset.
    if (dp.dirty_num == 0) return;
    for (const auto perspective : Colors) {
      // Decide whether this trigger requires a full recalculation.
      // Only dp.pieceNo[0] is inspected for the king checks.
      switch (trigger) {
        case TriggerEvent::kNone:
          break;
        case TriggerEvent::kFriendKingMoved:
          reset[perspective] =
              dp.pieceNo[0] == PIECE_NUMBER_KING + perspective;
          break;
        case TriggerEvent::kEnemyKingMoved:
          reset[perspective] =
              dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective;
          break;
        case TriggerEvent::kAnyKingMoved:
          reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING;
          break;
        case TriggerEvent::kAnyPieceMoved:
          reset[perspective] = true;
          break;
        default:
          assert(false);
          break;
      }
      if (reset[perspective]) {
        Derived::CollectActiveIndices(
            pos, trigger, perspective, &added[perspective]);
      } else {
        Derived::CollectChangedIndices(
            pos, trigger, perspective,
            &removed[perspective], &added[perspective]);
      }
    }
  }
 };
 // Feature set composed of two or more feature types.
 // The set is handled recursively as Head (first type) plus Tail (the set of
 // the remaining types). Tail is always processed before Head so that the
 // amount of calculation at runtime stays linear; Head's indices are placed
 // after Tail's by shifting them by Tail::kDimensions.
 template <typename FirstFeatureType, typename... RemainingFeatureTypes>
 class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
    public FeatureSetBase<
        FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
 private:
  using Head = FirstFeatureType;
  using Tail = FeatureSet<RemainingFeatureTypes...>;
 public:
  // Hash value embedded in the evaluation function file:
  // Head's hash combined with Tail's hash rotated left by one bit
  static constexpr std::uint32_t kHashValue =
      Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
  // Total number of feature dimensions
  static constexpr IndexType kDimensions =
      Head::kDimensions + Tail::kDimensions;
  // Upper bound on the number of indices that are 1 at the same time
  static constexpr IndexType kMaxActiveDimensions =
      Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
  // Sorted, duplicate-free list of the triggers on which everything is
  // recalculated instead of using the difference calculation
  using SortedTriggerSet = typename InsertToSet<TriggerEvent,
      typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
  static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
  // Name of the feature set: the names of its members joined by '+'
  static std::string GetName() {
    std::string name(Head::kName);
    name += "+";
    name += Tail::GetName();
    return name;
  }
 private:
  // Collect the indices whose value is 1.
  // Head only contributes when its refresh trigger matches.
  template <typename IndexListType>
  static void CollectActiveIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexListType* const active) {
    Tail::CollectActiveIndices(pos, trigger, perspective, active);
    if (Head::kRefreshTrigger != trigger) return;
    // Everything appended from here on belongs to Head and must be shifted
    // past Tail's index range.
    auto index = active->size();
    Head::AppendActiveIndices(pos, perspective, active);
    while (index < active->size()) {
      (*active)[index] += Tail::kDimensions;
      ++index;
    }
  }
  // Collect the indices whose value changed since the previous position.
  // Head only contributes when its refresh trigger matches.
  template <typename IndexListType>
  static void CollectChangedIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexListType* const removed, IndexListType* const added) {
    Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
    if (Head::kRefreshTrigger != trigger) return;
    // Remember where Head's entries begin in both lists before appending.
    auto removed_index = removed->size();
    auto added_index = added->size();
    Head::AppendChangedIndices(pos, perspective, removed, added);
    while (removed_index < removed->size()) {
      (*removed)[removed_index] += Tail::kDimensions;
      ++removed_index;
    }
    while (added_index < added->size()) {
      (*added)[added_index] += Tail::kDimensions;
      ++added_index;
    }
  }
  // The base class and every other FeatureSet instantiation (used
  // recursively as Tail) need access to the private collectors
  friend class FeatureSetBase<FeatureSet>;
  template <typename... FeatureTypes>
  friend class FeatureSet;
 };
 // Class template that represents the feature set.
 // Specialization with one template argument: every constant and operation
 // is forwarded to the single FeatureType, and it serves as the end of the
 // recursion for sets with several feature types.
 template <typename FeatureType>
 class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
 public:
  // Hash value embedded in the evaluation function file
  static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
  // number of feature dimensions
  static constexpr IndexType kDimensions = FeatureType::kDimensions;
  // The maximum number of indices whose value is 1 at the same time among the feature values
  static constexpr IndexType kMaxActiveDimensions =
      FeatureType::kMaxActiveDimensions;
  // List of triggers on which all calculations are performed instead of the
  // difference calculation (a single entry here)
  using SortedTriggerSet =
      CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
  static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
  // Get the feature quantity name
  static std::string GetName() {
    return FeatureType::kName;
  }
 private:
  // Get a list of indices with a value of 1 among the features.
  // Nothing is appended unless trigger is this feature's refresh trigger.
  static void CollectActiveIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexList* const active) {
    if (FeatureType::kRefreshTrigger == trigger) {
      FeatureType::AppendActiveIndices(pos, perspective, active);
    }
  }
  // Get a list of indices whose values have changed from the previous one in the feature quantity.
  // Nothing is appended unless trigger is this feature's refresh trigger.
  static void CollectChangedIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexList* const removed, IndexList* const added) {
    if (FeatureType::kRefreshTrigger == trigger) {
      FeatureType::AppendChangedIndices(pos, perspective, removed, added);
    }
  }
  // Make the base class and the class template that recursively uses itself a friend
  friend class FeatureSetBase<FeatureSet>;
  template <typename... FeatureTypes>
  friend class FeatureSet;
 };
 }  // namespace Features
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,47 +0,0 @@
 //Common header of input features of NNUE evaluation function
 #ifndef _NNUE_FEATURES_COMMON_H_
 #define _NNUE_FEATURES_COMMON_H_
 #if defined(EVAL_NNUE)
 #include "../../../evaluate.h"
 #include "../nnue_common.h"
 namespace Eval {
 namespace NNUE {
 namespace Features {
 // Index list type
 class IndexList;
 // Class template that represents the feature set
 template <typename... FeatureTypes>
 class FeatureSet;
 // Events on which all calculations are performed instead of the difference
 // calculation
 enum class TriggerEvent {
  kNone, // Calculate the difference whenever possible
  kFriendKingMoved, // recalculate everything when the own king moves
  kEnemyKingMoved, // recalculate everything when the enemy king moves
  kAnyKingMoved, // recalculate everything when either king moves
  kAnyPieceMoved, // always recalculate everything
 };
 // Which side something is associated with, relative to the side to move
 enum class Side {
  kFriend, // side to move
  kEnemy, // opponent
 };
 }  // namespace Features
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,84 +0,0 @@
 //Definition of input features HalfKP of NNUE evaluation function
 #if defined(EVAL_NNUE)
 #include "half_kp.h"
 #include "index_list.h"
 namespace Eval {
 namespace NNUE {
 namespace Features {
 // Compute the feature index from the king square and a BonaPiece
 template <Side AssociatedKing>
 inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, BonaPiece p) {
  // Every king square owns a contiguous run of fe_end indices; p selects
  // the entry inside that run.
  const IndexType king_base =
      static_cast<IndexType>(fe_end) * static_cast<IndexType>(sq_k);
  return king_base + p;
 }
 // Fetch the piece list seen from perspective and the square of the king
 // this feature is associated with
 template <Side AssociatedKing>
 inline void HalfKP<AssociatedKing>::GetPieces(
    const Position& pos, Color perspective,
    BonaPiece** pieces, Square* sq_target_k) {
  // Pick the piece list that matches the requested point of view.
  if (perspective == BLACK) {
    *pieces = pos.eval_list()->piece_list_fb();
  } else {
    *pieces = pos.eval_list()->piece_list_fw();
  }
  // Piece number of the associated king: the perspective's own king for
  // Side::kFriend, the other king otherwise.
  PieceNumber king_number;
  if (AssociatedKing == Side::kFriend) {
    king_number = static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective);
  } else {
    king_number = static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
  }
  // Convert the king's BonaPiece back into a square.
  const auto king_offset = (*pieces)[king_number] - f_king;
  *sq_target_k = static_cast<Square>(king_offset % SQUARE_NB);
 }
 // Append the index of every feature that is currently 1: one per non-empty
 // piece-list entry below PIECE_NUMBER_KING, combined with the king square
 template <Side AssociatedKing>
 void HalfKP<AssociatedKing>::AppendActiveIndices(
    const Position& pos, Color perspective, IndexList* active) {
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
  BonaPiece* piece_list;
  Square king_square;
  GetPieces(pos, perspective, &piece_list, &king_square);
  for (PieceNumber number = PIECE_NUMBER_ZERO; number < PIECE_NUMBER_KING;
       ++number) {
    // Entries equal to BONA_PIECE_ZERO contribute no feature.
    if (piece_list[number] == Eval::BONA_PIECE_ZERO) continue;
    active->push_back(MakeIndex(king_square, piece_list[number]));
  }
 }
 // Append the indices of the features that changed with the last move:
 // the old state of every dirty piece goes to removed, the new one to added
 template <Side AssociatedKing>
 void HalfKP<AssociatedKing>::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
  BonaPiece* piece_list;
  Square king_square;
  GetPieces(pos, perspective, &piece_list, &king_square);
  const auto& dirty = pos.state()->dirtyPiece;
  for (int n = 0; n < dirty.dirty_num; ++n) {
    // Kings are not features themselves, so their moves are skipped here.
    if (dirty.pieceNo[n] < PIECE_NUMBER_KING) {
      const auto& change = dirty.changed_piece[n];
      const auto before =
          static_cast<BonaPiece>(change.old_piece.from[perspective]);
      if (before != Eval::BONA_PIECE_ZERO) {
        removed->push_back(MakeIndex(king_square, before));
      }
      const auto after =
          static_cast<BonaPiece>(change.new_piece.from[perspective]);
      if (after != Eval::BONA_PIECE_ZERO) {
        added->push_back(MakeIndex(king_square, after));
      }
    }
  }
 }
 template class HalfKP<Side::kFriend>;
 template class HalfKP<Side::kEnemy>;
 }  // namespace Features
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
@@ -1,62 +0,0 @@
 //Definition of input features HalfKP of NNUE evaluation function
 #ifndef _NNUE_FEATURES_HALF_KP_H_
 #define _NNUE_FEATURES_HALF_KP_H_
 #if defined(EVAL_NNUE)
 #include "../../../evaluate.h"
 #include "features_common.h"
 namespace Eval {
 namespace NNUE {
 namespace Features {
 // Feature HalfKP: combination of the position of the own king or the enemy
 // king (selected by AssociatedKing) and the position of a piece other than
 // the kings
 template <Side AssociatedKing>
 class HalfKP {
 public:
  // feature quantity name
  static constexpr const char* kName =
      (AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
  // Hash value embedded in the evaluation function file.
  // The lowest bit differs between the Friend and the Enemy variant.
  static constexpr std::uint32_t kHashValue =
      0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
  // number of feature dimensions: one block of fe_end entries per king square
  static constexpr IndexType kDimensions =
      static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(fe_end);
  // The maximum number of indices whose value is 1 at the same time among the feature values
  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
  // Event on which a full calculation is done instead of the difference
  // calculation: a move of the associated king
  static constexpr TriggerEvent kRefreshTrigger =
      (AssociatedKing == Side::kFriend) ?
      TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
  // Get a list of indices with a value of 1 among the features
  static void AppendActiveIndices(const Position& pos, Color perspective,
                                  IndexList* active);
  // Get a list of indices whose values have changed from the previous one in the feature quantity
  static void AppendChangedIndices(const Position& pos, Color perspective,
                                   IndexList* removed, IndexList* added);
  // Find the index of the feature quantity from the king position and BonaPiece
  static IndexType MakeIndex(Square sq_k, BonaPiece p);
 private:
  // Get the piece information: the piece list for perspective (via pieces)
  // and the square of the associated king (via sq_target_k)
  static void GetPieces(const Position& pos, Color perspective,
                        BonaPiece** pieces, Square* sq_target_k);
 };
 }  // namespace Features
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,55 +0,0 @@
 // Definition of index list of input features
 #ifndef _NNUE_FEATURES_INDEX_LIST_H_
 #define _NNUE_FEATURES_INDEX_LIST_H_
 #if defined(EVAL_NNUE)
 #include "../../../position.h"
 #include "../nnue_architecture.h"
 namespace Eval {
 namespace NNUE {
 namespace Features {
 // Fixed-capacity list used for feature indices.
 // Storage for MaxSize elements lives inside the object; only the first
 // size() of them are considered part of the list. No bounds checking is
 // performed by any member.
 template <typename T, std::size_t MaxSize>
 class ValueList {
 public:
  // Number of elements currently in the list
  std::size_t size() const { return size_; }
  // Set the element count directly; the stored values are left untouched
  void resize(std::size_t size) { size_ = size; }
  // Store value behind the current last element and grow the list by one
  void push_back(const T& value) {
    values_[size_] = value;
    ++size_;
  }
  // Unchecked element access
  T& operator[](std::size_t index) { return values_[index]; }
  const T& operator[](std::size_t index) const { return values_[index]; }
  // Iteration over the elements in use
  T* begin() { return &values_[0]; }
  const T* begin() const { return &values_[0]; }
  T* end() { return begin() + size_; }
  const T* end() const { return begin() + size_; }
  // Exchange the contents of two lists.
  // Slots are exchanged up to the longer of the two lengths, which moves
  // every element that is in use on either side.
  void swap(ValueList& other) {
    const std::size_t count = (size_ < other.size_) ? other.size_ : size_;
    std::size_t slot = 0;
    while (slot < count) {
      std::swap(values_[slot], other.values_[slot]);
      ++slot;
    }
    std::swap(size_, other.size_);
  }
 private:
  T values_[MaxSize];
  std::size_t size_ = 0;
 };
 // Type of feature index list: a ValueList of IndexType with capacity
 // RawFeatures::kMaxActiveDimensions
 class IndexList
    : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
 };
 }  // namespace Features
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,217 +0,0 @@
 // Definition of layer AffineTransform of NNUE evaluation function
 #ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_
 #define _NNUE_LAYERS_AFFINE_TRANSFORM_H_
 #if defined(EVAL_NNUE)
 #include "../nnue_common.h"
 namespace Eval {
 namespace NNUE {
 namespace Layers {
 // Affine transformation layer: output = weights * input + biases.
 // The input is the 8-bit unsigned output of PreviousLayer, the weights are
 // signed 8-bit and the biases and outputs are 32-bit integers. Layers are
 // chained by composition: each layer owns the layer that precedes it.
 template <typename PreviousLayer, IndexType OutputDimensions>
 class AffineTransform {
 public:
  // Input/output type
  using InputType = typename PreviousLayer::OutputType;
  using OutputType = std::int32_t;
  static_assert(std::is_same<InputType, std::uint8_t>::value, "");
  // number of input/output dimensions
  static constexpr IndexType kInputDimensions =
      PreviousLayer::kOutputDimensions;
  static constexpr IndexType kOutputDimensions = OutputDimensions;
  // Length of one weight row: the input dimensions padded via
  // CeilToMultiple() to a multiple of kMaxSimdWidth
  static constexpr IndexType kPaddedInputDimensions =
      CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
  // Size of forward propagation buffer used in this layer
  static constexpr std::size_t kSelfBufferSize =
      CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
  // Size of the forward propagation buffer used from the input layer to this layer
  static constexpr std::size_t kBufferSize =
      PreviousLayer::kBufferSize + kSelfBufferSize;
  // Hash value embedded in the evaluation function file.
  // Mixes a layer-specific constant, the output dimensions and the previous
  // layer's hash (the two XORs together apply a rotation by one bit).
  static constexpr std::uint32_t GetHashValue() {
    std::uint32_t hash_value = 0xCC03DAE4u;
    hash_value += kOutputDimensions;
    hash_value ^= PreviousLayer::GetHashValue() >> 1;
    hash_value ^= PreviousLayer::GetHashValue() << 31;
    return hash_value;
  }
  // A string that represents the structure from the input layer to this layer
  static std::string GetStructureString() {
    return "AffineTransform[" +
        std::to_string(kOutputDimensions) + "<-" +
        std::to_string(kInputDimensions) + "](" +
        PreviousLayer::GetStructureString() + ")";
  }
  // Read parameters: first those of the previous layer, then this layer's
  // biases followed by its weights, both as raw bytes.
  // Returns false if the previous layer failed or the stream is in a failed
  // state afterwards.
  bool ReadParameters(std::istream& stream) {
    if (!previous_layer_.ReadParameters(stream)) return false;
    stream.read(reinterpret_cast<char*>(biases_),
                kOutputDimensions * sizeof(BiasType));
    stream.read(reinterpret_cast<char*>(weights_),
                kOutputDimensions * kPaddedInputDimensions *
                sizeof(WeightType));
    return !stream.fail();
  }
  // Write parameters in the same order and layout that ReadParameters()
  // expects. Returns false on failure.
  bool WriteParameters(std::ostream& stream) const {
    if (!previous_layer_.WriteParameters(stream)) return false;
    stream.write(reinterpret_cast<const char*>(biases_),
                 kOutputDimensions * sizeof(BiasType));
    stream.write(reinterpret_cast<const char*>(weights_),
                 kOutputDimensions * kPaddedInputDimensions *
                 sizeof(WeightType));
    return !stream.fail();
  }
  // Forward propagation.
  // This layer's output is written to the start of buffer; the previous
  // layers use the part of the buffer that follows (buffer + kSelfBufferSize).
  // Returns a pointer to the kOutputDimensions results.
  const OutputType* Propagate(
      const TransformedFeatureType* transformed_features, char* buffer) const {
    const auto input = previous_layer_.Propagate(
        transformed_features, buffer + kSelfBufferSize);
    const auto output = reinterpret_cast<OutputType*>(buffer);
    // Per-architecture setup: number of SIMD chunks per weight row and the
    // input viewed as SIMD vectors. The generic fallback needs none of it.
 #if defined(USE_AVX512)
    constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
    const __m512i kOnes = _mm512_set1_epi16(1);
    const auto input_vector = reinterpret_cast<const __m512i*>(input);
 #elif defined(USE_AVX2)
    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
    const __m256i kOnes = _mm256_set1_epi16(1);
    const auto input_vector = reinterpret_cast<const __m256i*>(input);
 #elif defined(USE_SSSE3)
    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
    const __m128i kOnes = _mm_set1_epi16(1);
    const auto input_vector = reinterpret_cast<const __m128i*>(input);
 #elif defined(IS_ARM)
    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
    const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
 #endif
    // One dot product per output; row i of the weight matrix starts at
    // offset i * kPaddedInputDimensions.
    for (IndexType i = 0; i < kOutputDimensions; ++i) {
      const IndexType offset = i * kPaddedInputDimensions;
 #if defined(USE_AVX512)
      __m512i sum = _mm512_setzero_si512();
      const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
      for (IndexType j = 0; j < kNumChunks; ++j) {
 #if defined(__MINGW32__) || defined(__MINGW64__)
          __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
 #else
          __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
 #endif
          // maddubs yields 16-bit pair sums; multiplying by ones with madd
          // widens them to 32-bit before accumulating
          product = _mm512_madd_epi16(product, kOnes);
          sum = _mm512_add_epi32(sum, product);
      }
      output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
      // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
      // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
      // and we have to do one more 256bit chunk.
      if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
      {
          const auto iv_256  = reinterpret_cast<const __m256i*>(input);
          const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
          // index of the first 256-bit chunk not covered by the 512-bit loop
          int j = kNumChunks * 2;
 #if defined(__MINGW32__) || defined(__MINGW64__)  // See HACK comment below in AVX2.
          __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
 #else
          __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
 #endif
          sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
          sum256 = _mm256_hadd_epi32(sum256, sum256);
          sum256 = _mm256_hadd_epi32(sum256, sum256);
          const __m128i lo = _mm256_extracti128_si256(sum256, 0);
          const __m128i hi = _mm256_extracti128_si256(sum256, 1);
          output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
      }
 #elif defined(USE_AVX2)
      __m256i sum = _mm256_setzero_si256();
      const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
      for (IndexType j = 0; j < kNumChunks; ++j) {
        __m256i product = _mm256_maddubs_epi16(
 #if defined(__MINGW32__) || defined(__MINGW64__)
          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
          //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
          //       even though alignas is specified.
          _mm256_loadu_si256
 #else
          _mm256_load_si256
 #endif
          (&input_vector[j]), _mm256_load_si256(&row[j]));
        product = _mm256_madd_epi16(product, kOnes);
        sum = _mm256_add_epi32(sum, product);
      }
      // Horizontal reduction: two hadd steps sum within each 128-bit half,
      // then the two halves are added together with the bias
      sum = _mm256_hadd_epi32(sum, sum);
      sum = _mm256_hadd_epi32(sum, sum);
      const __m128i lo = _mm256_extracti128_si256(sum, 0);
      const __m128i hi = _mm256_extracti128_si256(sum, 1);
      output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
 #elif defined(USE_SSSE3)
      // the bias is placed in the accumulator up front, so the reduced sum
      // already includes it
      __m128i sum = _mm_cvtsi32_si128(biases_[i]);
      const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
      for (IndexType j = 0; j < kNumChunks; ++j) {
        __m128i product = _mm_maddubs_epi16(
            _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
        product = _mm_madd_epi16(product, kOnes);
        sum = _mm_add_epi32(sum, product);
      }
      sum = _mm_hadd_epi32(sum, sum);
      sum = _mm_hadd_epi32(sum, sum);
      output[i] = _mm_cvtsi128_si32(sum);
 #elif defined(IS_ARM)
      // the bias goes into the first lane; the four lanes are added at the end
      int32x4_t sum = {biases_[i]};
      const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
      for (IndexType j = 0; j < kNumChunks; ++j) {
        // two 8-byte halves per chunk: multiply, multiply-accumulate, then
        // pairwise add into the 32-bit accumulator
        int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
        product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
        sum = vpadalq_s16(sum, product);
      }
      output[i] = sum[0] + sum[1] + sum[2] + sum[3];
 #else
      // Generic fallback: plain dot product over the unpadded input
      OutputType sum = biases_[i];
      for (IndexType j = 0; j < kInputDimensions; ++j) {
        sum += weights_[offset + j] * input[j];
      }
      output[i] = sum;
 #endif
    }
    return output;
  }
 private:
  // parameter type
  using BiasType = OutputType;
  using WeightType = std::int8_t;
  // Make the learning class a friend
  friend class Trainer<AffineTransform>;
  // the layer immediately before this layer
  PreviousLayer previous_layer_;
  // parameters, aligned to the cache line size; weights_ holds
  // kOutputDimensions rows of kPaddedInputDimensions entries each
  alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
  alignas(kCacheLineSize)
      WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
 };
 }  // namespace Layers
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,177 +0,0 @@
 // Definition of layer ClippedReLU of NNUE evaluation function
 #ifndef _NNUE_LAYERS_CLIPPED_RELU_H_
 #define _NNUE_LAYERS_CLIPPED_RELU_H_
 #if defined(EVAL_NNUE)
 #include "../nnue_common.h"
 namespace Eval {
 namespace NNUE {
 namespace Layers {
 // Clipped ReLU activation layer.
 // Takes the 32-bit output of PreviousLayer, shifts it right by
 // kWeightScaleBits and clamps it to the range [0, 127] (see the scalar loop
 // at the end of Propagate()), producing 8-bit unsigned values.
 template <typename PreviousLayer>
 class ClippedReLU {
 public:
  // Input/output type
  using InputType = typename PreviousLayer::OutputType;
  using OutputType = std::uint8_t;
  static_assert(std::is_same<InputType, std::int32_t>::value, "");
  // number of input/output dimensions (the layer does not change the size)
  static constexpr IndexType kInputDimensions =
      PreviousLayer::kOutputDimensions;
  static constexpr IndexType kOutputDimensions = kInputDimensions;
  // Size of forward propagation buffer used in this layer
  static constexpr std::size_t kSelfBufferSize =
      CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
  // Size of the forward propagation buffer used from the input layer to this layer
  static constexpr std::size_t kBufferSize =
      PreviousLayer::kBufferSize + kSelfBufferSize;
  // Hash value embedded in the evaluation function file
  static constexpr std::uint32_t GetHashValue() {
    std::uint32_t hash_value = 0x538D24C7u;
    hash_value += PreviousLayer::GetHashValue();
    return hash_value;
  }
  // A string that represents the structure from the input layer to this layer
  static std::string GetStructureString() {
    return "ClippedReLU[" +
        std::to_string(kOutputDimensions) + "](" +
        PreviousLayer::GetStructureString() + ")";
  }
  // read parameters (this layer has none of its own; forwards to the
  // previous layer)
  bool ReadParameters(std::istream& stream) {
    return previous_layer_.ReadParameters(stream);
  }
  // write parameters (this layer has none of its own; forwards to the
  // previous layer)
  bool WriteParameters(std::ostream& stream) const {
    return previous_layer_.WriteParameters(stream);
  }
  // Forward propagation.
  // This layer's output is written to the start of buffer; the previous
  // layers use the part of the buffer that follows (buffer + kSelfBufferSize).
  // The SIMD sections handle whole chunks and set kStart to the first
  // element they did not cover; the scalar loop at the end handles the rest.
  const OutputType* Propagate(
      const TransformedFeatureType* transformed_features, char* buffer) const {
    const auto input = previous_layer_.Propagate(
        transformed_features, buffer + kSelfBufferSize);
    const auto output = reinterpret_cast<OutputType*>(buffer);
 #if defined(USE_AVX2)
    constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
    const __m256i kZero = _mm256_setzero_si256();
    // permutation applied after packing; NOTE(review): presumably restores
    // element order, since the 256-bit pack instructions work per 128-bit lane
    const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
    const auto in = reinterpret_cast<const __m256i*>(input);
    const auto out = reinterpret_cast<__m256i*>(output);
    for (IndexType i = 0; i < kNumChunks; ++i) {
      // four input vectors of 32-bit values become one vector of bytes:
      // pack to 16-bit, shift by kWeightScaleBits, pack to 8-bit, clamp at 0
      const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
 #if defined(__MINGW32__) || defined(__MINGW64__)
        // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
        //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
        //       even though alignas is specified.
        _mm256_loadu_si256
 #else
        _mm256_load_si256
 #endif
        (&in[i * 4 + 0]),
 #if defined(__MINGW32__) || defined(__MINGW64__)
        _mm256_loadu_si256
 #else
        _mm256_load_si256
 #endif
        (&in[i * 4 + 1])), kWeightScaleBits);
      const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
 #if defined(__MINGW32__) || defined(__MINGW64__)
        _mm256_loadu_si256
 #else
        _mm256_load_si256
 #endif
        (&in[i * 4 + 2]),
 #if defined(__MINGW32__) || defined(__MINGW64__)
        _mm256_loadu_si256
 #else
        _mm256_load_si256
 #endif
        (&in[i * 4 + 3])), kWeightScaleBits);
 #if defined(__MINGW32__) || defined(__MINGW64__)
      _mm256_storeu_si256
 #else
      _mm256_store_si256
 #endif
        (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
          _mm256_packs_epi16(words0, words1), kZero), kOffsets));
    }
    constexpr IndexType kStart = kNumChunks * kSimdWidth;
 #elif defined(USE_SSSE3)
    constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
    const __m128i kZero = _mm_setzero_si128();
 #ifndef USE_SSE41
    const __m128i k0x80s = _mm_set1_epi8(-128);
 #endif
    const auto in = reinterpret_cast<const __m128i*>(input);
    const auto out = reinterpret_cast<__m128i*>(output);
    for (IndexType i = 0; i < kNumChunks; ++i) {
      const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
          _mm_load_si128(&in[i * 4 + 0]),
          _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
      const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
          _mm_load_si128(&in[i * 4 + 2]),
          _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
      const __m128i packedbytes = _mm_packs_epi16(words0, words1);
      // Clamp negatives to zero. Without SSE4.1 there is no signed byte max,
      // so it is emulated: a saturating add of -128 maps every negative
      // value to -128, and the saturating subtract of -128 maps that to 0
      // while restoring non-negative values.
      _mm_store_si128(&out[i], 
 #ifdef USE_SSE41
        _mm_max_epi8(packedbytes, kZero)
 #else
        _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
 #endif
      );
    }
    constexpr IndexType kStart = kNumChunks * kSimdWidth;
 #elif defined(IS_ARM)
    // each iteration consumes two 4-lane 32-bit vectors and produces 8 bytes
    constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
    const int8x8_t kZero = {0};
    const auto in = reinterpret_cast<const int32x4_t*>(input);
    const auto out = reinterpret_cast<int8x8_t*>(output);
    for (IndexType i = 0; i < kNumChunks; ++i) {
      int16x8_t shifted;
      // fill the two halves of shifted with the narrowed, shifted inputs
      const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
      pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
      pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
      out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
    }
    constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
 #else
    constexpr IndexType kStart = 0;
 #endif
    // Scalar path for the elements not covered above (all of them when no
    // SIMD section is compiled in)
    for (IndexType i = kStart; i < kInputDimensions; ++i) {
      output[i] = static_cast<OutputType>(
          std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
    }
    return output;
  }
 private:
  // Make the learning class a friend
  friend class Trainer<ClippedReLU>;
  // the layer immediately before this layer
  PreviousLayer previous_layer_;
 };
 }  // namespace Layers
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,74 +0,0 @@
 // NNUE evaluation function layer InputSlice definition
 #ifndef _NNUE_LAYERS_INPUT_SLICE_H_
 #define _NNUE_LAYERS_INPUT_SLICE_H_
 #if defined(EVAL_NNUE)
 #include "../nnue_common.h"
 namespace Eval {
 namespace NNUE {
 namespace Layers {
 // Input layer: hands a fixed window of the transformed feature vector,
 // starting at Offset and OutputDimensions elements long, to the next layer
 template <IndexType OutputDimensions, IndexType Offset = 0>
 class InputSlice {
  public:
   // The window has to start on a multiple of the widest SIMD register
   static_assert(Offset % kMaxSimdWidth == 0, "");

   // Element type produced by this layer
   using OutputType = TransformedFeatureType;

   // Number of elements produced by this layer
   static constexpr IndexType kOutputDimensions = OutputDimensions;

   // Forward propagation buffer needed from the input layer up to this layer:
   // none, because the layer only returns a pointer into its input
   static constexpr std::size_t kBufferSize = 0;

   // Hash value embedded in the evaluation function file
   static constexpr std::uint32_t GetHashValue() {
     return 0xEC42E90Du ^ kOutputDimensions ^ (Offset << 10);
   }

   // Human readable description of the structure from the input layer to
   // this layer, e.g. "InputSlice[<dims>(<first>:<last>)]"
   static std::string GetStructureString() {
     const std::string first = std::to_string(Offset);
     const std::string last = std::to_string(Offset + kOutputDimensions);
     return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
         first + ":" + last + ")]";
   }

   // This layer has no parameters, so reading always succeeds
   bool ReadParameters(std::istream&) {
     return true;
   }

   // This layer has no parameters, so writing always succeeds
   bool WriteParameters(std::ostream&) const {
     return true;
   }

   // Forward propagation: the result is the input shifted by Offset elements;
   // the scratch buffer is not used
   const OutputType* Propagate(
       const TransformedFeatureType* transformed_features,
       char* /*buffer*/) const {
     return &transformed_features[Offset];
   }
 };
 }  // namespace Layers
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,30 +0,0 @@
 // Class for difference calculation of NNUE evaluation function
 #ifndef _NNUE_ACCUMULATOR_H_
 #define _NNUE_ACCUMULATOR_H_
 #if defined(EVAL_NNUE)
 #include "nnue_architecture.h"
 namespace Eval {
 namespace NNUE {
 // Result of the affine transformation of the input features, stored per
 // perspective and per refresh trigger, kept together with the final
 // evaluation value and the flags telling whether each of them was computed
 struct alignas(32) Accumulator {
   std::int16_t
       accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
   Value score{VALUE_ZERO};
   bool computed_accumulation{false};
   bool computed_score{false};
 };
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,33 +0,0 @@
 // Input features and network structure used in NNUE evaluation function
 #ifndef _NNUE_ARCHITECTURE_H_
 #define _NNUE_ARCHITECTURE_H_
 #if defined(EVAL_NNUE)
 // include a header that defines the input features and network structure
 //#include "architectures/k-p_256x2-32-32.h"
 //#include "architectures/k-p-cr_256x2-32-32.h"
 //#include "architectures/k-p-cr-ep_256x2-32-32.h"
 #include "architectures/halfkp_256x2-32-32.h"
 //#include "architectures/halfkp-cr-ep_256x2-32-32.h"
 //#include "architectures/halfkp_384x2-32-32.h"
 namespace Eval {
 namespace NNUE {
 // Compile-time sanity checks on the selected architecture. Each assertion
 // carries a message so that a mismatching architecture header produces a
 // readable diagnostic instead of an empty one.
 static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0,
               "kTransformedFeatureDimensions must be a multiple of kMaxSimdWidth");
 static_assert(Network::kOutputDimensions == 1,
               "Network must have exactly one output dimension");
 static_assert(std::is_same<Network::OutputType, std::int32_t>::value,
               "Network::OutputType must be std::int32_t");
 // List of triggers for which everything is recomputed from scratch instead
 // of being updated by difference calculation (taken from the raw features)
 constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,64 +0,0 @@
 // Constants used in NNUE evaluation function
 #ifndef _NNUE_COMMON_H_
 #define _NNUE_COMMON_H_
 #if defined(EVAL_NNUE)
 #if defined(USE_AVX2)
 #include <immintrin.h>
 #elif defined(USE_SSE41)
 #include <smmintrin.h>
 #elif defined(USE_SSSE3)
 #include <tmmintrin.h>
 #elif defined(USE_SSE2)
 #include <emmintrin.h>
 #endif
 namespace Eval {
 namespace NNUE {
 // Version tag of the evaluation function file format
 // NOTE(review): presumably checked against the file header on load - confirm
 constexpr std::uint32_t kVersion = 0x7AF32F16u;
 // Constants used in evaluation value calculation
 // NOTE(review): FV_SCALE is not used in this header; presumably the divisor
 // applied to the raw network output - confirm against the evaluation code
 constexpr int FV_SCALE = 16;
 // Number of bits layer outputs are shifted right by before being clipped
 constexpr int kWeightScaleBits = 6;
 // Size of cache line (in bytes), used to align the parameter arrays
 constexpr std::size_t kCacheLineSize = 64;
 // SIMD width (in bytes) of the instruction set selected at compile time.
 // Left undefined when none of USE_AVX2, USE_SSE2 or IS_ARM is defined.
 #if defined(USE_AVX2)
 constexpr std::size_t kSimdWidth = 32;
 #elif defined(USE_SSE2)
 constexpr std::size_t kSimdWidth = 16;
 #elif defined(IS_ARM)
 constexpr std::size_t kSimdWidth = 16;
 #endif
 // Widest SIMD width (in bytes) among the supported instruction sets;
 // dimensions and offsets are required to be multiples of it
 constexpr std::size_t kMaxSimdWidth = 32;
 // Type of input feature after conversion
 using TransformedFeatureType = std::uint8_t;
 // Type used for dimensions, indices and loop counters
 using IndexType = std::uint32_t;
 // Forward declaration of learning class template
 // (layers declare Trainer<Layer> as a friend)
 template <typename Layer>
 class Trainer;
 // Round n up to the nearest multiple of base, i.e. return the smallest
 // multiple of base that is greater than or equal to n
 template <typename IntType>
 constexpr IntType CeilToMultiple(IntType n, IntType base) {
   // Stepping n to just below the next multiple and dropping the remainder
   // is the same as dividing and multiplying back
   const auto padded = n + base - 1;
   return static_cast<IntType>(padded - padded % base);
 }
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,357 +0,0 @@
 // A class that converts the input features of the NNUE evaluation function
 #ifndef _NNUE_FEATURE_TRANSFORMER_H_
 #define _NNUE_FEATURE_TRANSFORMER_H_
 #if defined(EVAL_NNUE)
 #include "nnue_common.h"
 #include "nnue_architecture.h"
 #include "features/index_list.h"
 #include <cstring> // std::memset()
 namespace Eval {
 namespace NNUE {
 // Input feature converter.
 // Keeps, for both perspectives, an accumulator holding the sum of the weight
 // columns of all active input features (plus the biases), and turns it into
 // the clipped 8-bit vector consumed by the first network layer. The
 // accumulator is stored in the position's state so that it can be updated
 // incrementally from the previous state instead of being rebuilt.
 class FeatureTransformer {
  private:
   // number of output dimensions for one side
   static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
  public:
   // output type
   using OutputType = TransformedFeatureType;
   // number of input/output dimensions
   static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
   static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
   // size of forward propagation buffer
   static constexpr std::size_t kBufferSize =
       kOutputDimensions * sizeof(OutputType);
   // Hash value embedded in the evaluation function file
   static constexpr std::uint32_t GetHashValue() {
     return RawFeatures::kHashValue ^ kOutputDimensions;
   }
   // a string representing the structure
   static std::string GetStructureString() {
     return RawFeatures::GetName() + "[" +
         std::to_string(kInputDimensions) + "->" +
         std::to_string(kHalfDimensions) + "x2]";
   }
   // read parameters: the biases first, then the weights, as raw bytes.
   // Returns false if the stream reports a failure.
   bool ReadParameters(std::istream& stream) {
     stream.read(reinterpret_cast<char*>(biases_),
                 kHalfDimensions * sizeof(BiasType));
     stream.read(reinterpret_cast<char*>(weights_),
                 kHalfDimensions * kInputDimensions * sizeof(WeightType));
     return !stream.fail();
   }
   // write parameters in the same layout ReadParameters() expects.
   // Returns false if the stream reports a failure.
   bool WriteParameters(std::ostream& stream) const {
     stream.write(reinterpret_cast<const char*>(biases_),
                  kHalfDimensions * sizeof(BiasType));
     stream.write(reinterpret_cast<const char*>(weights_),
                  kHalfDimensions * kInputDimensions * sizeof(WeightType));
     return !stream.fail();
   }
   // proceed with the difference calculation if possible.
   // Returns true when the accumulator of the current state is valid on
   // return: either it already was, or it could be derived from the previous
   // state. Returns false when neither state has a computed accumulator.
   bool UpdateAccumulatorIfPossible(const Position& pos) const {
     const auto now = pos.state();
     if (now->accumulator.computed_accumulation) {
       return true;
     }
     const auto prev = now->previous;
     if (prev && prev->accumulator.computed_accumulation) {
       UpdateAccumulator(pos);
       return true;
     }
     return false;
   }
   // convert input features.
   // Writes kOutputDimensions values to output: the first kHalfDimensions
   // come from the accumulator of the side to move, the second half from the
   // other side. Each value is the sum over all refresh triggers, clipped at
   // zero from below and saturated to the 8-bit range from above. With
   // refresh set, the accumulator is always rebuilt from scratch.
   void Transform(const Position& pos, OutputType* output, bool refresh) const {
     if (refresh || !UpdateAccumulatorIfPossible(pos)) {
       RefreshAccumulator(pos);
     }
     const auto& accumulation = pos.state()->accumulator.accumulation;
 #if defined(USE_AVX2)
     constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
     // Lane order for _mm256_permute4x64_epi64, applied after the pack below
     constexpr int kControl = 0b11011000;
     const __m256i kZero = _mm256_setzero_si256();
 #elif defined(USE_SSSE3)
     constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
     const __m128i kZero = _mm_setzero_si128();
 #ifndef USE_SSE41
     const __m128i k0x80s = _mm_set1_epi8(-128);
 #endif
 #elif defined(IS_ARM)
     constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
     const int8x8_t kZero = {0};
 #endif
     const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
     for (IndexType p = 0; p < 2; ++p) {
       const IndexType offset = kHalfDimensions * p;
 #if defined(USE_AVX2)
       auto out = reinterpret_cast<__m256i*>(&output[offset]);
       for (IndexType j = 0; j < kNumChunks; ++j) {
         __m256i sum0 =
 #if defined(__MINGW32__) || defined(__MINGW64__)
           // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
           //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
           //       even though alignas is specified.
           _mm256_loadu_si256
 #else
           _mm256_load_si256
 #endif
           (&reinterpret_cast<const __m256i*>(
             accumulation[perspectives[p]][0])[j * 2 + 0]);
         __m256i sum1 =
 #if defined(__MINGW32__) || defined(__MINGW64__)
           _mm256_loadu_si256
 #else
           _mm256_load_si256
 #endif
           (&reinterpret_cast<const __m256i*>(
             accumulation[perspectives[p]][0])[j * 2 + 1]);
         for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
           sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
               accumulation[perspectives[p]][i])[j * 2 + 0]);
           sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
               accumulation[perspectives[p]][i])[j * 2 + 1]);
         }
 #if defined(__MINGW32__) || defined(__MINGW64__)
         _mm256_storeu_si256
 #else
         _mm256_store_si256
 #endif
         (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
             _mm256_packs_epi16(sum0, sum1), kZero), kControl));
       }
 #elif defined(USE_SSSE3)
       auto out = reinterpret_cast<__m128i*>(&output[offset]);
       for (IndexType j = 0; j < kNumChunks; ++j) {
         __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
             accumulation[perspectives[p]][0])[j * 2 + 0]);
         __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
             accumulation[perspectives[p]][0])[j * 2 + 1]);
         for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
           sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
               accumulation[perspectives[p]][i])[j * 2 + 0]);
           sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
               accumulation[perspectives[p]][i])[j * 2 + 1]);
         }
         const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
         _mm_store_si128(&out[j],
 #ifdef USE_SSE41
           _mm_max_epi8(packedbytes, kZero)
 #else
           // Without SSE4.1 there is no signed 8-bit max: a saturating add of
           // -128 followed by a saturating subtract clamps negatives to zero
           _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
 #endif
         );
       }
 #elif defined(IS_ARM)
       const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
       for (IndexType j = 0; j < kNumChunks; ++j) {
         int16x8_t sum = reinterpret_cast<const int16x8_t*>(
             accumulation[perspectives[p]][0])[j];
         for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
           sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
               accumulation[perspectives[p]][i])[j]);
         }
         out[j] = vmax_s8(vqmovn_s16(sum), kZero);
       }
 #else
       for (IndexType j = 0; j < kHalfDimensions; ++j) {
         BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
         for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
           sum += accumulation[static_cast<int>(perspectives[p])][i][j];
         }
         output[offset + j] = static_cast<OutputType>(
             std::max<int>(0, std::min<int>(127, sum)));
       }
 #endif
     }
   }
  private:
   // Calculate cumulative value without using difference calculation.
   // For every refresh trigger and perspective the accumulator starts from
   // the biases (trigger 0) or from zero (other triggers) and the weight
   // column of every active feature is added. Marks the accumulation as
   // computed and the score as stale.
   void RefreshAccumulator(const Position& pos) const {
     auto& accumulator = pos.state()->accumulator;
     for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
       Features::IndexList active_indices[2];
       RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
                                        active_indices);
       for (const auto perspective : Colors) {
         if (i == 0) {
           std::memcpy(accumulator.accumulation[perspective][i], biases_,
                       kHalfDimensions * sizeof(BiasType));
         } else {
           std::memset(accumulator.accumulation[perspective][i], 0,
                       kHalfDimensions * sizeof(BiasType));
         }
         for (const auto index : active_indices[perspective]) {
           const IndexType offset = kHalfDimensions * index;
 #if defined(USE_AVX2)
           auto accumulation = reinterpret_cast<__m256i*>(
               &accumulator.accumulation[perspective][i][0]);
           auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
           constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
           for (IndexType j = 0; j < kNumChunks; ++j) {
 #if defined(__MINGW32__) || defined(__MINGW64__)
             _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
 #else
             accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
 #endif
           }
 #elif defined(USE_SSE2)
           auto accumulation = reinterpret_cast<__m128i*>(
               &accumulator.accumulation[perspective][i][0]);
           auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
           constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
           for (IndexType j = 0; j < kNumChunks; ++j) {
             accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
           }
 #elif defined(IS_ARM)
           auto accumulation = reinterpret_cast<int16x8_t*>(
               &accumulator.accumulation[perspective][i][0]);
           auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
           constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
           for (IndexType j = 0; j < kNumChunks; ++j) {
             accumulation[j] = vaddq_s16(accumulation[j], column[j]);
           }
 #else
           for (IndexType j = 0; j < kHalfDimensions; ++j) {
             accumulator.accumulation[perspective][i][j] += weights_[offset + j];
           }
 #endif
         }
       }
     }
     accumulator.computed_accumulation = true;
     accumulator.computed_score = false;
   }
   // Calculate cumulative value using difference calculation.
   // Derives the accumulator of the current state from the one of the
   // previous state: unless a reset is requested for a perspective, the
   // previous values are copied and the columns of the removed features are
   // subtracted; the columns of the added features are added in both cases.
   // The caller must make sure the previous state exists and has a computed
   // accumulation (see UpdateAccumulatorIfPossible()).
   void UpdateAccumulator(const Position& pos) const {
     // Bound by reference: the previous accumulator is only read here, and it
     // belongs to a different state than the one being written, so copying
     // the whole structure on every update would be wasted work.
     const auto& prev_accumulator = pos.state()->previous->accumulator;
     auto& accumulator = pos.state()->accumulator;
     for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
       Features::IndexList removed_indices[2], added_indices[2];
       bool reset[2];
       RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
                                         removed_indices, added_indices, reset);
       for (const auto perspective : Colors) {
 #if defined(USE_AVX2)
         constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
         auto accumulation = reinterpret_cast<__m256i*>(
             &accumulator.accumulation[perspective][i][0]);
 #elif defined(USE_SSE2)
         constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
         auto accumulation = reinterpret_cast<__m128i*>(
             &accumulator.accumulation[perspective][i][0]);
 #elif defined(IS_ARM)
         constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
         auto accumulation = reinterpret_cast<int16x8_t*>(
             &accumulator.accumulation[perspective][i][0]);
 #endif
         if (reset[perspective]) {
           // Start over, exactly like RefreshAccumulator() does
           if (i == 0) {
             std::memcpy(accumulator.accumulation[perspective][i], biases_,
                         kHalfDimensions * sizeof(BiasType));
           } else {
             std::memset(accumulator.accumulation[perspective][i], 0,
                         kHalfDimensions * sizeof(BiasType));
           }
         } else {// Difference calculation for the feature amount changed from 1 to 0
           std::memcpy(accumulator.accumulation[perspective][i],
                       prev_accumulator.accumulation[perspective][i],
                       kHalfDimensions * sizeof(BiasType));
           for (const auto index : removed_indices[perspective]) {
             const IndexType offset = kHalfDimensions * index;
 #if defined(USE_AVX2)
             auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
             for (IndexType j = 0; j < kNumChunks; ++j) {
               accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
             }
 #elif defined(USE_SSE2)
             auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
             for (IndexType j = 0; j < kNumChunks; ++j) {
               accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
             }
 #elif defined(IS_ARM)
             auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
             for (IndexType j = 0; j < kNumChunks; ++j) {
               accumulation[j] = vsubq_s16(accumulation[j], column[j]);
             }
 #else
             for (IndexType j = 0; j < kHalfDimensions; ++j) {
               accumulator.accumulation[perspective][i][j] -=
                   weights_[offset + j];
             }
 #endif
           }
         }
         {// Difference calculation for features that changed from 0 to 1
           for (const auto index : added_indices[perspective]) {
             const IndexType offset = kHalfDimensions * index;
 #if defined(USE_AVX2)
             auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
             for (IndexType j = 0; j < kNumChunks; ++j) {
               accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
             }
 #elif defined(USE_SSE2)
             auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
             for (IndexType j = 0; j < kNumChunks; ++j) {
               accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
             }
 #elif defined(IS_ARM)
             auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
             for (IndexType j = 0; j < kNumChunks; ++j) {
               accumulation[j] = vaddq_s16(accumulation[j], column[j]);
             }
 #else
             for (IndexType j = 0; j < kHalfDimensions; ++j) {
               accumulator.accumulation[perspective][i][j] +=
                   weights_[offset + j];
             }
 #endif
           }
         }
       }
     }
     accumulator.computed_accumulation = true;
     accumulator.computed_score = false;
   }
   // parameter type
   using BiasType = std::int16_t;
   using WeightType = std::int16_t;
   // Make the learning class a friend
   friend class Trainer<FeatureTransformer>;
   // parameters: one bias per output dimension, and one column of
   // kHalfDimensions weights per input feature (column-major storage)
   alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
   alignas(kCacheLineSize)
       WeightType weights_[kHalfDimensions * kInputDimensions];
 };
 }  // namespace NNUE
 }  // namespace Eval
 #endif  // defined(EVAL_NNUE)
 #endif
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -20,17 +18,55 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdlib>
 #include <cstring>   // For std::memset
 #include <iomanip>
 #include <set>
 #include <sstream>
 #include <iostream>
 #include <set>
 #include "bitboard.h"
 #include "evaluate.h"
 #include "material.h"
 #include "pawns.h"
 #include "thread.h"
-#include "eval/nnue/evaluate_nnue.h"
+#include "uci.h"
 namespace Eval {

   // True when the NNUE network is selected through the "Use NNUE" UCI option
   bool useNNUE;

   // Name of the network file loaded last; "None" until a load succeeds
   std::string eval_file_loaded = "None";

   // Reads the "Use NNUE" and "EvalFile" UCI options and, when NNUE is wanted
   // and the requested file differs from the one already loaded, tries to
   // load it. eval_file_loaded is only updated on a successful load.
   void init_NNUE() {

     useNNUE = Options["Use NNUE"];
     std::string requested = std::string(Options["EvalFile"]);

     if (!useNNUE || eval_file_loaded == requested)
         return;

     if (Eval::NNUE::load_eval_file(requested))
         eval_file_loaded = requested;
   }

   // Checks that the network named by the "EvalFile" option is the one that
   // was loaded. If NNUE is in use and it is not, an explanation is printed to
   // std::cerr and the program exits with EXIT_FAILURE. Otherwise an info
   // string reports which evaluation is enabled.
   void verify_NNUE() {

     std::string requested = std::string(Options["EvalFile"]);
     const bool netMissing = useNNUE && eval_file_loaded != requested;

     if (netMissing)
     {
         // A freshly initialised option map provides the default net name
         UCI::OptionsMap defaults;
         UCI::init(defaults);

         std::cerr << "NNUE evaluation used, but the network file " << requested << " was not loaded successfully. "
                   << "These network evaluation parameters must be available, and compatible with this version of the code. "
                   << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. "
                   << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"
                   << std::string(defaults["EvalFile"]) << std::endl;
         std::exit(EXIT_FAILURE);
     }

     if (!useNNUE)
         sync_cout << "info string classical evaluation enabled." << sync_endl;
     else
         sync_cout << "info string NNUE evaluation using " << requested << " enabled." << sync_endl;
   }
 }
 namespace Trace {
@@ -76,8 +112,10 @@ using namespace Trace;
 namespace {
  // Threshold for lazy and space evaluation
-  constexpr Value LazyThreshold  = Value(1400);
+  constexpr Value LazyThreshold1 =  Value(1400);
  constexpr Value LazyThreshold2 =  Value(1300);
  constexpr Value SpaceThreshold = Value(12222);
  constexpr Value NNUEThreshold  =   Value(460);
  // KingAttackWeights[PieceType] contains king attack weights by piece type
  constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -148,7 +186,6 @@ namespace {
  constexpr Score MinorBehindPawn     = S( 18,  3);
  constexpr Score PassedFile          = S( 11,  8);
  constexpr Score PawnlessFlank       = S( 17, 95);
  constexpr Score QueenInfiltration   = S( -2, 14);
  constexpr Score ReachableOutpost    = S( 31, 22);
  constexpr Score RestrictedPiece     = S(  7,  7);
  constexpr Score RookOnKingRing      = S( 16,  0);
@@ -311,13 +348,16 @@ namespace {
        if (Pt == BISHOP || Pt == KNIGHT)
        {
-            // Bonus if piece is on an outpost square or can reach one
+            // Bonus if the piece is on an outpost square or can reach one
-            bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them);
+            // Reduced bonus for knights (BadOutpost) if few relevant targets
            bb = OutpostRanks & (attackedBy[Us][PAWN] | shift<Down>(pos.pieces(PAWN)))
                              & ~pe->pawn_attacks_span(Them);
            Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN);
            if (   Pt == KNIGHT
-                && bb & s & ~CenterFiles
+                && bb & s & ~CenterFiles // on a side outpost
-                && !(b & pos.pieces(Them) & ~pos.pieces(PAWN))
+                && !(b & targets)        // no relevant attacks
-                && !conditional_more_than_two(
+                && (!more_than_one(targets & (s & QueenSide ? QueenSide : KingSide))))
                      pos.pieces(Them) & ~pos.pieces(PAWN) & (s & QueenSide ? QueenSide : KingSide)))
                score += BadOutpost;
            else if (bb & s)
                score += Outpost[Pt == BISHOP];
@@ -388,10 +428,6 @@ namespace {
            Bitboard queenPinners;
            if (pos.slider_blockers(pos.pieces(Them, ROOK, BISHOP), s, queenPinners))
                score -= WeakQueen;
            // Bonus for queen on weak square in enemy camp
            if (relative_rank(Us, s) > RANK_4 && (~pe->pawn_attacks_span(Them) & s))
                score += QueenInfiltration;
        }
    }
    if (T)
@@ -578,17 +614,21 @@ namespace {
    // Bonus for threats on the next moves against enemy queen
    if (pos.count<QUEEN>(Them) == 1)
    {
        bool queenImbalance = pos.count<QUEEN>() == 1;
        Square s = pos.square<QUEEN>(Them);
-        safe = mobilityArea[Us] & ~stronglyProtected;
+        safe =   mobilityArea[Us]
              & ~pos.pieces(Us, PAWN)
              & ~stronglyProtected;
        b = attackedBy[Us][KNIGHT] & attacks_bb<KNIGHT>(s);
-        score += KnightOnQueen * popcount(b & safe);
+        score += KnightOnQueen * popcount(b & safe) * (1 + queenImbalance);
        b =  (attackedBy[Us][BISHOP] & attacks_bb<BISHOP>(s, pos.pieces()))
           | (attackedBy[Us][ROOK  ] & attacks_bb<ROOK  >(s, pos.pieces()));
-        score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]);
+        score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]) * (1 + queenImbalance);
    }
    if (T)
@@ -788,7 +828,7 @@ namespace {
                && pos.non_pawn_material(BLACK) == RookValueMg
                && pos.count<PAWN>(strongSide) - pos.count<PAWN>(~strongSide) <= 1
                && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN))
-                && (attackedBy[~strongSide][KING] & pos.pieces(~strongSide, PAWN)))
+                && (attacks_bb<KING>(pos.square<KING>(~strongSide)) & pos.pieces(~strongSide, PAWN)))
            sf = 36;
        else if (pos.count<QUEEN>() == 1)
            sf = 37 + 3 * (pos.count<QUEEN>(WHITE) == 1 ? pos.count<BISHOP>(BLACK) + pos.count<KNIGHT>(BLACK)
@@ -839,9 +879,12 @@ namespace {
    score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK);
    // Early exit if score is high
-    Value v = (mg_value(score) + eg_value(score)) / 2;
+    auto lazy_skip = [&](Value lazyThreshold) {
-    if (abs(v) > LazyThreshold + pos.non_pawn_material() / 64)
+        return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64;
-       return pos.side_to_move() == WHITE ? v : -v;
+    };
    if (lazy_skip(LazyThreshold1))
        goto make_v;
    // Main evaluation begins here
    initialize<WHITE>();
@@ -858,12 +901,17 @@ namespace {
    // More complex interactions that require fully populated attack bitboards
    score +=  king<   WHITE>() - king<   BLACK>()
-            + threats<WHITE>() - threats<BLACK>()
+            + passed< WHITE>() - passed< BLACK>();
-            + passed< WHITE>() - passed< BLACK>()
+
    if (lazy_skip(LazyThreshold2))
        goto make_v;
    score +=  threats<WHITE>() - threats<BLACK>()
            + space<  WHITE>() - space<  BLACK>();
 make_v:
    // Derive single value from mg and eg parts of score
-    v = winnable(score);
+    Value v = winnable(score);
    // In case of tracing add all remaining individual evaluation terms
    if (T)
@@ -892,187 +940,168 @@ namespace {
 /// evaluate() is the evaluator for the outer world. It returns a static
 /// evaluation of the position from the point of view of the side to move.
 #if !defined(EVAL_NNUE)
 Value Eval::evaluate(const Position& pos) {
  if (Eval::useNNUE)
  {
      Value v = eg_value(pos.psq_score());
      // Take NNUE eval only on balanced positions
      if (abs(v) < NNUEThreshold + 20 * pos.count<PAWN>())
         return NNUE::evaluate(pos) + Tempo;
  }
  return Evaluation<NO_TRACE>(pos).value();
 }
 #endif  // !defined(EVAL_NNUE)
 /// trace() is like evaluate(), but instead of returning a value, it returns
 /// a string (suitable for outputting to stdout) that contains the detailed
 /// descriptions and values of each evaluation term. Useful for debugging.
 /// Trace scores are from white's point of view
 std::string Eval::trace(const Position& pos) {
  if (pos.checkers())
-      return "Total evaluation: none (in check)";
+      return "Final evaluation: none (in check)";
  std::memset(scores, 0, sizeof(scores));
  pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
  Value v = Evaluation<TRACE>(pos).value();
  v = pos.side_to_move() == WHITE ? v : -v; // Trace scores are from white's point of view
  std::stringstream ss;
-  ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
+  ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
-     << "     Term    |    White    |    Black    |    Total   \n"
+
-     << "             |   MG    EG  |   MG    EG  |   MG    EG \n"
+  Value v;
-     << " ------------+-------------+-------------+------------\n"
+
-     << "    Material | " << Term(MATERIAL)
+  if (Eval::useNNUE)
-     << "   Imbalance | " << Term(IMBALANCE)
+  {
-     << "       Pawns | " << Term(PAWN)
+      v = NNUE::evaluate(pos);
-     << "     Knights | " << Term(KNIGHT)
+  }
-     << "     Bishops | " << Term(BISHOP)
+  else
-     << "       Rooks | " << Term(ROOK)
+  {
-     << "      Queens | " << Term(QUEEN)
+      std::memset(scores, 0, sizeof(scores));
-     << "    Mobility | " << Term(MOBILITY)
+
-     << " King safety | " << Term(KING)
+      pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
-     << "     Threats | " << Term(THREAT)
+
-     << "      Passed | " << Term(PASSED)
+      v = Evaluation<TRACE>(pos).value();
-     << "       Space | " << Term(SPACE)
+
-     << "    Winnable | " << Term(WINNABLE)
+      ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
-     << " ------------+-------------+-------------+------------\n"
+         << "     Term    |    White    |    Black    |    Total   \n"
-     << "       Total | " << Term(TOTAL);
+         << "             |   MG    EG  |   MG    EG  |   MG    EG \n"
         << " ------------+-------------+-------------+------------\n"
         << "    Material | " << Term(MATERIAL)
         << "   Imbalance | " << Term(IMBALANCE)
         << "       Pawns | " << Term(PAWN)
         << "     Knights | " << Term(KNIGHT)
         << "     Bishops | " << Term(BISHOP)
         << "       Rooks | " << Term(ROOK)
         << "      Queens | " << Term(QUEEN)
         << "    Mobility | " << Term(MOBILITY)
         << " King safety | " << Term(KING)
         << "     Threats | " << Term(THREAT)
         << "      Passed | " << Term(PASSED)
         << "       Space | " << Term(SPACE)
         << "    Winnable | " << Term(WINNABLE)
         << " ------------+-------------+-------------+------------\n"
         << "       Total | " << Term(TOTAL);
  }
  v = pos.side_to_move() == WHITE ? v : -v;
  ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
  return ss.str();
 }
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
+// Check whether the pieceListFw[] held internally is a correct PieceSquare.
 namespace Eval {
 ExtBonaPiece kpp_board_index[PIECE_NB] = {
    { BONA_PIECE_ZERO, BONA_PIECE_ZERO },
    { f_pawn, e_pawn },
    { f_knight, e_knight },
    { f_bishop, e_bishop },
    { f_rook, e_rook },
    { f_queen, e_queen },
    { f_king, e_king },
    { BONA_PIECE_ZERO, BONA_PIECE_ZERO },
    // When viewed from behind. f and e are exchanged.
    { BONA_PIECE_ZERO, BONA_PIECE_ZERO },
    { e_pawn, f_pawn },
    { e_knight, f_knight },
    { e_bishop, f_bishop },
    { e_rook, f_rook },
    { e_queen, f_queen },
    { e_king, f_king },
    { BONA_PIECE_ZERO, BONA_PIECE_ZERO }, // unused piece code
 };
 // Check whether the pieceListFw[] held internally is a correct BonaPiece.
 // Note: For debugging. slow.
 bool EvalList::is_valid(const Position& pos)
 {
-  std::set<PieceNumber> piece_numbers;
+    std::set<PieceId> piece_numbers;
-  for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) {
+    for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) {
-    auto piece_number = piece_no_of_board(sq);
+        auto piece_number = piece_id_list[sq];
-    if (piece_number == PIECE_NUMBER_NB) {
+        if (piece_number == PieceId::PIECE_ID_NONE) {
-      continue;
+            continue;
-    }
+        }
-    assert(!piece_numbers.count(piece_number));
+        assert(!piece_numbers.count(piece_number));
-    piece_numbers.insert(piece_number);
+        piece_numbers.insert(piece_number);
  }
  for (int i = 0; i < length(); ++i)
  {
    BonaPiece fw = pieceListFw[i];
    // Go to the Position class to see if this fw really exists.
    if (fw == Eval::BONA_PIECE_ZERO) {
      continue;
    }
-    // Out of range
+    for (int i = 0; i < PieceId::PIECE_ID_KING; ++i)
    if (!(0 <= fw && fw < fe_end))
      return false;
    // Since it is a piece on the board, I will check if this piece really exists.
    for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc)
    {
-      auto pt = type_of(pc);
+        PieceSquare fw = pieceListFw[i];
-      if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece
+        // Go to the Position class to see if this fw really exists.
        continue;
-      // BonaPiece start number of piece pc
+        if (fw == PieceSquare::PS_NONE) {
-      auto s = BonaPiece(kpp_board_index[pc].fw);
+            continue;
-      if (s <= fw && fw < s + SQUARE_NB)
+        }
      {
        // Since it was found, check if this piece is at sq.
        Square sq = (Square)(fw - s);
        Piece pc2 = pos.piece_on(sq);
-        if (pc2 != pc)
+        // Out of range
-          return false;
+        if (!(0 <= fw && fw < PieceSquare::PS_END))
            return false;
-        goto Found;
+        // Since it is a piece on the board, I will check if this piece really exists.
-      }
+        for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc)
-    }
+        {
-    // It was a piece that did not exist for some reason..
+            auto pt = type_of(pc);
-    return false;
+            if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece
-  Found:;
+                continue;
  }
-  // Validate piece_no_list_board
+            // PieceSquare start number of piece pc
-  for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) {
+            auto s = PieceSquare(kpp_board_index[pc].from[Color::WHITE]);
-    Piece expected_piece = pos.piece_on(sq);
+            if (s <= fw && fw < s + SQUARE_NB)
-    PieceNumber piece_number = piece_no_list_board[sq];
+            {
-    if (piece_number == PIECE_NUMBER_NB) {
+                // Since it was found, check if this piece is at sq.
-      assert(expected_piece == NO_PIECE);
+                Square sq = (Square)(fw - s);
-      if (expected_piece != NO_PIECE) {
+                Piece pc2 = pos.piece_on(sq);
                if (pc2 != pc)
                    return false;
                goto Found;
            }
        }
        // It was a piece that did not exist for some reason..
        return false;
-      }
+    Found:;
      continue;
    }
-    BonaPiece bona_piece_white = pieceListFw[piece_number];
+    // Validate piece_id_list
-    Piece actual_piece;
+    for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) {
-    for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) {
+        Piece expected_piece = pos.piece_on(sq);
-      if (kpp_board_index[actual_piece].fw == BONA_PIECE_ZERO) {
+        PieceId piece_number = piece_id_list[sq];
-        continue;
+        if (piece_number == PieceId::PIECE_ID_NONE) {
-      }
+            assert(expected_piece == NO_PIECE);
            if (expected_piece != NO_PIECE) {
                return false;
            }
            continue;
        }
-      if (kpp_board_index[actual_piece].fw <= bona_piece_white
+        PieceSquare bona_piece_white = pieceListFw[piece_number];
-        && bona_piece_white < kpp_board_index[actual_piece].fw + SQUARE_NB) {
+        Piece actual_piece;
-        break;
+        for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) {
-      }
+            if (kpp_board_index[actual_piece].from[Color::WHITE] == PieceSquare::PS_NONE) {
                continue;
            }
            if (kpp_board_index[actual_piece].from[Color::WHITE] <= bona_piece_white
                && bona_piece_white < kpp_board_index[actual_piece].from[Color::WHITE] + SQUARE_NB) {
                break;
            }
        }
        assert(actual_piece != PIECE_NB);
        if (actual_piece == PIECE_NB) {
            return false;
        }
        assert(actual_piece == expected_piece);
        if (actual_piece != expected_piece) {
            return false;
        }
        Square actual_square = static_cast<Square>(
            bona_piece_white - kpp_board_index[actual_piece].from[Color::WHITE]);
        assert(sq == actual_square);
        if (sq != actual_square) {
            return false;
        }
    }
-    assert(actual_piece != PIECE_NB);
+    return true;
    if (actual_piece == PIECE_NB) {
      return false;
    }
    assert(actual_piece == expected_piece);
    if (actual_piece != expected_piece) {
      return false;
    }
    Square actual_square = static_cast<Square>(
      bona_piece_white - kpp_board_index[actual_piece].fw);
    assert(sq == actual_square);
    if (sq != actual_square) {
      return false;
    }
  }
  return true;
 }
 }
 #endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
 #if !defined(EVAL_NNUE)
 namespace Eval {
 void evaluate_with_no_return(const Position& pos) {}
 void update_weights(uint64_t epoch, const std::array<bool, 4> & freeze) {}
 void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3) {}
 void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4> & freeze) {}
 void save_eval(std::string suffix) {}
 double get_eta() { return 0.0; }
 }
 #endif  // !defined(EVAL_NNUE)
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -29,194 +27,23 @@ class Position;
 namespace Eval {
-std::string trace(const Position& pos);
+  std::string trace(const Position& pos);
  Value evaluate(const Position& pos);
-Value evaluate(const Position& pos);
+  extern bool useNNUE;
  extern std::string eval_file_loaded;
  void init_NNUE();
  void verify_NNUE();
-void evaluate_with_no_return(const Position& pos);
+  namespace NNUE {
-Value compute_eval(const Position& pos);
+    Value evaluate(const Position& pos);
    Value compute_eval(const Position& pos);
    void  update_eval(const Position& pos);
    bool  load_eval_file(const std::string& evalFile);
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
+  } // namespace NNUE
 // Read the evaluation function file.
 // This is only called once in response to the "is_ready" command. It is not supposed to be called twice.
 // (However, if isready is sent again after EvalDir (evaluation function folder) has been changed, read it again.)
 void load_eval();
-static uint64_t calc_check_sum() {return 0;}
+} // namespace Eval
 static void print_softname(uint64_t check_sum) {}
 // --- enum corresponding to P of constant KPP (ball and arbitrary 2 pieces) used in evaluation function
 // (BonaPiece wants to define freely in experiment of evaluation function, so I don't define it here.)
 // A type that represents P(Piece) when calling KKP/KPP in Bonanza.
 // When you compute Σ KPP, you need a unique number for each square × piece type, like the pawn on square 39.
 enum BonaPiece : int32_t
 {
 	// f = friend (≈ the side that moves first); e = enemy (≈ the side that moves second).
 	// Value when uninitialized
 	BONA_PIECE_NOT_INIT = -1,
 	// Invalid piece. When you drop a piece, move unnecessary pieces here.
 	BONA_PIECE_ZERO = 0,
 	fe_hand_end = BONA_PIECE_ZERO + 1,
 	// Unlike Bonanza, do not pack away the numbers of pawns and lances on squares they can never occupy.
 	// Reason 1) When learning, a lance can appear on the first rank in relative PP, and it is difficult to display that correctly in the inverse transformation.
 	// Reason 2) With a vertical Bitboard, converting from Square becomes awkward.
 	// --- Pieces on the board
 	f_pawn = fe_hand_end,
 	e_pawn = f_pawn + SQUARE_NB,
 	f_knight = e_pawn + SQUARE_NB,
 	e_knight = f_knight + SQUARE_NB,
 	f_bishop = e_knight + SQUARE_NB,
 	e_bishop = f_bishop + SQUARE_NB,
 	f_rook = e_bishop + SQUARE_NB,
 	e_rook = f_rook + SQUARE_NB,
 	f_queen = e_rook + SQUARE_NB,
 	e_queen = f_queen + SQUARE_NB,
 	fe_end = e_queen + SQUARE_NB,
 	f_king = fe_end,
 	e_king = f_king + SQUARE_NB,
 	fe_end2 = e_king + SQUARE_NB, // Last number including kings.
 };
 #define ENABLE_INCR_OPERATORS_ON(T)                                \
 inline T& operator++(T& d) { return d = T(int(d) + 1); }           \
 inline T& operator--(T& d) { return d = T(int(d) - 1); }
 ENABLE_INCR_OPERATORS_ON(BonaPiece)
 #undef ENABLE_INCR_OPERATORS_ON
 // The number when you look at BonaPiece from the back (the number of steps from the previous 39 to the number 71 from the back)
 // Let's call the paired one the ExtBonaPiece type.
 union ExtBonaPiece
 {
 	struct {
 		BonaPiece fw; // from white
 		BonaPiece fb; // from black
 	};
 	BonaPiece from[2];
 	ExtBonaPiece() {}
 	ExtBonaPiece(BonaPiece fw_, BonaPiece fb_) : fw(fw_), fb(fb_) {}
 };
 // Information about where the piece has moved from where to by this move.
 // Assume the piece is an ExtBonaPiece expression.
 struct ChangedBonaPiece
 {
 	ExtBonaPiece old_piece;
 	ExtBonaPiece new_piece;
 };
 // An array for finding the BonaPiece corresponding to the piece pc on the board of the KPP table.
 // example)
 // BonaPiece fb = kpp_board_index[pc].fb + sq; // BonaPiece corresponding to pc in sq seen from the front
 // BonaPiece fw = kpp_board_index[pc].fw + sq; // BonaPiece corresponding to pc in sq seen from behind
 extern ExtBonaPiece kpp_board_index[PIECE_NB];
 // List of pieces used in the evaluation function. A structure holding which piece (PieceNumber) is where (BonaPiece)
 struct EvalList
 {
 	// List of frame numbers used in evaluation function (FV38 type)
 	BonaPiece* piece_list_fw() const { return const_cast<BonaPiece*>(pieceListFw); }
 	BonaPiece* piece_list_fb() const { return const_cast<BonaPiece*>(pieceListFb); }
 	// Convert the specified piece_no piece to ExtBonaPiece type and return it.
 	ExtBonaPiece bona_piece(PieceNumber piece_no) const
 	{
 		ExtBonaPiece bp;
 		bp.fw = pieceListFw[piece_no];
 		bp.fb = pieceListFb[piece_no];
 		return bp;
 	}
 	// Place the piece_no pc piece in the sq box on the board
 	void put_piece(PieceNumber piece_no, Square sq, Piece pc) {
 		set_piece_on_board(piece_no, BonaPiece(kpp_board_index[pc].fw + sq), BonaPiece(kpp_board_index[pc].fb + Inv(sq)), sq);
 	}
 	// Returns the PieceNumber corresponding to a box on the board.
 	PieceNumber piece_no_of_board(Square sq) const { return piece_no_list_board[sq]; }
 	// Initialize the pieceList.
 	// Set the value of unused pieces to BONA_PIECE_ZERO in case you want to deal with dropped pieces.
 	// A normal evaluation function can be used as an evaluation function for missing frames.
 	// piece_no_list is initialized with PIECE_NUMBER_NB to facilitate debugging.
 	void clear()
 	{
 		for (auto& p: pieceListFw)
 			p = BONA_PIECE_ZERO;
 		for (auto& p: pieceListFb)
 			p = BONA_PIECE_ZERO;
 		for (auto& v :piece_no_list_board)
 			v = PIECE_NUMBER_NB;
 	}
 	// Check whether the pieceListFw[] held internally is a correct BonaPiece.
 	// Note: For debugging. slow.
 	bool is_valid(const Position& pos);
 	// Set that the BonaPiece of the piece_no piece on the board sq is fb,fw.
 	inline void set_piece_on_board(PieceNumber piece_no, BonaPiece fw, BonaPiece fb, Square sq)
 	{
 		assert(is_ok(piece_no));
 		pieceListFw[piece_no] = fw;
 		pieceListFb[piece_no] = fb;
 		piece_no_list_board[sq] = piece_no;
 	}
 	// Piece list. Piece Number Shows how many pieces are in place (Bona Piece). Used in FV38 etc.
 	// Length of piece list
  // 38 fixed
 public:
 	int length() const { return PIECE_NUMBER_KING; }
 	// Must be a multiple of 4 to use VPGATHERDD.
 	// In addition, the KPPT type evaluation function, etc. is based on the assumption that the 39th and 40th elements are zero.
 	// Please note that there is a part that is accessed.
 	static const int MAX_LENGTH = 32;
  // An array that holds the piece number (PieceNumber) for the pieces on the board
  // Holds one extra entry (+1) for the case where a king is located at SQUARE_NB;
  // a king at SQUARE_NB is never moved, so that entry should never be used.
  PieceNumber piece_no_list_board[SQUARE_NB_PLUS1];
 private:
 	BonaPiece pieceListFw[MAX_LENGTH];
 	BonaPiece pieceListFb[MAX_LENGTH];
 };
 // For management of evaluation value difference calculation
 // A structure for managing the number of pieces that have moved from the previous stage
 // Up to 2 moving pieces.
 struct DirtyPiece
 {
 	// What changed from the piece with that piece number
 	Eval::ChangedBonaPiece changed_piece[2];
 	// The number of dirty pieces
 	PieceNumber pieceNo[2];
 	// The number of dirty files.
 	// It can be 0 for null move.
 	// Up to 2 moving pieces and taken pieces.
 	int dirty_num;
 };
 #endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
 }
 #endif // #ifndef EVALUATE_H_INCLUDED
@@ -281,7 +281,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
 	// In updating the PieceList, we have to set which piece is where,
 	// A counter of how much each piece has been used
-  PieceNumber next_piece_number = PIECE_NUMBER_ZERO;
+  PieceId next_piece_number = PieceId::PIECE_ID_ZERO;
  pieceList[W_KING][0] = SQUARE_NB;
  pieceList[B_KING][0] = SQUARE_NB;
@@ -290,7 +290,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
 	if (mirror)
 	{
 		for (auto c : Colors)
-			board[Mir((Square)stream.read_n_bit(6))] = make_piece(c, KING);
+			board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
 	}
 	else
 	{
@@ -305,7 +305,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
    {
      auto sq = make_square(f, r);
      if (mirror) {
-        sq = Mir(sq);
+        sq = flip_file(sq);
      }
      // there seems to be a king on this square already
@@ -328,9 +328,9 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
      put_piece(Piece(pc), sq);
      // update evalList
-      PieceNumber piece_no =
+      PieceId piece_no =
-        (pc == B_KING) ?PIECE_NUMBER_BKING :// Move ball
+        (pc == B_KING) ?PieceId::PIECE_ID_BKING :// Move ball
-        (pc == W_KING) ?PIECE_NUMBER_WKING :// Backing ball
+        (pc == W_KING) ?PieceId::PIECE_ID_WKING :// Backing ball
        next_piece_number++; // otherwise
      evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box
@@ -372,7 +372,7 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
  if (stream.read_one_bit()) {
    Square ep_square = static_cast<Square>(stream.read_n_bit(6));
    if (mirror) {
-      ep_square = Mir(ep_square);
+      ep_square = flip_file(ep_square);
    }
    st->epSquare = ep_square;
@@ -81,7 +81,7 @@
 #include "multi_think.h"
 #if defined(EVAL_NNUE)
-#include "../eval/nnue/evaluate_nnue_learner.h"
+#include "../nnue/evaluate_nnue_learner.h"
 #include <shared_mutex>
 #endif
@@ -627,7 +627,7 @@ void MultiThinkGenSfen::thread_worker(size_t thread_id)
 						// If the depth is 8 or more, it seems faster not to calculate this difference.
 #if defined(EVAL_NNUE)
            if (depth < 8)
-              Eval::evaluate_with_no_return(pos);
+              Eval::NNUE::update_eval(pos);
 #endif  // defined(EVAL_NNUE)
 					}
@@ -635,10 +635,10 @@ void MultiThinkGenSfen::thread_worker(size_t thread_id)
 					// cout << pos;
 					auto v = Eval::evaluate(pos);
-					// evaluate() returns the evaluation value on the turn side, so
+						// evaluate() returns the evaluation value on the turn side, so
-					// If it's a turn different from root_color, you must invert v and return it.
+						// If it's a turn different from root_color, you must invert v and return it.
-					if (rootColor != pos.side_to_move())
+						if (rootColor != pos.side_to_move())
-						v = -v;
+							v = -v;
 					// Rewind.
 					// Is it C++x14, and isn't there even foreach to turn in reverse?
@@ -825,7 +825,7 @@ void MultiThinkGenSfen::thread_worker(size_t thread_id)
 			pos.do_move(m, states[ply]);
 			// Call node evaluate() for each difference calculation.
-			Eval::evaluate_with_no_return(pos);
+			Eval::NNUE::update_eval(pos);
 		} // for (int ply = 0; ; ++ply)
@@ -979,7 +979,7 @@ void gen_sfen(Position&, istringstream& is)
 		<< "  loop_max = " << loop_max << endl
 		<< "  eval_limit = " << eval_limit << endl
 		<< "  thread_num (set by USI setoption) = " << thread_num << endl
-		<< "  book_moves (set by USI setoption) = " << Options["BookMoves"] << endl
+		//<< "  book_moves (set by USI setoption) = " << Options["BookMoves"] << endl
 		<< "  random_move_minply     = " << random_move_minply << endl
 		<< "  random_move_maxply     = " << random_move_maxply << endl
 		<< "  random_move_count      = " << random_move_count << endl
@@ -994,6 +994,9 @@ void gen_sfen(Position&, istringstream& is)
 		<< "  save_every             = " << save_every << endl
 		<< "  random_file_name       = " << random_file_name << endl;
 	// Show if the training data generator uses NNUE.
 	Eval::verify_NNUE();
 	// Create and execute threads as many as Options["Threads"].
 	{
 		SfenWriter sw(output_file_name, thread_num);
@@ -1697,7 +1700,7 @@ void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
 				for (size_t i = 0; i < pv.size(); ++i)
 				{
 					pos.do_move(pv[i], states[i]);
-					Eval::evaluate_with_no_return(pos);
+					Eval::NNUE::update_eval(pos);
 				}
 				shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
 				for (auto it = pv.rbegin(); it != pv.rend(); ++it)
@@ -2106,7 +2109,7 @@ void LearnerThink::thread_worker(size_t thread_id)
 			pos.do_move(m, state[ply++]);
 			// Since the value of evaluate in leaf is used, the difference is updated.
-			Eval::evaluate_with_no_return(pos);
+			Eval::NNUE::update_eval(pos);
 		}
 		if (illegal_move) {
@@ -2135,9 +2138,6 @@ void LearnerThink::thread_worker(size_t thread_id)
 // Write evaluation function file.
 bool LearnerThink::save(bool is_final)
 {
 	// Calculate and output check sum before saving. (To check if it matches the next time)
 	std::cout << "Check Sum = "<< std::hex << Eval::calc_check_sum() << std::dec << std::endl;
 	// Each time you save, change the extension part of the file name like "0","1","2",..
 	// (Because I want to compare the winning rate for each evaluation function parameter later)
@@ -3089,14 +3089,14 @@ void learn(Position&, istringstream& is)
 	}
 	if (use_convert_plain)
 	{
-		init_nnue(true);
+		Eval::init_NNUE();
 		cout << "convert_plain.." << endl;
 		convert_plain(filenames, output_file_name);
 		return;
 	}
 	if (use_convert_bin)
 	{
-	  	init_nnue(true);
+		Eval::init_NNUE();
 		cout << "convert_bin.." << endl;
 		convert_bin(filenames,output_file_name, ply_minimum, ply_maximum, interpolate_eval);
 		return;
@@ -3104,7 +3104,7 @@ void learn(Position&, istringstream& is)
 	}
 	if (use_convert_bin_from_pgn_extract)
 	{
-		init_nnue(true);
+		Eval::init_NNUE();
 		cout << "convert_bin_from_pgn-extract.." << endl;
 		convert_bin_from_pgn_extract(filenames, output_file_name, pgn_eval_side_to_move);
 		return;
@@ -3170,7 +3170,7 @@ void learn(Position&, istringstream& is)
 	cout << "init.." << endl;
 	// Read evaluation function parameters
-	init_nnue(true);
+	Eval::init_NNUE();
 #if !defined(EVAL_NNUE)
 	cout << "init_grad.." << endl;
@@ -28,17 +28,17 @@ namespace EvalLearningTools
 	void init_min_index_flag()
 	{
 		// Initialization of mir_piece and inv_piece must be completed.
-		assert(mir_piece(Eval::f_pawn) == Eval::e_pawn);
+		assert(Eval::mir_piece(PieceSquare::PS_W_PAWN) == PieceSquare::PS_B_PAWN);
 		// Initialize the flag array for dimension reduction
 		// Not involved in KPPP.
 		KK g_kk;
-		g_kk.set(SQUARE_NB, Eval::fe_end, 0);
+		g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0);
 		KKP g_kkp;
-		g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index());
+		g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index());
 		KPP g_kpp;
-		g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index());
+		g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index());
 		uint64_t size = g_kpp.max_index();
 		min_index_flag.resize(size);
@@ -123,22 +123,22 @@ namespace EvalLearningTools
 		// Determine if it is correct.
 		KK g_kk;
-		g_kk.set(SQUARE_NB, Eval::fe_end, 0);
+		g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0);
 		KKP g_kkp;
-		g_kkp.set(SQUARE_NB, Eval::fe_end, g_kk.max_index());
+		g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index());
 		KPP g_kpp;
-		g_kpp.set(SQUARE_NB, Eval::fe_end, g_kkp.max_index());
+		g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index());
 		std::vector<bool> f;
 		f.resize(g_kpp.max_index() - g_kpp.min_index());
 		for(auto k = SQUARE_ZERO ; k < SQUARE_NB ; ++k)
-			for(auto p0 = BonaPiece::BONA_PIECE_ZERO; p0 < fe_end ; ++p0)
+			for(auto p0 = PieceSquare::PS_NONE; p0 < PieceSquare::PS_END ; ++p0)
-				for (auto p1 = BonaPiece::BONA_PIECE_ZERO; p1 < fe_end; ++p1)
+				for (auto p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
 				{
 					KPP kpp_org = g_kpp.fromKPP(k,p0,p1);
 					KPP kpp0;
-					KPP kpp1 = g_kpp.fromKPP(Mir(k), mir_piece(p0), mir_piece(p1));
+					KPP kpp1 = g_kpp.fromKPP(flip_file(k), mir_piece(p0), mir_piece(p1));
 					KPP kpp_array[2];
 					auto index = kpp_org.toIndex();
@@ -172,7 +172,7 @@ namespace EvalLearningTools
 		// Test for missing KPPP calculations
 		KPPP g_kppp;
-		g_kppp.set(15, Eval::fe_end,0);
+		g_kppp.set(15, PieceSquare::PS_END,0);
 		uint64_t min_index = g_kppp.min_index();
 		uint64_t max_index = g_kppp.max_index();
@@ -214,7 +214,7 @@ namespace EvalLearningTools
 			for (int i = 0; i<10000; ++i) // As a test, assuming a large fe_end, try turning at 10000.
 				for (int j = 0; j < i; ++j)
 				{
-					auto kkpp = g_kkpp.fromKKPP(k, (BonaPiece)i, (BonaPiece)j);
+					auto kkpp = g_kkpp.fromKKPP(k, (PieceSquare)i, (PieceSquare)j);
 					auto r = kkpp.toRawIndex();
 					assert(n++ == r);
 					auto kkpp2 = g_kkpp.fromIndex(r + g_kkpp.min_index());
@@ -281,7 +281,7 @@ namespace EvalLearningTools
 		// The number of king squares to support (normally SQUARE_NB)
 		int max_king_sq_;
-		// Maximum BonaPiece value supported
+		// Maximum PieceSquare value supported
 		uint64_t fe_end_;
 	};
@@ -341,10 +341,10 @@ namespace EvalLearningTools
 		void toLowerDimensions(/*out*/KK kk_[KK_LOWER_COUNT]) const {
 			kk_[0] = fromKK(king0_, king1_,false);
 #if defined(USE_KK_MIRROR_WRITE)
-			kk_[1] = fromKK(Mir(king0_),Mir(king1_),false);
+			kk_[1] = fromKK(flip_file(king0_),flip_file(king1_),false);
 #if defined(USE_KK_INVERSE_WRITE)
-			kk_[2] = fromKK(Inv(king1_), Inv(king0_),true);
+			kk_[2] = fromKK(rotate180(king1_), rotate180(king0_),true);
-			kk_[3] = fromKK(Inv(Mir(king1_)) , Inv(Mir(king0_)),true);
+			kk_[3] = fromKK(rotate180(flip_file(king1_)) , rotate180(flip_file(king0_)),true);
 #endif
 #endif
 		}
@@ -386,8 +386,8 @@ namespace EvalLearningTools
 	struct KKP : public SerializerBase
 	{
 	protected:
-		KKP(Square king0, Square king1, Eval::BonaPiece p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {}
+		KKP(Square king0, Square king1, PieceSquare p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {}
-		KKP(Square king0, Square king1, Eval::BonaPiece p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {}
+		KKP(Square king0, Square king1, PieceSquare p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {}
 	public:
 		KKP() {}
@@ -399,27 +399,27 @@ namespace EvalLearningTools
 		// A builder that creates a KKP object from raw_index (a number that starts from 0, not a serial number)
 		KKP fromRawIndex(uint64_t raw_index) const
 		{
-			int piece = (int)(raw_index % Eval::fe_end);
+			int piece = (int)(raw_index % PieceSquare::PS_END);
-			raw_index /= Eval::fe_end;
+			raw_index /= PieceSquare::PS_END;
 			int king1 = (int)(raw_index % SQUARE_NB);
 			raw_index /= SQUARE_NB;
 			int king0 = (int)(raw_index  /* % SQUARE_NB */);
 			assert(king0 < SQUARE_NB);
-			return fromKKP((Square)king0, (Square)king1, (Eval::BonaPiece)piece,false);
+			return fromKKP((Square)king0, (Square)king1, (PieceSquare)piece,false);
 		}
-		KKP fromKKP(Square king0, Square king1, Eval::BonaPiece p, bool inverse) const
+		KKP fromKKP(Square king0, Square king1, PieceSquare p, bool inverse) const
 		{
 			KKP my_kkp(king0, king1, p, inverse);
 			my_kkp.set(max_king_sq_,fe_end_,min_index());
 			return my_kkp;
 		}
-		KKP fromKKP(Square king0, Square king1, Eval::BonaPiece p) const { return fromKKP(king0, king1, p, false); }
+		KKP fromKKP(Square king0, Square king1, PieceSquare p) const { return fromKKP(king0, king1, p, false); }
 		// When you construct this object using fromIndex(), you can get information with the following accessors.
 		Square king0() const { return king0_; }
 		Square king1() const { return king1_; }
-		Eval::BonaPiece piece() const { return piece_; }
+		PieceSquare piece() const { return piece_; }
 		// Number of KKP dimension reductions
 #if defined(USE_KKP_INVERSE_WRITE)
@@ -442,10 +442,10 @@ namespace EvalLearningTools
 		void toLowerDimensions(/*out*/ KKP kkp_[KKP_LOWER_COUNT]) const {
 			kkp_[0] = fromKKP(king0_, king1_, piece_,false);
 #if defined(USE_KKP_MIRROR_WRITE)
-			kkp_[1] = fromKKP(Mir(king0_), Mir(king1_), mir_piece(piece_),false);
+			kkp_[1] = fromKKP(flip_file(king0_), flip_file(king1_), Eval::mir_piece(piece_),false);
 #if defined(USE_KKP_INVERSE_WRITE)
-			kkp_[2] = fromKKP( Inv(king1_), Inv(king0_), inv_piece(piece_),true);
+			kkp_[2] = fromKKP( rotate180(king1_), rotate180(king0_), Eval::inv_piece(piece_),true);
-			kkp_[3] = fromKKP( Inv(Mir(king1_)), Inv(Mir(king0_)) , inv_piece(mir_piece(piece_)),true);
+			kkp_[3] = fromKKP( rotate180(flip_file(king1_)), rotate180(flip_file(king0_)) , Eval::inv_piece(Eval::mir_piece(piece_)),true);
 #endif
 #endif
 		}
@@ -473,7 +473,7 @@ namespace EvalLearningTools
 	private:
 		Square king0_, king1_;
-		Eval::BonaPiece piece_;
+		PieceSquare piece_;
 		bool inverse_sign;
 	};
@@ -489,7 +489,7 @@ namespace EvalLearningTools
 	struct KPP : public SerializerBase
 	{
 	protected:
-		KPP(Square king, Eval::BonaPiece p0, Eval::BonaPiece p1) : king_(king), piece0_(p0), piece1_(p1) {}
+		KPP(Square king, PieceSquare p0, PieceSquare p1) : king_(king), piece0_(p0), piece1_(p1) {}
 	public:
 		KPP() {}
@@ -534,7 +534,7 @@ namespace EvalLearningTools
 			// From the solution formula of the quadratic equation i = (sqrt(8*index2+1)-1) / 2.
 			// After i is converted into an integer, j can be calculated as j = index2-i * (i + 1) / 2.
-			// BonaPiece assumes 32bit (may not fit in 16bit), so this multiplication must be 64bit.
+			// PieceSquare assumes 32bit (may not fit in 16bit), so this multiplication must be 64bit.
 			int piece1 = int(sqrt(8 * index2 + 1) - 1) / 2;
 			int piece0 = int(index2 - (uint64_t)piece1*((uint64_t)piece1 + 1) / 2);
@@ -546,10 +546,10 @@ namespace EvalLearningTools
 #endif
 			int king = (int)(raw_index  /* % SQUARE_NB */);
 			assert(king < max_king_sq_);
-			return fromKPP((Square)king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1);
+			return fromKPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1);
 		}
-		KPP fromKPP(Square king, Eval::BonaPiece p0, Eval::BonaPiece p1) const
+		KPP fromKPP(Square king, PieceSquare p0, PieceSquare p1) const
 		{
 			KPP my_kpp(king, p0, p1);
 			my_kpp.set(max_king_sq_,fe_end_,min_index());
@@ -558,8 +558,8 @@ namespace EvalLearningTools
 		// When you construct this object using fromIndex(), you can get information with the following accessors.
 		Square king() const { return king_; }
-		Eval::BonaPiece piece0() const { return piece0_; }
+		PieceSquare piece0() const { return piece0_; }
-		Eval::BonaPiece piece1() const { return piece1_; }
+		PieceSquare piece1() const { return piece1_; }
 // number of dimension reductions
@@ -584,7 +584,7 @@ namespace EvalLearningTools
 			// Note that if you use a triangular array, the swapped piece0 and piece1 will not be returned.
 			kpp_[0] = fromKPP(king_, piece0_, piece1_);
 #if defined(USE_KPP_MIRROR_WRITE)
-			kpp_[1] = fromKPP(Mir(king_), mir_piece(piece0_), mir_piece(piece1_));
+			kpp_[1] = fromKPP(flip_file(king_), Eval::mir_piece(piece0_), Eval::mir_piece(piece1_));
 #endif
 #else
@@ -592,8 +592,8 @@ namespace EvalLearningTools
 			kpp_[0] = fromKPP(king_, piece0_, piece1_);
 			kpp_[1] = fromKPP(king_, piece1_, piece0_);
 #if defined(USE_KPP_MIRROR_WRITE)
-			kpp_[2] = fromKPP(Mir(king_), mir_piece(piece0_), mir_piece(piece1_));
+			kpp_[2] = fromKPP(flip_file(king_), Eval::mir_piece(piece0_), Eval::mir_piece(piece1_));
-			kpp_[3] = fromKPP(Mir(king_), mir_piece(piece1_), mir_piece(piece0_));
+			kpp_[3] = fromKPP(flip_file(king_), Eval::mir_piece(piece1_), Eval::mir_piece(piece0_));
 #endif
 #endif
 		}
@@ -607,14 +607,14 @@ namespace EvalLearningTools
 #else
 			// Macro similar to that used in Bonanza 6.0
-			auto PcPcOnSq = [&](Square k, Eval::BonaPiece i, Eval::BonaPiece j)
+			auto PcPcOnSq = [&](Square k, PieceSquare i, PieceSquare j)
 			{
 				// (i,j) in this triangular array is the element in the i-th row and the j-th column.
 				// The element at row i, column 0 is preceded by all elements of the earlier rows, i.e. 1 + 2 + ... + i = i * (i+1) / 2 of them.
 				// The element at row i, column j is j further on: i*(i+1)/2 + j
-				// BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
+				// PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
 				return (uint64_t)k * triangle_fe_end + (uint64_t)(uint64_t(i)*(uint64_t(i)+1) / 2 + uint64_t(j));
 			};
@@ -646,7 +646,7 @@ namespace EvalLearningTools
 	private:
 		Square king_;
-		Eval::BonaPiece piece0_, piece1_;
+		PieceSquare piece0_, piece1_;
 		uint64_t triangle_fe_end; // = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2;
 	};
@@ -672,7 +672,7 @@ namespace EvalLearningTools
 	struct KPPP : public SerializerBase
 	{
 	protected:
-		KPPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1, Eval::BonaPiece p2) :
+		KPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) :
 			king_(king), piece0_(p0), piece1_(p1), piece2_(p2)
 		{
 			assert(piece0_ > piece1_ && piece1_ > piece2_);
@@ -716,9 +716,9 @@ namespace EvalLearningTools
 			kppp_[0] = fromKPPP(king_, piece0_, piece1_,piece2_);
 #if KPPP_LOWER_COUNT > 1
 			// Applying mir_piece can break the sorted order of the pieces, so they must be sorted again.
-			Eval::BonaPiece p_list[3] = { mir_piece(piece2_), mir_piece(piece1_), mir_piece(piece0_) };
+			PieceSquare p_list[3] = { Eval::mir_piece(piece2_), Eval::mir_piece(piece1_), Eval::mir_piece(piece0_) };
 			my_insertion_sort(p_list, 0, 3);
-			kppp_[1] = fromKPPP((int)Mir((Square)king_), p_list[2] , p_list[1], p_list[0]);
+			kppp_[1] = fromKPPP((int)flip_file((Square)king_), p_list[2] , p_list[1], p_list[0]);
 #endif
 		}
@@ -797,12 +797,12 @@ namespace EvalLearningTools
 			assert(king < max_king_sq_);
 			// Propagate king_sq and fe_end.
-			return fromKPPP((Square)king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1 , (Eval::BonaPiece)piece2);
+			return fromKPPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1 , (PieceSquare)piece2);
 		}
 		// Specify k,p0,p1,p2 to build KPPP instance.
 		// The king_sq and fe_end passed by set() which is internally retained are inherited.
-		KPPP fromKPPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1, Eval::BonaPiece p2) const
+		KPPP fromKPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) const
 		{
 			KPPP kppp(king, p0, p1, p2);
 			kppp.set(max_king_sq_, fe_end_,min_index());
@@ -815,7 +815,7 @@ namespace EvalLearningTools
 			// Macro similar to the one used in Bonanza 6.0
 			// Precondition) i> j> k.
 			// Invalid if i == j or j == k.
-			auto PcPcPcOnSq = [this](int king, Eval::BonaPiece i, Eval::BonaPiece j , Eval::BonaPiece k)
+			auto PcPcPcOnSq = [this](int king, PieceSquare i, PieceSquare j , PieceSquare k)
 			{
 				// (i,j,k) in this triangular array is the element in the i-th row and the j-th column.
 				// The element at row i, column 0, index 0 is preceded by the sum of all earlier rows: 0 + 0 + 1 + 3 + 6 + ... + (i-1)*(i-2)/2 = i*(i-1)*(i-2)/6
@@ -823,7 +823,7 @@ namespace EvalLearningTools
 				// The element at row i, column j, index k is k further on: + k
 				assert(i > j && j > k);
-				// BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
+				// PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
 				return (uint64_t)king * triangle_fe_end + (uint64_t)(
 						  uint64_t(i)*(uint64_t(i) - 1) * (uint64_t(i) - 2) / 6
 						+ uint64_t(j)*(uint64_t(j) - 1) / 2
@@ -836,9 +836,9 @@ namespace EvalLearningTools
 		// When you construct this object using fromIndex(), you can get information with the following accessors.
 		int king() const { return king_; }
-		Eval::BonaPiece piece0() const { return piece0_; }
+		PieceSquare piece0() const { return piece0_; }
-		Eval::BonaPiece piece1() const { return piece1_; }
+		PieceSquare piece1() const { return piece1_; }
-		Eval::BonaPiece piece2() const { return piece2_; }
+		PieceSquare piece2() const { return piece2_; }
 		// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
 		// Provided so that the interface matches KK and KKP. This method always returns false for this KPPP class.
 		bool is_inverse() const {
@@ -859,14 +859,14 @@ namespace EvalLearningTools
 	private:
 		int king_;
-		Eval::BonaPiece piece0_, piece1_,piece2_;
+		PieceSquare piece0_, piece1_,piece2_;
 		// The part of the square array of [fe_end][fe_end][fe_end] of kppp[king_sq][fe_end][fe_end][fe_end] is made into a triangular array.
 		// If kppp[king_sq][triangle_fe_end], the number of elements from the 0th row of this triangular array is 0,0,1,3,..., The nth row is n(n-1)/2.
 		// therefore,
 		// triangle_fe_end = Σn(n-1)/2 , n=0..fe_end-1
 		//                 =  fe_end * (fe_end - 1) * (fe_end - 2) / 6
-		uint64_t triangle_fe_end; // ((uint64_t)Eval::fe_end)*((uint64_t)Eval::fe_end - 1)*((uint64_t)Eval::fe_end - 2) / 6;
+		uint64_t triangle_fe_end; // ((uint64_t)PieceSquare::PS_END)*((uint64_t)PieceSquare::PS_END - 1)*((uint64_t)PieceSquare::PS_END - 2) / 6;
 	};
 	// Output for debugging.
@@ -885,12 +885,12 @@ namespace EvalLearningTools
 	// piece0() >piece1()
 	// It is, and it is necessary to keep this constraint even when passing piece0,1 in the constructor.
 	//
-	// Due to this constraint, BonaPieceZero cannot be assigned to piece0 and piece1 at the same time and passed.
+	// Due to this constraint, PieceSquareZero cannot be assigned to piece0 and piece1 at the same time and passed.
 	// If you want to support learning of dropped frames, you need to devise with evaluate().
 	struct KKPP: SerializerBase
 	{
 	protected:
-		KKPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1) :
+		KKPP(int king, PieceSquare p0, PieceSquare p1) :
 			king_(king), piece0_(p0), piece1_(p1)
 		{
 			assert(piece0_ > piece1_);
@@ -956,12 +956,12 @@ namespace EvalLearningTools
 			assert(king < max_king_sq_);
 			// Propagate king_sq and fe_end.
-			return fromKKPP(king, (Eval::BonaPiece)piece0, (Eval::BonaPiece)piece1);
+			return fromKKPP(king, (PieceSquare)piece0, (PieceSquare)piece1);
 		}
 		// Specify k,p0,p1 to build KKPP instance.
 		// The king_sq and fe_end passed by set() which is internally retained are inherited.
-		KKPP fromKKPP(int king, Eval::BonaPiece p0, Eval::BonaPiece p1) const
+		KKPP fromKKPP(int king, PieceSquare p0, PieceSquare p1) const
 		{
 			KKPP kkpp(king, p0, p1);
 			kkpp.set(max_king_sq_, fe_end_,min_index());
@@ -974,11 +974,11 @@ namespace EvalLearningTools
 			// Macro similar to the one used in Bonanza 6.0
 			// Precondition) i> j.
 			// Invalid if i == j.
-			auto PcPcOnSq = [this](int king, Eval::BonaPiece i, Eval::BonaPiece j)
+			auto PcPcOnSq = [this](int king, PieceSquare i, PieceSquare j)
 			{
 				assert(i > j);
-				// BonaPiece type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
+				// PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
 				return (uint64_t)king * triangle_fe_end + (uint64_t)(
 					+ uint64_t(i)*(uint64_t(i) - 1) / 2
 					+ uint64_t(j)
@@ -990,8 +990,8 @@ namespace EvalLearningTools
 		// When you construct this object using fromIndex(), fromKKPP(), you can get information with the following accessors.
 		int king() const { return king_; }
-		Eval::BonaPiece piece0() const { return piece0_; }
+		PieceSquare piece0() const { return piece0_; }
-		Eval::BonaPiece piece1() const { return piece1_; }
+		PieceSquare piece1() const { return piece1_; }
 		// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
 		// Provided so that the interface matches KK and KKP. In this KKPP class, this method always returns false.
@@ -1013,7 +1013,7 @@ namespace EvalLearningTools
 	private:
 		int king_;
-		Eval::BonaPiece piece0_, piece1_;
+		PieceSquare piece0_, piece1_;
 		// Triangularize the square array part of [fe_end][fe_end] of kppp[king_sq][fe_end][fe_end].
 		uint64_t triangle_fe_end = 0;
@@ -20,7 +20,7 @@ void MultiThink::go_think()
 	// Read evaluation function, etc.
 	// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
 	// Skip memory corruption check.
-	init_nnue(true);
+	Eval::init_NNUE();
 	// Call the derived class's init().
 	init();
@@ -4,6 +4,7 @@
 #if defined(EVAL_LEARN)
 #include <functional>
 #include <mutex>
 #include "../misc.h"
 #include "../learn/learn.h"
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -20,15 +18,6 @@
 #include <iostream>
 #ifdef _WIN32
 #include <filesystem>
 #ifndef NOMINMAX
 #define NOMINMAX
 #endif
 #include <Windows.h>
 #endif
 #include "bitboard.h"
 #include "endgame.h"
 #include "position.h"
@@ -43,17 +32,6 @@ namespace PSQT {
 }
 int main(int argc, char* argv[]) {
  // Change the current working directory to the binary directory.  So that a
  // net file path can be specified with a relative path from the binary
  // directory.
  // TODO(someone): Implement the logic for other OS.
 #ifdef _WIN32
  TCHAR filename[_MAX_PATH];
  ::GetModuleFileName(NULL, filename, sizeof(filename) / sizeof(filename[0]));
  std::filesystem::path current_path = filename;
  current_path.remove_filename();
  std::filesystem::current_path(current_path);
 #endif
  std::cout << engine_info() << std::endl;
@@ -66,6 +44,7 @@ int main(int argc, char* argv[]) {
  Endgames::init();
  Threads.set(size_t(Options["Threads"]));
  Search::clear(); // After threads are up
  Eval::init_NNUE();
  UCI::loop(argc, argv);
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -42,11 +40,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
 #endif
 #include <fstream>
 #include <functional>
 #include <iomanip>
 #include <iostream>
 #include <sstream>
 #include <vector>
 #include <cstdlib>
 #if defined(__linux__) && !defined(__ANDROID__)
 #include <stdlib.h>
@@ -140,7 +138,7 @@ const string engine_info(bool to_uci) {
  string month, day, year;
  stringstream ss, date(__DATE__); // From compiler, format is "Sep 21 2008"
-  ss << "Stockfish+NNUE " << Version << setfill('0');
+  ss << "Stockfish " << Version << setfill('0');
  if (Version.empty())
  {
@@ -148,10 +146,8 @@ const string engine_info(bool to_uci) {
      ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2);
  }
-  ss << (Is64Bit ? " 64" : "")
+  ss << (to_uci  ? "\nid author ": " by ")
-     << (HasPext ? " BMI2" : (HasPopCnt ? " POPCNT" : ""))
+     << "the Stockfish developers (see AUTHORS file)";
     << (to_uci  ? "\nid author ": " by ")
     << "T. Romstad, M. Costalba, J. Kiiski, G. Linscott, H. Noda, Y. Nasu, M. Isozaki";
  return ss.str();
 }
@@ -216,7 +212,33 @@ const std::string compiler_info() {
     compiler += " on unknown system";
  #endif
-  compiler += "\n __VERSION__ macro expands to: ";
+  compiler += "\nCompilation settings include: ";
  compiler += (Is64Bit ? " 64bit" : " 32bit");
  #if defined(USE_AVX512)
    compiler += " AVX512";
  #endif
  #if defined(USE_AVX2)
    compiler += " AVX2";
  #endif
  #if defined(USE_SSE42)
    compiler += " SSE42";
  #endif
  #if defined(USE_SSE41)
    compiler += " SSE41";
  #endif
  #if defined(USE_SSSE3)
    compiler += " SSSE3";
  #endif
  #if defined(USE_SSE3)
    compiler += " SSE3";
  #endif
    compiler += (HasPext ? " BMI2" : "");
    compiler += (HasPopCnt ? " POPCNT" : "");
  #if !defined(NDEBUG)
    compiler += " DEBUG";
  #endif
  compiler += "\n__VERSION__ macro expands to: ";
  #ifdef __VERSION__
     compiler += __VERSION__;
  #else
@@ -294,6 +316,29 @@ void prefetch(void* addr) {
 #endif
 /// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
 /// Memory allocated with std_aligned_alloc must be freed with std_aligned_free.
 ///
 void* std_aligned_alloc(size_t alignment, size_t size) {
 // The allocation call is chosen at compile time; std_aligned_free() uses the
 // same conditions to choose the matching release call.
 #if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
  // These platforms call the unqualified (global-namespace) aligned_alloc.
  return aligned_alloc(alignment, size);
 #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
  // Note the argument order: _mm_malloc takes (size, alignment).
  return _mm_malloc(size, alignment);
 #else
  // Everything else uses the C++17 standard-library function.
  return std::aligned_alloc(alignment, size);
 #endif
 }
 // Releases memory obtained from std_aligned_alloc(). The preprocessor
 // conditions are the same as in std_aligned_alloc() so that each allocation
 // call is paired with its release call.
 void std_aligned_free(void* ptr) {
 #if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
  // Counterpart of the aligned_alloc branch.
  free(ptr);
 #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES)))
  // Counterpart of the _mm_malloc branch.
  _mm_free(ptr);
 #else
  // Counterpart of the std::aligned_alloc branch.
  free(ptr);
 #endif
 }
 /// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages.
 /// The returned pointer is the aligned one, while the mem argument is the one that needs
@@ -371,8 +416,8 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
  {
      if (mem)
          sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
-      //else
+      else
-          //sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
+          sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
  }
  firstCall = false;
@@ -530,99 +575,99 @@ void bindThisThread(size_t idx) {
 // Returns a string that represents the current time. (Used when learning evaluation functions)
 std::string now_string()
 {
-  // Using std::ctime(), localtime() gives a warning that MSVC is not secure.
+    // Using std::ctime(), localtime() gives a warning that MSVC is not secure.
-  // This shouldn't happen in the C++ standard, but...
+    // This shouldn't happen in the C++ standard, but...
 #if defined(_MSC_VER)
  // C4996 : 'ctime' : This function or variable may be unsafe.Consider using ctime_s instead.
 #pragma warning(disable : 4996)
 #endif
-  auto now = std::chrono::system_clock::now();
+    auto now = std::chrono::system_clock::now();
-  auto tp = std::chrono::system_clock::to_time_t(now);
+    auto tp = std::chrono::system_clock::to_time_t(now);
-  auto result = string(std::ctime(&tp));
+    auto result = string(std::ctime(&tp));
-  // remove line endings if they are included at the end
+    // remove line endings if they are included at the end
-  while (*result.rbegin() == '\n' || (*result.rbegin() == '\r'))
+    while (*result.rbegin() == '\n' || (*result.rbegin() == '\r'))
-    result.pop_back();
+        result.pop_back();
-  return result;
+    return result;
 }
 void sleep(int ms)
 {
-	std::this_thread::sleep_for(std::chrono::milliseconds(ms));
+    std::this_thread::sleep_for(std::chrono::milliseconds(ms));
 }
 void* aligned_malloc(size_t size, size_t align)
 {
-	void* p = _mm_malloc(size, align);
+    void* p = _mm_malloc(size, align);
-	if (p == nullptr)
+    if (p == nullptr)
-	{
+    {
-		std::cout << "info string can't allocate memory. sise = " << size << std::endl;
+        std::cout << "info string can't allocate memory. sise = " << size << std::endl;
-		exit(1);
+        exit(1);
-	}
+    }
-	return p;
+    return p;
 }
 int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
 {
-  fstream fs(filename, ios::in | ios::binary);
+    fstream fs(filename, ios::in | ios::binary);
  if (fs.fail())
    return 1;
  fs.seekg(0, fstream::end);
  uint64_t eofPos = (uint64_t)fs.tellg();
  fs.clear(); // Otherwise the next seek may fail.
  fs.seekg(0, fstream::beg);
  uint64_t begPos = (uint64_t)fs.tellg();
  uint64_t file_size = eofPos - begPos;
  //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
  // I know the file size, so call callback_func to get a buffer for this,
  // Get the pointer.
  void* ptr = callback_func(file_size);
  // If the buffer could not be secured, or if the file size is different from the expected file size,
  // It is supposed to return nullptr. At this time, reading is interrupted and an error is returned.
  if (ptr == nullptr)
    return 2;
  // read in pieces
  const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to read in one read (1GB)
  for (uint64_t pos = 0; pos < file_size; pos += block_size)
  {
    // size to read this time
    uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos);
    fs.read((char*)ptr + pos, read_size);
    // Read error occurred in the middle of the file.
    if (fs.fail())
-      return 2;
+        return 1;
-    //cout << ".";
+    fs.seekg(0, fstream::end);
-  }
+    uint64_t eofPos = (uint64_t)fs.tellg();
-  fs.close();
+    fs.clear(); // Otherwise the next seek may fail.
    fs.seekg(0, fstream::beg);
    uint64_t begPos = (uint64_t)fs.tellg();
    uint64_t file_size = eofPos - begPos;
    //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
-  return 0;
+    // I know the file size, so call callback_func to get a buffer for this,
    // Get the pointer.
    void* ptr = callback_func(file_size);
    // If the buffer could not be secured, or if the file size is different from the expected file size,
    // It is supposed to return nullptr. At this time, reading is interrupted and an error is returned.
    if (ptr == nullptr)
        return 2;
    // read in pieces
    const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to read in one read (1GB)
    for (uint64_t pos = 0; pos < file_size; pos += block_size)
    {
        // size to read this time
        uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos);
        fs.read((char*)ptr + pos, read_size);
        // Read error occurred in the middle of the file.
        if (fs.fail())
            return 2;
        //cout << ".";
    }
    fs.close();
    return 0;
 }
 int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
 {
-  fstream fs(filename, ios::out | ios::binary);
+    fstream fs(filename, ios::out | ios::binary);
-  if (fs.fail())
+    if (fs.fail())
-    return 1;
+        return 1;
-  const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to write in one write (1GB)
+    const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to write in one write (1GB)
-  for (uint64_t pos = 0; pos < size; pos += block_size)
+    for (uint64_t pos = 0; pos < size; pos += block_size)
-  {
+    {
-    // Memory size to write this time
+        // Memory size to write this time
-    uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos);
+        uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos);
-    fs.write((char*)ptr + pos, write_size);
+        fs.write((char*)ptr + pos, write_size);
-    //cout << ".";
+        //cout << ".";
-  }
+    }
-  fs.close();
+    fs.close();
-  return 0;
+    return 0;
 }
 // ----------------------------
@@ -642,22 +687,22 @@ int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
 #include <locale> // This is required for wstring_convert.
 namespace Dependency {
-  int mkdir(std::string dir_name)
+    int mkdir(std::string dir_name)
-  {
+    {
-    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv;
+        std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> cv;
-    return _wmkdir(cv.from_bytes(dir_name).c_str());
+        return _wmkdir(cv.from_bytes(dir_name).c_str());
-    //	::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
+        //	::CreateDirectory(cv.from_bytes(dir_name).c_str(),NULL);
-  }
+    }
 }
 #elif defined(__GNUC__) 
 #include <direct.h>
 namespace Dependency {
-  int mkdir(std::string dir_name)
+    int mkdir(std::string dir_name)
-  {
+    {
-    return _mkdir(dir_name.c_str());
+        return _mkdir(dir_name.c_str());
-  }
+    }
 }
 #endif
@@ -669,10 +714,10 @@ namespace Dependency {
 #include "sys/stat.h"
 namespace Dependency {
-  int mkdir(std::string dir_name)
+    int mkdir(std::string dir_name)
-  {
+    {
-    return ::mkdir(dir_name.c_str(), 0777);
+        return ::mkdir(dir_name.c_str(), 0777);
-  }
+    }
 }
 #else
@@ -680,10 +725,10 @@ namespace Dependency {
 // The function to dig a folder on linux is good for the time being... Only used to save the evaluation function file...
 namespace Dependency {
-  int mkdir(std::string dir_name)
+    int mkdir(std::string dir_name)
-  {
+    {
-    return 0;
+        return 0;
-  }
+    }
 }
 #endif
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,7 +19,6 @@
 #ifndef MISC_H_INCLUDED
 #define MISC_H_INCLUDED
 #include <algorithm>
 #include <cassert>
 #include <chrono>
 #include <functional>
@@ -29,17 +26,15 @@
 #include <ostream>
 #include <string>
 #include <vector>
 #ifndef _MSC_VER
 #include <mm_malloc.h>
 #endif
 #include "types.h"
 #include "thread_win32_osx.h"
 const std::string engine_info(bool to_uci = false);
 const std::string compiler_info();
 void prefetch(void* addr);
 void start_logger(const std::string& fname);
 void* std_aligned_alloc(size_t alignment, size_t size);
 void std_aligned_free(void* ptr);
 void* aligned_ttmem_alloc(size_t size, void*& mem);
 void aligned_ttmem_free(void* mem); // nop if mem == nullptr
@@ -158,13 +153,6 @@ extern void sleep(int ms);
 // Returns a string that represents the current time. (Used for log output when learning evaluation function)
 std::string now_string();
 // Helper for aborting part-way through processing: waits, then terminates
 // the process with EXIT_FAILURE.
 static void my_exit()
 {
 	sleep(3000); // Wait 3000 ms first: exiting before the error message has been output would be bad.
 	exit(EXIT_FAILURE);
 }
 // When compiled with gcc/clang under environments such as msys2 or Windows Subsystem for Linux,
 // C++ std::ifstream::read() cannot read or write files larger than 2GB in one call, so the following are wrappers that work around this.
 //
@@ -265,9 +253,6 @@ struct Path
 	}
 };
 extern void* aligned_malloc(size_t size, size_t align);
 static void aligned_free(void* ptr) { _mm_free(ptr); }
 // An alignas specification is ignored by new, and it is also ignored when an STL container allocates memory,
 // so this custom allocator is used in those cases.
 template <typename T>
@@ -281,8 +266,8 @@ public:
  template <typename U> AlignedAllocator(const AlignedAllocator<U>&) {}
-  T* allocate(std::size_t n) { return (T*)aligned_malloc(n * sizeof(T), alignof(T)); }
+  T* allocate(std::size_t n) { return (T*)std_aligned_alloc(alignof(T), n * sizeof(T)); }
-  void deallocate(T* p, std::size_t n) { aligned_free(p); }
+  void deallocate(T* p, std::size_t n) { std_aligned_free(p); }
 };
 // --------------------
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -0,0 +1,54 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Definition of input features and network structure used in NNUE evaluation function
 #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
 #define NNUE_HALFKP_256X2_32_32_H_INCLUDED
 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
 #include "../layers/input_slice.h"
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"
 // Compile-time description of the network: which input feature set is used
 // and how the layers are chained. `Network` is the alias for the final layer.
 namespace Eval::NNUE {
 // Input features used in evaluation function
 // (a FeatureSet containing the single HalfKP<kFriend> feature).
 using RawFeatures = Features::FeatureSet<
    Features::HalfKP<Features::Side::kFriend>>;
 // Number of input feature dimensions after conversion
 constexpr IndexType kTransformedFeatureDimensions = 256;
 namespace Layers {
 // Define network structure
 // Each layer type wraps the previous one, so OutputLayer names the whole chain:
 //   InputSlice<256 * 2> -> AffineTransform<.., 32> -> ClippedReLU
 //                       -> AffineTransform<.., 32> -> ClippedReLU
 //                       -> AffineTransform<.., 1>
 // NOTE(review): the numeric template arguments are presumably the layer
 // output dimensions (matching the "256x2-32-32" header name) - confirm in
 // layers/affine_transform.h and layers/input_slice.h.
 using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
 using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
 using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
 using OutputLayer = AffineTransform<HiddenLayer2, 1>;
 }  // namespace Layers
 // The complete network type is the type of its last layer.
 using Network = Layers::OutputLayer;
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
@@ -0,0 +1,221 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Code for calculating NNUE evaluation function
 #include <fstream>
 #include <iostream>
 #include <set>
 #include "../evaluate.h"
 #include "../position.h"
 #include "../misc.h"
 #include "../uci.h"
 #include "evaluate_nnue.h"
 // Lookup table with PIECE_NB entries, each holding a pair of PieceSquare
 // base values for one piece code.
 // Entries 1-6 pair PS_W_x with PS_B_x, entries 9-14 hold the same pairs
 // swapped; entries 0, 7, 8 and 15 are PS_NONE placeholders.
 // NOTE(review): presumably indexed by the Piece enum value (white pieces
 // 1-6, black pieces 9-14) - confirm against the Piece definition.
 ExtPieceSquare kpp_board_index[PIECE_NB] = {
 // convention: W - us, B - them
 // viewed from other side, W and B are reversed
    { PS_NONE,     PS_NONE     },
    { PS_W_PAWN,   PS_B_PAWN   },
    { PS_W_KNIGHT, PS_B_KNIGHT },
    { PS_W_BISHOP, PS_B_BISHOP },
    { PS_W_ROOK,   PS_B_ROOK   },
    { PS_W_QUEEN,  PS_B_QUEEN  },
    { PS_W_KING,   PS_B_KING   },
    { PS_NONE,     PS_NONE     },
    { PS_NONE,     PS_NONE     },
    { PS_B_PAWN,   PS_W_PAWN   },
    { PS_B_KNIGHT, PS_W_KNIGHT },
    { PS_B_BISHOP, PS_W_BISHOP },
    { PS_B_ROOK,   PS_W_ROOK   },
    { PS_B_QUEEN,  PS_W_QUEEN  },
    { PS_B_KING,   PS_W_KING   },
    { PS_NONE,     PS_NONE     }
 };
 namespace Eval::NNUE {
  // Input feature converter
  // (owning pointer; storage is allocated and zero-filled by Initialize())
  AlignedPtr<FeatureTransformer> feature_transformer;
  // Evaluation function
  // (owning pointer; storage is allocated and zero-filled by Initialize())
  AlignedPtr<Network> network;
  // Evaluation function file name
  // (set by load_eval_file() to the path it was asked to load)
  std::string fileName;
  // Saved evaluation function file name
  // NOTE(review): defaults to "nn.bin"; not referenced elsewhere in this
  // file - presumably used by code that writes networks, confirm.
  std::string savedfileName = "nn.bin";
  // Build the textual description of the evaluation function structure:
  // "Features=<transformer structure>,Network=<network structure>"
  std::string GetArchitectureString() {
    std::string description = "Features=";
    description += FeatureTransformer::GetStructureString();
    description += ",Network=";
    description += Network::GetStructureString();
    return description;
  }
  namespace Detail {
  // Allocate aligned storage for one parameter object, hand ownership of it
  // to the given pointer and fill the storage with zero bytes
  template <typename T>
  void Initialize(AlignedPtr<T>& pointer) {
    const auto storage = std_aligned_alloc(alignof(T), sizeof(T));
    pointer.reset(reinterpret_cast<T*>(storage));
    std::memset(pointer.get(), 0, sizeof(T));
  }
  // Read one component of the evaluation function: a 32-bit hash that must
  // equal T::GetHashValue(), followed by the component's own parameters.
  // Returns false if the stream fails or the hash does not match.
  template <typename T>
  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
    std::uint32_t stored_hash;
    stream.read(reinterpret_cast<char*>(&stored_hash), sizeof(stored_hash));
    // stored_hash is only inspected when the read itself succeeded
    const bool header_ok = stream && stored_hash == T::GetHashValue();
    if (!header_ok) return false;
    return pointer->ReadParameters(stream);
  }
  // Write one component of the evaluation function: its 32-bit hash
  // (T::GetHashValue()) followed by the component's own parameters.
  // Returns whatever the component's WriteParameters reports.
  template <typename T>
  bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
    constexpr std::uint32_t component_hash = T::GetHashValue();
    const char* const raw = reinterpret_cast<const char*>(&component_hash);
    stream.write(raw, sizeof(component_hash));
    return pointer->WriteParameters(stream);
  }
  }  // namespace Detail
  // Initialize the evaluation function parameters:
  // (re)allocate the global feature transformer and network objects and
  // zero-fill them via Detail::Initialize
  void Initialize() {
    Detail::Initialize(feature_transformer);
    Detail::Initialize(network);
  }
  // Read the network file header: format version, structure hash value and
  // the architecture description (a 32-bit length followed by that many bytes).
  //   stream       : input positioned at the start of the header
  //   hash_value   : receives the hash value stored in the file
  //   architecture : receives the architecture description
  // Returns false if the stream fails, the version differs from kVersion,
  // or the file ends before the announced description length is reached.
  bool ReadHeader(std::istream& stream,
    std::uint32_t* hash_value, std::string* architecture) {
    std::uint32_t version, size;
    stream.read(reinterpret_cast<char*>(&version), sizeof(version));
    stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
    stream.read(reinterpret_cast<char*>(&size), sizeof(size));
    if (!stream || version != kVersion) return false;
    // The length comes straight from the file, so it is not allocated up
    // front: a truncated or corrupted file could announce up to 4 GiB.
    // Read in bounded chunks so the string only grows with data that exists.
    architecture->clear();
    char chunk[4096];
    while (size > 0) {
      const std::uint32_t count =
          size < sizeof(chunk) ? size : static_cast<std::uint32_t>(sizeof(chunk));
      stream.read(chunk, static_cast<std::streamsize>(count));
      if (!stream) return false;
      architecture->append(chunk, count);
      size -= count;
    }
    return !stream.fail();
  }
  // Write the network file header: kVersion, the structure hash value, the
  // 32-bit length of the architecture description and the description itself.
  // Returns false if the stream is in a failed state afterwards.
  bool WriteHeader(std::ostream& stream,
    std::uint32_t hash_value, const std::string& architecture) {
    const auto length = static_cast<std::uint32_t>(architecture.size());
    stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion))
          .write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value))
          .write(reinterpret_cast<const char*>(&length), sizeof(length))
          .write(architecture.data(), length);
    return !stream.fail();
  }
  // Read network parameters
  bool ReadParameters(std::istream& stream) {
    std::uint32_t hash_value;
    std::string architecture;
    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
    if (hash_value != kHashValue) return false;
    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
    if (!Detail::ReadParameters(stream, network)) return false;
    return stream && stream.peek() == std::ios::traits_type::eof();
  }
  // Write a complete network: header, feature transformer, then the layers.
  // Stops at the first part that reports failure; otherwise the result is
  // the final state of the stream.
  bool WriteParameters(std::ostream& stream) {
    const bool written =
        WriteHeader(stream, kHashValue, GetArchitectureString())
        && Detail::WriteParameters(stream, feature_transformer)
        && Detail::WriteParameters(stream, network);
    return written && !stream.fail();
  }
  // Proceed with the difference calculation if possible
  // File-local helper that forwards the position to the global feature
  // transformer's UpdateAccumulatorIfPossible
  static void UpdateAccumulatorIfPossible(const Position& pos) {
    feature_transformer->UpdateAccumulatorIfPossible(pos);
  }
  // Calculate the evaluation value of a position.
  // The score cached in the position's accumulator is returned as is unless
  // `refresh` is set or no score has been computed yet; otherwise the features
  // are transformed, propagated through the network, scaled down by FV_SCALE
  // and the result is stored back into the accumulator.
  static Value ComputeScore(const Position& pos, bool refresh) {
    auto& acc = pos.state()->accumulator;
    const bool must_compute = refresh || !acc.computed_score;
    if (must_compute) {
      alignas(kCacheLineSize) TransformedFeatureType
          features[FeatureTransformer::kBufferSize];
      feature_transformer->Transform(pos, features, refresh);
      alignas(kCacheLineSize) char workspace[Network::kBufferSize];
      const auto output = network->Propagate(features, workspace);
      acc.score = static_cast<Value>(output[0] / FV_SCALE);
      acc.computed_score = true;
    }
    return acc.score;
  }
  // Load the evaluation function file.
  // The parameter objects are always re-initialized first. When the
  // "SkipLoadingEval" option is set, nothing is read and true is returned;
  // otherwise the result of parsing `evalFile` is returned.
  bool load_eval_file(const std::string& evalFile) {
    Initialize();
    if (!Options["SkipLoadingEval"]) {
      fileName = evalFile;
      std::ifstream stream(evalFile, std::ios::binary);
      return ReadParameters(stream);
    }
    std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
    return true;
  }
  // Evaluation function. Perform differential calculation.
  // A cached score is reused when available, and the result is clamped to
  // the range [VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1].
  Value evaluate(const Position& pos) {
    const Value raw = ComputeScore(pos, false);
    return Utility::clamp(raw, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
  }
  // Evaluation function. Perform full calculation.
  // Calls ComputeScore with refresh = true, so a score cached in the
  // accumulator is not reused. Unlike evaluate(), the result is not clamped.
  Value compute_eval(const Position& pos) {
    return ComputeScore(pos, true);
  }
  // Proceed with the difference calculation if possible
  // Public entry point; forwards to the file-local
  // UpdateAccumulatorIfPossible helper
  void update_eval(const Position& pos) {
    UpdateAccumulatorIfPossible(pos);
  }
 } // namespace Eval::NNUE
@@ -0,0 +1,77 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // header used in NNUE evaluation function
 #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
 #define NNUE_EVALUATE_NNUE_H_INCLUDED
 #include "nnue_feature_transformer.h"
 #include <memory>
 namespace Eval::NNUE {
  // Hash value of evaluation function structure:
  // XOR of the feature transformer's and the network's hash values.
  // ReadParameters() rejects files whose header hash differs from this value.
  constexpr std::uint32_t kHashValue =
      FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
  // Deleter for automating release of memory area:
  // runs the object's destructor explicitly, then returns the storage
  // through std_aligned_free
  template <typename T>
  struct AlignedDeleter {
    void operator()(T* ptr) const {
      ptr->~T();
      std_aligned_free(ptr);
    }
  };
  // Owning pointer whose storage is released through AlignedDeleter
  template <typename T>
  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
  // Input feature converter
  extern AlignedPtr<FeatureTransformer> feature_transformer;
  // Evaluation function
  extern AlignedPtr<Network> network;
  // Evaluation function file name
  extern std::string fileName;
  // Saved evaluation function file name
  extern std::string savedfileName;
  // Get a string that represents the structure of the evaluation function
  std::string GetArchitectureString();
  // Read the network file header (version, hash value, architecture string).
  // Returns false on stream failure or when the version is not kVersion.
  bool ReadHeader(std::istream& stream,
    std::uint32_t* hash_value, std::string* architecture);
  // Write the network file header (version, hash value, architecture string).
  // Returns false if the stream is in a failed state afterwards.
  bool WriteHeader(std::ostream& stream,
    std::uint32_t hash_value, const std::string& architecture);
  // Read the evaluation function parameters (header followed by all
  // components); returns true only if the whole stream was consumed
  bool ReadParameters(std::istream& stream);
  // Write the evaluation function parameters (header followed by all
  // components)
  bool WriteParameters(std::ostream& stream);
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
@@ -5,15 +5,15 @@
 #include <random>
 #include <fstream>
-#include "../../learn/learn.h"
+#include "../learn/learn.h"
-#include "../../learn/learning_tools.h"
+#include "../learn/learning_tools.h"
-#include "../../position.h"
+#include "../position.h"
-#include "../../uci.h"
+#include "../uci.h"
-#include "../../misc.h"
+#include "../misc.h"
-#include "../../thread_win32_osx.h"
+#include "../thread_win32_osx.h"
-#include "../evaluate_common.h"
+#include "../eval/evaluate_common.h"
 #include "evaluate_nnue.h"
 #include "evaluate_nnue_learner.h"
@@ -5,7 +5,7 @@
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
-#include "../../learn/learn.h"
+#include "../learn/learn.h"
 namespace Eval {
@@ -5,7 +5,7 @@
 #if defined(EVAL_NNUE)
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 namespace Eval {
@@ -23,7 +23,7 @@ namespace Eval {
        }
        if (perspective == BLACK) {
-          epSquare = Inv(epSquare);
+          epSquare = rotate180(epSquare);
        }
        auto file = file_of(epSquare);
@@ -5,7 +5,7 @@
 #if defined(EVAL_NNUE)
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 namespace Eval {
@@ -0,0 +1,249 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // A class template that represents the input feature set of the NNUE evaluation function
 #ifndef NNUE_FEATURE_SET_H_INCLUDED
 #define NNUE_FEATURE_SET_H_INCLUDED
 #include "features_common.h"
 #include <array>
 namespace Eval::NNUE::Features {
  // Compile-time list of values of type T.
  // Contains() tests membership; kValues exposes the values as a std::array
  // in the order they were given.
  template <typename T, T... Values>
  struct CompileTimeList;
  // Non-empty list: a head value followed by zero or more further values
  template <typename T, T Head, T... Rest>
  struct CompileTimeList<T, Head, Rest...> {
    static constexpr bool Contains(T value) {
      // Compare against every element, head first, stopping at the first hit
      return ((value == Head) || ... || (value == Rest));
    }
    static constexpr std::array<T, 1 + sizeof...(Rest)>
        kValues = {{Head, Rest...}};
  };
  // Namespace-scope definition of the static data member
  template <typename T, T Head, T... Rest>
  constexpr std::array<T, 1 + sizeof...(Rest)>
    CompileTimeList<T, Head, Rest...>::kValues;
  // Empty list: contains nothing and exposes a zero-length array
  template <typename T>
  struct CompileTimeList<T> {
    static constexpr bool Contains(T) { return false; }
    static constexpr std::array<T, 0> kValues = { {} };
  };
  // Class template that adds to the beginning of the list
  // (despite the name, Result places AnotherValue in front of the existing
  // values rather than after them)
  template <typename T, typename ListType, T Value>
  struct AppendToList;
  template <typename T, T... Values, T AnotherValue>
  struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
    using Result = CompileTimeList<T, AnotherValue, Values...>;
  };
  // Class template for adding to a sorted, unique list
  template <typename T, typename ListType, T Value>
  struct InsertToSet;
  // Non-empty list. Result is chosen in three steps:
  //   1. the value is already in the list   -> the list unchanged
  //   2. the value is smaller than First    -> the value placed in front
  //   3. otherwise                          -> First kept in front and the
  //      value inserted recursively into the remaining elements
  template <typename T, T First, T... Remaining, T AnotherValue>
  struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
    using Result = std::conditional_t<
      CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
      CompileTimeList<T, First, Remaining...>,
      std::conditional_t<(AnotherValue < First),
      CompileTimeList<T, AnotherValue, First, Remaining...>,
      typename AppendToList<T, typename InsertToSet<
      T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
      First>::Result>>;
  };
  // Empty list: the result is the single-element list holding the value
  template <typename T, T Value>
  struct InsertToSet<T, CompileTimeList<T>, Value> {
    using Result = CompileTimeList<T, Value>;
  };
  // Base class of feature set
  template <typename Derived>
  class FeatureSetBase {
   public:
    // Get a list of indices for active features
    template <typename IndexListType>
    static void AppendActiveIndices(
        const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
      for (Color perspective : { WHITE, BLACK }) {
        Derived::CollectActiveIndices(
            pos, trigger, perspective, &active[perspective]);
      }
    }
    // Get a list of indices for recently changed features
    template <typename PositionType, typename IndexListType>
    static void AppendChangedIndices(
        const PositionType& pos, TriggerEvent trigger,
        IndexListType removed[2], IndexListType added[2], bool reset[2]) {
      const auto& dp = pos.state()->dirtyPiece;
      if (dp.dirty_num == 0) return;
      for (Color perspective : { WHITE, BLACK }) {
        reset[perspective] = false;
        switch (trigger) {
          case TriggerEvent::kFriendKingMoved:
            reset[perspective] =
                dp.pieceId[0] == PIECE_ID_KING + perspective;
            break;
          default:
            assert(false);
            break;
        }
        if (reset[perspective]) {
          Derived::CollectActiveIndices(
              pos, trigger, perspective, &added[perspective]);
        } else {
          Derived::CollectChangedIndices(
              pos, trigger, perspective,
              &removed[perspective], &added[perspective]);
        }
      }
    }
  };
  // Feature set made of two or more feature types, defined recursively as
  // the first feature type (Head) plus the FeatureSet of the rest (Tail).
  // Tail is processed before Head and only Head's indices are shifted, so the
  // amount of work at runtime stays linear in the number of feature types.
  template <typename FirstFeatureType, typename... RemainingFeatureTypes>
  class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
    public FeatureSetBase<
    FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
  private:
    using Head = FirstFeatureType;
    using Tail = FeatureSet<RemainingFeatureTypes...>;
  public:
    // Hash value embedded in the evaluation function file:
    // Head's hash combined with Tail's hash rotated left by one bit
    static constexpr std::uint32_t kHashValue =
      Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
    // Total number of feature dimensions
    static constexpr IndexType kDimensions =
      Head::kDimensions + Tail::kDimensions;
    // Upper bound on the number of indices that can be active at once
    static constexpr IndexType kMaxActiveDimensions =
      Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
    // Sorted, duplicate-free list of the triggers that require a full
    // recalculation instead of a difference calculation
    using SortedTriggerSet = typename InsertToSet<TriggerEvent,
      typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
    // Name of the feature set: the member names joined by '+'
    static std::string GetName() {
      std::string name(Head::kName);
      name += "+";
      name += Tail::GetName();
      return name;
    }
  private:
    // Collect the active indices of every member feature whose refresh
    // trigger equals `trigger`; Head's indices are offset past Tail's range
    template <typename IndexListType>
    static void CollectActiveIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexListType* const active) {
      Tail::CollectActiveIndices(pos, trigger, perspective, active);
      if (Head::kRefreshTrigger != trigger) return;
      const auto first_new = active->size();
      Head::AppendActiveIndices(pos, perspective, active);
      for (auto pos_in_list = first_new; pos_in_list < active->size(); ++pos_in_list) {
        (*active)[pos_in_list] += Tail::kDimensions;
      }
    }
    // Collect the removed/added indices of every member feature whose refresh
    // trigger equals `trigger`; Head's indices are offset past Tail's range
    template <typename IndexListType>
    static void CollectChangedIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexListType* const removed, IndexListType* const added) {
      Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
      if (Head::kRefreshTrigger != trigger) return;
      const auto first_removed = removed->size();
      const auto first_added = added->size();
      Head::AppendChangedIndices(pos, perspective, removed, added);
      for (auto k = first_removed; k < removed->size(); ++k) {
        (*removed)[k] += Tail::kDimensions;
      }
      for (auto k = first_added; k < added->size(); ++k) {
        (*added)[k] += Tail::kDimensions;
      }
    }
    // The base class and every other FeatureSet instantiation need access to
    // the private collectors
    friend class FeatureSetBase<FeatureSet>;
    template <typename... FeatureTypes>
    friend class FeatureSet;
  };
  // Feature set consisting of exactly one feature type; every property is
  // taken directly from that feature type
  template <typename FeatureType>
  class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
   public:
    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
    // Number of feature dimensions
    static constexpr IndexType kDimensions = FeatureType::kDimensions;
    // Upper bound on the number of features that can be active at once
    static constexpr IndexType kMaxActiveDimensions =
        FeatureType::kMaxActiveDimensions;
    // The single trigger that requires a full recalculation instead of a
    // difference calculation
    using SortedTriggerSet =
        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
    // Name of the feature set, i.e. the name of its only feature
    static std::string GetName() {
      return std::string(FeatureType::kName);
    }
   private:
    // Collect the active indices, but only for the feature's own trigger
    static void CollectActiveIndices(
        const Position& pos, const TriggerEvent trigger, const Color perspective,
        IndexList* const active) {
      if (FeatureType::kRefreshTrigger != trigger) return;
      FeatureType::AppendActiveIndices(pos, perspective, active);
    }
    // Collect the removed/added indices, but only for the feature's own trigger
    static void CollectChangedIndices(
        const Position& pos, const TriggerEvent trigger, const Color perspective,
        IndexList* const removed, IndexList* const added) {
      if (FeatureType::kRefreshTrigger != trigger) return;
      FeatureType::AppendChangedIndices(pos, perspective, removed, added);
    }
    // The base class and every other FeatureSet instantiation need access to
    // the private collectors
    friend class FeatureSetBase<FeatureSet>;
    template <typename... FeatureTypes>
    friend class FeatureSet;
  };
 }  // namespace Eval::NNUE::Features
 #endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
@@ -0,0 +1,50 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 //Common header of input features of NNUE evaluation function
 #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
 #define NNUE_FEATURES_COMMON_H_INCLUDED
 #include "../../evaluate.h"
 #include "../nnue_common.h"
 namespace Eval::NNUE::Features {
  // Forward declarations so that feature headers can refer to the index list
  // and to the feature set template without including their definitions
  class IndexList;
  template <typename... FeatureTypes>
  class FeatureSet;
  // Trigger to perform full calculations instead of difference only
  enum class TriggerEvent {
    kNone, // Calculate the difference whenever possible
    kFriendKingMoved, // Recalculate everything when our own king moves
    kEnemyKingMoved, // Recalculate everything when the enemy king moves
    kAnyKingMoved, // Recalculate everything when either king moves
    kAnyPieceMoved, // Always recalculate everything
  };
  // Which king a feature is associated with, relative to the perspective
  // being evaluated (HalfKP::GetPieces selects the king of `perspective`
  // for kFriend and the king of `~perspective` for kEnemy)
  enum class Side {
    kFriend, // the perspective's own side
    kEnemy, // the opposing side
  };
 }  // namespace Eval::NNUE::Features
 #endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
@@ -0,0 +1,92 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 //Definition of input features HalfKP of NNUE evaluation function
 #include "half_kp.h"
 #include "index_list.h"
 namespace Eval::NNUE::Features {
  // Compute the feature index for a king square and a PieceSquare value:
  // each king square owns a contiguous range of PS_END indices
  template <Side AssociatedKing>
  inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
    const IndexType king_offset =
        static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k);
    return king_offset + p;
  }
  // Fetch the piece list as seen from `perspective` and the square of the
  // king this feature is associated with.
  //   pieces      : receives the piece list (the "fb" list for BLACK,
  //                 the "fw" list otherwise)
  //   sq_target_k : receives the king square decoded from that list
  template <Side AssociatedKing>
  inline void HalfKP<AssociatedKing>::GetPieces(
      const Position& pos, Color perspective,
      PieceSquare** pieces, Square* sq_target_k) {
    if (perspective == BLACK) {
      *pieces = pos.eval_list()->piece_list_fb();
    } else {
      *pieces = pos.eval_list()->piece_list_fw();
    }
    // Friend: the perspective's own king; otherwise the other side's king
    PieceId king_id;
    if (AssociatedKing == Side::kFriend) {
      king_id = static_cast<PieceId>(PIECE_ID_KING + perspective);
    } else {
      king_id = static_cast<PieceId>(PIECE_ID_KING + ~perspective);
    }
    const auto king_entry = (*pieces)[king_id];
    *sq_target_k = static_cast<Square>((king_entry - PS_W_KING) % SQUARE_NB);
  }
  // Append the index of every non-king piece that is present in the piece
  // list, combined with the associated king's square
  template <Side AssociatedKing>
  void HalfKP<AssociatedKing>::AppendActiveIndices(
      const Position& pos, Color perspective, IndexList* active) {
    // Do nothing if array size is small to avoid compiler warning
    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
    PieceSquare* piece_list;
    Square king_square;
    GetPieces(pos, perspective, &piece_list, &king_square);
    for (PieceId id = PIECE_ID_ZERO; id < PIECE_ID_KING; ++id) {
      // Entries set to PS_NONE do not contribute a feature
      if (piece_list[id] == PS_NONE) continue;
      active->push_back(MakeIndex(king_square, piece_list[id]));
    }
  }
  // Append the indices that changed with the last move: for every dirty
  // non-king piece, its previous state goes to `removed` and its new state
  // to `added` (states equal to PS_NONE are skipped)
  template <Side AssociatedKing>
  void HalfKP<AssociatedKing>::AppendChangedIndices(
      const Position& pos, Color perspective,
      IndexList* removed, IndexList* added) {
    PieceSquare* piece_list;
    Square king_square;
    GetPieces(pos, perspective, &piece_list, &king_square);
    const auto& dirty = pos.state()->dirtyPiece;
    for (int n = 0; n < dirty.dirty_num; ++n) {
      // Kings are not part of the feature; only other pieces are handled
      if (dirty.pieceId[n] < PIECE_ID_KING) {
        const auto before = static_cast<PieceSquare>(
            dirty.old_piece[n].from[perspective]);
        if (before != PS_NONE) {
          removed->push_back(MakeIndex(king_square, before));
        }
        const auto after = static_cast<PieceSquare>(
            dirty.new_piece[n].from[perspective]);
        if (after != PS_NONE) {
          added->push_back(MakeIndex(king_square, after));
        }
      }
    }
  }
  template class HalfKP<Side::kFriend>;
 }  // namespace Eval::NNUE::Features
@@ -0,0 +1,67 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 //Definition of input features HalfKP of NNUE evaluation function
 #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
 #define NNUE_FEATURES_HALF_KP_H_INCLUDED
 #include "../../evaluate.h"
 #include "features_common.h"
 namespace Eval::NNUE::Features {
  // Feature HalfKP: Combination of the position of own king
  // and the position of pieces other than kings
  // NOTE(review): kName and kRefreshTrigger are fixed to the "Friend" case and
  // do not depend on AssociatedKing, while kHashValue does. Only
  // HalfKP<Side::kFriend> is explicitly instantiated in half_kp.cpp, so this is
  // consistent today - revisit both constants before adding a kEnemy variant.
  template <Side AssociatedKing>
  class HalfKP {
   public:
    // Feature name
    static constexpr const char* kName = "HalfKP(Friend)";
    // Hash value embedded in the evaluation file
    // (the lowest bit is flipped when AssociatedKing is Side::kFriend)
    static constexpr std::uint32_t kHashValue =
        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
    // Number of feature dimensions: PS_END indices for each of the
    // SQUARE_NB king squares
    static constexpr IndexType kDimensions =
        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
    // Maximum number of simultaneously active features
    static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
    // Trigger for full calculation instead of difference calculation
    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
    // Get a list of indices for active features
    static void AppendActiveIndices(const Position& pos, Color perspective,
                                    IndexList* active);
    // Get a list of indices for recently changed features
    static void AppendChangedIndices(const Position& pos, Color perspective,
                                     IndexList* removed, IndexList* added);
    // Index of a feature for a given king position and another piece on some square
    static IndexType MakeIndex(Square sq_k, PieceSquare p);
   private:
    // Get pieces information: the piece list seen from `perspective` and the
    // square of the associated king
    static void GetPieces(const Position& pos, Color perspective,
                          PieceSquare** pieces, Square* sq_target_k);
  };
 }  // namespace Eval::NNUE::Features
 #endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
@@ -11,14 +11,14 @@ namespace NNUE {
 namespace Features {
-// Find the index of the feature quantity from the ball position and BonaPiece
+// Find the index of the feature quantity from the ball position and PieceSquare
 template <Side AssociatedKing>
 inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
-    Square sq_k, BonaPiece p) {
+    Square sq_k, PieceSquare p) {
  constexpr IndexType W = kBoardWidth;
  constexpr IndexType H = kBoardHeight;
-  const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB;
+  const IndexType piece_index = (p - PieceSquare::PS_W_PAWN) / SQUARE_NB;
-  const Square sq_p = static_cast<Square>((p - fe_hand_end) % SQUARE_NB);
+  const Square sq_p = static_cast<Square>((p - PieceSquare::PS_W_PAWN) % SQUARE_NB);
  const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
  const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
  return H * W * piece_index + H * relative_file + relative_rank;
@@ -28,14 +28,14 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
 template <Side AssociatedKing>
 inline void HalfRelativeKP<AssociatedKing>::GetPieces(
    const Position& pos, Color perspective,
-    BonaPiece** pieces, Square* sq_target_k) {
+    PieceSquare** pieces, Square* sq_target_k) {
  *pieces = (perspective == BLACK) ?
      pos.eval_list()->piece_list_fb() :
      pos.eval_list()->piece_list_fw();
-  const PieceNumber target = (AssociatedKing == Side::kFriend) ?
+  const PieceId target = (AssociatedKing == Side::kFriend) ?
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
+      static_cast<PieceId>(PieceId::PIECE_ID_KING + perspective) :
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
+      static_cast<PieceId>(PieceId::PIECE_ID_KING + ~perspective);
-  *sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
+  *sq_target_k = static_cast<Square>(((*pieces)[target] - PieceSquare::PS_W_KING) % SQUARE_NB);
 }
 // Get a list of indices with a value of 1 among the features
@@ -45,12 +45,12 @@ void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-  BonaPiece* pieces;
+  PieceSquare* pieces;
  Square sq_target_k;
  GetPieces(pos, perspective, &pieces, &sq_target_k);
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
+  for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
-    if (pieces[i] >= fe_hand_end) {
+    if (pieces[i] >= PieceSquare::PS_W_PAWN) {
-      if (pieces[i] != Eval::BONA_PIECE_ZERO) {
+      if (pieces[i] != PieceSquare::PS_NONE) {
        active->push_back(MakeIndex(sq_target_k, pieces[i]));
      }
    }
@@ -62,23 +62,23 @@ template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
-  BonaPiece* pieces;
+  PieceSquare* pieces;
  Square sq_target_k;
  GetPieces(pos, perspective, &pieces, &sq_target_k);
  const auto& dp = pos.state()->dirtyPiece;
  for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
+    if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
-    const auto old_p = static_cast<BonaPiece>(
+    const auto old_p = static_cast<PieceSquare>(
-        dp.changed_piece[i].old_piece.from[perspective]);
+        dp.old_piece[i].from[perspective]);
-    if (old_p >= fe_hand_end) {
+    if (old_p >= PieceSquare::PS_W_PAWN) {
-      if (old_p != Eval::BONA_PIECE_ZERO) {
+      if (old_p != PieceSquare::PS_NONE) {
        removed->push_back(MakeIndex(sq_target_k, old_p));
      }
    }
-    const auto new_p = static_cast<BonaPiece>(
+    const auto new_p = static_cast<PieceSquare>(
-        dp.changed_piece[i].new_piece.from[perspective]);
+        dp.new_piece[i].from[perspective]);
-    if (new_p >= fe_hand_end) {
+    if (new_p >= PieceSquare::PS_W_PAWN) {
-      if (new_p != Eval::BONA_PIECE_ZERO) {
+      if (new_p != PieceSquare::PS_NONE) {
        added->push_back(MakeIndex(sq_target_k, new_p));
      }
    }
@@ -5,7 +5,7 @@
 #if defined(EVAL_NNUE)
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 namespace Eval {
@@ -25,7 +25,7 @@ class HalfRelativeKP {
  static constexpr std::uint32_t kHashValue =
      0xF9180919u ^ (AssociatedKing == Side::kFriend);
  // Number of piece kinds, excluding kings
-  static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB;
+  static constexpr IndexType kNumPieceKinds = (PieceSquare::PS_END - PieceSquare::PS_W_PAWN) / SQUARE_NB;
  // Width of the virtual board with the king in the center
  static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
  // Height of the virtual board with the king in the center
@@ -34,7 +34,7 @@ class HalfRelativeKP {
  static constexpr IndexType kDimensions =
      kNumPieceKinds * kBoardHeight * kBoardWidth;
  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
+  static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
  // Timing of full calculation instead of difference calculation
  static constexpr TriggerEvent kRefreshTrigger =
      (AssociatedKing == Side::kFriend) ?
@@ -48,13 +48,13 @@ class HalfRelativeKP {
  static void AppendChangedIndices(const Position& pos, Color perspective,
                                   IndexList* removed, IndexList* added);
-  // Find the index of the feature quantity from the ball position and BonaPiece
+  // Find the index of the feature quantity from the king position and PieceSquare
-  static IndexType MakeIndex(Square sq_k, BonaPiece p);
+  static IndexType MakeIndex(Square sq_k, PieceSquare p);
 private:
  // Get the piece information
  static void GetPieces(const Position& pos, Color perspective,
-                        BonaPiece** pieces, Square* sq_target_k);
+                        PieceSquare** pieces, Square* sq_target_k);
 };
 }  // namespace Features
@@ -0,0 +1,64 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Definition of index list of input features
 #ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
 #define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
 #include "../../position.h"
 #include "../nnue_architecture.h"
 namespace Eval::NNUE::Features {
  // Class template used for feature index list.
  // A fixed-capacity list: the elements live in an in-object array of MaxSize
  // entries and size_ counts how many of them are in use.
  // No member checks bounds; callers must keep size() <= MaxSize and index
  // only inside [0, size()).
  template <typename T, std::size_t MaxSize>
  class ValueList {
   public:
    // Number of elements currently in use
    std::size_t size() const { return size_; }
    // Set the element count directly; no element is initialized or cleared
    void resize(std::size_t size) { size_ = size; }
    // Append a copy of value (not checked against MaxSize)
    void push_back(const T& value) { values_[size_++] = value; }
    // Unchecked element access
    T& operator[](std::size_t index) { return values_[index]; }
    // begin()/end() delimit the first size() elements
    T* begin() { return values_; }
    T* end() { return values_ + size_; }
    const T& operator[](std::size_t index) const { return values_[index]; }
    const T* begin() const { return values_; }
    const T* end() const { return values_ + size_; }
    // Exchange the contents and sizes of the two lists.
    // Only elements inside either list's [0, size()) range are read: the
    // common prefix is swapped element-wise and the remaining tail of the
    // longer list is copied across. Slots past a list's size were never
    // written, and reading such indeterminate values would be undefined
    // behaviour, so they are left untouched.
    void swap(ValueList& other) {
      const std::size_t common_size = std::min(size_, other.size_);
      for (std::size_t i = 0; i < common_size; ++i) {
        std::swap(values_[i], other.values_[i]);
      }
      // At most one of the two loops below runs (the one for the longer list)
      for (std::size_t i = common_size; i < size_; ++i) {
        other.values_[i] = values_[i];
      }
      for (std::size_t i = common_size; i < other.size_; ++i) {
        values_[i] = other.values_[i];
      }
      std::swap(size_, other.size_);
    }
   private:
    T values_[MaxSize];     // storage; only the first size_ entries are live
    std::size_t size_ = 0;  // number of live entries
  };
  // Type of feature index list: a ValueList of IndexType whose capacity is
  // RawFeatures::kMaxActiveDimensions. It adds no members of its own.
  class IndexList
      : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
  };
 }  // namespace Eval::NNUE::Features
 #endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
@@ -17,13 +17,13 @@ void K::AppendActiveIndices(
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-  const BonaPiece* pieces = (perspective == BLACK) ?
+  const PieceSquare* pieces = (perspective == BLACK) ?
      pos.eval_list()->piece_list_fb() :
      pos.eval_list()->piece_list_fw();
-  assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO);
+  assert(pieces[PieceId::PIECE_ID_BKING] != PieceSquare::PS_NONE);
-  assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO);
+  assert(pieces[PieceId::PIECE_ID_WKING] != PieceSquare::PS_NONE);
-  for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) {
+  for (PieceId i = PieceId::PIECE_ID_KING; i < PieceId::PIECE_ID_NONE; ++i) {
-    active->push_back(pieces[i] - fe_end);
+    active->push_back(pieces[i] - PieceSquare::PS_END);
  }
 }
@@ -32,11 +32,11 @@ void K::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
  const auto& dp = pos.state()->dirtyPiece;
-  if (dp.pieceNo[0] >= PIECE_NUMBER_KING) {
+  if (dp.pieceId[0] >= PieceId::PIECE_ID_KING) {
    removed->push_back(
-        dp.changed_piece[0].old_piece.from[perspective] - fe_end);
+        dp.old_piece[0].from[perspective] - PieceSquare::PS_END);
    added->push_back(
-        dp.changed_piece[0].new_piece.from[perspective] - fe_end);
+        dp.new_piece[0].from[perspective] - PieceSquare::PS_END);
  }
 }
@@ -5,7 +5,7 @@
 #if defined(EVAL_NNUE)
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 namespace Eval {
@@ -17,11 +17,11 @@ void P::AppendActiveIndices(
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-  const BonaPiece* pieces = (perspective == BLACK) ?
+  const PieceSquare* pieces = (perspective == BLACK) ?
      pos.eval_list()->piece_list_fb() :
      pos.eval_list()->piece_list_fw();
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
+  for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
-    if (pieces[i] != Eval::BONA_PIECE_ZERO) {
+    if (pieces[i] != PieceSquare::PS_NONE) {
      active->push_back(pieces[i]);
    }
  }
@@ -33,12 +33,12 @@ void P::AppendChangedIndices(
    IndexList* removed, IndexList* added) {
  const auto& dp = pos.state()->dirtyPiece;
  for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
+    if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
-    if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
+    if (dp.old_piece[i].from[perspective] != PieceSquare::PS_NONE) {
-      removed->push_back(dp.changed_piece[i].old_piece.from[perspective]);
+      removed->push_back(dp.old_piece[i].from[perspective]);
    }
-    if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
+    if (dp.new_piece[i].from[perspective] != PieceSquare::PS_NONE) {
-      added->push_back(dp.changed_piece[i].new_piece.from[perspective]);
+      added->push_back(dp.new_piece[i].from[perspective]);
    }
  }
 }
@@ -5,7 +5,7 @@
 #if defined(EVAL_NNUE)
-#include "../../../evaluate.h"
+#include "../../evaluate.h"
 #include "features_common.h"
 namespace Eval {
@@ -14,7 +14,7 @@ namespace NNUE {
 namespace Features {
-// Feature P: BonaPiece of pieces other than balls
+// Feature P: PieceSquare of pieces other than kings
 class P {
 public:
  // feature quantity name
@@ -22,9 +22,9 @@ class P {
  // Hash value embedded in the evaluation function file
  static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
  // number of feature dimensions
-  static constexpr IndexType kDimensions = fe_end;
+  static constexpr IndexType kDimensions = PieceSquare::PS_END;
  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
+  static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
  // Timing of full calculation instead of difference calculation
  static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
@@ -0,0 +1,218 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Definition of layer AffineTransform of NNUE evaluation function
 #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
 #define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
 #include <iostream>
 #include "../nnue_common.h"
 namespace Eval::NNUE::Layers {
  // Affine transformation layer.
  // For every output i it computes
  //   output[i] = biases_[i] + sum_j weights_[i * kPaddedInputDimensions + j] * input[j]
  // with 8-bit inputs and weights and 32-bit accumulation. One of several
  // SIMD implementations is selected at compile time through the USE_* macros,
  // with a plain scalar loop as the fallback.
  template <typename PreviousLayer, IndexType OutputDimensions>
  class AffineTransform {
   public:
    // Input/output type
    using InputType = typename PreviousLayer::OutputType;
    using OutputType = std::int32_t;
    // The previous layer must produce unsigned 8-bit values
    static_assert(std::is_same<InputType, std::uint8_t>::value, "");
    // Number of input/output dimensions
    static constexpr IndexType kInputDimensions =
        PreviousLayer::kOutputDimensions;
    static constexpr IndexType kOutputDimensions = OutputDimensions;
    // Length of one weight row: the input count rounded up to a multiple of
    // kMaxSimdWidth
    static constexpr IndexType kPaddedInputDimensions =
        CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
    // Size of forward propagation buffer used in this layer
    // (output bytes rounded up to a whole number of cache lines)
    static constexpr std::size_t kSelfBufferSize =
        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
    // Size of the forward propagation buffer used from the input layer to this layer
    static constexpr std::size_t kBufferSize =
        PreviousLayer::kBufferSize + kSelfBufferSize;
    // Hash value embedded in the evaluation file.
    // Combines a layer-specific constant, the output dimension count and the
    // previous layer's hash; the two shifts together XOR in that hash rotated
    // right by one bit.
    static constexpr std::uint32_t GetHashValue() {
      std::uint32_t hash_value = 0xCC03DAE4u;
      hash_value += kOutputDimensions;
      hash_value ^= PreviousLayer::GetHashValue() >> 1;
      hash_value ^= PreviousLayer::GetHashValue() << 31;
      return hash_value;
    }
    // A string that represents the structure from the input layer to this layer
    // in the form "AffineTransform[<outputs><-<inputs>](<previous layers>)"
    static std::string GetStructureString() {
      return "AffineTransform[" +
        std::to_string(kOutputDimensions) + "<-" +
        std::to_string(kInputDimensions) + "](" +
        PreviousLayer::GetStructureString() + ")";
    }
    // Read network parameters.
    // The previous layer's parameters are read first, then this layer's
    // biases followed by its (padded) weight rows. Returns false if the
    // previous layer failed or the stream is in a failed state afterwards.
    // NOTE: the bytes are copied verbatim into the arrays, so multi-byte
    // values are interpreted in host byte order.
    bool ReadParameters(std::istream& stream) {
      if (!previous_layer_.ReadParameters(stream)) return false;
      stream.read(reinterpret_cast<char*>(biases_),
                  kOutputDimensions * sizeof(BiasType));
      stream.read(reinterpret_cast<char*>(weights_),
                  kOutputDimensions * kPaddedInputDimensions *
                  sizeof(WeightType));
      return !stream.fail();
    }
    // Write parameters, in the same order and raw layout that ReadParameters
    // expects. Returns false if the previous layer failed or the stream is in
    // a failed state afterwards.
    bool WriteParameters(std::ostream& stream) const {
      if (!previous_layer_.WriteParameters(stream)) return false;
      stream.write(reinterpret_cast<const char*>(biases_),
        kOutputDimensions * sizeof(BiasType));
      stream.write(reinterpret_cast<const char*>(weights_),
        kOutputDimensions * kPaddedInputDimensions *
        sizeof(WeightType));
      return !stream.fail();
    }
    // Forward propagation.
    // The previous layers are evaluated first, using the part of the buffer
    // after this layer's kSelfBufferSize bytes; this layer's outputs are then
    // written to the start of the buffer. The returned pointer refers to
    // that buffer.
    // NOTE(review): the SIMD paths use aligned loads, so the input pointer and
    // the weight rows are assumed to be suitably aligned — confirm for the
    // buffer supplied by the caller.
    const OutputType* Propagate(
        const TransformedFeatureType* transformed_features, char* buffer) const {
      const auto input = previous_layer_.Propagate(
          transformed_features, buffer + kSelfBufferSize);
      const auto output = reinterpret_cast<OutputType*>(buffer);
      // Per-architecture setup: number of SIMD chunks in one padded row, a
      // vector of 16-bit ones (x86 only) and a vector view of the input
  #if defined(USE_AVX512)
      // Chunks are 64 bytes here (kSimdWidth * 2)
      constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
      const __m512i kOnes = _mm512_set1_epi16(1);
      const auto input_vector = reinterpret_cast<const __m512i*>(input);
  #elif defined(USE_AVX2)
      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
      const __m256i kOnes = _mm256_set1_epi16(1);
      const auto input_vector = reinterpret_cast<const __m256i*>(input);
  #elif defined(USE_SSSE3)
      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
      const __m128i kOnes = _mm_set1_epi16(1);
      const auto input_vector = reinterpret_cast<const __m128i*>(input);
  #elif defined(USE_NEON)
      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
      // NOTE(review): the unsigned 8-bit inputs are viewed as signed 8-bit
      // lanes here; presumably safe because inputs stay within 0..127 — confirm
      const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
  #endif
      // One dot product per output; row i of the weights starts at offset
      for (IndexType i = 0; i < kOutputDimensions; ++i) {
        const IndexType offset = i * kPaddedInputDimensions;
  #if defined(USE_AVX512)
        __m512i sum = _mm512_setzero_si512();
        const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
        // NOTE(review): a row starts at a multiple of kPaddedInputDimensions,
        // which is only guaranteed to be a multiple of kMaxSimdWidth (32); the
        // 64-byte aligned loads below therefore rely on the row length being a
        // multiple of 64 whenever kNumChunks > 0 — confirm for new architectures
        for (IndexType j = 0; j < kNumChunks; ++j) {
            // maddubs: unsigned input bytes * signed weight bytes, adjacent
            // pairs added into 16-bit lanes; madd with ones then widens and
            // adds adjacent 16-bit lanes into 32-bit lanes
            __m512i product = _mm512_maddubs_epi16(
              _mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
            product = _mm512_madd_epi16(product, kOnes);
            sum = _mm512_add_epi32(sum, product);
        }
        output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
        // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
        // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
        // and we have to do one more 256bit chunk.
        if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
        {
            const auto iv_256  = reinterpret_cast<const __m256i*>(input);
            const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
            // Index of the trailing 256-bit chunk (two per 512-bit chunk)
            int j = kNumChunks * 2;
            __m256i sum256 = _mm256_maddubs_epi16(
              _mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
            sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
            // Two horizontal adds leave each 128-bit half's total in its
            // lowest 32-bit lane; the two halves are then added together
            sum256 = _mm256_hadd_epi32(sum256, sum256);
            sum256 = _mm256_hadd_epi32(sum256, sum256);
            const __m128i lo = _mm256_extracti128_si256(sum256, 0);
            const __m128i hi = _mm256_extracti128_si256(sum256, 1);
            output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
        }
  #elif defined(USE_AVX2)
        __m256i sum = _mm256_setzero_si256();
        const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
        for (IndexType j = 0; j < kNumChunks; ++j) {
          // u8 * s8 pair sums in 16 bits, then widened pair sums in 32 bits
          __m256i product = _mm256_maddubs_epi16(
            _mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
          product = _mm256_madd_epi16(product, kOnes);
          sum = _mm256_add_epi32(sum, product);
        }
        // Horizontal reduction: per-half totals, then add both halves and the bias
        sum = _mm256_hadd_epi32(sum, sum);
        sum = _mm256_hadd_epi32(sum, sum);
        const __m128i lo = _mm256_extracti128_si256(sum, 0);
        const __m128i hi = _mm256_extracti128_si256(sum, 1);
        output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
  #elif defined(USE_SSSE3)
        // The bias is placed in the lowest lane so it is included in the sum
        __m128i sum = _mm_cvtsi32_si128(biases_[i]);
        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
        for (IndexType j = 0; j < kNumChunks; ++j) {
          __m128i product = _mm_maddubs_epi16(
              _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
          product = _mm_madd_epi16(product, kOnes);
          sum = _mm_add_epi32(sum, product);
        }
        // Two horizontal adds leave the total of all four lanes in lane 0
        sum = _mm_hadd_epi32(sum, sum);
        sum = _mm_hadd_epi32(sum, sum);
        output[i] = _mm_cvtsi128_si32(sum);
  #elif defined(USE_NEON)
        // Bias goes into the first lane; the remaining lanes start at zero
        int32x4_t sum = {biases_[i]};
        const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
        for (IndexType j = 0; j < kNumChunks; ++j) {
          // Each 16-byte chunk is handled as two 8-byte halves: widening
          // multiply, widening multiply-accumulate, then pairwise add into
          // the 32-bit accumulator
          int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
          product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
          sum = vpadalq_s16(sum, product);
        }
        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
  #else
        // Scalar fallback: iterates over the real inputs only, skipping the
        // padding at the end of each weight row
        OutputType sum = biases_[i];
        for (IndexType j = 0; j < kInputDimensions; ++j) {
          sum += weights_[offset + j] * input[j];
        }
        output[i] = sum;
  #endif
      }
      return output;
    }
   private:
    using BiasType = OutputType;
    using WeightType = std::int8_t;
    // Make the learning class a friend
    friend class Trainer<AffineTransform>;
    // Layer feeding this one; owned by value
    PreviousLayer previous_layer_;
    // One bias per output
    alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
    // Weights stored row by row, one row of kPaddedInputDimensions per output
    alignas(kCacheLineSize)
        WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
  };
 }  // namespace Eval::NNUE::Layers
 #endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
@@ -0,0 +1,164 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Definition of layer ClippedReLU of NNUE evaluation function
 #ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
 #define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
 #include "../nnue_common.h"
 namespace Eval::NNUE::Layers {
  // Clipped ReLU.
  // Each 32-bit input is shifted right by kWeightScaleBits and clamped to
  // the range [0, 127], producing one 8-bit output per input. SIMD paths
  // process as many whole chunks as fit; a scalar loop handles the rest.
  template <typename PreviousLayer>
  class ClippedReLU {
   public:
    // Input/output type
    using InputType = typename PreviousLayer::OutputType;
    using OutputType = std::uint8_t;
    // The previous layer must produce 32-bit signed values
    static_assert(std::is_same<InputType, std::int32_t>::value, "");
    // Number of input/output dimensions
    static constexpr IndexType kInputDimensions =
        PreviousLayer::kOutputDimensions;
    static constexpr IndexType kOutputDimensions = kInputDimensions;
    // Size of forward propagation buffer used in this layer
    // (output bytes rounded up to a whole number of cache lines)
    static constexpr std::size_t kSelfBufferSize =
        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
    // Size of the forward propagation buffer used from the input layer to this layer
    static constexpr std::size_t kBufferSize =
        PreviousLayer::kBufferSize + kSelfBufferSize;
    // Hash value embedded in the evaluation file
    // (a layer-specific constant plus the previous layer's hash)
    static constexpr std::uint32_t GetHashValue() {
      std::uint32_t hash_value = 0x538D24C7u;
      hash_value += PreviousLayer::GetHashValue();
      return hash_value;
    }
    // A string that represents the structure from the input layer to this layer
    // in the form "ClippedReLU[<dimensions>](<previous layers>)"
    static std::string GetStructureString() {
      return "ClippedReLU[" +
        std::to_string(kOutputDimensions) + "](" +
        PreviousLayer::GetStructureString() + ")";
    }
    // Read network parameters
    // (this layer stores none; the call is forwarded to the previous layer)
    bool ReadParameters(std::istream& stream) {
      return previous_layer_.ReadParameters(stream);
    }
    // Write parameters
    // (this layer stores none; the call is forwarded to the previous layer)
    bool WriteParameters(std::ostream& stream) const {
      return previous_layer_.WriteParameters(stream);
    }
    // Forward propagation.
    // The previous layers are evaluated first, using the part of the buffer
    // after this layer's kSelfBufferSize bytes; this layer's outputs are then
    // written to the start of the buffer. The returned pointer refers to
    // that buffer.
    // NOTE(review): the SIMD paths use aligned loads and stores, so input and
    // buffer are assumed to be suitably aligned — confirm at the call site.
    const OutputType* Propagate(
        const TransformedFeatureType* transformed_features, char* buffer) const {
      const auto input = previous_layer_.Propagate(
          transformed_features, buffer + kSelfBufferSize);
      const auto output = reinterpret_cast<OutputType*>(buffer);
  #if defined(USE_AVX2)
      // Each iteration turns four vectors of eight 32-bit inputs into one
      // vector of 32 output bytes
      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
      const __m256i kZero = _mm256_setzero_si256();
      // The 256-bit pack instructions work on each 128-bit half separately;
      // this permutation puts the 32-bit groups back into input order
      const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
      const auto in = reinterpret_cast<const __m256i*>(input);
      const auto out = reinterpret_cast<__m256i*>(output);
      for (IndexType i = 0; i < kNumChunks; ++i) {
        // 32 -> 16 bit with signed saturation, then the arithmetic shift
        const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
          _mm256_load_si256(&in[i * 4 + 0]),
          _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
        const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
          _mm256_load_si256(&in[i * 4 + 2]),
          _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
        // 16 -> 8 bit with signed saturation (upper clamp at 127), max with
        // zero for the lower clamp, then restore element order
        _mm256_store_si256(
            &out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
            _mm256_packs_epi16(words0, words1), kZero), kOffsets));
      }
      // First element not covered by the vector loop
      constexpr IndexType kStart = kNumChunks * kSimdWidth;
  #elif defined(USE_SSSE3)
      // Each iteration turns four vectors of four 32-bit inputs into one
      // vector of 16 output bytes
      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
  #ifdef USE_SSE41
      const __m128i kZero = _mm_setzero_si128();
  #else
      const __m128i k0x80s = _mm_set1_epi8(-128);
  #endif
      const auto in = reinterpret_cast<const __m128i*>(input);
      const auto out = reinterpret_cast<__m128i*>(output);
      for (IndexType i = 0; i < kNumChunks; ++i) {
        const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
            _mm_load_si128(&in[i * 4 + 0]),
            _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
        const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
            _mm_load_si128(&in[i * 4 + 2]),
            _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
        const __m128i packedbytes = _mm_packs_epi16(words0, words1);
        // Lower clamp at zero. Without SSE4.1 there is no signed byte max, so
        // it is emulated: a saturating add of -128 sends every negative value
        // to -128 and x >= 0 to x - 128; the saturating subtract of -128 then
        // yields 0 and x respectively.
        _mm_store_si128(&out[i],
  #ifdef USE_SSE41
          _mm_max_epi8(packedbytes, kZero)
  #else
          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
  #endif
        );
      }
      // First element not covered by the vector loop
      constexpr IndexType kStart = kNumChunks * kSimdWidth;
  #elif defined(USE_NEON)
      // Each iteration turns two vectors of four 32-bit inputs into eight
      // output bytes
      constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
      const int8x8_t kZero = {0};
      const auto in = reinterpret_cast<const int32x4_t*>(input);
      const auto out = reinterpret_cast<int8x8_t*>(output);
      for (IndexType i = 0; i < kNumChunks; ++i) {
        int16x8_t shifted;
        // View the 8-lane vector as two 4-lane halves so each saturating
        // shift-and-narrow result can be stored into its half
        const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
        pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
        pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
        // Saturating narrow to 8 bits (upper clamp), max with zero (lower clamp)
        out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
      }
      // First element not covered by the vector loop
      constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
  #else
      // No SIMD: the scalar loop below handles every element
      constexpr IndexType kStart = 0;
  #endif
      // Scalar tail (or the whole range when no SIMD path is compiled in)
      for (IndexType i = kStart; i < kInputDimensions; ++i) {
        output[i] = static_cast<OutputType>(
            std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
      }
      return output;
    }
   private:
     // Make the learning class a friend
     friend class Trainer<ClippedReLU>;
    // Layer feeding this one; owned by value
    PreviousLayer previous_layer_;
  };
 }  // namespace Eval::NNUE::Layers
 #endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
@@ -0,0 +1,80 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // NNUE evaluation function layer InputSlice definition
 #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
 #define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
 #include "../nnue_common.h"
 namespace Eval::NNUE::Layers {
 // Input layer: exposes a fixed window of the transformed features to the
 // layers stacked on top of it. It holds no parameters and needs no buffer.
 template <IndexType OutputDimensions, IndexType Offset = 0>
 class InputSlice {
 public:
  // Element type handed to the next layer
  using OutputType = TransformedFeatureType;
  // Number of elements in the window
  static constexpr IndexType kOutputDimensions = OutputDimensions;
  // This layer consumes no forward propagation buffer
  static constexpr std::size_t kBufferSize = 0;
  // The window has to start on a SIMD-aligned boundary
  static_assert(Offset % kMaxSimdWidth == 0, "");
  // Hash value embedded in the evaluation file
  static constexpr std::uint32_t GetHashValue() {
    return 0xEC42E90Du ^ (kOutputDimensions ^ (Offset << 10));
  }
  // Structure string of the form "InputSlice[<size>(<first>:<last>)]"
  static std::string GetStructureString() {
    const std::string size_text = std::to_string(kOutputDimensions);
    const std::string first_text = std::to_string(Offset);
    const std::string last_text = std::to_string(Offset + kOutputDimensions);
    return "InputSlice[" + size_text + "(" + first_text + ":" + last_text + ")]";
  }
  // Nothing to read: always reports success
  bool ReadParameters(std::istream&) {
    return true;
  }
  // Nothing to write: always reports success
  bool WriteParameters(std::ostream&) const {
    return true;
  }
  // Forward propagation: returns the address of the element at Offset within
  // the transformed features; the buffer argument is not used
  const OutputType* Propagate(
      const TransformedFeatureType* transformed_features,
      char*) const {
    return &transformed_features[Offset];
  }
 };
 }  // namespace Eval::NNUE::Layers
 #endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
@@ -0,0 +1,39 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Class for difference calculation of NNUE evaluation function
 #ifndef NNUE_ACCUMULATOR_H_INCLUDED
 #define NNUE_ACCUMULATOR_H_INCLUDED
 #include "nnue_architecture.h"
 namespace Eval::NNUE {
  // Class that holds the result of affine transformation of input features.
  // The struct is aligned to 32 bytes.
  struct alignas(32) Accumulator {
    // 16-bit accumulated values, indexed [a][trigger][dimension]: the first
    // index has extent 2 (presumably one slot per perspective — TODO confirm
    // against the feature transformer), the second runs over the entries of
    // kRefreshTriggers and the third over kTransformedFeatureDimensions.
    std::int16_t
        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
    // Evaluation value stored with the accumulator; presumably meaningful
    // only while computed_score is true — verify against the users
    Value score;
    // Flags presumably recording whether accumulation / score are up to
    // date — the code that sets them is not part of this header
    bool computed_accumulation;
    bool computed_score;
  };
 }  // namespace Eval::NNUE
 #endif // NNUE_ACCUMULATOR_H_INCLUDED
@@ -0,0 +1,38 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Input features and network structure used in NNUE evaluation function
 #ifndef NNUE_ARCHITECTURE_H_INCLUDED
 #define NNUE_ARCHITECTURE_H_INCLUDED
 // Defines the network structure
 #include "architectures/halfkp_256x2-32-32.h"
 namespace Eval::NNUE {
  // Compile-time checks on the selected architecture:
  // the per-side transformed feature count must be a multiple of kMaxSimdWidth,
  static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
  // and the network must end in a single 32-bit integer output.
  static_assert(Network::kOutputDimensions == 1, "");
  static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
  // Trigger for full calculation instead of difference calculation
  // (taken unchanged from the raw feature set)
  constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
@@ -0,0 +1,81 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Constants used in NNUE evaluation function
 #ifndef NNUE_COMMON_H_INCLUDED
 #define NNUE_COMMON_H_INCLUDED
 #if defined(USE_AVX2)
 #include <immintrin.h>
 #elif defined(USE_SSE41)
 #include <smmintrin.h>
 #elif defined(USE_SSSE3)
 #include <tmmintrin.h>
 #elif defined(USE_SSE2)
 #include <emmintrin.h>
 #elif defined(USE_NEON)
 #include <arm_neon.h>
 #endif
 namespace Eval::NNUE {
  // Version of the evaluation file
  constexpr std::uint32_t kVersion = 0x7AF32F16u;
  // Constant used in evaluation value calculation
  constexpr int FV_SCALE = 16;
  // Number of bits ClippedReLU shifts its 32-bit inputs right before clamping
  constexpr int kWeightScaleBits = 6;
  // Size of cache line (in bytes)
  constexpr std::size_t kCacheLineSize = 64;
  // SIMD width (in bytes)
  // NOTE: there is no #else branch, so kSimdWidth is not declared when none
  // of USE_AVX2 / USE_SSE2 / USE_NEON is defined. Code paths guarded only by
  // USE_SSSE3 or USE_AVX512 also use it, so those builds presumably define
  // USE_SSE2 / USE_AVX2 as well — confirm in the build configuration.
  #if defined(USE_AVX2)
  constexpr std::size_t kSimdWidth = 32;
  #elif defined(USE_SSE2)
  constexpr std::size_t kSimdWidth = 16;
  #elif defined(USE_NEON)
  constexpr std::size_t kSimdWidth = 16;
  #endif
  // Granularity (in bytes) used for padding and alignment checks regardless
  // of the SIMD width actually compiled in; AffineTransform pads its weight
  // rows to a multiple of this value
  constexpr std::size_t kMaxSimdWidth = 32;
  // Type of input feature after conversion
  using TransformedFeatureType = std::uint8_t;
  // Type used for dimension counts and feature indices
  using IndexType = std::uint32_t;
  // Forward declaration of learning class template
  // (the layer classes declare Trainer<Layer> as a friend)
  template <typename Layer>
  class Trainer;
  // Returns the smallest multiple of base that is not less than n
  // (for non-negative n and positive base)
  template <typename IntType>
  constexpr IntType CeilToMultiple(IntType n, IntType base) {
    // Number of base-sized chunks needed to cover n, rounding up
    const auto chunk_count = (n + base - 1) / base;
    return chunk_count * base;
  }
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_COMMON_H_INCLUDED
@@ -0,0 +1,346 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // A class that converts the input features of the NNUE evaluation function
 #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
 #define NNUE_FEATURE_TRANSFORMER_H_INCLUDED
 #include "nnue_common.h"
 #include "nnue_architecture.h"
 #include "features/index_list.h"
 #include <cstring> // std::memcpy()
 namespace Eval::NNUE {
  // Input feature converter
  class FeatureTransformer {
   private:
    // Number of output dimensions for one side (one perspective); taken from
    // kTransformedFeatureDimensions.
    static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
   public:
    // Output type
    using OutputType = TransformedFeatureType;
    // Number of input/output dimensions.
    // The output is twice kHalfDimensions wide because Transform() writes one
    // half per perspective.
    static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
    static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
    // Size of forward propagation buffer, in bytes (one OutputType element
    // per output dimension)
    static constexpr std::size_t kBufferSize =
        kOutputDimensions * sizeof(OutputType);
    // Hash value embedded in the evaluation file: the hash of the raw
    // feature set XOR-ed with the total number of output dimensions.
    static constexpr std::uint32_t GetHashValue() {
      constexpr auto kFeatureSetHash = RawFeatures::kHashValue;
      return kFeatureSetHash ^ kOutputDimensions;
    }
    // Describes the structure of this layer as
    // "<feature set name>[<input dimensions>-><half dimensions>x2]".
    static std::string GetStructureString() {
      std::string description = RawFeatures::GetName();
      description += "[";
      description += std::to_string(kInputDimensions);
      description += "->";
      description += std::to_string(kHalfDimensions);
      description += "x2]";
      return description;
    }
    // Read network parameters
    bool ReadParameters(std::istream& stream) {
      stream.read(reinterpret_cast<char*>(biases_),
                  kHalfDimensions * sizeof(BiasType));
      stream.read(reinterpret_cast<char*>(weights_),
                  kHalfDimensions * kInputDimensions * sizeof(WeightType));
      return !stream.fail();
    }
    // write parameters
    bool WriteParameters(std::ostream& stream) const {
      stream.write(reinterpret_cast<const char*>(biases_),
        kHalfDimensions * sizeof(BiasType));
      stream.write(reinterpret_cast<const char*>(weights_),
        kHalfDimensions * kInputDimensions * sizeof(WeightType));
      return !stream.fail();
    }
    // Makes sure the accumulator of the current position is available
    // without a full refresh, if that is possible. Returns true when it was
    // already computed, or when the previous state had a computed
    // accumulator and the difference update has been applied; false
    // otherwise (nothing is modified in that case).
    bool UpdateAccumulatorIfPossible(const Position& pos) const {
      const auto current = pos.state();
      if (current->accumulator.computed_accumulation) {
        return true;
      }
      const auto parent = current->previous;
      const bool can_update_incrementally =
          parent && parent->accumulator.computed_accumulation;
      if (can_update_incrementally) {
        UpdateAccumulator(pos);
      }
      return can_update_incrementally;
    }
    // Convert input features.
    //
    // Brings the accumulator of `pos` up to date (full refresh when `refresh`
    // is set or when no incremental update is possible) and then writes the
    // clipped accumulator values to `output`:
    //   output[0 .. kHalfDimensions)                  side to move
    //   output[kHalfDimensions .. 2 * kHalfDimensions) opponent
    // Every value is clamped to the range [0, 127]. Only slot 0 of the
    // accumulator's second dimension is read.
    // The SIMD paths use aligned loads and stores, so the accumulator and
    // `output` must be aligned for the vector type in use.
    void Transform(const Position& pos, OutputType* output, bool refresh) const {
      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
        RefreshAccumulator(pos);
      }
      const auto& accumulation = pos.state()->accumulator.accumulation;
  #if defined(USE_AVX2)
      // One chunk = one 256-bit output vector = kSimdWidth 8-bit values.
      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
      // Selects the 64-bit lanes in the order 0, 2, 1, 3, which undoes the
      // per-128-bit-lane interleaving produced by _mm256_packs_epi16.
      constexpr int kControl = 0b11011000;
      const __m256i kZero = _mm256_setzero_si256();
  #elif defined(USE_SSSE3)
      // One chunk = one 128-bit output vector = kSimdWidth 8-bit values.
      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
  #ifdef USE_SSE41
      const __m128i kZero = _mm_setzero_si128();
  #else
      // Used to emulate max(x, 0) on signed bytes without _mm_max_epi8.
      const __m128i k0x80s = _mm_set1_epi8(-128);
  #endif
  #elif defined(USE_NEON)
      // One chunk = one int16x8_t input vector (kSimdWidth / 2 values).
      constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
      const int8x8_t kZero = {0};
  #endif
      // The side to move always comes first in the output.
      const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
      for (IndexType p = 0; p < 2; ++p) {
        const IndexType offset = kHalfDimensions * p;
  #if defined(USE_AVX2)
        auto out = reinterpret_cast<__m256i*>(&output[offset]);
        for (IndexType j = 0; j < kNumChunks; ++j) {
          // Two vectors of 16-bit sums are packed (with signed saturation)
          // into one vector of 8-bit values, clamped below at zero and
          // reordered back into sequential order.
          __m256i sum0 =
            _mm256_load_si256(&reinterpret_cast<const __m256i*>(
              accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m256i sum1 =
            _mm256_load_si256(&reinterpret_cast<const __m256i*>(
              accumulation[perspectives[p]][0])[j * 2 + 1]);
          _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
              _mm256_packs_epi16(sum0, sum1), kZero), kControl));
        }
  #elif defined(USE_SSSE3)
        auto out = reinterpret_cast<__m128i*>(&output[offset]);
        for (IndexType j = 0; j < kNumChunks; ++j) {
          __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
              accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
              accumulation[perspectives[p]][0])[j * 2 + 1]);
          // Saturating pack of the 16-bit sums to signed 8-bit values.
      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
          // Clamp below at zero. With SSE4.1 this is a plain signed max;
          // otherwise a saturating add of -128 followed by a saturating
          // subtract of -128 maps negative bytes to 0 and leaves the
          // non-negative ones unchanged.
          _mm_store_si128(&out[j],
  #ifdef USE_SSE41
            _mm_max_epi8(packedbytes, kZero)
  #else
            _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
  #endif
          );
        }
  #elif defined(USE_NEON)
        const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
        for (IndexType j = 0; j < kNumChunks; ++j) {
          // Saturating narrow from 16 to 8 bits, then clamp below at zero.
          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
              accumulation[perspectives[p]][0])[j];
          out[j] = vmax_s8(vqmovn_s16(sum), kZero);
        }
  #else
        // Portable fallback: clamp every sum to [0, 127] one at a time.
        for (IndexType j = 0; j < kHalfDimensions; ++j) {
          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
          output[offset + j] = static_cast<OutputType>(
              std::max<int>(0, std::min<int>(127, sum)));
        }
  #endif
      }
    }
   private:
    // Calculate cumulative value without using difference calculation
    void RefreshAccumulator(const Position& pos) const {
      auto& accumulator = pos.state()->accumulator;
      IndexType i = 0;
      Features::IndexList active_indices[2];
      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
                                       active_indices);
      for (Color perspective : { WHITE, BLACK }) {
        std::memcpy(accumulator.accumulation[perspective][i], biases_,
                   kHalfDimensions * sizeof(BiasType));
        for (const auto index : active_indices[perspective]) {
          const IndexType offset = kHalfDimensions * index;
  #if defined(USE_AVX2)
          auto accumulation = reinterpret_cast<__m256i*>(
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
          for (IndexType j = 0; j < kNumChunks; ++j) {
            accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
          }
  #elif defined(USE_SSE2)
          auto accumulation = reinterpret_cast<__m128i*>(
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
          for (IndexType j = 0; j < kNumChunks; ++j) {
            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
          }
  #elif defined(USE_NEON)
          auto accumulation = reinterpret_cast<int16x8_t*>(
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
          for (IndexType j = 0; j < kNumChunks; ++j) {
            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
          }
  #else
          for (IndexType j = 0; j < kHalfDimensions; ++j) {
            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
          }
  #endif
        }
      }
      accumulator.computed_accumulation = true;
      accumulator.computed_score = false;
    }
    // Calculate cumulative value using difference calculation.
    //
    // Derives the accumulator of the current position from the accumulator
    // of the previous state (pos.state()->previous). The previous state must
    // exist and hold a computed accumulation; UpdateAccumulatorIfPossible()
    // checks this before calling.
    //
    // For each perspective:
    //  - if the feature set reports a reset, start again from the biases;
    //  - otherwise start from the previous accumulation and subtract the
    //    weight column of every removed feature;
    //  - in both cases add the weight column of every added feature.
    // Marks the accumulation as computed and the score as not computed.
    void UpdateAccumulator(const Position& pos) const {
      // Bound by reference: the previous accumulator is only read, and it
      // belongs to a different StateInfo than the one written below, so
      // copying the whole structure on every update is unnecessary.
      const auto& prev_accumulator = pos.state()->previous->accumulator;
      auto& accumulator = pos.state()->accumulator;
      IndexType i = 0;
      Features::IndexList removed_indices[2], added_indices[2];
      bool reset[2];
      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
                                        removed_indices, added_indices, reset);
      for (Color perspective : { WHITE, BLACK }) {
        // In the SIMD builds `accumulation` views the destination row as
        // vectors of 16-bit lanes; kNumChunks is the number of such vectors.
  #if defined(USE_AVX2)
        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
        auto accumulation = reinterpret_cast<__m256i*>(
            &accumulator.accumulation[perspective][i][0]);
  #elif defined(USE_SSE2)
        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
        auto accumulation = reinterpret_cast<__m128i*>(
            &accumulator.accumulation[perspective][i][0]);
  #elif defined(USE_NEON)
        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
        auto accumulation = reinterpret_cast<int16x8_t*>(
            &accumulator.accumulation[perspective][i][0]);
  #endif
        if (reset[perspective]) {
          // Start over from the bias vector; removed features are ignored.
          std::memcpy(accumulator.accumulation[perspective][i], biases_,
                      kHalfDimensions * sizeof(BiasType));
        } else {
          // Start from the previous position's accumulation.
          std::memcpy(accumulator.accumulation[perspective][i],
                      prev_accumulator.accumulation[perspective][i],
                      kHalfDimensions * sizeof(BiasType));
          // Difference calculation for the deactivated features
          for (const auto index : removed_indices[perspective]) {
            const IndexType offset = kHalfDimensions * index;
  #if defined(USE_AVX2)
            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
            }
  #elif defined(USE_SSE2)
            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
            }
  #elif defined(USE_NEON)
            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
            }
  #else
            for (IndexType j = 0; j < kHalfDimensions; ++j) {
              accumulator.accumulation[perspective][i][j] -=
                  weights_[offset + j];
            }
  #endif
          }
        }
        { // Difference calculation for the activated features
          for (const auto index : added_indices[perspective]) {
            const IndexType offset = kHalfDimensions * index;
  #if defined(USE_AVX2)
            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
            }
  #elif defined(USE_SSE2)
            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
            }
  #elif defined(USE_NEON)
            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
            }
  #else
            for (IndexType j = 0; j < kHalfDimensions; ++j) {
              accumulator.accumulation[perspective][i][j] +=
                  weights_[offset + j];
            }
  #endif
          }
        }
      }
      accumulator.computed_accumulation = true;
      accumulator.computed_score = false;
    }
    // Element types of the parameters; the accumulator rows are copied from
    // and updated with values of these types.
    using BiasType = std::int16_t;
    using WeightType = std::int16_t;
    // Make the learning class a friend
    friend class Trainer<FeatureTransformer>;
    // One bias per half dimension; copied into the accumulator as the
    // starting value for each perspective.
    alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
    // Weights stored feature by feature: the kHalfDimensions weights of
    // input feature f start at weights_[kHalfDimensions * f].
    alignas(kCacheLineSize)
        WeightType weights_[kHalfDimensions * kInputDimensions];
  };
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
@@ -2,8 +2,8 @@
 #if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
-#include "../../thread.h"
+#include "../thread.h"
-#include "../../uci.h"
+#include "../uci.h"
 #include "evaluate_nnue.h"
 #include "nnue_test_command.h"
@@ -62,8 +62,8 @@ class Factorizer<HalfKP<AssociatedKing>> {
    IndexType index_offset = AppendBaseFeature<FeatureType>(
        kProperties[kFeaturesHalfKP], base_index, training_features);
-    const auto sq_k = static_cast<Square>(base_index / fe_end);
+    const auto sq_k = static_cast<Square>(base_index / PieceSquare::PS_END);
-    const auto p = static_cast<BonaPiece>(base_index % fe_end);
+    const auto p = static_cast<PieceSquare>(base_index % PieceSquare::PS_END);
    // kFeaturesHalfK
    {
      const auto& properties = kProperties[kFeaturesHalfK];
@@ -76,7 +76,7 @@ class Factorizer<HalfKP<AssociatedKing>> {
    index_offset += InheritFeaturesIfRequired<P>(
        index_offset, kProperties[kFeaturesP], p, training_features);
    // kFeaturesHalfRelativeKP
-    if (p >= fe_hand_end) {
+    if (p >= PieceSquare::PS_W_PAWN) {
      index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
          index_offset, kProperties[kFeaturesHalfRelativeKP],
          HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
@@ -111,7 +111,7 @@ IntType Round(double value) {
 // make_shared with alignment
 template <typename T, typename... ArgumentTypes>
 std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
-  const auto ptr = new(aligned_malloc(sizeof(T), alignof(T)))
+  const auto ptr = new(std_aligned_alloc(alignof(T), sizeof(T)))
      T(std::forward<ArgumentTypes>(arguments)...);
  return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
 }
@@ -5,7 +5,7 @@
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/affine_transform.h"
 #include "trainer.h"
@@ -5,7 +5,7 @@
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/clipped_relu.h"
 #include "trainer.h"
@@ -5,7 +5,7 @@
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../nnue_feature_transformer.h"
 #include "trainer.h"
 #include "features/factorizer_feature_set.h"
@@ -5,7 +5,7 @@
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/input_slice.h"
 #include "trainer.h"
@@ -5,7 +5,7 @@
 #if defined(EVAL_LEARN) && defined(EVAL_NNUE)
-#include "../../../learn/learn.h"
+#include "../../learn/learn.h"
 #include "../layers/sum.h"
 #include "trainer.h"
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -32,21 +30,21 @@ namespace {
  #define S(mg, eg) make_score(mg, eg)
  // Pawn penalties
-  constexpr Score Backward      = S( 9, 24);
+  constexpr Score Backward      = S( 8, 27);
-  constexpr Score Doubled       = S(11, 56);
+  constexpr Score Doubled       = S(11, 55);
-  constexpr Score Isolated      = S( 5, 15);
+  constexpr Score Isolated      = S( 5, 17);
-  constexpr Score WeakLever     = S( 0, 56);
+  constexpr Score WeakLever     = S( 2, 54);
-  constexpr Score WeakUnopposed = S(13, 27);
+  constexpr Score WeakUnopposed = S(15, 25);
  // Bonus for blocked pawns at 5th or 6th rank
-  constexpr Score BlockedPawn[2] = { S(-11, -4), S(-3, 4) };
+  constexpr Score BlockedPawn[2] = { S(-13, -4), S(-4, 3) };
  constexpr Score BlockedStorm[RANK_NB] = {
    S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)
  };
  // Connected pawn bonus
-  constexpr int Connected[RANK_NB] = { 0, 7, 8, 12, 29, 48, 86 };
+  constexpr int Connected[RANK_NB] = { 0, 7, 8, 11, 24, 45, 85 };
  // Strength of pawn shelter for our king by [distance from edge][rank].
  // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -200,14 +198,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
  std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
  st = si;
-#if defined(EVAL_NNUE)
+  // Each piece on board gets a unique ID used to track the piece later
-  // clear evalList. It is cleared when memset is cleared to zero above...
+  PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
  evalList.clear();
  // In updating the PieceList, we have to set which piece is where,
  // A counter of how much each piece has been used
  PieceNumber next_piece_number = PIECE_NUMBER_ZERO;
 #endif  // defined(EVAL_NNUE)
  ss >> std::noskipws;
@@ -225,13 +217,15 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
          auto pc = Piece(idx);
          put_piece(pc, sq);
-#if defined(EVAL_NNUE)
+          if (Eval::useNNUE)
-          PieceNumber piece_no =
+          {
-            (idx == W_KING) ?PIECE_NUMBER_WKING : //
+              // Kings get a fixed ID, other pieces get ID in order of placement
-            (idx == B_KING) ?PIECE_NUMBER_BKING : // back ball
+              piece_id =
-            next_piece_number++; // otherwise
+                (idx == W_KING) ? PIECE_ID_WKING :
-          evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box
+                (idx == B_KING) ? PIECE_ID_BKING :
-#endif  // defined(EVAL_NNUE)
+                next_piece_id++;
              evalList.put_piece(piece_id, sq, pc);
          }
          ++sq;
      }
@@ -303,9 +297,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
  set_state(st);
  assert(pos_is_ok());
 #if defined(EVAL_NNUE)
  assert(evalList.is_valid(*this));
 #endif  // defined(EVAL_NNUE)
  return *this;
 }
@@ -727,10 +718,13 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
  ++st->rule50;
  ++st->pliesFromNull;
-#if defined(EVAL_NNUE)
+  // Used by NNUE
  st->accumulator.computed_accumulation = false;
  st->accumulator.computed_score = false;
-#endif  // defined(EVAL_NNUE)
+  PieceId dp0 = PIECE_ID_NONE;
  PieceId dp1 = PIECE_ID_NONE;
  auto& dp = st->dirtyPiece;
  dp.dirty_num = 1;
  Color us = sideToMove;
  Color them = ~us;
@@ -739,20 +733,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
  Piece pc = piece_on(from);
  Piece captured = type_of(m) == ENPASSANT ? make_piece(them, PAWN) : piece_on(to);
 #if defined(EVAL_NNUE)
  PieceNumber piece_no0 = PIECE_NUMBER_NB;
  PieceNumber piece_no1 = PIECE_NUMBER_NB;
 #endif  // defined(EVAL_NNUE)
  assert(color_of(pc) == us);
  assert(captured == NO_PIECE || color_of(captured) == (type_of(m) != CASTLING ? them : us));
  assert(type_of(captured) != KING);
 #if defined(EVAL_NNUE)
  auto& dp = st->dirtyPiece;
  dp.dirty_num = 1;
 #endif  // defined(EVAL_NNUE)
  if (type_of(m) == CASTLING)
  {
      assert(pc == make_piece(us, KING));
@@ -782,30 +766,21 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
              assert(relative_rank(us, to) == RANK_6);
              assert(piece_on(to) == NO_PIECE);
              assert(piece_on(capsq) == make_piece(them, PAWN));
 #if defined(EVAL_NNUE)
              piece_no1 = piece_no_of(capsq);
 #endif  // defined(EVAL_NNUE)
              //board[capsq] = NO_PIECE; // Not done by remove_piece()
 #if defined(EVAL_NNUE)
              evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB;
 #endif  // defined(EVAL_NNUE)
          }
          else {
 #if defined(EVAL_NNUE)
            piece_no1 = piece_no_of(capsq);
 #endif  // defined(EVAL_NNUE)
          }
          st->pawnKey ^= Zobrist::psq[captured][capsq];
      }
-      else {
+      else
          st->nonPawnMaterial[them] -= PieceValue[MG][captured];
-#if defined(EVAL_NNUE)
+      if (Eval::useNNUE)
-          piece_no1 = piece_no_of(capsq);
+      {
-#endif  // defined(EVAL_NNUE)
+          dp.dirty_num = 2; // 2 pieces moved
          dp1 = piece_id_on(capsq);
          dp.pieceId[1] = dp1;
          dp.old_piece[1] = evalList.piece_with_id(dp1);
          evalList.put_piece(dp1, capsq, NO_PIECE);
          dp.new_piece[1] = evalList.piece_with_id(dp1);
      }
      // Update board and piece lists
@@ -821,21 +796,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
      // Reset rule 50 counter
      st->rule50 = 0;
 #if defined(EVAL_NNUE)
      dp.dirty_num = 2; // 2 pieces moved
      dp.pieceNo[1] = piece_no1;
      dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1);
      // Do not use Eval::EvalList::put_piece() because the piece is removed
      // from the game, and the corresponding elements of the piece lists
      // needs to be Eval::BONA_PIECE_ZERO.
      evalList.set_piece_on_board(piece_no1, Eval::BONA_PIECE_ZERO, Eval::BONA_PIECE_ZERO, capsq);
      // Set piece_no_list_board[capsq] to PIECE_NUMBER_NB directly because it
      // will not be overwritten with pc if the move is an en passant capture.
      evalList.piece_no_list_board[capsq] = PIECE_NUMBER_NB;
      dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1);
 #endif  // defined(EVAL_NNUE)
  }
  // Update hash key
@@ -857,20 +817,18 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
  }
  // Move the piece. The tricky Chess960 castling is handled earlier
-  if (type_of(m) != CASTLING) {
+  if (type_of(m) != CASTLING)
-#if defined(EVAL_NNUE)
+  {
-    piece_no0 = piece_no_of(from);
+      if (Eval::useNNUE)
-#endif  // defined(EVAL_NNUE)
+      {
          dp0 = piece_id_on(from);
          dp.pieceId[0] = dp0;
          dp.old_piece[0] = evalList.piece_with_id(dp0);
          evalList.put_piece(dp0, to, pc);
          dp.new_piece[0] = evalList.piece_with_id(dp0);
      }
-    move_piece(from, to);
+      move_piece(from, to);
 #if defined(EVAL_NNUE)
    dp.pieceNo[0] = piece_no0;
    dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
    evalList.piece_no_list_board[from] = PIECE_NUMBER_NB;
    evalList.put_piece(piece_no0, to, pc);
    dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
 #endif  // defined(EVAL_NNUE)
  }
  // If the moving piece is a pawn do some special extra work
@@ -894,14 +852,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
          remove_piece(to);
          put_piece(promotion, to);
-#if defined(EVAL_NNUE)
+          if (Eval::useNNUE)
-          piece_no0 = piece_no_of(to);
+          {
-          //dp.pieceNo[0] = piece_no0;
+              dp0 = piece_id_on(to);
-          //dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
+              evalList.put_piece(dp0, to, promotion);
-          assert(evalList.piece_no_list_board[from] == PIECE_NUMBER_NB);
+              dp.new_piece[0] = evalList.piece_with_id(dp0);
-          evalList.put_piece(piece_no0, to, promotion);
+          }
          dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
 #endif  // defined(EVAL_NNUE)
          // Update hash keys
          k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to];
@@ -953,12 +909,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
      }
  }
  //std::cout << *this << std::endl;
  assert(pos_is_ok());
 #if defined(EVAL_NNUE)
  assert(evalList.is_valid(*this));
 #endif  // defined(EVAL_NNUE)
 }
@@ -988,11 +939,6 @@ void Position::undo_move(Move m) {
      remove_piece(to);
      pc = make_piece(us, PAWN);
      put_piece(pc, to);
 #if defined(EVAL_NNUE)
      PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0];
      evalList.put_piece(piece_no0, to, pc);
 #endif  // defined(EVAL_NNUE)
  }
  if (type_of(m) == CASTLING)
@@ -1002,14 +948,13 @@ void Position::undo_move(Move m) {
  }
  else
  {
      move_piece(to, from); // Put the piece back at the source square
-#if defined(EVAL_NNUE)
+      if (Eval::useNNUE)
-      PieceNumber piece_no0 = st->dirtyPiece.pieceNo[0];
+      {
-      evalList.put_piece(piece_no0, from, pc);
+          PieceId dp0 = st->dirtyPiece.pieceId[0];
-      evalList.piece_no_list_board[to] = PIECE_NUMBER_NB;
+          evalList.put_piece(dp0, from, pc);
-#endif  // defined(EVAL_NNUE)
+      }
      if (st->capturedPiece)
      {
@@ -1028,12 +973,13 @@ void Position::undo_move(Move m) {
          put_piece(st->capturedPiece, capsq); // Restore the captured piece
-#if defined(EVAL_NNUE)
+          if (Eval::useNNUE)
-          PieceNumber piece_no1 = st->dirtyPiece.pieceNo[1];
+          {
-          assert(evalList.bona_piece(piece_no1).fw == Eval::BONA_PIECE_ZERO);
+              PieceId dp1 = st->dirtyPiece.pieceId[1];
-          assert(evalList.bona_piece(piece_no1).fb == Eval::BONA_PIECE_ZERO);
+              assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
-          evalList.put_piece(piece_no1, capsq, st->capturedPiece);
+              assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
-#endif  // defined(EVAL_NNUE)
+              evalList.put_piece(dp1, capsq, st->capturedPiece);
          }
      }
  }
@@ -1042,9 +988,6 @@ void Position::undo_move(Move m) {
  --gamePly;
  assert(pos_is_ok());
 #if defined(EVAL_NNUE)
  assert(evalList.is_valid(*this));
 #endif  // defined(EVAL_NNUE)
 }
@@ -1052,31 +995,39 @@ void Position::undo_move(Move m) {
 /// is a bit tricky in Chess960 where from/to squares can overlap.
 template<bool Do>
 void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto) {
 #if defined(EVAL_NNUE)
  auto& dp = st->dirtyPiece;
   // Record the moved pieces in StateInfo for difference calculation.
   dp.dirty_num = 2; // 2 pieces moved
  PieceNumber piece_no0;
  PieceNumber piece_no1;
  if (Do) {
    piece_no0 = piece_no_of(from);
    piece_no1 = piece_no_of(to);
  }
 #endif  // defined(EVAL_NNUE)
  bool kingSide = to > from;
  rfrom = to; // Castling is encoded as "king captures friendly rook"
  rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
  to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
-#if defined(EVAL_NNUE)
+  if (Eval::useNNUE)
-  if (!Do) {
+  {
-    piece_no0 = piece_no_of(to);
+      PieceId dp0, dp1;
-    piece_no1 = piece_no_of(rto);
+      auto& dp = st->dirtyPiece;
      dp.dirty_num = 2; // 2 pieces moved
      if (Do)
      {
          dp0 = piece_id_on(from);
          dp1 = piece_id_on(rfrom);
          dp.pieceId[0] = dp0;
          dp.old_piece[0] = evalList.piece_with_id(dp0);
          evalList.put_piece(dp0, to, make_piece(us, KING));
          dp.new_piece[0] = evalList.piece_with_id(dp0);
          dp.pieceId[1] = dp1;
          dp.old_piece[1] = evalList.piece_with_id(dp1);
          evalList.put_piece(dp1, rto, make_piece(us, ROOK));
          dp.new_piece[1] = evalList.piece_with_id(dp1);
      }
      else
      {
          dp0 = piece_id_on(to);
          dp1 = piece_id_on(rto);
          evalList.put_piece(dp0, from, make_piece(us, KING));
          evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
      }
  }
 #endif  // defined(EVAL_NNUE)
  // Remove both pieces first since squares could overlap in Chess960
  remove_piece(Do ? from : to);
@@ -1084,28 +1035,6 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
  board[Do ? from : to] = board[Do ? rfrom : rto] = NO_PIECE; // Since remove_piece doesn't do this for us
  put_piece(make_piece(us, KING), Do ? to : from);
  put_piece(make_piece(us, ROOK), Do ? rto : rfrom);
 #if defined(EVAL_NNUE)
  if (Do) {
    dp.pieceNo[0] = piece_no0;
    dp.changed_piece[0].old_piece = evalList.bona_piece(piece_no0);
    evalList.piece_no_list_board[from] = PIECE_NUMBER_NB;
    evalList.put_piece(piece_no0, to, make_piece(us, KING));
    dp.changed_piece[0].new_piece = evalList.bona_piece(piece_no0);
    dp.pieceNo[1] = piece_no1;
    dp.changed_piece[1].old_piece = evalList.bona_piece(piece_no1);
    evalList.piece_no_list_board[rfrom] = PIECE_NUMBER_NB;
    evalList.put_piece(piece_no1, rto, make_piece(us, ROOK));
    dp.changed_piece[1].new_piece = evalList.bona_piece(piece_no1);
  }
  else {
    evalList.piece_no_list_board[to] = PIECE_NUMBER_NB;
    evalList.put_piece(piece_no0, from, make_piece(us, KING));
    evalList.piece_no_list_board[rto] = PIECE_NUMBER_NB;
    evalList.put_piece(piece_no1, rfrom, make_piece(us, ROOK));
  }
 #endif  // defined(EVAL_NNUE)
 }
@@ -1117,7 +1046,14 @@ void Position::do_null_move(StateInfo& newSt) {
  assert(!checkers());
  assert(&newSt != st);
-  std::memcpy(&newSt, st, sizeof(StateInfo));
+  if (Eval::useNNUE)
  {
      std::memcpy(&newSt, st, sizeof(StateInfo));
      st->accumulator.computed_score = false;
  }
  else
      std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
  newSt.previous = st;
  st = &newSt;
@@ -1130,10 +1066,6 @@ void Position::do_null_move(StateInfo& newSt) {
  st->key ^= Zobrist::side;
  prefetch(TT.first_entry(st->key));
 #if defined(EVAL_NNUE)
  st->accumulator.computed_score = false;
 #endif
  ++st->rule50;
  st->pliesFromNull = 0;
@@ -1463,13 +1395,3 @@ bool Position::pos_is_ok() const {
  return true;
 }
 #if defined(EVAL_NNUE)
 PieceNumber Position::piece_no_of(Square sq) const
 {
  assert(piece_on(sq) != NO_PIECE);
  PieceNumber n = evalList.piece_no_of_board(sq);
  assert(is_ok(n));
  return n;
 }
 #endif  // defined(EVAL_NNUE)
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -23,16 +21,14 @@
 #include <cassert>
 #include <deque>
 #include <iostream>
 #include <memory> // For std::unique_ptr
 #include <string>
 #include "bitboard.h"
 #include "evaluate.h"
 #include "misc.h"
 #include "types.h"
-#include "eval/nnue/nnue_accumulator.h"
+#include "nnue/nnue_accumulator.h"
 /// StateInfo struct stores information needed to restore a Position object to
@@ -60,12 +56,9 @@ struct StateInfo {
  Bitboard   checkSquares[PIECE_TYPE_NB];
  int        repetition;
-#if defined(EVAL_NNUE)
+  // Used by NNUE
  Eval::NNUE::Accumulator accumulator;
-
+  DirtyPiece dirtyPiece;
   // For management of evaluation value difference calculation
  Eval::DirtyPiece dirtyPiece;
 #endif  // defined(EVAL_NNUE)
 };
@@ -83,7 +76,7 @@ typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
 class Thread;
 // packed sfen
-struct PackedSfen { uint8_t data[32]; };
+struct PackedSfen { uint8_t data[32]; }; 
 class Position {
 public:
@@ -178,16 +171,9 @@ public:
  bool pos_is_ok() const;
  void flip();
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
+  // Used by NNUE
-  // --- StateInfo
+  StateInfo* state() const;
-
+  const EvalList* eval_list() const;
  // Returns the StateInfo corresponding to the current situation.
  // For example, if state()->capturedPiece, the pieces captured in the previous phase are stored.
  StateInfo* state() const { return st; }
  // Information such as where and which piece number is used for the evaluation function.
  const Eval::EvalList* eval_list() const { return &evalList; }
 #endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
 #if defined(EVAL_LEARN)
  // --sfenization helper
@@ -196,7 +182,7 @@ public:
  // Do not include gamePly in pack.
  void sfen_pack(PackedSfen& sfen);
-  // �ª It is slow to go through sfen, so I made a function to set packed sfen directly.
+  // It is slow to go through sfen, so I made a function to set packed sfen directly.
  // Equivalent to pos.set(sfen_unpack(data),si,th);.
  // If there is a problem with the passed phase and there is an error, non-zero is returned.
  // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
@@ -222,10 +208,8 @@ private:
  template<bool Do>
  void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
-#if defined(EVAL_NNUE)
+  // ID of a piece on a given square
-  // Returns the PieceNumber of the piece in the sq box on the board.
+  PieceId piece_id_on(Square sq) const;
  PieceNumber piece_no_of(Square sq) const;
 #endif  // defined(EVAL_NNUE)
  // Data members
  Piece board[SQUARE_NB];
@@ -244,10 +228,8 @@ private:
  StateInfo* st;
  bool chess960;
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
+  // List of pieces used in NNUE evaluation function
-  // List of pieces used in the evaluation function
+  EvalList evalList;
  Eval::EvalList evalList;
 #endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
 };
 namespace PSQT {
@@ -482,4 +464,25 @@ inline void Position::do_move(Move m, StateInfo& newSt) {
  do_move(m, newSt, gives_check(m));
 }
 // Accessor for the StateInfo pointer (st) currently held by this Position.
 inline StateInfo* Position::state() const {
  return st;
 }
 // Accessor returning a read-only pointer to this Position's evalList member.
 inline const EvalList* Position::eval_list() const {
  return &evalList;
 }
 // Looks up the PieceId stored in evalList.piece_id_list for square sq.
 // Both assertions are debug-only checks: the square must hold a piece and
 // the id read back must pass is_ok().
 inline PieceId Position::piece_id_on(Square sq) const
 {
  assert(NO_PIECE != piece_on(sq));
  const PieceId id = evalList.piece_id_list[sq];
  assert(is_ok(id));
  return id;
 }
 #endif // #ifndef POSITION_H_INCLUDED
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -227,6 +225,8 @@ void MainThread::search() {
  Time.init(Limits, us, rootPos.game_ply());
  TT.new_search();
  Eval::verify_NNUE();
  if (rootMoves.empty())
  {
      rootMoves.emplace_back(MOVE_NONE);
@@ -816,7 +816,7 @@ namespace {
    // Step 8. Futility pruning: child node (~50 Elo)
    if (   !PvNode
-        &&  depth < 6
+        &&  depth < 8
        &&  eval - futility_margin(depth, improving) >= beta
        &&  eval < VALUE_KNOWN_WIN) // Do not return unproven wins
        return eval;
@@ -827,7 +827,7 @@ namespace {
        && (ss-1)->statScore < 23824
        &&  eval >= beta
        &&  eval >= ss->staticEval
-        &&  ss->staticEval >= beta - 33 * depth - 33 * improving + 112 * ttPv + 311
+        &&  ss->staticEval >= beta - 28 * depth - 28 * improving + 94 * ttPv + 200
        && !excludedMove
        &&  pos.non_pawn_material(us)
        && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -925,9 +925,12 @@ namespace {
                if (value >= probcutBeta)
                {
-                    tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
+                    if ( !(ttHit
-                        BOUND_LOWER,
+                       && tte->depth() >= depth - 3
-                        depth - 3, move, ss->staticEval);
+                       && ttValue != VALUE_NONE))
                        tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
                            BOUND_LOWER,
                            depth - 3, move, ss->staticEval);
                    return value;
                }
            }
@@ -983,9 +986,17 @@ moves_loop: // When in check, search starts from here
                                  thisThread->rootMoves.begin() + thisThread->pvLast, move))
          continue;
      // Check for legality
      if (!rootNode && !pos.legal(move))
          continue;
      ss->moveCount = ++moveCount;
-      if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000 && !Limits.silent)
+      if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000
 #if defined(EVAL_LEARN)
          && !Limits.silent
 #endif
          )
          sync_cout << "info depth " << depth
                    << " currmove " << UCI::move(move, pos.is_chess960())
                    << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl;
@@ -1021,7 +1032,7 @@ moves_loop: // When in check, search starts from here
                  continue;
              // Futility pruning: parent node (~5 Elo)
-              if (   lmrDepth < 6
+              if (   lmrDepth < 8
                  && !ss->inCheck
                  && ss->staticEval + 284 + 188 * lmrDepth <= alpha
                  &&  (*contHist[0])[movedPiece][to_sq(move)]
@@ -1048,7 +1059,7 @@ moves_loop: // When in check, search starts from here
                  && !(PvNode && abs(bestValue) < 2)
                  && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
                  && !ss->inCheck
-                  && ss->staticEval + 267 + 391 * lmrDepth
+                  && ss->staticEval + 178 + 261 * lmrDepth
                     + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
                  continue;
@@ -1064,16 +1075,15 @@ moves_loop: // When in check, search starts from here
      // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
      // then that move is singular and should be extended. To verify this we do
      // a reduced search on all the other moves but the ttMove and if the
-      // result is lower than ttValue minus a margin then we will extend the ttMove.
+      // result is lower than ttValue minus a margin, then we will extend the ttMove.
-      if (    depth >= 6
+      if (    depth >= 7
          &&  move == ttMove
          && !rootNode
          && !excludedMove // Avoid recursive singular search
       /* &&  ttValue != VALUE_NONE Already implicit in the next condition */
          &&  abs(ttValue) < VALUE_KNOWN_WIN
          && (tte->bound() & BOUND_LOWER)
-          &&  tte->depth() >= depth - 3
+          &&  tte->depth() >= depth - 3)
          &&  pos.legal(move))
      {
          Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2;
          Depth singularDepth = (depth - 1 + 3 * formerPv) / 2;
@@ -1140,13 +1150,6 @@ moves_loop: // When in check, search starts from here
      // Speculative prefetch as early as possible
      prefetch(TT.first_entry(pos.key_after(move)));
      // Check for legality just before making the move
      if (!rootNode && !pos.legal(move))
      {
          ss->moveCount = --moveCount;
          continue;
      }
      // Update the current move (this must be done after singular extension search)
      ss->currentMove = move;
      ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck]
@@ -1160,7 +1163,7 @@ moves_loop: // When in check, search starts from here
      // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
      // re-searched at full depth.
      if (    depth >= 3
-          &&  moveCount > 1 + 2 * rootNode
+          &&  moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
          && (!rootNode || thisThread->best_move_count(move) == 0)
          && (  !captureOrPromotion
              || moveCountPruning
@@ -1170,6 +1173,13 @@ moves_loop: // When in check, search starts from here
      {
          Depth r = reduction(improving, depth, moveCount);
          // Decrease reduction at non-check cut nodes for second move at low depths
          if (   cutNode
              && depth <= 10
              && moveCount <= 2
              && !ss->inCheck)
              r--;
          // Decrease reduction if the ttHit running average is large
          if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024)
              r--;
@@ -2060,10 +2070,10 @@ namespace Learner
      // Increase the generation of the substitution table for this thread because it is a new search.
            //TT.new_search(th->thread_id());
-            // ↑ If you call new_search here, it may be a loss because you can't use the previous search result.
+            // ↑ If you call new_search here, it may be a loss because you can't use the previous search result.
            // Do not do this here, but caller should do TT.new_search(th->thread_id()) for each station ...
-            // →Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers.
+            // →Because we want to avoid reaching the same final diagram, use the substitution table commonly for all threads when generating teachers.
      //#endif
    }
  }
@@ -2253,7 +2263,7 @@ namespace Learner
    }
    // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle.
-    // → PV should not be NULL_MOVE because it is PV
+    // → PV should not be NULL_MOVE because it is PV
    // MOVE_WIN has never been thrust. (For now)
    for (Move move : rootMoves[0].pv)
    {
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -88,7 +86,9 @@ struct LimitsType {
    time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
    movestogo = depth = mate = perft = infinite = 0;
    nodes = 0;
 #if defined (EVAL_LEARN)
    silent = false;
 #endif
  }
  bool use_time_management() const {
@@ -99,9 +99,11 @@ struct LimitsType {
  TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
  int movestogo, depth, mate, perft, infinite;
  int64_t nodes;
 #if defined (EVAL_LEARN)
  // Silent mode that does not output to the screen (for continuous self-play in process)
  // Do not output PV at this time.
  bool silent;
 #endif
 };
 extern LimitsType Limits;
@@ -1,7 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (c) 2013 Ronald de Man
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,7 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (c) 2013 Ronald de Man
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -1,8 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad
  Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -206,21 +204,18 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
  // We use Position::set() to set root position across threads. But there are
  // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
-  // be deduced from a fen string, so set() clears them and to not lose the info
+  // be deduced from a fen string, so set() clears them and they are set from
-  // we need to backup and later restore setupStates->back(). Note that setupStates
+  // setupStates->back() later. The rootState is per thread, earlier states are shared
-  // is shared by threads but is accessed in read-only mode.
+  // since they are read-only.
  StateInfo tmp = setupStates->back();
  for (Thread* th : *this)
  {
      th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
      th->rootDepth = th->completedDepth = 0;
      th->rootMoves = rootMoves;
-      th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th);
+      th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
      th->rootState = setupStates->back();
  }
  setupStates->back() = tmp;
  main()->start_searching();
 }
--- a/Show More
+++ b/Show More