Merge branch 'master' of github.com:official-stockfish/Stockfish into nnue-player-merge

# Conflicts:
#   README.md
#   Readme.md
#   src/Makefile
#   src/evaluate.cpp
#   src/evaluate.h
#   src/misc.cpp
#   src/nnue/architectures/halfkp_256x2-32-32.h
#   src/nnue/evaluate_nnue.cpp
#   src/nnue/evaluate_nnue.h
#   src/nnue/features/feature_set.h
#   src/nnue/features/features_common.h
#   src/nnue/features/half_kp.cpp
#   src/nnue/features/half_kp.h
#   src/nnue/features/index_list.h
#   src/nnue/layers/affine_transform.h
#   src/nnue/layers/clipped_relu.h
#   src/nnue/layers/input_slice.h
#   src/nnue/nnue_accumulator.h
#   src/nnue/nnue_architecture.h
#   src/nnue/nnue_common.h
#   src/nnue/nnue_feature_transformer.h
#   src/position.cpp
#   src/position.h
#   src/types.h
#   src/ucioption.cpp
#   stockfish.md
2026-05-20 14:27:45 +00:00 · 2020-08-08 15:55:42 +09:00
parent 1abae04ceb 857e045ced
commit 55a6b2bdc4
74 changed files with 2527 additions and 2729 deletions
@@ -1,7 +1,25 @@
-// Definition of input features and network structure used in NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef HALFKP_256X2_32_32_H
-#define HALFKP_256X2_32_32_H
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
+#define NNUE_HALFKP_256X2_32_32_H_INCLUDED

 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
@@ -10,9 +28,7 @@
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"

-namespace Eval {
-
-namespace NNUE {
+namespace Eval::NNUE {

 // Input features used in evaluation function
 using RawFeatures = Features::FeatureSet<
@@ -23,7 +39,7 @@ constexpr IndexType kTransformedFeatureDimensions = 256;

 namespace Layers {

-// define network structure
+// Define network structure
 using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
 using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
 using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
@@ -33,7 +49,6 @@ using OutputLayer = AffineTransform<HiddenLayer2, 1>;

 using Network = Layers::OutputLayer;

-}  // namespace NNUE
+}  // namespace Eval::NNUE

-}  // namespace Eval
-#endif // HALFKP_256X2_32_32_H
+#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
@@ -1,9 +1,26 @@
-// Code for calculating NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#if defined(EVAL_NNUE)
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Code for calculating NNUE evaluation function

 #include <fstream>
 #include <iostream>
+#include <set>

 #include "../evaluate.h"
 #include "../position.h"
@@ -12,315 +29,186 @@

 #include "evaluate_nnue.h"

-namespace Eval {
+ExtPieceSquare kpp_board_index[PIECE_NB] = {
+ // convention: W - us, B - them
+ // viewed from other side, W and B are reversed
+    { PS_NONE,     PS_NONE     },
+    { PS_W_PAWN,   PS_B_PAWN   },
+    { PS_W_KNIGHT, PS_B_KNIGHT },
+    { PS_W_BISHOP, PS_B_BISHOP },
+    { PS_W_ROOK,   PS_B_ROOK   },
+    { PS_W_QUEEN,  PS_B_QUEEN  },
+    { PS_W_KING,   PS_B_KING   },
+    { PS_NONE,     PS_NONE     },
+    { PS_NONE,     PS_NONE     },
+    { PS_B_PAWN,   PS_W_PAWN   },
+    { PS_B_KNIGHT, PS_W_KNIGHT },
+    { PS_B_BISHOP, PS_W_BISHOP },
+    { PS_B_ROOK,   PS_W_ROOK   },
+    { PS_B_QUEEN,  PS_W_QUEEN  },
+    { PS_B_KING,   PS_W_KING   },
+    { PS_NONE,     PS_NONE     }
+};

-namespace NNUE {

-// Input feature converter
-AlignedPtr<FeatureTransformer> feature_transformer;
+namespace Eval::NNUE {

-// Evaluation function
-AlignedPtr<Network> network;
+  // Input feature converter
+  AlignedPtr<FeatureTransformer> feature_transformer;

-// Evaluation function file name
-std::string fileName = "nn.bin";
+  // Evaluation function
+  AlignedPtr<Network> network;

-// Saved evaluation function file name
-std::string savedfileName = "nn.bin";
+  // Evaluation function file name
+  std::string fileName;

-// Get a string that represents the structure of the evaluation function
-std::string GetArchitectureString() {
-  return "Features=" + FeatureTransformer::GetStructureString() +
+  // Saved evaluation function file name
+  std::string savedfileName = "nn.bin";
+
+  // Get a string that represents the structure of the evaluation function
+  std::string GetArchitectureString() {
+    return "Features=" + FeatureTransformer::GetStructureString() +
      ",Network=" + Network::GetStructureString();
-}
+  }

-namespace {
+  namespace Detail {

-namespace Detail {
+  // Initialize the evaluation function parameters
+  template <typename T>
+  void Initialize(AlignedPtr<T>& pointer) {

-// Initialize the evaluation function parameters
-template <typename T>
-void Initialize(AlignedPtr<T>& pointer) {
-  pointer.reset(reinterpret_cast<T*>(aligned_malloc(sizeof(T), alignof(T))));
-  std::memset(pointer.get(), 0, sizeof(T));
-}
+    pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
+    std::memset(pointer.get(), 0, sizeof(T));
+  }

-// read evaluation function parameters
-template <typename T>
-bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
-  std::uint32_t header;
-  stream.read(reinterpret_cast<char*>(&header), sizeof(header));
-  if (!stream || header != T::GetHashValue()) return false;
-  return pointer->ReadParameters(stream);
-}
+  // Read evaluation function parameters
+  template <typename T>
+  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {

-// write evaluation function parameters
-template <typename T>
-bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
-  constexpr std::uint32_t header = T::GetHashValue();
-  stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
-  return pointer->WriteParameters(stream);
-}
+    std::uint32_t header;
+    stream.read(reinterpret_cast<char*>(&header), sizeof(header));
+    if (!stream || header != T::GetHashValue()) return false;
+    return pointer->ReadParameters(stream);
+  }

-}  // namespace Detail
+  // Write evaluation function parameters
+  template <typename T>
+  bool WriteParameters(std::ostream& stream, const AlignedPtr<T>& pointer) {
+    constexpr std::uint32_t header = T::GetHashValue();
+    stream.write(reinterpret_cast<const char*>(&header), sizeof(header));
+    return pointer->WriteParameters(stream);
+  }

-// Initialize the evaluation function parameters
-void Initialize() {
-  Detail::Initialize(feature_transformer);
-  Detail::Initialize(network);
-}
+  }  // namespace Detail

-}  // namespace
+  // Initialize the evaluation function parameters
+  void Initialize() {

-// read the header
-bool ReadHeader(std::istream& stream,
-  std::uint32_t* hash_value, std::string* architecture) {
-  std::uint32_t version, size;
-  stream.read(reinterpret_cast<char*>(&version), sizeof(version));
-  stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
-  stream.read(reinterpret_cast<char*>(&size), sizeof(size));
-  if (!stream || version != kVersion) return false;
-  architecture->resize(size);
-  stream.read(&(*architecture)[0], size);
-  return !stream.fail();
-}
+    Detail::Initialize(feature_transformer);
+    Detail::Initialize(network);
+  }

-// write the header
-bool WriteHeader(std::ostream& stream,
-  std::uint32_t hash_value, const std::string& architecture) {
-  stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
-  stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
-  const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
-  stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
-  stream.write(architecture.data(), size);
-  return !stream.fail();
-}
+  // Read network header
+  bool ReadHeader(std::istream& stream,
+    std::uint32_t* hash_value, std::string* architecture) {

-// read evaluation function parameters
-bool ReadParameters(std::istream& stream) {
-  std::uint32_t hash_value;
-  std::string architecture;
-  if (!ReadHeader(stream, &hash_value, &architecture)) return false;
-  if (hash_value != kHashValue) return false;
-  if (!Detail::ReadParameters(stream, feature_transformer)) return false;
-  if (!Detail::ReadParameters(stream, network)) return false;
-  return stream && stream.peek() == std::ios::traits_type::eof();
-}
+    std::uint32_t version, size;
+    stream.read(reinterpret_cast<char*>(&version), sizeof(version));
+    stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value));
+    stream.read(reinterpret_cast<char*>(&size), sizeof(size));
+    if (!stream || version != kVersion) return false;
+    architecture->resize(size);
+    stream.read(&(*architecture)[0], size);
+    return !stream.fail();
+  }

-// write evaluation function parameters
-bool WriteParameters(std::ostream& stream) {
-  if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
-  if (!Detail::WriteParameters(stream, feature_transformer)) return false;
-  if (!Detail::WriteParameters(stream, network)) return false;
-  return !stream.fail();
-}
+  // Write network header
+  bool WriteHeader(std::ostream& stream,
+    std::uint32_t hash_value, const std::string& architecture) {
+    stream.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));
+    stream.write(reinterpret_cast<const char*>(&hash_value), sizeof(hash_value));
+    const std::uint32_t size = static_cast<std::uint32_t>(architecture.size());
+    stream.write(reinterpret_cast<const char*>(&size), sizeof(size));
+    stream.write(architecture.data(), size);
+    return !stream.fail();
+  }

-// proceed if you can calculate the difference
-static void UpdateAccumulatorIfPossible(const Position& pos) {
-  feature_transformer->UpdateAccumulatorIfPossible(pos);
-}
+  // Read network parameters
+  bool ReadParameters(std::istream& stream) {

-// Calculate the evaluation value
-static Value ComputeScore(const Position& pos, bool refresh = false) {
-  auto& accumulator = pos.state()->accumulator;
-  if (!refresh && accumulator.computed_score) {
+    std::uint32_t hash_value;
+    std::string architecture;
+    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
+    if (hash_value != kHashValue) return false;
+    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
+    if (!Detail::ReadParameters(stream, network)) return false;
+    return stream && stream.peek() == std::ios::traits_type::eof();
+  }
+
+  // Write network parameters
+  bool WriteParameters(std::ostream& stream) {
+    if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
+    if (!Detail::WriteParameters(stream, feature_transformer)) return false;
+    if (!Detail::WriteParameters(stream, network)) return false;
+    return !stream.fail();
+  }
+
+  // Proceed with the difference calculation if possible
+  static void UpdateAccumulatorIfPossible(const Position& pos) {
+
+    feature_transformer->UpdateAccumulatorIfPossible(pos);
+  }
+
+  // Calculate the evaluation value
+  static Value ComputeScore(const Position& pos, bool refresh) {
+
+    auto& accumulator = pos.state()->accumulator;
+    if (!refresh && accumulator.computed_score) {
+      return accumulator.score;
+    }
+
+    alignas(kCacheLineSize) TransformedFeatureType
+        transformed_features[FeatureTransformer::kBufferSize];
+    feature_transformer->Transform(pos, transformed_features, refresh);
+    alignas(kCacheLineSize) char buffer[Network::kBufferSize];
+    const auto output = network->Propagate(transformed_features, buffer);
+
+    auto score = static_cast<Value>(output[0] / FV_SCALE);
+
+    accumulator.score = score;
+    accumulator.computed_score = true;
    return accumulator.score;
  }

-  alignas(kCacheLineSize) TransformedFeatureType
-      transformed_features[FeatureTransformer::kBufferSize];
-  feature_transformer->Transform(pos, transformed_features, refresh);
-  alignas(kCacheLineSize) char buffer[Network::kBufferSize];
-  const auto output = network->Propagate(transformed_features, buffer);
+  // Load the evaluation function file
+  bool load_eval_file(const std::string& evalFile) {

-  // When a value larger than VALUE_MAX_EVAL is returned, aspiration search fails high
-  // It should be guaranteed that it is less than VALUE_MAX_EVAL because the search will not end.
+    Initialize();
+    fileName = evalFile;

-  // Even if this phenomenon occurs, if the seconds are fixed when playing, the search will be aborted there, so
-  // The best move in the previous iteration is pointed to as bestmove, so apparently
-  // no problem. The situation in which this VALUE_MAX_EVAL is returned is almost at a dead end,
-  // Since such a jamming phase often appears at the end, there is a big difference in the situation
-  // Doesn't really affect the outcome.
+    std::ifstream stream(evalFile, std::ios::binary);

-  // However, when searching with a fixed depth such as when creating a teacher, it will not return from the search
-  // Waste the computation time for that thread. Also, it will be timed out with fixed depth game.
+    const bool result = ReadParameters(stream);

-  auto score = static_cast<Value>(output[0] / FV_SCALE);
-
-  // 1) I feel that if I clip too poorly, it will have an effect on my learning...
-  // 2) Since accumulator.score is not used at the time of difference calculation, it can be rewritten without any problem.
-  score = Math::clamp(score , -VALUE_MAX_EVAL , VALUE_MAX_EVAL);
-
-  accumulator.score = score;
-  accumulator.computed_score = true;
-  return accumulator.score;
-}
-
-} // namespace NNUE
-
-#if defined(USE_EVAL_HASH)
-// Class used to store evaluation values in HashTable
-struct alignas(16) ScoreKeyValue {
-#if defined(USE_SSE2)
-  ScoreKeyValue() = default;
-  ScoreKeyValue(const ScoreKeyValue& other) {
-    static_assert(sizeof(ScoreKeyValue) == sizeof(__m128i),
-                  "sizeof(ScoreKeyValue) should be equal to sizeof(__m128i)");
-    _mm_store_si128(&as_m128i, other.as_m128i);
-  }
-  ScoreKeyValue& operator=(const ScoreKeyValue& other) {
-    _mm_store_si128(&as_m128i, other.as_m128i);
-    return *this;
-  }
-#endif
-
-  // It is necessary to be able to operate atomically with evaluate hash, so the manipulator for that
-  void encode() {
-#if defined(USE_SSE2)
-    // ScoreKeyValue is copied to atomic, so if the key matches, the data matches.
-#else
-    key ^= score;
-#endif
-  }
-  // decode() is the reverse conversion of encode(), but since it is xor, the reverse conversion is the same.
-  void decode() { encode(); }
-
-  union {
-    struct {
-      std::uint64_t key;
-      std::uint64_t score;
-    };
-#if defined(USE_SSE2)
-    __m128i as_m128i;
-#endif
-  };
-};
-
-// Simple HashTable implementation.
-// Size is a power of 2.
-template <typename T, size_t Size>
-struct HashTable {
-  HashTable() { clear(); }
-  T* operator [] (const Key k) { return entries_ + (static_cast<size_t>(k) & (Size - 1)); }
-  void clear() { memset(entries_, 0, sizeof(T)*Size); }
-
-  // Check that Size is a power of 2
-  static_assert((Size & (Size - 1)) == 0, "");
-
- private:
-  T entries_[Size];
-};
-
-//HashTable to save the evaluated ones (following ehash)
-
-#if !defined(USE_LARGE_EVAL_HASH)
-// 134MB (setting other than witch's AVX2)
-struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x800000> {};
-#else
-// If you have prefetch, it's better to have a big one...
-// → It doesn't change much and the memory is wasteful, so is it okay to set ↑ by default?
-// 1GB (setting for witch's AVX2)
-struct EvaluateHashTable : HashTable<ScoreKeyValue, 0x4000000> {};
-#endif
-
-EvaluateHashTable g_evalTable;
-
-// Prepare a function to prefetch.
-void prefetch_evalhash(const Key key) {
-  constexpr auto mask = ~((uint64_t)0x1f);
-  prefetch((void*)((uint64_t)g_evalTable[key] & mask));
-}
-#endif
-
-// read the evaluation function file
-// Save and restore Options with bench command etc., so EvalDir is changed at this time,
-// This function may be called twice to flag that the evaluation function needs to be reloaded.
-void load_eval() {
-
-  // Must be done!
-  NNUE::Initialize();
-
-  if (Options["SkipLoadingEval"])
-  {
-      std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
-      return;
+    return result;
  }

-  const std::string file_name = Options["EvalFile"];
-  NNUE::fileName = file_name;
+  // Evaluation function. Perform differential calculation.
+  Value evaluate(const Position& pos) {
+    Value v = ComputeScore(pos, false);
+    v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);

-  std::ifstream stream(file_name, std::ios::binary);
-  const bool result = NNUE::ReadParameters(stream);
-
-  if (!result)
-      // It's a problem if it doesn't finish when there is a read error.
-      std::cout << "Error! " << NNUE::fileName << " not found or wrong format" << std::endl;
-
-  else
-      std::cout << "info string NNUE " << NNUE::fileName << " found & loaded" << std::endl;
-}
-
-// Initialization
-void init() {
-}
-
-// Evaluation function. Perform full calculation instead of difference calculation.
-// Called only once with Position::set(). (The difference calculation after that)
-// Note that the evaluation value seen from the turn side is returned. (Design differs from other evaluation functions in this respect)
-// Since, we will not try to optimize this function.
-Value compute_eval(const Position& pos) {
-  return NNUE::ComputeScore(pos, true);
-}
-
-// Evaluation function
-Value evaluate(const Position& pos) {
-  const auto& accumulator = pos.state()->accumulator;
-  if (accumulator.computed_score) {
-    return accumulator.score;
+    return v;
  }

-#if defined(USE_GLOBAL_OPTIONS)
-  // If Global Options is set not to use eval hash
-  // Skip the query to the eval hash.
-  if (!GlobalOptions.use_eval_hash) {
-    ASSERT_LV5(pos.state()->materialValue == Eval::material(pos));
-    return NNUE::ComputeScore(pos);
+  // Evaluation function. Perform full calculation.
+  Value compute_eval(const Position& pos) {
+    return ComputeScore(pos, true);
  }
-#endif

-#if defined(USE_EVAL_HASH)
-  // May be in the evaluate hash table.
-  const Key key = pos.key();
-  ScoreKeyValue entry = *g_evalTable[key];
-  entry.decode();
-  if (entry.key == key) {
-    // there were!
-    return Value(entry.score);
+  // Proceed with the difference calculation if possible
+  void update_eval(const Position& pos) {
+    UpdateAccumulatorIfPossible(pos);
  }
-#endif

-  Value score = NNUE::ComputeScore(pos);
-#if defined(USE_EVAL_HASH)
-  // Since it was calculated carefully, save it in the evaluate hash table.
-  entry.key = key;
-  entry.score = score;
-  entry.encode();
-  *g_evalTable[key] = entry;
-#endif
-
-  return score;
-}
-
-// proceed if you can calculate the difference
-void evaluate_with_no_return(const Position& pos) {
-  NNUE::UpdateAccumulatorIfPossible(pos);
-}
-
-// display the breakdown of the evaluation value of the current phase
-void print_eval_stat(Position& /*pos*/) {
-  std::cout << "--- EVAL STAT: not implemented" << std::endl;
-}
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
+} // namespace Eval::NNUE
@@ -1,67 +1,77 @@
-// header used in NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _EVALUATE_NNUE_H_
-#define _EVALUATE_NNUE_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// header used in NNUE evaluation function
+
+#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
+#define NNUE_EVALUATE_NNUE_H_INCLUDED

 #include "nnue_feature_transformer.h"
-#include "nnue_architecture.h"

 #include <memory>

-namespace Eval {
+namespace Eval::NNUE {

-namespace NNUE {
+  // Hash value of evaluation function structure
+  constexpr std::uint32_t kHashValue =
+      FeatureTransformer::GetHashValue() ^ Network::GetHashValue();

-// hash value of evaluation function structure
-constexpr std::uint32_t kHashValue =
-    FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
+  // Deleter for automating release of memory area
+  template <typename T>
+  struct AlignedDeleter {
+    void operator()(T* ptr) const {
+      ptr->~T();
+      std_aligned_free(ptr);
+    }
+  };

-// Deleter for automating release of memory area
-template <typename T>
-struct AlignedDeleter {
-  void operator()(T* ptr) const {
-    ptr->~T();
-    aligned_free(ptr);
-  }
-};
-template <typename T>
-using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
+  template <typename T>
+  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;

-// Input feature converter
-extern AlignedPtr<FeatureTransformer> feature_transformer;
+  // Input feature converter
+  extern AlignedPtr<FeatureTransformer> feature_transformer;

-// Evaluation function
-extern AlignedPtr<Network> network;
+  // Evaluation function
+  extern AlignedPtr<Network> network;

-// Evaluation function file name
-extern std::string fileName;
+  // Evaluation function file name
+  extern std::string fileName;

-// Saved evaluation function file name
-extern std::string savedfileName;
+  // Saved evaluation function file name
+  extern std::string savedfileName;

-// Get a string that represents the structure of the evaluation function
-std::string GetArchitectureString();
+  // Get a string that represents the structure of the evaluation function
+  std::string GetArchitectureString();

-// read the header
-bool ReadHeader(std::istream& stream,
+  // read the header
+  bool ReadHeader(std::istream& stream,
    std::uint32_t* hash_value, std::string* architecture);

-// write the header
-bool WriteHeader(std::ostream& stream,
+  // write the header
+  bool WriteHeader(std::ostream& stream,
    std::uint32_t hash_value, const std::string& architecture);

-// read evaluation function parameters
-bool ReadParameters(std::istream& stream);
+  // read evaluation function parameters
+  bool ReadParameters(std::istream& stream);

-// write evaluation function parameters
-bool WriteParameters(std::ostream& stream);
+  // write evaluation function parameters
+  bool WriteParameters(std::ostream& stream);

-}  // namespace NNUE
+}  // namespace Eval::NNUE

-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
@@ -23,7 +23,7 @@ namespace Eval {
        }

        if (perspective == BLACK) {
-          epSquare = Inv(epSquare);
+          epSquare = rotate180(epSquare);
        }

        auto file = file_of(epSquare);
@@ -1,249 +1,249 @@
-// A class template that represents the input feature set of the NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_FEATURE_SET_H_
-#define _NNUE_FEATURE_SET_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class template that represents the input feature set of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_SET_H_INCLUDED
+#define NNUE_FEATURE_SET_H_INCLUDED

 #include "features_common.h"
 #include <array>

-namespace Eval {
+namespace Eval::NNUE::Features {

-namespace NNUE {
+  // Class template that represents a list of values
+  template <typename T, T... Values>
+  struct CompileTimeList;

-namespace Features {
+  template <typename T, T First, T... Remaining>
+  struct CompileTimeList<T, First, Remaining...> {
+    static constexpr bool Contains(T value) {
+      return value == First || CompileTimeList<T, Remaining...>::Contains(value);
+    }
+    static constexpr std::array<T, sizeof...(Remaining) + 1>
+        kValues = {{First, Remaining...}};
+  };

-// A class template that represents a list of values
-template <typename T, T... Values>
-struct CompileTimeList;
-template <typename T, T First, T... Remaining>
-struct CompileTimeList<T, First, Remaining...> {
-  static constexpr bool Contains(T value) {
-    return value == First || CompileTimeList<T, Remaining...>::Contains(value);
-  }
-  static constexpr std::array<T, sizeof...(Remaining) + 1>
-      kValues = {{First, Remaining...}};
-};
-template <typename T, T First, T... Remaining>
-constexpr std::array<T, sizeof...(Remaining) + 1>
+  template <typename T, T First, T... Remaining>
+  constexpr std::array<T, sizeof...(Remaining) + 1>
    CompileTimeList<T, First, Remaining...>::kValues;
-template <typename T>
-struct CompileTimeList<T> {
-  static constexpr bool Contains(T /*value*/) {
-    return false;
-  }
-  static constexpr std::array<T, 0> kValues = {{}};
-};
+  template <typename T>
+  struct CompileTimeList<T> {
+    static constexpr bool Contains(T /*value*/) {
+      return false;
+    }
+    static constexpr std::array<T, 0> kValues = { {} };
+  };

-// Class template that adds to the beginning of the list
-template <typename T, typename ListType, T Value>
-struct AppendToList;
-template <typename T, T... Values, T AnotherValue>
-struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
-  using Result = CompileTimeList<T, AnotherValue, Values...>;
-};
+  // Class template that adds to the beginning of the list
+  template <typename T, typename ListType, T Value>
+  struct AppendToList;
+  template <typename T, T... Values, T AnotherValue>
+  struct AppendToList<T, CompileTimeList<T, Values...>, AnotherValue> {
+    using Result = CompileTimeList<T, AnotherValue, Values...>;
+  };

-// Class template for adding to a sorted, unique list
-template <typename T, typename ListType, T Value>
-struct InsertToSet;
-template <typename T, T First, T... Remaining, T AnotherValue>
-struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
-  using Result = std::conditional_t<
+  // Class template for adding to a sorted, unique list
+  template <typename T, typename ListType, T Value>
+  struct InsertToSet;
+  template <typename T, T First, T... Remaining, T AnotherValue>
+  struct InsertToSet<T, CompileTimeList<T, First, Remaining...>, AnotherValue> {
+    using Result = std::conditional_t<
      CompileTimeList<T, First, Remaining...>::Contains(AnotherValue),
      CompileTimeList<T, First, Remaining...>,
-      std::conditional_t<(AnotherValue <First),
-          CompileTimeList<T, AnotherValue, First, Remaining...>,
-          typename AppendToList<T, typename InsertToSet<
-              T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
-              First>::Result>>;
-};
-template <typename T, T Value>
-struct InsertToSet<T, CompileTimeList<T>, Value> {
-  using Result = CompileTimeList<T, Value>;
-};
+      std::conditional_t<(AnotherValue < First),
+      CompileTimeList<T, AnotherValue, First, Remaining...>,
+      typename AppendToList<T, typename InsertToSet<
+      T, CompileTimeList<T, Remaining...>, AnotherValue>::Result,
+      First>::Result>>;
+  };
+  template <typename T, T Value>
+  struct InsertToSet<T, CompileTimeList<T>, Value> {
+    using Result = CompileTimeList<T, Value>;
+  };

-// Base class of feature set
-template <typename Derived>
-class FeatureSetBase {
- public:
-  // Get a list of indices with a value of 1 among the features
-  template <typename IndexListType>
-  static void AppendActiveIndices(
-      const Position& pos, TriggerEvent trigger, IndexListType active[2]) {
-    for (const auto perspective :Colors) {
-      Derived::CollectActiveIndices(
-          pos, trigger, perspective, &active[perspective]);
-    }
-  }
+  // Base class of feature set
+  template <typename Derived>
+  class FeatureSetBase {

-  // Get a list of indices whose values have changed from the previous one in the feature quantity
-  template <typename PositionType, typename IndexListType>
-  static void AppendChangedIndices(
-      const PositionType& pos, TriggerEvent trigger,
-      IndexListType removed[2], IndexListType added[2], bool reset[2]) {
-    const auto& dp = pos.state()->dirtyPiece;
-    if (dp.dirty_num == 0) return;
+   public:
+    // Get a list of indices for active features
+    template <typename IndexListType>
+    static void AppendActiveIndices(
+        const Position& pos, TriggerEvent trigger, IndexListType active[2]) {

-    for (const auto perspective :Colors) {
-      reset[perspective] = false;
-      switch (trigger) {
-        case TriggerEvent::kNone:
-          break;
-        case TriggerEvent::kFriendKingMoved:
-          reset[perspective] =
-              dp.pieceNo[0] == PIECE_NUMBER_KING + perspective;
-          break;
-        case TriggerEvent::kEnemyKingMoved:
-          reset[perspective] =
-              dp.pieceNo[0] == PIECE_NUMBER_KING + ~perspective;
-          break;
-        case TriggerEvent::kAnyKingMoved:
-          reset[perspective] = dp.pieceNo[0] >= PIECE_NUMBER_KING;
-          break;
-        case TriggerEvent::kAnyPieceMoved:
-          reset[perspective] = true;
-          break;
-        default:
-          assert(false);
-          break;
-      }
-      if (reset[perspective]) {
+      for (Color perspective : { WHITE, BLACK }) {
        Derived::CollectActiveIndices(
-            pos, trigger, perspective, &added[perspective]);
-      } else {
-        Derived::CollectChangedIndices(
-            pos, trigger, perspective,
-            &removed[perspective], &added[perspective]);
+            pos, trigger, perspective, &active[perspective]);
      }
    }
-  }
-};

-// Class template that represents the feature set
-// do internal processing in reverse order of template arguments in order to linearize the amount of calculation at runtime
-template <typename FirstFeatureType, typename... RemainingFeatureTypes>
-class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
+    // Get a list of indices for recently changed features
+    template <typename PositionType, typename IndexListType>
+    static void AppendChangedIndices(
+        const PositionType& pos, TriggerEvent trigger,
+        IndexListType removed[2], IndexListType added[2], bool reset[2]) {
+
+      const auto& dp = pos.state()->dirtyPiece;
+      if (dp.dirty_num == 0) return;
+
+      for (Color perspective : { WHITE, BLACK }) {
+        reset[perspective] = false;
+        switch (trigger) {
+          case TriggerEvent::kFriendKingMoved:
+            reset[perspective] =
+                dp.pieceId[0] == PIECE_ID_KING + perspective;
+            break;
+          default:
+            assert(false);
+            break;
+        }
+        if (reset[perspective]) {
+          Derived::CollectActiveIndices(
+              pos, trigger, perspective, &added[perspective]);
+        } else {
+          Derived::CollectChangedIndices(
+              pos, trigger, perspective,
+              &removed[perspective], &added[perspective]);
+        }
+      }
+    }
+  };
+
+  // Class template that represents the feature set
+  // Template arguments are processed in reverse order to keep the amount of run-time calculation linear
+  template <typename FirstFeatureType, typename... RemainingFeatureTypes>
+  class FeatureSet<FirstFeatureType, RemainingFeatureTypes...> :
    public FeatureSetBase<
-        FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
- private:
-  using Head = FirstFeatureType;
-  using Tail = FeatureSet<RemainingFeatureTypes...>;
+    FeatureSet<FirstFeatureType, RemainingFeatureTypes...>> {
+  private:
+    using Head = FirstFeatureType;
+    using Tail = FeatureSet<RemainingFeatureTypes...>;

- public:
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t kHashValue =
+  public:
+    // Hash value embedded in the evaluation function file
+    static constexpr std::uint32_t kHashValue =
      Head::kHashValue ^ (Tail::kHashValue << 1) ^ (Tail::kHashValue >> 31);
-  // number of feature dimensions
-  static constexpr IndexType kDimensions =
+    // number of feature dimensions
+    static constexpr IndexType kDimensions =
      Head::kDimensions + Tail::kDimensions;
-  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions =
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions =
      Head::kMaxActiveDimensions + Tail::kMaxActiveDimensions;
-  // List of timings to perform all calculations instead of difference calculation
-  using SortedTriggerSet = typename InsertToSet<TriggerEvent,
+    // Set of triggers for full calculation instead of difference calculation
+    using SortedTriggerSet = typename InsertToSet<TriggerEvent,
      typename Tail::SortedTriggerSet, Head::kRefreshTrigger>::Result;
-  static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
+    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;

-  // Get the feature quantity name
-  static std::string GetName() {
-    return std::string(Head::kName) + "+" + Tail::GetName();
-  }
+    // Get the feature quantity name
+    static std::string GetName() {
+      return std::string(Head::kName) + "+" + Tail::GetName();
+    }

- private:
-  // Get a list of indices with a value of 1 among the features
-  template <typename IndexListType>
-  static void CollectActiveIndices(
+  private:
+    // Get a list of indices for active features
+    template <typename IndexListType>
+    static void CollectActiveIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexListType* const active) {
-    Tail::CollectActiveIndices(pos, trigger, perspective, active);
-    if (Head::kRefreshTrigger == trigger) {
-      const auto start = active->size();
-      Head::AppendActiveIndices(pos, perspective, active);
-      for (auto i = start; i < active->size(); ++i) {
-        (*active)[i] += Tail::kDimensions;
+      Tail::CollectActiveIndices(pos, trigger, perspective, active);
+      if (Head::kRefreshTrigger == trigger) {
+        const auto start = active->size();
+        Head::AppendActiveIndices(pos, perspective, active);
+        for (auto i = start; i < active->size(); ++i) {
+          (*active)[i] += Tail::kDimensions;
+        }
      }
    }
-  }

-  // Get a list of indices whose values have changed from the previous one in the feature quantity
-  template <typename IndexListType>
-  static void CollectChangedIndices(
+    // Get a list of indices for recently changed features
+    template <typename IndexListType>
+    static void CollectChangedIndices(
      const Position& pos, const TriggerEvent trigger, const Color perspective,
      IndexListType* const removed, IndexListType* const added) {
-    Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
-    if (Head::kRefreshTrigger == trigger) {
-      const auto start_removed = removed->size();
-      const auto start_added = added->size();
-      Head::AppendChangedIndices(pos, perspective, removed, added);
-      for (auto i = start_removed; i < removed->size(); ++i) {
-        (*removed)[i] += Tail::kDimensions;
-      }
-      for (auto i = start_added; i < added->size(); ++i) {
-        (*added)[i] += Tail::kDimensions;
+      Tail::CollectChangedIndices(pos, trigger, perspective, removed, added);
+      if (Head::kRefreshTrigger == trigger) {
+        const auto start_removed = removed->size();
+        const auto start_added = added->size();
+        Head::AppendChangedIndices(pos, perspective, removed, added);
+        for (auto i = start_removed; i < removed->size(); ++i) {
+          (*removed)[i] += Tail::kDimensions;
+        }
+        for (auto i = start_added; i < added->size(); ++i) {
+          (*added)[i] += Tail::kDimensions;
+        }
      }
    }
-  }

-  // Make the base class and the class template that recursively uses itself a friend
-  friend class FeatureSetBase<FeatureSet>;
-  template <typename... FeatureTypes>
-  friend class FeatureSet;
-};
+    // Make the base class and the class template that recursively uses itself a friend
+    friend class FeatureSetBase<FeatureSet>;
+    template <typename... FeatureTypes>
+    friend class FeatureSet;
+  };

-// Class template that represents the feature set
-// Specialization with one template argument
-template <typename FeatureType>
-class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
- public:
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
-  // number of feature dimensions
-  static constexpr IndexType kDimensions = FeatureType::kDimensions;
-  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions =
-      FeatureType::kMaxActiveDimensions;
-  // List of timings to perform all calculations instead of difference calculation
-  using SortedTriggerSet =
-      CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
-  static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
+  // Class template that represents the feature set
+  template <typename FeatureType>
+  class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {

-  // Get the feature quantity name
-  static std::string GetName() {
-    return FeatureType::kName;
-  }
+   public:
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
+    // Number of feature dimensions
+    static constexpr IndexType kDimensions = FeatureType::kDimensions;
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions =
+        FeatureType::kMaxActiveDimensions;
+    // Trigger for full calculation instead of difference calculation
+    using SortedTriggerSet =
+        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
+    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;

- private:
-  // Get a list of indices with a value of 1 among the features
-  static void CollectActiveIndices(
-      const Position& pos, const TriggerEvent trigger, const Color perspective,
-      IndexList* const active) {
-    if (FeatureType::kRefreshTrigger == trigger) {
-      FeatureType::AppendActiveIndices(pos, perspective, active);
+    // Get the feature quantity name
+    static std::string GetName() {
+      return FeatureType::kName;
    }
-  }

-  // Get a list of indices whose values have changed from the previous one in the feature quantity
-  static void CollectChangedIndices(
-      const Position& pos, const TriggerEvent trigger, const Color perspective,
-      IndexList* const removed, IndexList* const added) {
-    if (FeatureType::kRefreshTrigger == trigger) {
-      FeatureType::AppendChangedIndices(pos, perspective, removed, added);
+   private:
+    // Get a list of indices for active features
+    static void CollectActiveIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const active) {
+      if (FeatureType::kRefreshTrigger == trigger) {
+        FeatureType::AppendActiveIndices(pos, perspective, active);
+      }
    }
-  }

-  // Make the base class and the class template that recursively uses itself a friend
-  friend class FeatureSetBase<FeatureSet>;
-  template <typename... FeatureTypes>
-  friend class FeatureSet;
-};
+    // Get a list of indices for recently changed features
+    static void CollectChangedIndices(
+        const Position& pos, const TriggerEvent trigger, const Color perspective,
+        IndexList* const removed, IndexList* const added) {

-}  // namespace Features
+      if (FeatureType::kRefreshTrigger == trigger) {
+        FeatureType::AppendChangedIndices(pos, perspective, removed, added);
+      }
+    }

-}  // namespace NNUE
+    // Make the base class and the class template that recursively uses itself a friend
+    friend class FeatureSetBase<FeatureSet>;
+    template <typename... FeatureTypes>
+    friend class FeatureSet;
+  };

-}  // namespace Eval
+}  // namespace Eval::NNUE::Features

-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
@@ -1,47 +1,50 @@
-//Common header of input features of NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_FEATURES_COMMON_H_
-#define _NNUE_FEATURES_COMMON_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+//Common header of input features of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
+#define NNUE_FEATURES_COMMON_H_INCLUDED

 #include "../../evaluate.h"
 #include "../nnue_common.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-namespace NNUE {
+  class IndexList;

-namespace Features {
+  template <typename... FeatureTypes>
+  class FeatureSet;

-// Index list type
-class IndexList;
+  // Trigger to perform full calculations instead of difference only
+  enum class TriggerEvent {
+    kNone, // Calculate the difference whenever possible
+    kFriendKingMoved, // do all calculations when own king moves
+    kEnemyKingMoved, // do all calculations when enemy king moves
+    kAnyKingMoved, // do all calculations if either king moves
+    kAnyPieceMoved, // always do all calculations
+  };

-// Class template that represents the feature set
-template <typename... FeatureTypes>
-class FeatureSet;
+  enum class Side {
+    kFriend, // side to move
+    kEnemy, // opponent
+  };

-// Type of timing to perform all calculations instead of difference calculation
-enum class TriggerEvent {
-  kNone, // Calculate the difference whenever possible
-  kFriendKingMoved, // calculate all when own ball moves
-  kEnemyKingMoved, // do all calculations when enemy balls move
-  kAnyKingMoved, // do all calculations if either ball moves
-  kAnyPieceMoved, // always do all calculations
-};
+}  // namespace Eval::NNUE::Features

-// turn side or other side
-enum class Side {
-  kFriend, // turn side
-  kEnemy, // opponent
-};
-
-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
@@ -1,84 +1,92 @@
-//Definition of input features HalfKP of NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#if defined(EVAL_NNUE)
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+//Definition of input features HalfKP of NNUE evaluation function

 #include "half_kp.h"
 #include "index_list.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-namespace NNUE {
+  // Find the index of the feature quantity from the king position and PieceSquare
+  template <Side AssociatedKing>
+  inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) {
+    return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
+  }

-namespace Features {
+  // Get pieces information
+  template <Side AssociatedKing>
+  inline void HalfKP<AssociatedKing>::GetPieces(
+      const Position& pos, Color perspective,
+      PieceSquare** pieces, Square* sq_target_k) {

-// Find the index of the feature quantity from the ball position and BonaPiece
-template <Side AssociatedKing>
-inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, BonaPiece p) {
-  return static_cast<IndexType>(fe_end) * static_cast<IndexType>(sq_k) + p;
-}
+    *pieces = (perspective == BLACK) ?
+        pos.eval_list()->piece_list_fb() :
+        pos.eval_list()->piece_list_fw();
+    const PieceId target = (AssociatedKing == Side::kFriend) ?
+        static_cast<PieceId>(PIECE_ID_KING + perspective) :
+        static_cast<PieceId>(PIECE_ID_KING + ~perspective);
+    *sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
+  }

-// Get the piece information
-template <Side AssociatedKing>
-inline void HalfKP<AssociatedKing>::GetPieces(
-    const Position& pos, Color perspective,
-    BonaPiece** pieces, Square* sq_target_k) {
-  *pieces = (perspective == BLACK) ?
-      pos.eval_list()->piece_list_fb() :
-      pos.eval_list()->piece_list_fw();
-  const PieceNumber target = (AssociatedKing == Side::kFriend) ?
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
-  *sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
-}
+  // Get a list of indices for active features
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendActiveIndices(
+      const Position& pos, Color perspective, IndexList* active) {

-// Get a list of indices with a value of 1 among the features
-template <Side AssociatedKing>
-void HalfKP<AssociatedKing>::AppendActiveIndices(
-    const Position& pos, Color perspective, IndexList* active) {
-  // do nothing if array size is small to avoid compiler warning
-  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+    // Do nothing if array size is small to avoid compiler warning
+    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;

-  BonaPiece* pieces;
-  Square sq_target_k;
-  GetPieces(pos, perspective, &pieces, &sq_target_k);
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
-    if (pieces[i] != Eval::BONA_PIECE_ZERO) {
-      active->push_back(MakeIndex(sq_target_k, pieces[i]));
+    PieceSquare* pieces;
+    Square sq_target_k;
+    GetPieces(pos, perspective, &pieces, &sq_target_k);
+    for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
+      if (pieces[i] != PS_NONE) {
+        active->push_back(MakeIndex(sq_target_k, pieces[i]));
+      }
    }
  }
-}

-// Get a list of indices whose values have changed from the previous one in the feature quantity
-template <Side AssociatedKing>
-void HalfKP<AssociatedKing>::AppendChangedIndices(
-    const Position& pos, Color perspective,
-    IndexList* removed, IndexList* added) {
-  BonaPiece* pieces;
-  Square sq_target_k;
-  GetPieces(pos, perspective, &pieces, &sq_target_k);
-  const auto& dp = pos.state()->dirtyPiece;
-  for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
-    const auto old_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].old_piece.from[perspective]);
-    if (old_p != Eval::BONA_PIECE_ZERO) {
-      removed->push_back(MakeIndex(sq_target_k, old_p));
-    }
-    const auto new_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].new_piece.from[perspective]);
-    if (new_p != Eval::BONA_PIECE_ZERO) {
-      added->push_back(MakeIndex(sq_target_k, new_p));
+  // Get a list of indices for recently changed features
+  template <Side AssociatedKing>
+  void HalfKP<AssociatedKing>::AppendChangedIndices(
+      const Position& pos, Color perspective,
+      IndexList* removed, IndexList* added) {
+
+    PieceSquare* pieces;
+    Square sq_target_k;
+    GetPieces(pos, perspective, &pieces, &sq_target_k);
+    const auto& dp = pos.state()->dirtyPiece;
+    for (int i = 0; i < dp.dirty_num; ++i) {
+      if (dp.pieceId[i] >= PIECE_ID_KING) continue;
+      const auto old_p = static_cast<PieceSquare>(
+          dp.old_piece[i].from[perspective]);
+      if (old_p != PS_NONE) {
+        removed->push_back(MakeIndex(sq_target_k, old_p));
+      }
+      const auto new_p = static_cast<PieceSquare>(
+          dp.new_piece[i].from[perspective]);
+      if (new_p != PS_NONE) {
+        added->push_back(MakeIndex(sq_target_k, new_p));
+      }
    }
  }
-}

-template class HalfKP<Side::kFriend>;
-template class HalfKP<Side::kEnemy>;
+  template class HalfKP<Side::kFriend>;

-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
+}  // namespace Eval::NNUE::Features
@@ -1,62 +1,67 @@
-//Definition of input features HalfKP of NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_FEATURES_HALF_KP_H_
-#define _NNUE_FEATURES_HALF_KP_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+//Definition of input features HalfKP of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
+#define NNUE_FEATURES_HALF_KP_H_INCLUDED

 #include "../../evaluate.h"
 #include "features_common.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-namespace NNUE {
+  // Feature HalfKP: Combination of the position of own king
+  // and the position of pieces other than kings
+  template <Side AssociatedKing>
+  class HalfKP {

-namespace Features {
+   public:
+    // Feature name
+    static constexpr const char* kName = "HalfKP(Friend)";
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t kHashValue =
+        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
+    // Number of feature dimensions
+    static constexpr IndexType kDimensions =
+        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
+    // Maximum number of simultaneously active features
+    static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING;
+    // Trigger for full calculation instead of difference calculation
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;

-// Feature HalfKP: Combination of the position of own ball or enemy ball and the position of pieces other than balls
-template <Side AssociatedKing>
-class HalfKP {
- public:
-  // feature quantity name
-  static constexpr const char* kName =
-      (AssociatedKing == Side::kFriend) ? "HalfKP(Friend)" : "HalfKP(Enemy)";
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t kHashValue =
-      0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
-  // number of feature dimensions
-  static constexpr IndexType kDimensions =
-      static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(fe_end);
-  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
-  // Timing of full calculation instead of difference calculation
-  static constexpr TriggerEvent kRefreshTrigger =
-      (AssociatedKing == Side::kFriend) ?
-      TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
+    // Get a list of indices for active features
+    static void AppendActiveIndices(const Position& pos, Color perspective,
+                                    IndexList* active);

-  // Get a list of indices with a value of 1 among the features
-  static void AppendActiveIndices(const Position& pos, Color perspective,
-                                  IndexList* active);
+    // Get a list of indices for recently changed features
+    static void AppendChangedIndices(const Position& pos, Color perspective,
+                                     IndexList* removed, IndexList* added);

-  // Get a list of indices whose values have changed from the previous one in the feature quantity
-  static void AppendChangedIndices(const Position& pos, Color perspective,
-                                   IndexList* removed, IndexList* added);
+    // Index of a feature for a given king position and another piece on some square
+    static IndexType MakeIndex(Square sq_k, PieceSquare p);

-  // Find the index of the feature quantity from the ball position and BonaPiece
-  static IndexType MakeIndex(Square sq_k, BonaPiece p);
+   private:
+    // Get pieces information
+    static void GetPieces(const Position& pos, Color perspective,
+                          PieceSquare** pieces, Square* sq_target_k);
+  };

- private:
-  // Get the piece information
-  static void GetPieces(const Position& pos, Color perspective,
-                        BonaPiece** pieces, Square* sq_target_k);
-};
+}  // namespace Eval::NNUE::Features

-}  // namespace Features
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
@@ -11,14 +11,14 @@ namespace NNUE {

 namespace Features {

-// Find the index of the feature quantity from the ball position and BonaPiece
+// Find the index of the feature quantity from the king position and PieceSquare
 template <Side AssociatedKing>
 inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
-    Square sq_k, BonaPiece p) {
+    Square sq_k, PieceSquare p) {
  constexpr IndexType W = kBoardWidth;
  constexpr IndexType H = kBoardHeight;
-  const IndexType piece_index = (p - fe_hand_end) / SQUARE_NB;
-  const Square sq_p = static_cast<Square>((p - fe_hand_end) % SQUARE_NB);
+  const IndexType piece_index = (p - PieceSquare::PS_W_PAWN) / SQUARE_NB;
+  const Square sq_p = static_cast<Square>((p - PieceSquare::PS_W_PAWN) % SQUARE_NB);
  const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
  const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
  return H * W * piece_index + H * relative_file + relative_rank;
@@ -28,14 +28,14 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
 template <Side AssociatedKing>
 inline void HalfRelativeKP<AssociatedKing>::GetPieces(
    const Position& pos, Color perspective,
-    BonaPiece** pieces, Square* sq_target_k) {
+    PieceSquare** pieces, Square* sq_target_k) {
  *pieces = (perspective == BLACK) ?
      pos.eval_list()->piece_list_fb() :
      pos.eval_list()->piece_list_fw();
-  const PieceNumber target = (AssociatedKing == Side::kFriend) ?
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + perspective) :
-      static_cast<PieceNumber>(PIECE_NUMBER_KING + ~perspective);
-  *sq_target_k = static_cast<Square>(((*pieces)[target] - f_king) % SQUARE_NB);
+  const PieceId target = (AssociatedKing == Side::kFriend) ?
+      static_cast<PieceId>(PieceId::PIECE_ID_KING + perspective) :
+      static_cast<PieceId>(PieceId::PIECE_ID_KING + ~perspective);
+  *sq_target_k = static_cast<Square>(((*pieces)[target] - PieceSquare::PS_W_KING) % SQUARE_NB);
 }

 // Get a list of indices with a value of 1 among the features
@@ -45,12 +45,12 @@ void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;

-  BonaPiece* pieces;
+  PieceSquare* pieces;
  Square sq_target_k;
  GetPieces(pos, perspective, &pieces, &sq_target_k);
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
-    if (pieces[i] >= fe_hand_end) {
-      if (pieces[i] != Eval::BONA_PIECE_ZERO) {
+  for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
+    if (pieces[i] >= PieceSquare::PS_W_PAWN) {
+      if (pieces[i] != PieceSquare::PS_NONE) {
        active->push_back(MakeIndex(sq_target_k, pieces[i]));
      }
    }
@@ -62,23 +62,23 @@ template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
-  BonaPiece* pieces;
+  PieceSquare* pieces;
  Square sq_target_k;
  GetPieces(pos, perspective, &pieces, &sq_target_k);
  const auto& dp = pos.state()->dirtyPiece;
  for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
-    const auto old_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].old_piece.from[perspective]);
-    if (old_p >= fe_hand_end) {
-      if (old_p != Eval::BONA_PIECE_ZERO) {
+    if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
+    const auto old_p = static_cast<PieceSquare>(
+        dp.old_piece[i].from[perspective]);
+    if (old_p >= PieceSquare::PS_W_PAWN) {
+      if (old_p != PieceSquare::PS_NONE) {
        removed->push_back(MakeIndex(sq_target_k, old_p));
      }
    }
-    const auto new_p = static_cast<BonaPiece>(
-        dp.changed_piece[i].new_piece.from[perspective]);
-    if (new_p >= fe_hand_end) {
-      if (new_p != Eval::BONA_PIECE_ZERO) {
+    const auto new_p = static_cast<PieceSquare>(
+        dp.new_piece[i].from[perspective]);
+    if (new_p >= PieceSquare::PS_W_PAWN) {
+      if (new_p != PieceSquare::PS_NONE) {
        added->push_back(MakeIndex(sq_target_k, new_p));
      }
    }
@@ -25,7 +25,7 @@ class HalfRelativeKP {
  static constexpr std::uint32_t kHashValue =
      0xF9180919u ^ (AssociatedKing == Side::kFriend);
  // Piece type excluding balls
-  static constexpr IndexType kNumPieceKinds = (fe_end - fe_hand_end) / SQUARE_NB;
+  static constexpr IndexType kNumPieceKinds = (PieceSquare::PS_END - PieceSquare::PS_W_PAWN) / SQUARE_NB;
  // width of the virtual board with the ball in the center
  static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
  // height of a virtual board with balls in the center
@@ -34,7 +34,7 @@ class HalfRelativeKP {
  static constexpr IndexType kDimensions =
      kNumPieceKinds * kBoardHeight * kBoardWidth;
  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
+  static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
  // Timing of full calculation instead of difference calculation
  static constexpr TriggerEvent kRefreshTrigger =
      (AssociatedKing == Side::kFriend) ?
@@ -48,13 +48,13 @@ class HalfRelativeKP {
  static void AppendChangedIndices(const Position& pos, Color perspective,
                                   IndexList* removed, IndexList* added);

-  // Find the index of the feature quantity from the ball position and BonaPiece
-  static IndexType MakeIndex(Square sq_k, BonaPiece p);
+  // Find the index of the feature quantity from the king position and PieceSquare
+  static IndexType MakeIndex(Square sq_k, PieceSquare p);

 private:
  // Get the piece information
  static void GetPieces(const Position& pos, Color perspective,
-                        BonaPiece** pieces, Square* sq_target_k);
+                        PieceSquare** pieces, Square* sq_target_k);
 };

 }  // namespace Features
@@ -1,55 +1,64 @@
-// Definition of index list of input features
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_FEATURES_INDEX_LIST_H_
-#define _NNUE_FEATURES_INDEX_LIST_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of index list of input features
+
+#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
+#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED

 #include "../../position.h"
 #include "../nnue_architecture.h"

-namespace Eval {
+namespace Eval::NNUE::Features {

-namespace NNUE {
+  // Fixed-capacity list of up to MaxSize values of type T, used for feature index lists
+  template <typename T, std::size_t MaxSize>
+  class ValueList {

-namespace Features {
+   public:
+    std::size_t size() const { return size_; }  // number of elements currently stored
+    void resize(std::size_t size) { size_ = size; }  // changes the logical size only; storage is not touched
+    void push_back(const T& value) { values_[size_++] = value; }  // appends; no check against MaxSize
+    T& operator[](std::size_t index) { return values_[index]; }  // unchecked access
+    T* begin() { return values_; }
+    T* end() { return values_ + size_; }  // one past the last stored element
+    const T& operator[](std::size_t index) const { return values_[index]; }  // unchecked access
+    const T* begin() const { return values_; }
+    const T* end() const { return values_ + size_; }  // one past the last stored element

-// Class template used for feature index list
-template <typename T, std::size_t MaxSize>
-class ValueList {
- public:
-  std::size_t size() const { return size_; }
-  void resize(std::size_t size) { size_ = size; }
-  void push_back(const T& value) { values_[size_++] = value; }
-  T& operator[](std::size_t index) { return values_[index]; }
-  T* begin() { return values_; }
-  T* end() { return values_ + size_; }
-  const T& operator[](std::size_t index) const { return values_[index]; }
-  const T* begin() const { return values_; }
-  const T* end() const { return values_ + size_; }
-  void swap(ValueList& other) {
-    const std::size_t max_size = std::max(size_, other.size_);
-    for (std::size_t i = 0; i < max_size; ++i) {
-      std::swap(values_[i], other.values_[i]);
+    void swap(ValueList& other) {  // exchanges the stored elements and the sizes of the two lists
+      const std::size_t max_size = std::max(size_, other.size_);  // slots at or past both sizes are not swapped
+      for (std::size_t i = 0; i < max_size; ++i) {
+        std::swap(values_[i], other.values_[i]);
+      }
+      std::swap(size_, other.size_);
    }
-    std::swap(size_, other.size_);
-  }
- private:
-  T values_[MaxSize];
-  std::size_t size_ = 0;
-};

-//Type of feature index list
-class IndexList
-    : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
-};
+   private:
+    T values_[MaxSize];  // inline storage; only the first size_ entries are in use
+    std::size_t size_ = 0;  // a new list starts empty
+  };

-}  // namespace Features
+  // Feature index list type: a ValueList of IndexType with capacity RawFeatures::kMaxActiveDimensions
+  class IndexList
+      : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
+  };

-}  // namespace NNUE
+}  // namespace Eval::NNUE::Features

-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
@@ -17,13 +17,13 @@ void K::AppendActiveIndices(
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;

-  const BonaPiece* pieces = (perspective == BLACK) ?
+  const PieceSquare* pieces = (perspective == BLACK) ?
      pos.eval_list()->piece_list_fb() :
      pos.eval_list()->piece_list_fw();
-  assert(pieces[PIECE_NUMBER_BKING] != BONA_PIECE_ZERO);
-  assert(pieces[PIECE_NUMBER_WKING] != BONA_PIECE_ZERO);
-  for (PieceNumber i = PIECE_NUMBER_KING; i < PIECE_NUMBER_NB; ++i) {
-    active->push_back(pieces[i] - fe_end);
+  assert(pieces[PieceId::PIECE_ID_BKING] != PieceSquare::PS_NONE);
+  assert(pieces[PieceId::PIECE_ID_WKING] != PieceSquare::PS_NONE);
+  for (PieceId i = PieceId::PIECE_ID_KING; i < PieceId::PIECE_ID_NONE; ++i) {
+    active->push_back(pieces[i] - PieceSquare::PS_END);
  }
 }

@@ -32,11 +32,11 @@ void K::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
  const auto& dp = pos.state()->dirtyPiece;
-  if (dp.pieceNo[0] >= PIECE_NUMBER_KING) {
+  if (dp.pieceId[0] >= PieceId::PIECE_ID_KING) {
    removed->push_back(
-        dp.changed_piece[0].old_piece.from[perspective] - fe_end);
+        dp.old_piece[0].from[perspective] - PieceSquare::PS_END);
    added->push_back(
-        dp.changed_piece[0].new_piece.from[perspective] - fe_end);
+        dp.new_piece[0].from[perspective] - PieceSquare::PS_END);
  }
 }

@@ -17,11 +17,11 @@ void P::AppendActiveIndices(
  // do nothing if array size is small to avoid compiler warning
  if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;

-  const BonaPiece* pieces = (perspective == BLACK) ?
+  const PieceSquare* pieces = (perspective == BLACK) ?
      pos.eval_list()->piece_list_fb() :
      pos.eval_list()->piece_list_fw();
-  for (PieceNumber i = PIECE_NUMBER_ZERO; i < PIECE_NUMBER_KING; ++i) {
-    if (pieces[i] != Eval::BONA_PIECE_ZERO) {
+  for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
+    if (pieces[i] != PieceSquare::PS_NONE) {
      active->push_back(pieces[i]);
    }
  }
@@ -33,12 +33,12 @@ void P::AppendChangedIndices(
    IndexList* removed, IndexList* added) {
  const auto& dp = pos.state()->dirtyPiece;
  for (int i = 0; i < dp.dirty_num; ++i) {
-    if (dp.pieceNo[i] >= PIECE_NUMBER_KING) continue;
-    if (dp.changed_piece[i].old_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
-      removed->push_back(dp.changed_piece[i].old_piece.from[perspective]);
+    if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue;
+    if (dp.old_piece[i].from[perspective] != PieceSquare::PS_NONE) {
+      removed->push_back(dp.old_piece[i].from[perspective]);
    }
-    if (dp.changed_piece[i].new_piece.from[perspective] != Eval::BONA_PIECE_ZERO) {
-      added->push_back(dp.changed_piece[i].new_piece.from[perspective]);
+    if (dp.new_piece[i].from[perspective] != PieceSquare::PS_NONE) {
+      added->push_back(dp.new_piece[i].from[perspective]);
    }
  }
 }
@@ -14,7 +14,7 @@ namespace NNUE {

 namespace Features {

-// Feature P: BonaPiece of pieces other than balls
+// Feature P: PieceSquare of pieces other than kings
 class P {
 public:
  // feature quantity name
@@ -22,9 +22,9 @@ class P {
  // Hash value embedded in the evaluation function file
  static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
  // number of feature dimensions
-  static constexpr IndexType kDimensions = fe_end;
+  static constexpr IndexType kDimensions = PieceSquare::PS_END;
  // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
-  static constexpr IndexType kMaxActiveDimensions = PIECE_NUMBER_KING;
+  static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING;
  // Timing of full calculation instead of difference calculation
  static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;

@@ -1,217 +1,237 @@
-// Definition of layer AffineTransform of NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_LAYERS_AFFINE_TRANSFORM_H_
-#define _NNUE_LAYERS_AFFINE_TRANSFORM_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.

+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer AffineTransform of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+
+#include <iostream>
 #include "../nnue_common.h"

-namespace Eval {
+namespace Eval::NNUE::Layers {

-namespace NNUE {
+  // Affine transformation layer
+  template <typename PreviousLayer, IndexType OutputDimensions>
+  class AffineTransform {
+   public:
+    // Input/output type
+    using InputType = typename PreviousLayer::OutputType;
+    using OutputType = std::int32_t;
+    static_assert(std::is_same<InputType, std::uint8_t>::value, "");

-namespace Layers {
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = OutputDimensions;
+    static constexpr IndexType kPaddedInputDimensions =
+        CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);

-// affine transformation layer
-template <typename PreviousLayer, IndexType OutputDimensions>
-class AffineTransform {
- public:
-  // Input/output type
-  using InputType = typename PreviousLayer::OutputType;
-  using OutputType = std::int32_t;
-  static_assert(std::is_same<InputType, std::uint8_t>::value, "");
+    // Size of forward propagation buffer used in this layer
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);

-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions =
-      PreviousLayer::kOutputDimensions;
-  static constexpr IndexType kOutputDimensions = OutputDimensions;
-  static constexpr IndexType kPaddedInputDimensions =
-      CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;

-  // Size of forward propagation buffer used in this layer
-  static constexpr std::size_t kSelfBufferSize =
-      CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+    // Hash value embedded in the evaluation file; mixes this layer's constant and output count with the previous layer's hash
+    static constexpr std::uint32_t GetHashValue() {
+      std::uint32_t hash_value = 0xCC03DAE4u;
+      hash_value += kOutputDimensions;
+      hash_value ^= PreviousLayer::GetHashValue() >> 1;  // with the next line: XOR in the previous hash rotated right by one bit
+      hash_value ^= PreviousLayer::GetHashValue() << 31;
+      return hash_value;
+    }

-  // Size of the forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t kBufferSize =
-      PreviousLayer::kBufferSize + kSelfBufferSize;
-
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    std::uint32_t hash_value = 0xCC03DAE4u;
-    hash_value += kOutputDimensions;
-    hash_value ^= PreviousLayer::GetHashValue() >> 1;
-    hash_value ^= PreviousLayer::GetHashValue() << 31;
-    return hash_value;
-  }
-
-  // A string that represents the structure from the input layer to this layer
-  static std::string GetStructureString() {
-    return "AffineTransform[" +
+    // A string that represents the structure from the input layer to this layer
+    static std::string GetStructureString() {
+      return "AffineTransform[" +
        std::to_string(kOutputDimensions) + "<-" +
        std::to_string(kInputDimensions) + "](" +
        PreviousLayer::GetStructureString() + ")";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& stream) {
-    if (!previous_layer_.ReadParameters(stream)) return false;
-    stream.read(reinterpret_cast<char*>(biases_),
-                kOutputDimensions * sizeof(BiasType));
-    stream.read(reinterpret_cast<char*>(weights_),
-                kOutputDimensions * kPaddedInputDimensions *
-                sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& stream) const {
-    if (!previous_layer_.WriteParameters(stream)) return false;
-    stream.write(reinterpret_cast<const char*>(biases_),
-                 kOutputDimensions * sizeof(BiasType));
-    stream.write(reinterpret_cast<const char*>(weights_),
-                 kOutputDimensions * kPaddedInputDimensions *
-                 sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // forward propagation
-  const OutputType* Propagate(
-      const TransformedFeatureType* transformed_features, char* buffer) const {
-    const auto input = previous_layer_.Propagate(
-        transformed_features, buffer + kSelfBufferSize);
-    const auto output = reinterpret_cast<OutputType*>(buffer);
-#if defined(USE_AVX512)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
-    const __m512i kOnes = _mm512_set1_epi16(1);
-    const auto input_vector = reinterpret_cast<const __m512i*>(input);
-#elif defined(USE_AVX2)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-    const __m256i kOnes = _mm256_set1_epi16(1);
-    const auto input_vector = reinterpret_cast<const __m256i*>(input);
-#elif defined(USE_SSSE3)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-    const __m128i kOnes = _mm_set1_epi16(1);
-    const auto input_vector = reinterpret_cast<const __m128i*>(input);
-#elif defined(IS_ARM)
-    constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-    const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
-#endif
-    for (IndexType i = 0; i < kOutputDimensions; ++i) {
-      const IndexType offset = i * kPaddedInputDimensions;
-#if defined(USE_AVX512)
-      __m512i sum = _mm512_setzero_si512();
-      const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
-#else
-          __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
-#endif
-          product = _mm512_madd_epi16(product, kOnes);
-          sum = _mm512_add_epi32(sum, product);
-      }
-      output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
-      
-      // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
-      // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
-      // and we have to do one more 256bit chunk.
-      if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
-      {
-          const auto iv_256  = reinterpret_cast<const __m256i*>(input);
-          const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
-          int j = kNumChunks * 2;
-#if defined(__MINGW32__) || defined(__MINGW64__)  // See HACK comment below in AVX2.
-          __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
-#else
-          __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
-#endif
-          sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
-
-          sum256 = _mm256_hadd_epi32(sum256, sum256);
-          sum256 = _mm256_hadd_epi32(sum256, sum256);
-          const __m128i lo = _mm256_extracti128_si256(sum256, 0);
-          const __m128i hi = _mm256_extracti128_si256(sum256, 1);
-          output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
-      }
-#elif defined(USE_AVX2)
-      __m256i sum = _mm256_setzero_si256();
-      const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m256i product = _mm256_maddubs_epi16(
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
-          //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
-          //       even though alignas is specified.
-          _mm256_loadu_si256
-#else
-          _mm256_load_si256
-#endif
-          (&input_vector[j]), _mm256_load_si256(&row[j]));
-        product = _mm256_madd_epi16(product, kOnes);
-        sum = _mm256_add_epi32(sum, product);
-      }
-      sum = _mm256_hadd_epi32(sum, sum);
-      sum = _mm256_hadd_epi32(sum, sum);
-      const __m128i lo = _mm256_extracti128_si256(sum, 0);
-      const __m128i hi = _mm256_extracti128_si256(sum, 1);
-      output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
-#elif defined(USE_SSSE3)
-      __m128i sum = _mm_cvtsi32_si128(biases_[i]);
-      const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m128i product = _mm_maddubs_epi16(
-            _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
-        product = _mm_madd_epi16(product, kOnes);
-        sum = _mm_add_epi32(sum, product);
-      }
-      sum = _mm_hadd_epi32(sum, sum);
-      sum = _mm_hadd_epi32(sum, sum);
-      output[i] = _mm_cvtsi128_si32(sum);
-#elif defined(IS_ARM)
-      int32x4_t sum = {biases_[i]};
-      const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
-        product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
-        sum = vpadalq_s16(sum, product);
-      }
-      output[i] = sum[0] + sum[1] + sum[2] + sum[3];
-#else
-      OutputType sum = biases_[i];
-      for (IndexType j = 0; j < kInputDimensions; ++j) {
-        sum += weights_[offset + j] * input[j];
-      }
-      output[i] = sum;
-#endif
    }
-    return output;
-  }
+    
+    // Read network parameters: previous layers first, then this layer's biases and weights; returns false on failure
+    bool ReadParameters(std::istream& stream) {
+      if (!previous_layer_.ReadParameters(stream)) return false;
+      stream.read(reinterpret_cast<char*>(biases_),
+                  kOutputDimensions * sizeof(BiasType));  // raw bytes, no byte-order conversion
+      stream.read(reinterpret_cast<char*>(weights_),
+                  kOutputDimensions * kPaddedInputDimensions *
+                  sizeof(WeightType));  // whole padded rows, padding columns included
+      return !stream.fail();  // false if any read above failed
+    }

- private:
-  // parameter type
-  using BiasType = OutputType;
-  using WeightType = std::int8_t;
+    // Write network parameters in the order ReadParameters reads them; returns false on failure
+    bool WriteParameters(std::ostream& stream) const {
+      if (!previous_layer_.WriteParameters(stream)) return false;
+      stream.write(reinterpret_cast<const char*>(biases_),
+        kOutputDimensions * sizeof(BiasType));  // raw bytes, no byte-order conversion
+      stream.write(reinterpret_cast<const char*>(weights_),
+        kOutputDimensions * kPaddedInputDimensions *
+        sizeof(WeightType));  // whole padded rows, padding columns included
+      return !stream.fail();  // false if any write above failed
+    }

-  // Make the learning class a friend
-  friend class Trainer<AffineTransform>;
+    // Forward propagation: output[i] = biases_[i] + dot(weight row i, previous layer output); returns the start of buffer
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);  // previous layers work past this layer's own slice
+      const auto output = reinterpret_cast<OutputType*>(buffer);  // this layer's results occupy the front of buffer

-  // the layer immediately before this layer
-  PreviousLayer previous_layer_;
+  #if defined(USE_AVX512)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);  // one 512-bit chunk spans kSimdWidth * 2 bytes
+      const __m512i kOnes = _mm512_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m512i*>(input);

-  // parameter
-  alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
-  alignas(kCacheLineSize)
-      WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
-};
+  #elif defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m256i kOnes = _mm256_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m256i*>(input);

-}  // namespace Layers
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const __m128i kOnes = _mm_set1_epi16(1);
+      const auto input_vector = reinterpret_cast<const __m128i*>(input);

-}  // namespace NNUE
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
+      const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
+  #endif

-}  // namespace Eval
+      for (IndexType i = 0; i < kOutputDimensions; ++i) {
+        const IndexType offset = i * kPaddedInputDimensions;  // start of weight row i

-#endif  // defined(EVAL_NNUE)
+  #if defined(USE_AVX512)
+        __m512i sum = _mm512_setzero_si512();
+        const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {

-#endif
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+  #else
+            __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
+  #endif
+
+            product = _mm512_madd_epi16(product, kOnes);  // times 1, adjacent pairs added: widens the 16-bit sums to 32-bit
+            sum = _mm512_add_epi32(sum, product);
+        }
+        output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
+
+        // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
+        // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
+        // and we have to do one more 256bit chunk.
+        if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
+        {
+            const auto iv_256  = reinterpret_cast<const __m256i*>(input);
+            const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]);
+            int j = kNumChunks * 2;  // index of the leftover 256-bit chunk, right after the 512-bit ones
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)  // See HACK comment below in AVX2.
+            __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+  #else
+            __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j]));
+  #endif
+
+            sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1));
+            sum256 = _mm256_hadd_epi32(sum256, sum256);
+            sum256 = _mm256_hadd_epi32(sum256, sum256);
+            const __m128i lo = _mm256_extracti128_si256(sum256, 0);
+            const __m128i hi = _mm256_extracti128_si256(sum256, 1);
+            output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
+        }
+
+  #elif defined(USE_AVX2)
+        __m256i sum = _mm256_setzero_si256();
+        const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i product = _mm256_maddubs_epi16(
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
+            //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
+            //       even though alignas is specified.
+            _mm256_loadu_si256
+  #else
+            _mm256_load_si256
+  #endif
+
+            (&input_vector[j]), _mm256_load_si256(&row[j]));
+          product = _mm256_madd_epi16(product, kOnes);  // times 1, adjacent pairs added: widens the 16-bit sums to 32-bit
+          sum = _mm256_add_epi32(sum, product);
+        }
+        sum = _mm256_hadd_epi32(sum, sum);
+        sum = _mm256_hadd_epi32(sum, sum);
+        const __m128i lo = _mm256_extracti128_si256(sum, 0);
+        const __m128i hi = _mm256_extracti128_si256(sum, 1);
+        output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
+
+  #elif defined(USE_SSSE3)
+        __m128i sum = _mm_cvtsi32_si128(biases_[i]);  // the bias starts in the lowest 32-bit lane
+        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i product = _mm_maddubs_epi16(
+              _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
+          product = _mm_madd_epi16(product, kOnes);  // times 1, adjacent pairs added: widens the 16-bit sums to 32-bit
+          sum = _mm_add_epi32(sum, product);
+        }
+        sum = _mm_hadd_epi32(sum, sum);
+        sum = _mm_hadd_epi32(sum, sum);
+        output[i] = _mm_cvtsi128_si32(sum);
+
+  #elif defined(USE_NEON)
+        int32x4_t sum = {biases_[i]};  // the bias seeds the first lane; all four lanes are added up below
+        const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
+          product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
+          sum = vpadalq_s16(sum, product);
+        }
+        output[i] = sum[0] + sum[1] + sum[2] + sum[3];
+
+  #else
+        OutputType sum = biases_[i];
+        for (IndexType j = 0; j < kInputDimensions; ++j) {  // scalar path: only the kInputDimensions real inputs, padding skipped
+          sum += weights_[offset + j] * input[j];
+        }
+        output[i] = sum;
+  #endif
+
+      }
+      return output;
+    }
+
+   private:
+    using BiasType = OutputType;  // biases use the same type as the layer output
+    using WeightType = std::int8_t;

+    // Make the learning class a friend
+    friend class Trainer<AffineTransform>;

+    PreviousLayer previous_layer_;  // the layer whose output is this layer's input

+    alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];  // one bias per output
+    alignas(kCacheLineSize)
+        WeightType weights_[kOutputDimensions * kPaddedInputDimensions];  // kPaddedInputDimensions weights per output, row after row
+  };
+
+}  // namespace Eval::NNUE::Layers
+
+#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
@@ -1,177 +1,201 @@
-// Definition of layer ClippedReLU of NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_LAYERS_CLIPPED_RELU_H_
-#define _NNUE_LAYERS_CLIPPED_RELU_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer ClippedReLU of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED

 #include "../nnue_common.h"

-namespace Eval {
+namespace Eval::NNUE::Layers {

-namespace NNUE {
+  // Clipped ReLU
+  template <typename PreviousLayer>
+  class ClippedReLU {
+   public:
+    // Input/output type
+    using InputType = typename PreviousLayer::OutputType;
+    using OutputType = std::uint8_t;
+    static_assert(std::is_same<InputType, std::int32_t>::value, "");

-namespace Layers {
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions =
+        PreviousLayer::kOutputDimensions;
+    static constexpr IndexType kOutputDimensions = kInputDimensions;

-// Clipped ReLU
-template <typename PreviousLayer>
-class ClippedReLU {
- public:
-  // Input/output type
-  using InputType = typename PreviousLayer::OutputType;
-  using OutputType = std::uint8_t;
-  static_assert(std::is_same<InputType, std::int32_t>::value, "");
+    // Size of forward propagation buffer used in this layer
+    static constexpr std::size_t kSelfBufferSize =
+        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);

-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions =
-      PreviousLayer::kOutputDimensions;
-  static constexpr IndexType kOutputDimensions = kInputDimensions;
+    // Size of the forward propagation buffer used from the input layer to this layer
+    static constexpr std::size_t kBufferSize =
+        PreviousLayer::kBufferSize + kSelfBufferSize;

-  // Size of forward propagation buffer used in this layer
-  static constexpr std::size_t kSelfBufferSize =
-      CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+    // Hash value embedded in the evaluation file: this layer's constant plus the hash of all layers below it
+    static constexpr std::uint32_t GetHashValue() {
+      std::uint32_t hash_value = 0x538D24C7u;  // Constant for ClippedReLU (InputSlice uses a different one)
+      hash_value += PreviousLayer::GetHashValue();  // Unsigned addition, wraps modulo 2^32
+      return hash_value;
+    }

-  // Size of the forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t kBufferSize =
-      PreviousLayer::kBufferSize + kSelfBufferSize;
-
-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    std::uint32_t hash_value = 0x538D24C7u;
-    hash_value += PreviousLayer::GetHashValue();
-    return hash_value;
-  }
-
-  // A string that represents the structure from the input layer to this layer
-  static std::string GetStructureString() {
-    return "ClippedReLU[" +
+    // A string that represents the structure from the input layer to this layer
+    static std::string GetStructureString() {
+      return "ClippedReLU[" +
        std::to_string(kOutputDimensions) + "](" +
        PreviousLayer::GetStructureString() + ")";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& stream) {
-    return previous_layer_.ReadParameters(stream);
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& stream) const {
-    return previous_layer_.WriteParameters(stream);
-  }
-
-  // forward propagation
-  const OutputType* Propagate(
-      const TransformedFeatureType* transformed_features, char* buffer) const {
-    const auto input = previous_layer_.Propagate(
-        transformed_features, buffer + kSelfBufferSize);
-    const auto output = reinterpret_cast<OutputType*>(buffer);
-#if defined(USE_AVX2)
-    constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
-    const __m256i kZero = _mm256_setzero_si256();
-    const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
-    const auto in = reinterpret_cast<const __m256i*>(input);
-    const auto out = reinterpret_cast<__m256i*>(output);
-    for (IndexType i = 0; i < kNumChunks; ++i) {
-      const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
-        //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
-        //       even though alignas is specified.
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 0]),
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 1])), kWeightScaleBits);
-      const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 2]),
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_loadu_si256
-#else
-        _mm256_load_si256
-#endif
-        (&in[i * 4 + 3])), kWeightScaleBits);
-#if defined(__MINGW32__) || defined(__MINGW64__)
-      _mm256_storeu_si256
-#else
-      _mm256_store_si256
-#endif
-        (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
-          _mm256_packs_epi16(words0, words1), kZero), kOffsets));
    }
-    constexpr IndexType kStart = kNumChunks * kSimdWidth;
-#elif defined(USE_SSSE3)
-    constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
-    const __m128i kZero = _mm_setzero_si128();
-#ifndef USE_SSE41
-    const __m128i k0x80s = _mm_set1_epi8(-128);
-#endif
-    const auto in = reinterpret_cast<const __m128i*>(input);
-    const auto out = reinterpret_cast<__m128i*>(output);
-    for (IndexType i = 0; i < kNumChunks; ++i) {
-      const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
-          _mm_load_si128(&in[i * 4 + 0]),
-          _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
-      const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
-          _mm_load_si128(&in[i * 4 + 2]),
-          _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
-      const __m128i packedbytes = _mm_packs_epi16(words0, words1);
-      _mm_store_si128(&out[i], 
-#ifdef USE_SSE41
-        _mm_max_epi8(packedbytes, kZero)
-#else
-        _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
-#endif
-      );
+
+    // Read network parameters
+    bool ReadParameters(std::istream& stream) {
+      return previous_layer_.ReadParameters(stream);
    }
-    constexpr IndexType kStart = kNumChunks * kSimdWidth;
-#elif defined(IS_ARM)
-    constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
-    const int8x8_t kZero = {0};
-    const auto in = reinterpret_cast<const int32x4_t*>(input);
-    const auto out = reinterpret_cast<int8x8_t*>(output);
-    for (IndexType i = 0; i < kNumChunks; ++i) {
-      int16x8_t shifted;
-      const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
-      pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
-      pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
-      out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
+
+    // Write network parameters
+    bool WriteParameters(std::ostream& stream) const {
+      return previous_layer_.WriteParameters(stream);
    }
-    constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
-#else
-    constexpr IndexType kStart = 0;
-#endif
-    for (IndexType i = kStart; i < kInputDimensions; ++i) {
-      output[i] = static_cast<OutputType>(
-          std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
+
+    // Forward propagation: runs the previous layer, then stores clamp(input[i] >> kWeightScaleBits, 0, 127) for every element at the start of `buffer`
+    const OutputType* Propagate(
+        const TransformedFeatureType* transformed_features, char* buffer) const {
+      const auto input = previous_layer_.Propagate(
+          transformed_features, buffer + kSelfBufferSize);  // Previous layers get the buffer space after this layer's own kSelfBufferSize bytes
+      const auto output = reinterpret_cast<OutputType*>(buffer);  // This layer's output occupies the start of the buffer
+
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;  // Each chunk yields kSimdWidth output bytes from four 256-bit int32 input vectors
+      const __m256i kZero = _mm256_setzero_si256();
+      const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);  // Dword order that undoes the per-128-bit-lane interleaving left by the two pack steps
+      const auto in = reinterpret_cast<const __m256i*>(input);
+      const auto out = reinterpret_cast<__m256i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(), because a binary
+          //       compiled with g++ in MSYS2 crashes here: the output memory is not aligned
+          //       even though alignas is specified.
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif
+
+          (&in[i * 4 + 0]),
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif
+
+          (&in[i * 4 + 1])), kWeightScaleBits);  // Saturating pack int32 -> int16, then arithmetic shift right
+        const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif
+
+          (&in[i * 4 + 2]),
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_loadu_si256
+  #else
+          _mm256_load_si256
+  #endif
+
+          (&in[i * 4 + 3])), kWeightScaleBits);
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+        _mm256_storeu_si256
+  #else
+        _mm256_store_si256
+  #endif
+
+          (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(  // max with zero clamps negative bytes to 0
+            _mm256_packs_epi16(words0, words1), kZero), kOffsets));  // Saturating pack int16 -> int8 caps values at 127
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;  // First element not covered by the SIMD loop
+
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;  // Each chunk yields kSimdWidth output bytes from four 128-bit int32 input vectors
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);  // Only needed for the max(x, 0) emulation used without SSE4.1
+  #endif
+
+      const auto in = reinterpret_cast<const __m128i*>(input);
+      const auto out = reinterpret_cast<__m128i*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 0]),
+            _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);  // Saturating pack int32 -> int16, then arithmetic shift right
+        const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
+            _mm_load_si128(&in[i * 4 + 2]),
+            _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
+        const __m128i packedbytes = _mm_packs_epi16(words0, words1);  // Saturating pack int16 -> int8 caps values at 127
+        _mm_store_si128(&out[i],
+
+  #ifdef USE_SSE41
+          _mm_max_epi8(packedbytes, kZero)
+  #else
+          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)  // Saturating add then subtract of -128 maps negatives to 0 and leaves x >= 0 unchanged
+  #endif
+
+        );
+      }
+      constexpr IndexType kStart = kNumChunks * kSimdWidth;  // First element not covered by the SIMD loop
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);  // Each chunk yields kSimdWidth / 2 output bytes (one int8x8_t)
+      const int8x8_t kZero = {0};
+      const auto in = reinterpret_cast<const int32x4_t*>(input);
+      const auto out = reinterpret_cast<int8x8_t*>(output);
+      for (IndexType i = 0; i < kNumChunks; ++i) {
+        int16x8_t shifted;
+        const auto pack = reinterpret_cast<int16x4_t*>(&shifted);  // View the 8x16-bit vector as two 4x16-bit halves to fill them separately
+        pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);  // Shift right, then saturating narrow int32 -> int16
+        pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
+        out[i] = vmax_s8(vqmovn_s16(shifted), kZero);  // Saturating narrow to int8 (cap 127), then clamp negatives to 0
+      }
+      constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);  // First element not covered by the SIMD loop
+  #else
+      constexpr IndexType kStart = 0;  // No SIMD path: the scalar loop below handles every element
+  #endif
+
+      for (IndexType i = kStart; i < kInputDimensions; ++i) {  // Scalar fallback for the remaining elements
+        output[i] = static_cast<OutputType>(
+            std::max(0, std::min(127, input[i] >> kWeightScaleBits)));  // Shift, then clamp to [0, 127]
+      }
+      return output;
    }
-    return output;
-  }

- private:
-  // Make the learning class a friend
-  friend class Trainer<ClippedReLU>;
+   private:
+     // Make the learning class a friend
+     friend class Trainer<ClippedReLU>;
+     
+     PreviousLayer previous_layer_;
+  };

-  // the layer immediately before this layer
-  PreviousLayer previous_layer_;
-};
+}  // namespace Eval::NNUE::Layers

-}  // namespace Layers
-
-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
@@ -1,35 +1,47 @@
-// NNUE evaluation function layer InputSlice definition
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_LAYERS_INPUT_SLICE_H_
-#define _NNUE_LAYERS_INPUT_SLICE_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// NNUE evaluation function layer InputSlice definition
+
+#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
+#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED

 #include "../nnue_common.h"

-namespace Eval {
+namespace Eval::NNUE::Layers {

-namespace NNUE {
-
-namespace Layers {
-
-// input layer
+// Input layer
 template <IndexType OutputDimensions, IndexType Offset = 0>
 class InputSlice {
 public:
-  // need to maintain alignment
+  // Need to maintain alignment
  static_assert(Offset % kMaxSimdWidth == 0, "");

-  // output type
+  // Output type
  using OutputType = TransformedFeatureType;

-  // output dimensionality
+  // Output dimensionality
  static constexpr IndexType kOutputDimensions = OutputDimensions;

-  // Size of the forward propagation buffer used from the input layer to this layer
+  // Size of forward propagation buffer used from the input layer to this layer
  static constexpr std::size_t kBufferSize = 0;

-  // Hash value embedded in the evaluation function file
+  // Hash value embedded in the evaluation file
  static constexpr std::uint32_t GetHashValue() {
    std::uint32_t hash_value = 0xEC42E90Du;
    hash_value ^= kOutputDimensions ^ (Offset << 10);
@@ -39,11 +51,11 @@ class InputSlice {
  // A string that represents the structure from the input layer to this layer
  static std::string GetStructureString() {
    return "InputSlice[" + std::to_string(kOutputDimensions) + "(" +
-        std::to_string(Offset) + ":" +
-        std::to_string(Offset + kOutputDimensions) + ")]";
+      std::to_string(Offset) + ":" +
+      std::to_string(Offset + kOutputDimensions) + ")]";
  }

-  // read parameters
+  // Read network parameters
  bool ReadParameters(std::istream& /*stream*/) {
    return true;
  }
@@ -53,7 +65,7 @@ class InputSlice {
    return true;
  }

-  // forward propagation
+  // Forward propagation
  const OutputType* Propagate(
      const TransformedFeatureType* transformed_features,
      char* /*buffer*/) const {
@@ -65,10 +77,4 @@ class InputSlice {

-}  // namespace Layers
+}  // namespace Eval::NNUE::Layers

-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
@@ -1,30 +1,39 @@
-// Class for difference calculation of NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_ACCUMULATOR_H_
-#define _NNUE_ACCUMULATOR_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Class for difference calculation of NNUE evaluation function
+
+#ifndef NNUE_ACCUMULATOR_H_INCLUDED
+#define NNUE_ACCUMULATOR_H_INCLUDED

 #include "nnue_architecture.h"

-namespace Eval {
+namespace Eval::NNUE {

-namespace NNUE {
+  // Class that holds the result of affine transformation of input features, together with the final evaluation value
+  struct alignas(32) Accumulator {  // 32 matches kMaxSimdWidth in nnue_common.h; presumably chosen for aligned SIMD access - confirm
+    std::int16_t
+        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];  // [2][refresh trigger][feature]; first index is presumably the side - confirm
+    Value score;  // No default initializer: indeterminate until assigned
+    bool computed_accumulation;  // No default initializer; presumably set once `accumulation` is valid - confirm against the writer
+    bool computed_score;  // No default initializer; presumably set once `score` is valid - confirm against the writer
+  };

-// Class that holds the result of affine transformation of input features
-// Keep the evaluation value that is the final output together
-struct alignas(32) Accumulator {
-  std::int16_t
-      accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-  Value score = VALUE_ZERO;
-  bool computed_accumulation = false;
-  bool computed_score = false;
-};
+}  // namespace Eval::NNUE

-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // NNUE_ACCUMULATOR_H_INCLUDED
@@ -1,33 +1,38 @@
-// Input features and network structure used in NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_ARCHITECTURE_H_
-#define _NNUE_ARCHITECTURE_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.

-// include a header that defines the input features and network structure
-//#include "architectures/k-p_256x2-32-32.h"
-//#include "architectures/k-p-cr_256x2-32-32.h"
-//#include "architectures/k-p-cr-ep_256x2-32-32.h"
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Input features and network structure used in NNUE evaluation function
+
+#ifndef NNUE_ARCHITECTURE_H_INCLUDED
+#define NNUE_ARCHITECTURE_H_INCLUDED
+
+// Defines the network structure
 #include "architectures/halfkp_256x2-32-32.h"
-//#include "architectures/halfkp-cr-ep_256x2-32-32.h"
-//#include "architectures/halfkp_384x2-32-32.h"

-namespace Eval {
+namespace Eval::NNUE {

-namespace NNUE {
+  static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
+  static_assert(Network::kOutputDimensions == 1, "");
+  static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");

-static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
-static_assert(Network::kOutputDimensions == 1, "");
-static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
+  // Trigger for full calculation instead of difference calculation
+  constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;

-// List of timings to perform all calculations instead of difference calculation
-constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
+}  // namespace Eval::NNUE

-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
@@ -1,64 +1,81 @@
-// Constants used in NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_COMMON_H_
-#define _NNUE_COMMON_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Constants used in NNUE evaluation function
+
+#ifndef NNUE_COMMON_H_INCLUDED
+#define NNUE_COMMON_H_INCLUDED

 #if defined(USE_AVX2)
 #include <immintrin.h>
+
 #elif defined(USE_SSE41)
 #include <smmintrin.h>
+
 #elif defined(USE_SSSE3)
 #include <tmmintrin.h>
+
 #elif defined(USE_SSE2)
 #include <emmintrin.h>
+
+#elif defined(USE_NEON)
+#include <arm_neon.h>
 #endif

-namespace Eval {
+namespace Eval::NNUE {

-namespace NNUE {
+  // Version of the evaluation file
+  constexpr std::uint32_t kVersion = 0x7AF32F16u;

-// A constant that represents the version of the evaluation function file
-constexpr std::uint32_t kVersion = 0x7AF32F16u;
+  // Constant used in evaluation value calculation
+  constexpr int FV_SCALE = 16;
+  constexpr int kWeightScaleBits = 6;

-// Constant used in evaluation value calculation
-constexpr int FV_SCALE = 16;
-constexpr int kWeightScaleBits = 6;
+  // Size of cache line (in bytes)
+  constexpr std::size_t kCacheLineSize = 64;

-// Size of cache line (in bytes)
-constexpr std::size_t kCacheLineSize = 64;
+  // SIMD width (in bytes)
+  #if defined(USE_AVX2)
+  constexpr std::size_t kSimdWidth = 32;

-// SIMD width (in bytes)
-#if defined(USE_AVX2)
-constexpr std::size_t kSimdWidth = 32;
-#elif defined(USE_SSE2)
-constexpr std::size_t kSimdWidth = 16;
-#elif defined(IS_ARM)
-constexpr std::size_t kSimdWidth = 16;
-#endif
-constexpr std::size_t kMaxSimdWidth = 32;
+  #elif defined(USE_SSE2)
+  constexpr std::size_t kSimdWidth = 16;

-// Type of input feature after conversion
-using TransformedFeatureType = std::uint8_t;
+  #elif defined(USE_NEON)
+  constexpr std::size_t kSimdWidth = 16;
+  #endif

-// index type
-using IndexType = std::uint32_t;
+  constexpr std::size_t kMaxSimdWidth = 32;  // Largest kSimdWidth value above (in bytes); defined for every build and used by the alignment static_asserts

-// Forward declaration of learning class template
-template <typename Layer>
-class Trainer;
+  // Type of input feature after conversion
+  using TransformedFeatureType = std::uint8_t;
+  using IndexType = std::uint32_t;  // Index type (used for dimension constants and loop counters)

-// find the smallest multiple of n and above
-template <typename IntType>
-constexpr IntType CeilToMultiple(IntType n, IntType base) {
-  return (n + base - 1) / base * base;
-}
+  // Forward declaration of learning class template
+  template <typename Layer>
+  class Trainer;

-}  // namespace NNUE
+  // Round n up to the nearest multiple of base (n is returned unchanged when it already is one); both arguments must have the same type
+  template <typename IntType>
+  constexpr IntType CeilToMultiple(IntType n, IntType base) {
+    return (n + base - 1) / base * base;  // Adding base - 1 before the truncating division rounds up; assumes n >= 0 and base > 0 - TODO confirm for signed IntType
+  }

-}  // namespace Eval
+}  // namespace Eval::NNUE

-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_COMMON_H_INCLUDED
@@ -1,9 +1,25 @@
-// A class that converts the input features of the NNUE evaluation function
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)

-#ifndef _NNUE_FEATURE_TRANSFORMER_H_
-#define _NNUE_FEATURE_TRANSFORMER_H_
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.

-#if defined(EVAL_NNUE)
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// A class that converts the input features of the NNUE evaluation function
+
+#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
+#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED

 #include "nnue_common.h"
 #include "nnue_architecture.h"
@@ -11,209 +27,205 @@

 #include <cstring> // std::memset()

-namespace Eval {
+namespace Eval::NNUE {

-namespace NNUE {
+  // Input feature converter
+  class FeatureTransformer {

-// Input feature converter
-class FeatureTransformer {
- private:
-  // number of output dimensions for one side
-  static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;
+   private:
+    // Number of output dimensions for one side
+    static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions;

- public:
-  // output type
-  using OutputType = TransformedFeatureType;
+   public:
+    // Output type
+    using OutputType = TransformedFeatureType;

-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
-  static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;
+    // Number of input/output dimensions
+    static constexpr IndexType kInputDimensions = RawFeatures::kDimensions;
+    static constexpr IndexType kOutputDimensions = kHalfDimensions * 2;

-  // size of forward propagation buffer
-  static constexpr std::size_t kBufferSize =
-      kOutputDimensions * sizeof(OutputType);
+    // Size of forward propagation buffer
+    static constexpr std::size_t kBufferSize =
+        kOutputDimensions * sizeof(OutputType);

-  // Hash value embedded in the evaluation function file
-  static constexpr std::uint32_t GetHashValue() {
-    return RawFeatures::kHashValue ^ kOutputDimensions;
-  }
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t GetHashValue() {
+      return RawFeatures::kHashValue ^ kOutputDimensions;
+    }

-  // a string representing the structure
-  static std::string GetStructureString() {
-    return RawFeatures::GetName() + "[" +
+    // A string representing the structure
+    static std::string GetStructureString() {
+      return RawFeatures::GetName() + "[" +
        std::to_string(kInputDimensions) + "->" +
        std::to_string(kHalfDimensions) + "x2]";
-  }
-
-  // read parameters
-  bool ReadParameters(std::istream& stream) {
-    stream.read(reinterpret_cast<char*>(biases_),
-                kHalfDimensions * sizeof(BiasType));
-    stream.read(reinterpret_cast<char*>(weights_),
-                kHalfDimensions * kInputDimensions * sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // write parameters
-  bool WriteParameters(std::ostream& stream) const {
-    stream.write(reinterpret_cast<const char*>(biases_),
-                 kHalfDimensions * sizeof(BiasType));
-    stream.write(reinterpret_cast<const char*>(weights_),
-                 kHalfDimensions * kInputDimensions * sizeof(WeightType));
-    return !stream.fail();
-  }
-
-  // proceed with the difference calculation if possible
-  bool UpdateAccumulatorIfPossible(const Position& pos) const {
-    const auto now = pos.state();
-    if (now->accumulator.computed_accumulation) {
-      return true;
    }
-    const auto prev = now->previous;
-    if (prev && prev->accumulator.computed_accumulation) {
-      UpdateAccumulator(pos);
-      return true;
-    }
-    return false;
-  }

-  // convert input features
-  void Transform(const Position& pos, OutputType* output, bool refresh) const {
-    if (refresh || !UpdateAccumulatorIfPossible(pos)) {
-      RefreshAccumulator(pos);
+    // Read network parameters
+    bool ReadParameters(std::istream& stream) {
+      stream.read(reinterpret_cast<char*>(biases_),
+                  kHalfDimensions * sizeof(BiasType));
+      stream.read(reinterpret_cast<char*>(weights_),
+                  kHalfDimensions * kInputDimensions * sizeof(WeightType));
+      return !stream.fail();
    }
-    const auto& accumulation = pos.state()->accumulator.accumulation;
-#if defined(USE_AVX2)
-    constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
-    constexpr int kControl = 0b11011000;
-    const __m256i kZero = _mm256_setzero_si256();
-#elif defined(USE_SSSE3)
-    constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
-    const __m128i kZero = _mm_setzero_si128();
-#ifndef USE_SSE41
-    const __m128i k0x80s = _mm_set1_epi8(-128);
-#endif
-#elif defined(IS_ARM)
-    constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-    const int8x8_t kZero = {0};
-#endif
-    const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
-    for (IndexType p = 0; p < 2; ++p) {
-      const IndexType offset = kHalfDimensions * p;
-#if defined(USE_AVX2)
-      auto out = reinterpret_cast<__m256i*>(&output[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m256i sum0 =
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary
-          //       compiled with g++ in MSYS2 crashes here because the output memory is not aligned
-          //       even though alignas is specified.
-          _mm256_loadu_si256
-#else
-          _mm256_load_si256
-#endif
-          (&reinterpret_cast<const __m256i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 0]);
-        __m256i sum1 =
-#if defined(__MINGW32__) || defined(__MINGW64__)
-          _mm256_loadu_si256
-#else
-          _mm256_load_si256
-#endif
-          (&reinterpret_cast<const __m256i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 1]);
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 0]);
-          sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 1]);
-        }
-#if defined(__MINGW32__) || defined(__MINGW64__)
-        _mm256_storeu_si256
-#else
-        _mm256_store_si256
-#endif
-        (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
-            _mm256_packs_epi16(sum0, sum1), kZero), kControl));
-      }
-#elif defined(USE_SSSE3)
-      auto out = reinterpret_cast<__m128i*>(&output[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 0]);
-        __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
-            accumulation[perspectives[p]][0])[j * 2 + 1]);
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 0]);
-          sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
-              accumulation[perspectives[p]][i])[j * 2 + 1]);
-        }
-  	const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
- 
-        _mm_store_si128(&out[j],
-#ifdef USE_SSE41
-          _mm_max_epi8(packedbytes, kZero)
-#else
-          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
-#endif
-        );
-      }
-#elif defined(IS_ARM)
-      const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
-      for (IndexType j = 0; j < kNumChunks; ++j) {
-        int16x8_t sum = reinterpret_cast<const int16x8_t*>(
-            accumulation[perspectives[p]][0])[j];
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
-              accumulation[perspectives[p]][i])[j]);
-        }
-        out[j] = vmax_s8(vqmovn_s16(sum), kZero);
-      }
-#else
-      for (IndexType j = 0; j < kHalfDimensions; ++j) {
-        BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
-        for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
-          sum += accumulation[static_cast<int>(perspectives[p])][i][j];
-        }
-        output[offset + j] = static_cast<OutputType>(
-            std::max<int>(0, std::min<int>(127, sum)));
-      }
-#endif
-    }
-  }

- private:
-  // Calculate cumulative value without using difference calculation
-  void RefreshAccumulator(const Position& pos) const {
-    auto& accumulator = pos.state()->accumulator;
-    for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
+    // Write network parameters
+    bool WriteParameters(std::ostream& stream) const {
+      stream.write(reinterpret_cast<const char*>(biases_),
+        kHalfDimensions * sizeof(BiasType));
+      stream.write(reinterpret_cast<const char*>(weights_),
+        kHalfDimensions * kInputDimensions * sizeof(WeightType));
+      return !stream.fail();
+    }
+
+    // Try the difference calculation; returns false if it is not possible
+    bool UpdateAccumulatorIfPossible(const Position& pos) const {
+      const auto now = pos.state();
+      if (now->accumulator.computed_accumulation) {
+        return true;
+      }
+      const auto prev = now->previous;
+      if (prev && prev->accumulator.computed_accumulation) {
+        UpdateAccumulator(pos);
+        return true;
+      }
+      return false;
+    }
+
+    // Convert input features
+    void Transform(const Position& pos, OutputType* output, bool refresh) const {
+      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+        RefreshAccumulator(pos);
+      }
+      const auto& accumulation = pos.state()->accumulator.accumulation;
+
+  #if defined(USE_AVX2)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+      constexpr int kControl = 0b11011000;
+      const __m256i kZero = _mm256_setzero_si256();
+
+  #elif defined(USE_SSSE3)
+      constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
+
+  #ifdef USE_SSE41
+      const __m128i kZero = _mm_setzero_si128();
+  #else
+      const __m128i k0x80s = _mm_set1_epi8(-128);
+  #endif
+
+  #elif defined(USE_NEON)
+      constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
+      const int8x8_t kZero = {0};
+  #endif
+
+      const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()};
+      for (IndexType p = 0; p < 2; ++p) {
+        const IndexType offset = kHalfDimensions * p;
+
+  #if defined(USE_AVX2)
+        auto out = reinterpret_cast<__m256i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m256i sum0 =
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256(), because the
+            //       binary compiled with g++ in MSYS2 crashes here: the output memory is
+            //       not aligned even though alignas is specified.
+            _mm256_loadu_si256
+  #else
+            _mm256_load_si256
+  #endif
+
+            (&reinterpret_cast<const __m256i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m256i sum1 =
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+            _mm256_loadu_si256
+  #else
+            _mm256_load_si256
+  #endif
+
+            (&reinterpret_cast<const __m256i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+
+  #if defined(__MINGW32__) || defined(__MINGW64__)
+          _mm256_storeu_si256
+  #else
+          _mm256_store_si256
+  #endif
+
+          (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
+              _mm256_packs_epi16(sum0, sum1), kZero), kControl));
+        }
+
+  #elif defined(USE_SSSE3)
+        auto out = reinterpret_cast<__m128i*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 0]);
+          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
+              accumulation[perspectives[p]][0])[j * 2 + 1]);
+      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
+
+          _mm_store_si128(&out[j],
+
+  #ifdef USE_SSE41
+            _mm_max_epi8(packedbytes, kZero)
+  #else
+            _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
+  #endif
+
+          );
+        }
+
+  #elif defined(USE_NEON)
+        const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
+        for (IndexType j = 0; j < kNumChunks; ++j) {
+          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
+              accumulation[perspectives[p]][0])[j];
+          out[j] = vmax_s8(vqmovn_s16(sum), kZero);
+        }
+
+  #else
+        for (IndexType j = 0; j < kHalfDimensions; ++j) {
+          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
+          output[offset + j] = static_cast<OutputType>(
+              std::max<int>(0, std::min<int>(127, sum)));
+        }
+  #endif
+
+      }
+    }
+
+   private:
+    // Compute the accumulator from scratch, without using difference calculation
+    void RefreshAccumulator(const Position& pos) const {
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;
      Features::IndexList active_indices[2];
      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
                                       active_indices);
-      for (const auto perspective : Colors) {
-        if (i == 0) {
-          std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                      kHalfDimensions * sizeof(BiasType));
-        } else {
-          std::memset(accumulator.accumulation[perspective][i], 0,
-                      kHalfDimensions * sizeof(BiasType));
-        }
+      for (Color perspective : { WHITE, BLACK }) {
+        std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                   kHalfDimensions * sizeof(BiasType));
        for (const auto index : active_indices[perspective]) {
          const IndexType offset = kHalfDimensions * index;
-#if defined(USE_AVX2)
+
+  #if defined(USE_AVX2)
          auto accumulation = reinterpret_cast<__m256i*>(
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
          for (IndexType j = 0; j < kNumChunks; ++j) {
-#if defined(__MINGW32__) || defined(__MINGW64__)
+  #if defined(__MINGW32__) || defined(__MINGW64__)
            _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j]));
-#else
+  #else
            accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
-#endif
+  #endif
          }
-#elif defined(USE_SSE2)
+
+  #elif defined(USE_SSE2)
          auto accumulation = reinterpret_cast<__m128i*>(
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
@@ -221,7 +233,8 @@ class FeatureTransformer {
          for (IndexType j = 0; j < kNumChunks; ++j) {
            accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
          }
-#elif defined(IS_ARM)
+
+  #elif defined(USE_NEON)
          auto accumulation = reinterpret_cast<int16x8_t*>(
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
@@ -229,129 +242,133 @@ class FeatureTransformer {
          for (IndexType j = 0; j < kNumChunks; ++j) {
            accumulation[j] = vaddq_s16(accumulation[j], column[j]);
          }
-#else
+
+  #else
          for (IndexType j = 0; j < kHalfDimensions; ++j) {
            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
          }
-#endif
+  #endif
+
        }
      }
+
+      accumulator.computed_accumulation = true;
+      accumulator.computed_score = false;
    }

-    accumulator.computed_accumulation = true;
-    accumulator.computed_score = false;
-  }
-
-  // Calculate cumulative value using difference calculation
-  void UpdateAccumulator(const Position& pos) const {
-    const auto prev_accumulator = pos.state()->previous->accumulator;
-    auto& accumulator = pos.state()->accumulator;
-    for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
+    // Compute the accumulator by difference calculation from the previous state
+    void UpdateAccumulator(const Position& pos) const {
+      const auto prev_accumulator = pos.state()->previous->accumulator;
+      auto& accumulator = pos.state()->accumulator;
+      IndexType i = 0;
      Features::IndexList removed_indices[2], added_indices[2];
      bool reset[2];
      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
                                        removed_indices, added_indices, reset);
-      for (const auto perspective : Colors) {
-#if defined(USE_AVX2)
+      for (Color perspective : { WHITE, BLACK }) {
+
+  #if defined(USE_AVX2)
        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
        auto accumulation = reinterpret_cast<__m256i*>(
            &accumulator.accumulation[perspective][i][0]);
-#elif defined(USE_SSE2)
+
+  #elif defined(USE_SSE2)
        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
        auto accumulation = reinterpret_cast<__m128i*>(
            &accumulator.accumulation[perspective][i][0]);
-#elif defined(IS_ARM)
+
+  #elif defined(USE_NEON)
        constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
        auto accumulation = reinterpret_cast<int16x8_t*>(
            &accumulator.accumulation[perspective][i][0]);
-#endif
+  #endif
+
        if (reset[perspective]) {
-          if (i == 0) {
-            std::memcpy(accumulator.accumulation[perspective][i], biases_,
-                        kHalfDimensions * sizeof(BiasType));
-          } else {
-            std::memset(accumulator.accumulation[perspective][i], 0,
-                        kHalfDimensions * sizeof(BiasType));
-          }
-        } else {// Difference calculation for the feature amount changed from 1 to 0
+          std::memcpy(accumulator.accumulation[perspective][i], biases_,
+                      kHalfDimensions * sizeof(BiasType));
+        } else {
          std::memcpy(accumulator.accumulation[perspective][i],
                      prev_accumulator.accumulation[perspective][i],
                      kHalfDimensions * sizeof(BiasType));
+          // Difference calculation for the deactivated features
          for (const auto index : removed_indices[perspective]) {
            const IndexType offset = kHalfDimensions * index;
-#if defined(USE_AVX2)
+
+  #if defined(USE_AVX2)
            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
            }
-#elif defined(USE_SSE2)
+
+  #elif defined(USE_SSE2)
            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
            }
-#elif defined(IS_ARM)
+
+  #elif defined(USE_NEON)
            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
            }
-#else
+
+  #else
            for (IndexType j = 0; j < kHalfDimensions; ++j) {
              accumulator.accumulation[perspective][i][j] -=
                  weights_[offset + j];
            }
-#endif
+  #endif
+
          }
        }
-        {// Difference calculation for features that changed from 0 to 1
+        { // Difference calculation for the activated features
          for (const auto index : added_indices[perspective]) {
            const IndexType offset = kHalfDimensions * index;
-#if defined(USE_AVX2)
+
+  #if defined(USE_AVX2)
            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
            }
-#elif defined(USE_SSE2)
+
+  #elif defined(USE_SSE2)
            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
            }
-#elif defined(IS_ARM)
+
+  #elif defined(USE_NEON)
            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
            for (IndexType j = 0; j < kNumChunks; ++j) {
              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
            }
-#else
+
+  #else
            for (IndexType j = 0; j < kHalfDimensions; ++j) {
              accumulator.accumulation[perspective][i][j] +=
                  weights_[offset + j];
            }
-#endif
+  #endif
+
          }
        }
      }
+
+      accumulator.computed_accumulation = true;
+      accumulator.computed_score = false;
    }

-    accumulator.computed_accumulation = true;
-    accumulator.computed_score = false;
-  }
+    using BiasType = std::int16_t;
+    using WeightType = std::int16_t;

-  // parameter type
-  using BiasType = std::int16_t;
-  using WeightType = std::int16_t;
+    // Make the learning class a friend
+    friend class Trainer<FeatureTransformer>;

-  // Make the learning class a friend
-  friend class Trainer<FeatureTransformer>;
+    alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
+    alignas(kCacheLineSize)
+        WeightType weights_[kHalfDimensions * kInputDimensions];
+  };

-  // parameter
-  alignas(kCacheLineSize) BiasType biases_[kHalfDimensions];
-  alignas(kCacheLineSize)
-      WeightType weights_[kHalfDimensions * kInputDimensions];
-};
+}  // namespace Eval::NNUE

-}  // namespace NNUE
-
-}  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
-
-#endif
+#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED
@@ -62,8 +62,8 @@ class Factorizer<HalfKP<AssociatedKing>> {
    IndexType index_offset = AppendBaseFeature<FeatureType>(
        kProperties[kFeaturesHalfKP], base_index, training_features);

-    const auto sq_k = static_cast<Square>(base_index / fe_end);
-    const auto p = static_cast<BonaPiece>(base_index % fe_end);
+    const auto sq_k = static_cast<Square>(base_index / PieceSquare::PS_END);
+    const auto p = static_cast<PieceSquare>(base_index % PieceSquare::PS_END);
    // kFeaturesHalfK
    {
      const auto& properties = kProperties[kFeaturesHalfK];
@@ -76,7 +76,7 @@ class Factorizer<HalfKP<AssociatedKing>> {
    index_offset += InheritFeaturesIfRequired<P>(
        index_offset, kProperties[kFeaturesP], p, training_features);
    // kFeaturesHalfRelativeKP
-    if (p >= fe_hand_end) {
+    if (p >= PieceSquare::PS_W_PAWN) {
      index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
          index_offset, kProperties[kFeaturesHalfRelativeKP],
          HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
@@ -111,7 +111,7 @@ IntType Round(double value) {
 // make_shared with alignment
 template <typename T, typename... ArgumentTypes>
 std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {
-  const auto ptr = new(aligned_malloc(sizeof(T), alignof(T)))
+  const auto ptr = new(std_aligned_alloc(sizeof(T), alignof(T)))
      T(std::forward<ArgumentTypes>(arguments)...);
  return std::shared_ptr<T>(ptr, AlignedDeleter<T>());
 }