Merge branch 'trainer' into merge_attempt

2026-05-20 14:27:45 +00:00 · 2020-09-24 20:45:23 +02:00
parent 4abe836896 9827411b7c
commit 56f1a2fe49
47 changed files with 1199 additions and 1165 deletions
@@ -41,7 +41,7 @@ BINDIR = $(PREFIX)/bin
 ### Built-in benchmark for pgo-builds
 PGO_TRAINING_DATA_FILE = pgo_training_data.bin
 PGOBENCH = ./$(EXE) bench
-PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 1000 output_file_name $(PGO_TRAINING_DATA_FILE)
+PGOGENSFEN = ./$(EXE) gensfen depth 6 loop 10000 output_file_name $(PGO_TRAINING_DATA_FILE)
 ### Source and object files
 SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
@@ -60,7 +60,6 @@ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp
 	learn/learn.cpp \
 	learn/gensfen.cpp \
 	learn/convert.cpp \
 	learn/learning_tools.cpp \
 	learn/multi_think.cpp
 OBJS = $(notdir $(SRCS:.cpp=.o))
@@ -747,10 +746,10 @@ endif
        config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \
        clang-profile-use clang-profile-make
-build: config-sanity net
+build: config-sanity
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
-profile-build: net config-sanity objclean profileclean
+profile-build: config-sanity objclean profileclean
 	@echo ""
 	@echo "Step 1/4. Building instrumented executable ..."
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@@ -1,22 +0,0 @@
 #ifndef _EVALUATE_COMMON_H_
 #define _EVALUATE_COMMON_H_
 // Declarations shared by the evaluation-function code and the learner.
 // Only the prototypes live here; the definitions are provided elsewhere.
 #include <string>
 namespace Eval
 {
 	// --------------------------
 	// for learning
 	// --------------------------
 	// Save the evaluation function parameters to a file.
 	// `suffix` is the extension appended to the end of the output file name.
 	void save_eval(std::string suffix);
 	// Get the current eta.
 	// NOTE(review): presumably the learning rate - confirm against the definition.
 	double get_eta();
 }
 #endif // _EVALUATE_COMMON_H_
@@ -36,42 +36,30 @@
 #include "uci.h"
 #include "incbin/incbin.h"
 // Macro to embed the default NNUE file data in the engine binary (using incbin.h, by Dale Weiler).
 // This macro invocation will declare the following three variables
 //     const unsigned char        gEmbeddedNNUEData[];  // a pointer to the embedded data
 //     const unsigned char *const gEmbeddedNNUEEnd;     // a marker to the end
 //     const unsigned int         gEmbeddedNNUESize;    // the size of the embedded file
 // Note that this does not work in Microsoft Visual Studio.
 #if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
  INCBIN(EmbeddedNNUE, EvalFileDefaultName);
 #else
  const unsigned char        gEmbeddedNNUEData[1] = {0x0};
  const unsigned char *const gEmbeddedNNUEEnd = &gEmbeddedNNUEData[1];
  const unsigned int         gEmbeddedNNUESize = 1;
 #endif
 using namespace std;
 using namespace Eval::NNUE;
 namespace Eval {
-  bool useNNUE;
+  UseNNUEMode useNNUE;
  string eval_file_loaded = "None";
-  /// NNUE::init() tries to load a nnue network at startup time, or when the engine
+  static UseNNUEMode nnue_mode_from_option(const UCI::Option& mode)
-  /// receives a UCI command "setoption name EvalFile value nn-[a-z0-9]{12}.nnue"
+  {
-  /// The name of the nnue network is always retrieved from the EvalFile option.
+    if (mode == "false")
-  /// We search the given network in three locations: internally (the default
+      return UseNNUEMode::False;
-  /// network may be embedded in the binary), in the active working directory and
+    else if (mode == "true")
-  /// in the engine directory. Distro packagers may define the DEFAULT_NNUE_DIRECTORY
+      return UseNNUEMode::True;
-  /// variable to have the engine search in a special directory in their distro.
+    else if (mode == "pure")
      return UseNNUEMode::Pure;
    return UseNNUEMode::False;
  }
  void NNUE::init() {
-    useNNUE = Options["Use NNUE"];
+    useNNUE = nnue_mode_from_option(Options["Use NNUE"]);
-    if (!useNNUE)
+    if (useNNUE == UseNNUEMode::False)
        return;
    string eval_file = string(Options["EvalFile"]);
@@ -79,35 +67,17 @@ namespace Eval {
    #if defined(DEFAULT_NNUE_DIRECTORY)
    #define stringify2(x) #x
    #define stringify(x) stringify2(x)
-    vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
+    vector<string> dirs = { "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) };
    #else
-    vector<string> dirs = { "<internal>" , "" , CommandLine::binaryDirectory };
+    vector<string> dirs = { "" , CommandLine::binaryDirectory };
    #endif
    for (string directory : dirs)
        if (eval_file_loaded != eval_file)
        {
-            if (directory != "<internal>")
+            ifstream stream(directory + eval_file, ios::binary);
-            {
+            if (load_eval(eval_file, stream))
-                ifstream stream(directory + eval_file, ios::binary);
+                eval_file_loaded = eval_file;
                if (load_eval(eval_file, stream))
                    eval_file_loaded = eval_file;
            }
            if (directory == "<internal>" && eval_file == EvalFileDefaultName)
            {
                // C++ way to prepare a buffer for a memory stream
                class MemoryBuffer : public basic_streambuf<char> {
                    public: MemoryBuffer(char* p, size_t n) { setg(p, p, p + n); setp(p, p + n); }
                };
                MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(gEmbeddedNNUEData)),
                                    size_t(gEmbeddedNNUESize));
                istream stream(&buffer);
                if (load_eval(eval_file, stream))
                    eval_file_loaded = eval_file;
            }
        }
  }
@@ -116,7 +86,7 @@ namespace Eval {
    string eval_file = string(Options["EvalFile"]);
-    if (useNNUE && eval_file_loaded != eval_file)
+    if (useNNUE != UseNNUEMode::False && eval_file_loaded != eval_file)
    {
        UCI::OptionsMap defaults;
        UCI::init(defaults);
@@ -136,7 +106,7 @@ namespace Eval {
        exit(EXIT_FAILURE);
    }
-    if (useNNUE)
+    if (useNNUE != UseNNUEMode::False)
        sync_cout << "info string NNUE evaluation using " << eval_file << " enabled" << sync_endl;
    else
        sync_cout << "info string classical evaluation enabled" << sync_endl;
@@ -1017,7 +987,10 @@ Value Eval::evaluate(const Position& pos) {
  Value v;
-  if (!Eval::useNNUE)
+  if (Eval::useNNUE == UseNNUEMode::Pure) {
      v = NNUE::evaluate(pos);
  }
  else if (Eval::useNNUE == UseNNUEMode::False)
      v = Evaluation<NO_TRACE>(pos).value();
  else
  {
@@ -1092,7 +1065,7 @@ std::string Eval::trace(const Position& pos) {
  ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
-  if (Eval::useNNUE)
+  if (useNNUE != UseNNUEMode::False)
  {
      v = NNUE::evaluate(pos);
      v = pos.side_to_move() == WHITE ? v : -v;
@@ -42,7 +42,7 @@ namespace Eval {
  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
  // for the build process (profile-build and fishtest) to work. Do not change the
  // name of the macro, as it is used in the Makefile.
-  #define EvalFileDefaultName   "nn-03744f8d56d8.nnue"
+  #define EvalFileDefaultName   "nn.bin"
  namespace NNUE {
@@ -8,9 +8,6 @@
 #include "position.h"
 #include "tt.h"
 // evaluate header for learning
 #include "eval/evaluate_common.h"
 #include "extra/nnue_data_binpack_format.h"
 #include "syzygy/tbprobe.h"
@@ -122,7 +119,7 @@ namespace Learner
                else if (token == "score") {
                    double score;
                    ss >> score;
-                    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
+                    // Training Formula - Issue #71 - nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
                    // Normalize to [0.0, 1.0].
                    score = (score - src_score_min_value) / (src_score_max_value - src_score_min_value);
                    // Scale to [dest_score_min_value, dest_score_max_value].
@@ -480,7 +477,7 @@ namespace Learner
            {
                if (fs.read((char*)&p, sizeof(PackedSfenValue))) {
                    StateInfo si;
-                    tpos.set_from_packed_sfen(p.sfen, &si, th, false);
+                    tpos.set_from_packed_sfen(p.sfen, &si, th);
                    // write as plain text
                    ofs << "fen " << tpos.fen() << std::endl;
@@ -2,6 +2,7 @@
 #include "packed_sfen.h"
 #include "multi_think.h"
 #include "../syzygy/tbprobe.h"
 #include "misc.h"
 #include "position.h"
@@ -9,8 +10,6 @@
 #include "tt.h"
 #include "uci.h"
 #include "eval/evaluate_common.h"
 #include "extra/nnue_data_binpack_format.h"
 #include "nnue/evaluate_nnue_learner.h"
@@ -48,6 +47,7 @@ namespace Learner
    static bool detect_draw_by_consecutive_low_score = false;
    static bool detect_draw_by_insufficient_mating_material = false;
    static std::vector<std::string> bookStart;
    static SfenOutputType sfen_output_type = SfenOutputType::Bin;
    static bool ends_with(const std::string& lhs, const std::string& end)
@@ -392,7 +392,6 @@ namespace Learner
            Position& pos,
            std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
            int ply,
            int depth,
            vector<Move>& pv);
        // Min and max depths for search during gensfen
@@ -749,7 +748,6 @@ namespace Learner
        Position& pos,
        std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
        int ply,
        int depth,
        vector<Move>& pv)
    {
        auto rootColor = pos.side_to_move();
@@ -763,15 +761,6 @@ namespace Learner
            }
            pos.do_move(m, states[ply++]);
            // Because the difference calculation of evaluate() cannot be
            // performed unless each node evaluate() is called!
            // If the depth is 8 or more, it seems
            // faster not to calculate this difference.
            if (depth < 8)
            {
                Eval::NNUE::update_eval(pos);
            }
        }
        // Reach leaf
@@ -828,8 +817,10 @@ namespace Learner
            auto th = Threads[thread_id];
            auto& pos = th->rootPos;
-            pos.set(StartFEN, false, &si, th);
+            pos.set(bookStart[prng.rand(bookStart.size())], false, &si, th);
            int resign_counter = 0;
            bool should_resign = prng.rand(10) > 1;
            // Vector for holding the sfens in the current simulated game.
            PSVector a_psv;
            a_psv.reserve(write_maxply + MAX_PLY);
@@ -871,11 +862,14 @@ namespace Learner
                    // Also because of this we don't have to check for TB/MATE scores
                    if (abs(search_value) >= eval_limit)
                    {
-                        const auto wdl = (search_value >= eval_limit) ? 1 : -1;
+                        resign_counter++;
-                        flush_psv(wdl);
+                        if ((should_resign && resign_counter >= 4) || abs(search_value) >= 10000) {
-                        break;
+                            flush_psv((search_value >= eval_limit) ? 1 : -1);
                            break;
                        }
                    } else {
                        resign_counter = 0;
                    }
                    // Verification of a strange move
                    if (search_pv.size() > 0
                        && (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
@@ -917,7 +911,6 @@ namespace Learner
                        auto old_key = hash[hash_index];
                        if (key == old_key)
                        {
                            a_psv.clear();
                            goto SKIP_SAVE;
                        }
                        else
@@ -936,20 +929,7 @@ namespace Learner
                        // Result is added after the whole game is done.
                        pos.sfen_pack(psv.sfen);
-                        // Get the value of evaluate() as seen from the
+                        psv.score = search_value;
                        // root color on the leaf node of the PV line.
                        // I don't know the goodness and badness of using the
                        // return value of search() as it is.
                        // TODO: Consider using search value instead of evaluate_leaf.
                        //       Maybe give it as an option.
                        // Use PV moves to reach the leaf node and use the value
                        // that evaluated() is called on that leaf node.
                        const auto leaf_value = evaluate_leaf(pos, states, ply, depth, search_pv);
                        // If for some reason the leaf node couldn't yield an eval
                        // we fallback to search value.
                        psv.score = leaf_value == VALUE_NONE ? search_value : leaf_value;
                        psv.gamePly = ply;
@@ -983,18 +963,11 @@ namespace Learner
                    {
                        break;
                    }
                    // Clear the sfens that were written before the random move.
                    // (???) why?
                    a_psv.clear();
                }
                // Do move.
                pos.do_move(next_move, states[ply]);
                // Call node evaluate() for each difference calculation.
                Eval::NNUE::update_eval(pos);
            } // for (int ply = 0; ; ++ply)
        } // while(!quit)
@@ -1154,12 +1127,28 @@ namespace Learner
            output_file_name = output_file_name + "_" + to_hex(r.rand<uint64_t>()) + to_hex(r.rand<uint64_t>());
        }
        // Load the opening positions used to start each self-play game.
        // Every non-blank line of "3moves_v2.epd" (opened relative to the current
        // working directory) is stored verbatim as one start position.
        bookStart.clear();
        {
          std::ifstream book_file("3moves_v2.epd");
          std::string line;
          // A stream that failed to open simply yields no lines.
          while (std::getline(book_file, line))
          {
              // Skip blank lines (including a lone '\r' from CRLF files or a
              // trailing newline) so they are never handed out as a position.
              if (line.find_first_not_of(" \t\r") == std::string::npos)
                  continue;
              bookStart.push_back(line);
          }
          // Fall back to the standard start position when the book is missing
          // or contains no usable line: the game loop indexes bookStart with
          // prng.rand(bookStart.size()), which must never see an empty vector.
          if (bookStart.empty())
              bookStart.push_back(StartFEN);
        }
        std::cout << "gensfen : " << endl
            << "  search_depth_min = " << search_depth_min << " to " << search_depth_max << endl
            << "  nodes = " << nodes << endl
            << "  loop_max = " << loop_max << endl
            << "  eval_limit = " << eval_limit << endl
-            << "  thread_num (set by USI setoption) = " << thread_num << endl
+            << "  thread_num             = " << thread_num << endl
            << "  bookStart              = " << bookStart.size() << endl
            << "  random_move_minply     = " << random_move_minply << endl
            << "  random_move_maxply     = " << random_move_maxply << endl
            << "  random_move_count      = " << random_move_count << endl
@@ -1177,10 +1166,28 @@ namespace Learner
            << "  detect_draw_by_insufficient_mating_material = " << detect_draw_by_insufficient_mating_material << endl;
        // Show if the training data generator uses NNUE.
-        Eval::verify_NNUE();
+        Eval::NNUE::verify();
        Threads.main()->ponder = false;
        // About Search::Limits
        // Be careful because this member variable is global and affects other threads.
        {
          auto& limits = Search::Limits;
          // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
          limits.infinite = true;
          // Since PV is an obstacle when displayed, erase it.
          limits.silent = true;
          // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
          limits.nodes = 0;
          // depth is also processed by the one passed as an argument of Learner::search().
          limits.depth = 0;
        }
        // Create and execute threads as many as Options["Threads"].
        {
            SfenWriter sfen_writer(output_file_name, thread_num);
@@ -29,8 +29,6 @@
 #include "uci.h"
 #include "search.h"
 #include "eval/evaluate_common.h"
 #include "extra/nnue_data_binpack_format.h"
 #include "nnue/evaluate_nnue_learner.h"
@@ -58,6 +56,7 @@
 #include <omp.h>
 #endif
 extern double global_learning_rate;
 using namespace std;
@@ -92,12 +91,6 @@ namespace Learner
    static double dest_score_min_value = 0.0;
    static double dest_score_max_value = 1.0;
    // Assume teacher signals are the scores of deep searches,
    // and convert them into winning probabilities in the trainer.
    // Sometimes we want to use the winning probabilities in the training
    // data directly. In those cases, we set false to this variable.
    static bool convert_teacher_signal_to_winning_probability = true;
    // Using stockfish's WDL with win rate model instead of sigmoid
    static bool use_wdl = false;
@@ -164,14 +157,6 @@ namespace Learner
        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
    }
    // A constant used in elmo (WCSC27). Adjustment required.
    // Since elmo does not internally divide the expression, the value is different.
    // You can set this value with the learn command.
    // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27)
    double ELMO_LAMBDA = 0.33;
    double ELMO_LAMBDA2 = 0.33;
    double ELMO_LAMBDA_LIMIT = 32000;
    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
    double get_scaled_signal(double signal)
    {
@@ -194,26 +179,7 @@ namespace Learner
    double calculate_p(double teacher_signal, int ply)
    {
        const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
-
+        return winning_percentage(scaled_teacher_signal, ply);
        double p = scaled_teacher_signal;
        if (convert_teacher_signal_to_winning_probability)
        {
            p = winning_percentage(scaled_teacher_signal, ply);
        }
        return p;
    }
    // Select the blending coefficient for a teacher score: ELMO_LAMBDA2 once the
    // score's magnitude reaches ELMO_LAMBDA_LIMIT, ELMO_LAMBDA below that.
    double calculate_lambda(double teacher_signal)
    {
        const bool beyond_limit = std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT;
        if (beyond_limit)
            return ELMO_LAMBDA2;
        return ELMO_LAMBDA;
    }
    double calculate_t(int game_result)
@@ -226,32 +192,6 @@ namespace Learner
        return t;
    }
    // Gradient of the elmo (WCSC27) style loss for one training position.
    // Blends a term driven by the teacher's search score with a term driven by
    // the actual game result; the blend weight comes from calculate_lambda().
    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
    {
        const double model_prob = winning_percentage(shallow, psv.gamePly);
        const double teacher_prob = calculate_p(teacher_signal, psv.gamePly);
        const double result_prob = calculate_t(psv.game_result);
        const double weight = calculate_lambda(teacher_signal);
        if (use_wdl)
        {
            // WDL mode: combine the cross-entropy derivatives of both targets.
            const double d_teacher = calc_d_cross_entropy_of_winning_percentage(teacher_prob, shallow, psv.gamePly);
            const double d_result = calc_d_cross_entropy_of_winning_percentage(result_prob, shallow, psv.gamePly);
            return weight * d_teacher + (1.0 - weight) * d_result;
        }
        // Sigmoid mode: the actual game outcome acts as a correction term
        // next to the teacher's winning probability.
        return weight * (model_prob - teacher_prob) + (1.0 - weight) * (model_prob - result_prob);
    }
    // Calculate cross entropy during learning
    // The individual cross entropy of the win/loss term and win
    // rate term of the elmo expression is returned
@@ -262,21 +202,16 @@ namespace Learner
        const PackedSfenValue& psv,
        double& cross_entropy_eval,
        double& cross_entropy_win,
        double& cross_entropy,
        double& entropy_eval,
-        double& entropy_win,
+        double& entropy_win)
        double& entropy)
    {
        // Teacher winning probability.
        const double q = winning_percentage(shallow, psv.gamePly);
        const double p = calculate_p(teacher_signal, psv.gamePly);
        const double t = calculate_t(psv.game_result);
        const double lambda = calculate_lambda(teacher_signal);
        constexpr double epsilon = 0.000001;
        const double m = (1.0 - lambda) * t + lambda * p;
        cross_entropy_eval =
            (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
        cross_entropy_win =
@@ -285,17 +220,12 @@ namespace Learner
            (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
        entropy_win =
            (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
        cross_entropy =
            (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
        entropy =
            (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
    }
    // Other objective functions may be considered in the future...
    double calc_grad(Value shallow, const PackedSfenValue& psv)
    {
-        return calc_grad((Value)psv.score, shallow, psv);
+        return (double)(shallow - (Value)psv.score) / 2400.0;
    }
    struct BasicSfenInputStream
@@ -787,15 +717,9 @@ namespace Learner
        std::atomic<bool> stop_flag;
        // Discount rate
        double discount_rate;
        // Option to exclude early stage from learning
        int reduction_gameply;
        // Option not to learn kk/kkp/kpp/kppp
        std::array<bool, 4> freeze;
        // If the absolute value of the evaluation value of the deep search
        // of the teacher phase exceeds this value, discard the teacher phase.
        int eval_limit;
@@ -825,7 +749,6 @@ namespace Learner
        uint64_t eval_save_interval;
        uint64_t loss_output_interval;
        uint64_t mirror_percentage;
        // Loss calculation.
        // done: Number of phases targeted this time
@@ -849,7 +772,6 @@ namespace Learner
        for (size_t i = 0; i < pv.size(); ++i)
        {
            task_pos.do_move(pv[i], states[i]);
            Eval::NNUE::update_eval(task_pos);
        }
        const Value shallow_value =
@@ -870,20 +792,18 @@ namespace Learner
        // It doesn't matter if you have disabled the substitution table.
        TT.new_search();
-        std::cout << "PROGRESS: " << now_string() << ", ";
+        cout << "PROGRESS: " << now_string() << ", ";
-        std::cout << sr.total_done << " sfens";
+        cout << sr.total_done << " sfens";
-        std::cout << ", iteration " << epoch;
+        cout << ", iteration " << epoch;
-        std::cout << ", eta = " << Eval::get_eta() << ", ";
+        cout << ", learning rate = " << global_learning_rate << ", ";
        // For calculation of verification data loss
-        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
+        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
-        atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
+        atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
        test_sum_cross_entropy_eval = 0;
        test_sum_cross_entropy_win = 0;
        test_sum_cross_entropy = 0;
        test_sum_entropy_eval = 0;
        test_sum_entropy_win = 0;
        test_sum_entropy = 0;
        // norm for learning
        atomic<double> sum_norm;
@@ -899,7 +819,7 @@ namespace Learner
        auto& pos = th->rootPos;
        StateInfo si;
        pos.set(StartFEN, false, &si, th);
-        std::cout << "hirate eval = " << Eval::evaluate(pos);
+        cout << "hirate eval = " << Eval::evaluate(pos) << endl;
        // It's better to parallelize here, but it's a bit
        // troublesome because the search before slave has not finished.
@@ -923,10 +843,8 @@ namespace Learner
                    &ps,
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
                    &test_sum_cross_entropy,
                    &test_sum_entropy_eval,
                    &test_sum_entropy_win,
                    &test_sum_entropy,
                    &sum_norm,
                    &task_count,
                    &move_accord_count
@@ -954,26 +872,22 @@ namespace Learner
                // For the time being, regarding the win rate and loss terms only in the elmo method
                // Calculate and display the cross entropy.
-                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
+                double test_cross_entropy_eval, test_cross_entropy_win;
-                double test_entropy_eval, test_entropy_win, test_entropy;
+                double test_entropy_eval, test_entropy_win;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    test_cross_entropy_eval,
                    test_cross_entropy_win,
                    test_cross_entropy,
                    test_entropy_eval,
-                    test_entropy_win,
+                    test_entropy_win);
                    test_entropy);
                // The total cross entropy need not be abs() by definition.
                test_sum_cross_entropy_eval += test_cross_entropy_eval;
                test_sum_cross_entropy_win += test_cross_entropy_win;
                test_sum_cross_entropy += test_cross_entropy;
                test_sum_entropy_eval += test_entropy_eval;
                test_sum_entropy_win += test_entropy_win;
                test_sum_entropy += test_entropy;
                sum_norm += (double)abs(shallow_value);
                // Determine if the teacher's move and the score of the shallow search match
@@ -998,7 +912,7 @@ namespace Learner
        while (task_count)
            sleep(1);
-        latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
+        latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
        latest_loss_count += sr.sfen_for_mse.size();
        // learn_cross_entropy may be called train cross
@@ -1008,27 +922,24 @@ namespace Learner
        if (sr.sfen_for_mse.size() && done)
        {
-            cout
+            cout << "INFO: "
-                << " , test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
+                << "test_cross_entropy_eval = " << test_sum_cross_entropy_eval / sr.sfen_for_mse.size()
                << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
                << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
                << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
                << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                << " , norm = " << sum_norm
-                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
+                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
                << endl;
            if (done != static_cast<uint64_t>(-1))
            {
-                cout
+                cout << "INFO: "
-                    << " , learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
+                    << "learn_cross_entropy_eval = " << learn_sum_cross_entropy_eval / done
                    << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
                    << " , learn_entropy_eval = " << learn_sum_entropy_eval / done
                    << " , learn_entropy_win = " << learn_sum_entropy_win / done
-                    << " , learn_cross_entropy = " << learn_sum_cross_entropy / done
+                    << endl;
                    << " , learn_entropy = " << learn_sum_entropy / done;
            }
            cout << endl;
        }
        else
        {
@@ -1038,10 +949,8 @@ namespace Learner
        // Clear 0 for next time.
        learn_sum_cross_entropy_eval = 0.0;
        learn_sum_cross_entropy_win = 0.0;
        learn_sum_cross_entropy = 0.0;
        learn_sum_entropy_eval = 0.0;
        learn_sum_entropy_win = 0.0;
        learn_sum_entropy = 0.0;
    }
    void LearnerThink::thread_worker(size_t thread_id)
@@ -1058,7 +967,7 @@ namespace Learner
            // display mse (this is sometimes done only for thread 0)
            // Immediately after being read from the file...
-        // Lock the evaluation function so that it is not used during updating.
+            // Lock the evaluation function so that it is not used during updating.
            shared_lock<shared_timed_mutex> read_lock(nn_mutex, defer_lock);
            if (sr.next_update_weights <= sr.total_done ||
                (thread_id != 0 && !read_lock.try_lock()))
@@ -1090,7 +999,7 @@ namespace Learner
                        // Lock the evaluation function so that it is not used during updating.
                        lock_guard<shared_timed_mutex> write_lock(nn_mutex);
-                        Eval::NNUE::UpdateParameters(epoch);
+                        Eval::NNUE::UpdateParameters();
                    }
                    ++epoch;
@@ -1167,8 +1076,7 @@ namespace Learner
                goto RETRY_READ;
            StateInfo si;
-            const bool mirror = prng.rand(100) < mirror_percentage;
+            if (pos.set_from_packed_sfen(ps.sfen, &si, th) != 0)
            if (pos.set_from_packed_sfen(ps.sfen, &si, th, mirror) != 0)
            {
                // I got a strange sfen. Should be debugged!
                // Since it is an illegal sfen, it may not be
@@ -1177,18 +1085,30 @@ namespace Learner
                goto RETRY_READ;
            }
            // There is a possibility that all the pieces are blocked and stuck.
            // Also, the declaration win phase is excluded from
            // learning because you cannot go to leaf with PV moves.
            // (shouldn't write out such teacher aspect itself,
            // but may have written it out with an old generation routine)
            // Skip the position if there are no legal moves (=checkmated or stalemate).
            if (MoveList<LEGAL>(pos).size() == 0)
                goto RETRY_READ;
            // I can read it, so try displaying it.
            //      cout << pos << value << endl;
            const auto rootColor = pos.side_to_move();
            int ply = 0;
            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
            if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
            {
                goto RETRY_READ;
            }
            pos.do_move((Move)ps.move, state[ply++]);
 			// There is a possibility that all the pieces are blocked and stuck.
 			// Also, the declaration win phase is excluded from
 			// learning because you cannot go to leaf with PV moves.
 			// (shouldn't write out such teacher aspect itself,
 			// but may have written it out with an old generation routine)
 			// Skip the position if there are no legal moves (=checkmated or stalemate).
 			if (MoveList<LEGAL>(pos).size() == 0)
 				goto RETRY_READ;
            // Evaluation value of shallow search (qsearch)
            const auto [_, pv] = qsearch(pos);
@@ -1199,13 +1119,11 @@ namespace Learner
            // Go to the leaf node as it is, add only to the gradient array,
            // and later try AdaGrad at the time of rmse aggregation.
            const auto rootColor = pos.side_to_move();
            // If the initial PV is different, it is better not to use it for learning.
            // If it is the result of searching a completely different place, it may become noise.
            // It may be better not to study where the difference in evaluation values is too large.
            int ply = 0;
            // A helper function that adds the gradient to the current phase.
            auto pos_add_grad = [&]() {
@@ -1224,35 +1142,28 @@ namespace Learner
                    : -Eval::evaluate(pos);
                // Calculate loss for training data
-                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
+                double learn_cross_entropy_eval, learn_cross_entropy_win;
-                double learn_entropy_eval, learn_entropy_win, learn_entropy;
+                double learn_entropy_eval, learn_entropy_win;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    learn_cross_entropy_eval,
                    learn_cross_entropy_win,
                    learn_cross_entropy,
                    learn_entropy_eval,
-                    learn_entropy_win,
+                    learn_entropy_win);
                    learn_entropy);
                learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
                learn_sum_cross_entropy_win += learn_cross_entropy_win;
                learn_sum_cross_entropy += learn_cross_entropy;
                learn_sum_entropy_eval += learn_entropy_eval;
                learn_sum_entropy_win += learn_entropy_win;
                learn_sum_entropy += learn_entropy;
-                const double example_weight =
+                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
                    (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
                Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
                // Since the processing is completed, the counter of the processed number is incremented
                sr.total_done++;
            };
            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
            bool illegal_move = false;
            for (auto m : pv)
            {
@@ -1266,29 +1177,16 @@ namespace Learner
                    break;
                }
                // Processing when adding the gradient to the node on each PV.
                //If discount_rate is 0, this process is not performed.
                if (discount_rate != 0)
                    pos_add_grad();
                pos.do_move(m, state[ply++]);
                // Since the value of evaluate in leaf is used, the difference is updated.
                Eval::NNUE::update_eval(pos);
            }
            if (illegal_move)
            {
-                sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
+                goto RETRY_READ;
                continue;
            }
            // Since we have reached the end phase of PV, add the slope here.
            pos_add_grad();
            // rewind the phase
            for (auto it = pv.rbegin(); it != pv.rend(); ++it)
                pos.undo_move(*it);
        }
    }
@@ -1303,18 +1201,18 @@ namespace Learner
        {
            // When EVAL_SAVE_ONLY_ONCE is defined,
            // Do not dig a subfolder because I want to save it only once.
-            Eval::save_eval("");
+            Eval::NNUE::save_eval("");
        }
        else if (is_final)
        {
-            Eval::save_eval("final");
+            Eval::NNUE::save_eval("final");
            return true;
        }
        else
        {
            static int dir_number = 0;
            const std::string dir_name = std::to_string(dir_number++);
-            Eval::save_eval(dir_name);
+            Eval::NNUE::save_eval(dir_name);
            if (newbob_decay != 1.0 && latest_loss_count > 0) {
                static int trials = newbob_num_trials;
@@ -1332,25 +1230,17 @@ namespace Learner
                else
                {
                    cout << " >= best (" << best_loss << "), rejected" << endl;
-                    if (best_nn_directory.empty())
+                    best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);
                    {
                        cout << "WARNING: no improvement from initial model" << endl;
                    }
                    else
                    {
                        cout << "restoring parameters from " << best_nn_directory << endl;
                        Eval::NNUE::RestoreParameters(best_nn_directory);
                    }
                    if (--trials > 0 && !is_final)
                    {
                        cout
-                            << "reducing learning rate scale from " << newbob_scale
+                            << "reducing learning rate from " << newbob_scale
                            << " to " << (newbob_scale * newbob_decay)
                            << " (" << trials << " more trials)" << endl;
                        newbob_scale *= newbob_decay;
-                        Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
+                        global_learning_rate = newbob_scale;
                    }
                }
@@ -1628,13 +1518,6 @@ namespace Learner
        string target_dir;
        // If 0, it will be the default value.
        double eta1 = 0.0;
        double eta2 = 0.0;
        double eta3 = 0.0;
        uint64_t eta1_epoch = 0; // eta2 is not applied by default
        uint64_t eta2_epoch = 0; // eta3 is not applied by default
        // --- Function that only shuffles the teacher aspect
        // normal shuffle
@@ -1675,24 +1558,13 @@ namespace Learner
        // Turn on if you want to pass a pre-shuffled file.
        bool no_shuffle = false;
-        // elmo lambda
+        global_learning_rate = 1.0;
        ELMO_LAMBDA = 0.33;
        ELMO_LAMBDA2 = 0.33;
        ELMO_LAMBDA_LIMIT = 32000;
        // Discount rate. If this is set to a value other than 0,
        // the slope will be added even at other than the PV termination.
        // (At that time, apply this discount rate)
        double discount_rate = 0;
        // if (gamePly <rand(reduction_gameply)) continue;
        // An option to exclude the early stage from the learning target moderately like
        // If set to 1, rand(1)==0, so nothing is excluded.
        int reduction_gameply = 1;
        // Optional item that does not let you learn KK/KKP/KPP/KPPP
        array<bool, 4> freeze = {};
        uint64_t nn_batch_size = 1000;
        double newbob_decay = 1.0;
        int newbob_num_trials = 2;
@@ -1700,7 +1572,6 @@ namespace Learner
        uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
        uint64_t loss_output_interval = 0;
        uint64_t mirror_percentage = 0;
        string validation_set_file_name;
        string seed;
@@ -1734,12 +1605,7 @@ namespace Learner
            else if (option == "batchsize") is >> mini_batch_size;
            // learning rate
-            else if (option == "eta")        is >> eta1;
+            else if (option == "lr")        is >> global_learning_rate;
            else if (option == "eta1")       is >> eta1; // alias
            else if (option == "eta2")       is >> eta2;
            else if (option == "eta3")       is >> eta3;
            else if (option == "eta1_epoch") is >> eta1_epoch;
            else if (option == "eta2_epoch") is >> eta2_epoch;
            // Accept also the old option name.
            else if (option == "use_draw_in_training"
@@ -1758,22 +1624,9 @@ namespace Learner
            else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
            // Discount rate
            else if (option == "discount_rate") is >> discount_rate;
            // Using WDL with win rate model instead of sigmoid
            else if (option == "use_wdl") is >> use_wdl;
            // No learning of KK/KKP/KPP/KPPP.
            else if (option == "freeze_kk")    is >> freeze[0];
            else if (option == "freeze_kkp")   is >> freeze[1];
            else if (option == "freeze_kpp")   is >> freeze[2];
            // LAMBDA
            else if (option == "lambda")       is >> ELMO_LAMBDA;
            else if (option == "lambda2")      is >> ELMO_LAMBDA2;
            else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
            else if (option == "reduction_gameply") is >> reduction_gameply;
            // shuffle related
@@ -1794,7 +1647,6 @@ namespace Learner
            else if (option == "eval_save_interval") is >> eval_save_interval;
            else if (option == "loss_output_interval") is >> loss_output_interval;
            else if (option == "mirror_percentage") is >> mirror_percentage;
            else if (option == "validation_set_file_name") is >> validation_set_file_name;
            // Rabbit convert related
@@ -1810,7 +1662,6 @@ namespace Learner
            else if (option == "src_score_max_value") is >> src_score_max_value;
            else if (option == "dest_score_min_value") is >> dest_score_min_value;
            else if (option == "dest_score_max_value") is >> dest_score_max_value;
            else if (option == "convert_teacher_signal_to_winning_probability") is >> convert_teacher_signal_to_winning_probability;
            else if (option == "seed") is >> seed;
            // Otherwise, it's a filename.
            else
@@ -1884,7 +1735,7 @@ namespace Learner
        if (use_convert_plain)
        {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
            cout << "convert_plain.." << endl;
            convert_plain(filenames, output_file_name);
            return;
@@ -1892,7 +1743,7 @@ namespace Learner
        if (use_convert_bin)
        {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
            cout << "convert_bin.." << endl;
            convert_bin(
                filenames,
@@ -1913,7 +1764,7 @@ namespace Learner
        if (use_convert_bin_from_pgn_extract)
        {
-            Eval::init_NNUE();
+            Eval::NNUE::init();
            cout << "convert_bin_from_pgn-extract.." << endl;
            convert_bin_from_pgn_extract(
                filenames,
@@ -1946,8 +1797,7 @@ namespace Learner
        cout << "nn_batch_size     : " << nn_batch_size << endl;
        cout << "nn_options        : " << nn_options << endl;
-        cout << "learning rate     : " << eta1 << " , " << eta2 << " , " << eta3 << endl;
+        cout << "learning rate     : " << global_learning_rate << endl;
        cout << "eta_epoch         : " << eta1_epoch << " , " << eta2_epoch << endl;
        cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl;
        cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
        cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
@@ -1960,17 +1810,10 @@ namespace Learner
            cout << "scheduling        : default" << endl;
        }
        cout << "discount rate     : " << discount_rate << endl;
        // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
        reduction_gameply = max(reduction_gameply, 1);
        cout << "reduction_gameply : " << reduction_gameply << endl;
        cout << "LAMBDA            : " << ELMO_LAMBDA << endl;
        cout << "LAMBDA2           : " << ELMO_LAMBDA2 << endl;
        cout << "LAMBDA_LIMIT      : " << ELMO_LAMBDA_LIMIT << endl;
        cout << "mirror_percentage : " << mirror_percentage << endl;
        cout << "eval_save_interval  : " << eval_save_interval << " sfens" << endl;
        cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
@@ -1981,7 +1824,7 @@ namespace Learner
        cout << "init.." << endl;
        // Read evaluation function parameters
-        Eval::init_NNUE();
+        Eval::NNUE::init();
        Threads.main()->ponder = false;
@@ -2004,12 +1847,12 @@ namespace Learner
        }
        cout << "init_training.." << endl;
-        Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
+        Eval::NNUE::InitializeTraining(seed);
        Eval::NNUE::SetBatchSize(nn_batch_size);
        Eval::NNUE::SetOptions(nn_options);
        if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
            // Save the current net to [EvalSaveDir]\original.
-            Eval::save_eval("original");
+            Eval::NNUE::save_eval("original");
            // Set the folder above to best_nn_directory so that the trainer can
            // restore the network parameters from the original net file.
@@ -2020,11 +1863,9 @@ namespace Learner
        cout << "init done." << endl;
        // Reflect other option settings.
        learn_think.discount_rate = discount_rate;
        learn_think.eval_limit = eval_limit;
        learn_think.save_only_once = save_only_once;
        learn_think.sr.no_shuffle = no_shuffle;
        learn_think.freeze = freeze;
        learn_think.reduction_gameply = reduction_gameply;
        learn_think.newbob_scale = 1.0;
@@ -2033,7 +1874,6 @@ namespace Learner
        learn_think.eval_save_interval = eval_save_interval;
        learn_think.loss_output_interval = loss_output_interval;
        learn_think.mirror_percentage = mirror_percentage;
        // Start a thread that loads the phase file in the background
        // (If this is not started, mse cannot be calculated.)
@@ -2069,6 +1909,8 @@ namespace Learner
        // Start learning.
        learn_think.go_think();
        Eval::NNUE::FinalizeNet();
        // Save once at the end.
        learn_think.save(true);
    }
@@ -23,11 +23,7 @@ using LearnFloatType = float;
 // configure
 // ======================
-// ----------------------
+#define LOSS_FUNCTION "cross_entropy_eval"
 // Learning with the method of elmo (WCSC27)
 // ----------------------
 #define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
 // ----------------------
 // Definition of struct used in Learner
@@ -1,18 +0,0 @@
 #include "learning_tools.h"
 #include "misc.h"
 using namespace Eval;
 namespace EvalLearningTools
 {
 	// Out-of-class definitions of the static data members of Weight.
 	// They carry no explicit initializer here.

 	// Learning rate currently in effect.
 	double Weight::eta;
 	// The three learning-rate levels of the schedule.
 	double Weight::eta1;
 	double Weight::eta2;
 	double Weight::eta3;
 	// Epoch boundaries of the schedule.
 	uint64_t Weight::eta1_epoch;
 	uint64_t Weight::eta2_epoch;
 }
@@ -1,99 +0,0 @@
 #ifndef __LEARN_WEIGHT_H__
 #define __LEARN_WEIGHT_H__
 // A set of machine learning tools related to the weight array used for machine learning of evaluation functions
 #include "learn.h"
 #include "misc.h"  // PRNG , my_insertion_sort
 #include <array>
 #include <cmath>	// std::sqrt()
 namespace EvalLearningTools
 {
 	// -------------------------------------------------
 	//   Array for learning that stores gradients etc.
 	// -------------------------------------------------
 #if defined(_MSC_VER)
 #pragma pack(push,2)
 #elif defined(__GNUC__)
 #pragma pack(2)
 #endif
 	// Training state for one parameter: the gradient accumulated for it,
 	// plus a learning-rate schedule that is shared by all Weight objects
 	// through static members.
 	struct Weight
 	{
 		// Gradient accumulated over the current mini-batch.
 		LearnFloatType g = LearnFloatType(0);
 		// Learning rate eta (as used by AdaGrad-style updates).
 		// eta1, eta2, eta3, eta1_epoch and eta2_epoch are expected to be set
 		// (see init_eta()) before updateFV() is called.
 		// calc_eta() moves eta from eta1 towards eta2 until eta1_epoch is reached,
 		// and from eta2 towards eta3 until eta2_epoch is reached.
 		static double eta;
 		static double eta1;
 		static double eta2;
 		static double eta3;
 		static uint64_t eta1_epoch;
 		static uint64_t eta2_epoch;
 		// Sets the whole schedule at once.
 		// A learning rate of 0 is replaced by the default 30.0.
 		// The epoch boundaries are stored as given (0 stays 0).
 		static void init_eta(double new_eta1, double new_eta2, double new_eta3,
 			uint64_t new_eta1_epoch, uint64_t new_eta2_epoch)
 		{
 			Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0;
 			Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0;
 			Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0;
 			Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0;
 			Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0;
 		}
 		// Recomputes eta for the given epoch.
 		static void calc_eta(uint64_t epoch)
 		{
 			if (Weight::eta1_epoch == 0) // No first boundary: eta stays at eta1.
 				Weight::eta = Weight::eta1;
 			else if (epoch < Weight::eta1_epoch)
 				// Linear interpolation from eta1 to eta2 over [0, eta1_epoch).
 				Weight::eta = Weight::eta1 + (Weight::eta2 - Weight::eta1) * epoch / Weight::eta1_epoch;
 			else if (Weight::eta2_epoch == 0) // No second boundary: eta stays at eta2.
 				Weight::eta = Weight::eta2;
 			else if (epoch < Weight::eta2_epoch)
 				// Linear interpolation from eta2 to eta3 over [eta1_epoch, eta2_epoch).
 				// NOTE(review): assumes eta2_epoch > eta1_epoch; the unsigned
 				// subtraction in the divisor is not guarded otherwise.
 				Weight::eta = Weight::eta2 + (Weight::eta3 - Weight::eta2) * (epoch - Weight::eta1_epoch) / (Weight::eta2_epoch - Weight::eta1_epoch);
 			else
 				Weight::eta = Weight::eta3;
 		}
 		// Forwards to a two-argument updateFV(v, 1.0).
 		// NOTE(review): no two-argument overload is declared in this struct as shown;
 		// instantiating this template presumably fails to compile - confirm.
 		template <typename T> void updateFV(T& v) { updateFV(v, 1.0); }
 		// Overwrites the accumulated gradient.
 		template <typename T> void set_grad(const T& g_) { g = g_; }
 		// Adds to the accumulated gradient.
 		template <typename T> void add_grad(const T& g_) { g += g_; }
 		// Returns the accumulated gradient.
 		LearnFloatType get_grad() const { return g; }
 	};
 #if defined(_MSC_VER)
 #pragma pack(pop)
 #elif defined(__GNUC__)
 #pragma pack(0)
 #endif
 	// Turned weight array
 	// In order to be able to handle it transparently, let's have the same member as Weight.
 	// A pair of Weight objects that exposes the same member names as Weight,
 	// operating on two-element arrays instead of single values.
 	struct Weight2
 	{
 		Weight w[2];
 		// Updates both elements; the second one is passed 1.0/8.0 where the first is passed 1.0.
 		// NOTE(review): the two-argument Weight::updateFV called here is not visible
 		// in this file as shown; presumably the second argument scales eta - confirm.
 		template <typename T> void updateFV(std::array<T, 2>& v) { w[0].updateFV(v[0] , 1.0); w[1].updateFV(v[1],1.0/8.0); }
 		// Overwrites the gradient of each element with the matching entry of g.
 		template <typename T> void set_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].set_grad(g[i]); }
 		// Adds the matching entry of g to the gradient of each element.
 		template <typename T> void add_grad(const std::array<T, 2>& g) { for (int i = 0; i<2; ++i) w[i].add_grad(g[i]); }
 		// Returns the two accumulated gradients as an array.
 		std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
 	};
 }
 #endif
@@ -9,39 +9,14 @@
 void MultiThink::go_think()
 {
 	// Keep a copy to restore the Options settings later.
 	auto oldOptions = Options;
 	// When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is
 	// Since it is not thread safe, it is guaranteed here that it is being completely read in memory.
 	Options["BookOnTheFly"] = std::string("false");
 	// Read evaluation function, etc.
 	// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
 	// Skip memory corruption check.
-	Eval::init_NNUE();
+	Eval::NNUE::init();
 	// Call the derived class's init().
 	init();
        // About Search::Limits
        // Be careful because this member variable is global and affects other threads.
        {
          auto& limits = Search::Limits;
          // Make the search equivalent to the "go infinite" command. (Because it is troublesome if time management is done)
          limits.infinite = true;
          // Since PV is an obstacle when displayed, erase it.
          limits.silent = true;
          // If you use this, it will be compared with the accumulated nodes of each thread. Therefore, do not use it.
          limits.nodes = 0;
          // depth is also processed by the one passed as an argument of Learner::search().
          limits.depth = 0;
        }
 	// The loop upper limit is set with set_loop_max().
 	loop_count = 0;
 	done_count = 0;
@@ -123,10 +98,4 @@ void MultiThink::go_think()
 	// The file writing thread etc. are still running only when all threads are finished
 	// Since the work itself may not have completed, output only that all threads have finished.
 	std::cout << "all threads are joined." << std::endl;
 	// Restored because Options were rewritten.
 	// Restore the handler because the handler will not start unless you assign a value.
 	for (auto& s : oldOptions)
 		Options[s.first] = std::string(s.second);
 }
@@ -259,7 +259,7 @@ namespace Learner {
    return make_piece(c, pr);
  }
-  int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror)
+  int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th)
  {
    SfenPacker packer;
    auto& stream = packer.stream;
@@ -280,16 +280,8 @@ namespace Learner {
    pos.pieceList[B_KING][0] = SQUARE_NB;
    // First, the positions of the two kings
-    if (mirror)
+    for (auto c : Colors)
-    {
+      pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
      for (auto c : Colors)
        pos.board[flip_file((Square)stream.read_n_bit(6))] = make_piece(c, KING);
    }
    else
    {
      for (auto c : Colors)
        pos.board[stream.read_n_bit(6)] = make_piece(c, KING);
    }
    // Piece placement
    for (Rank r = RANK_8; r >= RANK_1; --r)
@@ -297,9 +289,6 @@ namespace Learner {
      for (File f = FILE_A; f <= FILE_H; ++f)
      {
        auto sq = make_square(f, r);
        if (mirror) {
          sq = flip_file(sq);
        }
        // it seems there are already balls
        Piece pc;
@@ -355,9 +344,6 @@ namespace Learner {
    // En passant square. Ignore if no pawn capture is possible
    if (stream.read_one_bit()) {
      Square ep_square = static_cast<Square>(stream.read_n_bit(6));
      if (mirror) {
        ep_square = flip_file(ep_square);
      }
      pos.st->epSquare = ep_square;
      if (!(pos.attackers_to(pos.st->epSquare) & pos.pieces(pos.sideToMove, PAWN))
@@ -13,7 +13,7 @@ class Thread;
 namespace Learner {
-    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+    int set_from_packed_sfen(Position& pos, const PackedSfen& sfen, StateInfo* si, Thread* th);
    PackedSfen sfen_pack(Position& pos);
 }
@@ -408,23 +408,11 @@ static void* aligned_large_pages_alloc_win(size_t allocSize) {
 void* aligned_large_pages_alloc(size_t allocSize) {
  static bool firstCall = true;
  void* mem;
  // Try to allocate large pages
  mem = aligned_large_pages_alloc_win(allocSize);
  // Suppress info strings on the first call. The first call occurs before 'uci'
  // is received and in that case this output confuses some GUIs.
  if (!firstCall)
  {
      if (mem)
          sync_cout << "info string Hash table allocation: Windows large pages used." << sync_endl;
      else
          sync_cout << "info string Hash table allocation: Windows large pages not used." << sync_endl;
  }
  firstCall = false;
  // Fall back to regular, page aligned, allocation if necessary
  if (!mem)
      mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
@@ -641,3 +629,109 @@ void init(int argc, char* argv[]) {
 } // namespace CommandLine
 // Returns the current system time formatted by std::ctime(), with any
 // trailing line-ending characters removed. (Used when learning evaluation functions)
 // Returns an empty string if the time could not be formatted.
 // Note: std::ctime() writes into a buffer shared with std::asctime(), so
 // concurrent callers may race.
 std::string now_string()
 {
    // Using std::ctime(), localtime() gives a warning that MSVC is not secure.
    // This shouldn't happen in the C++ standard, but...
 #if defined(_MSC_VER)
  // C4996 : 'ctime' : This function or variable may be unsafe.Consider using ctime_s instead.
 #pragma warning(disable : 4996)
 #endif
    const auto now = std::chrono::system_clock::now();
    const std::time_t tp = std::chrono::system_clock::to_time_t(now);

    // std::ctime() may return a null pointer when the time cannot be converted;
    // constructing a std::string from a null pointer is undefined behaviour.
    const char* const text = std::ctime(&tp);
    std::string result = (text != nullptr) ? text : "";

    // Remove line endings if they are included at the end.
    // The emptiness check keeps back() from being called on an empty string.
    while (!result.empty() && (result.back() == '\n' || result.back() == '\r'))
        result.pop_back();
    return result;
 }
 // Blocks the calling thread for the given number of milliseconds.
 void sleep(int ms)
 {
    const std::chrono::milliseconds pause(ms);
    std::this_thread::sleep_for(pause);
 }
 // Allocates `size` bytes aligned to `align` through _mm_malloc().
 // Never returns nullptr: when the allocation fails, a diagnostic is printed
 // to standard output and the process exits with status 1.
 void* aligned_malloc(size_t size, size_t align)
 {
    void* const p = _mm_malloc(size, align);
    if (p == nullptr)
    {
        // Allocation failure is treated as fatal.
        std::cout << "info string can't allocate memory. size = " << size << std::endl;
        std::exit(1);
    }
    return p;
 }
 // Returns the size of the stream in bytes, measured as the distance between
 // its beginning and its end. The read position the caller had on entry is
 // restored before returning.
 std::uint64_t get_file_size(std::fstream& fs)
 {
    // Remember the caller's read position.
    const auto saved_position = fs.tellg();

    // Offset of the end of the stream.
    fs.seekg(0, std::ios_base::end);
    const auto end_offset = static_cast<std::uint64_t>(fs.tellg());

    // Reset the state flags first; otherwise the next seek may fail.
    fs.clear();

    // Offset of the beginning of the stream.
    fs.seekg(0, std::ios_base::beg);
    const auto begin_offset = static_cast<std::uint64_t>(fs.tellg());

    // Put the read position back where the caller left it.
    fs.seekg(saved_position);

    return end_offset - begin_offset;
 }
 int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
 {
    fstream fs(filename, ios::in | ios::binary);
    if (fs.fail())
        return 1;
    const uint64_t file_size = get_file_size(fs);
    //std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
    // I know the file size, so call callback_func to get a buffer for this,
    // Get the pointer.
    void* ptr = callback_func(file_size);
    // If the buffer could not be secured, or if the file size is different from the expected file size,
    // It is supposed to return nullptr. At this time, reading is interrupted and an error is returned.
    if (ptr == nullptr)
        return 2;
    // read in pieces
    const uint64_t block_size = 1024 * 1024 * 1024; // number of elements to read in one read (1GB)
    for (uint64_t pos = 0; pos < file_size; pos += block_size)
    {
        // size to read this time
        uint64_t read_size = (pos + block_size < file_size) ? block_size : (file_size - pos);
        fs.read((char*)ptr + pos, read_size);
        // Read error occurred in the middle of the file.
        if (fs.fail())
            return 2;
        //cout << ".";
    }
    fs.close();
    return 0;
 }
 // Writes `size` bytes starting at `ptr` to the file `filename`, replacing any
 // previous content.
 //
 // Returns 0 on success, 1 if the file could not be opened, and 2 if writing
 // or flushing the data failed (for example because the disk is full).
 int write_memory_to_file(std::string filename, void* ptr, uint64_t size)
 {
    std::fstream fs(filename, std::ios::out | std::ios::binary);
    if (fs.fail())
        return 1;

    const char* const bytes = static_cast<const char*>(ptr);

    // The data is written in chunks of at most 1GB each.
    const uint64_t block_size = 1024 * 1024 * 1024;
    for (uint64_t pos = 0; pos < size; pos += block_size)
    {
        // Size of this chunk: a full block, or whatever is left at the tail.
        const uint64_t write_size = (pos + block_size < size) ? block_size : (size - pos);
        fs.write(bytes + pos, static_cast<std::streamsize>(write_size));

        // Report a failed write instead of silently producing a truncated file.
        if (fs.fail())
            return 2;
    }

    // close() flushes buffered data, so an error may only surface here.
    fs.close();
    if (fs.fail())
        return 2;

    return 0;
 }
@@ -1,7 +1,25 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  Stockfish is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 // Definition of input features and network structure used in NNUE evaluation function
-#ifndef HALFKP_CR_EP_256X2_32_32_H
+#ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
-#define HALFKP_CR_EP_256X2_32_32_H
+#define NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
@@ -12,31 +30,28 @@
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"
-namespace Eval {
+namespace Eval::NNUE {
-  namespace NNUE {
+// Input features used in evaluation function
 using RawFeatures = Features::FeatureSet<
    Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
    Features::EnPassant>;
-    // Input features used in evaluation function
+// Number of input feature dimensions after conversion
-    using RawFeatures = Features::FeatureSet<
+constexpr IndexType kTransformedFeatureDimensions = 256;
      Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight,
      Features::EnPassant>;
-    // Number of input feature dimensions after conversion
+namespace Layers {
    constexpr IndexType kTransformedFeatureDimensions = 256;
-    namespace Layers {
+// Define network structure
 using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
 using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
 using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
 using OutputLayer = AffineTransform<HiddenLayer2, 1>;
-      // define network structure
+}  // namespace Layers
      using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
      using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
      using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
      using OutputLayer = AffineTransform<HiddenLayer2, 1>;
-    }  // namespace Layers
+using Network = Layers::OutputLayer;
-    using Network = Layers::OutputLayer;
+}  // namespace Eval::NNUE
-  }  // namespace NNUE
+#endif // #ifndef NNUE_HALFKP_CR_EP_256X2_32_32_H_INCLUDED
 }  // namespace Eval
 #endif // HALFKP_CR_EP_256X2_32_32_H
@@ -0,0 +1,37 @@
 // Definition of input features and network structure used in NNUE evaluation function
 #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
 #define NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
 #include "../features/feature_set.h"
 #include "../features/half_kp.h"
 #include "../features/castling_right.h"
 #include "../layers/input_slice.h"
 #include "../layers/affine_transform.h"
 #include "../layers/clipped_relu.h"
 // Network architecture definition: the raw input feature set, the width of
 // the transformed features, and the stack of layers that forms the network.
 namespace Eval::NNUE {
 // Input features used in evaluation function:
 // HalfKP (friend side) combined with castling rights.
 using RawFeatures = Features::FeatureSet<
    Features::HalfKP<Features::Side::kFriend>, Features::CastlingRight>;
 // Number of input feature dimensions after conversion
 constexpr IndexType kTransformedFeatureDimensions = 256;
 namespace Layers {
 // Define network structure:
 // an input slice of 2 * kTransformedFeatureDimensions values, two
 // 32-output affine layers each followed by a clipped ReLU, and a final
 // affine layer with a single output.
 using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
 using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
 using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
 using OutputLayer = AffineTransform<HiddenLayer2, 1>;
 }  // namespace Layers
 // The complete network is identified by its last layer.
 using Network = Layers::OutputLayer;
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_HALFKP_CR_256X2_32_32_H_INCLUDED
@@ -1,4 +1,5 @@
 // Definition of input features and network structure used in NNUE evaluation function
 #ifndef K_P_256X2_32_32_H
 #define K_P_256X2_32_32_H
@@ -18,7 +18,6 @@
 // Code for calculating NNUE evaluation function
 #include <fstream>
 #include <iostream>
 #include <set>
@@ -31,7 +30,7 @@
 namespace Eval::NNUE {
-  uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
+  const uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
   // convention: W - us, B - them
   // viewed from other side, W and B are reversed
      { PS_NONE,     PS_NONE     },
@@ -53,7 +52,7 @@ namespace Eval::NNUE {
  };
  // Input feature converter
-  AlignedPtr<FeatureTransformer> feature_transformer;
+  LargePagePtr<FeatureTransformer> feature_transformer;
  // Evaluation function
  AlignedPtr<Network> network;
@@ -80,14 +79,22 @@ namespace Eval::NNUE {
    std::memset(pointer.get(), 0, sizeof(T));
  }
  // Obtains storage for one T from aligned_large_pages_alloc(), hands it to
  // `pointer`, and fills it with zero bytes.
  template <typename T>
  void Initialize(LargePagePtr<T>& pointer) {
    static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
    constexpr std::size_t kByteCount = sizeof(T);
    void* const storage = aligned_large_pages_alloc(kByteCount);
    pointer.reset(static_cast<T*>(storage));
    std::memset(pointer.get(), 0, kByteCount);
  }
  // Read evaluation function parameters
  template <typename T>
-  bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
+  bool ReadParameters(std::istream& stream, T& reference) {
    std::uint32_t header;
    header = read_little_endian<std::uint32_t>(stream);
    if (!stream || header != T::GetHashValue()) return false;
-    return pointer->ReadParameters(stream);
+    return reference.ReadParameters(stream);
  }
  // write evaluation function parameters
@@ -98,6 +105,13 @@ namespace Eval::NNUE {
    return pointer->WriteParameters(stream);
  }
  // Writes the hash value of T as a 32-bit header, followed by the parameters
  // of the object held by `pointer`. Returns the result of T::WriteParameters().
  template <typename T>
  bool WriteParameters(std::ostream& stream, const LargePagePtr<T>& pointer) {
    constexpr std::uint32_t kTypeHash = T::GetHashValue();
    const char* const header_bytes = reinterpret_cast<const char*>(&kTypeHash);
    stream.write(header_bytes, sizeof(kTypeHash));
    return pointer->WriteParameters(stream);
  }
  }  // namespace Detail
  // Initialize the evaluation function parameters
@@ -139,11 +153,10 @@ namespace Eval::NNUE {
    std::string architecture;
    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
    if (hash_value != kHashValue) return false;
-    if (!Detail::ReadParameters(stream, feature_transformer)) return false;
+    if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
-    if (!Detail::ReadParameters(stream, network)) return false;
+    if (!Detail::ReadParameters(stream, *network)) return false;
    return stream && stream.peek() == std::ios::traits_type::eof();
  }
  // write evaluation function parameters
  bool WriteParameters(std::ostream& stream) {
    if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
@@ -151,36 +164,20 @@ namespace Eval::NNUE {
    if (!Detail::WriteParameters(stream, network)) return false;
    return !stream.fail();
  }
-
+  // Evaluation function. Perform differential calculation.
-  // Proceed with the difference calculation if possible
+  Value evaluate(const Position& pos) {
  static void UpdateAccumulatorIfPossible(const Position& pos) {
    feature_transformer->UpdateAccumulatorIfPossible(pos);
  }
  // Calculate the evaluation value
  static Value ComputeScore(const Position& pos, bool refresh) {
    auto& accumulator = pos.state()->accumulator;
    if (!refresh && accumulator.computed_score) {
      return accumulator.score;
    }
    alignas(kCacheLineSize) TransformedFeatureType
        transformed_features[FeatureTransformer::kBufferSize];
-    feature_transformer->Transform(pos, transformed_features, refresh);
+    feature_transformer->Transform(pos, transformed_features);
    alignas(kCacheLineSize) char buffer[Network::kBufferSize];
    const auto output = network->Propagate(transformed_features, buffer);
-    auto score = static_cast<Value>(output[0] / FV_SCALE);
+    return static_cast<Value>(output[0] / FV_SCALE);
    accumulator.score = score;
    accumulator.computed_score = true;
    return accumulator.score;
  }
-  // Load the evaluation function file
+  // Load eval, from a file stream or a memory stream
-  bool load_eval_file(const std::string& evalFile) {
+  bool load_eval(std::string name, std::istream& stream) {
    Initialize();
@@ -189,29 +186,8 @@ namespace Eval::NNUE {
      std::cout << "info string SkipLoadingEval set to true, Net not loaded!" << std::endl;
      return true;
    }
-
+    fileName = name;
-    fileName = evalFile;
+    return ReadParameters(stream);
    std::ifstream stream(evalFile, std::ios::binary);
    const bool result = ReadParameters(stream);
    return result;
  }
  // Evaluation function. Perform differential calculation.
  // Forwards to ComputeScore() with refresh = false, which allows a score
  // already cached in the position's accumulator to be returned as is.
  Value evaluate(const Position& pos) {
    return ComputeScore(pos, false);
  }
  // Evaluation function. Perform full calculation.
  // Forwards to ComputeScore() with refresh = true, so the cached-score
  // shortcut is bypassed and the score is computed again.
  Value compute_eval(const Position& pos) {
    return ComputeScore(pos, true);
  }
  // Proceed with the difference calculation if possible.
  // Public entry point that simply forwards `pos` to
  // UpdateAccumulatorIfPossible(); it returns nothing.
  void update_eval(const Position& pos) {
    UpdateAccumulatorIfPossible(pos);
  }
 } // namespace Eval::NNUE
@@ -54,6 +54,35 @@ namespace Eval::NNUE {
  template <typename T>
  using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
  // Input feature converter
  extern LargePagePtr<FeatureTransformer> feature_transformer;
  // Evaluation function
  extern AlignedPtr<Network> network;
  // Evaluation function file name
  extern std::string fileName;
  // Saved evaluation function file name
  extern std::string savedfileName;
  // Get a string that represents the structure of the evaluation function
  std::string GetArchitectureString();
  // read the header
  bool ReadHeader(std::istream& stream,
    std::uint32_t* hash_value, std::string* architecture);
  // write the header
  bool WriteHeader(std::ostream& stream,
    std::uint32_t hash_value, const std::string& architecture);
  // read evaluation function parameters
  bool ReadParameters(std::istream& stream);
  // write evaluation function parameters
  bool WriteParameters(std::ostream& stream);
 }  // namespace Eval::NNUE
 #endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
@@ -5,15 +5,12 @@
 #include <filesystem>
 #include "../learn/learn.h"
 #include "../learn/learning_tools.h"
 #include "../position.h"
 #include "../uci.h"
 #include "../misc.h"
 #include "../thread_win32_osx.h"
 #include "../eval/evaluate_common.h"
 #include "evaluate_nnue.h"
 #include "evaluate_nnue_learner.h"
 #include "trainer/features/factorizer_feature_set.h"
@@ -24,215 +21,191 @@
 #include "trainer/trainer_clipped_relu.h"
 #include "trainer/trainer_sum.h"
 namespace Eval {
 namespace NNUE {
 namespace {
 // learning data
 std::vector<Example> examples;
 // Mutex for exclusive control of examples
 std::mutex examples_mutex;
 // number of samples in mini-batch
 uint64_t batch_size;
 // random number generator
 std::mt19937 rng;
 // learner
 std::shared_ptr<Trainer<Network>> trainer;
 // Learning rate scale
-double global_learning_rate_scale;
+double global_learning_rate;
-// Get the learning rate scale
+namespace Eval::NNUE {
 // Get the learning rate scale: returns the current value of the
 // file-local variable global_learning_rate_scale.
 double GetGlobalLearningRateScale() {
  return global_learning_rate_scale;
 }
-// Tell the learner options such as hyperparameters
+  namespace {
 void SendMessages(std::vector<Message> messages) {
  for (auto& message : messages) {
    trainer->SendMessage(&message);
    assert(message.num_receivers > 0);
  }
 }
-}  // namespace
+    // learning data
    std::vector<Example> examples;
-// Initialize learning
+    // Mutex for exclusive control of examples
-void InitializeTraining(double eta1, uint64_t eta1_epoch,
+    std::mutex examples_mutex;
                        double eta2, uint64_t eta2_epoch, double eta3) {
  std::cout << "Initializing NN training for "
            << GetArchitectureString() << std::endl;
-  assert(feature_transformer);
+    // number of samples in mini-batch
-  assert(network);
+    uint64_t batch_size;
  trainer = Trainer<Network>::Create(network.get(), feature_transformer.get());
-  if (Options["SkipLoadingEval"]) {
+    // random number generator
-    trainer->Initialize(rng);
+    std::mt19937 rng;
  }
-  global_learning_rate_scale = 1.0;
+    // learner
-  EvalLearningTools::Weight::init_eta(eta1, eta2, eta3, eta1_epoch, eta2_epoch);
+    std::shared_ptr<Trainer<Network>> trainer;
 }
-// set the number of samples in the mini-batch
+    // Tell the learner options such as hyperparameters
-void SetBatchSize(uint64_t size) {
+    void SendMessages(std::vector<Message> messages) {
-  assert(size > 0);
+      for (auto& message : messages) {
-  batch_size = size;
+        trainer->SendMessage(&message);
-}
+        assert(message.num_receivers > 0);
 // Set the learning rate scale: stores `scale` in the file-local variable
 // global_learning_rate_scale. No validation is performed on the value.
 void SetGlobalLearningRateScale(double scale) {
  global_learning_rate_scale = scale;
 }
 // Set options such as hyperparameters
 void SetOptions(const std::string& options) {
  std::vector<Message> messages;
  for (const auto& option : Split(options, ',')) {
    const auto fields = Split(option, '=');
    assert(fields.size() == 1 || fields.size() == 2);
    if (fields.size() == 1) {
      messages.emplace_back(fields[0]);
    } else {
      messages.emplace_back(fields[0], fields[1]);
    }
  }
  SendMessages(std::move(messages));
 }
 // Reload the evaluation function parameters used for learning from the file
 // NNUE::savedfileName located in `dir_name`, then send a "reset" message.
 // The read result is only checked by an assert, i.e. in debug builds.
 void RestoreParameters(const std::string& dir_name) {
  const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
  std::ifstream stream(file_name, std::ios::binary);

  [[maybe_unused]] const bool loaded = ReadParameters(stream);
  assert(loaded);

  SendMessages({{"reset"}});
 }
 // Add 1 sample of learning data
 void AddExample(Position& pos, Color rootColor,
                const Learner::PackedSfenValue& psv, double weight) {
  Example example;
  if (rootColor == pos.side_to_move()) {
    example.sign = 1;
  } else {
    example.sign = -1;
  }
  example.psv = psv;
  example.weight = weight;
  Features::IndexList active_indices[2];
  for (const auto trigger : kRefreshTriggers) {
    RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
  }
  if (pos.side_to_move() != WHITE) {
    active_indices[0].swap(active_indices[1]);
  }
  for (const auto color : Colors) {
    std::vector<TrainingFeature> training_features;
    for (const auto base_index : active_indices[color]) {
      static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
                    (1 << TrainingFeature::kIndexBits), "");
      Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
          base_index, &training_features);
    }
    std::sort(training_features.begin(), training_features.end());
    auto& unique_features = example.training_features[color];
    for (const auto& feature : training_features) {
      if (!unique_features.empty() &&
          feature.GetIndex() == unique_features.back().GetIndex()) {
        unique_features.back() += feature;
      } else {
        unique_features.push_back(feature);
      }
    }
  }  // namespace
  // Initialize learning.
  // Announces the architecture on stdout, creates the trainer for the current
  // network / feature transformer pair, and seeds the random number generator
  // from `seed`. When the "SkipLoadingEval" option is set, the trainer is
  // additionally initialized from that generator.
  void InitializeTraining(const std::string& seed) {
    std::cout << "Initializing NN training for ";
    std::cout << GetArchitectureString() << std::endl;

    assert(feature_transformer);
    assert(network);

    auto* const net = network.get();
    auto* const transformer = feature_transformer.get();
    trainer = Trainer<Network>::Create(net, transformer);

    // Keep the seed expression as a temporary, exactly as before.
    rng.seed(PRNG(seed).rand<uint64_t>());

    if (Options["SkipLoadingEval"]) {
      trainer->Initialize(rng);
    }
  }
-  std::lock_guard<std::mutex> lock(examples_mutex);
+  // set the number of samples in the mini-batch
-  examples.push_back(std::move(example));
+  void SetBatchSize(uint64_t size) {
-}
+    assert(size > 0);
    batch_size = size;
  }
  // Set options such as hyperparameters
  void SetOptions(const std::string& options) {
    std::vector<Message> messages;
    for (const auto& option : Split(options, ',')) {
      const auto fields = Split(option, '=');
      assert(fields.size() == 1 || fields.size() == 2);
      if (fields.size() == 1) {
        messages.emplace_back(fields[0]);
      } else {
        messages.emplace_back(fields[0], fields[1]);
      }
    }
    SendMessages(std::move(messages));
  }
-// update the evaluation function parameters
+  // Reread the evaluation function parameters for learning from the file
-void UpdateParameters(uint64_t epoch) {
+  void RestoreParameters(const std::string& dir_name) {
-  assert(batch_size > 0);
+    const std::string file_name = Path::Combine(dir_name, NNUE::savedfileName);
    std::ifstream stream(file_name, std::ios::binary);
 #ifndef NDEBUG
    bool result =
 #endif
    ReadParameters(stream);
 #ifndef NDEBUG
    assert(result);
 #endif
-  EvalLearningTools::Weight::calc_eta(epoch);
+    SendMessages({{"reset"}});
-  const auto learning_rate = static_cast<LearnFloatType>(
+  }
      get_eta() / batch_size);
-  std::lock_guard<std::mutex> lock(examples_mutex);
+  void FinalizeNet() {
-  std::shuffle(examples.begin(), examples.end(), rng);
+    SendMessages({{"clear_unobserved_feature_weights"}});
-  while (examples.size() >= batch_size) {
+  }
    std::vector<Example> batch(examples.end() - batch_size, examples.end());
    examples.resize(examples.size() - batch_size);
-    const auto network_output = trainer->Propagate(batch);
+  // Add 1 sample of learning data
  void AddExample(Position& pos, Color rootColor,
                  const Learner::PackedSfenValue& psv, double weight) {
    Example example;
    if (rootColor == pos.side_to_move()) {
      example.sign = 1;
    } else {
      example.sign = -1;
    }
    example.psv = psv;
    example.weight = weight;
-    std::vector<LearnFloatType> gradients(batch.size());
+    Features::IndexList active_indices[2];
-    for (std::size_t b = 0; b < batch.size(); ++b) {
+    for (const auto trigger : kRefreshTriggers) {
-      const auto shallow = static_cast<Value>(Round<std::int32_t>(
+      RawFeatures::AppendActiveIndices(pos, trigger, active_indices);
-          batch[b].sign * network_output[b] * kPonanzaConstant));
+    }
-      const auto& psv = batch[b].psv;
+    if (pos.side_to_move() != WHITE) {
-      const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
+      active_indices[0].swap(active_indices[1]);
-      gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
+    }
    for (const auto color : Colors) {
      std::vector<TrainingFeature> training_features;
      for (const auto base_index : active_indices[color]) {
        static_assert(Features::Factorizer<RawFeatures>::GetDimensions() <
                      (1 << TrainingFeature::kIndexBits), "");
        Features::Factorizer<RawFeatures>::AppendTrainingFeatures(
            base_index, &training_features);
      }
      std::sort(training_features.begin(), training_features.end());
      auto& unique_features = example.training_features[color];
      for (const auto& feature : training_features) {
        if (!unique_features.empty() &&
            feature.GetIndex() == unique_features.back().GetIndex()) {
          unique_features.back() += feature;
        } else {
          unique_features.push_back(feature);
        }
      }
    }
-    trainer->Backpropagate(gradients.data(), learning_rate);
+    std::lock_guard<std::mutex> lock(examples_mutex);
-  }
+    examples.push_back(std::move(example));
  SendMessages({{"quantize_parameters"}});
 }
 // Check if there are any problems with learning.
 // Sends a single "check_health" message through SendMessages(); whatever
 // checking happens is done by the receivers of that message.
 void CheckHealth() {
  SendMessages({{"check_health"}});
 }
 }  // namespace NNUE
 // save merit function parameters to a file
 void save_eval(std::string dir_name) {
  auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
  std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
  // Make sure the output folder exists before writing into it.
  // create_directories() does not report an error when the folder is already
  // present, and it also creates any missing parent folders on the way.
  std::filesystem::create_directories(eval_dir);
  if (Options["SkipLoadingEval"] && NNUE::trainer) {
    NNUE::SendMessages({{"clear_unobserved_feature_weights"}});
  }
-  const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
+  // update the evaluation function parameters
-  std::ofstream stream(file_name, std::ios::binary);
+  void UpdateParameters() {
    // Trains on the queued examples in mini-batches of exactly batch_size.
    // Examples left over after the last full batch stay in `examples`.
    assert(batch_size > 0);
    // Per-example step size: the global learning rate divided by the batch size.
    const auto learning_rate = static_cast<LearnFloatType>(
        global_learning_rate / batch_size);
    // examples_mutex is held from here until the function returns.
    std::lock_guard<std::mutex> lock(examples_mutex);
    std::shuffle(examples.begin(), examples.end(), rng);
    while (examples.size() >= batch_size) {
      // Copy the last batch_size examples out, then drop them from the queue.
      std::vector<Example> batch(examples.end() - batch_size, examples.end());
      examples.resize(examples.size() - batch_size);
      const auto network_output = trainer->Propagate(batch);
      std::vector<LearnFloatType> gradients(batch.size());
      for (std::size_t b = 0; b < batch.size(); ++b) {
        // Network output, multiplied by the example's sign and
        // kPonanzaConstant, then rounded to an integer Value.
        const auto shallow = static_cast<Value>(Round<std::int32_t>(
            batch[b].sign * network_output[b] * kPonanzaConstant));
        const auto& psv = batch[b].psv;
        // Gradient from Learner::calc_grad(), multiplied by the same sign
        // and then by the example's weight.
        const double gradient = batch[b].sign * Learner::calc_grad(shallow, psv);
        gradients[b] = static_cast<LearnFloatType>(gradient * batch[b].weight);
      }
      trainer->Backpropagate(gradients.data(), learning_rate);
    }
    // Sent once per call, after all full batches have been processed.
    SendMessages({{"quantize_parameters"}});
  }
  // Check if there are any problems with learning.
  // Sends a single "check_health" message through SendMessages(); whatever
  // checking happens is done by the receivers of that message.
  void CheckHealth() {
    SendMessages({{"check_health"}});
  }
  // save merit function parameters to a file
  void save_eval(std::string dir_name) {
    auto eval_dir = Path::Combine(Options["EvalSaveDir"], dir_name);
    std::cout << "save_eval() start. folder = " << eval_dir << std::endl;
    // Make sure the output folder exists before writing into it.
    // create_directories() does not report an error when the folder is already
    // present, and it also creates any missing parent folders on the way.
    std::filesystem::create_directories(eval_dir);
    const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
    std::ofstream stream(file_name, std::ios::binary);
 #ifndef NDEBUG
-  const bool result =
+    bool result =
 #endif
-  NNUE::WriteParameters(stream);
+    WriteParameters(stream);
 #ifndef NDEBUG
-  assert(result);
+    assert(result);
 #endif
-  std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
+    std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
-}
+  }
-
+}  // namespace Eval::NNUE
 // Get the current eta: the value of EvalLearningTools::Weight::eta
 // multiplied by NNUE::GetGlobalLearningRateScale().
 double get_eta() {
  return NNUE::GetGlobalLearningRateScale() * EvalLearningTools::Weight::eta;
 }
 }  // namespace Eval
@@ -5,38 +5,33 @@
 #include "../learn/learn.h"
-namespace Eval {
+namespace Eval::NNUE {
-namespace NNUE {
+  // Initialize learning
  void InitializeTraining(const std::string& seed);
-// Initialize learning
+  // set the number of samples in the mini-batch
-void InitializeTraining(double eta1, uint64_t eta1_epoch,
+  void SetBatchSize(uint64_t size);
                        double eta2, uint64_t eta2_epoch, double eta3);
-// set the number of samples in the mini-batch
+  // Set options such as hyperparameters
-void SetBatchSize(uint64_t size);
+  void SetOptions(const std::string& options);
-// set the learning rate scale
+  // Reread the evaluation function parameters for learning from the file
-void SetGlobalLearningRateScale(double scale);
+  void RestoreParameters(const std::string& dir_name);
 // Set options such as hyperparameters
 void SetOptions(const std::string& options);
 // Reread the evaluation function parameters for learning from the file
 void RestoreParameters(const std::string& dir_name);
 // Add 1 sample of learning data
-void AddExample(Position& pos, Color rootColor,
+  void AddExample(Position& pos, Color rootColor,
-                const Learner::PackedSfenValue& psv, double weight);
+  	const Learner::PackedSfenValue& psv, double weight);
-// update the evaluation function parameters
+  // update the evaluation function parameters
-void UpdateParameters(uint64_t epoch);
+  void UpdateParameters();
-// Check if there are any problems with learning
+  // Check if there are any problems with learning
-void CheckHealth();
+  void CheckHealth();
-}  // namespace NNUE
+  void FinalizeNet();
-}  // namespace Eval
+  void save_eval(std::string suffix);
 }  // namespace Eval::NNUE
 #endif
@@ -1,69 +1,40 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity CastlingRight of NNUE evaluation function
 #include "castling_right.h"
 #include "index_list.h"
-namespace Eval {
+namespace Eval::NNUE::Features {
-  namespace NNUE {
+  // Get a list of indices with a value of 1 among the features
  void CastlingRight::AppendActiveIndices(
    const Position& pos, Color perspective, IndexList* active) {
    // do nothing if array size is small to avoid compiler warning
    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-    namespace Features {
+    int castling_rights = pos.state()->castlingRights;
    // Castling rights as seen from `perspective`.
    int relative_castling_rights;
    if (perspective == WHITE) {
      relative_castling_rights = castling_rights;
    }
    else {
      // Invert the perspective: move the low bit pair (bits 0-1) up to
      // bits 2-3 and the high bit pair (bits 2-3) down to bits 0-1.
      // The two halves occupy disjoint bits, so they have to be merged with
      // OR; AND-ing them always yields 0, i.e. no castling right at all.
      relative_castling_rights = ((castling_rights & 3) << 2)
        | ((castling_rights >> 2) & 3);
    }
-      // Get a list of indices with a value of 1 among the features
+    for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
-      void CastlingRight::AppendActiveIndices(
+      if (relative_castling_rights & (1 << i)) {
-        const Position& pos, Color perspective, IndexList* active) {
+        active->push_back(i);
        // do nothing if array size is small to avoid compiler warning
        if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
        int castling_rights = pos.state()->castlingRights;
        // Castling rights as seen from `perspective`.
        int relative_castling_rights;
        if (perspective == WHITE) {
          relative_castling_rights = castling_rights;
        }
        else {
          // Invert the perspective: move the low bit pair (bits 0-1) up to
          // bits 2-3 and the high bit pair (bits 2-3) down to bits 0-1.
          // The two halves occupy disjoint bits, so they have to be merged
          // with OR; AND-ing them always yields 0, i.e. no castling right.
          relative_castling_rights = ((castling_rights & 3) << 2)
            | ((castling_rights >> 2) & 3);
        }
        for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
          if (relative_castling_rights & (1 << i)) {
            active->push_back(i);
          }
        }
      }
    }
  }
-      // Get a list of indices whose values have changed from the previous one in the feature quantity
+  // Get a list of indices whose values have changed from the previous one in the feature quantity
-      void CastlingRight::AppendChangedIndices(
+  void CastlingRight::AppendChangedIndices(
-        const Position& pos, Color perspective,
+    const Position& /* pos */, Color /* perspective */,
-        IndexList* removed, IndexList* /* added */) {
+    IndexList* /* removed */, IndexList* /* added */) {
    // Not implemented.
    assert(false);
  }
-        int previous_castling_rights = pos.state()->previous->castlingRights;
+}  // namespace Eval::NNUE::Features
        int current_castling_rights = pos.state()->castlingRights;
        // Previous and current castling rights as seen from `perspective`.
        int relative_previous_castling_rights;
        int relative_current_castling_rights;
        if (perspective == WHITE) {
          relative_previous_castling_rights = previous_castling_rights;
          relative_current_castling_rights = current_castling_rights;
        }
        else {
          // Invert the perspective: swap the low bit pair (bits 0-1) with the
          // high bit pair (bits 2-3). The shifted halves occupy disjoint
          // bits, so they have to be merged with OR; AND-ing them always
          // yields 0 and no lost castling right would ever be detected.
          relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
            | ((previous_castling_rights >> 2) & 3);
          relative_current_castling_rights = ((current_castling_rights & 3) << 2)
            | ((current_castling_rights >> 2) & 3);
        }
        for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
          if ((relative_previous_castling_rights & (1 << i)) &&
            (relative_current_castling_rights & (1 << i)) == 0) {
            removed->push_back(i);
          }
        }
      }
    }  // namespace Features
  }  // namespace NNUE
 }  // namespace Eval
@@ -1,4 +1,4 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity CastlingRight of NNUE evaluation function
 #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
 #define _NNUE_FEATURES_CASTLING_RIGHT_H_
@@ -6,39 +6,30 @@
 #include "../../evaluate.h"
 #include "features_common.h"
-namespace Eval {
+namespace Eval::NNUE::Features {
-  namespace NNUE {
+  class CastlingRight {
  public:
    // feature quantity name
    static constexpr const char* kName = "CastlingRight";
    // Hash value embedded in the evaluation function file
    static constexpr std::uint32_t kHashValue = 0x913968AAu;
    // number of feature dimensions
    static constexpr IndexType kDimensions = 4;
    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions = 4;
    // Timing of full calculation instead of difference calculation
    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
-    namespace Features {
+    // Get a list of indices with a value of 1 among the features
    static void AppendActiveIndices(const Position& pos, Color perspective,
      IndexList* active);
-      // Feature K: Ball position
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
-      class CastlingRight {
+    static void AppendChangedIndices(const Position& pos, Color perspective,
-      public:
+      IndexList* removed, IndexList* added);
-        // feature quantity name
+  };
        static constexpr const char* kName = "CastlingRight";
        // Hash value embedded in the evaluation function file
        static constexpr std::uint32_t kHashValue = 0x913968AAu;
        // number of feature dimensions
        static constexpr IndexType kDimensions = 4;
        // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
        static constexpr IndexType kMaxActiveDimensions = 4;
        // Timing of full calculation instead of difference calculation
        static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
-        // Get a list of indices with a value of 1 among the features
+}  // namespace Eval::NNUE::Features
        static void AppendActiveIndices(const Position& pos, Color perspective,
          IndexList* active);
        // Get a list of indices whose values have changed from the previous one in the feature quantity
        static void AppendChangedIndices(const Position& pos, Color perspective,
          IndexList* removed, IndexList* added);
      };
    }  // namespace Features
  }  // namespace NNUE
 }  // namespace Eval
 #endif
@@ -1,43 +1,30 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity EnPassant of NNUE evaluation function
 #include "enpassant.h"
 #include "index_list.h"
-namespace Eval {
+namespace Eval::NNUE::Features {
-  namespace NNUE {
+  // Get a list of indices with a value of 1 among the features
  void EnPassant::AppendActiveIndices(
    const Position& pos, Color /* perspective */, IndexList* active) {
    // do nothing if array size is small to avoid compiler warning
    if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
-    namespace Features {
+    auto epSquare = pos.state()->epSquare;
    if (epSquare == SQ_NONE) {
      return;
    }
    auto file = file_of(epSquare);
    active->push_back(file);
  }
-      // Get a list of indices with a value of 1 among the features
+  // Get a list of indices whose values have changed from the previous one in the feature quantity
-      void EnPassant::AppendActiveIndices(
+  void EnPassant::AppendChangedIndices(
-        const Position& pos, Color perspective, IndexList* active) {
+    const Position& /* pos */, Color /* perspective */,
-        // do nothing if array size is small to avoid compiler warning
+    IndexList* /* removed */, IndexList* /* added */) {
-        if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return;
+    // Not implemented.
    assert(false);
  }
-        auto epSquare = pos.state()->epSquare;
+}  // namespace Eval::NNUE::Features
        if (epSquare == SQ_NONE) {
          return;
        }
        if (perspective == BLACK) {
          epSquare = rotate180(epSquare);
        }
        auto file = file_of(epSquare);
        active->push_back(file);
      }
      // Get a list of indices whose values have changed from the previous one in the feature quantity
      void EnPassant::AppendChangedIndices(
        const Position& /* pos */, Color /* perspective */,
        IndexList* /* removed */, IndexList* /* added */) {
        // Not implemented.
        assert(false);
      }
    }  // namespace Features
  }  // namespace NNUE
 }  // namespace Eval
@@ -1,4 +1,4 @@
-//Definition of input feature quantity K of NNUE evaluation function
+//Definition of input feature quantity EnPassant of NNUE evaluation function
 #ifndef _NNUE_FEATURES_ENPASSANT_H_
 #define _NNUE_FEATURES_ENPASSANT_H_
@@ -6,39 +6,30 @@
 #include "../../evaluate.h"
 #include "features_common.h"
-namespace Eval {
+namespace Eval::NNUE::Features {
-  namespace NNUE {
+  class EnPassant {
  public:
    // feature quantity name
    static constexpr const char* kName = "EnPassant";
    // Hash value embedded in the evaluation function file
    static constexpr std::uint32_t kHashValue = 0x02924F91u;
    // number of feature dimensions
    static constexpr IndexType kDimensions = 8;
    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions = 1;
    // Timing of full calculation instead of difference calculation
    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
-    namespace Features {
+    // Get a list of indices with a value of 1 among the features
    static void AppendActiveIndices(const Position& pos, Color perspective,
      IndexList* active);
-      // Feature K: Ball position
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
-      class EnPassant {
+    static void AppendChangedIndices(const Position& pos, Color perspective,
-      public:
+      IndexList* removed, IndexList* added);
-        // feature quantity name
+  };
        static constexpr const char* kName = "EnPassant";
        // Hash value embedded in the evaluation function file
        static constexpr std::uint32_t kHashValue = 0x02924F91u;
        // number of feature dimensions
        static constexpr IndexType kDimensions = 8;
        // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
        static constexpr IndexType kMaxActiveDimensions = 1;
        // Timing of full calculation instead of difference calculation
        static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
-        // Get a list of indices with a value of 1 among the features
+}  // namespace Eval::NNUE::Features
        static void AppendActiveIndices(const Position& pos, Color perspective,
          IndexList* active);
        // Get a list of indices whose values have changed from the previous one in the feature quantity
        static void AppendChangedIndices(const Position& pos, Color perspective,
          IndexList* removed, IndexList* added);
      };
    }  // namespace Features
  }  // namespace NNUE
 }  // namespace Eval
 #endif
@@ -105,9 +105,20 @@ namespace Eval::NNUE::Features {
      for (Color perspective : { WHITE, BLACK }) {
        reset[perspective] = false;
        switch (trigger) {
          case TriggerEvent::kNone:
            break;
          case TriggerEvent::kFriendKingMoved:
            reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
            break;
          case TriggerEvent::kEnemyKingMoved:
              reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
            break;
          case TriggerEvent::kAnyKingMoved:
            reset[perspective] = type_of(dp.piece[0]) == KING;
            break;
          case TriggerEvent::kAnyPieceMoved:
            reset[perspective] = true;
            break;
          default:
            assert(false);
            break;
@@ -34,10 +34,10 @@ namespace Eval::NNUE::Features {
  // Trigger to perform full calculations instead of difference only
  enum class TriggerEvent {
    kNone, // Calculate the difference whenever possible
-    kFriendKingMoved, // calculate all when own ball moves
+    kFriendKingMoved, // calculate full evaluation when own king moves
-    kEnemyKingMoved, // do all calculations when enemy balls move
+    kEnemyKingMoved, // calculate full evaluation when opponent king moves
-    kAnyKingMoved, // do all calculations if either ball moves
+    kAnyKingMoved, // calculate full evaluation when any king moves
-    kAnyPieceMoved, // always do all calculations
+    kAnyPieceMoved, // always calculate full evaluation
  };
  enum class Side {
@@ -23,9 +23,9 @@
 namespace Eval::NNUE::Features {
-  // Orient a square according to perspective (rotates by 180 for black)
+  // Orient a square according to perspective (flip rank for black)
  inline Square orient(Color perspective, Square s) {
-    return Square(int(s) ^ (bool(perspective) * 63));
+    return Square(int(s) ^ (bool(perspective) * SQ_A8));
  }
  // Find the index of the feature quantity from the king position and PieceSquare
@@ -9,9 +9,9 @@ namespace NNUE {
 namespace Features {
-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }
 // Find the index of the feature quantity from the king position and PieceSquare
@@ -9,9 +9,9 @@ namespace NNUE {
 namespace Features {
-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }
 // Index of a feature for a given king position.
@@ -32,19 +32,11 @@ void K::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
  const auto& dp = pos.state()->dirtyPiece;
-  Color king_color;
+  if (type_of(dp.piece[0]) == KING)
-  if (dp.piece[0] == Piece::W_KING) {
+  {
-    king_color = WHITE;
+    removed->push_back(MakeIndex(perspective, dp.from[0], color_of(dp.piece[0])));
    added->push_back(MakeIndex(perspective, dp.to[0], color_of(dp.piece[0])));
  }
  else if (dp.piece[0] == Piece::B_KING) {
    king_color = BLACK;
  }
  else {
    return;
  }
  removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
  added->push_back(MakeIndex(perspective, dp.to[0], king_color));
 }
 }  // namespace Features
@@ -9,9 +9,9 @@ namespace NNUE {
 namespace Features {
-// Orient a square according to perspective (rotates by 180 for black)
+// Orient a square according to perspective (flip rank for black)
 inline Square orient(Color perspective, Square s) {
-  return Square(int(s) ^ (bool(perspective) * 63));
+  return Square(int(s) ^ (bool(perspective) * SQ_A8));
 }
 // Find the index of the feature quantity from the king position and PieceSquare
@@ -22,7 +22,7 @@
 #define NNUE_ARCHITECTURE_H_INCLUDED
 // Defines the network structure
-#include "architectures/halfkp_256x2-32-32.h"
+#include "architectures/halfkp-cr-ep_256x2-32-32.h"
 namespace Eval::NNUE {
@@ -69,7 +69,7 @@
 namespace Eval::NNUE {
  // Version of the evaluation file
-  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+  constexpr std::uint32_t kVersion = 0x7AF32F17u;
  // Constant used in evaluation value calculation
  constexpr int FV_SCALE = 16;
@@ -1,4 +1,4 @@
-/*
+/*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
  Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file)
@@ -40,6 +40,7 @@ namespace Eval::NNUE {
  #define vec_store(a,b) _mm512_storeA_si512(a,b)
  #define vec_add_16(a,b) _mm512_add_epi16(a,b)
  #define vec_sub_16(a,b) _mm512_sub_epi16(a,b)
  #define vec_zero _mm512_setzero_si512()
  static constexpr IndexType kNumRegs = 8; // only 8 are needed
  #elif USE_AVX2
@@ -48,6 +49,7 @@ namespace Eval::NNUE {
  #define vec_store(a,b) _mm256_storeA_si256(a,b)
  #define vec_add_16(a,b) _mm256_add_epi16(a,b)
  #define vec_sub_16(a,b) _mm256_sub_epi16(a,b)
  #define vec_zero _mm256_setzero_si256()
  static constexpr IndexType kNumRegs = 16;
  #elif USE_SSE2
@@ -56,6 +58,7 @@ namespace Eval::NNUE {
  #define vec_store(a,b) *(a)=(b)
  #define vec_add_16(a,b) _mm_add_epi16(a,b)
  #define vec_sub_16(a,b) _mm_sub_epi16(a,b)
  #define vec_zero _mm_setzero_si128()
  static constexpr IndexType kNumRegs = Is64Bit ? 16 : 8;
  #elif USE_MMX
@@ -64,6 +67,7 @@ namespace Eval::NNUE {
  #define vec_store(a,b) *(a)=(b)
  #define vec_add_16(a,b) _mm_add_pi16(a,b)
  #define vec_sub_16(a,b) _mm_sub_pi16(a,b)
  #define vec_zero _mm_setzero_si64()
  static constexpr IndexType kNumRegs = 8;
  #elif USE_NEON
@@ -72,6 +76,7 @@ namespace Eval::NNUE {
  #define vec_store(a,b) *(a)=(b)
  #define vec_add_16(a,b) vaddq_s16(a,b)
  #define vec_sub_16(a,b) vsubq_s16(a,b)
  #define vec_zero {0}
  static constexpr IndexType kNumRegs = 16;
  #else
@@ -193,6 +198,12 @@ namespace Eval::NNUE {
              &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m256i sum1 = _mm256_loadA_si256(
            &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
            sum0 = _mm256_add_epi16(sum0, reinterpret_cast<const __m256i*>(
                accumulation[perspectives[p]][i])[j * 2 + 0]);
            sum1 = _mm256_add_epi16(sum1, reinterpret_cast<const __m256i*>(
                accumulation[perspectives[p]][i])[j * 2 + 1]);
          }
          _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
              _mm256_packs_epi16(sum0, sum1), kZero), kControl));
        }
@@ -204,6 +215,12 @@ namespace Eval::NNUE {
              accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m128i sum1 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
              accumulation[perspectives[p]][0])[j * 2 + 1]);
          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
            sum0 = _mm_add_epi16(sum0, reinterpret_cast<const __m128i*>(
                accumulation[perspectives[p]][i])[j * 2 + 0]);
            sum1 = _mm_add_epi16(sum1, reinterpret_cast<const __m128i*>(
                accumulation[perspectives[p]][i])[j * 2 + 1]);
          }
      const __m128i packedbytes = _mm_packs_epi16(sum0, sum1);
          _mm_store_si128(&out[j],
@@ -224,6 +241,12 @@ namespace Eval::NNUE {
              accumulation[perspectives[p]][0])[j * 2 + 0]);
          __m64 sum1 = *(&reinterpret_cast<const __m64*>(
              accumulation[perspectives[p]][0])[j * 2 + 1]);
          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
            sum0 = _mm_add_pi16(sum0, reinterpret_cast<const __m64*>(
                accumulation[perspectives[p]][i])[j * 2 + 0]);
            sum1 = _mm_add_pi16(sum1, reinterpret_cast<const __m64*>(
                accumulation[perspectives[p]][i])[j * 2 + 1]);
          }
          const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
          out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
        }
@@ -233,12 +256,19 @@ namespace Eval::NNUE {
        for (IndexType j = 0; j < kNumChunks; ++j) {
          int16x8_t sum = reinterpret_cast<const int16x8_t*>(
              accumulation[perspectives[p]][0])[j];
          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
            sum = vaddq_s16(sum, reinterpret_cast<const int16x8_t*>(
                accumulation[perspectives[p]][i])[j]);
          }
          out[j] = vmax_s8(vqmovn_s16(sum), kZero);
        }
  #else
        for (IndexType j = 0; j < kHalfDimensions; ++j) {
          BiasType sum = accumulation[static_cast<int>(perspectives[p])][0][j];
          for (IndexType i = 1; i < kRefreshTriggers.size(); ++i) {
            sum += accumulation[static_cast<int>(perspectives[p])][i][j];
          }
          output[offset + j] = static_cast<OutputType>(
              std::max<int>(0, std::min<int>(127, sum)));
        }
@@ -255,44 +285,55 @@ namespace Eval::NNUE {
    void RefreshAccumulator(const Position& pos) const {
      auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
+      for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
-      Features::IndexList active_indices[2];
+        Features::IndexList active_indices[2];
-      RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
+        RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i],
-                                       active_indices);
+                                         active_indices);
-      for (Color perspective : { WHITE, BLACK }) {
+        for (Color perspective : { WHITE, BLACK }) {
-  #ifdef TILING
+    #ifdef TILING
-        for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
+          for (unsigned j = 0; j < kHalfDimensions / kTileHeight; ++j) {
-          auto biasesTile = reinterpret_cast<const vec_t*>(
+            auto accTile = reinterpret_cast<vec_t*>(
-              &biases_[j * kTileHeight]);
+                &accumulator.accumulation[perspective][i][j * kTileHeight]);
-          auto accTile = reinterpret_cast<vec_t*>(
+            vec_t acc[kNumRegs];
              &accumulator.accumulation[perspective][i][j * kTileHeight]);
          vec_t acc[kNumRegs];
-          for (unsigned k = 0; k < kNumRegs; ++k)
+            if (i == 0) {
-            acc[k] = biasesTile[k];
+              auto biasesTile = reinterpret_cast<const vec_t*>(
                  &biases_[j * kTileHeight]);
              for (unsigned k = 0; k < kNumRegs; ++k)
                acc[k] = biasesTile[k];
            } else {
              for (unsigned k = 0; k < kNumRegs; ++k)
                acc[k] = vec_zero;
            }
            for (const auto index : active_indices[perspective]) {
              const IndexType offset = kHalfDimensions * index + j * kTileHeight;
              auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-          for (const auto index : active_indices[perspective]) {
+              for (unsigned k = 0; k < kNumRegs; ++k)
-            const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+                acc[k] = vec_add_16(acc[k], column[k]);
-            auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
+            }
-            for (unsigned k = 0; k < kNumRegs; ++k)
+            for (unsigned k = 0; k < kNumRegs; k++)
-              acc[k] = vec_add_16(acc[k], column[k]);
+              vec_store(&accTile[k], acc[k]);
          }
    #else
          if (i == 0) {
            std::memcpy(accumulator.accumulation[perspective][i], biases_,
                        kHalfDimensions * sizeof(BiasType));
          } else {
            std::memset(accumulator.accumulation[perspective][i], 0,
                        kHalfDimensions * sizeof(BiasType));
          }
-          for (unsigned k = 0; k < kNumRegs; k++)
+          for (const auto index : active_indices[perspective]) {
-            vec_store(&accTile[k], acc[k]);
+            const IndexType offset = kHalfDimensions * index;
        }
  #else
        std::memcpy(accumulator.accumulation[perspective][i], biases_,
            kHalfDimensions * sizeof(BiasType));
-        for (const auto index : active_indices[perspective]) {
+            for (IndexType j = 0; j < kHalfDimensions; ++j)
-          const IndexType offset = kHalfDimensions * index;
+              accumulator.accumulation[perspective][i][j] += weights_[offset + j];
-
+          }
-          for (IndexType j = 0; j < kHalfDimensions; ++j)
+    #endif
            accumulator.accumulation[perspective][i][j] += weights_[offset + j];
        }
-  #endif
+
      }
  #if defined(USE_MMX)
@@ -307,86 +348,96 @@ namespace Eval::NNUE {
      const auto prev_accumulator = pos.state()->previous->accumulator;
      auto& accumulator = pos.state()->accumulator;
-      IndexType i = 0;
+      for (IndexType i = 0; i < kRefreshTriggers.size(); ++i) {
-      Features::IndexList removed_indices[2], added_indices[2];
+        Features::IndexList removed_indices[2], added_indices[2];
-      bool reset[2];
+        bool reset[2];
-      RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
+        RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i],
-                                        removed_indices, added_indices, reset);
+                                          removed_indices, added_indices, reset);
-  #ifdef TILING
+    #ifdef TILING
-      for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
+        for (IndexType j = 0; j < kHalfDimensions / kTileHeight; ++j) {
          for (Color perspective : { WHITE, BLACK }) {
            auto accTile = reinterpret_cast<vec_t*>(
                &accumulator.accumulation[perspective][i][j * kTileHeight]);
            vec_t acc[kNumRegs];
            if (reset[perspective]) {
              if (i == 0) {
                auto biasesTile = reinterpret_cast<const vec_t*>(
                    &biases_[j * kTileHeight]);
                for (unsigned k = 0; k < kNumRegs; ++k)
                  acc[k] = biasesTile[k];
              } else {
                for (unsigned k = 0; k < kNumRegs; ++k)
                  acc[k] = vec_zero;
              }
            } else {
              auto prevAccTile = reinterpret_cast<const vec_t*>(
                  &prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
              for (IndexType k = 0; k < kNumRegs; ++k)
                acc[k] = vec_load(&prevAccTile[k]);
              // Difference calculation for the deactivated features
              for (const auto index : removed_indices[perspective]) {
                const IndexType offset = kHalfDimensions * index + j * kTileHeight;
                auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
                for (IndexType k = 0; k < kNumRegs; ++k)
                  acc[k] = vec_sub_16(acc[k], column[k]);
              }
            }
            { // Difference calculation for the activated features
              for (const auto index : added_indices[perspective]) {
                const IndexType offset = kHalfDimensions * index + j * kTileHeight;
                auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
                for (IndexType k = 0; k < kNumRegs; ++k)
                  acc[k] = vec_add_16(acc[k], column[k]);
              }
            }
            for (IndexType k = 0; k < kNumRegs; ++k)
              vec_store(&accTile[k], acc[k]);
          }
        }
    #if defined(USE_MMX)
        _mm_empty();
    #endif
    #else
        for (Color perspective : { WHITE, BLACK }) {
          auto accTile = reinterpret_cast<vec_t*>(
              &accumulator.accumulation[perspective][i][j * kTileHeight]);
          vec_t acc[kNumRegs];
          if (reset[perspective]) {
-            auto biasesTile = reinterpret_cast<const vec_t*>(
+            if (i == 0) {
-                &biases_[j * kTileHeight]);
+              std::memcpy(accumulator.accumulation[perspective][i], biases_,
-            for (unsigned k = 0; k < kNumRegs; ++k)
+                          kHalfDimensions * sizeof(BiasType));
-              acc[k] = biasesTile[k];
+            } else {
              std::memset(accumulator.accumulation[perspective][i], 0,
                          kHalfDimensions * sizeof(BiasType));
            }
          } else {
-            auto prevAccTile = reinterpret_cast<const vec_t*>(
+            std::memcpy(accumulator.accumulation[perspective][i],
-                &prev_accumulator.accumulation[perspective][i][j * kTileHeight]);
+                        prev_accumulator.accumulation[perspective][i],
-            for (IndexType k = 0; k < kNumRegs; ++k)
+                        kHalfDimensions * sizeof(BiasType));
              acc[k] = vec_load(&prevAccTile[k]);
            // Difference calculation for the deactivated features
            for (const auto index : removed_indices[perspective]) {
-              const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+              const IndexType offset = kHalfDimensions * index;
              auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-              for (IndexType k = 0; k < kNumRegs; ++k)
+              for (IndexType j = 0; j < kHalfDimensions; ++j)
-                acc[k] = vec_sub_16(acc[k], column[k]);
+                accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
            }
          }
          { // Difference calculation for the activated features
            for (const auto index : added_indices[perspective]) {
-              const IndexType offset = kHalfDimensions * index + j * kTileHeight;
+              const IndexType offset = kHalfDimensions * index;
              auto column = reinterpret_cast<const vec_t*>(&weights_[offset]);
-              for (IndexType k = 0; k < kNumRegs; ++k)
+              for (IndexType j = 0; j < kHalfDimensions; ++j)
-                acc[k] = vec_add_16(acc[k], column[k]);
+                accumulator.accumulation[perspective][i][j] += weights_[offset + j];
            }
          }
          for (IndexType k = 0; k < kNumRegs; ++k)
            vec_store(&accTile[k], acc[k]);
        }
    #endif
      }
  #if defined(USE_MMX)
      _mm_empty();
  #endif
  #else
      for (Color perspective : { WHITE, BLACK }) {
        if (reset[perspective]) {
          std::memcpy(accumulator.accumulation[perspective][i], biases_,
                      kHalfDimensions * sizeof(BiasType));
        } else {
          std::memcpy(accumulator.accumulation[perspective][i],
                      prev_accumulator.accumulation[perspective][i],
                      kHalfDimensions * sizeof(BiasType));
          // Difference calculation for the deactivated features
          for (const auto index : removed_indices[perspective]) {
            const IndexType offset = kHalfDimensions * index;
            for (IndexType j = 0; j < kHalfDimensions; ++j)
              accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
          }
        }
        { // Difference calculation for the activated features
          for (const auto index : added_indices[perspective]) {
            const IndexType offset = kHalfDimensions * index;
            for (IndexType j = 0; j < kHalfDimensions; ++j)
              accumulator.accumulation[perspective][i][j] += weights_[offset + j];
          }
        }
      }
  #endif
      accumulator.computed_accumulation = true;
    }
@@ -194,7 +194,7 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {
      weights_(),
      biases_diff_(),
      weights_diff_(),
-      momentum_(0.0),
+      momentum_(0.2),
      learning_rate_scale_(1.0) {
    DequantizeParameters();
  }
@@ -232,7 +232,7 @@ class Trainer<FeatureTransformer> {
      biases_(),
      weights_(),
      biases_diff_(),
-      momentum_(0.0),
+      momentum_(0.2),
      learning_rate_scale_(1.0) {
    min_pre_activation_ = std::numeric_limits<LearnFloatType>::max();
    max_pre_activation_ = std::numeric_limits<LearnFloatType>::lowest();
@@ -1351,9 +1351,9 @@ bool Position::pos_is_ok() const {
 // Add a function that unpacks a packed sfen directly, for speed. It is fairly involved.
 // It is written by combining packer::unpack() and Position::set().
 // If the passed position is invalid and an error occurs, a non-zero value is returned.
-int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th, bool mirror)
+int Position::set_from_packed_sfen(const Learner::PackedSfen& sfen , StateInfo* si, Thread* th)
 {
-  return Learner::set_from_packed_sfen(*this, sfen, si, th, mirror);
+  return Learner::set_from_packed_sfen(*this, sfen, si, th);
 }
 // Give the board, hand piece, and turn, and return the sfen.
@@ -177,7 +177,7 @@ public:
  // --sfenization helper
-  friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror);
+  friend int Learner::set_from_packed_sfen(Position& pos, const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
  // Get the packed sfen. Returns to the buffer specified in the argument.
  // Do not include gamePly in pack.
@@ -187,7 +187,7 @@ public:
  // Equivalent to pos.set(sfen_unpack(data),si,th);.
  // If there is a problem with the passed phase and there is an error, non-zero is returned.
  // PackedSfen does not include gamePly so it cannot be restored. If you want to set it, specify it with an argument.
-  int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th, bool mirror = false);
+  int set_from_packed_sfen(const Learner::PackedSfen& sfen, StateInfo* si, Thread* th);
  void clear() { std::memset(this, 0, sizeof(Position)); }
@@ -40,20 +40,12 @@ namespace Search {
  LimitsType Limits;
 }
 namespace Tablebases {
  int Cardinality;
  bool RootInTB;
  bool UseRule50;
  Depth ProbeDepth;
 }
 namespace TB = Tablebases;
 using std::string;
 using Eval::evaluate;
 using namespace Search;
 bool Search::prune_at_shallow_depth = true;
 namespace {
  // Different node types, used as a template parameter
@@ -712,27 +704,27 @@ namespace {
    }
    // Step 5. Tablebases probe
-    if (!rootNode && TB::Cardinality)
+    if (!rootNode && thisThread->Cardinality)
    {
        int piecesCount = pos.count<ALL_PIECES>();
-        if (    piecesCount <= TB::Cardinality
+        if (    piecesCount <= thisThread->Cardinality
-            && (piecesCount <  TB::Cardinality || depth >= TB::ProbeDepth)
+            && (piecesCount <  thisThread->Cardinality || depth >= thisThread->ProbeDepth)
            &&  pos.rule50_count() == 0
            && !pos.can_castle(ANY_CASTLING))
        {
-            TB::ProbeState err;
+            Tablebases::ProbeState err;
-            TB::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
+            Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &err);
            // Force check of time on the next occasion
            if (thisThread == Threads.main())
                static_cast<MainThread*>(thisThread)->callsCnt = 0;
-            if (err != TB::ProbeState::FAIL)
+            if (err != Tablebases::ProbeState::FAIL)
            {
                thisThread->tbHits.fetch_add(1, std::memory_order_relaxed);
-                int drawScore = TB::UseRule50 ? 1 : 0;
+                int drawScore = thisThread->UseRule50 ? 1 : 0;
                // use the range VALUE_MATE_IN_MAX_PLY to VALUE_TB_WIN_IN_MAX_PLY to score
                value =  wdl < -drawScore ? VALUE_MATED_IN_MAX_PLY + ss->ply + 1
@@ -992,7 +984,9 @@ moves_loop: // When in check, search starts from here
      ss->moveCount = ++moveCount;
-      if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000)
+      if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000
          && !Limits.silent
          )
          sync_cout << "info depth " << depth
                    << " currmove " << UCI::move(move, pos.is_chess960())
                    << " currmovenumber " << moveCount + thisThread->pvIdx << sync_endl;
@@ -1009,6 +1003,7 @@ moves_loop: // When in check, search starts from here
      // Step 13. Pruning at shallow depth (~200 Elo)
      if (  !rootNode
          && (PvNode ? prune_at_shallow_depth : true)
          && pos.non_pawn_material(us)
          && bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
      {
@@ -1520,6 +1515,7 @@ moves_loop: // When in check, search starts from here
      // Futility pruning
      if (   !ss->inCheck
          && Search::prune_at_shallow_depth
          && !givesCheck
          &&  futilityBase > -VALUE_KNOWN_WIN
          && !pos.advanced_pawn_push(move))
@@ -1547,6 +1543,7 @@ moves_loop: // When in check, search starts from here
      // Do not search moves with negative SEE values
      if (   !ss->inCheck
          && Search::prune_at_shallow_depth
          && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move))
          && !pos.see_ge(move))
          continue;
@@ -1569,6 +1566,7 @@ moves_loop: // When in check, search starts from here
      // CounterMove based pruning
      if (  !captureOrPromotion
          && Search::prune_at_shallow_depth
          && moveCount
          && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
          && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
@@ -1839,7 +1837,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
  size_t pvIdx = pos.this_thread()->pvIdx;
  size_t multiPV = std::min((size_t)Options["MultiPV"], rootMoves.size());
  uint64_t nodesSearched = Threads.nodes_searched();
-  uint64_t tbHits = Threads.tb_hits() + (TB::RootInTB ? rootMoves.size() : 0);
+  uint64_t tbHits = Threads.tb_hits() + (pos.this_thread()->rootInTB ? rootMoves.size() : 0);
  for (size_t i = 0; i < multiPV; ++i)
  {
@@ -1854,7 +1852,7 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) {
      if (v == -VALUE_INFINITE)
          v = VALUE_ZERO;
-      bool tb = TB::RootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
+      bool tb = pos.this_thread()->rootInTB && abs(v) < VALUE_MATE_IN_MAX_PLY;
      v = tb ? rootMoves[i].tbScore : v;
      if (ss.rdbuf()->in_avail()) // Not at first line
@@ -1921,34 +1919,34 @@ bool RootMove::extract_ponder_from_tt(Position& pos) {
 void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
-    RootInTB = false;
+    auto& rootInTB = pos.this_thread()->rootInTB;
-    UseRule50 = bool(Options["Syzygy50MoveRule"]);
+    auto& cardinality = pos.this_thread()->Cardinality;
-    ProbeDepth = int(Options["SyzygyProbeDepth"]);
+    auto& probeDepth = pos.this_thread()->ProbeDepth;
-    Cardinality = int(Options["SyzygyProbeLimit"]);
+    rootInTB = false;
    bool dtz_available = true;
    // Tables with fewer pieces than SyzygyProbeLimit are searched with
    // ProbeDepth == DEPTH_ZERO
-    if (Cardinality > MaxCardinality)
+    if (cardinality > Tablebases::MaxCardinality)
    {
-        Cardinality = MaxCardinality;
+        cardinality = Tablebases::MaxCardinality;
-        ProbeDepth = 0;
+        probeDepth = 0;
    }
-    if (Cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
+    if (cardinality >= popcount(pos.pieces()) && !pos.can_castle(ANY_CASTLING))
    {
        // Rank moves using DTZ tables
-        RootInTB = root_probe(pos, rootMoves);
+        rootInTB = root_probe(pos, rootMoves);
-        if (!RootInTB)
+        if (!rootInTB)
        {
            // DTZ tables are missing; try to rank moves using WDL tables
            dtz_available = false;
-            RootInTB = root_probe_wdl(pos, rootMoves);
+            rootInTB = root_probe_wdl(pos, rootMoves);
        }
    }
-    if (RootInTB)
+    if (rootInTB)
    {
        // Sort moves according to TB rank
        std::stable_sort(rootMoves.begin(), rootMoves.end(),
@@ -1956,7 +1954,7 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
        // Probe during search only if DTZ is not available and we are winning
        if (dtz_available || rootMoves[0].tbScore <= VALUE_DRAW)
-            Cardinality = 0;
+            cardinality = 0;
    }
    else
    {
@@ -1964,4 +1962,277 @@ void Tablebases::rank_root_moves(Position& pos, Search::RootMoves& rootMoves) {
        for (auto& m : rootMoves)
            m.tbRank = 0;
    }
 }
 // --- expose the functions such as fixed depth search used for learning to the outside
 namespace Learner
 {
  // For learning, provide entry points that allow search() and qsearch() to be called from a single thread.
  // In hindsight, it might have been better to have a Searcher object and a separate
  // transposition table for each thread, as Apery does.
  // Initialization for learning.
  // Called from Learner::search() and Learner::qsearch().
  // Prepares the search stack and the per-thread search state of
  // pos.this_thread() so that ::search() / ::qsearch() can be invoked directly.
  //
  // pos : position to search from; its this_thread() supplies the state that is reset.
  // ss  : pointer into a caller-owned Stack array. Entries (ss - 7) .. (ss + 2)
  //       are zeroed, so the caller must own at least that range
  //       (the callers in this namespace pass stack + 7).
  //
  // Besides resetting counters, this rebuilds th->rootMoves from the legal moves
  // of pos, reloads the Syzygy options into the thread and ranks the root moves
  // with the tablebases.
  void init_for_search(Position& pos, Stack* ss)
  {
    // RootNode requires ss->ply == 0.
    // Zero-filling the stack entries guarantees that (ss->ply becomes 0).
    std::memset(ss - 7, 0, 10 * sizeof(Stack));
    // Reset the state held by this_thread.
    {
      auto th = pos.this_thread();
      th->completedDepth = 0;
      th->selDepth = 0;
      th->rootDepth = 0;
      th->nmpMinPly = th->bestMoveChanges = 0;
      th->ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;
      // Zero initialization of the number of search nodes
      th->nodes = 0;
      // Clearing all history tables is deliberately left disabled: it takes some time, and since
      // the searches done here are shallow it is unclear whether it helps or hurts.
      // th->clear();
      int ct = int(Options["Contempt"]) * PawnValueEg / 100; // From centipawns
      Color us = pos.side_to_move();
      // In analysis mode, adjust contempt in accordance with user preference
      if (Limits.infinite || Options["UCI_AnalyseMode"])
        ct = Options["Analysis Contempt"] == "Off" ? 0
        : Options["Analysis Contempt"] == "Both" ? ct
        : Options["Analysis Contempt"] == "White" && us == BLACK ? -ct
        : Options["Analysis Contempt"] == "Black" && us == WHITE ? -ct
        : ct;
      // Evaluation score is from the white point of view
      th->contempt = (us == WHITE ? make_score(ct, ct / 2)
        : -make_score(ct, ct / 2));
      // Point the seven stack entries below ss at a fixed continuation-history slot.
      for (int i = 7; i > 0; i--)
          (ss - i)->continuationHistory = &th->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel
      // Rebuild rootMoves from every legal move in the position.
      auto& rootMoves = th->rootMoves;
      rootMoves.clear();
      for (auto m: MoveList<LEGAL>(pos))
        rootMoves.push_back(Search::RootMove(m));
      // The position must have at least one legal move; this is only checked
      // in builds where assert() is active.
      assert(!rootMoves.empty());
      // Load the Syzygy tablebase settings into the thread.
      th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
      th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
      th->Cardinality = int(Options["SyzygyProbeLimit"]);
      // Tables with fewer pieces than SyzygyProbeLimit are searched with
      // ProbeDepth == DEPTH_ZERO
      if (th->Cardinality > Tablebases::MaxCardinality)
      {
          th->Cardinality = Tablebases::MaxCardinality;
          th->ProbeDepth = 0;
      }
      Tablebases::rank_root_moves(pos, rootMoves);
    }
  }
  // A pair of evaluation value and PV (principal variation). Returned by Learner::search(),Learner::qsearch().
  typedef std::pair<Value, std::vector<Move> > ValueAndPV;
  // Quiescence search from the current position, for use by the learner.
  //
  // Precondition) Search thread is set by pos.set_this_thread(Threads[thread_id]).
  // Also, when Threads.stop arrives, the search is interrupted, so the PV at that time is not correct.
  // After returning from qsearch(), if Threads.stop == true, do not use the search result.
  // Also, note that Threads.stop must be false before calling, otherwise the search is
  // interrupted and returns immediately.
  //
  // Returns the value together with the PV.
  //  - If the position is a draw, { VALUE_DRAW, {} } is returned.
  //  - If there is no legal move, { mated_in(1), {} } is returned.
  // In both of these cases the PV is empty and the thread's search state is left untouched.
  //
  // The search window cannot be specified by the caller: the search writes to the
  // transposition table, and values produced with a narrow window (i.e. values that
  // only reflect a cutoff) would have a bad effect on learning. The full window
  // (-VALUE_INFINITE, VALUE_INFINITE) is always used.
  ValueAndPV qsearch(Position& pos)
  {
    // Terminal positions are handled before init_for_search(): that function asserts
    // that the root move list is non-empty and runs tablebase ranking on it, so it
    // must not be reached for a position without legal moves.
    if (pos.is_draw(0)) {
      // Return draw value if draw.
      return { VALUE_DRAW, {} };
    }
    // No legal move available?
    if (MoveList<LEGAL>(pos).size() == 0)
    {
      // Return the mated value.
      // NOTE(review): this value is also returned when the side to move is
      // stalemated (no legal move but not in check) - confirm this is intended.
      return { mated_in(/*ss->ply*/ 0 + 1), {} };
    }
    Stack stack[MAX_PLY+10], *ss = stack+7;
    Move  pv[MAX_PLY+1];
    init_for_search(pos, ss);
    ss->pv = pv; // The search writes the PV here, so it must point to a real buffer.
    auto bestValue = ::qsearch<PV>(pos, ss, -VALUE_INFINITE, VALUE_INFINITE, 0);
    // Copy the PV obtained, stopping at the first entry that is not a valid move.
    std::vector<Move> pvs;
    for (Move* p = &ss->pv[0]; is_ok(*p); ++p)
      pvs.push_back(*p);
    return ValueAndPV(bestValue, pvs);
  }
  // Normal (fixed-depth) search. The depth is given as an integer in depth_.
  // For example, to obtain the score of a 3-ply search:
  //   auto v = search(pos,3);
  // The evaluation value is obtained in v.first and the PV in v.second.
  // A negative depth_ returns the static evaluation with an empty PV;
  // depth_ == 0 delegates to qsearch().
  // When multi pv is enabled, the PV of each line can be read from pos.this_thread()->rootMoves[N].pv.
  // Specify multi pv with the argument multiPV of this function. (The value of Options["MultiPV"] is ignored)
  // nodesLimit, when non-zero, ends the iterative deepening once the thread's node
  // count reaches it (after scaling by multiPV); 0 means no node limit.
  //
  // Declaration-win judgment is not done at the root (because it is troublesome to handle), so it is not done here.
  // It must be handled by the caller.
  //
  // Precondition) Search thread is set by pos.set_this_thread(Threads[thread_id]).
  // Also, when Threads.stop arrives, the search is interrupted, so the PV at that time is not correct.
  // After returning from search(), if Threads.stop == true, do not use the search result.
  // Also, note that Threads.stop must be false before calling, otherwise the search is
  // interrupted and returns immediately.
  //
  // NOTE(review): rootMoves[0] is indexed at the end without checking for an empty
  // list; init_for_search() only asserts that the position has a legal move.
  // Callers presumably must not pass a position without legal moves - confirm.
  ValueAndPV search(Position& pos, int depth_, size_t multiPV /* = 1 */, uint64_t nodesLimit /* = 0 */)
  {
    std::vector<Move> pvs;
    Depth depth = depth_;
    // Negative depth: static evaluation only, no PV.
    if (depth < 0)
      return std::pair<Value, std::vector<Move>>(Eval::evaluate(pos), std::vector<Move>());
    // Depth 0: quiescence search.
    if (depth == 0)
      return qsearch(pos);
    Stack stack[MAX_PLY + 10], * ss = stack + 7;
    Move pv[MAX_PLY + 1];
    init_for_search(pos, ss);
 	ss->pv = pv; // The search writes the PV here, so it must point to a real buffer.
    // Bind references to the variables of this_thread that the loop below updates
    auto th = pos.this_thread();
    auto& rootDepth = th->rootDepth;
    auto& pvIdx = th->pvIdx;
    auto& pvLast = th->pvLast;
    auto& rootMoves = th->rootMoves;
    auto& completedDepth = th->completedDepth;
    auto& selDepth = th->selDepth;
     // Number of best lines to search at the root; taken from the argument, not from the option.
     //size_t multiPV = Options["MultiPV"];
     // Do not exceed the number of root moves in this position
    multiPV = std::min(multiPV, rootMoves.size());
     // Scale the node limit by MultiPV, so that with a fixed depth and MultiPV each candidate line gets the same node budget.
    nodesLimit *= multiPV;
    Value alpha = -VALUE_INFINITE;
    Value beta = VALUE_INFINITE;
    Value delta = -VALUE_INFINITE;
    Value bestValue = -VALUE_INFINITE;
    // Iterative deepening: rootDepth is incremented before each iteration.
    while ((rootDepth += 1) <= depth
      // exit this loop even if the node limit is exceeded
      // The number of search nodes is passed in the argument of this function.
      && !(nodesLimit /* limited nodes */ && th->nodes.load(std::memory_order_relaxed) >= nodesLimit)
      )
    {
      // Remember the scores of the previous iteration; they centre the aspiration window below.
      for (RootMove& rm : rootMoves)
        rm.previousScore = rm.score;
      size_t pvFirst = 0;
      pvLast = 0;
      // MultiPV loop. We perform a full root search for each PV line
      for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx)
      {
        // Advance [pvFirst, pvLast) to the next run of root moves sharing the same tbRank.
        if (pvIdx == pvLast)
        {
          pvFirst = pvLast;
          for (pvLast++; pvLast < rootMoves.size(); pvLast++)
            if (rootMoves[pvLast].tbRank != rootMoves[pvFirst].tbRank)
              break;
        }
        // Reset selDepth for each depth and PV line
        selDepth = 0;
        // Use an aspiration window around the previous score once rootDepth reaches 4.
        if (rootDepth >= 4)
        {
            Value prev = rootMoves[pvIdx].previousScore;
            delta = Value(17);
            alpha = std::max(prev - delta,-VALUE_INFINITE);
            beta  = std::min(prev + delta, VALUE_INFINITE);
        }
        // Re-search with a widened window until the value falls strictly inside (alpha, beta).
        while (true)
        {
          Depth adjustedDepth = std::max(1, rootDepth);
          bestValue = ::search<PV>(pos, ss, alpha, beta, adjustedDepth, false);
          stable_sort(rootMoves.begin() + pvIdx, rootMoves.end());
          //my_stable_sort(pos.this_thread()->thread_id(),&rootMoves[0] + pvIdx, rootMoves.size() - pvIdx);
          // Expand aspiration window for fail low/high.
          if (bestValue <= alpha)
          {
            // Fail low: pull beta towards the middle and lower alpha.
            beta = (alpha + beta) / 2;
            alpha = std::max(bestValue - delta, -VALUE_INFINITE);
          }
          else if (bestValue >= beta)
          {
            // Fail high: raise beta.
            beta = std::min(bestValue + delta, VALUE_INFINITE);
          }
          else
            break;
          // Grow the window step for the next re-search.
          delta += delta / 4 + 5;
          assert(-VALUE_INFINITE <= alpha && beta <= VALUE_INFINITE);
          // runaway check
          //assert(th->nodes.load(std::memory_order_relaxed) <= 1000000 );
        }
        // Keep the PV lines searched so far in sorted order.
        stable_sort(rootMoves.begin(), rootMoves.begin() + pvIdx + 1);
        //my_stable_sort(pos.this_thread()->thread_id() , &rootMoves[0] , pvIdx + 1);
      } // multi PV
      completedDepth = rootDepth;
    }
    // Copy the PV of rootMoves[0], stopping at the first entry that fails is_ok();
    // there may be a NULL_MOVE in the middle.
    // MOVE_WIN has never been observed here. (For now)
    for (Move move : rootMoves[0].pv)
    {
      if (!is_ok(move))
        break;
      pvs.push_back(move);
    }
    //sync_cout << rootDepth << sync_endl;
    // Considering multiPV, the score of rootMoves[0] is returned as bestValue.
    bestValue = rootMoves[0].score;
    return ValueAndPV(bestValue, pvs);
  }
 }
@@ -24,6 +24,7 @@
 #include "misc.h"
 #include "movepick.h"
 #include "types.h"
 #include "uci.h"
 class Position;
@@ -32,7 +33,7 @@ namespace Search {
 /// Threshold used for countermoves based pruning
 constexpr int CounterMovePruneThreshold = 0;
-extern bool prune_at_shallow_depth_on_pv_node;
+extern bool prune_at_shallow_depth;
 /// Stack struct keeps track of the information we need to remember from nodes
 /// shallower and deeper in the tree during the search. Each search thread has
@@ -111,6 +112,11 @@ void clear();
 } // namespace Search
 namespace Tablebases {
 extern int MaxCardinality;
 }
 namespace Learner {
  // A pair of evaluation value and PV (principal variation). Returned by Learner::search() and Learner::qsearch().
@@ -43,8 +43,6 @@ enum ProbeState {
    ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };
 extern int MaxCardinality;
 void init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
 int probe_dtz(Position& pos, ProbeState* result);
@@ -181,9 +181,6 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
          || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
          rootMoves.emplace_back(m);
  if (!rootMoves.empty())
      Tablebases::rank_root_moves(pos, rootMoves);
  // After ownership transfer 'states' becomes empty, so if we stop the search
  // and call 'go' again without setting a new position states.get() == NULL.
  assert(states.get() || setupStates.get());
@@ -203,6 +200,21 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
      th->rootMoves = rootMoves;
      th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
      th->rootState = setupStates->back();
      th->UseRule50 = bool(Options["Syzygy50MoveRule"]);
      th->ProbeDepth = int(Options["SyzygyProbeDepth"]);
      th->Cardinality = int(Options["SyzygyProbeLimit"]);
      // Tables with fewer pieces than SyzygyProbeLimit are searched with
      // ProbeDepth == DEPTH_ZERO
      if (th->Cardinality > Tablebases::MaxCardinality)
      {
          th->Cardinality = Tablebases::MaxCardinality;
          th->ProbeDepth = 0;
      }
      if (!rootMoves.empty())
          Tablebases::rank_root_moves(pos, rootMoves);
  }
  main()->start_searching();
@@ -73,6 +73,11 @@ public:
  CapturePieceToHistory captureHistory;
  ContinuationHistory continuationHistory[2][2];
  Score contempt;
  bool rootInTB;
  int Cardinality;
  bool UseRule50;
  Depth ProbeDepth;
 };
@@ -35,6 +35,9 @@ bool TranspositionTable::enable_transposition_table = true;
 void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
  if (!TranspositionTable::enable_transposition_table) {
      return;
  }
  // Preserve any existing move for the same position
  if (m || (uint16_t)k != key16)
      move16 = (uint16_t)m;
@@ -47,7 +47,7 @@ const char* StartFEN = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1
 void test_cmd(Position& pos, istringstream& is)
 {
    // Initialize as it may be searched.
-    Eval::init_NNUE();
+    Eval::NNUE::init();
    std::string param;
    is >> param;
@@ -210,15 +210,15 @@ namespace {
         << "\nNodes/second    : " << 1000 * nodes / elapsed << endl;
  }
  // The win rate model returns the probability (per mille) of winning given an eval
  // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
  // This overload forwards to UCI::win_rate_model_double() and converts the
  // floating-point result to an int.
  int win_rate_model(Value v, int ply) {
     // Return win rate in per mille (rounded to nearest): 0.5 is added before
     // the int conversion.
     return int(0.5 + UCI::win_rate_model_double(v, ply));
  }
 } // namespace
 // The win rate model returns the probability (per mille) of winning given an eval
 // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
 // Integer-valued wrapper around win_rate_model_double().
 int UCI::win_rate_model(Value v, int ply) {
   // Return win rate in per mille (rounded to nearest): 0.5 is added before
   // the int conversion.
   return int(0.5 + win_rate_model_double(v, ply));
 }
 // The win rate model returns the probability (per mille) of winning given an eval
 // and a game-ply. The model fits rather accurately the LTC fishtest statistics.
 double UCI::win_rate_model_double(double v, int ply) {
@@ -72,6 +72,7 @@ std::string square(Square s);
 std::string move(Move m, bool chess960);
 std::string pv(const Position& pos, Depth depth, Value alpha, Value beta);
 std::string wdl(Value v, int ply);
 int win_rate_model(Value v, int ply);
 double win_rate_model_double(double v, int ply);
 Move to_move(const Position& pos, std::string& str);
@@ -41,15 +41,14 @@ void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
 // Option handlers: each one forwards the given option to the call shown in its body.
 // on_logger passes the option to start_logger().
 void on_logger(const Option& o) { start_logger(o); }
 // on_threads converts the option to size_t and passes it to Threads.set().
 void on_threads(const Option& o) { Threads.set(size_t(o)); }
 // on_tb_path passes the option to Tablebases::init().
 void on_tb_path(const Option& o) { Tablebases::init(o); }
-void on_use_NNUE(const Option& ) { Eval::init_NNUE(); }
+void on_use_NNUE(const Option& ) { Eval::NNUE::init(); }
-void on_eval_file(const Option& ) { Eval::init_NNUE(); }
+void on_eval_file(const Option& ) { Eval::NNUE::init(); }
-void on_prune_at_shallow_depth_on_pv_node(const Option& o) {
+void on_prune_at_shallow_depth(const Option& o) {
-    Search::prune_at_shallow_depth_on_pv_node = o;
+    Search::prune_at_shallow_depth = o;
 }
 // Handler registered for the "EnableTranspositionTable" option: copies the option
 // value into TranspositionTable::enable_transposition_table. While that flag is
 // false, TTEntry::save() returns without storing anything.
 void on_enable_transposition_table(const Option& o) {
    TranspositionTable::enable_transposition_table = o;
 }
 void on_eval_file(const Option& ) { Eval::NNUE::init(); }
 /// Our case insensitive less() function as required by UCI protocol
 bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const {
@@ -87,26 +86,21 @@ void init(OptionsMap& o) {
  o["Syzygy50MoveRule"]      << Option(true);
  o["SyzygyProbeLimit"]      << Option(7, 0, 7);
  o["Use NNUE"]              << Option("true var true var false var pure", "true", on_use_NNUE);
-  // The default must follow the format nn-[SHA256 first 12 digits].nnue
+  o["EvalFile"]              << Option(EvalFileDefaultName, on_eval_file);
  // for the build process (profile-build and fishtest) to work.
  o["EvalFile"]              << Option("nn-82215d0fd0df.nnue", on_eval_file);
  // The evaluation function is normally loaded when "ucinewgame" is received, which requires
  // an evaluation function file in the new format to already exist.
  // The "test eval convert" command is what produces that file, but because no new-format file
  // exists yet, the engine terminates abnormally before the command can be executed.
  // Therefore this hidden option lets you suppress loading the evaluation function on "ucinewgame"
  // so that the "test eval convert" command can be run.
  o["SkipLoadingEval"]       << Option(false);
  // how many moves to use a fixed move
  // o["BookMoves"] << Option(16, 0, 10000);
  // When learning the evaluation function, you can change the folder in which the evaluation function is saved.
  // The default is "evalsave". This folder must be created in advance.
  // Subfolders such as "0/", "1/", ... are created automatically under this folder, and the evaluation function file is saved there.
  o["EvalSaveDir"] << Option("evalsave");
  // Prune at shallow depth on PV nodes. False is recommended when using fixed depth search.
-  o["PruneAtShallowDepthOnPvNode"] << Option(true, on_prune_at_shallow_depth_on_pv_node);
+  o["PruneAtShallowDepth"] << Option(true, on_prune_at_shallow_depth);
  // Enable transposition table.
  o["EnableTranspositionTable"] << Option(true, on_enable_transposition_table);
  o["EvalFile"]              << Option(EvalFileDefaultName, on_eval_file);
 }