Merge branch 'master' into trainer

2026-05-20 13:17:44 +00:00 · 2020-09-09 08:48:59 +08:00
parent d25657c439 0405f35403
commit 675d336ebb
43 changed files with 297 additions and 692 deletions
@@ -915,7 +915,7 @@ icc-profile-use:

 learn: config-sanity
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
-	EXTRACXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
+	EXTRACXXFLAGS=' -DEVAL_LEARN -DNNUE_EMBEDDING_OFF -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
 	EXTRALDFLAGS=' $(BLASLDFLAGS) -fopenmp  ' \
 	all

@@ -923,7 +923,7 @@ profile-learn: config-sanity objclean profileclean
 	@echo ""
 	@echo "Step 1/4. Building instrumented executable ..."
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) \
-	LEARNCXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
+	LEARNCXXFLAGS=' -DEVAL_LEARN -DNNUE_EMBEDDING_OFF -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
 	LEARNLDFLAGS='  $(BLASLDFLAGS) -fopenmp '
 	@echo ""
 	@echo "Step 2/4. Running benchmark for pgo-build ..."
@@ -932,7 +932,7 @@ profile-learn: config-sanity objclean profileclean
 	@echo "Step 3/4. Building optimized executable ..."
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) objclean
 	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use) \
-	LEARNCXXFLAGS=' -DEVAL_LEARN -DEVAL_NNUE -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
+	LEARNCXXFLAGS=' -DEVAL_LEARN -DNNUE_EMBEDDING_OFF -DENABLE_TEST_CMD -DUSE_BLAS $(BLASCXXFLAGS) -fopenmp ' \
 	LEARNLDFLAGS=' $(BLASLDFLAGS) -fopenmp '
 	@echo ""
 	@echo "Step 4/4. Deleting profile data ..."
@@ -164,5 +164,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
          ++posCounter;
      }

+  list.emplace_back("setoption name Use NNUE value true");
+
  return list;
 }
@@ -1,20 +1,8 @@
 #ifndef _EVALUATE_COMMON_H_
 #define _EVALUATE_COMMON_H_

-// A common header-like function for modern evaluation functions (EVAL_KPPT and EVAL_KPP_KKPT).
-
-#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
 #include <functional>

-// KK file name
-#define KK_BIN "KK_synthesized.bin"
-
-// KKP file name
-#define KKP_BIN "KKP_synthesized.bin"
-
-// KPP file name
-#define KPP_BIN "KPP_synthesized.bin"
-
 #include "../position.h"

 namespace Eval
@@ -46,19 +34,11 @@ namespace Eval
 	void init_grad(double eta1, uint64_t eta_epoch, double eta2, uint64_t eta2_epoch, double eta3);

 	// Add the gradient difference value to the gradient array for all features that appear in the current phase.
-	// freeze[0]: Flag that kk does not learn
-	// freeze[1]: Flag that kkp does not learn
-	// freeze[2]: Flag that kpp does not learn
-	// freeze[3]: Flag that kppp does not learn
-	void add_grad(Position& pos, Color rootColor, double delt_grad, const std::array<bool, 4>& freeze);
+	void add_grad(Position& pos, Color rootColor, double delt_grad);

 	// Do SGD or AdaGrad or something based on the current gradient.
 	// epoch: Generation counter (starting from 0)
-	// freeze[0]: Flag that kk does not learn
-	// freeze[1]: Flag that kkp does not learn
-	// freeze[2]: Flag that kpp does not learn
-	// freeze[3]: Flag that kppp does not learn
-	void update_weights(uint64_t epoch, const std::array<bool, 4>& freeze);
+	void update_weights(uint64_t epoch);

 	// Save the evaluation function parameters to a file.
 	// You can specify the extension added to the end of the file.
@@ -79,6 +59,4 @@ namespace Eval

 }

-#endif  // defined(EVAL_NNUE) || defined(EVAL_LEARN)
-
 #endif // _EVALUATE_KPPT_COMMON_H_
@@ -1014,8 +1014,10 @@ make_v:
 /// evaluation of the position from the point of view of the side to move.

 Value Eval::evaluate(const Position& pos) {
+
  if (Options["Training"]) {
    Value v = NNUE::evaluate(pos);
+
    // Damp down the evaluation linearly when shuffling
    v = v * (100 - pos.rule50_count()) / 100;

@@ -1024,12 +1026,19 @@ Value Eval::evaluate(const Position& pos) {

    return v;
  } else {
+    // Use classical eval if there is a large imbalance.
+    // If there is a moderate imbalance, use classical eval with probability 1/8,
+    // as derived from three bits of the node counter.
+    bool useClassical = abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
    bool classical = !Eval::useNNUE
-                  ||  abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
+                  ||  useClassical
+                  || (abs(eg_value(pos.psq_score())) > PawnValueMg / 4 && !(pos.this_thread()->nodes & 0xB));
    Value v = classical ? Evaluation<NO_TRACE>(pos).value()
                        : NNUE::evaluate(pos) * 5 / 4 + Tempo;

-    if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
+    if (   useClassical 
+        && Eval::useNNUE 
+        && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
        v = NNUE::evaluate(pos) * 5 / 4 + Tempo;

    // Damp down the evaluation linearly when shuffling
@@ -38,13 +38,11 @@ namespace Eval {
  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
  // for the build process (profile-build and fishtest) to work. Do not change the
  // name of the macro, as it is used in the Makefile.
-  #define EvalFileDefaultName   "nn-82215d0fd0df.nnue"
+  #define EvalFileDefaultName   "nn.bin"

  namespace NNUE {

    Value evaluate(const Position& pos);
-    Value compute_eval(const Position& pos);
-    void  update_eval(const Position& pos);
    bool  load_eval(std::string streamName, std::istream& stream);

  } // namespace NNUE
@@ -36,7 +36,7 @@
 #include <dirent.h>
 #endif

-#if defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)
 #include "../nnue/evaluate_nnue_learner.h"
 #include <climits>
 #include <shared_mutex>
@@ -839,9 +839,6 @@ namespace Learner
                }
                pos.do_move(next_move, states[ply]);

-                // Call node evaluate() for each difference calculation.
-                Eval::NNUE::update_eval(pos);
-
            } // for (int ply = 0; ; ++ply)

        } // while(!quit)
@@ -59,12 +59,12 @@
 // The objective function is the sum of squares of the difference in winning percentage
 // See learner.cpp for more information.

-//#define LOSS_FUNCTION_IS_WINNING_PERCENTAGE
+// #define LOSS_FUNCTION_IS_WINNING_PERCENTAGE

 // Objective function is cross entropy
 // See learner.cpp for more information.
 // So-called ordinary "rag cloth squeezer"
-//#define LOSS_FUNCTION_IS_CROSS_ENTOROPY
+// #define LOSS_FUNCTION_IS_CROSS_ENTOROPY

 // A version in which the objective function is cross entropy, but the win rate function is not passed
 // #define LOSS_FUNCTION_IS_CROSS_ENTOROPY_FOR_VALUE
@@ -83,19 +83,6 @@
 // rmse calculation is done in one thread, so it takes some time, so reducing the output is effective.
 #define LEARN_RMSE_OUTPUT_INTERVAL 1

-
-// ----------------------
-// learning from zero vector
-// ----------------------
-
-// Start learning the evaluation function parameters from the zero vector.
-// Initialize to zero, generate a game, learn from zero vector,
-// Game generation → If you repeat learning, you will get parameters that do not depend on the professional game. (maybe)
-// (very time consuming)
-
-//#define RESET_TO_ZERO_VECTOR
-
-
 // ----------------------
 // Floating point for learning
 // ----------------------
@@ -114,59 +101,6 @@ typedef float LearnFloatType;
 //#include "half_float.h"
 //typedef HalfFloat::float16 LearnFloatType;

-// ----------------------
-// save memory
-// ----------------------
-
-// Use a triangular array for the Weight array (of which is KPP) to save memory.
-// If this is used, the weight array for learning will be about 3 times as large as the evaluation function file.
-
-#define USE_TRIANGLE_WEIGHT_ARRAY
-
-// ----------------------
-// dimension down
-// ----------------------
-
-// Dimension reduction for mirrors (left/right symmetry) and inverse (forward/backward symmetry).
-// All on by default.
-
-// Dimension reduction using mirror and inverse for KK. (Unclear effect)
-// USE_KK_MIRROR_WRITE must be on when USE_KK_INVERSE_WRITE is on.
-#define USE_KK_MIRROR_WRITE
-#define USE_KK_INVERSE_WRITE
-
-// Dimension reduction using Mirror and Inverse for KKP. (Inverse is not so effective)
-// When USE_KKP_INVERSE_WRITE is turned on, USE_KKP_MIRROR_WRITE must also be turned on.
-#define USE_KKP_MIRROR_WRITE
-#define USE_KKP_INVERSE_WRITE
-
-// Perform dimension reduction using a mirror for KPP. (Turning this off requires double the teacher position)
-// KPP has no inverse. (Because there is only K on the front side)
-#define USE_KPP_MIRROR_WRITE
-
-// Perform a dimension reduction using a mirror for KPPP. (Turning this off requires double the teacher position)
-// KPPP has no inverse. (Because there is only K on the front side)
-#define USE_KPPP_MIRROR_WRITE
-
-// Reduce the dimension by KPP for learning the KKPP component.
-// Learning is very slow.
-// Do not use as it is not debugged.
-//#define USE_KKPP_LOWER_DIM
-
-
-// ======================
-// Settings for creating teacher phases
-// ======================
-
-// ----------------------
-// write out the draw
-// ----------------------
-
-// When you reach a draw, write it out as a teacher position
-// It's subtle whether it's better to do this.
-// #define LEARN_GENSFEN_USE_DRAW_RESULT
-
-
 // ======================
 // configure
 // ======================
@@ -54,7 +54,7 @@
 #include <dirent.h>
 #endif

-#if defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)
 #include "../nnue/evaluate_nnue_learner.h"
 #include <climits>
 #include <shared_mutex>
@@ -172,7 +172,7 @@ namespace Learner
    // When the objective function is the sum of squares of the difference in winning percentage
 #if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
 // function to calculate the gradient
-    double calc_grad(Value deep, Value shallow, PackedSfenValue& psv)
+    double calc_grad(Value deep, Value shallow, const PackedSfenValue& psv)
    {
        // The square of the win rate difference minimizes it in the objective function.
        // Objective function J = 1/2m Σ (win_rate(shallow)-win_rate(deep) )^2
@@ -667,14 +667,12 @@ namespace Learner
            learn_sum_entropy_win = 0.0;
            learn_sum_entropy = 0.0;
 #endif
-#if defined(EVAL_NNUE)
            newbob_scale = 1.0;
            newbob_decay = 1.0;
            newbob_num_trials = 2;
            best_loss = std::numeric_limits<double>::infinity();
            latest_loss_sum = 0.0;
            latest_loss_count = 0;
-#endif
        }

        virtual void thread_worker(size_t thread_id);
@@ -696,15 +694,9 @@ namespace Learner

        bool stop_flag;

-        // Discount rate
-        double discount_rate;
-
        // Option to exclude early stage from learning
        int reduction_gameply;

-        // Option not to learn kk/kkp/kpp/kppp
-        std::array<bool, 4> freeze;
-
        // If the absolute value of the evaluation value of the deep search of the teacher phase exceeds this value, discard the teacher phase.
        int eval_limit;

@@ -724,7 +716,6 @@ namespace Learner
        atomic<double> learn_sum_entropy;
 #endif

-#if defined(EVAL_NNUE)
        shared_timed_mutex nn_mutex;
        double newbob_scale;
        double newbob_decay;
@@ -733,7 +724,6 @@ namespace Learner
        double latest_loss_sum;
        uint64_t latest_loss_count;
        std::string best_nn_directory;
-#endif

        uint64_t eval_save_interval;
        uint64_t loss_output_interval;
@@ -753,13 +743,10 @@ namespace Learner
        // It doesn't matter if you have disabled the substitution table.
        TT.new_search();

-
-#if defined(EVAL_NNUE)
        std::cout << "PROGRESS: " << now_string() << ", ";
        std::cout << sr.total_done << " sfens";
        std::cout << ", iteration " << epoch;
        std::cout << ", eta = " << Eval::get_eta() << ", ";
-#endif

 #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD)
        double sum_error = 0;
@@ -813,6 +800,7 @@ namespace Learner
            auto task =
                [
                    &ps,
+#if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
                    &test_sum_cross_entropy,
@@ -820,6 +808,11 @@ namespace Learner
                    &test_sum_entropy_win,
                    &test_sum_entropy,
                    &sum_norm,
+#else
+                    &sum_error,
+                    &sum_error2,
+                    &sum_error3,
+#endif
                    &task_count,
                    &move_accord_count
                ](size_t task_thread_id)
@@ -841,19 +834,6 @@ namespace Learner
                auto task_search_result = qsearch(task_pos);

                auto shallow_value = task_search_result.first;
-                {
-                    const auto rootColor = task_pos.side_to_move();
-                    const auto pv = task_search_result.second;
-                    std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
-                    for (size_t i = 0; i < pv.size(); ++i)
-                    {
-                        task_pos.do_move(pv[i], states[i]);
-                        Eval::NNUE::update_eval(task_pos);
-                    }
-                    shallow_value = (rootColor == task_pos.side_to_move()) ? Eval::evaluate(task_pos) : -Eval::evaluate(task_pos);
-                    for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                        task_pos.undo_move(*it);
-                }

                // Evaluation value of deep search
                auto deep_value = (Value)ps.score;
@@ -917,18 +897,17 @@ namespace Learner
 #if !defined(LOSS_FUNCTION_IS_ELMO_METHOD)
        // rmse = root mean square error: mean square error
        // mae = mean absolute error: mean absolute error
-        auto dsig_rmse = std::sqrt(sum_error / (sfen_for_mse.size() + epsilon));
-        auto dsig_mae = sum_error2 / (sfen_for_mse.size() + epsilon);
-        auto eval_mae = sum_error3 / (sfen_for_mse.size() + epsilon);
+        constexpr double epsilon = 0.000001;
+        auto dsig_rmse = std::sqrt(sum_error / (sr.sfen_for_mse.size() + epsilon));
+        auto dsig_mae = sum_error2 / (sr.sfen_for_mse.size() + epsilon);
+        auto eval_mae = sum_error3 / (sr.sfen_for_mse.size() + epsilon);
        cout << " , dsig rmse = " << dsig_rmse << " , dsig mae = " << dsig_mae
-            << " , eval mae = " << eval_mae;
+            << " , eval mae = " << eval_mae << endl;
 #endif

 #if defined ( LOSS_FUNCTION_IS_ELMO_METHOD )
-#if defined(EVAL_NNUE)
        latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
        latest_loss_count += sr.sfen_for_mse.size();
-#endif

        // learn_cross_entropy may be called train cross entropy in the world of machine learning,
        // When omitting the acronym, it is nice to be able to distinguish it from test cross entropy(tce) by writing it as lce.
@@ -967,8 +946,6 @@ namespace Learner
        learn_sum_entropy_eval = 0.0;
        learn_sum_entropy_win = 0.0;
        learn_sum_entropy = 0.0;
-#else
-        << endl;
 #endif
    }

@@ -987,14 +964,10 @@ namespace Learner
            // display mse (this is sometimes done only for thread 0)
            // Immediately after being read from the file...

-#if defined(EVAL_NNUE)
-        // Lock the evaluation function so that it is not used during updating.
+            // Lock the evaluation function so that it is not used during updating.
            shared_lock<shared_timed_mutex> read_lock(nn_mutex, defer_lock);
            if (sr.next_update_weights <= sr.total_done ||
                (thread_id != 0 && !read_lock.try_lock()))
-#else
-            if (sr.next_update_weights <= sr.total_done)
-#endif
            {
                if (thread_id != 0)
                {
@@ -1018,16 +991,6 @@ namespace Learner
                        continue;
                    }

-#if !defined(EVAL_NNUE)
-                    // Output the current time. Output every time.
-                    std::cout << sr.total_done << " sfens , at " << now_string() << std::endl;
-
-                    // Reflect the gradient in the weight array at this timing. The calculation of the gradient is just right for each 1M phase in terms of mini-batch.
-                    Eval::update_weights(epoch, freeze);
-
-                    // Display epoch and current eta for debugging.
-                    std::cout << "epoch = " << epoch << " , eta = " << Eval::get_eta() << std::endl;
-#else
                    {
                        // update parameters

@@ -1035,7 +998,6 @@ namespace Learner
                        lock_guard<shared_timed_mutex> write_lock(nn_mutex);
                        Eval::NNUE::UpdateParameters(epoch);
                    }
-#endif
                    ++epoch;

                    // Save once every 1 billion phases.
@@ -1069,9 +1031,7 @@ namespace Learner
                        // loss calculation
                        calc_loss(thread_id, done);

-#if defined(EVAL_NNUE)
                        Eval::NNUE::CheckHealth();
-#endif

                        // Make a note of how far you have totaled.
                        sr.last_done = sr.total_done;
@@ -1125,26 +1085,11 @@ namespace Learner
                cout << "Error! : illigal packed sfen = " << pos.fen() << endl;
                goto RetryRead;
            }
-#if !defined(EVAL_NNUE)
-            {
-                auto key = pos.key();
-                // Exclude the phase used for rmse calculation.
-                if (sr.is_for_rmse(key) && skip_duplicated_positions_in_training)
-                    goto RetryRead;
-
-                // Exclude the most recently used aspect.
-                auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
-                auto key2 = sr.hash[hash_index];
-                if (key == key2 && skip_duplicated_positions_in_training)
-                    goto RetryRead;
-                sr.hash[hash_index] = key; // Replace with the current key.
-            }
-#endif

            // There is a possibility that all the pieces are blocked and stuck.
            // Also, the declaration win phase is excluded from learning because you cannot go to leaf with PV moves.
            // (shouldn't write out such teacher aspect itself, but may have written it out with an old generation routine)
-        // Skip the position if there are no legal moves (=checkmated or stalemate).
+            // Skip the position if there are no legal moves (i.e. checkmate or stalemate).
            if (MoveList<LEGAL>(pos).size() == 0)
                goto RetryRead;

@@ -1163,32 +1108,6 @@ namespace Learner

            auto rootColor = pos.side_to_move();

-            // If the initial PV is different, it is better not to use it for learning.
-            // If it is the result of searching a completely different place, it may become noise.
-            // It may be better not to study where the difference in evaluation values is too large.
-
-#if 0
-        // If you do this, about 13% of the phases will be excluded from the learning target. Good and bad are subtle.
-            if (pv.size() >= 1 && (uint16_t)pv[0] != ps.move)
-            {
-                // dbg_hit_on(false);
-                continue;
-            }
-#endif
-
-#if 0
-            // It may be better not to study where the difference in evaluation values is too large.
-            // → It's okay because it passes the win rate function... About 30% of the phases are out of the scope of learning...
-            if (abs((int16_t)r.first - ps.score) >= Eval::PawnValue * 4)
-            {
-                //          dbg_hit_on(false);
-                continue;
-            }
-            //      dbg_hit_on(true);
-#endif
-
-            int ply = 0;
-
            // A helper function that adds the gradient to the current phase.
            auto pos_add_grad = [&]() {
                // Use the value of evaluate in leaf as shallow_value.
@@ -1197,13 +1116,11 @@ namespace Learner
                // I don't think this is a very desirable property, as the aspect that gives that gradient will be different.
                // I have turned off the substitution table, but since the pv array has not been updated due to one stumbling block etc...

-                Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
-
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
                // Calculate loss for training data
                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
                double learn_entropy_eval, learn_entropy_win, learn_entropy;
-                calc_cross_entropy(deep_value, shallow_value, ps, learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy, learn_entropy_eval, learn_entropy_win, learn_entropy);
+                calc_cross_entropy(deep_value, r.first, ps, learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy, learn_entropy_eval, learn_entropy_win, learn_entropy);
                learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
                learn_sum_cross_entropy_win += learn_cross_entropy_win;
                learn_sum_cross_entropy += learn_cross_entropy;
@@ -1212,73 +1129,14 @@ namespace Learner
                learn_sum_entropy += learn_entropy;
 #endif

-#if !defined(EVAL_NNUE)
-                // Slope
-                double dj_dw = calc_grad(deep_value, shallow_value, ps);
-
-                // Add jd_dw as the gradient (∂J/∂Wj) for the feature vector currently appearing in the leaf node.
-
-                // If it is not PV termination, apply a discount rate.
-                if (discount_rate != 0 && ply != (int)pv.size())
-                    dj_dw *= discount_rate;
-
-                // Since we have reached leaf, add the gradient to the features that appear in this phase.
-                // Update based on gradient later.
-                Eval::add_grad(pos, rootColor, dj_dw, freeze);
-#else
-                const double example_weight =
-                    (discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
-                Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
-#endif
+                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);

                // Since the processing is completed, the counter of the processed number is incremented
                sr.total_done++;
            };

-            StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
-            bool illegal_move = false;
-            for (auto m : pv)
-            {
-                // I shouldn't be an illegal player.
-                // An illegal move sometimes comes here...
-                if (!pos.pseudo_legal(m) || !pos.legal(m))
-                {
-                    //cout << pos << m << endl;
-                    //assert(false);
-                    illegal_move = true;
-                    break;
-                }
-
-                // Processing when adding the gradient to the node on each PV.
-                //If discount_rate is 0, this process is not performed.
-                if (discount_rate != 0)
-                    pos_add_grad();
-
-                pos.do_move(m, state[ply++]);
-
-                // Since the value of evaluate in leaf is used, the difference is updated.
-                Eval::NNUE::update_eval(pos);
-            }
-
-            if (illegal_move) {
-                sync_cout << "An illical move was detected... Excluded the position from the learning data..." << sync_endl;
-                continue;
-            }
-
-            // Since we have reached the end phase of PV, add the slope here.
            pos_add_grad();

-            // rewind the phase
-            for (auto it = pv.rbegin(); it != pv.rend(); ++it)
-                pos.undo_move(*it);
-
-#if 0
-            // When adding the gradient to the root phase
-            shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
-            dj_dw = calc_grad(deep_value, shallow_value, ps);
-            Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
-#endif
-
        }

    }
@@ -1303,7 +1161,6 @@ namespace Learner
            static int dir_number = 0;
            const std::string dir_name = std::to_string(dir_number++);
            Eval::save_eval(dir_name);
-#if defined(EVAL_NNUE)
            if (newbob_decay != 1.0 && latest_loss_count > 0) {
                static int trials = newbob_num_trials;
                const double latest_loss = latest_loss_sum / latest_loss_count;
@@ -1338,7 +1195,6 @@ namespace Learner
                    return true;
                }
            }
-#endif
        }
        return false;
    }
@@ -1652,23 +1508,15 @@ namespace Learner
        ELMO_LAMBDA_LIMIT = 32000;
 #endif

-        // Discount rate. If this is set to a value other than 0, the slope will be added even at other than the PV termination. (At that time, apply this discount rate)
-        double discount_rate = 0;
-
        // if (gamePly <rand(reduction_gameply)) continue;
        // An option to exclude the early stage from the learning target moderately like
        // If set to 1, rand(1)==0, so nothing is excluded.
        int reduction_gameply = 1;

-        // Optional item that does not let you learn KK/KKP/KPP/KPPP
-        array<bool, 4> freeze = {};
-
-#if defined(EVAL_NNUE)
        uint64_t nn_batch_size = 1000;
        double newbob_decay = 1.0;
        int newbob_num_trials = 2;
        string nn_options;
-#endif

        uint64_t eval_save_interval = LEARN_EVAL_SAVE_INTERVAL;
        uint64_t loss_output_interval = 0;
@@ -1718,24 +1566,9 @@ namespace Learner
            // Accept also the old option name.
            else if (option == "use_hash_in_training" || option == "skip_duplicated_positions_in_training") is >> skip_duplicated_positions_in_training;
            else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
-            // Discount rate
-            else if (option == "discount_rate") is >> discount_rate;
            // Using WDL with win rate model instead of sigmoid
            else if (option == "use_wdl") is >> use_wdl;

-            // No learning of KK/KKP/KPP/KPPP.
-            else if (option == "freeze_kk")    is >> freeze[0];
-            else if (option == "freeze_kkp")   is >> freeze[1];
-            else if (option == "freeze_kpp")   is >> freeze[2];
-
-#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA)
-
-#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES)
-            else if (option == "freeze_kppp")  is >> freeze[3];
-#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT)
-            else if (option == "freeze_kkpp")  is >> freeze[3];
-#endif
-
 #if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
            // LAMBDA
            else if (option == "lambda")       is >> ELMO_LAMBDA;
@@ -1756,12 +1589,11 @@ namespace Learner
            else if (option == "save_only_once") save_only_once = true;
            else if (option == "no_shuffle") no_shuffle = true;

-#if defined(EVAL_NNUE)
            else if (option == "nn_batch_size") is >> nn_batch_size;
            else if (option == "newbob_decay") is >> newbob_decay;
            else if (option == "newbob_num_trials") is >> newbob_num_trials;
            else if (option == "nn_options") is >> nn_options;
-#endif
+
            else if (option == "eval_save_interval") is >> eval_save_interval;
            else if (option == "loss_output_interval") is >> loss_output_interval;
            else if (option == "mirror_percentage") is >> mirror_percentage;
@@ -1924,21 +1756,15 @@ namespace Learner
            for (auto it = filenames.rbegin(); it != filenames.rend(); ++it)
                sr.filenames.push_back(Path::Combine(base_dir, *it));

-#if !defined(EVAL_NNUE)
-        cout << "Gradient Method   : " << LEARN_UPDATE << endl;
-#endif
        cout << "Loss Function     : " << LOSS_FUNCTION << endl;
        cout << "mini-batch size   : " << mini_batch_size << endl;
-#if defined(EVAL_NNUE)
        cout << "nn_batch_size     : " << nn_batch_size << endl;
        cout << "nn_options        : " << nn_options << endl;
-#endif
        cout << "learning rate     : " << eta1 << " , " << eta2 << " , " << eta3 << endl;
        cout << "eta_epoch         : " << eta1_epoch << " , " << eta2_epoch << endl;
        cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl;
        cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
        cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
-#if defined(EVAL_NNUE)
        if (newbob_decay != 1.0) {
            cout << "scheduling        : newbob with decay = " << newbob_decay
                << ", " << newbob_num_trials << " trials" << endl;
@@ -1946,8 +1772,6 @@ namespace Learner
        else {
            cout << "scheduling        : default" << endl;
        }
-#endif
-        cout << "discount rate     : " << discount_rate << endl;

        // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
        reduction_gameply = max(reduction_gameply, 1);
@@ -1962,14 +1786,6 @@ namespace Learner
        cout << "eval_save_interval  : " << eval_save_interval << " sfens" << endl;
        cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;

-#if defined(EVAL_KPPT) || defined(EVAL_KPP_KKPT) || defined(EVAL_KPP_KKPT_FV_VAR) || defined(EVAL_NABLA)
-        cout << "freeze_kk/kkp/kpp      : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << endl;
-#elif defined(EVAL_KPPPT) || defined(EVAL_KPPP_KKPT) || defined(EVAL_HELICES)
-        cout << "freeze_kk/kkp/kpp/kppp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl;
-#elif defined(EVAL_KKPP_KKPT) || defined(EVAL_KKPPT)
-        cout << "freeze_kk/kkp/kpp/kkpp : " << freeze[0] << " , " << freeze[1] << " , " << freeze[2] << " , " << freeze[3] << endl;
-#endif
-
        // -----------------------------------
        // various initialization
        // -----------------------------------
@@ -1979,12 +1795,6 @@ namespace Learner
        // Read evaluation function parameters
        Eval::init_NNUE();

-#if !defined(EVAL_NNUE)
-        cout << "init_grad.." << endl;
-
-        // Initialize gradient array of merit function parameters
-        Eval::init_grad(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
-#else
        cout << "init_training.." << endl;
        Eval::NNUE::InitializeTraining(eta1, eta1_epoch, eta2, eta2_epoch, eta3);
        Eval::NNUE::SetBatchSize(nn_batch_size);
@@ -1992,34 +1802,17 @@ namespace Learner
        if (newbob_decay != 1.0 && !Options["SkipLoadingEval"]) {
            learn_think.best_nn_directory = std::string(Options["EvalDir"]);
        }
-#endif
-
-#if 0
-        // A test to give a gradient of 1.0 to the initial stage of Hirate.
-        pos.set_hirate();
-        cout << Eval::evaluate(pos) << endl;
-        //Eval::print_eval_stat(pos);
-        Eval::add_grad(pos, BLACK, 32.0, false);
-        Eval::update_weights(1);
-        pos.state()->sum.p[2][0] = VALUE_NOT_EVALUATED;
-        cout << Eval::evaluate(pos) << endl;
-        //Eval::print_eval_stat(pos);
-#endif

        cout << "init done." << endl;

        // Reflect other option settings.
-        learn_think.discount_rate = discount_rate;
        learn_think.eval_limit = eval_limit;
        learn_think.save_only_once = save_only_once;
        learn_think.sr.no_shuffle = no_shuffle;
-        learn_think.freeze = freeze;
        learn_think.reduction_gameply = reduction_gameply;
-#if defined(EVAL_NNUE)
        learn_think.newbob_scale = 1.0;
        learn_think.newbob_decay = newbob_decay;
        learn_think.newbob_num_trials = newbob_num_trials;
-#endif
        learn_think.eval_save_interval = eval_save_interval;
        learn_think.loss_output_interval = loss_output_interval;
        learn_think.mirror_percentage = mirror_percentage;
@@ -2040,7 +1833,6 @@ namespace Learner

        // Calculate rmse once at this point (timing of 0 sfen)
        // sr.calc_rmse();
-#if defined(EVAL_NNUE)
        if (newbob_decay != 1.0) {
            learn_think.calc_loss(0, -1);
            learn_think.best_loss = learn_think.latest_loss_sum / learn_think.latest_loss_count;
@@ -2048,7 +1840,6 @@ namespace Learner
            learn_think.latest_loss_count = 0;
            cout << "initial loss: " << learn_think.best_loss << endl;
        }
-#endif

        // -----------------------------------
        // start learning evaluation function parameters
@@ -6,11 +6,6 @@
 #include "learn.h"
 #if defined (EVAL_LEARN)
 #include <array>
-
-#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE)
-#include "../misc.h"  // PRNG , my_insertion_sort
-#endif
-
 #include <cmath>	// std::sqrt()

 namespace EvalLearningTools
@@ -142,7 +142,6 @@ namespace Eval::NNUE {
    if (!Detail::ReadParameters(stream, network)) return false;
    return stream && stream.peek() == std::ios::traits_type::eof();
  }
-
  // write evaluation function parameters
  bool WriteParameters(std::ostream& stream) {
    if (!WriteHeader(stream, kHashValue, GetArchitectureString())) return false;
@@ -150,32 +149,16 @@ namespace Eval::NNUE {
    if (!Detail::WriteParameters(stream, network)) return false;
    return !stream.fail();
  }
-
-  // Proceed with the difference calculation if possible
-  static void UpdateAccumulatorIfPossible(const Position& pos) {
-
-    feature_transformer->UpdateAccumulatorIfPossible(pos);
-  }
-
-  // Calculate the evaluation value
-  static Value ComputeScore(const Position& pos, bool refresh) {
-
-    auto& accumulator = pos.state()->accumulator;
-    if (!refresh && accumulator.computed_score) {
-      return accumulator.score;
-    }
+  // Evaluation function. Perform differential calculation.
+  Value evaluate(const Position& pos) {

    alignas(kCacheLineSize) TransformedFeatureType
        transformed_features[FeatureTransformer::kBufferSize];
-    feature_transformer->Transform(pos, transformed_features, refresh);
+    feature_transformer->Transform(pos, transformed_features);
    alignas(kCacheLineSize) char buffer[Network::kBufferSize];
    const auto output = network->Propagate(transformed_features, buffer);

-    auto score = static_cast<Value>(output[0] / FV_SCALE);
-
-    accumulator.score = score;
-    accumulator.computed_score = true;
-    return accumulator.score;
+    return static_cast<Value>(output[0] / FV_SCALE);
  }

  // Load eval, from a file stream or a memory stream
@@ -191,19 +174,4 @@ namespace Eval::NNUE {
    return ReadParameters(stream);
  }

-  // Evaluation function. Perform differential calculation.
-  Value evaluate(const Position& pos) {
-    return ComputeScore(pos, false);
-  }
-
-  // Evaluation function. Perform full calculation.
-  Value compute_eval(const Position& pos) {
-    return ComputeScore(pos, true);
-  }
-
-  // Proceed with the difference calculation if possible
-  void update_eval(const Position& pos) {
-    UpdateAccumulatorIfPossible(pos);
-  }
-
 } // namespace Eval::NNUE
@@ -1,6 +1,6 @@
 // Code for learning NNUE evaluation function

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include <random>
 #include <fstream>
@@ -115,7 +115,6 @@ void RestoreParameters(const std::string& dir_name) {
  std::ifstream stream(file_name, std::ios::binary);
  bool result = ReadParameters(stream);
  assert(result);
-
  SendMessages({{"reset"}});
 }

@@ -216,9 +215,8 @@ void save_eval(std::string dir_name) {

  const std::string file_name = Path::Combine(eval_dir, NNUE::savedfileName);
  std::ofstream stream(file_name, std::ios::binary);
-  const bool result = NNUE::WriteParameters(stream);
+  bool result = NNUE::WriteParameters(stream);
  assert(result);
-
  std::cout << "save_eval() finished. folder = " << eval_dir << std::endl;
 }

@@ -229,4 +227,4 @@ double get_eta() {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)
@@ -3,7 +3,7 @@
 #ifndef _EVALUATE_NNUE_LEARNER_H_
 #define _EVALUATE_NNUE_LEARNER_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../learn/learn.h"

@@ -41,6 +41,6 @@ void CheckHealth();

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -1,7 +1,5 @@
 //Definition of input feature quantity K of NNUE evaluation function

-#if defined(EVAL_NNUE)
-
 #include "castling_right.h"
 #include "index_list.h"

@@ -28,7 +26,7 @@ namespace Eval {
            & ((castling_rights >> 2) & 3);
        }

-        for (int i = 0; i <kDimensions; ++i) {
+        for (unsigned int i = 0; i <kDimensions; ++i) {
          if (relative_castling_rights & (i << 1)) {
            active->push_back(i);
          }
@@ -56,7 +54,7 @@ namespace Eval {
            & ((current_castling_rights >> 2) & 3);
        }

-        for (int i = 0; i < kDimensions; ++i) {
+        for (unsigned int i = 0; i < kDimensions; ++i) {
          if ((relative_previous_castling_rights & (i << 1)) &&
            (relative_current_castling_rights & (i << 1)) == 0) {
            removed->push_back(i);
@@ -69,5 +67,3 @@ namespace Eval {
  }  // namespace NNUE

 }  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
@@ -3,8 +3,6 @@
 #ifndef _NNUE_FEATURES_CASTLING_RIGHT_H_
 #define _NNUE_FEATURES_CASTLING_RIGHT_H_

-#if defined(EVAL_NNUE)
-
 #include "../../evaluate.h"
 #include "features_common.h"

@@ -43,6 +41,4 @@ namespace Eval {

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -1,7 +1,5 @@
 //Definition of input feature quantity K of NNUE evaluation function

-#if defined(EVAL_NNUE)
-
 #include "enpassant.h"
 #include "index_list.h"

@@ -43,5 +41,3 @@ namespace Eval {
  }  // namespace NNUE

 }  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
@@ -3,8 +3,6 @@
 #ifndef _NNUE_FEATURES_ENPASSANT_H_
 #define _NNUE_FEATURES_ENPASSANT_H_

-#if defined(EVAL_NNUE)
-
 #include "../../evaluate.h"
 #include "features_common.h"

@@ -43,6 +41,4 @@ namespace Eval {

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -1,7 +1,5 @@
 //Definition of input features HalfRelativeKP of NNUE evaluation function

-#if defined(EVAL_NNUE)
-
 #include "half_relative_kp.h"
 #include "index_list.h"

@@ -74,5 +72,3 @@ template class HalfRelativeKP<Side::kEnemy>;
 }  // namespace NNUE

 }  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
@@ -3,8 +3,6 @@
 #ifndef _NNUE_FEATURES_HALF_RELATIVE_KP_H_
 #define _NNUE_FEATURES_HALF_RELATIVE_KP_H_

-#if defined(EVAL_NNUE)
-
 #include "../../evaluate.h"
 #include "features_common.h"

@@ -60,6 +58,4 @@ class HalfRelativeKP {

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -1,7 +1,5 @@
 //Definition of input feature quantity K of NNUE evaluation function

-#if defined(EVAL_NNUE)
-
 #include "k.h"
 #include "index_list.h"

@@ -54,5 +52,3 @@ void K::AppendChangedIndices(
 }  // namespace NNUE

 }  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
@@ -3,8 +3,6 @@
 #ifndef _NNUE_FEATURES_K_H_
 #define _NNUE_FEATURES_K_H_

-#if defined(EVAL_NNUE)
-
 #include "../../evaluate.h"
 #include "features_common.h"

@@ -47,6 +45,4 @@ private:

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -1,7 +1,5 @@
 //Definition of input feature P of NNUE evaluation function

-#if defined(EVAL_NNUE)
-
 #include "p.h"
 #include "index_list.h"

@@ -52,5 +50,3 @@ void P::AppendChangedIndices(
 }  // namespace NNUE

 }  // namespace Eval
-
-#endif  // defined(EVAL_NNUE)
@@ -3,8 +3,6 @@
 #ifndef _NNUE_FEATURES_P_H_
 #define _NNUE_FEATURES_P_H_

-#if defined(EVAL_NNUE)
-
 #include "../../evaluate.h"
 #include "features_common.h"

@@ -47,6 +45,4 @@ class P {

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -3,8 +3,6 @@
 #ifndef _NNUE_LAYERS_SUM_H_
 #define _NNUE_LAYERS_SUM_H_

-#if defined(EVAL_NNUE)
-
 #include "../nnue_common.h"

 namespace Eval {
@@ -158,6 +156,4 @@ class Sum<PreviousLayer> {

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -29,9 +29,7 @@ namespace Eval::NNUE {
  struct alignas(kCacheLineSize) Accumulator {
    std::int16_t
        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-    Value score;
    bool computed_accumulation;
-    bool computed_score;
  };

 }  // namespace Eval::NNUE
@@ -50,6 +50,7 @@ namespace Eval::NNUE {

    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t GetHashValue() {
+
      return RawFeatures::kHashValue ^ kOutputDimensions;
    }

@@ -62,6 +63,7 @@ namespace Eval::NNUE {

    // Read network parameters
    bool ReadParameters(std::istream& stream) {
+
      for (std::size_t i = 0; i < kHalfDimensions; ++i)
        biases_[i] = read_little_endian<BiasType>(stream);
      for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
@@ -80,23 +82,26 @@ namespace Eval::NNUE {

    // Proceed with the difference calculation if possible
    bool UpdateAccumulatorIfPossible(const Position& pos) const {
+
      const auto now = pos.state();
-      if (now->accumulator.computed_accumulation) {
+      if (now->accumulator.computed_accumulation)
        return true;
-      }
+
      const auto prev = now->previous;
      if (prev && prev->accumulator.computed_accumulation) {
        UpdateAccumulator(pos);
        return true;
      }
+
      return false;
    }

    // Convert input features
-    void Transform(const Position& pos, OutputType* output, bool refresh) const {
-      if (refresh || !UpdateAccumulatorIfPossible(pos)) {
+    void Transform(const Position& pos, OutputType* output) const {
+
+      if (!UpdateAccumulatorIfPossible(pos))
        RefreshAccumulator(pos);
-      }
+
      const auto& accumulation = pos.state()->accumulator.accumulation;

  #if defined(USE_AVX2)
@@ -193,6 +198,7 @@ namespace Eval::NNUE {
   private:
    // Calculate cumulative value without using difference calculation
    void RefreshAccumulator(const Position& pos) const {
+
      auto& accumulator = pos.state()->accumulator;
      IndexType i = 0;
      Features::IndexList active_indices[2];
@@ -232,9 +238,8 @@ namespace Eval::NNUE {
              &accumulator.accumulation[perspective][i][0]);
          auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
          constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
-          for (IndexType j = 0; j < kNumChunks; ++j) {
+          for (IndexType j = 0; j < kNumChunks; ++j)
            accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
-          }

  #elif defined(USE_NEON)
          auto accumulation = reinterpret_cast<int16x8_t*>(
@@ -256,11 +261,11 @@ namespace Eval::NNUE {
  #endif

      accumulator.computed_accumulation = true;
-      accumulator.computed_score = false;
    }

    // Calculate cumulative value using difference calculation
    void UpdateAccumulator(const Position& pos) const {
+
      const auto prev_accumulator = pos.state()->previous->accumulator;
      auto& accumulator = pos.state()->accumulator;
      IndexType i = 0;
@@ -304,33 +309,27 @@ namespace Eval::NNUE {

  #if defined(USE_AVX2)
            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]);
-            }

  #elif defined(USE_SSE2)
            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
-            }

  #elif defined(USE_MMX)
            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
-            }

  #elif defined(USE_NEON)
            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = vsubq_s16(accumulation[j], column[j]);
-            }

  #else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] -=
-                  weights_[offset + j];
-            }
+            for (IndexType j = 0; j < kHalfDimensions; ++j)
+              accumulator.accumulation[perspective][i][j] -= weights_[offset + j];
  #endif

          }
@@ -341,33 +340,27 @@ namespace Eval::NNUE {

  #if defined(USE_AVX2)
            auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]);
-            }

  #elif defined(USE_SSE2)
            auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
-            }

  #elif defined(USE_MMX)
            auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
-            }

  #elif defined(USE_NEON)
            auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
-            for (IndexType j = 0; j < kNumChunks; ++j) {
+            for (IndexType j = 0; j < kNumChunks; ++j)
              accumulation[j] = vaddq_s16(accumulation[j], column[j]);
-            }

  #else
-            for (IndexType j = 0; j < kHalfDimensions; ++j) {
-              accumulator.accumulation[perspective][i][j] +=
-                  weights_[offset + j];
-            }
+            for (IndexType j = 0; j < kHalfDimensions; ++j)
+              accumulator.accumulation[perspective][i][j] += weights_[offset + j];
  #endif

          }
@@ -378,7 +371,6 @@ namespace Eval::NNUE {
  #endif

      accumulator.computed_accumulation = true;
-      accumulator.computed_score = false;
    }

    using BiasType = std::int16_t;
@@ -1,6 +1,6 @@
 // USI extended command for NNUE evaluation function

-#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
+#if defined(ENABLE_TEST_CMD)

 #include "../thread.h"
 #include "../uci.h"
@@ -198,4 +198,4 @@ void TestCommand(Position& pos, std::istream& stream) {

 }  // namespace Eval

-#endif  // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
+#endif  // defined(ENABLE_TEST_CMD)
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TEST_COMMAND_H_
 #define _NNUE_TEST_COMMAND_H_

-#if defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
+#if defined(ENABLE_TEST_CMD)

 namespace Eval {

@@ -16,6 +16,6 @@ void TestCommand(Position& pos, std::istream& stream);

 }  // namespace Eval

-#endif  // defined(ENABLE_TEST_CMD) && defined(EVAL_NNUE)
+#endif  // defined(ENABLE_TEST_CMD)

 #endif
@@ -3,8 +3,6 @@
 #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_H_
 #define _NNUE_TRAINER_FEATURES_FACTORIZER_H_

-#if defined(EVAL_NNUE)
-
 #include "../../nnue_common.h"
 #include "../trainer.h"

@@ -105,6 +103,4 @@ constexpr std::size_t GetArrayLength(const T (&/*array*/)[N]) {

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -3,8 +3,6 @@
 #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_
 #define _NNUE_TRAINER_FEATURES_FACTORIZER_FEATURE_SET_H_

-#if defined(EVAL_NNUE)
-
 #include "../../features/feature_set.h"
 #include "factorizer.h"

@@ -99,6 +97,4 @@ public:

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -3,8 +3,6 @@
 #ifndef _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_
 #define _NNUE_TRAINER_FEATURES_FACTORIZER_HALF_KP_H_

-#if defined(EVAL_NNUE)
-
 #include "../../features/half_kp.h"
 #include "../../features/p.h"
 #include "../../features/half_relative_kp.h"
@@ -98,6 +96,4 @@ constexpr FeatureProperties Factorizer<HalfKP<AssociatedKing>>::kProperties[];

 }  // namespace Eval

-#endif  // defined(EVAL_NNUE)
-
 #endif
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TRAINER_H_
 #define _NNUE_TRAINER_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../nnue_common.h"
 #include "../features/index_list.h"
@@ -70,8 +70,8 @@ struct Example {

 // Message used for setting hyperparameters
 struct Message {
-  Message(const std::string& name, const std::string& value = ""):
-      name(name), value(value), num_peekers(0), num_receivers(0) {}
+  Message(const std::string& in_name, const std::string& in_value = ""):
+      name(in_name), value(in_value), num_peekers(0), num_receivers(0) {}
  const std::string name;
  const std::string value;
  std::uint32_t num_peekers;
@@ -120,6 +120,6 @@ std::shared_ptr<T> MakeAlignedSharedPtr(ArgumentTypes&&... arguments) {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TRAINER_AFFINE_TRANSFORM_H_
 #define _NNUE_TRAINER_AFFINE_TRANSFORM_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../../learn/learn.h"
 #include "../layers/affine_transform.h"
@@ -296,6 +296,6 @@ class Trainer<Layers::AffineTransform<PreviousLayer, OutputDimensions>> {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TRAINER_CLIPPED_RELU_H_
 #define _NNUE_TRAINER_CLIPPED_RELU_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../../learn/learn.h"
 #include "../layers/clipped_relu.h"
@@ -137,6 +137,6 @@ class Trainer<Layers::ClippedReLU<PreviousLayer>> {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TRAINER_FEATURE_TRANSFORMER_H_
 #define _NNUE_TRAINER_FEATURE_TRANSFORMER_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../../learn/learn.h"
 #include "../nnue_feature_transformer.h"
@@ -372,6 +372,6 @@ class Trainer<FeatureTransformer> {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TRAINER_INPUT_SLICE_H_
 #define _NNUE_TRAINER_INPUT_SLICE_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../../learn/learn.h"
 #include "../layers/input_slice.h"
@@ -246,6 +246,6 @@ class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -3,7 +3,7 @@
 #ifndef _NNUE_TRAINER_SUM_H_
 #define _NNUE_TRAINER_SUM_H_

-#if defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#if defined(EVAL_LEARN)

 #include "../../learn/learn.h"
 #include "../layers/sum.h"
@@ -185,6 +185,6 @@ class Trainer<Layers::Sum<PreviousLayer>> {

 }  // namespace Eval

-#endif  // defined(EVAL_LEARN) && defined(EVAL_NNUE)
+#endif  // defined(EVAL_LEARN)

 #endif
@@ -704,7 +704,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {

  // Used by NNUE
  st->accumulator.computed_accumulation = false;
-  st->accumulator.computed_score = false;
  auto& dp = st->dirtyPiece;
  dp.dirty_num = 1;

@@ -1000,7 +999,6 @@ void Position::do_null_move(StateInfo& newSt) {
  if (Eval::useNNUE)
  {
      std::memcpy(&newSt, st, sizeof(StateInfo));
-      st->accumulator.computed_score = false;
  }
  else
      std::memcpy(&newSt, st, offsetof(StateInfo, accumulator));
@@ -597,7 +597,7 @@ namespace {
    Move ttMove, move, excludedMove, bestMove;
    Depth extension, newDepth;
    Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
-    bool ttHit, formerPv, givesCheck, improving, didLMR, priorCapture;
+    bool formerPv, givesCheck, improving, didLMR, priorCapture;
    bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
         ttCapture, singularQuietLMR;
    Piece movedPiece;
@@ -664,12 +664,12 @@ namespace {
    // position key in case of an excluded move.
    excludedMove = ss->excludedMove;
    posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove);
-    tte = TT.probe(posKey, ttHit);
-    ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+    tte = TT.probe(posKey, ss->ttHit);
+    ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
    ttMove =  rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0]
-            : ttHit    ? tte->move() : MOVE_NONE;
+            : ss->ttHit    ? tte->move() : MOVE_NONE;
    if (!excludedMove)
-        ss->ttPv = PvNode || (ttHit && tte->is_pv());
+        ss->ttPv = PvNode || (ss->ttHit && tte->is_pv());
    formerPv = ss->ttPv && !PvNode;

    if (   ss->ttPv
@@ -681,11 +681,11 @@ namespace {

    // thisThread->ttHitAverage can be used to approximate the running average of ttHit
    thisThread->ttHitAverage =   (TtHitAverageWindow - 1) * thisThread->ttHitAverage / TtHitAverageWindow
-                                + TtHitAverageResolution * ttHit;
+                                + TtHitAverageResolution * ss->ttHit;

    // At non-PV nodes we check for an early TT cutoff
    if (  !PvNode
-        && ttHit
+        && ss->ttHit
        && tte->depth() >= depth
        && ttValue != VALUE_NONE // Possible in case of TT access race
        && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
@@ -778,7 +778,7 @@ namespace {
        improving = false;
        goto moves_loop;
    }
-    else if (ttHit)
+    else if (ss->ttHit)
    {
        // Never assume anything about values stored in TT
        ss->staticEval = eval = tte->eval();
@@ -882,14 +882,14 @@ namespace {
        // there and in further interactions with transposition table cutoff depth is set to depth - 3
        // because probCut search has depth set to depth - 4 but we also do a move before it
        // so effective depth is equal to depth - 3
-        && !(   ttHit
+        && !(   ss->ttHit
             && tte->depth() >= depth - 3
             && ttValue != VALUE_NONE
             && ttValue < probCutBeta))
    {
        // if ttMove is a capture and value from transposition table is good enough produce probCut
        // cutoff without digging into actual probCut search
-        if (   ttHit
+        if (   ss->ttHit
            && tte->depth() >= depth - 3
            && ttValue != VALUE_NONE
            && ttValue >= probCutBeta
@@ -933,7 +933,7 @@ namespace {
                if (value >= probCutBeta)
                {
                    // if transposition table doesn't have equal or more deep info write probCut data into it
-                    if ( !(ttHit
+                    if ( !(ss->ttHit
                       && tte->depth() >= depth - 3
                       && ttValue != VALUE_NONE))
                        tte->save(posKey, value_to_tt(value, ss->ply), ttPv,
@@ -1018,7 +1018,6 @@ moves_loop: // When in check, search starts from here

      // Step 13. Pruning at shallow depth (~200 Elo)
      if (  !rootNode
-          && !(Options["Training"] && PvNode)
          && pos.non_pawn_material(us)
          && bestValue > VALUE_TB_LOSS_IN_MAX_PLY)
      {
@@ -1173,13 +1172,6 @@ moves_loop: // When in check, search starts from here
      {
          Depth r = reduction(improving, depth, moveCount);

-          // Decrease reduction at non-check cut nodes for second move at low depths
-          if (   cutNode
-              && depth <= 10
-              && moveCount <= 2
-              && !ss->inCheck)
-              r--;
-
          // Decrease reduction if the ttHit running average is large
          if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
              r--;
@@ -1201,7 +1193,7 @@ moves_loop: // When in check, search starts from here

          // Decrease reduction if ttMove has been singularly extended (~3 Elo)
          if (singularQuietLMR)
-              r -= 1 + formerPv;
+              r--;

          if (!captureOrPromotion)
          {
@@ -1435,7 +1427,7 @@ moves_loop: // When in check, search starts from here
    Move ttMove, move, bestMove;
    Depth ttDepth;
    Value bestValue, value, ttValue, futilityValue, futilityBase, oldAlpha;
-    bool ttHit, pvHit, givesCheck, captureOrPromotion;
+    bool pvHit, givesCheck, captureOrPromotion;
    int moveCount;

    if (PvNode)
@@ -1465,13 +1457,13 @@ moves_loop: // When in check, search starts from here
                                                  : DEPTH_QS_NO_CHECKS;
    // Transposition table lookup
    posKey = pos.key();
-    tte = TT.probe(posKey, ttHit);
-    ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
-    ttMove = ttHit ? tte->move() : MOVE_NONE;
-    pvHit = ttHit && tte->is_pv();
+    tte = TT.probe(posKey, ss->ttHit);
+    ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
+    ttMove = ss->ttHit ? tte->move() : MOVE_NONE;
+    pvHit = ss->ttHit && tte->is_pv();

    if (  !PvNode
-        && ttHit
+        && ss->ttHit
        && tte->depth() >= ttDepth
        && ttValue != VALUE_NONE // Only in case of TT access race
        && (ttValue >= beta ? (tte->bound() & BOUND_LOWER)
@@ -1486,7 +1478,7 @@ moves_loop: // When in check, search starts from here
    }
    else
    {
-        if (ttHit)
+        if (ss->ttHit)
        {
            // Never assume anything about values stored in TT
            if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE)
@@ -1505,7 +1497,7 @@ moves_loop: // When in check, search starts from here
        // Stand pat. Return immediately if static value is at least beta
        if (bestValue >= beta)
        {
-            if (!ttHit)
+            if (!ss->ttHit)
                tte->save(posKey, value_to_tt(bestValue, ss->ply), false, BOUND_LOWER,
                          DEPTH_NONE, MOVE_NONE, ss->staticEval);

@@ -1569,20 +1561,16 @@ moves_loop: // When in check, search starts from here
      }

      // Do not search moves with negative SEE values
-      if (  !ss->inCheck && !pos.see_ge(move))
+      if (   !ss->inCheck
+          && !(givesCheck && pos.is_discovery_check_on_king(~pos.side_to_move(), move))
+          && !pos.see_ge(move))
          continue;

      // Speculative prefetch as early as possible
      prefetch(TT.first_entry(pos.key_after(move)));

      // Check for legality just before making the move
-      if (
-#if defined(EVAL_LEARN)
-        // HACK: pos.piece_on(from_sq(m)) sometimes will be NO_PIECE during machine learning.
-        !pos.pseudo_legal(move) ||
-#endif // EVAL_LEARN
-        !pos.legal(move)
-        )
+      if (!pos.legal(move))
      {
          moveCount--;
          continue;
@@ -1727,8 +1715,8 @@ moves_loop: // When in check, search starts from here
    else
        captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1;

-    // Extra penalty for a quiet TT or main killer move in previous ply when it gets refuted
-    if (   ((ss-1)->moveCount == 1 || ((ss-1)->currentMove == (ss-1)->killers[0]))
+    // Extra penalty for a quiet early move that was not a TT move or main killer move in previous ply when it gets refuted
+    if (   ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0]))
        && !pos.captured_piece())
            update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1);

@@ -2276,7 +2264,7 @@ namespace Learner
    }

    // Pass PV_is(ok) to eliminate this PV, there may be NULL_MOVE in the middle.
-    // ¨ PV should not be NULL_MOVE because it is PV
+    // -> PV should not be NULL_MOVE because it is PV
    // MOVE_WIN has never been thrust. (For now)
    for (Move move : rootMoves[0].pv)
    {
@@ -49,6 +49,7 @@ struct Stack {
  int moveCount;
  bool inCheck;
  bool ttPv;
+  bool ttHit;
 };


@@ -223,7 +223,9 @@ public:

        *mapping = statbuf.st_size;
        *baseAddress = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0);
+#if defined(MADV_RANDOM)
        madvise(*baseAddress, statbuf.st_size, MADV_RANDOM);
+#endif
        ::close(fd);

        if (*baseAddress == MAP_FAILED)
@@ -115,6 +115,7 @@ void TranspositionTable::clear() {
 /// TTEntry t2 if its replace value is greater than that of t2.

 TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
+
  if (Options["Training"]) {
    return found = false, first_entry(0);
  }
@@ -32,7 +32,7 @@
 #include "uci.h"
 #include "syzygy/tbprobe.h"

-#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
+#if defined(ENABLE_TEST_CMD)
 #include "nnue/nnue_test_command.h"
 #endif

@@ -53,10 +53,6 @@ namespace Learner
  // Learning from the generated game record
  void learn(Position& pos, istringstream& is);

-#if defined(GENSFEN2019)
-  // Automatic generation command of teacher phase under development
-  void gen_sfen2019(Position& pos, istringstream& is);
-#endif

  // A pair of reader and evaluation value. Returned by Learner::search(),Learner::qsearch().
  typedef std::pair<Value, std::vector<Move> > ValueAndPV;
@@ -67,7 +63,7 @@ namespace Learner
 }
 #endif

-#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
+#if defined(ENABLE_TEST_CMD)
 void test_cmd(Position& pos, istringstream& is)
 {
    // Initialize as it may be searched.
@@ -363,17 +359,13 @@ void UCI::loop(int argc, char* argv[]) {
      else if (token == "gensfen") Learner::gen_sfen(pos, is);
      else if (token == "learn") Learner::learn(pos, is);

-#if defined (GENSFEN2019)
-	  // Command to generate teacher phase under development
-      else if (token == "gensfen2019") Learner::gen_sfen2019(pos, is);
-#endif
      // Command to call qsearch(),search() directly for testing
      else if (token == "qsearch") qsearch_cmd(pos);
      else if (token == "search") search_cmd(pos, is);

 #endif

-#if defined(EVAL_NNUE) && defined(ENABLE_TEST_CMD)
+#if defined(ENABLE_TEST_CMD)
      // test command
      else if (token == "test") test_cmd(pos, is);
 #endif