Restore lambda and gradient function post-merge and minor fixes.

bench: 3788313
2026-05-20 05:07:46 +00:00 · 2020-09-26 10:03:03 +08:00
parent d1967bb281
commit 5e8a49f7f2
15 changed files with 170 additions and 36 deletions
@@ -37,6 +37,7 @@ Additional options:
 To generate training data from the classic eval, use the gensfen command with the setting "Use NNUE" set to "false". The given example is generation in its simplest form. There are more commands. 
 ```
 uci
+setoption name PruneAtShallowDepth value false
 setoption name Use NNUE value false
 setoption name Threads value x
 setoption name Hash value y
@@ -56,11 +57,13 @@ The process is the same as the generation of training data, except for the fact
 Use the "learn" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
 ```
 uci
+setoption name EnableTranspositionTable value false
+setoption name PruneAtShallowDepth value false
 setoption name SkipLoadingEval value true
 setoption name Use NNUE value pure
 setoption name Threads value x
 isready
-learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 mirror_percentage 50 validation_set_file_name validationdata\val.bin
+learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 lr 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 validation_set_file_name validationdata\val.bin
 ```
 Nets get saved in the "evalsave" folder. 

@@ -42,7 +42,7 @@ namespace Eval {
  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
  // for the build process (profile-build and fishtest) to work. Do not change the
  // name of the macro, as it is used in the Makefile.
-  #define EvalFileDefaultName   "nn-28e08a9fe2ad.nnue"
+  #define EvalFileDefaultName   "nn-54f88d1580b4.nnue"

  namespace NNUE {

@@ -157,6 +157,14 @@ namespace Learner
        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
    }

+    // Blending weights for the elmo (WCSC27) loss. They need tuning.
+    // elmo does not divide the expression internally, so the scale differs here:
+    // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27).
+    // All three can be set from the learn command (lambda, lambda2, lambda_limit).
+    double ELMO_LAMBDA = 0.33;  // applied while |teacher signal| < ELMO_LAMBDA_LIMIT
+    double ELMO_LAMBDA2 = 0.33;  // applied once |teacher signal| >= ELMO_LAMBDA_LIMIT
+    double ELMO_LAMBDA_LIMIT = 32000;  // threshold on the teacher signal magnitude
+
    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
    double get_scaled_signal(double signal)
    {
@@ -182,6 +190,18 @@ namespace Learner
        return winning_percentage(scaled_teacher_signal, ply);
    }

+    double calculate_lambda(double teacher_signal)
+    {
+        // Pick the blending weight for one sample. A teacher signal whose
+        // magnitude reaches ELMO_LAMBDA_LIMIT is weighted with ELMO_LAMBDA2;
+        // every other signal is weighted with ELMO_LAMBDA.
+        const bool beyond_limit = std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT;
+        if (beyond_limit)
+            return ELMO_LAMBDA2;
+
+        return ELMO_LAMBDA;
+    }
+
    double calculate_t(int game_result)
    {
        // Use 1 as the correction term if the expected win rate is 1,
@@ -192,6 +212,32 @@ namespace Learner
        return t;
    }

+    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
+    {
+        // Gradient for the elmo (WCSC27) method: a term driven by the teacher
+        // signal and a term driven by the actual game result are blended with
+        // weights lambda and (1 - lambda).
+        const double q = winning_percentage(shallow, psv.gamePly);
+        const double p = calculate_p(teacher_signal, psv.gamePly);
+        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);
+
+        if (!use_wdl)
+        {
+            // The actual win rate serves as a correction term.
+            // This is the idea of elmo (WCSC27).
+            return lambda * (q - p) + (1.0 - lambda) * (q - t);
+        }
+
+        // use_wdl: blend the calc_d_cross_entropy_of_winning_percentage values
+        // obtained for the two targets p and t.
+        const double grad_from_p =
+            calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
+        const double grad_from_t =
+            calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
+        return lambda * grad_from_p + (1.0 - lambda) * grad_from_t;
+    }
+
    // Calculate cross entropy during learning
    // The individual cross entropy of the win/loss term and win
    // rate term of the elmo expression is returned
@@ -202,16 +248,21 @@ namespace Learner
        const PackedSfenValue& psv,
        double& cross_entropy_eval,
        double& cross_entropy_win,
+        double& cross_entropy,
        double& entropy_eval,
-        double& entropy_win)
+        double& entropy_win,
+        double& entropy)
    {
        // Teacher winning probability.
        const double q = winning_percentage(shallow, psv.gamePly);
        const double p = calculate_p(teacher_signal, psv.gamePly);
        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);

        constexpr double epsilon = 0.000001;

+        const double m = (1.0 - lambda) * t + lambda * p;
+
        cross_entropy_eval =
            (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
        cross_entropy_win =
@@ -220,12 +271,17 @@ namespace Learner
            (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
        entropy_win =
            (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
+
+        cross_entropy =
+            (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
+        entropy =
+            (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
    }

    // Other objective functions may be considered in the future...
    double calc_grad(Value shallow, const PackedSfenValue& psv)
    {
-        return (double)(shallow - (Value)psv.score) / 2400.0;
+        return calc_grad((Value)psv.score, shallow, psv);  // psv.score is passed as the teacher signal
    }

    struct BasicSfenInputStream
@@ -798,12 +854,14 @@ namespace Learner
        cout << ", learning rate = " << global_learning_rate << ", ";

        // For calculation of verification data loss
-        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
-        atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
+        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
+        atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
        test_sum_cross_entropy_eval = 0;
        test_sum_cross_entropy_win = 0;
+        test_sum_cross_entropy = 0;
        test_sum_entropy_eval = 0;
        test_sum_entropy_win = 0;
+        test_sum_entropy = 0;

        // norm for learning
        atomic<double> sum_norm;
@@ -843,8 +901,10 @@ namespace Learner
                    &ps,
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
+                    &test_sum_cross_entropy,
                    &test_sum_entropy_eval,
                    &test_sum_entropy_win,
+                    &test_sum_entropy,
                    &sum_norm,
                    &task_count,
                    &move_accord_count
@@ -872,22 +932,26 @@ namespace Learner
                // For the time being, regarding the win rate and loss terms only in the elmo method
                // Calculate and display the cross entropy.

-                double test_cross_entropy_eval, test_cross_entropy_win;
-                double test_entropy_eval, test_entropy_win;
+                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
+                double test_entropy_eval, test_entropy_win, test_entropy;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    test_cross_entropy_eval,
                    test_cross_entropy_win,
+                    test_cross_entropy,
                    test_entropy_eval,
-                    test_entropy_win);
+                    test_entropy_win,
+                    test_entropy);

                // The total cross entropy need not be abs() by definition.
                test_sum_cross_entropy_eval += test_cross_entropy_eval;
                test_sum_cross_entropy_win += test_cross_entropy_win;
+                test_sum_cross_entropy += test_cross_entropy;
                test_sum_entropy_eval += test_entropy_eval;
                test_sum_entropy_win += test_entropy_win;
+                test_sum_entropy += test_entropy;
                sum_norm += (double)abs(shallow_value);

                // Determine if the teacher's move and the score of the shallow search match
@@ -912,7 +976,7 @@ namespace Learner
        while (task_count)
            sleep(1);

-        latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
+        latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
        latest_loss_count += sr.sfen_for_mse.size();

        // learn_cross_entropy may be called train cross
@@ -927,6 +991,8 @@ namespace Learner
                << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
                << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
                << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
+                << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
+                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                << " , norm = " << sum_norm
                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
                << endl;
@@ -938,6 +1004,8 @@ namespace Learner
                    << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
                    << " , learn_entropy_eval = " << learn_sum_entropy_eval / done
                    << " , learn_entropy_win = " << learn_sum_entropy_win / done
+                    << " , learn_cross_entropy = " << learn_sum_cross_entropy / done
+                    << " , learn_entropy = " << learn_sum_entropy / done
                    << endl;
            }
        }
@@ -949,8 +1017,10 @@ namespace Learner
        // Clear 0 for next time.
        learn_sum_cross_entropy_eval = 0.0;
        learn_sum_cross_entropy_win = 0.0;
+        learn_sum_cross_entropy = 0.0;
        learn_sum_entropy_eval = 0.0;
        learn_sum_entropy_win = 0.0;
+        learn_sum_entropy = 0.0;
    }

    void LearnerThink::thread_worker(size_t thread_id)
@@ -1142,21 +1212,25 @@ namespace Learner
                    : -Eval::evaluate(pos);

                // Calculate loss for training data
-                double learn_cross_entropy_eval, learn_cross_entropy_win;
-                double learn_entropy_eval, learn_entropy_win;
+                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
+                double learn_entropy_eval, learn_entropy_win, learn_entropy;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    learn_cross_entropy_eval,
                    learn_cross_entropy_win,
+                    learn_cross_entropy,
                    learn_entropy_eval,
-                    learn_entropy_win);
+                    learn_entropy_win,
+                    learn_entropy);

                learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
                learn_sum_cross_entropy_win += learn_cross_entropy_win;
+                learn_sum_cross_entropy += learn_cross_entropy;
                learn_sum_entropy_eval += learn_entropy_eval;
                learn_sum_entropy_win += learn_entropy_win;
+                learn_sum_entropy += learn_entropy;

                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);

@@ -1560,6 +1634,11 @@ namespace Learner

        global_learning_rate = 1.0;

+        // elmo lambda
+        ELMO_LAMBDA = 0.33;
+        ELMO_LAMBDA2 = 0.33;
+        ELMO_LAMBDA_LIMIT = 32000;
+
        // if (gamePly <rand(reduction_gameply)) continue;
        // An option to exclude the early stage from the learning target moderately like
        // If set to 1, rand(1)==0, so nothing is excluded.
@@ -1627,6 +1706,12 @@ namespace Learner
            // Using WDL with win rate model instead of sigmoid
            else if (option == "use_wdl") is >> use_wdl;

+
+            // LAMBDA
+            else if (option == "lambda")       is >> ELMO_LAMBDA;
+            else if (option == "lambda2")      is >> ELMO_LAMBDA2;
+            else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
+
            else if (option == "reduction_gameply") is >> reduction_gameply;

            // shuffle related
@@ -1814,6 +1899,9 @@ namespace Learner
        reduction_gameply = max(reduction_gameply, 1);
        cout << "reduction_gameply : " << reduction_gameply << endl;

+        cout << "LAMBDA            : " << ELMO_LAMBDA << endl;
+        cout << "LAMBDA2           : " << ELMO_LAMBDA2 << endl;
+        cout << "LAMBDA_LIMIT      : " << ELMO_LAMBDA_LIMIT << endl;
        cout << "eval_save_interval  : " << eval_save_interval << " sfens" << endl;
        cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;

@@ -23,7 +23,11 @@ using LearnFloatType = float;
 // configure
 // ======================

-#define LOSS_FUNCTION "cross_entropy_eval"
+// ----------------------
+// Learning with the method of elmo (WCSC27); LOSS_FUNCTION carries that name.
+// ----------------------
+
+#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"

 // ----------------------
 // Definition of struct used in Learner
@@ -31,10 +31,30 @@ namespace Eval::NNUE::Features {

  // Get a list of indices whose values have changed from the previous one in the feature quantity
  void CastlingRight::AppendChangedIndices(
-    const Position& /* pos */, Color /* perspective */,
-    IndexList* /* removed */, IndexList* /* added */) {
-    // Not implemented.
-    assert(false);
+      const Position& pos, Color perspective,
+      IndexList* removed, IndexList* /* added */) {
+    // Only `removed` is filled: this reports rights that were set before the
+    // move and are cleared now. Nothing is ever appended to `added`.
+    int previous_castling_rights = pos.state()->previous->castlingRights;
+    int current_castling_rights = pos.state()->castlingRights;
+    if (perspective != WHITE) {
+      // Invert the perspective by swapping the low and high 2-bit groups.
+      // The halves are combined with OR; AND-ing two disjoint bit ranges
+      // would always yield 0 and hide every change for this perspective.
+      // NOTE(review): AppendActiveIndices must use the same mapping - confirm.
+      previous_castling_rights = ((previous_castling_rights & 3) << 2)
+        | ((previous_castling_rights >> 2) & 3);
+      current_castling_rights = ((current_castling_rights & 3) << 2)
+        | ((current_castling_rights >> 2) & 3);
+    }
+
+    // A set bit in `lost` is a right present before the move and absent after.
+    const int lost = previous_castling_rights & ~current_castling_rights;
+    for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
+      if (lost & (1 << i)) {
+        removed->push_back(i);
+      }
+    }
  }

 }  // namespace Eval::NNUE::Features
@@ -19,7 +19,7 @@ namespace Eval::NNUE::Features {
    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions = 4;
    // Timing of full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;

    // Get a list of indices with a value of 1 among the features
    static void AppendActiveIndices(const Position& pos, Color perspective,
@@ -21,10 +21,22 @@ namespace Eval::NNUE::Features {

  // Get a list of indices whose values have changed from the previous one in the feature quantity
  void EnPassant::AppendChangedIndices(
-    const Position& /* pos */, Color /* perspective */,
-    IndexList* /* removed */, IndexList* /* added */) {
-    // Not implemented.
-    assert(false);
+      const Position& pos, Color /* perspective */,
+      IndexList* removed, IndexList* added) {
+    // The index pushed to either list is the file of an en passant square.
+    const auto ep_before = pos.state()->previous->epSquare;
+    const auto ep_now = pos.state()->epSquare;
+    const bool had_ep = ep_before != SQ_NONE;
+    const bool has_ep = ep_now != SQ_NONE;
+
+    // Same file before and after: nothing to report.
+    if (had_ep && has_ep && file_of(ep_now) == file_of(ep_before))
+      return;
+
+    if (had_ep)
+      removed->push_back(file_of(ep_before));
+    if (has_ep)
+      added->push_back(file_of(ep_now));
  }

 }  // namespace Eval::NNUE::Features
@@ -19,13 +19,13 @@ namespace Eval::NNUE::Features {
    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions = 1;
    // Timing of full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;

    // Get a list of indices with a value of 1 among the features
    static void AppendActiveIndices(const Position& pos, Color perspective,
      IndexList* active);

-    // Get a list of indices whose values ??have changed from the previous one in the feature quantity
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
    static void AppendChangedIndices(const Position& pos, Color perspective,
      IndexList* removed, IndexList* added);
  };
@@ -100,7 +100,6 @@ namespace Eval::NNUE::Features {
        IndexListType removed[2], IndexListType added[2], bool reset[2]) {

      const auto& dp = pos.state()->dirtyPiece;
-      if (dp.dirty_num == 0) return;

      for (Color perspective : { WHITE, BLACK }) {
        reset[perspective] = false;
@@ -108,12 +107,15 @@ namespace Eval::NNUE::Features {
          case TriggerEvent::kNone:
            break;
          case TriggerEvent::kFriendKingMoved:
+            if (dp.dirty_num == 0) continue;
            reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
            break;
          case TriggerEvent::kEnemyKingMoved:
-              reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
+            if (dp.dirty_num == 0) continue;
+            reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
            break;
          case TriggerEvent::kAnyKingMoved:
+            if (dp.dirty_num == 0) continue;
            reset[perspective] = type_of(dp.piece[0]) == KING;
            break;
          case TriggerEvent::kAnyPieceMoved:
@@ -41,7 +41,7 @@ namespace Eval::NNUE::Features {
  void HalfKP<AssociatedKing>::AppendActiveIndices(
      const Position& pos, Color perspective, IndexList* active) {

-    Square ksq = orient(perspective, pos.square<KING>(perspective));
+    Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
    Bitboard bb = pos.pieces() & ~pos.pieces(KING);
    while (bb) {
      Square s = pop_lsb(&bb);
@@ -55,7 +55,7 @@ namespace Eval::NNUE::Features {
      const Position& pos, Color perspective,
      IndexList* removed, IndexList* added) {

-    Square ksq = orient(perspective, pos.square<KING>(perspective));
+    Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
    const auto& dp = pos.state()->dirtyPiece;
    for (int i = 0; i < dp.dirty_num; ++i) {
      Piece pc = dp.piece[i];
@@ -68,5 +68,6 @@ namespace Eval::NNUE::Features {
  }

  template class HalfKP<Side::kFriend>;
+  template class HalfKP<Side::kEnemy>;

 }  // namespace Eval::NNUE::Features
@@ -33,7 +33,8 @@ namespace Eval::NNUE::Features {

   public:
    // Feature name
-    static constexpr const char* kName = "HalfKP(Friend)";
+    static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
+        "HalfKP(Friend)" : "HalfKP(Enemy)";
    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t kHashValue =
        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
@@ -43,7 +44,9 @@ namespace Eval::NNUE::Features {
    // Maximum number of simultaneously active features
    static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
    // Trigger for full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
+    static constexpr TriggerEvent kRefreshTrigger =
+        (AssociatedKing == Side::kFriend) ?
+        TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;

    // Get a list of indices for active features
    static void AppendActiveIndices(const Position& pos, Color perspective,
@@ -39,7 +39,7 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
 template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
    const Position& pos, Color perspective, IndexList* active) {
-  Square ksq = orient(perspective, pos.square<KING>(perspective));
+  Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
  Bitboard bb = pos.pieces() & ~pos.pieces(KING);
  while (bb) {
    Square s = pop_lsb(&bb);
@@ -52,7 +52,7 @@ template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
-  Square ksq = orient(perspective, pos.square<KING>(perspective));
+  Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
  const auto& dp = pos.state()->dirtyPiece;
  for (int i = 0; i < dp.dirty_num; ++i) {
    Piece pc = dp.piece[i];
@@ -22,7 +22,7 @@
 #define NNUE_ARCHITECTURE_H_INCLUDED

 // Defines the network structure
-#include "architectures/halfkp-cr-ep_256x2-32-32.h"
+#include "architectures/halfkp_256x2-32-32.h"

 namespace Eval::NNUE {

@@ -1013,6 +1013,7 @@ void Position::do_null_move(StateInfo& newSt) {
  {
      st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
      st->epSquare = SQ_NONE;
+      st->accumulator.computed_accumulation = false;
  }

  st->key ^= Zobrist::side;
@@ -25,8 +25,6 @@

 namespace Tablebases {

-extern int MaxCardinality;
-
 enum WDLScore {
    WDLLoss        = -2, // Loss
    WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
@@ -45,6 +43,8 @@ enum ProbeState {
    ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };

+extern int MaxCardinality;
+
 void init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
 int probe_dtz(Position& pos, ProbeState* result);