Restore lambda and gradient function post-merge and minor fixes.

bench: 3788313
2026-05-20 09:47:46 +00:00 · 2020-09-26 10:03:03 +08:00
parent d1967bb281
commit 5e8a49f7f2
15 changed files with 170 additions and 36 deletions
@@ -42,7 +42,7 @@ namespace Eval {
  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
  // for the build process (profile-build and fishtest) to work. Do not change the
  // name of the macro, as it is used in the Makefile.
-  #define EvalFileDefaultName   "nn-28e08a9fe2ad.nnue"
+  #define EvalFileDefaultName   "nn-54f88d1580b4.nnue"

  namespace NNUE {

@@ -157,6 +157,14 @@ namespace Learner
        return ((y2 - y1) / epsilon) / winning_probability_coefficient;
    }

+    // Mixing weights taken from the elmo (WCSC27) method. Adjustment required.
+    // elmo does not divide the expression internally, so the scale differs here:
+    // 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27).
+    // All three values can be set with the learn command (lambda, lambda2, lambda_limit).
+    double ELMO_LAMBDA = 0.33;  // used while |teacher signal| < ELMO_LAMBDA_LIMIT
+    double ELMO_LAMBDA2 = 0.33;  // used once |teacher signal| >= ELMO_LAMBDA_LIMIT
+    double ELMO_LAMBDA_LIMIT = 32000;  // threshold on |teacher signal|, see calculate_lambda()
+
    // Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
    double get_scaled_signal(double signal)
    {
@@ -182,6 +190,18 @@ namespace Learner
        return winning_percentage(scaled_teacher_signal, ply);
    }

+    double calculate_lambda(double teacher_signal)
+    {
+        // Deep-search evaluations whose magnitude reaches ELMO_LAMBDA_LIMIT
+        // are weighted with ELMO_LAMBDA2; everything else uses ELMO_LAMBDA.
+        if (std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
+            return ELMO_LAMBDA2;
+
+        return ELMO_LAMBDA;
+    }
+
    double calculate_t(int game_result)
    {
        // Use 1 as the correction term if the expected win rate is 1,
@@ -192,6 +212,32 @@ namespace Learner
        return t;
    }

+    double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
+    {
+        // elmo (WCSC27) method: the teacher term and the actual game result
+        // term are blended with the weight returned by calculate_lambda().
+        const double shallow_win_rate = winning_percentage(shallow, psv.gamePly);
+        const double teacher_win_rate = calculate_p(teacher_signal, psv.gamePly);
+        const double result_win_rate = calculate_t(psv.game_result);
+        const double weight = calculate_lambda(teacher_signal);
+
+        if (!use_wdl)
+        {
+            // The actual game outcome is used as a correction term next to
+            // the teacher's win rate -- the idea of elmo (WCSC27).
+            return weight * (shallow_win_rate - teacher_win_rate)
+                + (1.0 - weight) * (shallow_win_rate - result_win_rate);
+        }
+
+        // use_wdl: blend the values produced by
+        // calc_d_cross_entropy_of_winning_percentage() for both targets.
+        const double teacher_grad =
+            calc_d_cross_entropy_of_winning_percentage(teacher_win_rate, shallow, psv.gamePly);
+        const double result_grad =
+            calc_d_cross_entropy_of_winning_percentage(result_win_rate, shallow, psv.gamePly);
+        return weight * teacher_grad + (1.0 - weight) * result_grad;
+    }
+
    // Calculate cross entropy during learning
    // The individual cross entropy of the win/loss term and win
    // rate term of the elmo expression is returned
@@ -202,16 +248,21 @@ namespace Learner
        const PackedSfenValue& psv,
        double& cross_entropy_eval,
        double& cross_entropy_win,
+        double& cross_entropy,
        double& entropy_eval,
-        double& entropy_win)
+        double& entropy_win,
+        double& entropy)
    {
        // Teacher winning probability.
        const double q = winning_percentage(shallow, psv.gamePly);
        const double p = calculate_p(teacher_signal, psv.gamePly);
        const double t = calculate_t(psv.game_result);
+        const double lambda = calculate_lambda(teacher_signal);

        constexpr double epsilon = 0.000001;

+        const double m = (1.0 - lambda) * t + lambda * p;
+
        cross_entropy_eval =
            (-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
        cross_entropy_win =
@@ -220,12 +271,17 @@ namespace Learner
            (-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
        entropy_win =
            (-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
+
+        cross_entropy =
+            (-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
+        entropy =
+            (-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
    }

    // Other objective functions may be considered in the future...
    double calc_grad(Value shallow, const PackedSfenValue& psv)
    {
-        return (double)(shallow - (Value)psv.score) / 2400.0;
+        // The score stored with the position is passed as the teacher signal
+        // to the three-argument overload.
+        const Value teacher_signal = static_cast<Value>(psv.score);
+        return calc_grad(teacher_signal, shallow, psv);
    }

    struct BasicSfenInputStream
@@ -798,12 +854,14 @@ namespace Learner
        cout << ", learning rate = " << global_learning_rate << ", ";

        // For calculation of verification data loss
-        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
-        atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
+        atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
+        atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
        test_sum_cross_entropy_eval = 0;
        test_sum_cross_entropy_win = 0;
+        test_sum_cross_entropy = 0;
        test_sum_entropy_eval = 0;
        test_sum_entropy_win = 0;
+        test_sum_entropy = 0;

        // norm for learning
        atomic<double> sum_norm;
@@ -843,8 +901,10 @@ namespace Learner
                    &ps,
                    &test_sum_cross_entropy_eval,
                    &test_sum_cross_entropy_win,
+                    &test_sum_cross_entropy,
                    &test_sum_entropy_eval,
                    &test_sum_entropy_win,
+                    &test_sum_entropy,
                    &sum_norm,
                    &task_count,
                    &move_accord_count
@@ -872,22 +932,26 @@ namespace Learner
                // For the time being, regarding the win rate and loss terms only in the elmo method
                // Calculate and display the cross entropy.

-                double test_cross_entropy_eval, test_cross_entropy_win;
-                double test_entropy_eval, test_entropy_win;
+                double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
+                double test_entropy_eval, test_entropy_win, test_entropy;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    test_cross_entropy_eval,
                    test_cross_entropy_win,
+                    test_cross_entropy,
                    test_entropy_eval,
-                    test_entropy_win);
+                    test_entropy_win,
+                    test_entropy);

                // The total cross entropy need not be abs() by definition.
                test_sum_cross_entropy_eval += test_cross_entropy_eval;
                test_sum_cross_entropy_win += test_cross_entropy_win;
+                test_sum_cross_entropy += test_cross_entropy;
                test_sum_entropy_eval += test_entropy_eval;
                test_sum_entropy_win += test_entropy_win;
+                test_sum_entropy += test_entropy;
                sum_norm += (double)abs(shallow_value);

                // Determine if the teacher's move and the score of the shallow search match
@@ -912,7 +976,7 @@ namespace Learner
        while (task_count)
            sleep(1);

-        latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
+        latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
        latest_loss_count += sr.sfen_for_mse.size();

        // learn_cross_entropy may be called train cross
@@ -927,6 +991,8 @@ namespace Learner
                << " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
                << " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
                << " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
+                << " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
+                << " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
                << " , norm = " << sum_norm
                << " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
                << endl;
@@ -938,6 +1004,8 @@ namespace Learner
                    << " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
                    << " , learn_entropy_eval = " << learn_sum_entropy_eval / done
                    << " , learn_entropy_win = " << learn_sum_entropy_win / done
+                    << " , learn_cross_entropy = " << learn_sum_cross_entropy / done
+                    << " , learn_entropy = " << learn_sum_entropy / done
                    << endl;
            }
        }
@@ -949,8 +1017,10 @@ namespace Learner
        // Clear 0 for next time.
        learn_sum_cross_entropy_eval = 0.0;
        learn_sum_cross_entropy_win = 0.0;
+        learn_sum_cross_entropy = 0.0;
        learn_sum_entropy_eval = 0.0;
        learn_sum_entropy_win = 0.0;
+        learn_sum_entropy = 0.0;
    }

    void LearnerThink::thread_worker(size_t thread_id)
@@ -1142,21 +1212,25 @@ namespace Learner
                    : -Eval::evaluate(pos);

                // Calculate loss for training data
-                double learn_cross_entropy_eval, learn_cross_entropy_win;
-                double learn_entropy_eval, learn_entropy_win;
+                double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
+                double learn_entropy_eval, learn_entropy_win, learn_entropy;
                calc_cross_entropy(
                    deep_value,
                    shallow_value,
                    ps,
                    learn_cross_entropy_eval,
                    learn_cross_entropy_win,
+                    learn_cross_entropy,
                    learn_entropy_eval,
-                    learn_entropy_win);
+                    learn_entropy_win,
+                    learn_entropy);

                learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
                learn_sum_cross_entropy_win += learn_cross_entropy_win;
+                learn_sum_cross_entropy += learn_cross_entropy;
                learn_sum_entropy_eval += learn_entropy_eval;
                learn_sum_entropy_win += learn_entropy_win;
+                learn_sum_entropy += learn_entropy;

                Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);

@@ -1560,6 +1634,11 @@ namespace Learner

        global_learning_rate = 1.0;

+        // elmo lambda
+        ELMO_LAMBDA = 0.33;
+        ELMO_LAMBDA2 = 0.33;
+        ELMO_LAMBDA_LIMIT = 32000;
+
        // if (gamePly <rand(reduction_gameply)) continue;
        // An option to exclude the early stage from the learning target moderately like
        // If set to 1, rand(1)==0, so nothing is excluded.
@@ -1627,6 +1706,12 @@ namespace Learner
            // Using WDL with win rate model instead of sigmoid
            else if (option == "use_wdl") is >> use_wdl;

+
+            // LAMBDA
+            else if (option == "lambda")       is >> ELMO_LAMBDA;
+            else if (option == "lambda2")      is >> ELMO_LAMBDA2;
+            else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
+
            else if (option == "reduction_gameply") is >> reduction_gameply;

            // shuffle related
@@ -1814,6 +1899,9 @@ namespace Learner
        reduction_gameply = max(reduction_gameply, 1);
        cout << "reduction_gameply : " << reduction_gameply << endl;

+        cout << "LAMBDA            : " << ELMO_LAMBDA << endl;
+        cout << "LAMBDA2           : " << ELMO_LAMBDA2 << endl;
+        cout << "LAMBDA_LIMIT      : " << ELMO_LAMBDA_LIMIT << endl;
        cout << "eval_save_interval  : " << eval_save_interval << " sfens" << endl;
        cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;

@@ -23,7 +23,11 @@ using LearnFloatType = float;
 // configure
 // ======================

-#define LOSS_FUNCTION "cross_entropy_eval"
+// ----------------------
+// Learning with the method of elmo (WCSC27)
+// ----------------------
+
+#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"

 // ----------------------
 // Definition of struct used in Learner
@@ -31,10 +31,30 @@ namespace Eval::NNUE::Features {

  // Get a list of indices whose values have changed from the previous one in the feature quantity
  void CastlingRight::AppendChangedIndices(
-    const Position& /* pos */, Color /* perspective */,
-    IndexList* /* removed */, IndexList* /* added */) {
-    // Not implemented.
-    assert(false);
+      const Position& pos, Color perspective,
+      IndexList* removed, IndexList* /* added */) {
+    // Appends to `removed` every castling-right feature that was set in the
+    // previous state but is no longer set in the current one. `added` is
+    // never written: only bits that went from 1 to 0 are reported.
+
+    // Expresses a castling-rights mask relative to `perspective`. For any
+    // perspective other than WHITE the low and high bit pairs are swapped.
+    // The two halves occupy disjoint bits, so they must be merged with `|`;
+    // merging them with `&` would always produce 0.
+    // NOTE(review): presumably the low pair holds white's rights and the high
+    // pair black's -- confirm against the CastlingRights enum. Also confirm
+    // that AppendActiveIndices computes the relative mask the same way, so
+    // that full refresh and incremental update agree.
+    const auto relative_rights = [perspective](int rights) {
+      if (perspective == WHITE)
+        return rights;
+      return ((rights & 3) << 2) | ((rights >> 2) & 3);
+    };
+
+    const int previous_rights =
+        relative_rights(pos.state()->previous->castlingRights);
+    const int current_rights = relative_rights(pos.state()->castlingRights);
+
+    // Bits present before the move and absent after it.
+    const int lost_rights = previous_rights & ~current_rights;
+    for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
+      if (lost_rights & (1 << i))
+        removed->push_back(i);
+    }
  }

 }  // namespace Eval::NNUE::Features
@@ -19,7 +19,7 @@ namespace Eval::NNUE::Features {
    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions = 4;
    // Timing of full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;

    // Get a list of indices with a value of 1 among the features
    static void AppendActiveIndices(const Position& pos, Color perspective,
@@ -21,10 +21,22 @@ namespace Eval::NNUE::Features {

  // Get a list of indices whose values have changed from the previous one in the feature quantity
  void EnPassant::AppendChangedIndices(
-    const Position& /* pos */, Color /* perspective */,
-    IndexList* /* removed */, IndexList* /* added */) {
-    // Not implemented.
-    assert(false);
+      const Position& pos, Color /* perspective */,
+      IndexList* removed, IndexList* added) {
+    // The index pushed is the file of the en passant square. The previous
+    // state's square is compared with the current one and the difference is
+    // reported through `removed` / `added`.
+    // NOTE(review): `perspective` is unused here -- confirm that
+    // AppendActiveIndices also indexes by the unmirrored file.
+    const auto old_ep = pos.state()->previous->epSquare;
+    const auto new_ep = pos.state()->epSquare;
+    const bool had_ep = old_ep != SQ_NONE;
+    const bool has_ep = new_ep != SQ_NONE;
+
+    // Same file before and after: nothing changed.
+    if (had_ep && has_ep && file_of(new_ep) == file_of(old_ep))
+      return;
+
+    if (had_ep)
+      removed->push_back(file_of(old_ep));
+    if (has_ep)
+      added->push_back(file_of(new_ep));
  }

 }  // namespace Eval::NNUE::Features
@@ -19,13 +19,13 @@ namespace Eval::NNUE::Features {
    // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
    static constexpr IndexType kMaxActiveDimensions = 1;
    // Timing of full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
+    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;

    // Get a list of indices with a value of 1 among the features
    static void AppendActiveIndices(const Position& pos, Color perspective,
      IndexList* active);

-    // Get a list of indices whose values ??have changed from the previous one in the feature quantity
+    // Get a list of indices whose values have changed from the previous one in the feature quantity
    static void AppendChangedIndices(const Position& pos, Color perspective,
      IndexList* removed, IndexList* added);
  };
@@ -100,7 +100,6 @@ namespace Eval::NNUE::Features {
        IndexListType removed[2], IndexListType added[2], bool reset[2]) {

      const auto& dp = pos.state()->dirtyPiece;
-      if (dp.dirty_num == 0) return;

      for (Color perspective : { WHITE, BLACK }) {
        reset[perspective] = false;
@@ -108,12 +107,15 @@ namespace Eval::NNUE::Features {
          case TriggerEvent::kNone:
            break;
          case TriggerEvent::kFriendKingMoved:
+            if (dp.dirty_num == 0) continue;
            reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
            break;
          case TriggerEvent::kEnemyKingMoved:
-              reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
+            if (dp.dirty_num == 0) continue;
+            reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
            break;
          case TriggerEvent::kAnyKingMoved:
+            if (dp.dirty_num == 0) continue;
            reset[perspective] = type_of(dp.piece[0]) == KING;
            break;
          case TriggerEvent::kAnyPieceMoved:
@@ -41,7 +41,7 @@ namespace Eval::NNUE::Features {
  void HalfKP<AssociatedKing>::AppendActiveIndices(
      const Position& pos, Color perspective, IndexList* active) {

-    Square ksq = orient(perspective, pos.square<KING>(perspective));
+    Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
    Bitboard bb = pos.pieces() & ~pos.pieces(KING);
    while (bb) {
      Square s = pop_lsb(&bb);
@@ -55,7 +55,7 @@ namespace Eval::NNUE::Features {
      const Position& pos, Color perspective,
      IndexList* removed, IndexList* added) {

-    Square ksq = orient(perspective, pos.square<KING>(perspective));
+    Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
    const auto& dp = pos.state()->dirtyPiece;
    for (int i = 0; i < dp.dirty_num; ++i) {
      Piece pc = dp.piece[i];
@@ -68,5 +68,6 @@ namespace Eval::NNUE::Features {
  }

  template class HalfKP<Side::kFriend>;
+  template class HalfKP<Side::kEnemy>;

 }  // namespace Eval::NNUE::Features
@@ -33,7 +33,8 @@ namespace Eval::NNUE::Features {

   public:
    // Feature name
-    static constexpr const char* kName = "HalfKP(Friend)";
+    static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
+        "HalfKP(Friend)" : "HalfKP(Enemy)";
    // Hash value embedded in the evaluation file
    static constexpr std::uint32_t kHashValue =
        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
@@ -43,7 +44,9 @@ namespace Eval::NNUE::Features {
    // Maximum number of simultaneously active features
    static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
    // Trigger for full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
+    static constexpr TriggerEvent kRefreshTrigger =
+        (AssociatedKing == Side::kFriend) ?
+        TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;

    // Get a list of indices for active features
    static void AppendActiveIndices(const Position& pos, Color perspective,
@@ -39,7 +39,7 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
 template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
    const Position& pos, Color perspective, IndexList* active) {
-  Square ksq = orient(perspective, pos.square<KING>(perspective));
+  Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
  Bitboard bb = pos.pieces() & ~pos.pieces(KING);
  while (bb) {
    Square s = pop_lsb(&bb);
@@ -52,7 +52,7 @@ template <Side AssociatedKing>
 void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
    const Position& pos, Color perspective,
    IndexList* removed, IndexList* added) {
-  Square ksq = orient(perspective, pos.square<KING>(perspective));
+  Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
  const auto& dp = pos.state()->dirtyPiece;
  for (int i = 0; i < dp.dirty_num; ++i) {
    Piece pc = dp.piece[i];
@@ -22,7 +22,7 @@
 #define NNUE_ARCHITECTURE_H_INCLUDED

 // Defines the network structure
-#include "architectures/halfkp-cr-ep_256x2-32-32.h"
+#include "architectures/halfkp_256x2-32-32.h"

 namespace Eval::NNUE {

@@ -1013,6 +1013,7 @@ void Position::do_null_move(StateInfo& newSt) {
  {
      st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
      st->epSquare = SQ_NONE;
+      st->accumulator.computed_accumulation = false;
  }

  st->key ^= Zobrist::side;
@@ -25,8 +25,6 @@

 namespace Tablebases {

-extern int MaxCardinality;
-
 enum WDLScore {
    WDLLoss        = -2, // Loss
    WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
@@ -45,6 +43,8 @@ enum ProbeState {
    ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
 };

+extern int MaxCardinality;
+
 void init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
 int probe_dtz(Position& pos, ProbeState* result);