Restore lambda and gradient function post-merge and minor fixes.

bench: 3788313
This commit is contained in:
noobpwnftw
2020-09-26 10:03:03 +08:00
committed by nodchip
parent d1967bb281
commit 5e8a49f7f2
15 changed files with 170 additions and 36 deletions
+1 -1
View File
@@ -42,7 +42,7 @@ namespace Eval {
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
#define EvalFileDefaultName "nn-28e08a9fe2ad.nnue"
#define EvalFileDefaultName "nn-54f88d1580b4.nnue"
namespace NNUE {
+99 -11
View File
@@ -157,6 +157,14 @@ namespace Learner
return ((y2 - y1) / epsilon) / winning_probability_coefficient;
}
// Mixing weights for the elmo (WCSC27) training formula. Adjustment required.
// Since elmo does not internally divide the expression, the value is different:
// 0.33 here is equivalent to the constant (0.5) used in elmo (WCSC27).
// All three values can be set with the learn command
// (options "lambda", "lambda2" and "lambda_limit").

// Weight returned by calculate_lambda() while |teacher_signal| is below ELMO_LAMBDA_LIMIT.
double ELMO_LAMBDA = 0.33;
// Weight returned by calculate_lambda() once |teacher_signal| reaches ELMO_LAMBDA_LIMIT.
double ELMO_LAMBDA2 = 0.33;
// Threshold on the absolute teacher signal at which ELMO_LAMBDA2 replaces ELMO_LAMBDA.
double ELMO_LAMBDA_LIMIT = 32000;
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
double get_scaled_signal(double signal)
{
@@ -182,6 +190,18 @@ namespace Learner
return winning_percentage(scaled_teacher_signal, ply);
}
// Picks the elmo mixing weight for a single training sample.
//
// teacher_signal: evaluation value obtained from the deep search.
// Returns ELMO_LAMBDA2 when the absolute value of teacher_signal has reached
// ELMO_LAMBDA_LIMIT, and ELMO_LAMBDA otherwise.
double calculate_lambda(double teacher_signal)
{
    const bool limit_reached = std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT;

    if (limit_reached)
        return ELMO_LAMBDA2;

    return ELMO_LAMBDA;
}
double calculate_t(int game_result)
{
// Use 1 as the correction term if the expected win rate is 1,
@@ -192,6 +212,32 @@ namespace Learner
return t;
}
// Gradient for one training position, following the elmo (WCSC27) method:
// the teacher term is blended with a term derived from the actual game result.
//
// teacher_signal: evaluation from the deep search (the teacher).
// shallow:        evaluation from the shallow search / current network.
// psv:            packed training record; gamePly and game_result are read.
//
// The blend weight comes from calculate_lambda(teacher_signal). When use_wdl
// is set, the two terms are the cross-entropy derivatives returned by
// calc_d_cross_entropy_of_winning_percentage(); otherwise they are plain
// differences of winning percentages.
double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
{
    const double shallow_rate = winning_percentage(shallow, psv.gamePly);
    const double teacher_rate = calculate_p(teacher_signal, psv.gamePly);
    const double result_rate  = calculate_t(psv.game_result);
    const double weight       = calculate_lambda(teacher_signal);

    if (!use_wdl)
    {
        // Use the actual win rate as a correction term.
        // This is the idea of elmo (WCSC27), modern O-parts.
        return weight * (shallow_rate - teacher_rate)
             + (1.0 - weight) * (shallow_rate - result_rate);
    }

    const double grad_teacher =
        calc_d_cross_entropy_of_winning_percentage(teacher_rate, shallow, psv.gamePly);
    const double grad_result =
        calc_d_cross_entropy_of_winning_percentage(result_rate, shallow, psv.gamePly);

    return weight * grad_teacher + (1.0 - weight) * grad_result;
}
// Calculate cross entropy during learning
// The individual cross entropy of the win/loss term and win
// rate term of the elmo expression is returned
@@ -202,16 +248,21 @@ namespace Learner
const PackedSfenValue& psv,
double& cross_entropy_eval,
double& cross_entropy_win,
double& cross_entropy,
double& entropy_eval,
double& entropy_win)
double& entropy_win,
double& entropy)
{
// Teacher winning probability.
const double q = winning_percentage(shallow, psv.gamePly);
const double p = calculate_p(teacher_signal, psv.gamePly);
const double t = calculate_t(psv.game_result);
const double lambda = calculate_lambda(teacher_signal);
constexpr double epsilon = 0.000001;
const double m = (1.0 - lambda) * t + lambda * p;
cross_entropy_eval =
(-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
cross_entropy_win =
@@ -220,12 +271,17 @@ namespace Learner
(-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
entropy_win =
(-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
cross_entropy =
(-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
entropy =
(-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
}
// Other objective functions may be considered in the future...
//
// Convenience overload: takes the score stored in the training record
// (psv.score) as the teacher signal and forwards to the three-argument
// calc_grad(), so the lambda-weighted elmo gradient is what callers receive.
//
// A leftover `return (double)(shallow - (Value)psv.score) / 2400.0;` used to
// precede this call and made the delegation unreachable; it has been removed.
double calc_grad(Value shallow, const PackedSfenValue& psv)
{
    return calc_grad(static_cast<Value>(psv.score), shallow, psv);
}
struct BasicSfenInputStream
@@ -798,12 +854,14 @@ namespace Learner
cout << ", learning rate = " << global_learning_rate << ", ";
// For calculation of verification data loss
atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
test_sum_cross_entropy_eval = 0;
test_sum_cross_entropy_win = 0;
test_sum_cross_entropy = 0;
test_sum_entropy_eval = 0;
test_sum_entropy_win = 0;
test_sum_entropy = 0;
// norm for learning
atomic<double> sum_norm;
@@ -843,8 +901,10 @@ namespace Learner
&ps,
&test_sum_cross_entropy_eval,
&test_sum_cross_entropy_win,
&test_sum_cross_entropy,
&test_sum_entropy_eval,
&test_sum_entropy_win,
&test_sum_entropy,
&sum_norm,
&task_count,
&move_accord_count
@@ -872,22 +932,26 @@ namespace Learner
// For the time being, regarding the win rate and loss terms only in the elmo method
// Calculate and display the cross entropy.
double test_cross_entropy_eval, test_cross_entropy_win;
double test_entropy_eval, test_entropy_win;
double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
double test_entropy_eval, test_entropy_win, test_entropy;
calc_cross_entropy(
deep_value,
shallow_value,
ps,
test_cross_entropy_eval,
test_cross_entropy_win,
test_cross_entropy,
test_entropy_eval,
test_entropy_win);
test_entropy_win,
test_entropy);
// The total cross entropy need not be abs() by definition.
test_sum_cross_entropy_eval += test_cross_entropy_eval;
test_sum_cross_entropy_win += test_cross_entropy_win;
test_sum_cross_entropy += test_cross_entropy;
test_sum_entropy_eval += test_entropy_eval;
test_sum_entropy_win += test_entropy_win;
test_sum_entropy += test_entropy;
sum_norm += (double)abs(shallow_value);
// Determine if the teacher's move and the score of the shallow search match
@@ -912,7 +976,7 @@ namespace Learner
while (task_count)
sleep(1);
latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
latest_loss_count += sr.sfen_for_mse.size();
// learn_cross_entropy may be called train cross
@@ -927,6 +991,8 @@ namespace Learner
<< " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
<< " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
<< " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
<< " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
<< " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
<< " , norm = " << sum_norm
<< " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
<< endl;
@@ -938,6 +1004,8 @@ namespace Learner
<< " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
<< " , learn_entropy_eval = " << learn_sum_entropy_eval / done
<< " , learn_entropy_win = " << learn_sum_entropy_win / done
<< " , learn_cross_entropy = " << learn_sum_cross_entropy / done
<< " , learn_entropy = " << learn_sum_entropy / done
<< endl;
}
}
@@ -949,8 +1017,10 @@ namespace Learner
// Clear 0 for next time.
learn_sum_cross_entropy_eval = 0.0;
learn_sum_cross_entropy_win = 0.0;
learn_sum_cross_entropy = 0.0;
learn_sum_entropy_eval = 0.0;
learn_sum_entropy_win = 0.0;
learn_sum_entropy = 0.0;
}
void LearnerThink::thread_worker(size_t thread_id)
@@ -1142,21 +1212,25 @@ namespace Learner
: -Eval::evaluate(pos);
// Calculate loss for training data
double learn_cross_entropy_eval, learn_cross_entropy_win;
double learn_entropy_eval, learn_entropy_win;
double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
double learn_entropy_eval, learn_entropy_win, learn_entropy;
calc_cross_entropy(
deep_value,
shallow_value,
ps,
learn_cross_entropy_eval,
learn_cross_entropy_win,
learn_cross_entropy,
learn_entropy_eval,
learn_entropy_win);
learn_entropy_win,
learn_entropy);
learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
learn_sum_cross_entropy_win += learn_cross_entropy_win;
learn_sum_cross_entropy += learn_cross_entropy;
learn_sum_entropy_eval += learn_entropy_eval;
learn_sum_entropy_win += learn_entropy_win;
learn_sum_entropy += learn_entropy;
Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
@@ -1560,6 +1634,11 @@ namespace Learner
global_learning_rate = 1.0;
// elmo lambda
ELMO_LAMBDA = 0.33;
ELMO_LAMBDA2 = 0.33;
ELMO_LAMBDA_LIMIT = 32000;
// if (gamePly < rand(reduction_gameply)) continue;
// An option that moderately excludes early-game positions from the training data, as in the line above.
// If set to 1, rand(1) == 0, so nothing is excluded.
@@ -1627,6 +1706,12 @@ namespace Learner
// Using WDL with win rate model instead of sigmoid
else if (option == "use_wdl") is >> use_wdl;
// LAMBDA
else if (option == "lambda") is >> ELMO_LAMBDA;
else if (option == "lambda2") is >> ELMO_LAMBDA2;
else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
else if (option == "reduction_gameply") is >> reduction_gameply;
// shuffle related
@@ -1814,6 +1899,9 @@ namespace Learner
reduction_gameply = max(reduction_gameply, 1);
cout << "reduction_gameply : " << reduction_gameply << endl;
cout << "LAMBDA : " << ELMO_LAMBDA << endl;
cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl;
cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl;
cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl;
cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
+5 -1
View File
@@ -23,7 +23,11 @@ using LearnFloatType = float;
// configure
// ======================
#define LOSS_FUNCTION "cross_entropy_eval"
// ----------------------
// Learning with the method of elmo (WCSC27)
// ----------------------
#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
// ----------------------
// Definition of struct used in Learner
+24 -4
View File
@@ -31,10 +31,30 @@ namespace Eval::NNUE::Features {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
const Position& pos, Color perspective,
IndexList* removed, IndexList* /* added */) {
int previous_castling_rights = pos.state()->previous->castlingRights;
int current_castling_rights = pos.state()->castlingRights;
int relative_previous_castling_rights;
int relative_current_castling_rights;
if (perspective == WHITE) {
relative_previous_castling_rights = previous_castling_rights;
relative_current_castling_rights = current_castling_rights;
}
else {
// Invert the perspective.
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
& ((previous_castling_rights >> 2) & 3);
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
& ((current_castling_rights >> 2) & 3);
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if ((relative_previous_castling_rights & (1 << i)) &&
(relative_current_castling_rights & (1 << i)) == 0) {
removed->push_back(i);
}
}
}
} // namespace Eval::NNUE::Features
+1 -1
View File
@@ -19,7 +19,7 @@ namespace Eval::NNUE::Features {
// Maximum number of feature indices that can have the value 1 at the same time
static constexpr IndexType kMaxActiveDimensions = 4;
// Trigger for a full recalculation instead of a difference (incremental) calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
+16 -4
View File
@@ -21,10 +21,22 @@ namespace Eval::NNUE::Features {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
const Position& pos, Color /* perspective */,
IndexList* removed, IndexList* added) {
auto previous_epSquare = pos.state()->previous->epSquare;
auto epSquare = pos.state()->epSquare;
if (previous_epSquare != SQ_NONE) {
if (epSquare != SQ_NONE && file_of(epSquare) == file_of(previous_epSquare))
return;
auto file = file_of(previous_epSquare);
removed->push_back(file);
}
if (epSquare != SQ_NONE) {
auto file = file_of(epSquare);
added->push_back(file);
}
}
} // namespace Eval::NNUE::Features
+2 -2
View File
@@ -19,13 +19,13 @@ namespace Eval::NNUE::Features {
// Maximum number of feature indices that can have the value 1 at the same time
static constexpr IndexType kMaxActiveDimensions = 1;
// Trigger for a full recalculation instead of a difference (incremental) calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
+4 -2
View File
@@ -100,7 +100,6 @@ namespace Eval::NNUE::Features {
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
for (Color perspective : { WHITE, BLACK }) {
reset[perspective] = false;
@@ -108,12 +107,15 @@ namespace Eval::NNUE::Features {
case TriggerEvent::kNone:
break;
case TriggerEvent::kFriendKingMoved:
if (dp.dirty_num == 0) continue;
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
break;
case TriggerEvent::kEnemyKingMoved:
reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
if (dp.dirty_num == 0) continue;
reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
break;
case TriggerEvent::kAnyKingMoved:
if (dp.dirty_num == 0) continue;
reset[perspective] = type_of(dp.piece[0]) == KING;
break;
case TriggerEvent::kAnyPieceMoved:
+3 -2
View File
@@ -41,7 +41,7 @@ namespace Eval::NNUE::Features {
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
@@ -55,7 +55,7 @@ namespace Eval::NNUE::Features {
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
@@ -68,5 +68,6 @@ namespace Eval::NNUE::Features {
}
template class HalfKP<Side::kFriend>;
template class HalfKP<Side::kEnemy>;
} // namespace Eval::NNUE::Features
+5 -2
View File
@@ -33,7 +33,8 @@ namespace Eval::NNUE::Features {
public:
// Feature name
static constexpr const char* kName = "HalfKP(Friend)";
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfKP(Friend)" : "HalfKP(Enemy)";
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
@@ -43,7 +44,9 @@ namespace Eval::NNUE::Features {
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of indices for active features
static void AppendActiveIndices(const Position& pos, Color perspective,
+2 -2
View File
@@ -39,7 +39,7 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
@@ -52,7 +52,7 @@ template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
+1 -1
View File
@@ -22,7 +22,7 @@
#define NNUE_ARCHITECTURE_H_INCLUDED
// Defines the network structure
#include "architectures/halfkp-cr-ep_256x2-32-32.h"
#include "architectures/halfkp_256x2-32-32.h"
namespace Eval::NNUE {
+1
View File
@@ -1013,6 +1013,7 @@ void Position::do_null_move(StateInfo& newSt) {
{
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
st->epSquare = SQ_NONE;
st->accumulator.computed_accumulation = false;
}
st->key ^= Zobrist::side;
+2 -2
View File
@@ -25,8 +25,6 @@
namespace Tablebases {
extern int MaxCardinality;
enum WDLScore {
WDLLoss = -2, // Loss
WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
@@ -45,6 +43,8 @@ enum ProbeState {
ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move)
};
extern int MaxCardinality;
void init(const std::string& paths);
WDLScore probe_wdl(Position& pos, ProbeState* result);
int probe_dtz(Position& pos, ProbeState* result);