Restore lambda and gradient function post-merge and minor fixes.

bench: 3788313
This commit is contained in:
noobpwnftw
2020-09-26 10:03:03 +08:00
committed by nodchip
parent d1967bb281
commit 5e8a49f7f2
15 changed files with 170 additions and 36 deletions
+4 -1
View File
@@ -37,6 +37,7 @@ Additional options:
To generate training data from the classic eval, use the gensfen command with the setting "Use NNUE" set to "false". The given example is generation in its simplest form. There are more commands.
```
uci
setoption name PruneAtShallowDepth value false
setoption name Use NNUE value false
setoption name Threads value x
setoption name Hash value y
@@ -56,11 +57,13 @@ The process is the same as the generation of training data, except for the fact
Use the "learn" binary. Create an empty folder named "evalsave" in the same directory as the binaries.
```
uci
setoption name EnableTranspositionTable value false
setoption name PruneAtShallowDepth value false
setoption name SkipLoadingEval value true
setoption name Use NNUE value pure
setoption name Threads value x
isready
learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 eta 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 mirror_percentage 50 validation_set_file_name validationdata\val.bin
learn targetdir trainingdata loop 100 batchsize 1000000 use_draw_in_training 1 use_draw_in_validation 1 lr 1 lambda 1 eval_limit 32000 nn_batch_size 1000 newbob_decay 0.5 eval_save_interval 250000000 loss_output_interval 1000000 validation_set_file_name validationdata\val.bin
```
Nets get saved in the "evalsave" folder.
+1 -1
View File
@@ -42,7 +42,7 @@ namespace Eval {
// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work. Do not change the
// name of the macro, as it is used in the Makefile.
#define EvalFileDefaultName "nn-28e08a9fe2ad.nnue"
#define EvalFileDefaultName "nn-54f88d1580b4.nnue"
namespace NNUE {
+99 -11
View File
@@ -157,6 +157,14 @@ namespace Learner
return ((y2 - y1) / epsilon) / winning_probability_coefficient;
}
// Constants used by the elmo (WCSC27) training method. Adjustment required.
// Since elmo does not internally divide the expression, the value is different:
// 0.33 is equivalent to the constant (0.5) used in elmo (WCSC27).
// All three values can be set with the learn command
// (options "lambda", "lambda2" and "lambda_limit").
// Lambda used while the absolute teacher signal is below ELMO_LAMBDA_LIMIT.
double ELMO_LAMBDA = 0.33;
// Lambda used once the absolute teacher signal reaches ELMO_LAMBDA_LIMIT.
double ELMO_LAMBDA2 = 0.33;
// Threshold on the absolute teacher signal that switches between the two lambdas.
double ELMO_LAMBDA_LIMIT = 32000;
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
double get_scaled_signal(double signal)
{
@@ -182,6 +190,18 @@ namespace Learner
return winning_percentage(scaled_teacher_signal, ply);
}
// Picks the lambda to use for one training position.
// Returns ELMO_LAMBDA2 when the magnitude of the deep-search evaluation
// (teacher_signal) is at or above ELMO_LAMBDA_LIMIT, and ELMO_LAMBDA otherwise.
double calculate_lambda(double teacher_signal)
{
    const bool limit_reached = std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT;
    if (limit_reached)
        return ELMO_LAMBDA2;

    return ELMO_LAMBDA;
}
double calculate_t(int game_result)
{
// Use 1 as the correction term if the expected win rate is 1,
@@ -192,6 +212,32 @@ namespace Learner
return t;
}
// Gradient for the elmo (WCSC27) method.
//   teacher_signal : evaluation of the position from the deep search
//   shallow        : evaluation of the position from the shallow search
//   psv            : training record; its gamePly and game_result are read
// The teacher term and the game-result term are blended with the lambda
// returned by calculate_lambda(teacher_signal).
double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
{
    // Winning percentage of the shallow evaluation, the teacher-side value,
    // and the value derived from the actual game result.
    const double shallow_wp = winning_percentage(shallow, psv.gamePly);
    const double teacher_wp = calculate_p(teacher_signal, psv.gamePly);
    const double result_wp = calculate_t(psv.game_result);
    const double lambda = calculate_lambda(teacher_signal);

    if (!use_wdl)
    {
        // Use the actual game result as a correction term next to the
        // teacher term, as in elmo (WCSC27).
        return lambda * (shallow_wp - teacher_wp) + (1.0 - lambda) * (shallow_wp - result_wp);
    }

    // WDL model: blend the cross-entropy derivatives taken against the
    // teacher value and against the game-result value.
    const double d_teacher = calc_d_cross_entropy_of_winning_percentage(teacher_wp, shallow, psv.gamePly);
    const double d_result = calc_d_cross_entropy_of_winning_percentage(result_wp, shallow, psv.gamePly);
    return lambda * d_teacher + (1.0 - lambda) * d_result;
}
// Calculate cross entropy during learning
// The individual cross entropy of the win/loss term and win
// rate term of the elmo expression is returned
@@ -202,16 +248,21 @@ namespace Learner
const PackedSfenValue& psv,
double& cross_entropy_eval,
double& cross_entropy_win,
double& cross_entropy,
double& entropy_eval,
double& entropy_win)
double& entropy_win,
double& entropy)
{
// Teacher winning probability.
const double q = winning_percentage(shallow, psv.gamePly);
const double p = calculate_p(teacher_signal, psv.gamePly);
const double t = calculate_t(psv.game_result);
const double lambda = calculate_lambda(teacher_signal);
constexpr double epsilon = 0.000001;
const double m = (1.0 - lambda) * t + lambda * p;
cross_entropy_eval =
(-p * std::log(q + epsilon) - (1.0 - p) * std::log(1.0 - q + epsilon));
cross_entropy_win =
@@ -220,12 +271,17 @@ namespace Learner
(-p * std::log(p + epsilon) - (1.0 - p) * std::log(1.0 - p + epsilon));
entropy_win =
(-t * std::log(t + epsilon) - (1.0 - t) * std::log(1.0 - t + epsilon));
cross_entropy =
(-m * std::log(q + epsilon) - (1.0 - m) * std::log(1.0 - q + epsilon));
entropy =
(-m * std::log(m + epsilon) - (1.0 - m) * std::log(1.0 - m + epsilon));
}
// Other objective functions may be considered in the future...
double calc_grad(Value shallow, const PackedSfenValue& psv)
{
return (double)(shallow - (Value)psv.score) / 2400.0;
return calc_grad((Value)psv.score, shallow, psv);
}
struct BasicSfenInputStream
@@ -798,12 +854,14 @@ namespace Learner
cout << ", learning rate = " << global_learning_rate << ", ";
// For calculation of verification data loss
atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win;
atomic<double> test_sum_entropy_eval, test_sum_entropy_win;
atomic<double> test_sum_cross_entropy_eval, test_sum_cross_entropy_win, test_sum_cross_entropy;
atomic<double> test_sum_entropy_eval, test_sum_entropy_win, test_sum_entropy;
test_sum_cross_entropy_eval = 0;
test_sum_cross_entropy_win = 0;
test_sum_cross_entropy = 0;
test_sum_entropy_eval = 0;
test_sum_entropy_win = 0;
test_sum_entropy = 0;
// norm for learning
atomic<double> sum_norm;
@@ -843,8 +901,10 @@ namespace Learner
&ps,
&test_sum_cross_entropy_eval,
&test_sum_cross_entropy_win,
&test_sum_cross_entropy,
&test_sum_entropy_eval,
&test_sum_entropy_win,
&test_sum_entropy,
&sum_norm,
&task_count,
&move_accord_count
@@ -872,22 +932,26 @@ namespace Learner
// For the time being, regarding the win rate and loss terms only in the elmo method
// Calculate and display the cross entropy.
double test_cross_entropy_eval, test_cross_entropy_win;
double test_entropy_eval, test_entropy_win;
double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
double test_entropy_eval, test_entropy_win, test_entropy;
calc_cross_entropy(
deep_value,
shallow_value,
ps,
test_cross_entropy_eval,
test_cross_entropy_win,
test_cross_entropy,
test_entropy_eval,
test_entropy_win);
test_entropy_win,
test_entropy);
// The total cross entropy need not be abs() by definition.
test_sum_cross_entropy_eval += test_cross_entropy_eval;
test_sum_cross_entropy_win += test_cross_entropy_win;
test_sum_cross_entropy += test_cross_entropy;
test_sum_entropy_eval += test_entropy_eval;
test_sum_entropy_win += test_entropy_win;
test_sum_entropy += test_entropy;
sum_norm += (double)abs(shallow_value);
// Determine if the teacher's move and the score of the shallow search match
@@ -912,7 +976,7 @@ namespace Learner
while (task_count)
sleep(1);
latest_loss_sum += test_sum_cross_entropy_eval - test_sum_entropy_eval;
latest_loss_sum += test_sum_cross_entropy - test_sum_entropy;
latest_loss_count += sr.sfen_for_mse.size();
// learn_cross_entropy may be called train cross
@@ -927,6 +991,8 @@ namespace Learner
<< " , test_cross_entropy_win = " << test_sum_cross_entropy_win / sr.sfen_for_mse.size()
<< " , test_entropy_eval = " << test_sum_entropy_eval / sr.sfen_for_mse.size()
<< " , test_entropy_win = " << test_sum_entropy_win / sr.sfen_for_mse.size()
<< " , test_cross_entropy = " << test_sum_cross_entropy / sr.sfen_for_mse.size()
<< " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
<< " , norm = " << sum_norm
<< " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%"
<< endl;
@@ -938,6 +1004,8 @@ namespace Learner
<< " , learn_cross_entropy_win = " << learn_sum_cross_entropy_win / done
<< " , learn_entropy_eval = " << learn_sum_entropy_eval / done
<< " , learn_entropy_win = " << learn_sum_entropy_win / done
<< " , learn_cross_entropy = " << learn_sum_cross_entropy / done
<< " , learn_entropy = " << learn_sum_entropy / done
<< endl;
}
}
@@ -949,8 +1017,10 @@ namespace Learner
// Reset the sums to zero for next time.
learn_sum_cross_entropy_eval = 0.0;
learn_sum_cross_entropy_win = 0.0;
learn_sum_cross_entropy = 0.0;
learn_sum_entropy_eval = 0.0;
learn_sum_entropy_win = 0.0;
learn_sum_entropy = 0.0;
}
void LearnerThink::thread_worker(size_t thread_id)
@@ -1142,21 +1212,25 @@ namespace Learner
: -Eval::evaluate(pos);
// Calculate loss for training data
double learn_cross_entropy_eval, learn_cross_entropy_win;
double learn_entropy_eval, learn_entropy_win;
double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
double learn_entropy_eval, learn_entropy_win, learn_entropy;
calc_cross_entropy(
deep_value,
shallow_value,
ps,
learn_cross_entropy_eval,
learn_cross_entropy_win,
learn_cross_entropy,
learn_entropy_eval,
learn_entropy_win);
learn_entropy_win,
learn_entropy);
learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
learn_sum_cross_entropy_win += learn_cross_entropy_win;
learn_sum_cross_entropy += learn_cross_entropy;
learn_sum_entropy_eval += learn_entropy_eval;
learn_sum_entropy_win += learn_entropy_win;
learn_sum_entropy += learn_entropy;
Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
@@ -1560,6 +1634,11 @@ namespace Learner
global_learning_rate = 1.0;
// elmo lambda
ELMO_LAMBDA = 0.33;
ELMO_LAMBDA2 = 0.33;
ELMO_LAMBDA_LIMIT = 32000;
// An option to moderately exclude early-game positions from the learning targets, in the manner of:
//   if (gamePly < rand(reduction_gameply)) continue;
// If set to 1, rand(1)==0, so nothing is excluded.
@@ -1627,6 +1706,12 @@ namespace Learner
// Using WDL with win rate model instead of sigmoid
else if (option == "use_wdl") is >> use_wdl;
// LAMBDA
else if (option == "lambda") is >> ELMO_LAMBDA;
else if (option == "lambda2") is >> ELMO_LAMBDA2;
else if (option == "lambda_limit") is >> ELMO_LAMBDA_LIMIT;
else if (option == "reduction_gameply") is >> reduction_gameply;
// shuffle related
@@ -1814,6 +1899,9 @@ namespace Learner
reduction_gameply = max(reduction_gameply, 1);
cout << "reduction_gameply : " << reduction_gameply << endl;
cout << "LAMBDA : " << ELMO_LAMBDA << endl;
cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl;
cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl;
cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl;
cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
+5 -1
View File
@@ -23,7 +23,11 @@ using LearnFloatType = float;
// configure
// ======================
#define LOSS_FUNCTION "cross_entropy_eval"
// ----------------------
// Learning with the method of elmo (WCSC27)
// ----------------------
#define LOSS_FUNCTION "ELMO_METHOD(WCSC27)"
// ----------------------
// Definition of struct used in Learner
+24 -4
View File
@@ -31,10 +31,30 @@ namespace Eval::NNUE::Features {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void CastlingRight::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
const Position& pos, Color perspective,
IndexList* removed, IndexList* /* added */) {
int previous_castling_rights = pos.state()->previous->castlingRights;
int current_castling_rights = pos.state()->castlingRights;
int relative_previous_castling_rights;
int relative_current_castling_rights;
if (perspective == WHITE) {
relative_previous_castling_rights = previous_castling_rights;
relative_current_castling_rights = current_castling_rights;
}
else {
// Invert the perspective.
relative_previous_castling_rights = ((previous_castling_rights & 3) << 2)
& ((previous_castling_rights >> 2) & 3);
relative_current_castling_rights = ((current_castling_rights & 3) << 2)
& ((current_castling_rights >> 2) & 3);
}
for (Eval::NNUE::IndexType i = 0; i < kDimensions; ++i) {
if ((relative_previous_castling_rights & (1 << i)) &&
(relative_current_castling_rights & (1 << i)) == 0) {
removed->push_back(i);
}
}
}
} // namespace Eval::NNUE::Features
+1 -1
View File
@@ -19,7 +19,7 @@ namespace Eval::NNUE::Features {
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 4;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
+16 -4
View File
@@ -21,10 +21,22 @@ namespace Eval::NNUE::Features {
// Get a list of indices whose values have changed from the previous one in the feature quantity
void EnPassant::AppendChangedIndices(
const Position& /* pos */, Color /* perspective */,
IndexList* /* removed */, IndexList* /* added */) {
// Not implemented.
assert(false);
const Position& pos, Color /* perspective */,
IndexList* removed, IndexList* added) {
auto previous_epSquare = pos.state()->previous->epSquare;
auto epSquare = pos.state()->epSquare;
if (previous_epSquare != SQ_NONE) {
if (epSquare != SQ_NONE && file_of(epSquare) == file_of(previous_epSquare))
return;
auto file = file_of(previous_epSquare);
removed->push_back(file);
}
if (epSquare != SQ_NONE) {
auto file = file_of(epSquare);
added->push_back(file);
}
}
} // namespace Eval::NNUE::Features
+2 -2
View File
@@ -19,13 +19,13 @@ namespace Eval::NNUE::Features {
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = 1;
// Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kAnyPieceMoved;
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
// Get a list of indices with a value of 1 among the features
static void AppendActiveIndices(const Position& pos, Color perspective,
IndexList* active);
// Get a list of indices whose values ??have changed from the previous one in the feature quantity
// Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added);
};
+4 -2
View File
@@ -100,7 +100,6 @@ namespace Eval::NNUE::Features {
IndexListType removed[2], IndexListType added[2], bool reset[2]) {
const auto& dp = pos.state()->dirtyPiece;
if (dp.dirty_num == 0) return;
for (Color perspective : { WHITE, BLACK }) {
reset[perspective] = false;
@@ -108,12 +107,15 @@ namespace Eval::NNUE::Features {
case TriggerEvent::kNone:
break;
case TriggerEvent::kFriendKingMoved:
if (dp.dirty_num == 0) continue;
reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
break;
case TriggerEvent::kEnemyKingMoved:
reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
if (dp.dirty_num == 0) continue;
reset[perspective] = dp.piece[0] == make_piece(~perspective, KING);
break;
case TriggerEvent::kAnyKingMoved:
if (dp.dirty_num == 0) continue;
reset[perspective] = type_of(dp.piece[0]) == KING;
break;
case TriggerEvent::kAnyPieceMoved:
+3 -2
View File
@@ -41,7 +41,7 @@ namespace Eval::NNUE::Features {
void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
@@ -55,7 +55,7 @@ namespace Eval::NNUE::Features {
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
@@ -68,5 +68,6 @@ namespace Eval::NNUE::Features {
}
template class HalfKP<Side::kFriend>;
template class HalfKP<Side::kEnemy>;
} // namespace Eval::NNUE::Features
+5 -2
View File
@@ -33,7 +33,8 @@ namespace Eval::NNUE::Features {
public:
// Feature name
static constexpr const char* kName = "HalfKP(Friend)";
static constexpr const char* kName = (AssociatedKing == Side::kFriend) ?
"HalfKP(Friend)" : "HalfKP(Enemy)";
// Hash value embedded in the evaluation file
static constexpr std::uint32_t kHashValue =
0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
@@ -43,7 +44,9 @@ namespace Eval::NNUE::Features {
// Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ?
TriggerEvent::kFriendKingMoved : TriggerEvent::kEnemyKingMoved;
// Get a list of indices for active features
static void AppendActiveIndices(const Position& pos, Color perspective,
+2 -2
View File
@@ -39,7 +39,7 @@ inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
Square s = pop_lsb(&bb);
@@ -52,7 +52,7 @@ template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective,
IndexList* removed, IndexList* added) {
Square ksq = orient(perspective, pos.square<KING>(perspective));
Square ksq = orient(perspective, pos.square<KING>(AssociatedKing == Side::kFriend ? perspective : ~perspective));
const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) {
Piece pc = dp.piece[i];
+1 -1
View File
@@ -22,7 +22,7 @@
#define NNUE_ARCHITECTURE_H_INCLUDED
// Defines the network structure
#include "architectures/halfkp-cr-ep_256x2-32-32.h"
#include "architectures/halfkp_256x2-32-32.h"
namespace Eval::NNUE {
+1
View File
@@ -1013,6 +1013,7 @@ void Position::do_null_move(StateInfo& newSt) {
{
st->key ^= Zobrist::enpassant[file_of(st->epSquare)];
st->epSquare = SQ_NONE;
st->accumulator.computed_accumulation = false;
}
st->key ^= Zobrist::side;
+2 -2
View File
@@ -25,8 +25,6 @@
namespace Tablebases {
extern int MaxCardinality;
enum WDLScore {
WDLLoss = -2, // Loss
WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
@@ -45,6 +43,8 @@ enum ProbeState {
ZEROING_BEST_MOVE = 2 // Best move zeroes DTZ (capture or pawn move)
};
extern int MaxCardinality;
void init(const std::string& paths);
WDLScore probe_wdl(Position& pos, ProbeState* result);
int probe_dtz(Position& pos, ProbeState* result);