mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 14:27:45 +00:00
First batch of reorganization.
This commit is contained in:
+237
-165
@@ -66,7 +66,7 @@ using namespace std;
|
|||||||
//extern Book::BookMoveSelector book;
|
//extern Book::BookMoveSelector book;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T operator += (std::atomic<T>& x, const T rhs)
|
T operator +=(std::atomic<T>& x, const T rhs)
|
||||||
{
|
{
|
||||||
T old = x.load(std::memory_order_consume);
|
T old = x.load(std::memory_order_consume);
|
||||||
// It is allowed that the value is rewritten from other thread at this timing.
|
// It is allowed that the value is rewritten from other thread at this timing.
|
||||||
@@ -84,8 +84,9 @@ namespace Learner
|
|||||||
static bool use_draw_games_in_training = false;
|
static bool use_draw_games_in_training = false;
|
||||||
static bool use_draw_games_in_validation = false;
|
static bool use_draw_games_in_validation = false;
|
||||||
static bool skip_duplicated_positions_in_training = true;
|
static bool skip_duplicated_positions_in_training = true;
|
||||||
// 1.0 / PawnValueEg / 4.0 * log(10.0)
|
|
||||||
static double winning_probability_coefficient = 0.00276753015984861260098316280611;
|
static double winning_probability_coefficient = 1.0 / PawnValueEg / 4.0 * std::log(10.0);
|
||||||
|
|
||||||
// Score scale factors. ex) If we set src_score_min_value = 0.0,
|
// Score scale factors. ex) If we set src_score_min_value = 0.0,
|
||||||
// src_score_max_value = 1.0, dest_score_min_value = 0.0,
|
// src_score_max_value = 1.0, dest_score_min_value = 0.0,
|
||||||
// dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
|
// dest_score_max_value = 10000.0, [0.0, 1.0] will be scaled to [0, 10000].
|
||||||
@@ -93,6 +94,7 @@ namespace Learner
|
|||||||
static double src_score_max_value = 1.0;
|
static double src_score_max_value = 1.0;
|
||||||
static double dest_score_min_value = 0.0;
|
static double dest_score_min_value = 0.0;
|
||||||
static double dest_score_max_value = 1.0;
|
static double dest_score_max_value = 1.0;
|
||||||
|
|
||||||
// Assume teacher signals are the scores of deep searches, and convert them into winning
|
// Assume teacher signals are the scores of deep searches, and convert them into winning
|
||||||
// probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
|
// probabilities in the trainer. Sometimes we want to use the winning probabilities in the training
|
||||||
// data directly. In those cases, we set false to this variable.
|
// data directly. In those cases, we set false to this variable.
|
||||||
@@ -102,7 +104,7 @@ namespace Learner
|
|||||||
// generation and training don't work well.
|
// generation and training don't work well.
|
||||||
// https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
|
// https://discordapp.com/channels/435943710472011776/733545871911813221/748524079761326192
|
||||||
// This CANNOT be static since it's used elsewhere.
|
// This CANNOT be static since it's used elsewhere.
|
||||||
bool use_raw_nnue_eval = true;
|
bool use_raw_nnue_eval = false;
|
||||||
|
|
||||||
// Using WDL with win rate model instead of sigmoid
|
// Using WDL with win rate model instead of sigmoid
|
||||||
static bool use_wdl = false;
|
static bool use_wdl = false;
|
||||||
@@ -111,38 +113,37 @@ namespace Learner
|
|||||||
// command to learn from the generated game (learn)
|
// command to learn from the generated game (learn)
|
||||||
// -----------------------------------
|
// -----------------------------------
|
||||||
|
|
||||||
// ordinary sigmoid function
|
|
||||||
double sigmoid(double x)
|
|
||||||
{
|
|
||||||
return 1.0 / (1.0 + std::exp(-x));
|
|
||||||
}
|
|
||||||
|
|
||||||
// A function that converts the evaluation value to the winning rate [0,1]
|
// A function that converts the evaluation value to the winning rate [0,1]
|
||||||
double winning_percentage(double value)
|
double winning_percentage(double value)
|
||||||
{
|
{
|
||||||
// 1/(1+10^(-Eval/4))
|
// 1/(1+10^(-Eval/4))
|
||||||
// = 1/(1+e^(-Eval/4*ln(10))
|
// = 1/(1+e^(-Eval/4*ln(10))
|
||||||
// = sigmoid(Eval/4*ln(10))
|
// = sigmoid(Eval/4*ln(10))
|
||||||
return sigmoid(value * winning_probability_coefficient);
|
return Math::sigmoid(value * winning_probability_coefficient);
|
||||||
}
|
}
|
||||||
|
|
||||||
// A function that converts the evaluation value to the winning rate [0,1]
|
// A function that converts the evaluation value to the winning rate [0,1]
|
||||||
double winning_percentage_wdl(double value, int ply)
|
double winning_percentage_wdl(double value, int ply)
|
||||||
{
|
{
|
||||||
|
constexpr double wdl_total = 1000.0;
|
||||||
|
constexpr double draw_score = 0.5;
|
||||||
|
|
||||||
double wdl_w = UCI::win_rate_model_double(value, ply);
|
double wdl_w = UCI::win_rate_model_double(value, ply);
|
||||||
double wdl_l = UCI::win_rate_model_double(-value, ply);
|
double wdl_l = UCI::win_rate_model_double(-value, ply);
|
||||||
double wdl_d = 1000.0 - wdl_w - wdl_l;
|
double wdl_d = wdl_total - wdl_w - wdl_l;
|
||||||
|
|
||||||
return (wdl_w + wdl_d / 2.0) / 1000.0;
|
return (wdl_w + wdl_d * draw_score) / wdl_total;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A function that converts the evaluation value to the winning rate [0,1]
|
// A function that converts the evaluation value to the winning rate [0,1]
|
||||||
double winning_percentage(double value, int ply)
|
double winning_percentage(double value, int ply)
|
||||||
{
|
{
|
||||||
if (use_wdl) {
|
if (use_wdl)
|
||||||
|
{
|
||||||
return winning_percentage_wdl(value, ply);
|
return winning_percentage_wdl(value, ply);
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
return winning_percentage(value);
|
return winning_percentage(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -151,7 +152,7 @@ namespace Learner
|
|||||||
{
|
{
|
||||||
double p = deep_win_rate;
|
double p = deep_win_rate;
|
||||||
double q = winning_percentage(shallow_eval, ply);
|
double q = winning_percentage(shallow_eval, ply);
|
||||||
return -p * std::log(q) - (1 - p) * std::log(1 - q);
|
return -p * std::log(q) - (1.0 - p) * std::log(1.0 - q);
|
||||||
}
|
}
|
||||||
|
|
||||||
double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
|
double calc_d_cross_entropy_of_winning_percentage(double deep_win_rate, double shallow_eval, int ply)
|
||||||
@@ -164,17 +165,6 @@ namespace Learner
|
|||||||
return ((y2 - y1) / epsilon) / winning_probability_coefficient;
|
return ((y2 - y1) / epsilon) / winning_probability_coefficient;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dsigmoid(double x)
|
|
||||||
{
|
|
||||||
// Sigmoid function
|
|
||||||
// f(x) = 1/(1+exp(-x))
|
|
||||||
// the first derivative is
|
|
||||||
// f'(x) = df/dx = f(x)・{ 1-f(x)}
|
|
||||||
// becomes
|
|
||||||
|
|
||||||
return sigmoid(x) * (1.0 - sigmoid(x));
|
|
||||||
}
|
|
||||||
|
|
||||||
// When the objective function is the sum of squares of the difference in winning percentage
|
// When the objective function is the sum of squares of the difference in winning percentage
|
||||||
#if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
|
#if defined (LOSS_FUNCTION_IS_WINNING_PERCENTAGE)
|
||||||
// function to calculate the gradient
|
// function to calculate the gradient
|
||||||
@@ -202,7 +192,7 @@ namespace Learner
|
|||||||
|
|
||||||
double p = winning_percentage(deep);
|
double p = winning_percentage(deep);
|
||||||
double q = winning_percentage(shallow);
|
double q = winning_percentage(shallow);
|
||||||
return (q - p) * dsigmoid(double(shallow) / 600.0);
|
return (q - p) * Math::dsigmoid(double(shallow) / 600.0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -253,39 +243,75 @@ namespace Learner
|
|||||||
double ELMO_LAMBDA2 = 0.33;
|
double ELMO_LAMBDA2 = 0.33;
|
||||||
double ELMO_LAMBDA_LIMIT = 32000;
|
double ELMO_LAMBDA_LIMIT = 32000;
|
||||||
|
|
||||||
|
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
|
||||||
|
double get_scaled_signal(double signal)
|
||||||
|
{
|
||||||
|
double scaled_signal = signal;
|
||||||
|
|
||||||
|
// Normalize to [0.0, 1.0].
|
||||||
|
scaled_signal =
|
||||||
|
(scaled_signal - src_score_min_value)
|
||||||
|
/ (src_score_max_value - src_score_min_value);
|
||||||
|
|
||||||
|
// Scale to [dest_score_min_value, dest_score_max_value].
|
||||||
|
scaled_signal =
|
||||||
|
scaled_signal * (dest_score_max_value - dest_score_min_value)
|
||||||
|
+ dest_score_min_value;
|
||||||
|
|
||||||
|
return scaled_signal;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Teacher winning probability.
|
||||||
|
double calculate_p(double teacher_signal, int ply)
|
||||||
|
{
|
||||||
|
const double scaled_teacher_signal = get_scaled_signal(teacher_signal);
|
||||||
|
|
||||||
|
// Teacher winning probability.
|
||||||
|
double p = scaled_teacher_signal;
|
||||||
|
if (convert_teacher_signal_to_winning_probability)
|
||||||
|
{
|
||||||
|
p = winning_percentage(scaled_teacher_signal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double calculate_lambda(double teacher_signal)
|
||||||
|
{
|
||||||
|
// If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
|
||||||
|
const double lambda =
|
||||||
|
(std::abs(teacher_signal) >= ELMO_LAMBDA_LIMIT)
|
||||||
|
? ELMO_LAMBDA2
|
||||||
|
: ELMO_LAMBDA;
|
||||||
|
|
||||||
|
return lambda;
|
||||||
|
}
|
||||||
|
|
||||||
|
double calculate_t(int game_result)
|
||||||
|
{
|
||||||
|
// Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
|
||||||
|
// game_result = 1,0,-1 so add 1 and divide by 2.
|
||||||
|
const double t = double(game_result + 1) * 0.5;
|
||||||
|
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
|
double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
|
||||||
{
|
{
|
||||||
// elmo (WCSC27) method
|
// elmo (WCSC27) method
|
||||||
// Correct with the actual game wins and losses.
|
// Correct with the actual game wins and losses.
|
||||||
|
|
||||||
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
|
|
||||||
double scaled_teacher_signal = teacher_signal;
|
|
||||||
// Normalize to [0.0, 1.0].
|
|
||||||
scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
|
|
||||||
// Scale to [dest_score_min_value, dest_score_max_value].
|
|
||||||
scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
|
|
||||||
|
|
||||||
const double q = winning_percentage(shallow, psv.gamePly);
|
const double q = winning_percentage(shallow, psv.gamePly);
|
||||||
// Teacher winning probability.
|
const double p = calculate_p(teacher_signal, psv.gamePly);
|
||||||
double p = scaled_teacher_signal;
|
const double t = calculate_t(psv.game_result);
|
||||||
if (convert_teacher_signal_to_winning_probability) {
|
const double lambda = calculate_lambda(teacher_signal);
|
||||||
p = winning_percentage(scaled_teacher_signal, psv.gamePly);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use 1 as the correction term if the expected win rate is 1, 0 if you lose, and 0.5 if you draw.
|
|
||||||
// game_result = 1,0,-1 so add 1 and divide by 2.
|
|
||||||
const double t = double(psv.game_result + 1) / 2;
|
|
||||||
|
|
||||||
// If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
|
|
||||||
const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
|
|
||||||
|
|
||||||
double grad;
|
double grad;
|
||||||
if (use_wdl) {
|
if (use_wdl)
|
||||||
double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
|
{
|
||||||
double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
|
const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
|
||||||
|
const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
|
||||||
grad = lambda * dce_p + (1.0 - lambda) * dce_t;
|
grad = lambda * dce_p + (1.0 - lambda) * dce_t;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
// Use the actual win rate as a correction term.
|
// Use the actual win rate as a correction term.
|
||||||
// This is the idea of elmo (WCSC27), modern O-parts.
|
// This is the idea of elmo (WCSC27), modern O-parts.
|
||||||
grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
|
grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
|
||||||
@@ -296,30 +322,25 @@ namespace Learner
|
|||||||
|
|
||||||
// Calculate cross entropy during learning
|
// Calculate cross entropy during learning
|
||||||
// The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win.
|
// The individual cross entropy of the win/loss term and win rate term of the elmo expression is returned to the arguments cross_entropy_eval and cross_entropy_win.
|
||||||
void calc_cross_entropy(Value teacher_signal, Value shallow, const PackedSfenValue& psv,
|
void calc_cross_entropy(
|
||||||
double& cross_entropy_eval, double& cross_entropy_win, double& cross_entropy,
|
Value teacher_signal,
|
||||||
double& entropy_eval, double& entropy_win, double& entropy)
|
Value shallow,
|
||||||
|
const PackedSfenValue& psv,
|
||||||
|
double& cross_entropy_eval,
|
||||||
|
double& cross_entropy_win,
|
||||||
|
double& cross_entropy,
|
||||||
|
double& entropy_eval,
|
||||||
|
double& entropy_win,
|
||||||
|
double& entropy)
|
||||||
{
|
{
|
||||||
// Training Formula · Issue #71 · nodchip/Stockfish https://github.com/nodchip/Stockfish/issues/71
|
|
||||||
double scaled_teacher_signal = teacher_signal;
|
|
||||||
// Normalize to [0.0, 1.0].
|
|
||||||
scaled_teacher_signal = (scaled_teacher_signal - src_score_min_value) / (src_score_max_value - src_score_min_value);
|
|
||||||
// Scale to [dest_score_min_value, dest_score_max_value].
|
|
||||||
scaled_teacher_signal = scaled_teacher_signal * (dest_score_max_value - dest_score_min_value) + dest_score_min_value;
|
|
||||||
|
|
||||||
// Teacher winning probability.
|
// Teacher winning probability.
|
||||||
double p = scaled_teacher_signal;
|
const double q = winning_percentage(shallow, psv.gamePly);
|
||||||
if (convert_teacher_signal_to_winning_probability) {
|
const double p = calculate_p(teacher_signal, psv.gamePly);
|
||||||
p = winning_percentage(scaled_teacher_signal);
|
const double t = calculate_t(psv.game_result);
|
||||||
}
|
const double lambda = calculate_lambda(teacher_signal);
|
||||||
const double q /* eval_winrate */ = winning_percentage(shallow);
|
|
||||||
const double t = double(psv.game_result + 1) / 2;
|
|
||||||
|
|
||||||
constexpr double epsilon = 0.000001;
|
constexpr double epsilon = 0.000001;
|
||||||
|
|
||||||
// If the evaluation value in deep search exceeds ELMO_LAMBDA_LIMIT, apply ELMO_LAMBDA2 instead of ELMO_LAMBDA.
|
|
||||||
const double lambda = (abs(teacher_signal) >= ELMO_LAMBDA_LIMIT) ? ELMO_LAMBDA2 : ELMO_LAMBDA;
|
|
||||||
|
|
||||||
const double m = (1.0 - lambda) * t + lambda * p;
|
const double m = (1.0 - lambda) * t + lambda * p;
|
||||||
|
|
||||||
cross_entropy_eval =
|
cross_entropy_eval =
|
||||||
@@ -343,7 +364,8 @@ namespace Learner
|
|||||||
// Other variations may be prepared as the objective function..
|
// Other variations may be prepared as the objective function..
|
||||||
|
|
||||||
|
|
||||||
double calc_grad(Value shallow, const PackedSfenValue& psv) {
|
double calc_grad(Value shallow, const PackedSfenValue& psv)
|
||||||
|
{
|
||||||
return calc_grad((Value)psv.score, shallow, psv);
|
return calc_grad((Value)psv.score, shallow, psv);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -363,8 +385,14 @@ namespace Learner
|
|||||||
// SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
|
// SFEN_READ_SIZE is a multiple of THREAD_BUFFER_SIZE.
|
||||||
static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE;
|
static constexpr const size_t SFEN_READ_SIZE = LEARN_SFEN_READ_SIZE;
|
||||||
|
|
||||||
|
// hash to limit the reading of the same situation
|
||||||
|
// Is there too many 64 million phases? Or Not really..
|
||||||
|
// It must be 2**N because it will be used as the mask to calculate hash_index.
|
||||||
|
static constexpr uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
|
||||||
|
|
||||||
// Do not use std::random_device(). Because it always the same integers on MinGW.
|
// Do not use std::random_device(). Because it always the same integers on MinGW.
|
||||||
SfenReader(int thread_num) : prng(std::chrono::system_clock::now().time_since_epoch().count())
|
SfenReader(int thread_num) :
|
||||||
|
prng(std::chrono::system_clock::now().time_since_epoch().count())
|
||||||
{
|
{
|
||||||
packed_sfens.resize(thread_num);
|
packed_sfens.resize(thread_num);
|
||||||
total_read = 0;
|
total_read = 0;
|
||||||
@@ -398,6 +426,7 @@ namespace Learner
|
|||||||
cout << "Error! read packed sfen , failed." << endl;
|
cout << "Error! read packed sfen , failed." << endl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
sfen_for_mse.push_back(ps);
|
sfen_for_mse.push_back(ps);
|
||||||
|
|
||||||
// Get the hash key.
|
// Get the hash key.
|
||||||
@@ -418,8 +447,10 @@ namespace Learner
|
|||||||
{
|
{
|
||||||
if (eval_limit < abs(p.score))
|
if (eval_limit < abs(p.score))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (!use_draw_games_in_validation && p.game_result == 0)
|
if (!use_draw_games_in_validation && p.game_result == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
sfen_for_mse.push_back(p);
|
sfen_for_mse.push_back(p);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -436,7 +467,7 @@ namespace Learner
|
|||||||
auto& thread_ps = packed_sfens[thread_id];
|
auto& thread_ps = packed_sfens[thread_id];
|
||||||
|
|
||||||
// Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish.
|
// Fill the read buffer if there is no remaining buffer, but if it doesn't even exist, finish.
|
||||||
if ((thread_ps == nullptr || thread_ps->size() == 0) // If the buffer is empty, fill it.
|
if ((thread_ps == nullptr || thread_ps->empty()) // If the buffer is empty, fill it.
|
||||||
&& !read_to_thread_buffer_impl(thread_id))
|
&& !read_to_thread_buffer_impl(thread_id))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@@ -444,11 +475,11 @@ namespace Learner
|
|||||||
// Since the filling of the thread buffer with the phase has been completed successfully
|
// Since the filling of the thread buffer with the phase has been completed successfully
|
||||||
// thread_ps->rbegin() is alive.
|
// thread_ps->rbegin() is alive.
|
||||||
|
|
||||||
ps = *(thread_ps->rbegin());
|
ps = thread_ps->back();
|
||||||
thread_ps->pop_back();
|
thread_ps->pop_back();
|
||||||
|
|
||||||
// If you've run out of buffers, call delete yourself to free this buffer.
|
// If you've run out of buffers, call delete yourself to free this buffer.
|
||||||
if (thread_ps->size() == 0)
|
if (thread_ps->empty())
|
||||||
{
|
{
|
||||||
thread_ps.reset();
|
thread_ps.reset();
|
||||||
}
|
}
|
||||||
@@ -507,7 +538,7 @@ namespace Learner
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Get the next file name.
|
// Get the next file name.
|
||||||
string filename = *filenames.rbegin();
|
string filename = filenames.back();
|
||||||
filenames.pop_back();
|
filenames.pop_back();
|
||||||
|
|
||||||
fs.open(filename, ios::in | ios::binary);
|
fs.open(filename, ios::in | ios::binary);
|
||||||
@@ -523,6 +554,7 @@ namespace Learner
|
|||||||
// This size() is read only, so you don't need to lock it.
|
// This size() is read only, so you don't need to lock it.
|
||||||
while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE)
|
while (!stop_flag && packed_sfens_pool.size() >= SFEN_READ_SIZE / THREAD_BUFFER_SIZE)
|
||||||
sleep(100);
|
sleep(100);
|
||||||
|
|
||||||
if (stop_flag)
|
if (stop_flag)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -555,9 +587,7 @@ namespace Learner
|
|||||||
|
|
||||||
if (!no_shuffle)
|
if (!no_shuffle)
|
||||||
{
|
{
|
||||||
auto size = sfens.size();
|
Algo::shuffle(sfens, prng);
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
swap(sfens[i], sfens[(size_t)(prng.rand((uint64_t)size - i) + i)]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Divide this by THREAD_BUFFER_SIZE. There should be size pieces.
|
// Divide this by THREAD_BUFFER_SIZE. There should be size pieces.
|
||||||
@@ -591,6 +621,13 @@ namespace Learner
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine if it is a phase for calculating rmse.
|
||||||
|
// (The computational aspects of rmse should not be used for learning.)
|
||||||
|
bool is_for_rmse(Key key) const
|
||||||
|
{
|
||||||
|
return sfen_for_mse_hash.count(key) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
// sfen files
|
// sfen files
|
||||||
vector<string> filenames;
|
vector<string> filenames;
|
||||||
|
|
||||||
@@ -613,17 +650,6 @@ namespace Learner
|
|||||||
|
|
||||||
bool stop_flag;
|
bool stop_flag;
|
||||||
|
|
||||||
// Determine if it is a phase for calculating rmse.
|
|
||||||
// (The computational aspects of rmse should not be used for learning.)
|
|
||||||
bool is_for_rmse(Key key) const
|
|
||||||
{
|
|
||||||
return sfen_for_mse_hash.count(key) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// hash to limit the reading of the same situation
|
|
||||||
// Is there too many 64 million phases? Or Not really..
|
|
||||||
// It must be 2**N because it will be used as the mask to calculate hash_index.
|
|
||||||
static const uint64_t READ_SFEN_HASH_SIZE = 64 * 1024 * 1024;
|
|
||||||
vector<Key> hash; // 64MB*8 = 512MB
|
vector<Key> hash; // 64MB*8 = 512MB
|
||||||
|
|
||||||
// test phase for mse calculation
|
// test phase for mse calculation
|
||||||
@@ -663,7 +689,10 @@ namespace Learner
|
|||||||
// Class to generate sfen with multiple threads
|
// Class to generate sfen with multiple threads
|
||||||
struct LearnerThink : public MultiThink
|
struct LearnerThink : public MultiThink
|
||||||
{
|
{
|
||||||
LearnerThink(SfenReader& sr_) :sr(sr_), stop_flag(false), save_only_once(false)
|
LearnerThink(SfenReader& sr_) :
|
||||||
|
sr(sr_),
|
||||||
|
stop_flag(false),
|
||||||
|
save_only_once(false)
|
||||||
{
|
{
|
||||||
#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD )
|
#if defined ( LOSS_FUNCTION_IS_ELMO_METHOD )
|
||||||
learn_sum_cross_entropy_eval = 0.0;
|
learn_sum_cross_entropy_eval = 0.0;
|
||||||
@@ -686,7 +715,12 @@ namespace Learner
|
|||||||
virtual void thread_worker(size_t thread_id);
|
virtual void thread_worker(size_t thread_id);
|
||||||
|
|
||||||
// Start a thread that loads the phase file in the background.
|
// Start a thread that loads the phase file in the background.
|
||||||
void start_file_read_worker() { sr.start_file_read_worker(); }
|
void start_file_read_worker()
|
||||||
|
{
|
||||||
|
sr.start_file_read_worker();
|
||||||
|
}
|
||||||
|
|
||||||
|
Value get_shallow_value(Position& task_pos);
|
||||||
|
|
||||||
// save merit function parameters to a file
|
// save merit function parameters to a file
|
||||||
bool save(bool is_final = false);
|
bool save(bool is_final = false);
|
||||||
@@ -753,6 +787,33 @@ namespace Learner
|
|||||||
TaskDispatcher task_dispatcher;
|
TaskDispatcher task_dispatcher;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Value LearnerThink::get_shallow_value(Position& task_pos)
|
||||||
|
{
|
||||||
|
// Evaluation value for shallow search
|
||||||
|
// The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and
|
||||||
|
// Use qsearch() because it is difficult to compare the values.
|
||||||
|
// EvalHash has been disabled in advance. (If not, the same value will be returned every time)
|
||||||
|
const auto [_, pv] = qsearch(task_pos);
|
||||||
|
|
||||||
|
std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
|
||||||
|
for (size_t i = 0; i < pv.size(); ++i)
|
||||||
|
{
|
||||||
|
task_pos.do_move(pv[i], states[i]);
|
||||||
|
Eval::NNUE::update_eval(task_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto rootColor = task_pos.side_to_move();
|
||||||
|
const Value shallow_value =
|
||||||
|
(rootColor == task_pos.side_to_move())
|
||||||
|
? Eval::evaluate(task_pos)
|
||||||
|
: -Eval::evaluate(task_pos);
|
||||||
|
|
||||||
|
for (auto it = pv.rbegin(); it != pv.rend(); ++it)
|
||||||
|
task_pos.undo_move(*it);
|
||||||
|
|
||||||
|
return shallow_value;
|
||||||
|
}
|
||||||
|
|
||||||
void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
|
void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
|
||||||
{
|
{
|
||||||
// There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated.
|
// There is no point in hitting the replacement table, so at this timing the generation of the replacement table is updated.
|
||||||
@@ -800,8 +861,6 @@ namespace Learner
|
|||||||
pos.set(StartFEN, false, &si, th);
|
pos.set(StartFEN, false, &si, th);
|
||||||
std::cout << "hirate eval = " << Eval::evaluate(pos);
|
std::cout << "hirate eval = " << Eval::evaluate(pos);
|
||||||
|
|
||||||
//Eval::print_eval_stat(pos);
|
|
||||||
|
|
||||||
// It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
|
// It's better to parallelize here, but it's a bit troublesome because the search before slave has not finished.
|
||||||
// I created a mechanism to call task, so I will use it.
|
// I created a mechanism to call task, so I will use it.
|
||||||
|
|
||||||
@@ -818,6 +877,7 @@ namespace Learner
|
|||||||
// It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one.
|
// It is not possible to capture pos used in ↑, so specify the variables you want to capture one by one.
|
||||||
auto task =
|
auto task =
|
||||||
[
|
[
|
||||||
|
this,
|
||||||
&ps,
|
&ps,
|
||||||
&test_sum_cross_entropy_eval,
|
&test_sum_cross_entropy_eval,
|
||||||
&test_sum_cross_entropy_win,
|
&test_sum_cross_entropy_win,
|
||||||
@@ -830,7 +890,6 @@ namespace Learner
|
|||||||
&move_accord_count
|
&move_accord_count
|
||||||
](size_t task_thread_id)
|
](size_t task_thread_id)
|
||||||
{
|
{
|
||||||
// Does C++ properly capture a new ps instance for each loop?.
|
|
||||||
auto task_th = Threads[task_thread_id];
|
auto task_th = Threads[task_thread_id];
|
||||||
auto& task_pos = task_th->rootPos;
|
auto& task_pos = task_th->rootPos;
|
||||||
StateInfo task_si;
|
StateInfo task_si;
|
||||||
@@ -840,26 +899,7 @@ namespace Learner
|
|||||||
cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
|
cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Evaluation value for shallow search
|
const Value shallow_value = get_shallow_value(task_pos);
|
||||||
// The value of evaluate() may be used, but when calculating loss, learn_cross_entropy and
|
|
||||||
// Use qsearch() because it is difficult to compare the values.
|
|
||||||
// EvalHash has been disabled in advance. (If not, the same value will be returned every time)
|
|
||||||
auto task_search_result = qsearch(task_pos);
|
|
||||||
|
|
||||||
auto shallow_value = task_search_result.first;
|
|
||||||
{
|
|
||||||
const auto rootColor = task_pos.side_to_move();
|
|
||||||
const auto pv = task_search_result.second;
|
|
||||||
std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());
|
|
||||||
for (size_t i = 0; i < pv.size(); ++i)
|
|
||||||
{
|
|
||||||
task_pos.do_move(pv[i], states[i]);
|
|
||||||
Eval::NNUE::update_eval(task_pos);
|
|
||||||
}
|
|
||||||
shallow_value = (rootColor == task_pos.side_to_move()) ? Eval::evaluate(task_pos) : -Eval::evaluate(task_pos);
|
|
||||||
for (auto it = pv.rbegin(); it != pv.rend(); ++it)
|
|
||||||
task_pos.undo_move(*it);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Evaluation value of deep search
|
// Evaluation value of deep search
|
||||||
auto deep_value = (Value)ps.score;
|
auto deep_value = (Value)ps.score;
|
||||||
@@ -887,7 +927,17 @@ namespace Learner
|
|||||||
#if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
|
#if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
|
||||||
double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
|
double test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy;
|
||||||
double test_entropy_eval, test_entropy_win, test_entropy;
|
double test_entropy_eval, test_entropy_win, test_entropy;
|
||||||
calc_cross_entropy(deep_value, shallow_value, ps, test_cross_entropy_eval, test_cross_entropy_win, test_cross_entropy, test_entropy_eval, test_entropy_win, test_entropy);
|
calc_cross_entropy(
|
||||||
|
deep_value,
|
||||||
|
shallow_value,
|
||||||
|
ps,
|
||||||
|
test_cross_entropy_eval,
|
||||||
|
test_cross_entropy_win,
|
||||||
|
test_cross_entropy,
|
||||||
|
test_entropy_eval,
|
||||||
|
test_entropy_win,
|
||||||
|
test_entropy);
|
||||||
|
|
||||||
// The total cross entropy need not be abs() by definition.
|
// The total cross entropy need not be abs() by definition.
|
||||||
test_sum_cross_entropy_eval += test_cross_entropy_eval;
|
test_sum_cross_entropy_eval += test_cross_entropy_eval;
|
||||||
test_sum_cross_entropy_win += test_cross_entropy_win;
|
test_sum_cross_entropy_win += test_cross_entropy_win;
|
||||||
@@ -900,8 +950,8 @@ namespace Learner
|
|||||||
|
|
||||||
// Determine if the teacher's move and the score of the shallow search match
|
// Determine if the teacher's move and the score of the shallow search match
|
||||||
{
|
{
|
||||||
auto r = search(task_pos, 1);
|
const auto [value, pv] = search(task_pos, 1);
|
||||||
if ((uint16_t)r.second[0] == ps.move)
|
if ((uint16_t)pv[0] == ps.move)
|
||||||
move_accord_count.fetch_add(1, std::memory_order_relaxed);
|
move_accord_count.fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -950,6 +1000,7 @@ namespace Learner
|
|||||||
<< " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
|
<< " , test_entropy = " << test_sum_entropy / sr.sfen_for_mse.size()
|
||||||
<< " , norm = " << sum_norm
|
<< " , norm = " << sum_norm
|
||||||
<< " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
|
<< " , move accuracy = " << (move_accord_count * 100.0 / sr.sfen_for_mse.size()) << "%";
|
||||||
|
|
||||||
if (done != static_cast<uint64_t>(-1))
|
if (done != static_cast<uint64_t>(-1))
|
||||||
{
|
{
|
||||||
cout
|
cout
|
||||||
@@ -962,7 +1013,8 @@ namespace Learner
|
|||||||
}
|
}
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl;
|
cout << "Error! : sr.sfen_for_mse.size() = " << sr.sfen_for_mse.size() << " , done = " << done << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -978,7 +1030,6 @@ namespace Learner
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void LearnerThink::thread_worker(size_t thread_id)
|
void LearnerThink::thread_worker(size_t thread_id)
|
||||||
{
|
{
|
||||||
#if defined(_OPENMP)
|
#if defined(_OPENMP)
|
||||||
@@ -1092,7 +1143,9 @@ namespace Learner
|
|||||||
}
|
}
|
||||||
|
|
||||||
PackedSfenValue ps;
|
PackedSfenValue ps;
|
||||||
RetryRead:;
|
|
||||||
|
RETRY_READ:;
|
||||||
|
|
||||||
if (!sr.read_to_thread_buffer(thread_id, ps))
|
if (!sr.read_to_thread_buffer(thread_id, ps))
|
||||||
{
|
{
|
||||||
// ran out of thread pool for my thread.
|
// ran out of thread pool for my thread.
|
||||||
@@ -1106,16 +1159,14 @@ namespace Learner
|
|||||||
// The evaluation value exceeds the learning target value.
|
// The evaluation value exceeds the learning target value.
|
||||||
// Ignore this aspect information.
|
// Ignore this aspect information.
|
||||||
if (eval_limit < abs(ps.score))
|
if (eval_limit < abs(ps.score))
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
|
|
||||||
|
|
||||||
if (!use_draw_games_in_training && ps.game_result == 0)
|
if (!use_draw_games_in_training && ps.game_result == 0)
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
|
|
||||||
|
|
||||||
// Skip over the opening phase
|
// Skip over the opening phase
|
||||||
if (ps.gamePly < prng.rand(reduction_gameply))
|
if (ps.gamePly < prng.rand(reduction_gameply))
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
auto sfen = pos.sfen_unpack(ps.data);
|
auto sfen = pos.sfen_unpack(ps.data);
|
||||||
@@ -1129,20 +1180,24 @@ namespace Learner
|
|||||||
// I got a strange sfen. Should be debugged!
|
// I got a strange sfen. Should be debugged!
|
||||||
// Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than not.
|
// Since it is an illegal sfen, it may not be displayed with pos.sfen(), but it is better than not.
|
||||||
cout << "Error! : illigal packed sfen = " << pos.fen() << endl;
|
cout << "Error! : illigal packed sfen = " << pos.fen() << endl;
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(EVAL_NNUE)
|
#if !defined(EVAL_NNUE)
|
||||||
|
if (skip_duplicated_positions_in_training)
|
||||||
{
|
{
|
||||||
auto key = pos.key();
|
const auto key = pos.key();
|
||||||
|
|
||||||
// Exclude the phase used for rmse calculation.
|
// Exclude the phase used for rmse calculation.
|
||||||
if (sr.is_for_rmse(key) && skip_duplicated_positions_in_training)
|
if (sr.is_for_rmse(key))
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
|
|
||||||
// Exclude the most recently used aspect.
|
// Exclude the most recently used aspect.
|
||||||
auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
|
const auto hash_index = size_t(key & (sr.READ_SFEN_HASH_SIZE - 1));
|
||||||
auto key2 = sr.hash[hash_index];
|
const auto key2 = sr.hash[hash_index];
|
||||||
if (key == key2 && skip_duplicated_positions_in_training)
|
if (key == key2)
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
|
|
||||||
sr.hash[hash_index] = key; // Replace with the current key.
|
sr.hash[hash_index] = key; // Replace with the current key.
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -1152,22 +1207,21 @@ namespace Learner
|
|||||||
// (shouldn't write out such teacher aspect itself, but may have written it out with an old generation routine)
|
// (shouldn't write out such teacher aspect itself, but may have written it out with an old generation routine)
|
||||||
// Skip the position if there are no legal moves (=checkmated or stalemate).
|
// Skip the position if there are no legal moves (=checkmated or stalemate).
|
||||||
if (MoveList<LEGAL>(pos).size() == 0)
|
if (MoveList<LEGAL>(pos).size() == 0)
|
||||||
goto RetryRead;
|
goto RETRY_READ;
|
||||||
|
|
||||||
// I can read it, so try displaying it.
|
// I can read it, so try displaying it.
|
||||||
// cout << pos << value << endl;
|
// cout << pos << value << endl;
|
||||||
|
|
||||||
// Evaluation value of shallow search (qsearch)
|
// Evaluation value of shallow search (qsearch)
|
||||||
auto r = qsearch(pos);
|
const auto [shallow_value, pv] = qsearch(pos);
|
||||||
auto pv = r.second;
|
|
||||||
|
|
||||||
// Evaluation value of deep search
|
// Evaluation value of deep search
|
||||||
auto deep_value = (Value)ps.score;
|
const auto deep_value = (Value)ps.score;
|
||||||
|
|
||||||
// I feel that the mini batch has a better gradient.
|
// I feel that the mini batch has a better gradient.
|
||||||
// Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
|
// Go to the leaf node as it is, add only to the gradient array, and later try AdaGrad at the time of rmse aggregation.
|
||||||
|
|
||||||
auto rootColor = pos.side_to_move();
|
const auto rootColor = pos.side_to_move();
|
||||||
|
|
||||||
// If the initial PV is different, it is better not to use it for learning.
|
// If the initial PV is different, it is better not to use it for learning.
|
||||||
// If it is the result of searching a completely different place, it may become noise.
|
// If it is the result of searching a completely different place, it may become noise.
|
||||||
@@ -1203,13 +1257,26 @@ namespace Learner
|
|||||||
// I don't think this is a very desirable property, as the aspect that gives that gradient will be different.
|
// I don't think this is a very desirable property, as the aspect that gives that gradient will be different.
|
||||||
// I have turned off the substitution table, but since the pv array has not been updated due to one stumbling block etc...
|
// I have turned off the substitution table, but since the pv array has not been updated due to one stumbling block etc...
|
||||||
|
|
||||||
Value shallow_value = (rootColor == pos.side_to_move()) ? Eval::evaluate(pos) : -Eval::evaluate(pos);
|
const Value shallow_value =
|
||||||
|
(rootColor == pos.side_to_move())
|
||||||
|
? Eval::evaluate(pos)
|
||||||
|
: -Eval::evaluate(pos);
|
||||||
|
|
||||||
#if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
|
#if defined (LOSS_FUNCTION_IS_ELMO_METHOD)
|
||||||
// Calculate loss for training data
|
// Calculate loss for training data
|
||||||
double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
|
double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
|
||||||
double learn_entropy_eval, learn_entropy_win, learn_entropy;
|
double learn_entropy_eval, learn_entropy_win, learn_entropy;
|
||||||
calc_cross_entropy(deep_value, shallow_value, ps, learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy, learn_entropy_eval, learn_entropy_win, learn_entropy);
|
calc_cross_entropy(
|
||||||
|
deep_value,
|
||||||
|
shallow_value,
|
||||||
|
ps,
|
||||||
|
learn_cross_entropy_eval,
|
||||||
|
learn_cross_entropy_win,
|
||||||
|
learn_cross_entropy,
|
||||||
|
learn_entropy_eval,
|
||||||
|
learn_entropy_win,
|
||||||
|
learn_entropy);
|
||||||
|
|
||||||
learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
|
learn_sum_cross_entropy_eval += learn_cross_entropy_eval;
|
||||||
learn_sum_cross_entropy_win += learn_cross_entropy_win;
|
learn_sum_cross_entropy_win += learn_cross_entropy_win;
|
||||||
learn_sum_cross_entropy += learn_cross_entropy;
|
learn_sum_cross_entropy += learn_cross_entropy;
|
||||||
@@ -1266,7 +1333,8 @@ namespace Learner
|
|||||||
Eval::NNUE::update_eval(pos);
|
Eval::NNUE::update_eval(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (illegal_move) {
|
if (illegal_move)
|
||||||
|
{
|
||||||
sync_cout << "An illical move was detected... Excluded the position from the learning data..." << sync_endl;
|
sync_cout << "An illical move was detected... Excluded the position from the learning data..." << sync_endl;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1284,7 +1352,6 @@ namespace Learner
|
|||||||
dj_dw = calc_grad(deep_value, shallow_value, ps);
|
dj_dw = calc_grad(deep_value, shallow_value, ps);
|
||||||
Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
|
Eval::add_grad(pos, rootColor, dj_dw, without_kpp);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1301,14 +1368,17 @@ namespace Learner
|
|||||||
// Do not dig a subfolder because I want to save it only once.
|
// Do not dig a subfolder because I want to save it only once.
|
||||||
Eval::save_eval("");
|
Eval::save_eval("");
|
||||||
}
|
}
|
||||||
else if (is_final) {
|
else if (is_final)
|
||||||
|
{
|
||||||
Eval::save_eval("final");
|
Eval::save_eval("final");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
static int dir_number = 0;
|
static int dir_number = 0;
|
||||||
const std::string dir_name = std::to_string(dir_number++);
|
const std::string dir_name = std::to_string(dir_number++);
|
||||||
Eval::save_eval(dir_name);
|
Eval::save_eval(dir_name);
|
||||||
|
|
||||||
#if defined(EVAL_NNUE)
|
#if defined(EVAL_NNUE)
|
||||||
if (newbob_decay != 1.0 && latest_loss_count > 0) {
|
if (newbob_decay != 1.0 && latest_loss_count > 0) {
|
||||||
static int trials = newbob_num_trials;
|
static int trials = newbob_num_trials;
|
||||||
@@ -1316,22 +1386,28 @@ namespace Learner
|
|||||||
latest_loss_sum = 0.0;
|
latest_loss_sum = 0.0;
|
||||||
latest_loss_count = 0;
|
latest_loss_count = 0;
|
||||||
cout << "loss: " << latest_loss;
|
cout << "loss: " << latest_loss;
|
||||||
if (latest_loss < best_loss) {
|
if (latest_loss < best_loss)
|
||||||
|
{
|
||||||
cout << " < best (" << best_loss << "), accepted" << endl;
|
cout << " < best (" << best_loss << "), accepted" << endl;
|
||||||
best_loss = latest_loss;
|
best_loss = latest_loss;
|
||||||
best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);
|
best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);
|
||||||
trials = newbob_num_trials;
|
trials = newbob_num_trials;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
cout << " >= best (" << best_loss << "), rejected" << endl;
|
cout << " >= best (" << best_loss << "), rejected" << endl;
|
||||||
if (best_nn_directory.empty()) {
|
if (best_nn_directory.empty())
|
||||||
|
{
|
||||||
cout << "WARNING: no improvement from initial model" << endl;
|
cout << "WARNING: no improvement from initial model" << endl;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
{
|
||||||
cout << "restoring parameters from " << best_nn_directory << endl;
|
cout << "restoring parameters from " << best_nn_directory << endl;
|
||||||
Eval::NNUE::RestoreParameters(best_nn_directory);
|
Eval::NNUE::RestoreParameters(best_nn_directory);
|
||||||
}
|
}
|
||||||
if (--trials > 0 && !is_final) {
|
|
||||||
|
if (--trials > 0 && !is_final)
|
||||||
|
{
|
||||||
cout << "reducing learning rate scale from " << newbob_scale
|
cout << "reducing learning rate scale from " << newbob_scale
|
||||||
<< " to " << (newbob_scale * newbob_decay)
|
<< " to " << (newbob_scale * newbob_decay)
|
||||||
<< " (" << trials << " more trials)" << endl;
|
<< " (" << trials << " more trials)" << endl;
|
||||||
@@ -1339,7 +1415,9 @@ namespace Learner
|
|||||||
Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
|
Eval::NNUE::SetGlobalLearningRateScale(newbob_scale);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (trials == 0) {
|
|
||||||
|
if (trials == 0)
|
||||||
|
{
|
||||||
cout << "converged" << endl;
|
cout << "converged" << endl;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -1371,10 +1449,11 @@ namespace Learner
|
|||||||
// Output progress every 10M phase or when all writing is completed
|
// Output progress every 10M phase or when all writing is completed
|
||||||
if (((write_sfen_count % buffer_size) == 0) ||
|
if (((write_sfen_count % buffer_size) == 0) ||
|
||||||
(write_sfen_count == total_sfen_count))
|
(write_sfen_count == total_sfen_count))
|
||||||
|
{
|
||||||
cout << write_sfen_count << " / " << total_sfen_count << endl;
|
cout << write_sfen_count << " / " << total_sfen_count << endl;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
cout << endl << "write : " << output_file_name << endl;
|
cout << endl << "write : " << output_file_name << endl;
|
||||||
|
|
||||||
fstream fs(output_file_name, ios::out | ios::binary);
|
fstream fs(output_file_name, ios::out | ios::binary);
|
||||||
@@ -1453,9 +1532,7 @@ namespace Learner
|
|||||||
|
|
||||||
auto write_buffer = [&](uint64_t size)
|
auto write_buffer = [&](uint64_t size)
|
||||||
{
|
{
|
||||||
// shuffle from buf[0] to buf[size-1]
|
Algo::shuffle(buf, prng);
|
||||||
for (uint64_t i = 0; i < size; ++i)
|
|
||||||
swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]);
|
|
||||||
|
|
||||||
// write to a file
|
// write to a file
|
||||||
fstream fs;
|
fstream fs;
|
||||||
@@ -1533,13 +1610,8 @@ namespace Learner
|
|||||||
auto& fs = afs[i];
|
auto& fs = afs[i];
|
||||||
|
|
||||||
fs.open(filename, ios::in | ios::binary);
|
fs.open(filename, ios::in | ios::binary);
|
||||||
fs.seekg(0, fstream::end);
|
const uint64_t file_size = get_file_size(fs);
|
||||||
uint64_t eofPos = (uint64_t)fs.tellg();
|
const uint64_t sfen_count = file_size / sizeof(PackedSfenValue);
|
||||||
fs.clear(); // Otherwise, the next seek may fail.
|
|
||||||
fs.seekg(0, fstream::beg);
|
|
||||||
uint64_t begPos = (uint64_t)fs.tellg();
|
|
||||||
uint64_t file_size = eofPos - begPos;
|
|
||||||
uint64_t sfen_count = file_size / sizeof(PackedSfenValue);
|
|
||||||
a_count[i] = sfen_count;
|
a_count[i] = sfen_count;
|
||||||
|
|
||||||
// Output the number of sfen stored in each file.
|
// Output the number of sfen stored in each file.
|
||||||
@@ -1578,8 +1650,8 @@ namespace Learner
|
|||||||
PRNG prng(std::chrono::system_clock::now().time_since_epoch().count());
|
PRNG prng(std::chrono::system_clock::now().time_since_epoch().count());
|
||||||
uint64_t size = (uint64_t)buf.size();
|
uint64_t size = (uint64_t)buf.size();
|
||||||
std::cout << "shuffle buf.size() = " << size << std::endl;
|
std::cout << "shuffle buf.size() = " << size << std::endl;
|
||||||
for (uint64_t i = 0; i < size; ++i)
|
|
||||||
swap(buf[i], buf[(uint64_t)(prng.rand(size - i) + i)]);
|
Algo::shuffle(buf, prng);
|
||||||
|
|
||||||
std::cout << "write : " << output_file_name << endl;
|
std::cout << "write : " << output_file_name << endl;
|
||||||
|
|
||||||
|
|||||||
+15
-6
@@ -627,18 +627,27 @@ void* aligned_malloc(size_t size, size_t align)
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::uint64_t get_file_size(std::fstream& fs)
|
||||||
|
{
|
||||||
|
auto pos = fs.tellg();
|
||||||
|
|
||||||
|
fs.seekg(0, fstream::end);
|
||||||
|
const uint64_t eofPos = (uint64_t)fs.tellg();
|
||||||
|
fs.clear(); // Otherwise, the next seek may fail.
|
||||||
|
fs.seekg(0, fstream::beg);
|
||||||
|
const uint64_t begPos = (uint64_t)fs.tellg();
|
||||||
|
fs.seekg(pos);
|
||||||
|
|
||||||
|
return eofPos - begPos;
|
||||||
|
}
|
||||||
|
|
||||||
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
|
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func)
|
||||||
{
|
{
|
||||||
fstream fs(filename, ios::in | ios::binary);
|
fstream fs(filename, ios::in | ios::binary);
|
||||||
if (fs.fail())
|
if (fs.fail())
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
fs.seekg(0, fstream::end);
|
const uint64_t file_size = get_file_size(fs);
|
||||||
uint64_t eofPos = (uint64_t)fs.tellg();
|
|
||||||
fs.clear(); // Otherwise the next seek may fail.
|
|
||||||
fs.seekg(0, fstream::beg);
|
|
||||||
uint64_t begPos = (uint64_t)fs.tellg();
|
|
||||||
uint64_t file_size = eofPos - begPos;
|
|
||||||
//std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
|
//std::cout << "filename = " << filename << " , file_size = " << file_size << endl;
|
||||||
|
|
||||||
// I know the file size, so call callback_func to get a buffer for this,
|
// I know the file size, so call callback_func to get a buffer for this,
|
||||||
|
|||||||
+26
-6
@@ -26,6 +26,8 @@
|
|||||||
#include <ostream>
|
#include <ostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
|
||||||
@@ -155,6 +157,7 @@ std::string now_string();
|
|||||||
// Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
|
// Also, if the buffer cannot be allocated in the callback function or if the file size is different from the expected file size,
|
||||||
// Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
|
// Return nullptr. At this time, read_file_to_memory() interrupts reading and returns with an error.
|
||||||
|
|
||||||
|
std::uint64_t get_file_size(std::fstream& fs);
|
||||||
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func);
|
int read_file_to_memory(std::string filename, std::function<void* (uint64_t)> callback_func);
|
||||||
int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
|
int write_memory_to_file(std::string filename, void* ptr, uint64_t size);
|
||||||
|
|
||||||
@@ -199,20 +202,37 @@ inline std::ostream& operator<<(std::ostream& os, AsyncPRNG& prng)
|
|||||||
|
|
||||||
// Mathematical function used for progress calculation and learning
|
// Mathematical function used for progress calculation and learning
|
||||||
namespace Math {
|
namespace Math {
|
||||||
// Sigmoid function
|
inline double sigmoid(double x)
|
||||||
// = 1.0 / (1.0 + std::exp(-x))
|
{
|
||||||
double sigmoid(double x);
|
return 1.0 / (1.0 + std::exp(-x));
|
||||||
|
}
|
||||||
|
|
||||||
// Differentiation of sigmoid function
|
inline double dsigmoid(double x)
|
||||||
// = sigmoid(x) * (1.0-sigmoid(x))
|
{
|
||||||
double dsigmoid(double x);
|
// Sigmoid function
|
||||||
|
// f(x) = 1/(1+exp(-x))
|
||||||
|
// the first derivative is
|
||||||
|
// f'(x) = df/dx = f(x)・{ 1-f(x)}
|
||||||
|
// becomes
|
||||||
|
|
||||||
|
return sigmoid(x) * (1.0 - sigmoid(x));
|
||||||
|
}
|
||||||
|
|
||||||
// Clip v so that it fits between [lo,hi].
|
// Clip v so that it fits between [lo,hi].
|
||||||
// * In Stockfish, this function is written in bitboard.h.
|
// * In Stockfish, this function is written in bitboard.h.
|
||||||
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
|
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
|
||||||
return v < lo ? lo : v > hi ? hi : v;
|
return v < lo ? lo : v > hi ? hi : v;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Algo {
|
||||||
|
template <typename Rng, typename T>
|
||||||
|
void shuffle(std::vector<T>& buf, Rng&& prng)
|
||||||
|
{
|
||||||
|
const auto size = buf.size();
|
||||||
|
for (uint64_t i = 0; i < size; ++i)
|
||||||
|
std::swap(buf[i], buf[prng.rand(size - i) + i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------
|
// --------------------
|
||||||
|
|||||||
Reference in New Issue
Block a user