mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 12:07:43 +00:00
Merge remote-tracking branch 'remotes/nodchip/master' into trainer
This commit is contained in:
+84
-26
@@ -10,6 +10,7 @@
|
||||
#include "../uci.h"
|
||||
#include "learn.h"
|
||||
#include "multi_think.h"
|
||||
#include "../syzygy/tbprobe.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <climits>
|
||||
@@ -291,6 +292,12 @@ namespace Learner
|
||||
int ply,
|
||||
int& random_move_c);
|
||||
|
||||
Value evaluate_leaf(
|
||||
Position& pos,
|
||||
std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
|
||||
int ply,
|
||||
vector<Move>& pv);
|
||||
|
||||
// Min and max depths for search during gensfen
|
||||
int search_depth_min;
|
||||
int search_depth_max;
|
||||
@@ -641,6 +648,56 @@ namespace Learner
|
||||
return random_move_flag;
|
||||
}
|
||||
|
||||
// Plays the moves of `pv` on `pos`, statically evaluates the position reached
// at the end of the line, then takes every move back so `pos` is restored.
//
// pos    : position to start from; moves are made and unmade on it in place.
// states : StateInfo storage; one entry is used per PV move, starting at
//          index `ply`.
// ply    : index of the first entry of `states` to use.
// pv     : the move sequence to follow.
//
// Returns Eval::evaluate() of the leaf, sign-adjusted to the point of view of
// the side that was to move before the PV was played, or VALUE_NONE when the
// side to move at the leaf is in check.
Value MultiThinkGenSfen::evaluate_leaf(
    Position& pos,
    std::vector<StateInfo, AlignedAllocator<StateInfo>>& states,
    int ply,
    vector<Move>& pv)
{
    const auto rootColor = pos.side_to_move();

    // Walk down the PV.
    for (size_t i = 0; i < pv.size(); ++i)
    {
        const auto m = pv[i];

        // No illegal move is expected here; this report is only a debugging
        // precaution, and the move is still played afterwards.
        const bool playable = pos.pseudo_legal(m) && pos.legal(m);
        if (!playable)
        {
            cout << "Error! : " << pos.fen() << m << endl;
        }

        pos.do_move(m, states[ply]);
        ++ply;
    }

    // Leaf reached.
    // The king can be in check here, for example when the search found a
    // checkmate. Calling Eval::evaluate() while in check makes the classic
    // eval fail an assertion, so in that case VALUE_NONE is returned and the
    // caller is left to assign a value to the position.
    Value v = VALUE_NONE;
    if (!pos.checkers())
    {
        v = Eval::evaluate(pos);

        // evaluate() scores from the side to move at the leaf; flip the sign
        // when that is not the root side.
        if (pos.side_to_move() != rootColor)
        {
            v = -v;
        }
    }

    // Take the PV moves back, last one first.
    for (size_t i = pv.size(); i-- > 0; )
    {
        pos.undo_move(pv[i]);
    }

    return v;
}
|
||||
|
||||
// thread_id = 0..Threads.size()-1
|
||||
void MultiThinkGenSfen::thread_worker(size_t thread_id)
|
||||
{
|
||||
@@ -666,6 +723,8 @@ namespace Learner
|
||||
auto& pos = th->rootPos;
|
||||
pos.set(bookStart[prng.rand(bookStart.size())], false, &si, th);
|
||||
|
||||
int resign_counter = 0;
|
||||
bool should_resign = prng.rand(10) > 1;
|
||||
// Vector for holding the sfens in the current simulated game.
|
||||
PSVector a_psv;
|
||||
a_psv.reserve(write_maxply + MAX_PLY);
|
||||
@@ -700,6 +759,20 @@ namespace Learner
|
||||
break;
|
||||
}
|
||||
|
||||
if (pos.count<ALL_PIECES>() <= 6) {
|
||||
Tablebases::ProbeState probe_state;
|
||||
Tablebases::WDLScore wdl = Tablebases::probe_wdl(pos, &probe_state);
|
||||
assert(wdl != Tablebases::WDLScore::WDLScoreNone);
|
||||
if (wdl == Tablebases::WDLScore::WDLWin) {
|
||||
flush_psv(1);
|
||||
} else if (wdl == Tablebases::WDLScore::WDLLoss) {
|
||||
flush_psv(-1);
|
||||
} else {
|
||||
flush_psv(0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
{
|
||||
auto [search_value, search_pv] = search(pos, depth, 1, nodes);
|
||||
|
||||
@@ -707,11 +780,14 @@ namespace Learner
|
||||
// Also because of this we don't have to check for TB/MATE scores
|
||||
if (abs(search_value) >= eval_limit)
|
||||
{
|
||||
const auto wdl = (search_value >= eval_limit) ? 1 : -1;
|
||||
flush_psv(wdl);
|
||||
break;
|
||||
resign_counter++;
|
||||
if ((should_resign && resign_counter >= 4) || abs(search_value) >= 10000) {
|
||||
flush_psv((search_value >= eval_limit) ? 1 : -1);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
resign_counter = 0;
|
||||
}
|
||||
|
||||
// Verification of a strange move
|
||||
if (search_pv.size() > 0
|
||||
&& (search_pv[0] == MOVE_NONE || search_pv[0] == MOVE_NULL))
|
||||
@@ -743,26 +819,6 @@ namespace Learner
|
||||
goto SKIP_SAVE;
|
||||
}
|
||||
|
||||
// Look into the position hashtable to see if the same
|
||||
// position was seen before.
|
||||
// This is a good heuristic to exclude already seen
|
||||
// positions without many false positives.
|
||||
{
|
||||
auto key = pos.key();
|
||||
auto hash_index = (size_t)(key & (GENSFEN_HASH_SIZE - 1));
|
||||
auto old_key = hash[hash_index];
|
||||
if (key == old_key)
|
||||
{
|
||||
a_psv.clear();
|
||||
goto SKIP_SAVE;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Replace with the current key.
|
||||
hash[hash_index] = key;
|
||||
}
|
||||
}
|
||||
|
||||
// Pack the current position into a packed sfen and save it into the buffer.
|
||||
{
|
||||
a_psv.emplace_back(PackedSfenValue());
|
||||
@@ -772,8 +828,6 @@ namespace Learner
|
||||
// Result is added after the whole game is done.
|
||||
pos.sfen_pack(psv.sfen);
|
||||
|
||||
// Get the value of evaluate() as seen from the
|
||||
// root color on the leaf node of the PV line.
|
||||
psv.score = search_value;
|
||||
|
||||
psv.gamePly = ply;
|
||||
@@ -795,6 +849,8 @@ namespace Learner
|
||||
// Update the next move according to best search result.
|
||||
next_move = search_pv[0];
|
||||
}
|
||||
|
||||
// Random move.
|
||||
auto random_move = choose_random_move(pos, random_move_flag, ply, actual_random_move_count);
|
||||
if (random_move.has_value())
|
||||
{
|
||||
@@ -807,6 +863,8 @@ namespace Learner
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Do move.
|
||||
pos.do_move(next_move, states[ply]);
|
||||
|
||||
} // for (int ply = 0; ; ++ply)
|
||||
|
||||
+111
-41
@@ -221,28 +221,7 @@ namespace Learner
|
||||
|
||||
// Computes the gradient term for one training position from the deep-search
// score (`teacher_signal`), the shallow evaluation (`shallow`) and the stored
// record (`psv`, whose gamePly and game_result are read below).
double calc_grad(Value teacher_signal, Value shallow, const PackedSfenValue& psv)
|
||||
{
|
||||
// elmo (WCSC27) method
|
||||
// Correct with the actual game wins and losses.
|
||||
const double q = winning_percentage(shallow, psv.gamePly);
|
||||
const double p = calculate_p(teacher_signal, psv.gamePly);
|
||||
const double t = calculate_t(psv.game_result);
|
||||
const double lambda = calculate_lambda(teacher_signal);
|
||||
|
||||
double grad;
|
||||
// With use_wdl, blend the two cross-entropy derivatives (teacher term and
// game-result term) using lambda as the weight.
if (use_wdl)
|
||||
{
|
||||
const double dce_p = calc_d_cross_entropy_of_winning_percentage(p, shallow, psv.gamePly);
|
||||
const double dce_t = calc_d_cross_entropy_of_winning_percentage(t, shallow, psv.gamePly);
|
||||
grad = lambda * dce_p + (1.0 - lambda) * dce_t;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use the actual win rate as a correction term.
|
||||
// This is the idea of elmo (WCSC27), modern O-parts.
|
||||
grad = lambda * (q - p) + (1.0 - lambda) * (q - t);
|
||||
}
|
||||
|
||||
return grad;
|
||||
// NOTE(review): as the text stands, this second return is unreachable because
// of the `return grad;` just above. Presumably the diff view merged the
// removed elmo body with its one-line replacement — confirm which of the two
// is the live implementation.
return (double)(shallow - teacher_signal) / 2400.0;
|
||||
}
|
||||
|
||||
// Calculate cross entropy during learning
|
||||
@@ -659,6 +638,9 @@ namespace Learner
|
||||
|
||||
bool stop_flag;
|
||||
|
||||
// Discount rate
|
||||
double discount_rate;
|
||||
|
||||
// Option to exclude early stage from learning
|
||||
int reduction_gameply;
|
||||
|
||||
@@ -701,6 +683,32 @@ namespace Learner
|
||||
TaskDispatcher task_dispatcher;
|
||||
};
|
||||
|
||||
// Returns the static evaluation of the position at the end of the qsearch()
// principal variation of `task_pos`, expressed from the point of view of the
// side to move in `task_pos` itself. `task_pos` is modified while the PV is
// played and is restored before returning.
Value LearnerThink::get_shallow_value(Position& task_pos)
{
    // Only the PV of qsearch() is used; the score it returns is discarded and
    // the leaf is evaluated again with Eval::evaluate() below.
    // (Carried over from the original comment: EvalHash is expected to have
    // been disabled beforehand.)
    const auto [qsearch_score, pv] = qsearch(task_pos);
    const auto rootColor = task_pos.side_to_move();

    // One StateInfo slot per PV move.
    std::vector<StateInfo, AlignedAllocator<StateInfo>> states(pv.size());

    // Play the PV down to its leaf.
    size_t depth = 0;
    for (const auto pv_move : pv)
    {
        task_pos.do_move(pv_move, states[depth]);
        ++depth;
    }

    // evaluate() scores from the side to move at the leaf; flip the sign when
    // that is not the root side.
    // NOTE(review): the leaf is evaluated without looking at
    // task_pos.checkers() — confirm a qsearch PV can never end in check.
    Value shallow_value = Eval::evaluate(task_pos);
    if (task_pos.side_to_move() != rootColor)
        shallow_value = -shallow_value;

    // Take the PV moves back, last one first.
    for (size_t i = pv.size(); i-- > 0; )
        task_pos.undo_move(pv[i]);

    return shallow_value;
}
|
||||
|
||||
void LearnerThink::calc_loss(size_t thread_id, uint64_t done)
|
||||
{
|
||||
// There is no point in hitting the replacement table,
|
||||
@@ -779,10 +787,7 @@ namespace Learner
|
||||
cout << "Error! : illegal packed sfen " << task_pos.fen() << endl;
|
||||
}
|
||||
|
||||
// Determine if the teacher's move and the score of the shallow search match
|
||||
const auto [shallow_value, pv] = qsearch(task_pos);
|
||||
if ((uint16_t)pv[0] == ps.move)
|
||||
move_accord_count.fetch_add(1, std::memory_order_relaxed);
|
||||
const Value shallow_value = get_shallow_value(task_pos);
|
||||
|
||||
// Evaluation value of deep search
|
||||
auto deep_value = (Value)ps.score;
|
||||
@@ -817,6 +822,13 @@ namespace Learner
|
||||
test_sum_entropy += test_entropy;
|
||||
sum_norm += (double)abs(shallow_value);
|
||||
|
||||
// Determine if the teacher's move and the score of the shallow search match
|
||||
{
|
||||
const auto [value, pv] = search(task_pos, 1);
|
||||
if ((uint16_t)pv[0] == ps.move)
|
||||
move_accord_count.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Reduced one task because I did it
|
||||
--task_count;
|
||||
};
|
||||
@@ -1023,8 +1035,21 @@ namespace Learner
|
||||
// I can read it, so try displaying it.
|
||||
// cout << pos << value << endl;
|
||||
|
||||
const auto rootColor = pos.side_to_move();
|
||||
|
||||
int ply = 0;
|
||||
StateInfo state[MAX_PLY]; // PV of qsearch cannot be so long.
|
||||
|
||||
if (!pos.pseudo_legal((Move)ps.move) || !pos.legal((Move)ps.move))
|
||||
{
|
||||
sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
pos.do_move((Move)ps.move, state[ply++]);
|
||||
|
||||
// Evaluation value of shallow search (qsearch)
|
||||
const auto [shallow_value, _] = qsearch(pos);
|
||||
const auto [_, pv] = qsearch(pos);
|
||||
|
||||
// Evaluation value of deep search
|
||||
const auto deep_value = (Value)ps.score;
|
||||
@@ -1033,7 +1058,11 @@ namespace Learner
|
||||
// Go to the leaf node as it is, add only to the gradient array,
|
||||
// and later try AdaGrad at the time of rmse aggregation.
|
||||
|
||||
const auto rootColor = pos.side_to_move();
|
||||
|
||||
// If the initial PV is different, it is better not to use it for learning.
|
||||
// If it is the result of searching a completely different place, it may become noise.
|
||||
// It may be better not to study where the difference in evaluation values is too large.
|
||||
|
||||
|
||||
// A helper function that adds the gradient to the current phase.
|
||||
auto pos_add_grad = [&]() {
|
||||
@@ -1046,6 +1075,11 @@ namespace Learner
|
||||
// I have turned off the substitution table, but since
|
||||
// the pv array has not been updated due to one stumbling block etc...
|
||||
|
||||
const Value shallow_value =
|
||||
(rootColor == pos.side_to_move())
|
||||
? Eval::evaluate(pos)
|
||||
: -Eval::evaluate(pos);
|
||||
|
||||
// Calculate loss for training data
|
||||
double learn_cross_entropy_eval, learn_cross_entropy_win, learn_cross_entropy;
|
||||
double learn_entropy_eval, learn_entropy_win, learn_entropy;
|
||||
@@ -1067,14 +1101,43 @@ namespace Learner
|
||||
learn_sum_entropy_win += learn_entropy_win;
|
||||
learn_sum_entropy += learn_entropy;
|
||||
|
||||
Eval::NNUE::AddExample(pos, rootColor, ps, 1.0);
|
||||
const double example_weight =
|
||||
(discount_rate != 0 && ply != (int)pv.size()) ? discount_rate : 1.0;
|
||||
Eval::NNUE::AddExample(pos, rootColor, ps, example_weight);
|
||||
|
||||
// Since the processing is completed, the counter of the processed number is incremented
|
||||
sr.total_done++;
|
||||
};
|
||||
|
||||
pos_add_grad();
|
||||
bool illegal_move = false;
|
||||
for (auto m : pv)
|
||||
{
|
||||
// There should be no illegal move here.
|
||||
// An illegal move sometimes comes here...
|
||||
if (!pos.pseudo_legal(m) || !pos.legal(m))
|
||||
{
|
||||
//cout << pos << m << endl;
|
||||
//assert(false);
|
||||
illegal_move = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Processing when adding the gradient to the node on each PV.
|
||||
//If discount_rate is 0, this process is not performed.
|
||||
if (discount_rate != 0)
|
||||
pos_add_grad();
|
||||
|
||||
pos.do_move(m, state[ply++]);
|
||||
}
|
||||
|
||||
if (illegal_move)
|
||||
{
|
||||
sync_cout << "An illegal move was detected... Excluded the position from the learning data..." << sync_endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Since we have reached the end phase of PV, add the slope here.
|
||||
pos_add_grad();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1118,15 +1181,7 @@ namespace Learner
|
||||
else
|
||||
{
|
||||
cout << " >= best (" << best_loss << "), rejected" << endl;
|
||||
if (best_nn_directory.empty())
|
||||
{
|
||||
cout << "WARNING: no improvement from initial model" << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "restoring parameters from " << best_nn_directory << endl;
|
||||
Eval::NNUE::RestoreParameters(best_nn_directory);
|
||||
}
|
||||
best_nn_directory = Path::Combine((std::string)Options["EvalSaveDir"], dir_name);
|
||||
|
||||
if (--trials > 0 && !is_final)
|
||||
{
|
||||
@@ -1468,6 +1523,11 @@ namespace Learner
|
||||
ELMO_LAMBDA2 = 0.33;
|
||||
ELMO_LAMBDA_LIMIT = 32000;
|
||||
|
||||
// Discount rate. If this is set to a value other than 0,
|
||||
// the slope will be added even at other than the PV termination.
|
||||
// (At that time, apply this discount rate)
|
||||
double discount_rate = 0;
|
||||
|
||||
// if (gamePly <rand(reduction_gameply)) continue;
|
||||
// An option to exclude the early stage from the learning target moderately like
|
||||
// If set to 1, rand(1)==0, so nothing is excluded.
|
||||
@@ -1537,6 +1597,9 @@ namespace Learner
|
||||
|
||||
else if (option == "winning_probability_coefficient") is >> winning_probability_coefficient;
|
||||
|
||||
// Discount rate
|
||||
else if (option == "discount_rate") is >> discount_rate;
|
||||
|
||||
// Using WDL with win rate model instead of sigmoid
|
||||
else if (option == "use_wdl") is >> use_wdl;
|
||||
|
||||
@@ -1603,9 +1666,11 @@ namespace Learner
|
||||
// Display learning game file
|
||||
if (target_dir != "")
|
||||
{
|
||||
string kif_base_dir = Path::Combine(base_dir, target_dir);
|
||||
|
||||
namespace sys = std::filesystem;
|
||||
sys::path kif_base_dir(Path::Combine(base_dir, target_dir)); // Origin of enumeration
|
||||
std::for_each(sys::directory_iterator(kif_base_dir), sys::directory_iterator(),
|
||||
sys::path p(kif_base_dir); // Origin of enumeration
|
||||
std::for_each(sys::directory_iterator(p), sys::directory_iterator(),
|
||||
[&](const sys::path& path) {
|
||||
if (sys::is_regular_file(path))
|
||||
filenames.push_back(Path::Combine(target_dir, path.filename().generic_string()));
|
||||
@@ -1726,6 +1791,8 @@ namespace Learner
|
||||
cout << "scheduling : default" << endl;
|
||||
}
|
||||
|
||||
cout << "discount rate : " << discount_rate << endl;
|
||||
|
||||
// If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
|
||||
reduction_gameply = max(reduction_gameply, 1);
|
||||
cout << "reduction_gameply : " << reduction_gameply << endl;
|
||||
@@ -1758,6 +1825,7 @@ namespace Learner
|
||||
cout << "init done." << endl;
|
||||
|
||||
// Reflect other option settings.
|
||||
learn_think.discount_rate = discount_rate;
|
||||
learn_think.eval_limit = eval_limit;
|
||||
learn_think.save_only_once = save_only_once;
|
||||
learn_think.sr.no_shuffle = no_shuffle;
|
||||
@@ -1805,6 +1873,8 @@ namespace Learner
|
||||
// Start learning.
|
||||
learn_think.go_think();
|
||||
|
||||
Eval::NNUE::FinalizeNet();
|
||||
|
||||
// Save once at the end.
|
||||
learn_think.save(true);
|
||||
}
|
||||
|
||||
@@ -40,13 +40,14 @@ namespace EvalLearningTools
|
||||
static uint64_t eta2_epoch;
|
||||
|
||||
// Batch initialization of eta. If 0 is passed, the default value will be set.
|
||||
static void init_eta(double eta1, double eta2, double eta3, uint64_t eta1_epoch, uint64_t eta2_epoch)
|
||||
static void init_eta(double new_eta1, double new_eta2, double new_eta3,
|
||||
uint64_t new_eta1_epoch, uint64_t new_eta2_epoch)
|
||||
{
|
||||
Weight::eta1 = (eta1 != 0) ? eta1 : 30.0;
|
||||
Weight::eta2 = (eta2 != 0) ? eta2 : 30.0;
|
||||
Weight::eta3 = (eta3 != 0) ? eta3 : 30.0;
|
||||
Weight::eta1_epoch = (eta1_epoch != 0) ? eta1_epoch : 0;
|
||||
Weight::eta2_epoch = (eta2_epoch != 0) ? eta2_epoch : 0;
|
||||
Weight::eta1 = (new_eta1 != 0) ? new_eta1 : 30.0;
|
||||
Weight::eta2 = (new_eta2 != 0) ? new_eta2 : 30.0;
|
||||
Weight::eta3 = (new_eta3 != 0) ? new_eta3 : 30.0;
|
||||
Weight::eta1_epoch = (new_eta1_epoch != 0) ? new_eta1_epoch : 0;
|
||||
Weight::eta2_epoch = (new_eta2_epoch != 0) ? new_eta2_epoch : 0;
|
||||
}
|
||||
|
||||
// Set eta according to epoch.
|
||||
|
||||
@@ -10,13 +10,6 @@
|
||||
|
||||
void MultiThink::go_think()
|
||||
{
|
||||
// Keep a copy to restore the Options settings later.
|
||||
auto oldOptions = Options;
|
||||
|
||||
// When using the constant track, it takes a lot of time to perform on the fly & the part to access the file is
|
||||
// Since it is not thread safe, it is guaranteed here that it is being completely read in memory.
|
||||
Options["BookOnTheFly"] = std::string("false");
|
||||
|
||||
// Read evaluation function, etc.
|
||||
// In the case of the learn command, the value of the evaluation function may be corrected after reading the evaluation function, so
|
||||
// Skip memory corruption check.
|
||||
@@ -111,12 +104,6 @@ void MultiThink::go_think()
|
||||
// The file writing thread etc. are still running only when all threads are finished
|
||||
// Since the work itself may not have completed, output only that all threads have finished.
|
||||
std::cout << "all threads are joined." << std::endl;
|
||||
|
||||
// Restored because Options were rewritten.
|
||||
// Restore the handler because the handler will not start unless you assign a value.
|
||||
for (auto& s : oldOptions)
|
||||
Options[s.first] = std::string(s.second);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user