Cleaner and more outputs during training initialization.

This commit is contained in:
Tomasz Sobczyk
2020-10-24 14:21:59 +02:00
committed by nodchip
parent b882423005
commit 2c477d76ec
12 changed files with 129 additions and 94 deletions
+73 -51
View File
@@ -141,12 +141,12 @@ namespace Learner
template <typename StreamT> template <typename StreamT>
void print(const std::string& prefix, StreamT& s) const void print(const std::string& prefix, StreamT& s) const
{ {
s << "==> " << prefix << "_cross_entropy_eval = " << cross_entropy_eval / count << endl; s << " - " << prefix << "_cross_entropy_eval = " << cross_entropy_eval / count << endl;
s << "==> " << prefix << "_cross_entropy_win = " << cross_entropy_win / count << endl; s << " - " << prefix << "_cross_entropy_win = " << cross_entropy_win / count << endl;
s << "==> " << prefix << "_entropy_eval = " << entropy_eval / count << endl; s << " - " << prefix << "_entropy_eval = " << entropy_eval / count << endl;
s << "==> " << prefix << "_entropy_win = " << entropy_win / count << endl; s << " - " << prefix << "_entropy_win = " << entropy_win / count << endl;
s << "==> " << prefix << "_cross_entropy = " << cross_entropy / count << endl; s << " - " << prefix << "_cross_entropy = " << cross_entropy / count << endl;
s << "==> " << prefix << "_entropy = " << entropy / count << endl; s << " - " << prefix << "_entropy = " << entropy / count << endl;
} }
}; };
} }
@@ -687,7 +687,7 @@ namespace Learner
<< ", epoch " << epoch << ", epoch " << epoch
<< endl; << endl;
out << "==> learning rate = " << global_learning_rate << endl; out << " - learning rate = " << global_learning_rate << endl;
// For calculation of verification data loss // For calculation of verification data loss
AtomicLoss test_loss_sum{}; AtomicLoss test_loss_sum{};
@@ -704,7 +704,7 @@ namespace Learner
auto& pos = th.rootPos; auto& pos = th.rootPos;
StateInfo si; StateInfo si;
pos.set(StartFEN, false, &si, &th); pos.set(StartFEN, false, &si, &th);
out << "==> startpos eval = " << Eval::evaluate(pos) << endl; out << " - startpos eval = " << Eval::evaluate(pos) << endl;
}); });
mainThread->wait_for_worker_finished(); mainThread->wait_for_worker_finished();
@@ -734,8 +734,8 @@ namespace Learner
learn_loss_sum.print("learn", out); learn_loss_sum.print("learn", out);
} }
out << "==> norm = " << sum_norm << endl; out << " - norm = " << sum_norm << endl;
out << "==> move accuracy = " << (move_accord_count * 100.0 / psv.size()) << "%" << endl; out << " - move accuracy = " << (move_accord_count * 100.0 / psv.size()) << "%" << endl;
} }
else else
{ {
@@ -852,7 +852,7 @@ namespace Learner
latest_loss_sum = 0.0; latest_loss_sum = 0.0;
latest_loss_count = 0; latest_loss_count = 0;
cout << "INFO (learning_rate):" << endl; cout << "INFO (learning_rate):" << endl;
cout << "==> loss = " << latest_loss; cout << " - loss = " << latest_loss;
auto tot = total_done; auto tot = total_done;
if (auto_lr_drop) if (auto_lr_drop)
{ {
@@ -882,7 +882,7 @@ namespace Learner
if (--trials > 0 && !is_final) if (--trials > 0 && !is_final)
{ {
cout cout
<< "==> reducing learning rate from " << global_learning_rate << " - reducing learning rate from " << global_learning_rate
<< " to " << (global_learning_rate * newbob_decay) << " to " << (global_learning_rate * newbob_decay)
<< " (" << trials << " more trials)" << endl; << " (" << trials << " more trials)" << endl;
@@ -892,7 +892,7 @@ namespace Learner
if (trials == 0) if (trials == 0)
{ {
cout << "==> converged" << endl; cout << " - converged" << endl;
return true; return true;
} }
} }
@@ -980,6 +980,8 @@ namespace Learner
string validation_set_file_name; string validation_set_file_name;
string seed; string seed;
auto out = sync_region_cout.new_region();
// Assume the filenames are staggered. // Assume the filenames are staggered.
while (true) while (true)
{ {
@@ -1083,7 +1085,7 @@ namespace Learner
else if (option == "verbose") verbose = true; else if (option == "verbose") verbose = true;
else else
{ {
cout << "Unknown option: " << option << ". Ignoring.\n"; out << "INFO: Unknown option: " << option << ". Ignoring.\n";
} }
} }
@@ -1092,11 +1094,14 @@ namespace Learner
loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size; loss_output_interval = LEARN_RMSE_OUTPUT_INTERVAL * mini_batch_size;
} }
cout << "learn command , "; // If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1.
reduction_gameply = max(reduction_gameply, 1);
out << "INFO: Executing learn command\n";
// Issue a warning if OpenMP is disabled. // Issue a warning if OpenMP is disabled.
#if !defined(_OPENMP) #if !defined(_OPENMP)
cout << "Warning! OpenMP disabled." << endl; out << "WARNING: OpenMP disabled." << endl;
#endif #endif
// Right now we only have the individual files. // Right now we only have the individual files.
@@ -1107,65 +1112,80 @@ namespace Learner
} }
rebase_files(filenames, base_dir); rebase_files(filenames, base_dir);
cout << "learn from "; out << "INFO: Input files:\n";
for (auto s : filenames) for (auto s : filenames)
cout << s << " , "; out << " - " << s << '\n';
cout << endl; out << "INFO: Parameters:\n";
if (!validation_set_file_name.empty()) if (!validation_set_file_name.empty())
{ {
cout << "validation set : " << validation_set_file_name << endl; out << " - validation set : " << validation_set_file_name << endl;
} }
cout << "base dir : " << base_dir << endl; out << " - epochs : " << epochs << endl;
cout << "target dir : " << target_dir << endl; out << " - epochs * minibatch size : " << epochs * mini_batch_size << endl;
out << " - eval_limit : " << eval_limit << endl;
out << " - save_only_once : " << (save_only_once ? "true" : "false") << endl;
out << " - shuffle on read : " << (no_shuffle ? "false" : "true") << endl;
cout << "epochs : " << epochs << endl; out << " - Loss Function : " << LOSS_FUNCTION << endl;
cout << "eval_limit : " << eval_limit << endl; out << " - minibatch size : " << mini_batch_size << endl;
cout << "save_only_once : " << (save_only_once ? "true" : "false") << endl;
cout << "no_shuffle : " << (no_shuffle ? "true" : "false") << endl;
cout << "Loss Function : " << LOSS_FUNCTION << endl; out << " - nn_batch_size : " << nn_batch_size << endl;
cout << "mini-batch size : " << mini_batch_size << endl; out << " - nn_options : " << nn_options << endl;
cout << "nn_batch_size : " << nn_batch_size << endl; out << " - learning rate : " << global_learning_rate << endl;
cout << "nn_options : " << nn_options << endl; out << " - use draws in training : " << use_draw_games_in_training << endl;
out << " - use draws in validation : " << use_draw_games_in_validation << endl;
out << " - skip repeated positions : " << skip_duplicated_positions_in_training << endl;
cout << "learning rate : " << global_learning_rate << endl; out << " - winning prob coeff : " << winning_probability_coefficient << endl;
cout << "use_draw_games_in_training : " << use_draw_games_in_training << endl; out << " - use_wdl : " << use_wdl << endl;
cout << "use_draw_games_in_validation : " << use_draw_games_in_validation << endl;
cout << "skip_duplicated_positions_in_training : " << skip_duplicated_positions_in_training << endl;
if (newbob_decay != 1.0) { out << " - src_score_min_value : " << src_score_min_value << endl;
cout << "scheduling : newbob with decay = " << newbob_decay out << " - src_score_max_value : " << src_score_max_value << endl;
<< ", " << newbob_num_trials << " trials" << endl; out << " - dest_score_min_value : " << dest_score_min_value << endl;
out << " - dest_score_max_value : " << dest_score_max_value << endl;
out << " - reduction_gameply : " << reduction_gameply << endl;
out << " - LAMBDA : " << ELMO_LAMBDA << endl;
out << " - LAMBDA2 : " << ELMO_LAMBDA2 << endl;
out << " - LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl;
out << " - eval_save_interval : " << eval_save_interval << " sfens" << endl;
out << " - loss_output_interval : " << loss_output_interval << " sfens" << endl;
out << " - sfen_read_size : " << sfen_read_size << endl;
out << " - thread_buffer_size : " << thread_buffer_size << endl;
out << " - seed : " << seed << endl;
out << " - verbose : " << (verbose ? "true" : "false") << endl;
if (auto_lr_drop) {
out << " - learning rate scheduling : every " << auto_lr_drop << " sfens" << endl;
}
else if (newbob_decay != 1.0) {
out << " - learning rate scheduling : newbob with decay" << endl;
out << " - newbob_decay : " << newbob_decay << endl;
out << " - newbob_num_trials : " << newbob_num_trials << endl;
} }
else { else {
cout << "scheduling : default" << endl; out << " - learning rate scheduling : fixed learning rate" << endl;
} }
// If reduction_gameply is set to 0, rand(0) will be divided by 0, so correct it to 1. out << endl;
reduction_gameply = max(reduction_gameply, 1);
cout << "reduction_gameply : " << reduction_gameply << endl;
cout << "LAMBDA : " << ELMO_LAMBDA << endl;
cout << "LAMBDA2 : " << ELMO_LAMBDA2 << endl;
cout << "LAMBDA_LIMIT : " << ELMO_LAMBDA_LIMIT << endl;
cout << "eval_save_interval : " << eval_save_interval << " sfens" << endl;
cout << "loss_output_interval: " << loss_output_interval << " sfens" << endl;
// ----------------------------------- // -----------------------------------
// various initialization // various initialization
// ----------------------------------- // -----------------------------------
cout << "init.." << endl; out << "INFO: Started initialization." << endl;
Threads.main()->ponder = false; Threads.main()->ponder = false;
set_learning_search_limits(); set_learning_search_limits();
cout << "init_training.." << endl; Eval::NNUE::initialize_training(seed, out);
Eval::NNUE::initialize_training(seed);
Eval::NNUE::set_batch_size(nn_batch_size); Eval::NNUE::set_batch_size(nn_batch_size);
Eval::NNUE::set_options(nn_options); Eval::NNUE::set_options(nn_options);
@@ -1204,7 +1224,9 @@ namespace Learner
learn_think.verbose = verbose; learn_think.verbose = verbose;
cout << "init done." << endl; out << "Finished initialization." << endl;
out.unlock();
// Start learning. // Start learning.
learn_think.learn(epochs); learn_think.learn(epochs);
+1 -1
View File
@@ -221,7 +221,7 @@ namespace Learner{
// in case the file is empty or was deleted. // in case the file is empty or was deleted.
if (sfen_input_stream->eof()) if (sfen_input_stream->eof())
{ {
out << "==> File empty, nothing to read.\n"; out << " - File empty, nothing to read.\n";
} }
else else
{ {
+22 -20
View File
@@ -78,27 +78,23 @@ std::ostream& operator<<(std::ostream&, SyncCout);
// current region releases the lock. // current region releases the lock.
struct SynchronizedRegionLogger struct SynchronizedRegionLogger
{ {
private:
using RegionId = std::uint64_t; using RegionId = std::uint64_t;
struct RegionLock struct Region
{ {
RegionLock(SynchronizedRegionLogger& log, RegionId id) : friend struct SynchronizedRegionLogger;
logger(&log), region_id(id), is_held(true)
{
}
RegionLock(const RegionLock&) = delete; Region(const Region&) = delete;
RegionLock& operator=(const RegionLock&) = delete; Region& operator=(const Region&) = delete;
RegionLock(RegionLock&& other) : Region(Region&& other) :
logger(other.logger), region_id(other.region_id), is_held(other.is_held) logger(other.logger), region_id(other.region_id), is_held(other.is_held)
{ {
other.logger = nullptr; other.logger = nullptr;
other.is_held = false; other.is_held = false;
} }
RegionLock& operator=(RegionLock&& other) { Region& operator=(Region&& other) {
if (is_held && logger != nullptr) if (is_held && logger != nullptr)
{ {
logger->release_region(region_id); logger->release_region(region_id);
@@ -113,7 +109,7 @@ private:
return *this; return *this;
} }
~RegionLock() { unlock(); } ~Region() { unlock(); }
void unlock() { void unlock() {
if (is_held) { if (is_held) {
@@ -124,7 +120,7 @@ private:
} }
} }
RegionLock& operator << (std::ostream&(*pManip)(std::ostream&)) { Region& operator << (std::ostream&(*pManip)(std::ostream&)) {
if (logger != nullptr) if (logger != nullptr)
logger->write(region_id, pManip); logger->write(region_id, pManip);
@@ -132,7 +128,7 @@ private:
} }
template <typename T> template <typename T>
RegionLock& operator << (const T& value) { Region& operator << (const T& value) {
if (logger != nullptr) if (logger != nullptr)
logger->write(region_id, value); logger->write(region_id, value);
@@ -143,11 +139,17 @@ private:
SynchronizedRegionLogger* logger; SynchronizedRegionLogger* logger;
RegionId region_id; RegionId region_id;
bool is_held; bool is_held;
Region(SynchronizedRegionLogger& log, RegionId id) :
logger(&log), region_id(id), is_held(true)
{
}
}; };
struct Region private:
struct RegionBookkeeping
{ {
Region(RegionId rid) : id(rid), is_held(true) {} RegionBookkeeping(RegionId rid) : id(rid), is_held(true) {}
std::vector<std::string> pending_parts; std::vector<std::string> pending_parts;
RegionId id; RegionId id;
@@ -215,16 +217,16 @@ private:
std::ostream& out; std::ostream& out;
std::deque<Region> regions; std::deque<RegionBookkeeping> regions;
std::mutex mutex; std::mutex mutex;
Region* find_region_nolock(RegionId id) { RegionBookkeeping* find_region_nolock(RegionId id) {
// Linear search because the amount of concurrent regions should be small. // Linear search because the amount of concurrent regions should be small.
auto it = std::find_if( auto it = std::find_if(
regions.begin(), regions.begin(),
regions.end(), regions.end(),
[id](const Region& r) { return r.id == id; }); [id](const RegionBookkeeping& r) { return r.id == id; });
if (it == regions.end()) if (it == regions.end())
return nullptr; return nullptr;
@@ -269,9 +271,9 @@ public:
{ {
} }
[[nodiscard]] RegionLock new_region() { [[nodiscard]] Region new_region() {
const auto id = init_next_region(); const auto id = init_next_region();
return RegionLock(*this, id); return Region(*this, id);
} }
}; };
+12 -7
View File
@@ -54,23 +54,28 @@ namespace Eval::NNUE {
} // namespace } // namespace
// Initialize learning // Initialize learning
void initialize_training(const std::string& seed) { void initialize_training(
std::cout << "Initializing NN training for " const std::string& seed,
<< get_architecture_string() << std::endl; SynchronizedRegionLogger::Region& out) {
std::cout << std::endl; out << "INFO (initialize_training): Initializing NN training for "
<< get_architecture_string() << std::endl;
std::cout << "Layers:\n" out << std::endl;
<< get_layers_info() << std::endl;
std::cout << std::endl; out << "Layers:\n"
<< get_layers_info() << std::endl;
out << std::endl;
assert(feature_transformer); assert(feature_transformer);
assert(network); assert(network);
trainer = Trainer<Network>::create(network.get(), feature_transformer.get()); trainer = Trainer<Network>::create(network.get(), feature_transformer.get());
rng.seed(PRNG(seed).rand<uint64_t>()); rng.seed(PRNG(seed).rand<uint64_t>());
if (Options["SkipLoadingEval"]) { if (Options["SkipLoadingEval"]) {
out << "INFO (initialize_training): Performing random net initialization.\n";
trainer->initialize(rng); trainer->initialize(rng);
} }
} }
+5 -1
View File
@@ -3,11 +3,15 @@
#include "learn/learn.h" #include "learn/learn.h"
#include "misc.h"
// Interface used for learning NNUE evaluation function // Interface used for learning NNUE evaluation function
namespace Eval::NNUE { namespace Eval::NNUE {
// Initialize learning // Initialize learning
void initialize_training(const std::string& seed); void initialize_training(
const std::string& seed,
SynchronizedRegionLogger::Region& out);
// set the number of samples in the mini-batch // set the number of samples in the mini-batch
void set_batch_size(uint64_t size); void set_batch_size(uint64_t size);
+2 -2
View File
@@ -82,9 +82,9 @@ namespace Eval::NNUE::Layers {
static std::string get_layers_info() { static std::string get_layers_info() {
std::string info = PreviousLayer::get_layers_info(); std::string info = PreviousLayer::get_layers_info();
info += '\n'; info += "\n - ";
info += std::to_string(kLayerIndex); info += std::to_string(kLayerIndex);
info += ": "; info += " - ";
info += get_name(); info += get_name();
return info; return info;
} }
+2 -2
View File
@@ -76,9 +76,9 @@ namespace Eval::NNUE::Layers {
static std::string get_layers_info() { static std::string get_layers_info() {
std::string info = PreviousLayer::get_layers_info(); std::string info = PreviousLayer::get_layers_info();
info += '\n'; info += "\n - ";
info += std::to_string(kLayerIndex); info += std::to_string(kLayerIndex);
info += ": "; info += " - ";
info += get_name(); info += get_name();
return info; return info;
} }
+3 -2
View File
@@ -65,8 +65,9 @@ namespace Eval::NNUE::Layers {
} }
static std::string get_layers_info() { static std::string get_layers_info() {
std::string info = std::to_string(kLayerIndex); std::string info = " - ";
info += ": "; info += std::to_string(kLayerIndex);
info += " - ";
info += get_name(); info += get_name();
return info; return info;
} }
+2 -2
View File
@@ -60,9 +60,9 @@ namespace Eval::NNUE::Layers {
static std::string get_layers_info() { static std::string get_layers_info() {
std::string info = Tail::get_layers_info(); std::string info = Tail::get_layers_info();
info += '\n'; info += "\n - ";
info += std::to_string(kLayerIndex); info += std::to_string(kLayerIndex);
info += ": "; info += " - ";
info += get_name(); info += get_name();
return info; return info;
} }
+3 -2
View File
@@ -130,8 +130,9 @@ namespace Eval::NNUE {
} }
static std::string get_layers_info() { static std::string get_layers_info() {
std::string info = std::to_string(kLayerIndex); std::string info = " - ";
info += ": "; info += std::to_string(kLayerIndex);
info += " - ";
info += get_name(); info += get_name();
return info; return info;
} }
+1 -1
View File
@@ -107,7 +107,7 @@ namespace Eval::NNUE {
<< " - " << LayerType::get_name() << " - " << LayerType::get_name()
<< std::endl; << std::endl;
out << "==> largest min activation = " << largest_min_activation out << " - largest min activation = " << largest_min_activation
<< " , smallest max activation = " << smallest_max_activation << " , smallest max activation = " << smallest_max_activation
<< std::endl; << std::endl;
@@ -347,17 +347,17 @@ namespace Eval::NNUE {
<< " - " << LayerType::get_name() << " - " << LayerType::get_name()
<< std::endl; << std::endl;
out << "==> observed " << observed_features.count() out << " - observed " << observed_features.count()
<< " (out of " << kInputDimensions << ") features" << " (out of " << kInputDimensions << ") features"
<< std::endl; << std::endl;
out << "==> (min, max) of pre-activations = " out << " - (min, max) of pre-activations = "
<< min_pre_activation_ << ", " << min_pre_activation_ << ", "
<< max_pre_activation_ << " (limit = " << max_pre_activation_ << " (limit = "
<< kPreActivationLimit << ")" << kPreActivationLimit << ")"
<< std::endl; << std::endl;
out << "==> largest min activation = " << largest_min_activation out << " - largest min activation = " << largest_min_activation
<< " , smallest max activation = " << smallest_max_activation << " , smallest max activation = " << smallest_max_activation
<< std::endl; << std::endl;