Pass ThreadPool to update_parameters, propagate, and backpropagate.

This commit is contained in:
Tomasz Sobczyk
2020-10-26 15:06:15 +01:00
committed by nodchip
parent f1e96cab55
commit ee0917a345
8 changed files with 53 additions and 28 deletions
+1 -1
View File
@@ -704,7 +704,7 @@ namespace Learner
// should be no real issues happening since // should be no real issues happening since
// the read/write phases are isolated. // the read/write phases are isolated.
atomic_thread_fence(memory_order_seq_cst); atomic_thread_fence(memory_order_seq_cst);
Eval::NNUE::update_parameters(epoch, params.verbose, params.learning_rate, calc_grad); Eval::NNUE::update_parameters(Threads, epoch, params.verbose, params.learning_rate, calc_grad);
atomic_thread_fence(memory_order_seq_cst); atomic_thread_fence(memory_order_seq_cst);
if (++save_count * params.mini_batch_size >= params.eval_save_interval) if (++save_count * params.mini_batch_size >= params.eval_save_interval)
+4 -2
View File
@@ -18,6 +18,7 @@
#include "uci.h" #include "uci.h"
#include "misc.h" #include "misc.h"
#include "thread_win32_osx.h" #include "thread_win32_osx.h"
#include "thread.h"
// Code for learning NNUE evaluation function // Code for learning NNUE evaluation function
namespace Eval::NNUE { namespace Eval::NNUE {
@@ -180,6 +181,7 @@ namespace Eval::NNUE {
// update the evaluation function parameters // update the evaluation function parameters
void update_parameters( void update_parameters(
ThreadPool& thread_pool,
uint64_t epoch, uint64_t epoch,
bool verbose, bool verbose,
double learning_rate, double learning_rate,
@@ -202,7 +204,7 @@ namespace Eval::NNUE {
std::vector<Example> batch(examples.end() - batch_size, examples.end()); std::vector<Example> batch(examples.end() - batch_size, examples.end());
examples.resize(examples.size() - batch_size); examples.resize(examples.size() - batch_size);
const auto network_output = trainer->propagate(batch); const auto network_output = trainer->propagate(thread_pool, batch);
std::vector<LearnFloatType> gradients(batch.size()); std::vector<LearnFloatType> gradients(batch.size());
for (std::size_t b = 0; b < batch.size(); ++b) { for (std::size_t b = 0; b < batch.size(); ++b) {
@@ -226,7 +228,7 @@ namespace Eval::NNUE {
} }
} }
trainer->backpropagate(gradients.data(), learning_rate); trainer->backpropagate(thread_pool, gradients.data(), learning_rate);
collect_stats = false; collect_stats = false;
} }
+3
View File
@@ -5,6 +5,8 @@
#include "misc.h" #include "misc.h"
struct ThreadPool;
// Interface used for learning NNUE evaluation function // Interface used for learning NNUE evaluation function
namespace Eval::NNUE { namespace Eval::NNUE {
@@ -32,6 +34,7 @@ namespace Eval::NNUE {
// update the evaluation function parameters // update the evaluation function parameters
void update_parameters( void update_parameters(
ThreadPool& thread_pool,
uint64_t epoch, uint64_t epoch,
bool verbose, bool verbose,
double learning_rate, double learning_rate,
+7 -4
View File
@@ -7,6 +7,8 @@
#include "nnue/layers/affine_transform.h" #include "nnue/layers/affine_transform.h"
#include "thread.h"
#include <random> #include <random>
// Specialization of NNUE evaluation function learning class template for AffineTransform // Specialization of NNUE evaluation function learning class template for AffineTransform
@@ -88,14 +90,14 @@ namespace Eval::NNUE {
} }
// forward propagation // forward propagation
const LearnFloatType* propagate(const std::vector<Example>& batch) { const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) { if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size()); output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size()); gradients_.resize(kInputDimensions * batch.size());
} }
batch_size_ = static_cast<IndexType>(batch.size()); batch_size_ = static_cast<IndexType>(batch.size());
batch_input_ = previous_layer_trainer_->propagate(batch); batch_input_ = previous_layer_trainer_->propagate(thread_pool, batch);
#if defined(USE_BLAS) #if defined(USE_BLAS)
for (IndexType b = 0; b < batch_size_; ++b) { for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b; const IndexType batch_offset = kOutputDimensions * b;
@@ -127,7 +129,8 @@ namespace Eval::NNUE {
} }
// backpropagation // backpropagation
void backpropagate(const LearnFloatType* gradients, void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
const LearnFloatType local_learning_rate = const LearnFloatType local_learning_rate =
@@ -211,7 +214,7 @@ namespace Eval::NNUE {
} }
num_weights_diffs_ += kOutputDimensions * kInputDimensions; num_weights_diffs_ += kOutputDimensions * kInputDimensions;
previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate); previous_layer_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
} }
private: private:
+7 -4
View File
@@ -7,6 +7,8 @@
#include "nnue/layers/clipped_relu.h" #include "nnue/layers/clipped_relu.h"
#include "thread.h"
// Specialization of NNUE evaluation function learning class template for ClippedReLU // Specialization of NNUE evaluation function learning class template for ClippedReLU
namespace Eval::NNUE { namespace Eval::NNUE {
@@ -41,13 +43,13 @@ namespace Eval::NNUE {
} }
// forward propagation // forward propagation
const LearnFloatType* propagate(const std::vector<Example>& batch) { const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) { if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size()); output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size()); gradients_.resize(kInputDimensions * batch.size());
} }
const auto input = previous_layer_trainer_->propagate(batch); const auto input = previous_layer_trainer_->propagate(thread_pool, batch);
batch_size_ = static_cast<IndexType>(batch.size()); batch_size_ = static_cast<IndexType>(batch.size());
for (IndexType b = 0; b < batch_size_; ++b) { for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType batch_offset = kOutputDimensions * b; const IndexType batch_offset = kOutputDimensions * b;
@@ -63,7 +65,8 @@ namespace Eval::NNUE {
} }
// backpropagation // backpropagation
void backpropagate(const LearnFloatType* gradients, void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) { for (IndexType b = 0; b < batch_size_; ++b) {
@@ -77,7 +80,7 @@ namespace Eval::NNUE {
} }
num_total_ += batch_size_ * kOutputDimensions; num_total_ += batch_size_ * kOutputDimensions;
previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate); previous_layer_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
} }
private: private:
@@ -9,6 +9,8 @@
#include "nnue/nnue_feature_transformer.h" #include "nnue/nnue_feature_transformer.h"
#include "thread.h"
#include <array> #include <array>
#include <bitset> #include <bitset>
#include <numeric> #include <numeric>
@@ -90,12 +92,14 @@ namespace Eval::NNUE {
} }
// forward propagation // forward propagation
const LearnFloatType* propagate(const std::vector<Example>& batch) { const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) { if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size()); output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kOutputDimensions * batch.size()); gradients_.resize(kOutputDimensions * batch.size());
} }
(void)thread_pool;
batch_ = &batch; batch_ = &batch;
// affine transform // affine transform
#pragma omp parallel for #pragma omp parallel for
@@ -143,9 +147,12 @@ namespace Eval::NNUE {
} }
// backpropagation // backpropagation
void backpropagate(const LearnFloatType* gradients, void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
(void)thread_pool;
const LearnFloatType local_learning_rate = const LearnFloatType local_learning_rate =
learning_rate * learning_rate_scale_; learning_rate * learning_rate_scale_;
+13 -9
View File
@@ -7,6 +7,8 @@
#include "nnue/layers/input_slice.h" #include "nnue/layers/input_slice.h"
#include "thread.h"
// Specialization of NNUE evaluation function learning class template for InputSlice // Specialization of NNUE evaluation function learning class template for InputSlice
namespace Eval::NNUE { namespace Eval::NNUE {
@@ -60,7 +62,7 @@ namespace Eval::NNUE {
} }
// forward propagation // forward propagation
const LearnFloatType* propagate(const std::vector<Example>& batch) { const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
if (gradients_.size() < kInputDimensions * batch.size()) { if (gradients_.size() < kInputDimensions * batch.size()) {
gradients_.resize(kInputDimensions * batch.size()); gradients_.resize(kInputDimensions * batch.size());
} }
@@ -69,7 +71,7 @@ namespace Eval::NNUE {
if (num_calls_ == 0) { if (num_calls_ == 0) {
current_operation_ = Operation::kPropagate; current_operation_ = Operation::kPropagate;
output_ = feature_transformer_trainer_->propagate(batch); output_ = feature_transformer_trainer_->propagate(thread_pool, batch);
} }
assert(current_operation_ == Operation::kPropagate); assert(current_operation_ == Operation::kPropagate);
@@ -83,11 +85,12 @@ namespace Eval::NNUE {
} }
// backpropagation // backpropagation
void backpropagate(const LearnFloatType* gradients, void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
if (num_referrers_ == 1) { if (num_referrers_ == 1) {
feature_transformer_trainer_->backpropagate(gradients, learning_rate); feature_transformer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
return; return;
} }
@@ -112,7 +115,7 @@ namespace Eval::NNUE {
if (++num_calls_ == num_referrers_) { if (++num_calls_ == num_referrers_) {
feature_transformer_trainer_->backpropagate( feature_transformer_trainer_->backpropagate(
gradients_.data(), learning_rate); thread_pool, gradients_.data(), learning_rate);
num_calls_ = 0; num_calls_ = 0;
current_operation_ = Operation::kNone; current_operation_ = Operation::kNone;
} }
@@ -193,7 +196,7 @@ namespace Eval::NNUE {
} }
// forward propagation // forward propagation
const LearnFloatType* propagate(const std::vector<Example>& batch) { const LearnFloatType* propagate(ThreadPool& thread_pool,const std::vector<Example>& batch) {
if (output_.size() < kOutputDimensions * batch.size()) { if (output_.size() < kOutputDimensions * batch.size()) {
output_.resize(kOutputDimensions * batch.size()); output_.resize(kOutputDimensions * batch.size());
gradients_.resize(kInputDimensions * batch.size()); gradients_.resize(kInputDimensions * batch.size());
@@ -201,7 +204,7 @@ namespace Eval::NNUE {
batch_size_ = static_cast<IndexType>(batch.size()); batch_size_ = static_cast<IndexType>(batch.size());
const auto input = shared_input_trainer_->propagate(batch); const auto input = shared_input_trainer_->propagate(thread_pool, batch);
for (IndexType b = 0; b < batch_size_; ++b) { for (IndexType b = 0; b < batch_size_; ++b) {
const IndexType input_offset = kInputDimensions * b; const IndexType input_offset = kInputDimensions * b;
const IndexType output_offset = kOutputDimensions * b; const IndexType output_offset = kOutputDimensions * b;
@@ -219,7 +222,8 @@ namespace Eval::NNUE {
} }
// backpropagation // backpropagation
void backpropagate(const LearnFloatType* gradients, void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) { for (IndexType b = 0; b < batch_size_; ++b) {
@@ -233,7 +237,7 @@ namespace Eval::NNUE {
} }
} }
} }
shared_input_trainer_->backpropagate(gradients_.data(), learning_rate); shared_input_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
} }
private: private:
+9 -6
View File
@@ -7,6 +7,8 @@
#include "nnue/layers/sum.h" #include "nnue/layers/sum.h"
#include "thread.h"
// Specialization of NNUE evaluation function learning class template for Sum // Specialization of NNUE evaluation function learning class template for Sum
namespace Eval::NNUE { namespace Eval::NNUE {
@@ -45,10 +47,10 @@ namespace Eval::NNUE {
} }
// forward propagation // forward propagation
/*const*/ LearnFloatType* propagate(const std::vector<Example>& batch) { /*const*/ LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
batch_size_ = static_cast<IndexType>(batch.size()); batch_size_ = static_cast<IndexType>(batch.size());
auto output = Tail::propagate(batch); auto output = Tail::propagate(thread_pool, batch);
const auto head_output = previous_layer_trainer_->propagate(batch); const auto head_output = previous_layer_trainer_->propagate(thread_pool, batch);
#if defined(USE_BLAS) #if defined(USE_BLAS)
cblas_saxpy(kOutputDimensions * batch_size_, 1.0, cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
@@ -66,11 +68,12 @@ namespace Eval::NNUE {
} }
// backpropagation // backpropagation
void backpropagate(const LearnFloatType* gradients, void backpropagate(ThreadPool& thread_pool,
const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
Tail::backpropagate(gradients, learning_rate); Tail::backpropagate(thread_pool, gradients, learning_rate);
previous_layer_trainer_->backpropagate(gradients, learning_rate); previous_layer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
} }
private: private: