mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 12:07:43 +00:00
Pass ThreadPool to update_parameters, propagate, and backpropagate.
This commit is contained in:
+1
-1
@@ -704,7 +704,7 @@ namespace Learner
|
|||||||
// should be no real issues happening since
|
// should be no real issues happening since
|
||||||
// the read/write phases are isolated.
|
// the read/write phases are isolated.
|
||||||
atomic_thread_fence(memory_order_seq_cst);
|
atomic_thread_fence(memory_order_seq_cst);
|
||||||
Eval::NNUE::update_parameters(epoch, params.verbose, params.learning_rate, calc_grad);
|
Eval::NNUE::update_parameters(Threads, epoch, params.verbose, params.learning_rate, calc_grad);
|
||||||
atomic_thread_fence(memory_order_seq_cst);
|
atomic_thread_fence(memory_order_seq_cst);
|
||||||
|
|
||||||
if (++save_count * params.mini_batch_size >= params.eval_save_interval)
|
if (++save_count * params.mini_batch_size >= params.eval_save_interval)
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
#include "uci.h"
|
#include "uci.h"
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
#include "thread_win32_osx.h"
|
#include "thread_win32_osx.h"
|
||||||
|
#include "thread.h"
|
||||||
|
|
||||||
// Code for learning NNUE evaluation function
|
// Code for learning NNUE evaluation function
|
||||||
namespace Eval::NNUE {
|
namespace Eval::NNUE {
|
||||||
@@ -180,6 +181,7 @@ namespace Eval::NNUE {
|
|||||||
|
|
||||||
// update the evaluation function parameters
|
// update the evaluation function parameters
|
||||||
void update_parameters(
|
void update_parameters(
|
||||||
|
ThreadPool& thread_pool,
|
||||||
uint64_t epoch,
|
uint64_t epoch,
|
||||||
bool verbose,
|
bool verbose,
|
||||||
double learning_rate,
|
double learning_rate,
|
||||||
@@ -202,7 +204,7 @@ namespace Eval::NNUE {
|
|||||||
std::vector<Example> batch(examples.end() - batch_size, examples.end());
|
std::vector<Example> batch(examples.end() - batch_size, examples.end());
|
||||||
examples.resize(examples.size() - batch_size);
|
examples.resize(examples.size() - batch_size);
|
||||||
|
|
||||||
const auto network_output = trainer->propagate(batch);
|
const auto network_output = trainer->propagate(thread_pool, batch);
|
||||||
|
|
||||||
std::vector<LearnFloatType> gradients(batch.size());
|
std::vector<LearnFloatType> gradients(batch.size());
|
||||||
for (std::size_t b = 0; b < batch.size(); ++b) {
|
for (std::size_t b = 0; b < batch.size(); ++b) {
|
||||||
@@ -226,7 +228,7 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trainer->backpropagate(gradients.data(), learning_rate);
|
trainer->backpropagate(thread_pool, gradients.data(), learning_rate);
|
||||||
|
|
||||||
collect_stats = false;
|
collect_stats = false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,8 @@
|
|||||||
|
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
|
|
||||||
|
struct ThreadPool;
|
||||||
|
|
||||||
// Interface used for learning NNUE evaluation function
|
// Interface used for learning NNUE evaluation function
|
||||||
namespace Eval::NNUE {
|
namespace Eval::NNUE {
|
||||||
|
|
||||||
@@ -32,6 +34,7 @@ namespace Eval::NNUE {
|
|||||||
|
|
||||||
// update the evaluation function parameters
|
// update the evaluation function parameters
|
||||||
void update_parameters(
|
void update_parameters(
|
||||||
|
ThreadPool& thread_pool,
|
||||||
uint64_t epoch,
|
uint64_t epoch,
|
||||||
bool verbose,
|
bool verbose,
|
||||||
double learning_rate,
|
double learning_rate,
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include "nnue/layers/affine_transform.h"
|
#include "nnue/layers/affine_transform.h"
|
||||||
|
|
||||||
|
#include "thread.h"
|
||||||
|
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
// Specialization of NNUE evaluation function learning class template for AffineTransform
|
// Specialization of NNUE evaluation function learning class template for AffineTransform
|
||||||
@@ -88,14 +90,14 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forward propagation
|
// forward propagation
|
||||||
const LearnFloatType* propagate(const std::vector<Example>& batch) {
|
const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
|
||||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||||
output_.resize(kOutputDimensions * batch.size());
|
output_.resize(kOutputDimensions * batch.size());
|
||||||
gradients_.resize(kInputDimensions * batch.size());
|
gradients_.resize(kInputDimensions * batch.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
batch_size_ = static_cast<IndexType>(batch.size());
|
batch_size_ = static_cast<IndexType>(batch.size());
|
||||||
batch_input_ = previous_layer_trainer_->propagate(batch);
|
batch_input_ = previous_layer_trainer_->propagate(thread_pool, batch);
|
||||||
#if defined(USE_BLAS)
|
#if defined(USE_BLAS)
|
||||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||||
const IndexType batch_offset = kOutputDimensions * b;
|
const IndexType batch_offset = kOutputDimensions * b;
|
||||||
@@ -127,7 +129,8 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backpropagation
|
// backpropagation
|
||||||
void backpropagate(const LearnFloatType* gradients,
|
void backpropagate(ThreadPool& thread_pool,
|
||||||
|
const LearnFloatType* gradients,
|
||||||
LearnFloatType learning_rate) {
|
LearnFloatType learning_rate) {
|
||||||
|
|
||||||
const LearnFloatType local_learning_rate =
|
const LearnFloatType local_learning_rate =
|
||||||
@@ -211,7 +214,7 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
num_weights_diffs_ += kOutputDimensions * kInputDimensions;
|
num_weights_diffs_ += kOutputDimensions * kInputDimensions;
|
||||||
|
|
||||||
previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate);
|
previous_layer_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include "nnue/layers/clipped_relu.h"
|
#include "nnue/layers/clipped_relu.h"
|
||||||
|
|
||||||
|
#include "thread.h"
|
||||||
|
|
||||||
// Specialization of NNUE evaluation function learning class template for ClippedReLU
|
// Specialization of NNUE evaluation function learning class template for ClippedReLU
|
||||||
namespace Eval::NNUE {
|
namespace Eval::NNUE {
|
||||||
|
|
||||||
@@ -41,13 +43,13 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forward propagation
|
// forward propagation
|
||||||
const LearnFloatType* propagate(const std::vector<Example>& batch) {
|
const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
|
||||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||||
output_.resize(kOutputDimensions * batch.size());
|
output_.resize(kOutputDimensions * batch.size());
|
||||||
gradients_.resize(kInputDimensions * batch.size());
|
gradients_.resize(kInputDimensions * batch.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto input = previous_layer_trainer_->propagate(batch);
|
const auto input = previous_layer_trainer_->propagate(thread_pool, batch);
|
||||||
batch_size_ = static_cast<IndexType>(batch.size());
|
batch_size_ = static_cast<IndexType>(batch.size());
|
||||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||||
const IndexType batch_offset = kOutputDimensions * b;
|
const IndexType batch_offset = kOutputDimensions * b;
|
||||||
@@ -63,7 +65,8 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backpropagation
|
// backpropagation
|
||||||
void backpropagate(const LearnFloatType* gradients,
|
void backpropagate(ThreadPool& thread_pool,
|
||||||
|
const LearnFloatType* gradients,
|
||||||
LearnFloatType learning_rate) {
|
LearnFloatType learning_rate) {
|
||||||
|
|
||||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||||
@@ -77,7 +80,7 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
num_total_ += batch_size_ * kOutputDimensions;
|
num_total_ += batch_size_ * kOutputDimensions;
|
||||||
|
|
||||||
previous_layer_trainer_->backpropagate(gradients_.data(), learning_rate);
|
previous_layer_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -9,6 +9,8 @@
|
|||||||
|
|
||||||
#include "nnue/nnue_feature_transformer.h"
|
#include "nnue/nnue_feature_transformer.h"
|
||||||
|
|
||||||
|
#include "thread.h"
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
@@ -90,12 +92,14 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forward propagation
|
// forward propagation
|
||||||
const LearnFloatType* propagate(const std::vector<Example>& batch) {
|
const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
|
||||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||||
output_.resize(kOutputDimensions * batch.size());
|
output_.resize(kOutputDimensions * batch.size());
|
||||||
gradients_.resize(kOutputDimensions * batch.size());
|
gradients_.resize(kOutputDimensions * batch.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
(void)thread_pool;
|
||||||
|
|
||||||
batch_ = &batch;
|
batch_ = &batch;
|
||||||
// affine transform
|
// affine transform
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
@@ -143,9 +147,12 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backpropagation
|
// backpropagation
|
||||||
void backpropagate(const LearnFloatType* gradients,
|
void backpropagate(ThreadPool& thread_pool,
|
||||||
|
const LearnFloatType* gradients,
|
||||||
LearnFloatType learning_rate) {
|
LearnFloatType learning_rate) {
|
||||||
|
|
||||||
|
(void)thread_pool;
|
||||||
|
|
||||||
const LearnFloatType local_learning_rate =
|
const LearnFloatType local_learning_rate =
|
||||||
learning_rate * learning_rate_scale_;
|
learning_rate * learning_rate_scale_;
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include "nnue/layers/input_slice.h"
|
#include "nnue/layers/input_slice.h"
|
||||||
|
|
||||||
|
#include "thread.h"
|
||||||
|
|
||||||
// Specialization of NNUE evaluation function learning class template for InputSlice
|
// Specialization of NNUE evaluation function learning class template for InputSlice
|
||||||
namespace Eval::NNUE {
|
namespace Eval::NNUE {
|
||||||
|
|
||||||
@@ -60,7 +62,7 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forward propagation
|
// forward propagation
|
||||||
const LearnFloatType* propagate(const std::vector<Example>& batch) {
|
const LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
|
||||||
if (gradients_.size() < kInputDimensions * batch.size()) {
|
if (gradients_.size() < kInputDimensions * batch.size()) {
|
||||||
gradients_.resize(kInputDimensions * batch.size());
|
gradients_.resize(kInputDimensions * batch.size());
|
||||||
}
|
}
|
||||||
@@ -69,7 +71,7 @@ namespace Eval::NNUE {
|
|||||||
|
|
||||||
if (num_calls_ == 0) {
|
if (num_calls_ == 0) {
|
||||||
current_operation_ = Operation::kPropagate;
|
current_operation_ = Operation::kPropagate;
|
||||||
output_ = feature_transformer_trainer_->propagate(batch);
|
output_ = feature_transformer_trainer_->propagate(thread_pool, batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(current_operation_ == Operation::kPropagate);
|
assert(current_operation_ == Operation::kPropagate);
|
||||||
@@ -83,11 +85,12 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backpropagation
|
// backpropagation
|
||||||
void backpropagate(const LearnFloatType* gradients,
|
void backpropagate(ThreadPool& thread_pool,
|
||||||
|
const LearnFloatType* gradients,
|
||||||
LearnFloatType learning_rate) {
|
LearnFloatType learning_rate) {
|
||||||
|
|
||||||
if (num_referrers_ == 1) {
|
if (num_referrers_ == 1) {
|
||||||
feature_transformer_trainer_->backpropagate(gradients, learning_rate);
|
feature_transformer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -112,7 +115,7 @@ namespace Eval::NNUE {
|
|||||||
|
|
||||||
if (++num_calls_ == num_referrers_) {
|
if (++num_calls_ == num_referrers_) {
|
||||||
feature_transformer_trainer_->backpropagate(
|
feature_transformer_trainer_->backpropagate(
|
||||||
gradients_.data(), learning_rate);
|
thread_pool, gradients_.data(), learning_rate);
|
||||||
num_calls_ = 0;
|
num_calls_ = 0;
|
||||||
current_operation_ = Operation::kNone;
|
current_operation_ = Operation::kNone;
|
||||||
}
|
}
|
||||||
@@ -193,7 +196,7 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forward propagation
|
// forward propagation
|
||||||
const LearnFloatType* propagate(const std::vector<Example>& batch) {
|
const LearnFloatType* propagate(ThreadPool& thread_pool,const std::vector<Example>& batch) {
|
||||||
if (output_.size() < kOutputDimensions * batch.size()) {
|
if (output_.size() < kOutputDimensions * batch.size()) {
|
||||||
output_.resize(kOutputDimensions * batch.size());
|
output_.resize(kOutputDimensions * batch.size());
|
||||||
gradients_.resize(kInputDimensions * batch.size());
|
gradients_.resize(kInputDimensions * batch.size());
|
||||||
@@ -201,7 +204,7 @@ namespace Eval::NNUE {
|
|||||||
|
|
||||||
batch_size_ = static_cast<IndexType>(batch.size());
|
batch_size_ = static_cast<IndexType>(batch.size());
|
||||||
|
|
||||||
const auto input = shared_input_trainer_->propagate(batch);
|
const auto input = shared_input_trainer_->propagate(thread_pool, batch);
|
||||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||||
const IndexType input_offset = kInputDimensions * b;
|
const IndexType input_offset = kInputDimensions * b;
|
||||||
const IndexType output_offset = kOutputDimensions * b;
|
const IndexType output_offset = kOutputDimensions * b;
|
||||||
@@ -219,7 +222,8 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backpropagation
|
// backpropagation
|
||||||
void backpropagate(const LearnFloatType* gradients,
|
void backpropagate(ThreadPool& thread_pool,
|
||||||
|
const LearnFloatType* gradients,
|
||||||
LearnFloatType learning_rate) {
|
LearnFloatType learning_rate) {
|
||||||
|
|
||||||
for (IndexType b = 0; b < batch_size_; ++b) {
|
for (IndexType b = 0; b < batch_size_; ++b) {
|
||||||
@@ -233,7 +237,7 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
shared_input_trainer_->backpropagate(gradients_.data(), learning_rate);
|
shared_input_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include "nnue/layers/sum.h"
|
#include "nnue/layers/sum.h"
|
||||||
|
|
||||||
|
#include "thread.h"
|
||||||
|
|
||||||
// Specialization of NNUE evaluation function learning class template for Sum
|
// Specialization of NNUE evaluation function learning class template for Sum
|
||||||
namespace Eval::NNUE {
|
namespace Eval::NNUE {
|
||||||
|
|
||||||
@@ -45,10 +47,10 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// forward propagation
|
// forward propagation
|
||||||
/*const*/ LearnFloatType* propagate(const std::vector<Example>& batch) {
|
/*const*/ LearnFloatType* propagate(ThreadPool& thread_pool, const std::vector<Example>& batch) {
|
||||||
batch_size_ = static_cast<IndexType>(batch.size());
|
batch_size_ = static_cast<IndexType>(batch.size());
|
||||||
auto output = Tail::propagate(batch);
|
auto output = Tail::propagate(thread_pool, batch);
|
||||||
const auto head_output = previous_layer_trainer_->propagate(batch);
|
const auto head_output = previous_layer_trainer_->propagate(thread_pool, batch);
|
||||||
|
|
||||||
#if defined(USE_BLAS)
|
#if defined(USE_BLAS)
|
||||||
cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
|
cblas_saxpy(kOutputDimensions * batch_size_, 1.0,
|
||||||
@@ -66,11 +68,12 @@ namespace Eval::NNUE {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backpropagation
|
// backpropagation
|
||||||
void backpropagate(const LearnFloatType* gradients,
|
void backpropagate(ThreadPool& thread_pool,
|
||||||
|
const LearnFloatType* gradients,
|
||||||
LearnFloatType learning_rate) {
|
LearnFloatType learning_rate) {
|
||||||
|
|
||||||
Tail::backpropagate(gradients, learning_rate);
|
Tail::backpropagate(thread_pool, gradients, learning_rate);
|
||||||
previous_layer_trainer_->backpropagate(gradients, learning_rate);
|
previous_layer_trainer_->backpropagate(thread_pool, gradients, learning_rate);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
Reference in New Issue
Block a user