Parallelize input slice trainer backprop.

This commit is contained in:
Tomasz Sobczyk
2020-10-27 18:41:17 +01:00
committed by nodchip
parent 941897ff2c
commit b5714c4084
+19 -7
View File
@@ -236,17 +236,29 @@ namespace Eval::NNUE {
const LearnFloatType* gradients, const LearnFloatType* gradients,
LearnFloatType learning_rate) { LearnFloatType learning_rate) {
for (IndexType b = 0; b < batch_size_; ++b) { thread_pool.for_each_index_with_workers(
const IndexType input_offset = kInputDimensions * b; 0, batch_size_,
const IndexType output_offset = kOutputDimensions * b; [&](Thread&, int b) {
for (IndexType i = 0; i < kInputDimensions; ++i) { const IndexType input_offset = kInputDimensions * b;
if ((int)i < (int)Offset || i >= Offset + kOutputDimensions) { const IndexType output_offset = kOutputDimensions * b;
IndexType i = 0;
for (; i < Offset; ++i) {
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0); gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
} else { }
for (; i < Offset + kOutputDimensions; ++i) {
gradients_[input_offset + i] = gradients[output_offset + i - Offset]; gradients_[input_offset + i] = gradients[output_offset + i - Offset];
} }
for (; i < kInputDimensions; ++i)
{
gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
}
} }
} );
thread_pool.wait_for_workers_finished();
shared_input_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate); shared_input_trainer_->backpropagate(thread_pool, gradients_.data(), learning_rate);
} }