Replace non-BLAS parts of trainers with our own BLAS-like routines.

This commit is contained in:
Tomasz Sobczyk
2020-10-28 14:52:27 +01:00
committed by nodchip
parent c56a4a36eb
commit a56d8124d8
4 changed files with 207 additions and 121 deletions
+15 -5
View File
@@ -3,6 +3,8 @@
#include "trainer.h"
#include "extra/stockfish_blas.h"
#include "learn/learn.h"
#include "nnue/layers/input_slice.h"
@@ -208,13 +210,21 @@ namespace Eval::NNUE {
// Per-sample copy over the batch: for each b in [0, batch_size_), copy
// kOutputDimensions consecutive values from input, starting Offset elements
// past that sample's base index, into that sample's range of output_.
// NOTE(review): this listing looks like a rendered diff with the +/- markers
// stripped, so each preprocessor branch appears to hold both the pre-change
// and the post-change form of the same copy. Confirm against the real file
// before treating the repeated statements as intentional.
for (IndexType b = 0; b < batch_size_; ++b) {
// Base index of sample b in input (kInputDimensions values per sample).
const IndexType input_offset = kInputDimensions * b;
// Base index of sample b in output_ (kOutputDimensions values per sample).
const IndexType output_offset = kOutputDimensions * b;
#if defined(USE_BLAS)
// BLAS path: single-precision copy of kOutputDimensions elements, with
// unit stride on both source and destination.
cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
&output_[output_offset], 1);
// Same call as above, reformatted across several lines (presumably the
// post-change side of the diff — TODO confirm).
cblas_scopy(
kOutputDimensions, &input[input_offset + Offset], 1,
&output_[output_offset], 1
);
#else
// Non-BLAS path: element-by-element copy of the same range.
for (IndexType i = 0; i < kOutputDimensions; ++i) {
output_[output_offset + i] = input[input_offset + Offset + i];
}
// Project-local replacement for the loop above; it takes a thread_pool
// followed by the same arguments as cblas_scopy. Presumably it has the same
// copy semantics — verify against extra/stockfish_blas.h.
Blas::scopy(
thread_pool,
kOutputDimensions, &input[input_offset + Offset], 1,
&output_[output_offset], 1
);
#endif
}