mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 08:37:44 +00:00
Cleanup and optimize SSE/AVX code
AVX512 +4% faster AVX2 +1% faster SSSE3 +5% faster passed non-regression STC: STC https://tests.stockfishchess.org/tests/view/5f31249f90816720665374f6 LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 17576 W: 2344 L: 2245 D: 12987 Ptnml(0-2): 127, 1570, 5292, 1675, 124 closes https://github.com/official-stockfish/Stockfish/pull/2962 No functional change
This commit is contained in:
committed by
Joost VandeVondele
parent
cb0504028e
commit
f948cd008d
@@ -169,38 +169,41 @@ namespace Eval::NNUE {
|
||||
kHalfDimensions * sizeof(BiasType));
|
||||
for (const auto index : active_indices[perspective]) {
|
||||
const IndexType offset = kHalfDimensions * index;
|
||||
#if defined(USE_AVX512)
|
||||
auto accumulation = reinterpret_cast<__m512i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
_mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#elif defined(USE_AVX2)
|
||||
auto accumulation = reinterpret_cast<__m256i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
_mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
|
||||
}
|
||||
|
||||
#elif defined(USE_SSE2)
|
||||
auto accumulation = reinterpret_cast<__m128i*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#elif defined(USE_NEON)
|
||||
auto accumulation = reinterpret_cast<int16x8_t*>(
|
||||
&accumulator.accumulation[perspective][i][0]);
|
||||
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
|
||||
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
|
||||
for (IndexType j = 0; j < kNumChunks; ++j) {
|
||||
for (IndexType j = 0; j < kNumChunks; ++j)
|
||||
accumulation[j] = vaddq_s16(accumulation[j], column[j]);
|
||||
}
|
||||
|
||||
#else
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j) {
|
||||
for (IndexType j = 0; j < kHalfDimensions; ++j)
|
||||
accumulator.accumulation[perspective][i][j] += weights_[offset + j];
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user