mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 05:07:46 +00:00
Advise the kernel to use huge pages (Linux)
Align the TT allocation by 2M to make it huge page friendly and advise the
kernel to use huge pages.
Benchmarks on my i7-8700K (6C/12T) box: (3 runs per bench per config)
vanilla (nps) hugepages (nps) avg
==================================================================================
bench | 3012490 3024364 3036331 3071052 3067544 3071052 +1.5%
bench 16 12 20 | 19237932 19050166 19085315 19266346 19207025 19548758 +1.1%
bench 16384 12 20 | 18182313 18371581 18336838 19381275 19738012 19620225 +7.0%
On my box, huge pages have a significant perf impact when using a big
hash size. They also speed up TT initialization big time:
vanilla (s) huge pages (s) speed-up
=======================================================================
time stockfish bench 16384 1 1 | 5.37 1.48 3.6x
In practice, huge pages with auto-defrag may always be enabled in the
system, in which case this patch has no effect. Whether that is the case
depends on the values in /sys/kernel/mm/transparent_hugepage/enabled
and /sys/kernel/mm/transparent_hugepage/defrag.
closes https://github.com/official-stockfish/Stockfish/pull/2463
No functional change
This commit is contained in:
committed by
Joost VandeVondele
parent
6d0eabd5fe
commit
39437f4e55
+35
-1
@@ -47,6 +47,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdlib.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "misc.h"
|
||||
#include "thread.h"
|
||||
|
||||
@@ -190,7 +195,7 @@ const std::string compiler_info() {
|
||||
compiler += "(unknown version)";
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#if defined(__APPLE__)
|
||||
compiler += " on Apple";
|
||||
#elif defined(__CYGWIN__)
|
||||
compiler += " on Cygwin";
|
||||
@@ -288,6 +293,35 @@ void prefetch(void* addr) {
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/// aligned_ttmem_alloc will return suitably aligned memory, and if possible use large pages.
|
||||
/// The returned pointer is the aligned one, while the mem argument is the one that needs to be passed to free.
|
||||
/// With C++17 some of this functionality can be simplified.
|
||||
#ifdef __linux__
|
||||
|
||||
/// Linux variant: allocates a block whose start is aligned to 2MB and whose
/// length is allocSize rounded up to a multiple of 2MB, then advises the kernel
/// to back it with transparent huge pages.
/// On return *mem holds the pointer that must later be passed to free(); the
/// returned pointer is that same address. Both are nullptr if allocation fails.
void* aligned_ttmem_alloc(size_t allocSize, void** mem) {

  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
  size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment

  *mem = aligned_alloc(alignment, size);

  // The hint is purely advisory, so its return value is deliberately ignored.
  // Advise the whole rounded-up region rather than just allocSize: a huge page
  // can only back a complete 2MB unit, so stopping at allocSize would leave the
  // last, partially covered unit ineligible. Skip the call if allocation failed.
  if (*mem != nullptr)
      madvise(*mem, size, MADV_HUGEPAGE);

  return *mem;
}
|
||||
|
||||
#else
|
||||
|
||||
/// Portable fallback: over-allocates with malloc and hands back the first
/// 64-byte aligned address inside the block. *mem receives the raw malloc
/// pointer, which is the one that has to be given to free().
void* aligned_ttmem_alloc(size_t allocSize, void** mem) {

  constexpr size_t alignment = 64; // assumed cache line size
  constexpr uintptr_t lowBits = uintptr_t(alignment - 1);

  // Request alignment - 1 spare bytes so an aligned start always fits
  *mem = malloc(allocSize + alignment - 1);

  // Round the raw address up to the next multiple of the alignment
  const uintptr_t rawAddr = reinterpret_cast<uintptr_t>(*mem);
  const uintptr_t alignedAddr = (rawAddr + lowBits) & ~lowBits;

  return reinterpret_cast<void*>(alignedAddr);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
namespace WinProcGroup {
|
||||
|
||||
#ifndef _WIN32
|
||||
|
||||
Reference in New Issue
Block a user