mirror of
https://github.com/opelly27/Stockfish.git
synced 2026-05-20 10:57:43 +00:00
Add helpers for managing aligned memory
Previously, we had two type aliases, LargePagePtr and AlignedPtr, which required manually initializing the aligned memory for the pointer. The new helpers: - make_unique_aligned - make_unique_large_page are now available for allocating aligned memory (with large pages). They behave similarly to std::make_unique, ensuring objects allocated with these functions follow RAII. The old approach had issues with initializing non-trivial types or arrays of objects. The evaluation function of the network is now a unique pointer to an array instead of an array of unique pointers. Memory related functions have been moved into memory.h. Passed High Hash Pressure Test Non-Regression STC: https://tests.stockfishchess.org/tests/view/665b2b36586058766677cfd2 LLR: 2.93 (-2.94,2.94) <-1.75,0.25> Total: 476992 W: 122426 L: 122677 D: 231889 Ptnml(0-2): 1145, 51027, 134419, 50744, 1161 Failed Normal Non-Regression STC: https://tests.stockfishchess.org/tests/view/665b2997586058766677cfc8 LLR: -2.94 (-2.94,2.94) <-1.75,0.25> Total: 877312 W: 225233 L: 226395 D: 425684 Ptnml(0-2): 2110, 94642, 246239, 93630, 2035 Probably a fluke since there shouldn't be a real slowdown and it has also passed the high hash pressure test. closes https://github.com/official-stockfish/Stockfish/pull/5332 No functional change
This commit is contained in:
+1
-198
@@ -18,29 +18,6 @@
|
||||
|
||||
#include "misc.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#if _WIN32_WINNT < 0x0601
|
||||
#undef _WIN32_WINNT
|
||||
#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
|
||||
#endif
|
||||
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
// The needed Windows API for processor groups could be missed from old Windows
|
||||
// versions, so instead of calling them directly (forcing the linker to resolve
|
||||
// the calls at compile time), try to load them at runtime. To do this we need
|
||||
// first to define the corresponding function pointers.
|
||||
// Function-pointer types for the advapi32 routines that are looked up with
// GetProcAddress() at runtime instead of being linked directly.
// NOTE(review): the aliases declare a `bool` return and no explicit calling
// convention, whereas the Win32 prototypes return BOOL and are WINAPI --
// confirm this is intentional for the 64-bit-only code path that uses them.
extern "C" {
|
||||
// Signature used for the dynamically resolved "OpenProcessToken" entry point.
using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
|
||||
// Signature used for the dynamically resolved "LookupPrivilegeValueA" entry point.
using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
|
||||
// Signature used for the dynamically resolved "AdjustTokenPrivileges" entry point.
using AdjustTokenPrivileges_t =
|
||||
bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
@@ -48,25 +25,14 @@ using AdjustTokenPrivileges_t =
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#if defined(__linux__) && !defined(__ANDROID__)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
|
||||
|| (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
|
||||
|| defined(__e2k__)
|
||||
#define POSIXALIGNEDALLOC
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
namespace Stockfish {
|
||||
|
||||
namespace {
|
||||
@@ -427,169 +393,6 @@ void prefetch(const void* addr) {
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Wrapper for systems where the C++17 implementation does not guarantee the
// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
// must be freed with std_aligned_free().
//
// alignment: requested alignment in bytes, forwarded to the platform allocator.
// size:      number of bytes requested.
// Returns the pointer produced by the platform allocator; nullptr when
// posix_memalign() reports an error or when rounding the size up would overflow.
void* std_aligned_alloc(size_t alignment, size_t size) {

#if defined(POSIXALIGNEDALLOC)
    void* mem;
    return posix_memalign(&mem, alignment, size) ? nullptr : mem;
#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
    return _mm_malloc(size, alignment);
#elif defined(_WIN32)
    return _aligned_malloc(size, alignment);
#else
    // std::aligned_alloc() expects size to be an integral multiple of
    // alignment, so round the request up instead of relying on the C library
    // being lenient about it.
    if (alignment != 0 && size % alignment != 0)
    {
        const size_t padding = alignment - size % alignment;
        if (size > std::numeric_limits<size_t>::max() - padding)
            return nullptr;  // rounding up would overflow size_t
        size += padding;
    }
    return std::aligned_alloc(alignment, size);
#endif
}
|
||||
|
||||
// Releases memory obtained from std_aligned_alloc(), using the deallocation
// routine that pairs with the allocator selected there for this platform.
void std_aligned_free(void* ptr) {

#if !defined(POSIXALIGNEDALLOC) && defined(_WIN32)
    // Windows builds: pick the counterpart of _aligned_malloc() / _mm_malloc()
    #if defined(_M_ARM) || defined(_M_ARM64)
    _aligned_free(ptr);
    #else
    _mm_free(ptr);
    #endif
#else
    // posix_memalign() and std::aligned_alloc() memory both go back via free()
    free(ptr);
#endif
}
|
||||
|
||||
// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
// Tries to allocate allocSize bytes backed by Windows large pages.
// Returns nullptr on non-64-bit builds and whenever a step fails (no large
// page support, advapi32 or one of its routines unavailable, the process
// token cannot be opened, or VirtualAlloc() fails). If SeLockMemoryPrivilege
// cannot be looked up or enabled, no allocation is attempted and nullptr is
// returned as well.
static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {

#if !defined(_WIN64)
    return nullptr;
#else

    HANDLE hProcessToken{};
    LUID   luid{};
    void*  mem = nullptr;

    const size_t largePageSize = GetLargePageMinimum();
    if (!largePageSize)
        return nullptr;

    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges

    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));

    if (!hAdvapi32)
        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));

    // Neither lookup produced a module handle: there is nothing to resolve
    // the entry points from, so give up before calling GetProcAddress().
    if (!hAdvapi32)
        return nullptr;

    auto OpenProcessToken_f =
      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
    if (!OpenProcessToken_f)
        return nullptr;
    auto LookupPrivilegeValueA_f =
      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
    if (!LookupPrivilegeValueA_f)
        return nullptr;
    auto AdjustTokenPrivileges_f =
      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
    if (!AdjustTokenPrivileges_f)
        return nullptr;

    // We need SeLockMemoryPrivilege, so try to enable it for the process
    if (!OpenProcessToken_f(  // OpenProcessToken()
          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
        return nullptr;

    if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
    {
        TOKEN_PRIVILEGES tp{};
        TOKEN_PRIVILEGES prevTp{};
        DWORD            prevTpLen = 0;

        tp.PrivilegeCount           = 1;
        tp.Privileges[0].Luid       = luid;
        tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;

        // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
        // we still need to query GetLastError() to ensure that the privileges were actually obtained.
        if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
                                    &prevTpLen)
            && GetLastError() == ERROR_SUCCESS)
        {
            // Round up size to full pages and allocate
            allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
            mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
                               PAGE_READWRITE);

            // Privilege no longer needed, restore previous state
            AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
        }
    }

    // The token handle was opened above, so it is closed on every path from here
    CloseHandle(hProcessToken);

    return mem;

#endif
}
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
// Try to allocate large pages
|
||||
void* mem = aligned_large_pages_alloc_windows(allocSize);
|
||||
|
||||
// Fall back to regular, page-aligned, allocation if necessary
|
||||
if (!mem)
|
||||
mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void* aligned_large_pages_alloc(size_t allocSize) {
|
||||
|
||||
#if defined(__linux__)
|
||||
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
|
||||
#else
|
||||
constexpr size_t alignment = 4096; // assumed small page size
|
||||
#endif
|
||||
|
||||
// Round up to multiples of alignment
|
||||
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
|
||||
void* mem = std_aligned_alloc(alignment, size);
|
||||
#if defined(MADV_HUGEPAGE)
|
||||
madvise(mem, size, MADV_HUGEPAGE);
|
||||
#endif
|
||||
return mem;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// aligned_large_pages_free() frees memory previously allocated with aligned_large_pages_alloc()
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
void aligned_large_pages_free(void* mem) {
|
||||
|
||||
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
|
||||
{
|
||||
DWORD err = GetLastError();
|
||||
std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
|
||||
<< std::dec << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Non-Windows variant: the matching allocator obtains its memory from
// std_aligned_alloc(), so the pointer is handed back via std_aligned_free().
void aligned_large_pages_free(void* mem) {
    std_aligned_free(mem);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#define GETCWD _getcwd
|
||||
|
||||
Reference in New Issue
Block a user