diff --git a/src/Makefile b/src/Makefile
index 5119b615..29c4f879 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -55,7 +55,7 @@ PGOBENCH = $(WINE_PATH) ./$(EXE) bench
SRCS = benchmark.cpp bitboard.cpp evaluate.cpp main.cpp \
misc.cpp movegen.cpp movepick.cpp position.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
- nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp
+ nnue/nnue_misc.cpp nnue/features/half_ka_v2_hm.cpp nnue/network.cpp engine.cpp score.cpp memory.cpp
HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \
nnue/nnue_misc.h nnue/features/half_ka_v2_hm.h nnue/layers/affine_transform.h \
@@ -63,7 +63,7 @@ HEADERS = benchmark.h bitboard.h evaluate.h misc.h movegen.h movepick.h \
nnue/layers/sqr_clipped_relu.h nnue/nnue_accumulator.h nnue/nnue_architecture.h \
nnue/nnue_common.h nnue/nnue_feature_transformer.h position.h \
search.h syzygy/tbprobe.h thread.h thread_win32_osx.h timeman.h \
- tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h
+ tt.h tune.h types.h uci.h ucioption.h perft.h nnue/network.h engine.h score.h numa.h memory.h
OBJS = $(notdir $(SRCS:.cpp=.o))
@@ -489,8 +489,8 @@ ifeq ($(COMP),clang)
endif
ifeq ($(KERNEL),Darwin)
- CXXFLAGS += -mmacosx-version-min=10.14
- LDFLAGS += -mmacosx-version-min=10.14
+ CXXFLAGS += -mmacosx-version-min=10.15
+ LDFLAGS += -mmacosx-version-min=10.15
ifneq ($(arch),any)
CXXFLAGS += -arch $(arch)
LDFLAGS += -arch $(arch)
diff --git a/src/memory.cpp b/src/memory.cpp
new file mode 100644
index 00000000..565b39b2
--- /dev/null
+++ b/src/memory.cpp
@@ -0,0 +1,229 @@
+/*
+ Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+ Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
+
+ Stockfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Stockfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "memory.h"
+
+#include <cstdlib>
+
+#if __has_include("features.h")
+ #include <features.h>
+#endif
+
+#if defined(__linux__) && !defined(__ANDROID__)
+ #include <sys/mman.h>
+#endif
+
+#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
+ || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
+ || defined(__e2k__)
+ #define POSIXALIGNEDALLOC
+ #include <stdlib.h>
+#endif
+
+#ifdef _WIN32
+ #if _WIN32_WINNT < 0x0601
+ #undef _WIN32_WINNT
+ #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
+ #endif
+
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+
+ #include <ios> // std::hex, std::dec
+ #include <iostream> // std::cerr
+ #include <ostream> // std::endl
+ #include <windows.h>
+// The Windows APIs needed to adjust token privileges could be missing from old
+// Windows versions, so instead of calling them directly (forcing the linker to
+// resolve the calls at link time), try to load them at runtime. To do this we
+// first need to define the corresponding function pointer types.
+extern "C" {
+using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
+using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
+using AdjustTokenPrivileges_t =
+ bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
+}
+#endif
+
+
+namespace Stockfish {
+
+// Allocates `size` bytes aligned to `alignment` with the aligned-allocation primitive
+// selected by the platform macros below (the C++17 library does not guarantee
+// aligned_alloc() everywhere). Free the result only with std_aligned_free().
+void* std_aligned_alloc(size_t alignment, size_t size) {
+ // Apple requires 10.15, which is enforced in the makefile
+#if defined(_ISOC11_SOURCE) || defined(__APPLE__)
+ return aligned_alloc(alignment, size);
+#elif defined(POSIXALIGNEDALLOC)
+ void* mem;
+ return posix_memalign(&mem, alignment, size) ? nullptr : mem; // non-zero result is mapped to nullptr
+#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
+ return _mm_malloc(size, alignment); // note the argument order here: (size, alignment)
+#elif defined(_WIN32)
+ return _aligned_malloc(size, alignment);
+#else
+ return std::aligned_alloc(alignment, size);
+#endif
+}
+
+void std_aligned_free(void* ptr) {
+// Releases memory obtained from std_aligned_alloc(), choosing the deallocator per platform.
+#if defined(POSIXALIGNEDALLOC)
+ free(ptr);
+#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
+ _mm_free(ptr); // NOTE(review): no branch mirrors the _ISOC11_SOURCE case of std_aligned_alloc() - confirm it is never set on Windows
+#elif defined(_WIN32)
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
+// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
+
+#if defined(_WIN32)
+
+static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
+// Tries to allocate allocSize bytes with MEM_LARGE_PAGES; returns nullptr whenever any step fails.
+ #if !defined(_WIN64)
+ return nullptr; // non-64-bit builds never attempt large pages
+ #else
+
+ HANDLE hProcessToken{};
+ LUID luid{};
+ void* mem = nullptr;
+
+ const size_t largePageSize = GetLargePageMinimum();
+ if (!largePageSize) // a zero minimum is treated as "large pages unavailable"
+ return nullptr;
+
+ // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
+
+ HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
+
+ if (!hAdvapi32)
+ hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); // NOTE(review): result is not re-checked before GetProcAddress()
+
+ auto OpenProcessToken_f =
+ OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
+ if (!OpenProcessToken_f)
+ return nullptr;
+ auto LookupPrivilegeValueA_f =
+ LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
+ if (!LookupPrivilegeValueA_f)
+ return nullptr;
+ auto AdjustTokenPrivileges_f =
+ AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
+ if (!AdjustTokenPrivileges_f)
+ return nullptr;
+
+ // We need SeLockMemoryPrivilege, so try to enable it for the process
+ if (!OpenProcessToken_f( // OpenProcessToken()
+ GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
+ return nullptr;
+// From here on the token handle is open; every path below falls through to CloseHandle().
+ if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
+ {
+ TOKEN_PRIVILEGES tp{};
+ TOKEN_PRIVILEGES prevTp{};
+ DWORD prevTpLen = 0;
+
+ tp.PrivilegeCount = 1;
+ tp.Privileges[0].Luid = luid;
+ tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+ // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
+ // we still need to query GetLastError() to ensure that the privileges were actually obtained.
+ if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
+ &prevTpLen)
+ && GetLastError() == ERROR_SUCCESS)
+ {
+ // Round up size to full pages and allocate
+ allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
+ mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
+ PAGE_READWRITE);
+
+ // Privilege no longer needed, restore previous state
+ AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
+ }
+ }
+
+ CloseHandle(hProcessToken);
+
+ return mem; // still nullptr if the privilege or the large-page VirtualAlloc() was refused
+
+ #endif
+}
+
+void* aligned_large_pages_alloc(size_t allocSize) {
+// Windows variant: prefers large pages, otherwise uses an ordinary VirtualAlloc() allocation.
+ // Try to allocate large pages
+ void* mem = aligned_large_pages_alloc_windows(allocSize);
+
+ // Fall back to regular, page-aligned, allocation if necessary
+ if (!mem)
+ mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+
+ return mem; // may still be nullptr if the fallback VirtualAlloc() failed as well
+}
+
+#else
+
+void* aligned_large_pages_alloc(size_t allocSize) {
+// Non-Windows variant: aligned allocation rounded up to `alignment`, with a huge-page hint if available.
+ #if defined(__linux__)
+ constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
+ #else
+ constexpr size_t alignment = 4096; // assumed small page size
+ #endif
+
+ // Round up to multiples of alignment
+ size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
+ void* mem = std_aligned_alloc(alignment, size);
+ #if defined(MADV_HUGEPAGE)
+ madvise(mem, size, MADV_HUGEPAGE); // result ignored; NOTE(review): also called when mem is nullptr
+ #endif
+ return mem;
+}
+
+#endif
+
+
+// aligned_large_pages_free() releases memory previously obtained from aligned_large_pages_alloc()
+
+#if defined(_WIN32)
+
+void aligned_large_pages_free(void* mem) {
+// Windows variant: a null mem is ignored; a failed VirtualFree() is reported and ends the process.
+ if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
+ {
+ DWORD err = GetLastError();
+ std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
+ << std::dec << std::endl;
+ exit(EXIT_FAILURE);
+ }
+}
+
+#else
+
+void aligned_large_pages_free(void* mem) { std_aligned_free(mem); } // non-Windows: memory came from std_aligned_alloc()
+
+#endif
+} // namespace Stockfish
diff --git a/src/memory.h b/src/memory.h
new file mode 100644
index 00000000..ad7ca602
--- /dev/null
+++ b/src/memory.h
@@ -0,0 +1,215 @@
+/*
+ Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+ Copyright (C) 2004-2024 The Stockfish developers (see AUTHORS file)
+
+ Stockfish is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Stockfish is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef MEMORY_H_INCLUDED
+#define MEMORY_H_INCLUDED
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+#include "types.h"
+
+namespace Stockfish {
+
+void* std_aligned_alloc(size_t alignment, size_t size);
+void std_aligned_free(void* ptr);
+// memory aligned by page size, min alignment: 4096 bytes
+void* aligned_large_pages_alloc(size_t size);
+// nop if mem == nullptr
+void aligned_large_pages_free(void* mem);
+
+// Destroys a single object that was created with placement new and hands its storage
+// to free_func. Does nothing when ptr is null. For arrays use memory_deleter_array().
+template<typename T, typename FREE_FUNC>
+void memory_deleter(T* ptr, FREE_FUNC free_func) {
+    if (!ptr)
+        return;
+
+    // Explicitly needed to call the destructor
+    if constexpr (!std::is_trivially_destructible_v<T>)
+        ptr->~T();
+
+    free_func(ptr);
+    return;
+}
+
+// Destroys an array created by the array overload of memory_allocator() and hands the
+// whole allocation (element-count header included) to free_func. No-op when ptr is null.
+template<typename T, typename FREE_FUNC>
+void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
+    if (!ptr)
+        return;
+
+    // Layout: [size_t count, padded to array_offset bytes][element 0][element 1]...
+    // Move back on the pointer to where the size is allocated.
+    const size_t array_offset = std::max(sizeof(size_t), alignof(T));
+    char*        raw_memory   = reinterpret_cast<char*>(ptr) - array_offset;
+
+    if constexpr (!std::is_trivially_destructible_v<T>)
+    {
+        const size_t size = *reinterpret_cast<size_t*>(raw_memory);
+
+        // Explicitly call the destructor for each element in reverse order
+        for (size_t i = size; i-- > 0;)
+            ptr[i].~T();
+    }
+
+    free_func(raw_memory);
+}
+
+// Allocates sizeof(T) bytes via alloc_func and constructs a T there from args (placement new).
+template<typename T, typename ALLOC_FUNC, typename... Args>
+inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
+                                                                  Args&&... args) {
+    void* raw_memory = alloc_func(sizeof(T));
+    ASSERT_ALIGNED(raw_memory, alignof(T));
+    return raw_memory ? new (raw_memory) T(std::forward<Args>(args)...) : nullptr;  // null on failed allocation
+}
+
+// Allocates memory for an array of unknown bound and places it there with placement new.
+template<typename T, typename ALLOC_FUNC>
+inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
+memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
+    using ElementType = std::remove_extent_t<T>;
+
+    const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
+
+    // The element count is saved in a header of array_offset bytes in front of the elements
+    char* raw_memory =
+      reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
+    ASSERT_ALIGNED(raw_memory, alignof(T));
+    if (!raw_memory) return nullptr;  // allocation failed: let the caller see a null pointer
+    new (raw_memory) size_t(num);
+
+    for (size_t i = 0; i < num; ++i)
+        new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
+
+    // Need to return the pointer at the start of the array so that the indexing in unique_ptr<T[]> works
+    return reinterpret_cast<ElementType*>(raw_memory + array_offset);
+}
+
+//
+//
+// aligned large page unique ptr
+//
+//
+
+template<typename T>  // deleter for a single object living in aligned_large_pages_alloc() memory
+struct LargePageDeleter {
+    void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
+};
+
+template<typename T>  // deleter for an array (with count header) in aligned_large_pages_alloc() memory
+struct LargePageArrayDeleter {
+    void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
+};
+
+template<typename T>  // unique_ptr whose deleter is picked by whether T is an array type
+using LargePagePtr =
+  std::conditional_t<std::is_array_v<T>,
+                     std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
+                     std::unique_ptr<T, LargePageDeleter<T>>>;
+
+// make_unique_large_page for single objects: constructs a T from args in large-page memory
+template<typename T, typename... Args>
+std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
+    static_assert(alignof(T) <= 4096,
+                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
+
+    T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
+
+    return LargePagePtr<T>(obj);
+}
+
+// make_unique_large_page for arrays of unknown bound: num value-initialized elements
+template<typename T>
+std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
+    using ElementType = std::remove_extent_t<T>;
+
+    static_assert(alignof(ElementType) <= 4096,
+                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
+
+    ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
+
+    return LargePagePtr<T>(memory);
+}
+
+//
+//
+// aligned unique ptr
+//
+//
+
+template<typename T>  // deleter for a single object living in std_aligned_alloc() memory
+struct AlignedDeleter {
+    void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
+};
+
+template<typename T>  // deleter for an array (with count header) in std_aligned_alloc() memory
+struct AlignedArrayDeleter {
+    void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
+};
+
+template<typename T>  // unique_ptr whose deleter is picked by whether T is an array type
+using AlignedPtr =
+  std::conditional_t<std::is_array_v<T>,
+                     std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
+                     std::unique_ptr<T, AlignedDeleter<T>>>;
+
+// make_unique_aligned for single objects: constructs a T from args in alignof(T)-aligned memory
+template<typename T, typename... Args>
+std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
+    const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
+    T*         obj  = memory_allocator<T>(func, std::forward<Args>(args)...);
+
+    return AlignedPtr<T>(obj);
+}
+
+// make_unique_aligned for arrays of unknown bound: num value-initialized elements
+template<typename T>
+std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
+    using ElementType = std::remove_extent_t<T>;
+
+    const auto   func   = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
+    ElementType* memory = memory_allocator<T>(func, num);
+
+    return AlignedPtr<T>(memory);
+}
+
+
+// Get the first aligned element of an array: ptr rounded up to the next multiple of Alignment.
+// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
+// where N is the number of elements in the array.
+template<uintptr_t Alignment, typename T>
+T* align_ptr_up(T* ptr) {
+    static_assert(alignof(T) < Alignment);
+
+    const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
+    return reinterpret_cast<T*>(
+      reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
+}
+
+
+} // namespace Stockfish
+
+#endif // #ifndef MEMORY_H_INCLUDED
diff --git a/src/misc.cpp b/src/misc.cpp
index aa22e61f..a8bb46ec 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -18,29 +18,6 @@
#include "misc.h"
-#ifdef _WIN32
- #if _WIN32_WINNT < 0x0601
- #undef _WIN32_WINNT
- #define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
- #endif
-
- #ifndef NOMINMAX
- #define NOMINMAX
- #endif
-
- #include
-// The needed Windows API for processor groups could be missed from old Windows
-// versions, so instead of calling them directly (forcing the linker to resolve
-// the calls at compile time), try to load them at runtime. To do this we need
-// first to define the corresponding function pointers.
-extern "C" {
-using OpenProcessToken_t = bool (*)(HANDLE, DWORD, PHANDLE);
-using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
-using AdjustTokenPrivileges_t =
- bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
-}
-#endif
-
#include
#include
#include
@@ -48,25 +25,14 @@ using AdjustTokenPrivileges_t =
#include
#include
#include
-#include
#include
+#include
#include
#include
#include
#include "types.h"
-#if defined(__linux__) && !defined(__ANDROID__)
- #include
-#endif
-
-#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
- || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
- || defined(__e2k__)
- #define POSIXALIGNEDALLOC
- #include
-#endif
-
namespace Stockfish {
namespace {
@@ -427,169 +393,6 @@ void prefetch(const void* addr) {
#endif
-
-// Wrapper for systems where the c++17 implementation
-// does not guarantee the availability of aligned_alloc(). Memory allocated with
-// std_aligned_alloc() must be freed with std_aligned_free().
-void* std_aligned_alloc(size_t alignment, size_t size) {
-
-#if defined(POSIXALIGNEDALLOC)
- void* mem;
- return posix_memalign(&mem, alignment, size) ? nullptr : mem;
-#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
- return _mm_malloc(size, alignment);
-#elif defined(_WIN32)
- return _aligned_malloc(size, alignment);
-#else
- return std::aligned_alloc(alignment, size);
-#endif
-}
-
-void std_aligned_free(void* ptr) {
-
-#if defined(POSIXALIGNEDALLOC)
- free(ptr);
-#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
- _mm_free(ptr);
-#elif defined(_WIN32)
- _aligned_free(ptr);
-#else
- free(ptr);
-#endif
-}
-
-// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
-
-#if defined(_WIN32)
-
-static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
-
- #if !defined(_WIN64)
- return nullptr;
- #else
-
- HANDLE hProcessToken{};
- LUID luid{};
- void* mem = nullptr;
-
- const size_t largePageSize = GetLargePageMinimum();
- if (!largePageSize)
- return nullptr;
-
- // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
-
- HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
-
- if (!hAdvapi32)
- hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
-
- auto OpenProcessToken_f =
- OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
- if (!OpenProcessToken_f)
- return nullptr;
- auto LookupPrivilegeValueA_f =
- LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
- if (!LookupPrivilegeValueA_f)
- return nullptr;
- auto AdjustTokenPrivileges_f =
- AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
- if (!AdjustTokenPrivileges_f)
- return nullptr;
-
- // We need SeLockMemoryPrivilege, so try to enable it for the process
- if (!OpenProcessToken_f( // OpenProcessToken()
- GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
- return nullptr;
-
- if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
- {
- TOKEN_PRIVILEGES tp{};
- TOKEN_PRIVILEGES prevTp{};
- DWORD prevTpLen = 0;
-
- tp.PrivilegeCount = 1;
- tp.Privileges[0].Luid = luid;
- tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
- // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
- // we still need to query GetLastError() to ensure that the privileges were actually obtained.
- if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
- &prevTpLen)
- && GetLastError() == ERROR_SUCCESS)
- {
- // Round up size to full pages and allocate
- allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
- mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
- PAGE_READWRITE);
-
- // Privilege no longer needed, restore previous state
- AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
- }
- }
-
- CloseHandle(hProcessToken);
-
- return mem;
-
- #endif
-}
-
-void* aligned_large_pages_alloc(size_t allocSize) {
-
- // Try to allocate large pages
- void* mem = aligned_large_pages_alloc_windows(allocSize);
-
- // Fall back to regular, page-aligned, allocation if necessary
- if (!mem)
- mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
-
- return mem;
-}
-
-#else
-
-void* aligned_large_pages_alloc(size_t allocSize) {
-
- #if defined(__linux__)
- constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
- #else
- constexpr size_t alignment = 4096; // assumed small page size
- #endif
-
- // Round up to multiples of alignment
- size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
- void* mem = std_aligned_alloc(alignment, size);
- #if defined(MADV_HUGEPAGE)
- madvise(mem, size, MADV_HUGEPAGE);
- #endif
- return mem;
-}
-
-#endif
-
-
-// aligned_large_pages_free() will free the previously allocated ttmem
-
-#if defined(_WIN32)
-
-void aligned_large_pages_free(void* mem) {
-
- if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
- {
- DWORD err = GetLastError();
- std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
- << std::dec << std::endl;
- exit(EXIT_FAILURE);
- }
-}
-
-#else
-
-void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
-
-#endif
-
-
#ifdef _WIN32
#include
#define GETCWD _getcwd
diff --git a/src/misc.h b/src/misc.h
index 5c0bde44..557a4d8c 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -26,10 +26,9 @@
#include
#include
#include
-#include
+#include
#include
#include
-#include
#define stringify2(x) #x
#define stringify(x) stringify2(x)
@@ -44,39 +43,10 @@ std::string compiler_info();
// which can be quite slow.
void prefetch(const void* addr);
-void start_logger(const std::string& fname);
-void* std_aligned_alloc(size_t alignment, size_t size);
-void std_aligned_free(void* ptr);
-// memory aligned by page size, min alignment: 4096 bytes
-void* aligned_large_pages_alloc(size_t size);
-// nop if mem == nullptr
-void aligned_large_pages_free(void* mem);
+void start_logger(const std::string& fname);
size_t str_to_size_t(const std::string& s);
-// Deleter for automating release of memory area
-template
-struct AlignedDeleter {
- void operator()(T* ptr) const {
- ptr->~T();
- std_aligned_free(ptr);
- }
-};
-
-template
-struct LargePageDeleter {
- void operator()(T* ptr) const {
- ptr->~T();
- aligned_large_pages_free(ptr);
- }
-};
-
-template
-using AlignedPtr = std::unique_ptr>;
-
-template
-using LargePagePtr = std::unique_ptr>;
-
#if defined(__linux__)
struct PipeDeleter {
@@ -141,20 +111,6 @@ std::ostream& operator<<(std::ostream&, SyncCout);
#define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK
-
-// Get the first aligned element of an array.
-// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
-// where N is the number of elements in the array.
-template
-T* align_ptr_up(T* ptr) {
- static_assert(alignof(T) < Alignment);
-
- const uintptr_t ptrint = reinterpret_cast(reinterpret_cast(ptr));
- return reinterpret_cast(
- reinterpret_cast((ptrint + (Alignment - 1)) / Alignment * Alignment));
-}
-
-
// True if and only if the binary is compiled on a little-endian machine
static inline const union {
uint32_t i;
diff --git a/src/nnue/network.cpp b/src/nnue/network.cpp
index db864fcd..71c384ff 100644
--- a/src/nnue/network.cpp
+++ b/src/nnue/network.cpp
@@ -20,7 +20,6 @@
#include
#include
-#include
#include
#include
#include
@@ -30,6 +29,7 @@
#include "../evaluate.h"
#include "../incbin/incbin.h"
+#include "../memory.h"
#include "../misc.h"
#include "../position.h"
#include "../types.h"
@@ -86,23 +86,6 @@ namespace Stockfish::Eval::NNUE {
namespace Detail {
-// Initialize the evaluation function parameters
-template
-void initialize(AlignedPtr& pointer) {
-
- pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T))));
- std::memset(pointer.get(), 0, sizeof(T));
-}
-
-template
-void initialize(LargePagePtr& pointer) {
-
- static_assert(alignof(T) <= 4096,
- "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
- pointer.reset(reinterpret_cast(aligned_large_pages_alloc(sizeof(T))));
- std::memset(pointer.get(), 0, sizeof(T));
-}
-
// Read evaluation function parameters
template
bool read_parameters(std::istream& stream, T& reference) {
@@ -128,19 +111,17 @@ template
Network::Network(const Network& other) :
evalFile(other.evalFile),
embeddedType(other.embeddedType) {
+
if (other.featureTransformer)
- {
- Detail::initialize(featureTransformer);
- *featureTransformer = *other.featureTransformer;
- }
+ featureTransformer = make_unique_large_page(*other.featureTransformer);
+
+ network = make_unique_aligned(LayerStacks);
+
+ if (!other.network)
+ return;
+
for (std::size_t i = 0; i < LayerStacks; ++i)
- {
- if (other.network[i])
- {
- Detail::initialize(network[i]);
- *(network[i]) = *(other.network[i]);
- }
- }
+ network[i] = other.network[i];
}
template
@@ -150,18 +131,15 @@ Network::operator=(const Network& other) {
embeddedType = other.embeddedType;
if (other.featureTransformer)
- {
- Detail::initialize(featureTransformer);
- *featureTransformer = *other.featureTransformer;
- }
+ featureTransformer = make_unique_large_page(*other.featureTransformer);
+
+ network = make_unique_aligned(LayerStacks);
+
+ if (!other.network)
+ return *this;
+
for (std::size_t i = 0; i < LayerStacks; ++i)
- {
- if (other.network[i])
- {
- Detail::initialize(network[i]);
- *(network[i]) = *(other.network[i]);
- }
- }
+ network[i] = other.network[i];
return *this;
}
@@ -253,7 +231,7 @@ Value Network::evaluate(const Position&
const int bucket = (pos.count() - 1) / 4;
const auto psqt = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
- const auto positional = network[bucket]->propagate(transformedFeatures);
+ const auto positional = network[bucket].propagate(transformedFeatures);
if (complexity)
*complexity = std::abs(psqt - positional) / OutputScale;
@@ -292,11 +270,11 @@ void Network::verify(std::string evalfilePath) const {
exit(EXIT_FAILURE);
}
- size_t size = sizeof(*featureTransformer) + sizeof(*network) * LayerStacks;
+ size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks;
sync_cout << "info string NNUE evaluation using " << evalfilePath << " ("
<< size / (1024 * 1024) << "MiB, (" << featureTransformer->InputDimensions << ", "
- << network[0]->TransformedFeatureDimensions << ", " << network[0]->FC_0_OUTPUTS
- << ", " << network[0]->FC_1_OUTPUTS << ", 1))" << sync_endl;
+ << network[0].TransformedFeatureDimensions << ", " << network[0].FC_0_OUTPUTS << ", "
+ << network[0].FC_1_OUTPUTS << ", 1))" << sync_endl;
}
@@ -333,7 +311,7 @@ Network::trace_evaluate(const Position&
{
const auto materialist =
featureTransformer->transform(pos, cache, transformedFeatures, bucket);
- const auto positional = network[bucket]->propagate(transformedFeatures);
+ const auto positional = network[bucket].propagate(transformedFeatures);
t.psqt[bucket] = static_cast(materialist / OutputScale);
t.positional[bucket] = static_cast(positional / OutputScale);
@@ -386,9 +364,8 @@ void Network::load_internal() {
template
void Network::initialize() {
- Detail::initialize(featureTransformer);
- for (std::size_t i = 0; i < LayerStacks; ++i)
- Detail::initialize(network[i]);
+ featureTransformer = make_unique_large_page();
+ network = make_unique_aligned(LayerStacks);
}
@@ -455,7 +432,7 @@ bool Network::read_parameters(std::istream& stream,
return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
{
- if (!Detail::read_parameters(stream, *(network[i])))
+ if (!Detail::read_parameters(stream, network[i]))
return false;
}
return stream && stream.peek() == std::ios::traits_type::eof();
@@ -471,7 +448,7 @@ bool Network::write_parameters(std::ostream& stream,
return false;
for (std::size_t i = 0; i < LayerStacks; ++i)
{
- if (!Detail::write_parameters(stream, *(network[i])))
+ if (!Detail::write_parameters(stream, network[i]))
return false;
}
return bool(stream);
diff --git a/src/nnue/network.h b/src/nnue/network.h
index f0ccfafc..6ba3cfba 100644
--- a/src/nnue/network.h
+++ b/src/nnue/network.h
@@ -25,13 +25,13 @@
#include
#include
-#include "../misc.h"
+#include "../memory.h"
#include "../position.h"
#include "../types.h"
+#include "nnue_accumulator.h"
#include "nnue_architecture.h"
#include "nnue_feature_transformer.h"
#include "nnue_misc.h"
-#include "nnue_accumulator.h"
namespace Stockfish::Eval::NNUE {
@@ -91,7 +91,7 @@ class Network {
LargePagePtr featureTransformer;
// Evaluation function
- AlignedPtr network[LayerStacks];
+ AlignedPtr network;
EvalFile evalFile;
EmbeddedNNUEType embeddedType;
diff --git a/src/numa.h b/src/numa.h
index 5934a0cd..a56d7142 100644
--- a/src/numa.h
+++ b/src/numa.h
@@ -32,6 +32,7 @@
#include
#include
#include
+#include
// We support linux very well, but we explicitly do NOT support Android, because there's
// no affected systems, not worth maintaining.
diff --git a/src/thread.h b/src/thread.h
index 102b2299..7416271b 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -23,15 +23,15 @@
#include
#include
#include
+#include
#include
#include
#include
-#include
+#include "numa.h"
#include "position.h"
#include "search.h"
#include "thread_win32_osx.h"
-#include "numa.h"
namespace Stockfish {
diff --git a/src/tt.cpp b/src/tt.cpp
index f95170e9..f808106a 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -24,7 +24,7 @@
#include
#include
-#include "misc.h"
+#include "memory.h"
#include "syzygy/tbprobe.h"
#include "thread.h"
@@ -75,11 +75,10 @@ uint8_t TTEntry::relative_age(const uint8_t generation8) const {
// measured in megabytes. Transposition table consists
// of clusters and each cluster consists of ClusterSize number of TTEntry.
void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) {
- aligned_large_pages_free(table);
-
clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
- table = static_cast(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
+ table = make_unique_large_page(clusterCount);
+
if (!table)
{
std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl;
diff --git a/src/tt.h b/src/tt.h
index 3b09ec4e..2dcfdd44 100644
--- a/src/tt.h
+++ b/src/tt.h
@@ -21,7 +21,9 @@
#include
#include
+#include
+#include "memory.h"
#include "misc.h"
#include "types.h"
@@ -94,8 +96,6 @@ class TranspositionTable {
static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
public:
- ~TranspositionTable() { aligned_large_pages_free(table); }
-
void new_search() {
// increment by delta to keep lower bits as is
generation8 += GENERATION_DELTA;
@@ -115,9 +115,9 @@ class TranspositionTable {
private:
friend struct TTEntry;
- size_t clusterCount;
- Cluster* table = nullptr;
- uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
+ size_t clusterCount;
+ LargePagePtr table;
+ uint8_t generation8 = 0; // Size must be not bigger than TTEntry::genBound8
};
} // namespace Stockfish